Commit 45407671 by rawmind0

Copy latest to v0.6.2 chart

parent 9d4d8852
# Patterns to ignore when building packages.
# This supports shell glob matching, relative path matching, and
# negation (prefixed with !). Only one pattern per line.
.DS_Store
# Common VCS dirs
.git/
.gitignore
.bzr/
.bzrignore
.hg/
.hgignore
.svn/
# Common backup files
*.swp
*.bak
*.tmp
*~
# Various IDEs
.project
.idea/
*.tmproj
# Helm chart metadata for the Kubeflow catalog entry.
apiVersion: v1
name: kubeflow
# Chart version, and the (quoted, so it stays a string) application version.
version: 0.1.0
appVersion: "1.0"
description: Machine Learning Toolkit for Kubernetes
home: https://www.kubeflow.org/docs/about/kubeflow/
# Icon is referenced by a file URL relative to the chart directory.
icon: file://../icon.jpg
maintainers:
- name: guangbochen
  email: support@rancher.com
# Kubeflow
The Kubeflow project is dedicated to making deployments of machine learning (ML) workflows on Kubernetes simple, portable and scalable. Our goal is not to recreate other services, but to provide a straightforward way to deploy best-of-breed open-source systems for ML to diverse infrastructures. Anywhere you are running Kubernetes, you should be able to run Kubeflow.
## Who should consider using Kubeflow?
Based on the current functionality you should consider using Kubeflow if:
- You want to train/serve TensorFlow models in different environments (e.g. local, on prem, and cloud)
- You want to use Jupyter notebooks to manage TensorFlow training jobs
- You want to launch training jobs that use resources – such as additional CPUs or GPUs – that aren’t available on your personal computer
- You want to combine TensorFlow with other processes
> For example, you may want to use [tensorflow/agents](https://github.com/google-research/batch-ppo) to run simulations to generate data for training reinforcement learning models.
This list is based ONLY on current capabilities. We are investing significant resources to expand the functionality and actively soliciting help from companies and individuals interested in contributing (see [Contributing](https://www.kubeflow.org/docs/about/contributing/)).
## How does it work?
For more details on how Kubeflow works, please refer to the [Kubeflow Doc](https://www.kubeflow.org/docs/about/kubeflow/).
# Kubeflow
The Kubeflow project is dedicated to making deployments of machine learning (ML) workflows on Kubernetes simple, portable and scalable. Our goal is not to recreate other services, but to provide a straightforward way to deploy best-of-breed open-source systems for ML to diverse infrastructures. Anywhere you are running Kubernetes, you should be able to run Kubeflow.
## Who should consider using Kubeflow?
Based on the current functionality you should consider using Kubeflow if:
- You want to train/serve TensorFlow models in different environments (e.g. local, on prem, and cloud)
- You want to use Jupyter notebooks to manage TensorFlow training jobs
- You want to launch training jobs that use resources – such as additional CPUs or GPUs – that aren’t available on your personal computer
- You want to combine TensorFlow with other processes
> For example, you may want to use [tensorflow/agents](https://github.com/google-research/batch-ppo) to run simulations to generate data for training reinforcement learning models.
# Rancher catalog metadata and install-time questions for the Kubeflow chart.
# Each question's `variable` is the dotted path of the Helm value it sets, so
# it must match the key the templates actually read.
categories:
- machine learning
rancher_max_version: 2.3.1
labels:
  io.rancher.certified: experimental
# The chart is always installed into this namespace.
namespace: kubeflow
questions:
# --- Ambassador (UI gateway) -------------------------------------------------
- variable: ambassador.service.type
  default: "NodePort"
  description: "Define Kubeflow Ambassador UI service type"
  type: enum
  required: true
  options:
  - "ClusterIP"
  - "NodePort"
  - "Rancher-Proxy"
  label: Kubeflow Ambassador UI Service
  group: "Kubeflow Ambassador Settings"
  # The port sub-question is only relevant for NodePort services.
  show_subquestion_if: "NodePort"
  subquestions:
  - variable: ambassador.service.nodePort
    default: ""
    description: "NodePort port number(to set explicitly, choose port between 30000-32767)"
    type: int
    min: 30000
    max: 32767
    label: Ambassador UI Service NodePort number
# --- Katib --------------------------------------------------------------------
- variable: katib.vizier.service.type
  default: "ClusterIP"
  description: "Define kubeflow katib vizier service type"
  type: enum
  required: true
  options:
  - "ClusterIP"
  - "NodePort"
  label: Kubeflow Katib Vizier Service
  group: "Kubeflow Katib Settings"
  show_subquestion_if: "NodePort"
  subquestions:
  - variable: katib.vizier.service.nodePort
    default: ""
    description: "NodePort port number(to set explicitly, choose port between 30000-32767)"
    type: int
    min: 30000
    max: 32767
    label: Katib Vizier Service NodePort number
- variable: katib.vizierdb.persistence.enabled
  default: false
  description: "Enable persistent volume for Katib Vizier"
  type: boolean
  required: true
  label: Katib Vizier Persistent Volume Enabled
  show_subquestion_if: true
  group: "Kubeflow Katib Settings"
  subquestions:
  - variable: katib.vizierdb.persistence.size
    default: "10Gi"
    description: "Katib Vizier Persistent Volume Size"
    type: string
    label: Katib Vizier Volume Size
  - variable: katib.vizierdb.persistence.storageClass
    default: ""
    description: "If undefined or null, uses the default StorageClass. Default to null"
    type: storageclass
    label: Default StorageClass for Katib Vizier
  - variable: katib.vizierdb.persistence.existingClaim
    default: ""
    description: "If not empty, uses the specified existing PVC instead of creating new one"
    type: string
    label: Existing Persistent Volume Claim for Katib Vizier
# --- JupyterHub ---------------------------------------------------------------
- variable: jupyterhub.enabled
  default: true
  description: "Enable jupyterhub of single-user Jupyter notebook server"
  type: boolean
  required: true
  label: Enable JupyterHub
  group: "JupyterHub Settings"
  show_subquestion_if: true
  subquestions:
  - variable: jupyterhub.image.repository
    default: "gcr.io/kubeflow/jupyterhub-k8s"
    description: "Docker image of the JupyterHub"
    type: string
    label: JupyterHub Image Repository
  - variable: jupyterhub.image.tag
    default: "v20180531-3bb991b1"
    description: "The image tag of JupyterHub"
    type: string
    label: JupyterHub Image Tag
# --- TensorFlow job operator --------------------------------------------------
- variable: tfJobOperator.enabled
  default: true
  description: "Enable tensorflow job operator"
  type: boolean
  required: true
  label: Enable TensorFlow Job Operator
  group: "TensorFlow Operator Settings"
  show_subquestion_if: true
  subquestions:
  - variable: tfJobOperator.image.repository
    default: "gcr.io/kubeflow-images-public/tf_operator"
    description: "Docker image of the TensorFlow Job Operator"
    type: string
    label: Tensorflow Job Operator Image Repository
  - variable: tfJobOperator.image.tag
    default: "v0.3.0"
    description: "The image tag of Tensorflow Job Operator"
    type: string
    label: Tensorflow Job Operator Image Tag
# --- PyTorch operator (asked once; a verbatim duplicate was removed) ----------
- variable: pytorchOperator.enabled
  default: false
  description: "Enable PyTorch - a deep learning framework."
  type: boolean
  required: true
  label: Enable PyTorch Operator
  group: "PyTorch Settings"
  show_subquestion_if: true
  subquestions:
  - variable: pytorchOperator.image.repository
    default: "gcr.io/kubeflow-images-public/pytorch-operator"
    description: "Docker image of the PyTorch operator"
    type: string
    label: PyTorch Operator Image Repository
  - variable: pytorchOperator.image.tag
    default: "v0.3.0"
    description: "The image tag of PyTorch operator"
    type: string
    label: PyTorch Operator Image Tag
# --- Chainer operator ---------------------------------------------------------
# The chainer templates read the chainerOperator.* values, so the questions
# must set those keys; answers stored under chainer.* would be ignored.
- variable: chainerOperator.enabled
  default: false
  description: "Enable Chainer operator - a flexible framework of neural networks"
  type: boolean
  required: true
  label: Enable Chainer Operator
  group: "Chainer Settings"
  show_subquestion_if: true
  subquestions:
  - variable: chainerOperator.image.repository
    default: "gcr.io/kubeflow-images-public/chainer-operator"
    description: "Docker image of the chainer operator"
    type: string
    label: Chainer Operator Image Repository
  - variable: chainerOperator.image.tag
    default: "v0.3.0"
    description: "The image tag of chainer operator"
    type: string
    label: Chainer Operator Image Tag
# --- Apache MXNet operator ----------------------------------------------------
- variable: mxnetOperator.enabled
  default: false
  description: "Enable apache MXNet - a flexible and efficient library for deep learning."
  type: boolean
  required: true
  label: Enable Apache MXNet Operator
  group: "Apache MXNet Settings"
  show_subquestion_if: true
  subquestions:
  - variable: mxnetOperator.image.repository
    default: "mxjob/mxnet-operator"
    description: "Docker image of the MXNet operator"
    type: string
    label: MXNet Operator Image Repository
  - variable: mxnetOperator.image.tag
    default: "v1"
    description: "The image tag of MXNet operator"
    type: string
    label: MXNet Operator Image Tag
{{/* vim: set filetype=mustache: */}}
{{/*
Short chart name: nameOverride when it is set, otherwise the chart's own name.
The result is cut to 63 characters and a trailing "-" is dropped.
*/}}
{{- define "kubeflow.name" -}}
{{- .Values.nameOverride | default .Chart.Name | trunc 63 | trimSuffix "-" -}}
{{- end -}}
{{/*
Fully qualified app name, limited to 63 characters because some Kubernetes
name fields follow the DNS naming spec.
Resolution order:
  1. fullnameOverride, when set;
  2. the release name alone, when it already contains the chart name;
  3. "<release name>-<chart name>" otherwise.
*/}}
{{- define "kubeflow.fullname" -}}
{{- $name := .Values.nameOverride | default .Chart.Name -}}
{{- if .Values.fullnameOverride -}}
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}}
{{- else if contains $name .Release.Name -}}
{{- .Release.Name | trunc 63 | trimSuffix "-" -}}
{{- else -}}
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}}
{{- end -}}
{{- end -}}
{{/*
"<chart name>-<chart version>" value for the chart label; "+" is replaced by
"_" before the usual 63-character truncation.
*/}}
{{- define "kubeflow.chart" -}}
{{- $label := printf "%s-%s" .Chart.Name .Chart.Version -}}
{{- $label | replace "+" "_" | trunc 63 | trimSuffix "-" -}}
{{- end -}}
# Ambassador gateway Deployment: the ambassador container plus its statsd and
# statsd-sink sidecars.
# Uses apps/v1 because extensions/v1beta1 Deployments are no longer served
# from Kubernetes 1.16 on.
apiVersion: apps/v1
kind: Deployment
metadata:
  labels:
    ksonnet.io/component: ambassador
  name: ambassador
  namespace: kubeflow
spec:
  replicas: {{ .Values.ambassador.replicas }}
  # apps/v1 requires an explicit selector; it mirrors the pod template labels.
  selector:
    matchLabels:
      service: ambassador
  template:
    metadata:
      labels:
        service: ambassador
      namespace: kubeflow
    spec:
      containers:
      - name: ambassador
        env:
        # Pass the pod's own namespace to Ambassador via the downward API.
        - name: AMBASSADOR_NAMESPACE
          valueFrom:
            fieldRef:
              fieldPath: metadata.namespace
        - name: AMBASSADOR_SINGLE_NAMESPACE
          value: "true"
        image: "{{ .Values.ambassador.image.repository }}:{{ .Values.ambassador.image.tag }}"
        livenessProbe:
          httpGet:
            path: /ambassador/v0/check_alive
            port: 8877
          initialDelaySeconds: 30
          periodSeconds: 30
        readinessProbe:
          httpGet:
            path: /ambassador/v0/check_ready
            port: 8877
          initialDelaySeconds: 30
          periodSeconds: 30
        resources:
          limits:
            cpu: 1
            memory: 400Mi
          requests:
            cpu: 200m
            memory: 100Mi
      - name: statsd
        image: "{{ .Values.ambassador.statsdImage.repository }}:{{ .Values.ambassador.statsdImage.tag }}"
      - name: statsd-sink
        image: "{{ .Values.ambassador.exporterImage.repository }}:{{ .Values.ambassador.exporterImage.tag }}"
      restartPolicy: Always
      serviceAccountName: ambassador
apiVersion: v1
kind: ServiceAccount
metadata:
labels:
ksonnet.io/component: ambassador
name: ambassador
namespace: kubeflow
---
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: Role
metadata:
labels:
ksonnet.io/component: ambassador
name: ambassador
namespace: kubeflow
rules:
- apiGroups:
- ""
resources:
- services
verbs:
- get
- list
- watch
- apiGroups:
- ""
resources:
- configmaps
verbs:
- create
- update
- patch
- get
- list
- watch
- apiGroups:
- ""
resources:
- secrets
verbs:
- get
- list
- watch
---
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: RoleBinding
metadata:
labels:
ksonnet.io/component: ambassador
name: ambassador
namespace: kubeflow
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: Role
name: ambassador
subjects:
- kind: ServiceAccount
name: ambassador
namespace: kubeflow
# Service in front of the Ambassador gateway (port 80).
# The "Rancher-Proxy" choice is rendered as a ClusterIP service carrying the
# kubernetes.io/cluster-service label.
apiVersion: v1
kind: Service
metadata:
  labels:
    ksonnet.io/component: ambassador
    service: ambassador
    {{- if eq .Values.ambassador.service.type "Rancher-Proxy" }}
    kubernetes.io/cluster-service: "true"
    {{- end }}
  name: ambassador
  namespace: kubeflow
spec:
  ports:
  - name: ambassador
    port: 80
    targetPort: 80
    {{- /* A nodePort on a ClusterIP service is rejected by the API server, so
           only emit it when the rendered service type is not ClusterIP. */}}
    {{- if and .Values.ambassador.service.nodePort (ne .Values.ambassador.service.type "ClusterIP") (ne .Values.ambassador.service.type "Rancher-Proxy") }}
    nodePort: {{ .Values.ambassador.service.nodePort }}
    {{- end }}
  selector:
    service: ambassador
  {{- if eq .Values.ambassador.service.type "Rancher-Proxy" }}
  type: ClusterIP
  {{- else }}
  type: {{ .Values.ambassador.service.type }}
  {{- end }}
---
apiVersion: v1
kind: Service
metadata:
labels:
ksonnet.io/component: ambassador
service: ambassador-admin
name: ambassador-admin
namespace: kubeflow
spec:
ports:
- name: ambassador-admin
port: 8877
targetPort: 8877
selector:
service: ambassador
type: ClusterIP
---
apiVersion: v1
kind: Service
metadata:
annotations:
getambassador.io/config: |-
---
apiVersion: ambassador/v0
kind: Mapping
name: k8s-dashboard-ui-mapping
prefix: /k8s/ui/
rewrite: /
tls: true
service: kubernetes-dashboard.kube-system
labels:
ksonnet.io/component: ambassador
name: k8s-dashboard
namespace: kubeflow
spec:
ports:
- port: 443
targetPort: 8443
selector:
k8s-app: kubernetes-dashboard
type: ClusterIP
---
apiVersion: v1
kind: Service
metadata:
annotations:
prometheus.io/port: "9102"
prometheus.io/scrape: "true"
labels:
ksonnet.io/component: ambassador
service: ambassador
name: statsd-sink
namespace: kubeflow
spec:
ports:
- name: statsd-sink
port: 9102
protocol: TCP
targetPort: 9102
selector:
service: ambassador
type: ClusterIP
# Identity used by the centraldashboard pods.
apiVersion: v1
kind: ServiceAccount
metadata:
  labels:
    ksonnet.io/component: centraldashboard
  name: centraldashboard
  namespace: kubeflow
---
# Read-only access to pods (including exec/log subresources) and secrets.
# ClusterRole is cluster-scoped, so it carries no metadata.namespace.
# NOTE(review): no binding of this role to the centraldashboard ServiceAccount
# is visible in this part of the chart - confirm one exists elsewhere.
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRole
metadata:
  labels:
    app: centraldashboard
    ksonnet.io/component: centraldashboard
  name: centraldashboard
rules:
- apiGroups:
  - ""
  resources:
  - pods
  - pods/exec
  - pods/log
  verbs:
  - get
  - list
  - watch
- apiGroups:
  - ""
  resources:
  - secrets
  verbs:
  - get
apiVersion: v1
kind: Service
metadata:
annotations:
getambassador.io/config: |-
---
apiVersion: ambassador/v0
kind: Mapping
name: centralui-mapping
prefix: /
rewrite: /
service: centraldashboard.kubeflow
labels:
app: centraldashboard
ksonnet.io/component: centraldashboard
name: centraldashboard
namespace: kubeflow
spec:
ports:
- port: 80
targetPort: 8082
selector:
app: centraldashboard
sessionAffinity: None
type: ClusterIP
---
# Central dashboard Deployment; serves on container port 8082.
# Uses apps/v1 because extensions/v1beta1 Deployments are no longer served
# from Kubernetes 1.16 on.
apiVersion: apps/v1
kind: Deployment
metadata:
  labels:
    app: centraldashboard
    ksonnet.io/component: centraldashboard
  name: centraldashboard
  namespace: kubeflow
spec:
  # apps/v1 requires an explicit selector; it mirrors the pod template labels.
  selector:
    matchLabels:
      app: centraldashboard
  template:
    metadata:
      labels:
        app: centraldashboard
    spec:
      containers:
      - image: "{{ .Values.centraldashboard.image.repository }}:{{ .Values.centraldashboard.image.tag }}"
        name: centraldashboard
        ports:
        - containerPort: 8082
      serviceAccountName: centraldashboard
{{- if .Values.chainerOperator.enabled }}
# CustomResourceDefinition for ChainerJob (group kubeflow.org, v1alpha1).
# Installed through the crd-install hook; a previous hook resource is deleted
# before a new one is created.
apiVersion: apiextensions.k8s.io/v1beta1
kind: CustomResourceDefinition
metadata:
  name: chainerjobs.kubeflow.org
  labels:
    ksonnet.io/component: chainer-operator
  annotations:
    helm.sh/hook: crd-install
    helm.sh/hook-delete-policy: before-hook-creation
spec:
  group: kubeflow.org
  version: v1alpha1
  scope: Namespaced
  names:
    kind: ChainerJob
    singular: chainerjob
    plural: chainerjobs
    shortNames: [chj, chjs, chjob, chjobs]
    categories: [all]
{{- end }}
{{- if .Values.chainerOperator.enabled }}
apiVersion: apps/v1
kind: Deployment
metadata:
labels:
app: chainer-operator
ksonnet.io/component: chainer-operator
name: chainer-operator
namespace: kubeflow
spec:
replicas: 1
selector:
matchLabels:
app: chainer-operator
template:
metadata:
labels:
app: chainer-operator
spec:
containers:
- args:
- -v
- "2"
- -stderrthreshold
- INFO
image: "{{ .Values.chainerOperator.image.repository }}:{{ .Values.chainerOperator.image.tag }}"
imagePullPolicy: Always
name: chainer-operator
serviceAccountName: chainer-operator
{{- end }}
{{- if .Values.chainerOperator.enabled }}
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
labels:
ksonnet.io/component: chainer-operator
name: chainer-operator
rules:
- apiGroups:
- ""
resources:
- configmaps
- serviceaccounts
verbs:
- create
- update
- list
- watch
- apiGroups:
- ""
resources:
- pods
verbs:
- get
- list
- apiGroups:
- ""
resources:
- pods/exec
verbs:
- create
- apiGroups:
- ""
resources:
- events
verbs:
- create
- patch
- apiGroups:
- rbac.authorization.k8s.io
resources:
- roles
- rolebindings
verbs:
- create
- update
- list
- watch
- apiGroups:
- apps
resources:
- statefulsets
verbs:
- get
- create
- list
- update
- watch
- apiGroups:
- batch
resources:
- jobs
verbs:
- create
- list
- watch
- apiGroups:
- apiextensions.k8s.io
resources:
- customresourcedefinitions
verbs:
- create
- get
- apiGroups:
- kubeflow.org
resources:
- chainerjobs
verbs:
- '*'
---
apiVersion: v1
kind: ServiceAccount
metadata:
labels:
ksonnet.io/component: chainer-operator
name: chainer-operator
namespace: kubeflow
---
# Grants the chainer-operator ClusterRole to the chainer-operator
# ServiceAccount in the kubeflow namespace.
# ClusterRoleBinding is cluster-scoped, so it carries no metadata.namespace;
# only the subject is namespaced.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  labels:
    ksonnet.io/component: chainer-operator
  name: chainer-operator
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: chainer-operator
subjects:
- kind: ServiceAccount
  name: chainer-operator
  namespace: kubeflow
{{- end }}
{{- if .Values.jupyterhub.enabled }}
# ConfigMap holding the JupyterHub configuration script.
# Rendered only when jupyterhub.enabled is set. The tf-hub StatefulSet mounts
# it at /etc/config and starts jupyterhub with
# -f /etc/config/jupyterhub_config.py.
apiVersion: v1
kind: ConfigMap
metadata:
labels:
ksonnet.io/component: jupyterhub
name: jupyterhub-config
namespace: kubeflow
data:
# Embeds scripts/jupyterhub_config.py from the chart files as a data entry,
# indented by two spaces so it nests under the data key.
{{ (.Files.Glob "scripts/jupyterhub_config.py").AsConfig | indent 2 }}
{{- end }}
{{- if .Values.jupyterhub.enabled }}
apiVersion: v1
kind: ServiceAccount
metadata:
labels:
app: jupyter-hub
ksonnet.io/component: jupyterhub
name: jupyter-hub
namespace: kubeflow
---
apiVersion: v1
kind: ServiceAccount
metadata:
labels:
ksonnet.io/component: jupyterhub
name: jupyter-notebook
namespace: kubeflow
---
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: Role
metadata:
labels:
ksonnet.io/component: jupyterhub
name: jupyter-notebook-role
namespace: kubeflow
rules:
- apiGroups:
- ""
resources:
- pods
- services
verbs:
- '*'
- apiGroups:
- ""
- apps
- extensions
resources:
- deployments
- replicasets
verbs:
- '*'
- apiGroups:
- kubeflow.org
resources:
- '*'
verbs:
- '*'
- apiGroups:
- batch
resources:
- jobs
verbs:
- '*'
---
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: Role
metadata:
labels:
ksonnet.io/component: jupyterhub
name: jupyter-role
namespace: kubeflow
rules:
- apiGroups:
- ""
resources:
- pods
- persistentvolumeclaims
verbs:
- get
- watch
- list
- create
- delete
- apiGroups:
- ""
resources:
- events
verbs:
- get
- watch
- list
---
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: RoleBinding
metadata:
labels:
ksonnet.io/component: jupyterhub
name: jupyter-notebook-role
namespace: kubeflow
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: Role
name: jupyter-notebook-role
subjects:
- kind: ServiceAccount
name: jupyter-notebook
namespace: kubeflow
---
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: RoleBinding
metadata:
labels:
ksonnet.io/component: jupyterhub
name: jupyter-role
namespace: kubeflow
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: Role
name: jupyter-role
subjects:
- kind: ServiceAccount
name: jupyter-hub
namespace: kubeflow
{{- end }}
{{- if .Values.jupyterhub.enabled }}
apiVersion: v1
kind: Service
metadata:
annotations:
prometheus.io/scrape: "true"
labels:
app: tf-hub
ksonnet.io/component: jupyterhub
name: tf-hub-0
namespace: kubeflow
spec:
clusterIP: None
ports:
- name: hub
port: 8000
selector:
app: tf-hub
---
apiVersion: v1
kind: Service
metadata:
annotations:
getambassador.io/config: |-
---
apiVersion: ambassador/v0
kind: Mapping
name: tf-hub-lb-hub-mapping
prefix: /hub/
rewrite: /hub/
timeout_ms: 300000
service: tf-hub-lb.kubeflow
use_websocket: true
---
apiVersion: ambassador/v0
kind: Mapping
name: tf-hub-lb-user-mapping
prefix: /user/
rewrite: /user/
timeout_ms: 300000
service: tf-hub-lb.kubeflow
use_websocket: true
labels:
app: tf-hub-lb
ksonnet.io/component: jupyterhub
name: tf-hub-lb
namespace: kubeflow
spec:
ports:
- name: hub
port: 80
targetPort: 8000
selector:
app: tf-hub
type: {{ .Values.jupyterhub.service.type }}
{{- end }}
{{- if .Values.jupyterhub.enabled }}
apiVersion: apps/v1beta1
kind: StatefulSet
metadata:
labels:
ksonnet.io/component: jupyterhub
name: tf-hub
namespace: kubeflow
spec:
replicas: 1
serviceName: ""
template:
metadata:
labels:
app: tf-hub
spec:
containers:
- command:
- jupyterhub
- -f
- /etc/config/jupyterhub_config.py
env:
- name: NOTEBOOK_PVC_MOUNT
value: /home/jovyan
- name: CLOUD_NAME
value: "null"
- name: REGISTRY
value: gcr.io
- name: REPO_NAME
value: kubeflow-images-public
- name: KF_AUTHENTICATOR
value: "null"
- name: DEFAULT_JUPYTERLAB
value: "false"
- name: KF_PVC_LIST
value: "null"
image: "{{ .Values.jupyterhub.image.repository }}:{{ .Values.jupyterhub.image.tag }}"
name: tf-hub
ports:
- containerPort: 8000
- containerPort: 8081
volumeMounts:
- mountPath: /etc/config
name: config-volume
serviceAccountName: jupyter-hub
volumes:
- configMap:
name: jupyterhub-config
name: config-volume
updateStrategy:
type: RollingUpdate
{{- end }}
# Manifest templates that are mounted into the studyjob-controller
# (/metricscollector-template and /worker-template).
#
# The WorkerId / NameSpace / StudyId / TrialId placeholders are Go-template
# actions that must reach the ConfigMap unrendered. Helm would otherwise
# evaluate them at install time and blank them out, so each one is emitted
# through a raw (backtick) string literal.
apiVersion: v1
kind: ConfigMap
metadata:
  labels:
    ksonnet.io/component: katib
  name: metricscollector-template
  namespace: kubeflow
data:
  defaultMetricsCollectorTemplate.yaml: |
    apiVersion: batch/v1beta1
    kind: CronJob
    metadata:
      name: {{ `{{.WorkerId}}` }}
      namespace: {{ `{{.NameSpace}}` }}
    spec:
      schedule: "*/1 * * * *"
      successfulJobsHistoryLimit: 1
      failedJobsHistoryLimit: 1
      jobTemplate:
        spec:
          template:
            spec:
              serviceAccountName: metrics-collector
              containers:
              - name: {{ `{{.WorkerId}}` }}
                image: katib/metrics-collector
                args:
                - "./metricscollector"
                - "-s"
                - "{{ `{{.StudyId}}` }}"
                - "-t"
                - "{{ `{{.TrialId}}` }}"
                - "-w"
                - "{{ `{{.WorkerId}}` }}"
                - "-n"
                - "{{ `{{.NameSpace}}` }}"
              restartPolicy: Never
---
apiVersion: v1
kind: ConfigMap
metadata:
  labels:
    ksonnet.io/component: katib
  name: worker-template
  namespace: kubeflow
data:
  # NOTE(review): the embedded Job keeps "namespace: kubeflow" as a top-level
  # key exactly as in the original text - confirm whether it was meant to sit
  # under metadata.
  defaultWorkerTemplate.yaml: |
    apiVersion: batch/v1
    namespace: kubeflow
    kind: Job
    metadata:
      name: {{ `{{.WorkerId}}` }}
    spec:
      template:
        spec:
          containers:
          - name: {{ `{{.WorkerId}}` }}
            image: alpine
          restartPolicy: Never
# CustomResourceDefinition for Katib StudyJob (group kubeflow.org, v1alpha1).
# Installed through the crd-install hook; a previous hook resource is deleted
# before a new one is created.
apiVersion: apiextensions.k8s.io/v1beta1
kind: CustomResourceDefinition
metadata:
  name: studyjobs.kubeflow.org
  labels:
    ksonnet.io/component: katib
  annotations:
    helm.sh/hook: crd-install
    helm.sh/hook-delete-policy: before-hook-creation
spec:
  group: kubeflow.org
  version: v1alpha1
  names:
    kind: StudyJob
    singular: studyjob
    plural: studyjobs
apiVersion: extensions/v1beta1
kind: Deployment
metadata:
labels:
app: studyjob-controller
ksonnet.io/component: katib
name: studyjob-controller
namespace: kubeflow
spec:
replicas: 1
selector:
matchLabels:
app: studyjob-controller
template:
metadata:
labels:
app: studyjob-controller
spec:
containers:
- image: "{{ .Values.katib.studyJobControllerImage.repository }}:{{ .Values.katib.studyJobControllerImage.tag }}"
imagePullPolicy: Always
name: studyjob-controller
volumeMounts:
- mountPath: /worker-template
name: worker-template
- mountPath: /metricscollector-template
name: metricscollector-template
serviceAccountName: studyjob-controller
volumes:
- configMap:
name: worker-template
name: worker-template
- configMap:
name: metricscollector-template
name: metricscollector-template
---
apiVersion: extensions/v1beta1
kind: Deployment
metadata:
labels:
app: vizier
component: core
ksonnet.io/component: katib
name: vizier-core
namespace: kubeflow
spec:
replicas: 1
template:
metadata:
labels:
app: vizier
component: core
name: vizier-core
spec:
containers:
- args:
- ./vizier-manager
- -w
- kubernetes
- -i
- k-cluster.example.net
image: "{{ .Values.katib.vizierCoreImage.repository }}:{{ .Values.katib.vizierCoreImage.tag }}"
# image: gcr.io/kubeflow-images-public/katib/vizier-core:v0.1.2-alpha-45-g3dce496
name: vizier-core
ports:
- containerPort: 6789
name: api
serviceAccountName: vizier-core
---
# MySQL instance for Katib Vizier (database "vizier", port 3306).
# Uses apps/v1 because extensions/v1beta1 Deployments are no longer served
# from Kubernetes 1.16 on.
apiVersion: apps/v1
kind: Deployment
metadata:
  labels:
    app: vizier
    component: db
    ksonnet.io/component: katib
  name: vizier-db
  namespace: kubeflow
spec:
  replicas: 1
  # apps/v1 requires an explicit selector; it mirrors the pod template labels.
  selector:
    matchLabels:
      app: vizier
      component: db
  template:
    metadata:
      labels:
        app: vizier
        component: db
      name: vizier-db
    spec:
      containers:
      - args:
        - --datadir
        - /var/lib/mysql/datadir
        env:
        # NOTE(review): the root password is hard-coded in the manifest and
        # empty passwords are also allowed. Consider sourcing it from a Secret;
        # any client configured with this password must change in step.
        - name: MYSQL_ROOT_PASSWORD
          value: test
        - name: MYSQL_ALLOW_EMPTY_PASSWORD
          value: "true"
        - name: MYSQL_DATABASE
          value: vizier
        image: "{{ .Values.katib.vizierDbImage.repository }}:{{ .Values.katib.vizierDbImage.tag }}"
        name: vizier-db
        ports:
        - containerPort: 3306
          name: dbapi
        volumeMounts:
        - mountPath: /var/lib/mysql
          name: vizier-db
      volumes:
      # Data volume: a PVC (the existing claim if given, otherwise "vizier-db")
      # when persistence is enabled, else an emptyDir.
      - name: vizier-db
        {{- if .Values.katib.vizierdb.persistence.enabled }}
        persistentVolumeClaim:
          claimName: {{ .Values.katib.vizierdb.persistence.existingClaim | default ("vizier-db") }}
        {{- else }}
        emptyDir: {}
        {{- end }}
---
apiVersion: extensions/v1beta1
kind: Deployment
metadata:
labels:
app: vizier
component: suggestion-bayesianoptimization
ksonnet.io/component: katib
name: vizier-suggestion-bayesianoptimization
namespace: kubeflow
spec:
replicas: 1
template:
metadata:
labels:
app: vizier
component: suggestion-bayesianoptimization
name: vizier-suggestion-bayesianoptimization
spec:
containers:
- image: "{{ .Values.katib.suggestionBayesianOptimizationImage.repository }}:{{ .Values.katib.suggestionBayesianOptimizationImage.tag }}"
name: vizier-suggestion-bayesianoptimization
ports:
- containerPort: 6789
name: api
---
apiVersion: extensions/v1beta1
kind: Deployment
metadata:
labels:
app: vizier
component: suggestion-grid
ksonnet.io/component: katib
name: vizier-suggestion-grid
namespace: kubeflow
spec:
replicas: 1
template:
metadata:
labels:
app: vizier
component: suggestion-grid
name: vizier-suggestion-grid
spec:
containers:
- image: "{{ .Values.katib.suggestionGridImage.repository }}:{{ .Values.katib.suggestionGridImage.tag }}"
# - image: gcr.io/kubeflow-images-public/katib/suggestion-grid:v0.1.2-alpha-45-g3dce496
name: vizier-suggestion-grid
ports:
- containerPort: 6789
name: api
---
apiVersion: extensions/v1beta1
kind: Deployment
metadata:
labels:
app: vizier
component: suggestion-hyperband
ksonnet.io/component: katib
name: vizier-suggestion-hyperband
namespace: kubeflow
spec:
replicas: 1
template:
metadata:
labels:
app: vizier
component: suggestion-hyperband
name: vizier-suggestion-hyperband
spec:
containers:
- image: "{{ .Values.katib.suggestionHyperbandImage.repository }}:{{ .Values.katib.suggestionHyperbandImage.tag }}"
# - image: gcr.io/kubeflow-images-public/katib/suggestion-hyperband:v0.1.2-alpha-45-g3dce496
name: vizier-suggestion-hyperband
ports:
- containerPort: 6789
name: api
---
apiVersion: extensions/v1beta1
kind: Deployment
metadata:
labels:
app: vizier
component: suggestion-random
ksonnet.io/component: katib
name: vizier-suggestion-random
namespace: kubeflow
spec:
replicas: 1
template:
metadata:
labels:
app: vizier
component: suggestion-random
name: vizier-suggestion-random
spec:
containers:
- image: "{{ .Values.katib.suggestionRandomImage.repository }}:{{ .Values.katib.suggestionRandomImage.tag }}"
# - image: gcr.io/kubeflow-images-public/katib/suggestion-random:v0.1.2-alpha-45-g3dce496
name: vizier-suggestion-random
ports:
- containerPort: 6789
name: api
apiVersion: v1
kind: Service
metadata:
labels:
app: modeldb
component: backend
ksonnet.io/component: katib
name: modeldb-backend
namespace: kubeflow
spec:
ports:
- name: api
port: 6543
protocol: TCP
selector:
app: modeldb
component: backend
type: ClusterIP
---
apiVersion: v1
kind: Service
metadata:
labels:
app: modeldb
component: db
ksonnet.io/component: katib
name: modeldb-db
namespace: kubeflow
spec:
ports:
- name: dbapi
port: 27017
protocol: TCP
selector:
app: modeldb
component: db
type: ClusterIP
---
apiVersion: v1
kind: Service
metadata:
annotations:
getambassador.io/config: |-
---
apiVersion: ambassador/v0
kind: Mapping
name: modeldb-mapping
prefix: /katib/
rewrite: /katib/
method: GET
service: modeldb-frontend.kubeflow:3000
labels:
app: modeldb
component: frontend
ksonnet.io/component: katib
name: modeldb-frontend
namespace: kubeflow
spec:
ports:
- name: api
port: 3000
protocol: TCP
selector:
app: modeldb
component: frontend
type: ClusterIP
apiVersion: extensions/v1beta1
kind: Deployment
metadata:
labels:
app: modeldb
component: backend
ksonnet.io/component: katib
name: modeldb-backend
namespace: kubeflow
spec:
replicas: 1
template:
metadata:
labels:
app: modeldb
component: backend
name: modeldb-backend
spec:
containers:
- args:
- modeldb-db
image: "{{ .Values.katib.modeldbImage.repository }}:{{ .Values.katib.modeldbImage.tag }}"
name: modeldb-backend
ports:
- containerPort: 6543
name: api
---
apiVersion: extensions/v1beta1
kind: Deployment
metadata:
labels:
app: modeldb
component: db
ksonnet.io/component: katib
name: modeldb-db
namespace: kubeflow
spec:
replicas: 1
template:
metadata:
labels:
app: modeldb
component: db
name: modeldb-db
spec:
containers:
- image: "{{ .Values.katib.modeldbDatabaseImage.repository }}:{{ .Values.katib.modeldbDatabaseImage.tag }}"
name: modeldb-db
ports:
- containerPort: 27017
name: dbapi
---
# Deployment "modeldb-frontend": one replica of the image configured under
# .Values.katib.modeldbFrontendImage, started with the argument
# "modeldb-backend", with ROOT_PATH=/katib in its environment, and exposing
# container port 3000 ("webapi").
# NOTE(review): extensions/v1beta1 Deployments are no longer served as of
# Kubernetes 1.16; moving to apps/v1 also requires an explicit spec.selector.
apiVersion: extensions/v1beta1
kind: Deployment
metadata:
labels:
app: modeldb
component: frontend
ksonnet.io/component: katib
name: modeldb-frontend
namespace: kubeflow
spec:
replicas: 1
template:
metadata:
labels:
app: modeldb
component: frontend
name: modeldb-frontend
spec:
containers:
- args:
- modeldb-backend
env:
# ROOT_PATH uses the same /katib prefix as the Ambassador mapping on the
# modeldb-frontend Service.
- name: ROOT_PATH
value: /katib
# image: gcr.io/kubeflow-images-public/katib/katib-frontend:v0.1.2-alpha-45-g3dce496
image: "{{ .Values.katib.modeldbFrontendImage.repository }}:{{ .Values.katib.modeldbFrontendImage.tag }}"
imagePullPolicy: IfNotPresent
name: modeldb-frontend
ports:
- containerPort: 3000
name: webapi
{{- if and .Values.katib.vizierdb.persistence.enabled (not .Values.katib.vizierdb.persistence.existingClaim) }}
# PersistentVolumeClaim "vizier-db". Rendered only when
# katib.vizierdb.persistence.enabled is true and no
# katib.vizierdb.persistence.existingClaim is supplied.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
labels:
app: vizier
component: db
ksonnet.io/component: katib
name: vizier-db
namespace: kubeflow
spec:
accessModes:
- {{ .Values.katib.vizierdb.persistence.accessMode | quote }}
resources:
requests:
storage: {{ .Values.katib.vizierdb.persistence.size | quote }}
# storageClass handling: unset -> no storageClassName key is emitted;
# "-" -> storageClassName: ""; any other value is emitted verbatim.
{{- if .Values.katib.vizierdb.persistence.storageClass }}
{{- if (eq "-" .Values.katib.vizierdb.persistence.storageClass) }}
storageClassName: ""
{{- else }}
storageClassName: "{{ .Values.katib.vizierdb.persistence.storageClass }}"
{{- end }}
{{- end }}
{{- end -}}
# ClusterRole "metrics-collector": all verbs on core pods, pods/log and
# pods/status, and all verbs on batch jobs.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
labels:
ksonnet.io/component: katib
name: metrics-collector
rules:
- apiGroups:
- ""
resources:
- pods
- pods/log
- pods/status
verbs:
- '*'
- apiGroups:
- batch
resources:
- jobs
verbs:
- '*'
---
# ClusterRole "studyjob-controller":
#   - create/update/list/watch on core configmaps and serviceaccounts
#   - all verbs on batch jobs and cronjobs
#   - create/get on customresourcedefinitions
#   - all verbs on kubeflow.org studyjobs
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
labels:
ksonnet.io/component: katib
name: studyjob-controller
rules:
- apiGroups:
- ""
resources:
- configmaps
- serviceaccounts
verbs:
- create
- update
- list
- watch
- apiGroups:
- batch
resources:
- jobs
- cronjobs
verbs:
- '*'
- apiGroups:
- apiextensions.k8s.io
resources:
- customresourcedefinitions
verbs:
- create
- get
- apiGroups:
- kubeflow.org
resources:
- studyjobs
verbs:
- '*'
---
# ClusterRole "vizier-core": all verbs on
#   - core: pods, nodes (and node subresources), services, persistent volumes
#     and claims, plus the listed log/status subresources
#   - batch: jobs, jobs/status
#   - extensions: ingresses, deployments and their status subresources
# Uses rbac.authorization.k8s.io/v1, like the metrics-collector and
# studyjob-controller roles in this template. The former trailing rule that
# granted "services" a second time was dropped: the first rule already covers
# it with the same verbs, so the effective permissions are unchanged.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
labels:
ksonnet.io/component: katib
name: vizier-core
rules:
- apiGroups:
- ""
resources:
- pods
- nodes
- nodes/*
- pods/log
- pods/status
- services
- persistentvolumes
- persistentvolumes/status
- persistentvolumeclaims
- persistentvolumeclaims/status
verbs:
- '*'
- apiGroups:
- batch
resources:
- jobs
- jobs/status
verbs:
- '*'
- apiGroups:
- extensions
resources:
- ingresses
- ingresses/status
- deployments
- deployments/status
verbs:
- '*'
---
# ClusterRoleBinding "vizier-core": grants the "vizier-core" ClusterRole to
# the "vizier-core" ServiceAccount in the kubeflow namespace.
# Uses rbac.authorization.k8s.io/v1, like the metrics-collector and
# studyjob-controller bindings in this template.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
labels:
ksonnet.io/component: katib
name: vizier-core
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: vizier-core
subjects:
- kind: ServiceAccount
name: vizier-core
namespace: kubeflow
---
# ServiceAccount "metrics-collector" in the kubeflow namespace.
apiVersion: v1
kind: ServiceAccount
metadata:
labels:
ksonnet.io/component: katib
name: metrics-collector
namespace: kubeflow
---
# ServiceAccount "studyjob-controller" in the kubeflow namespace.
apiVersion: v1
kind: ServiceAccount
metadata:
labels:
ksonnet.io/component: katib
name: studyjob-controller
namespace: kubeflow
---
# ServiceAccount "vizier-core" in the kubeflow namespace.
apiVersion: v1
kind: ServiceAccount
metadata:
labels:
ksonnet.io/component: katib
name: vizier-core
namespace: kubeflow
---
# ClusterRoleBinding "metrics-collector": grants the "metrics-collector"
# ClusterRole to the "metrics-collector" ServiceAccount in the kubeflow
# namespace.
# ClusterRoleBindings are cluster-scoped, so metadata carries no namespace;
# only the ServiceAccount subject below is namespaced.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
labels:
ksonnet.io/component: katib
name: metrics-collector
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: metrics-collector
subjects:
- kind: ServiceAccount
name: metrics-collector
namespace: kubeflow
---
# ClusterRoleBinding "studyjob-controller": grants the "studyjob-controller"
# ClusterRole to the "studyjob-controller" ServiceAccount in the kubeflow
# namespace.
# ClusterRoleBindings are cluster-scoped, so metadata carries no namespace;
# only the ServiceAccount subject below is namespaced.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
labels:
ksonnet.io/component: katib
name: studyjob-controller
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: studyjob-controller
subjects:
- kind: ServiceAccount
name: studyjob-controller
namespace: kubeflow
# Service "vizier-core": port 6789/TCP ("api") for pods labelled app=vizier,
# component=core. The Service type is taken from
# .Values.katib.vizier.service.type; a nodePort is emitted only when
# .Values.katib.vizier.service.nodePort is set.
apiVersion: v1
kind: Service
metadata:
labels:
app: vizier
component: core
ksonnet.io/component: katib
name: vizier-core
namespace: kubeflow
spec:
ports:
- name: api
{{- if .Values.katib.vizier.service.nodePort }}
nodePort: {{ .Values.katib.vizier.service.nodePort }}
{{- end }}
port: 6789
protocol: TCP
selector:
app: vizier
component: core
type: {{ .Values.katib.vizier.service.type }}
---
# Service "vizier-db": ClusterIP endpoint on TCP 3306 (port name "dbapi")
# for pods labelled app=vizier, component=db.
apiVersion: v1
kind: Service
metadata:
labels:
app: vizier
component: db
ksonnet.io/component: katib
name: vizier-db
namespace: kubeflow
spec:
ports:
- name: dbapi
port: 3306
protocol: TCP
selector:
app: vizier
component: db
type: ClusterIP
---
# Service "vizier-suggestion-bayesianoptimization": ClusterIP, 6789/TCP
# ("api"), selecting app=vizier, component=suggestion-bayesianoptimization.
apiVersion: v1
kind: Service
metadata:
labels:
app: vizier
component: suggestion-bayesianoptimization
ksonnet.io/component: katib
name: vizier-suggestion-bayesianoptimization
namespace: kubeflow
spec:
ports:
- name: api
port: 6789
protocol: TCP
selector:
app: vizier
component: suggestion-bayesianoptimization
type: ClusterIP
---
# Service "vizier-suggestion-grid": ClusterIP, 6789/TCP ("api"), selecting
# app=vizier, component=suggestion-grid.
apiVersion: v1
kind: Service
metadata:
labels:
app: vizier
component: suggestion-grid
ksonnet.io/component: katib
name: vizier-suggestion-grid
namespace: kubeflow
spec:
ports:
- name: api
port: 6789
protocol: TCP
selector:
app: vizier
component: suggestion-grid
type: ClusterIP
---
# Service "vizier-suggestion-hyperband": ClusterIP, 6789/TCP ("api"),
# selecting app=vizier, component=suggestion-hyperband.
apiVersion: v1
kind: Service
metadata:
labels:
app: vizier
component: suggestion-hyperband
ksonnet.io/component: katib
name: vizier-suggestion-hyperband
namespace: kubeflow
spec:
ports:
- name: api
port: 6789
protocol: TCP
selector:
app: vizier
component: suggestion-hyperband
type: ClusterIP
---
# Service "vizier-suggestion-random": ClusterIP, 6789/TCP ("api"), selecting
# app=vizier, component=suggestion-random.
apiVersion: v1
kind: Service
metadata:
labels:
app: vizier
component: suggestion-random
ksonnet.io/component: katib
name: vizier-suggestion-random
namespace: kubeflow
spec:
ports:
- name: api
port: 6789
protocol: TCP
selector:
app: vizier
component: suggestion-random
type: ClusterIP
{{- if .Values.mxnetOperator.enabled }}
# CustomResourceDefinition for MXJob (group kubeflow.org, version v1alpha1).
# Installed through the Helm "crd-install" hook; the
# "before-hook-creation" delete policy removes a previous hook resource
# before a new one is created.
apiVersion: apiextensions.k8s.io/v1beta1
kind: CustomResourceDefinition
metadata:
labels:
ksonnet.io/component: mxnet-operator
annotations:
"helm.sh/hook": crd-install
"helm.sh/hook-delete-policy": before-hook-creation
name: mxjobs.kubeflow.org
spec:
group: kubeflow.org
names:
kind: MXJob
plural: mxjobs
singular: mxjob
version: v1alpha1
---
# ClusterRole "mxnet-operator": all verbs on
#   - kubeflow.org mxjobs
#   - customresourcedefinitions
#   - storage.k8s.io storageclasses
#   - batch jobs
#   - core configmaps, pods, services, endpoints, persistentvolumeclaims, events
#   - apps/extensions deployments
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRole
metadata:
labels:
app: mxnet-operator
ksonnet.io/component: mxnet-operator
name: mxnet-operator
rules:
- apiGroups:
- kubeflow.org
# The sequence entry must sit on its own line; "resources: - mxjobs" on a
# single line is not valid YAML.
resources:
- mxjobs
verbs:
- '*'
- apiGroups:
- apiextensions.k8s.io
resources:
- customresourcedefinitions
verbs:
- '*'
- apiGroups:
- storage.k8s.io
resources:
- storageclasses
verbs:
- '*'
- apiGroups:
- batch
resources:
- jobs
verbs:
- '*'
- apiGroups:
- ""
resources:
- configmaps
- pods
- services
- endpoints
- persistentvolumeclaims
- events
verbs:
- '*'
- apiGroups:
- apps
- extensions
resources:
- deployments
verbs:
- '*'
---
# ClusterRoleBinding "mxnet-operator": grants the "mxnet-operator"
# ClusterRole to the "mxnet-operator" ServiceAccount in the kubeflow
# namespace.
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRoleBinding
metadata:
labels:
app: mxnet-operator
ksonnet.io/component: mxnet-operator
name: mxnet-operator
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: mxnet-operator
subjects:
- kind: ServiceAccount
name: mxnet-operator
namespace: kubeflow
---
# ConfigMap "mxnet-operator-config": carries controller_config_file.yaml,
# whose content is an empty JSON object.
apiVersion: v1
data:
controller_config_file.yaml: |-
{
}
kind: ConfigMap
metadata:
labels:
ksonnet.io/component: mxnet-operator
name: mxnet-operator-config
namespace: kubeflow
---
# ServiceAccount "mxnet-operator" in the kubeflow namespace.
apiVersion: v1
kind: ServiceAccount
metadata:
labels:
app: mxnet-operator
ksonnet.io/component: mxnet-operator
name: mxnet-operator
namespace: kubeflow
---
# Deployment "mxnet-operator": one replica running /opt/mlkube/mxnet-operator
# under the "mxnet-operator" ServiceAccount, with the
# "mxnet-operator-config" ConfigMap mounted at /etc/config.
# NOTE(review): extensions/v1beta1 Deployments are no longer served as of
# Kubernetes 1.16; moving to apps/v1 also requires an explicit spec.selector.
apiVersion: extensions/v1beta1
kind: Deployment
metadata:
labels:
ksonnet.io/component: mxnet-operator
name: mxnet-operator
namespace: kubeflow
spec:
replicas: 1
template:
metadata:
labels:
name: mxnet-operator
spec:
containers:
- command:
- /opt/mlkube/mxnet-operator
- --alsologtostderr
- -v=1
env:
# The pod's own namespace and name are injected via the downward API.
- name: MY_POD_NAMESPACE
valueFrom:
fieldRef:
fieldPath: metadata.namespace
- name: MY_POD_NAME
valueFrom:
fieldRef:
fieldPath: metadata.name
image: "{{ .Values.mxnetOperator.image.repository }}:{{ .Values.mxnetOperator.image.tag }}"
imagePullPolicy: Always
name: mxnet-operator
volumeMounts:
- mountPath: /etc/config
name: config-volume
serviceAccountName: mxnet-operator
volumes:
- configMap:
name: mxnet-operator-config
name: config-volume
{{- end }}
{{- if .Values.pytorchOperator.enabled }}
# CustomResourceDefinition for PyTorchJob (group kubeflow.org, version
# v1alpha2), installed through the Helm "crd-install" hook.
# The validation schema constrains pytorchReplicaSpecs: Master.replicas must
# be exactly 1 (minimum 1, maximum 1) and Worker.replicas at least 1.
apiVersion: apiextensions.k8s.io/v1beta1
kind: CustomResourceDefinition
metadata:
labels:
ksonnet.io/component: pytorch-operator
annotations:
"helm.sh/hook": crd-install
"helm.sh/hook-delete-policy": before-hook-creation
name: pytorchjobs.kubeflow.org
spec:
group: kubeflow.org
names:
kind: PyTorchJob
plural: pytorchjobs
singular: pytorchjob
validation:
openAPIV3Schema:
properties:
spec:
properties:
pytorchReplicaSpecs:
properties:
Master:
properties:
replicas:
maximum: 1
minimum: 1
type: integer
Worker:
properties:
replicas:
minimum: 1
type: integer
version: v1alpha2
---
# ClusterRole "pytorch-operator": all verbs on
#   - kubeflow.org pytorchjobs
#   - customresourcedefinitions
#   - storage.k8s.io storageclasses
#   - batch jobs
#   - core configmaps, pods, services, endpoints, persistentvolumeclaims, events
#   - apps/extensions deployments
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRole
metadata:
labels:
app: pytorch-operator
ksonnet.io/component: pytorch-operator
name: pytorch-operator
rules:
- apiGroups:
- kubeflow.org
resources:
- pytorchjobs
verbs:
- '*'
- apiGroups:
- apiextensions.k8s.io
resources:
- customresourcedefinitions
verbs:
- '*'
- apiGroups:
- storage.k8s.io
resources:
- storageclasses
verbs:
- '*'
- apiGroups:
- batch
resources:
- jobs
verbs:
- '*'
- apiGroups:
- ""
resources:
- configmaps
- pods
- services
- endpoints
- persistentvolumeclaims
- events
verbs:
- '*'
- apiGroups:
- apps
- extensions
resources:
- deployments
verbs:
- '*'
---
# ClusterRoleBinding "pytorch-operator": grants the "pytorch-operator"
# ClusterRole to the "pytorch-operator" ServiceAccount in the kubeflow
# namespace.
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRoleBinding
metadata:
labels:
app: pytorch-operator
ksonnet.io/component: pytorch-operator
name: pytorch-operator
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: pytorch-operator
subjects:
- kind: ServiceAccount
name: pytorch-operator
namespace: kubeflow
---
# ConfigMap "pytorch-operator-config": carries controller_config_file.yaml,
# whose content is an empty JSON object.
apiVersion: v1
data:
controller_config_file.yaml: |-
{
}
kind: ConfigMap
metadata:
labels:
ksonnet.io/component: pytorch-operator
name: pytorch-operator-config
namespace: kubeflow
---
# ServiceAccount "pytorch-operator" in the kubeflow namespace.
apiVersion: v1
kind: ServiceAccount
metadata:
labels:
app: pytorch-operator
ksonnet.io/component: pytorch-operator
name: pytorch-operator
namespace: kubeflow
---
# Deployment "pytorch-operator": one replica running /pytorch-operator.v2
# under the "pytorch-operator" ServiceAccount, with the
# "pytorch-operator-config" ConfigMap mounted at /etc/config.
# NOTE(review): extensions/v1beta1 Deployments are no longer served as of
# Kubernetes 1.16; moving to apps/v1 also requires an explicit spec.selector.
apiVersion: extensions/v1beta1
kind: Deployment
metadata:
labels:
ksonnet.io/component: pytorch-operator
name: pytorch-operator
namespace: kubeflow
spec:
replicas: 1
template:
metadata:
labels:
name: pytorch-operator
spec:
containers:
- command:
- /pytorch-operator.v2
- --alsologtostderr
- -v=1
env:
# The pod's own namespace and name are injected via the downward API.
- name: MY_POD_NAMESPACE
valueFrom:
fieldRef:
fieldPath: metadata.namespace
- name: MY_POD_NAME
valueFrom:
fieldRef:
fieldPath: metadata.name
image: "{{ .Values.pytorchOperator.image.repository }}:{{ .Values.pytorchOperator.image.tag }}"
name: pytorch-operator
volumeMounts:
- mountPath: /etc/config
name: config-volume
serviceAccountName: pytorch-operator
volumes:
- configMap:
name: pytorch-operator-config
name: config-volume
{{- end }}
{{- if .Values.tfJobOperator.enabled }}
# ConfigMap "tf-job-operator-config": carries controller_config_file.yaml,
# a JSON object whose only key, grpcServerFilePath, points at
# /opt/mlkube/grpc_tensorflow_server/grpc_tensorflow_server.py.
apiVersion: v1
kind: ConfigMap
metadata:
labels:
ksonnet.io/component: tf-job-operator
name: tf-job-operator-config
namespace: kubeflow
data:
controller_config_file.yaml: |-
{
"grpcServerFilePath": "/opt/mlkube/grpc_tensorflow_server/grpc_tensorflow_server.py"
}
{{- end }}
{{- if .Values.tfJobOperator.enabled }}
# CustomResourceDefinition for TFJob (group kubeflow.org, version v1alpha2),
# installed through the Helm "crd-install" hook.
# The validation schema constrains tfReplicaSpecs: Chief.replicas must be
# exactly 1 (minimum 1, maximum 1); PS.replicas and Worker.replicas must be
# at least 1.
apiVersion: apiextensions.k8s.io/v1beta1
kind: CustomResourceDefinition
metadata:
labels:
ksonnet.io/component: tf-job-operator
annotations:
"helm.sh/hook": crd-install
"helm.sh/hook-delete-policy": before-hook-creation
name: tfjobs.kubeflow.org
spec:
version: v1alpha2
group: kubeflow.org
names:
kind: TFJob
plural: tfjobs
singular: tfjob
validation:
openAPIV3Schema:
properties:
spec:
properties:
tfReplicaSpecs:
properties:
Chief:
properties:
replicas:
maximum: 1
minimum: 1
type: integer
PS:
properties:
replicas:
minimum: 1
type: integer
Worker:
properties:
replicas:
minimum: 1
type: integer
{{- end }}
{{- if .Values.tfJobOperator.enabled }}
# Deployment "tf-job-dashboard": runs /opt/tensorflow_k8s/dashboard/backend
# from the tfJobOperator image under the "tf-job-dashboard" ServiceAccount,
# listening on container port 8080. No replica count is set in this manifest.
# NOTE(review): extensions/v1beta1 Deployments are no longer served as of
# Kubernetes 1.16; moving to apps/v1 also requires an explicit spec.selector.
apiVersion: extensions/v1beta1
kind: Deployment
metadata:
labels:
ksonnet.io/component: tf-job-operator
name: tf-job-dashboard
namespace: kubeflow
spec:
template:
metadata:
labels:
name: tf-job-dashboard
spec:
containers:
- command:
- /opt/tensorflow_k8s/dashboard/backend
env:
# The pod's own namespace is injected via the downward API.
- name: KUBEFLOW_NAMESPACE
valueFrom:
fieldRef:
fieldPath: metadata.namespace
image: "{{ .Values.tfJobOperator.image.repository }}:{{ .Values.tfJobOperator.image.tag }}"
name: tf-job-dashboard
ports:
- containerPort: 8080
serviceAccountName: tf-job-dashboard
---
# Deployment "tf-job-operator-v1alpha2": one replica running
# /opt/kubeflow/tf-operator.v2 under the "tf-job-operator" ServiceAccount,
# with the "tf-job-operator-config" ConfigMap mounted at /etc/config.
# NOTE(review): extensions/v1beta1 Deployments are no longer served as of
# Kubernetes 1.16; moving to apps/v1 also requires an explicit spec.selector.
apiVersion: extensions/v1beta1
kind: Deployment
metadata:
labels:
ksonnet.io/component: tf-job-operator
name: tf-job-operator-v1alpha2
namespace: kubeflow
spec:
replicas: 1
template:
metadata:
labels:
name: tf-job-operator
spec:
containers:
- command:
- /opt/kubeflow/tf-operator.v2
- --alsologtostderr
- -v=1
env:
# The pod's own namespace and name are injected via the downward API.
- name: MY_POD_NAMESPACE
valueFrom:
fieldRef:
fieldPath: metadata.namespace
- name: MY_POD_NAME
valueFrom:
fieldRef:
fieldPath: metadata.name
image: "{{ .Values.tfJobOperator.image.repository }}:{{ .Values.tfJobOperator.image.tag }}"
name: tf-job-operator
volumeMounts:
- mountPath: /etc/config
name: config-volume
serviceAccountName: tf-job-operator
volumes:
- configMap:
name: tf-job-operator-config
name: config-volume
{{- end }}
{{- if .Values.tfJobOperator.enabled }}
# ClusterRole "tf-job-dashboard": all verbs on
#   - tfjobs in the tensorflow.org and kubeflow.org groups
#   - customresourcedefinitions
#   - storage.k8s.io storageclasses
#   - batch jobs
#   - core configmaps, pods, services, endpoints, persistentvolumeclaims,
#     events, pods/log and namespaces
#   - apps/extensions deployments
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRole
metadata:
labels:
app: tf-job-dashboard
ksonnet.io/component: tf-job-operator
name: tf-job-dashboard
rules:
- apiGroups:
- tensorflow.org
- kubeflow.org
resources:
- tfjobs
verbs:
- '*'
- apiGroups:
- apiextensions.k8s.io
resources:
- customresourcedefinitions
verbs:
- '*'
- apiGroups:
- storage.k8s.io
resources:
- storageclasses
verbs:
- '*'
- apiGroups:
- batch
resources:
- jobs
verbs:
- '*'
- apiGroups:
- ""
resources:
- configmaps
- pods
- services
- endpoints
- persistentvolumeclaims
- events
- pods/log
- namespaces
verbs:
- '*'
- apiGroups:
- apps
- extensions
resources:
- deployments
verbs:
- '*'
---
# ClusterRole "tf-job-operator": all verbs on
#   - tfjobs in the tensorflow.org and kubeflow.org groups
#   - customresourcedefinitions
#   - storage.k8s.io storageclasses
#   - batch jobs
#   - core configmaps, pods, services, endpoints, persistentvolumeclaims, events
#   - apps/extensions deployments
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRole
metadata:
labels:
app: tf-job-operator
ksonnet.io/component: tf-job-operator
name: tf-job-operator
rules:
- apiGroups:
- tensorflow.org
- kubeflow.org
resources:
- tfjobs
verbs:
- '*'
- apiGroups:
- apiextensions.k8s.io
resources:
- customresourcedefinitions
verbs:
- '*'
- apiGroups:
- storage.k8s.io
resources:
- storageclasses
verbs:
- '*'
- apiGroups:
- batch
resources:
- jobs
verbs:
- '*'
- apiGroups:
- ""
resources:
- configmaps
- pods
- services
- endpoints
- persistentvolumeclaims
- events
verbs:
- '*'
- apiGroups:
- apps
- extensions
resources:
- deployments
verbs:
- '*'
---
# ClusterRoleBinding "tf-job-dashboard": grants the "tf-job-dashboard"
# ClusterRole to the "tf-job-dashboard" ServiceAccount in the kubeflow
# namespace.
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRoleBinding
metadata:
labels:
app: tf-job-dashboard
ksonnet.io/component: tf-job-operator
name: tf-job-dashboard
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: tf-job-dashboard
subjects:
- kind: ServiceAccount
name: tf-job-dashboard
namespace: kubeflow
---
# ClusterRoleBinding "tf-job-operator": grants the "tf-job-operator"
# ClusterRole to the "tf-job-operator" ServiceAccount in the kubeflow
# namespace.
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRoleBinding
metadata:
labels:
app: tf-job-operator
ksonnet.io/component: tf-job-operator
name: tf-job-operator
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: tf-job-operator
subjects:
- kind: ServiceAccount
name: tf-job-operator
namespace: kubeflow
---
# ServiceAccount "tf-job-dashboard" in the kubeflow namespace.
apiVersion: v1
kind: ServiceAccount
metadata:
labels:
app: tf-job-dashboard
ksonnet.io/component: tf-job-operator
name: tf-job-dashboard
namespace: kubeflow
---
# ServiceAccount "tf-job-operator" in the kubeflow namespace.
apiVersion: v1
kind: ServiceAccount
metadata:
labels:
app: tf-job-operator
ksonnet.io/component: tf-job-operator
name: tf-job-operator
namespace: kubeflow
{{- end }}
{{- if .Values.tfJobOperator.enabled }}
# Service "tf-job-dashboard": ClusterIP endpoint mapping port 80 to
# targetPort 8080 on pods labelled name=tf-job-dashboard.
# The getambassador.io/config annotation embeds an Ambassador Mapping
# ("tfjobs-ui-mapping") that sends requests under /tfjobs/ to
# tf-job-dashboard.kubeflow, keeping the /tfjobs/ prefix (rewrite: /tfjobs/).
apiVersion: v1
kind: Service
metadata:
annotations:
getambassador.io/config: |-
---
apiVersion: ambassador/v0
kind: Mapping
name: tfjobs-ui-mapping
prefix: /tfjobs/
rewrite: /tfjobs/
service: tf-job-dashboard.kubeflow
labels:
ksonnet.io/component: tf-job-operator
name: tf-job-dashboard
namespace: kubeflow
spec:
ports:
- port: 80
targetPort: 8080
selector:
name: tf-job-dashboard
type: ClusterIP
{{- end }}
# Default values for kubeflow.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.
# Ambassador: replica count, image coordinates for ambassador, statsd and the
# statsd exporter, and the Service type.
ambassador:
replicas: 3
image:
repository: quay.io/datawire/ambassador
tag: 0.40.2
statsdImage:
repository: quay.io/datawire/statsd
tag: 0.38.0
exporterImage:
repository: prom/statsd-exporter
tag: v0.8.0
service:
type: ClusterIP
# Central dashboard: image coordinates.
centraldashboard:
image:
repository: gcr.io/kubeflow-images-public/centraldashboard
tag: v0.3.0
# Katib: image coordinates for the modeldb, studyjob-controller, vizier and
# suggestion components, persistence settings for the vizier database, and
# the vizier-core Service settings.
katib:
modeldbImage:
repository: gcr.io/kubeflow-images-public/modeldb-backend
tag: v0.2.0
modeldbDatabaseImage:
repository: mongo
# Quoted so the tag stays the string "3.4" instead of being parsed as a float.
tag: "3.4"
modeldbFrontendImage:
repository: katib/katib-frontend
# NOTE(review): "latest" is a floating tag; consider pinning a released
# version so deployments are reproducible.
tag: latest
studyJobControllerImage:
repository: katib/studyjob-controller
tag: v0.3
vizierCoreImage:
repository: katib/vizier-core
tag: v0.3
vizierDbImage:
repository: mysql
tag: 8.0.3
suggestionBayesianOptimizationImage:
repository: gcr.io/kubeflow-images-public/katib/suggestion-bayesianoptimization
tag: v0.1.2-alpha-45-g3dce496
suggestionGridImage:
repository: katib/suggestion-grid
tag: v0.3
suggestionHyperbandImage:
repository: katib/suggestion-hyperband
tag: v0.3
suggestionRandomImage:
repository: katib/suggestion-random
tag: v0.3
vizierdb:
persistence:
enabled: true
## Name of an existing PersistentVolumeClaim to reuse. When set, the chart
## does not create the vizier-db PersistentVolumeClaim.
##
# existingClaim: ""
## vizier-db (MySQL) data Persistent Volume Storage Class
## If defined, storageClassName: <storageClass>
## If set to "-", storageClassName: "", which disables dynamic provisioning
## If undefined (the default) or set to null, no storageClassName spec is
## set, choosing the default provisioner. (gp2 on AWS, standard on
## GKE, AWS & OpenStack)
##
# storageClass: "-"
accessMode: ReadWriteOnce
size: 10Gi
vizier:
service:
type: ClusterIP
# nodePort: 30678
# JupyterHub: enable flag, image coordinates and Service type.
jupyterhub:
enabled: true
image:
repository: gcr.io/kubeflow/jupyterhub-k8s
tag: v20180531-3bb991b1
service:
type: ClusterIP
# TFJob operator: enabled by default; gates the tf-job templates.
tfJobOperator:
enabled: true
image:
repository: gcr.io/kubeflow-images-public/tf_operator
tag: v0.3.0
# Chainer operator: disabled by default.
chainerOperator:
enabled: false
image:
repository: gcr.io/kubeflow-images-public/chainer-operator
tag: v0.3.0
# MXNet operator: disabled by default; gates the mxnet-operator templates.
mxnetOperator:
enabled: false
image:
repository: mxjob/mxnet-operator
tag: v1
# PyTorch operator: disabled by default; gates the pytorch-operator templates.
pytorchOperator:
enabled: false
image:
repository: gcr.io/kubeflow-images-public/pytorch-operator
tag: v0.3.0
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment