Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion charts/model-engine/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ type: application
# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: 0.1.10
version: 0.2.0

# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
Expand Down
57 changes: 55 additions & 2 deletions charts/model-engine/templates/_helpers.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@ We truncate at 40 chars because some Kubernetes name fields are limited to 63 (b
If release name contains chart name it will be used as a full name.
*/}}
{{- define "modelEngine.fullname" -}}
{{- if .Values.serviceIdentifier }}
{{- if .Values.fullnameOverride }}
{{- .Values.fullnameOverride | trunc 40 | trimSuffix "-" }}
{{- else if .Values.serviceIdentifier }}
{{- printf "%s-%s" .Chart.Name .Values.serviceIdentifier | trunc 40 | trimSuffix "-" }}
{{- else }}
{{- default .Chart.Name | trunc 40 | trimSuffix "-" }}
Expand Down Expand Up @@ -321,6 +323,8 @@ env:
value: {{ .Values.aws.profileName }}
- name: S3_WRITE_AWS_PROFILE
value: {{ .Values.aws.s3WriteProfileName }}
{{- else }}
{{- /* On-prem: Do NOT set AWS_PROFILE - boto3 uses default credential chain */ -}}
{{- end }}
{{- with .Values.secrets }}
{{- if .kubernetesDatabaseSecretName }}
Expand Down Expand Up @@ -367,7 +371,13 @@ env:
- name: CELERY_RESULT_BACKEND
value: {{ .Values.celeryResultBackend | quote }}
{{- end }}
{{- if .Values.redis.auth}}
{{- if .Values.redis.authSecretName }}
- name: REDIS_AUTH_TOKEN
valueFrom:
secretKeyRef:
name: {{ .Values.redis.authSecretName }}
key: {{ .Values.redis.authSecretKey | default "auth_token" }}
{{- else if .Values.redis.auth }}
- name: REDIS_AUTH_TOKEN
value: {{ .Values.redis.auth }}
{{- end }}
Expand Down Expand Up @@ -399,6 +409,9 @@ env:
value: {{ .Values.tag }}
- name: GIT_TAG
value: {{ .Values.tag }}
{{- with .Values.extraEnvVars }}
{{- toYaml . | nindent 2 }}
{{- end }}
{{- end }}

{{- define "modelEngine.serviceEnvGitTagFromPythonReplace" }}
Expand Down Expand Up @@ -455,6 +468,10 @@ volumes:
- key: infra_service_config
path: config.yaml
{{- end }}
{{- with .Values.extraVolumes }}
{{- toYaml . | nindent 2 }}
{{- end }}
{{- include "modelEngine.tokenVolume" . | nindent 2 }}
{{- end }}

{{- define "modelEngine.volumeMounts" }}
Expand All @@ -474,6 +491,10 @@ volumeMounts:
- name: infra-service-config-volume
mountPath: /workspace/model-engine/model_engine_server/core/configs
{{- end }}
{{- with .Values.extraVolumeMounts }}
{{- toYaml . | nindent 2 }}
{{- end }}
{{- include "modelEngine.tokenVolumeMount" . | nindent 2 }}
{{- end }}

{{- define "modelEngine.forwarderVolumeMounts" }}
Expand Down Expand Up @@ -502,3 +523,35 @@ namespaces:
- {{ . }}
{{- end }}
{{- end }}

{{- define "modelEngine.tokenVolume" }}
{{- if not .Values.automountServiceAccountToken }}
# Emitted only when .Values.automountServiceAccountToken is falsy: a projected
# volume entry named token-volume that combines the three sources below.
- name: token-volume
projected:
defaultMode: 0444
sources:
# Service account token written to the file "token"; expirationSeconds is 86400.
- serviceAccountToken:
path: token
expirationSeconds: 86400
# We also need to project the CA cert and namespace files
- configMap:
name: kube-root-ca.crt
items:
- key: ca.crt
path: ca.crt
# Pod namespace (metadata.namespace) written to the file "namespace".
- downwardAPI:
items:
- path: namespace
fieldRef:
apiVersion: v1
fieldPath: metadata.namespace
{{- end }}
{{- end }}

{{- define "modelEngine.tokenVolumeMount" }}
{{- if not .Values.automountServiceAccountToken }}
# Emitted only when .Values.automountServiceAccountToken is falsy: mounts the
# volume named token-volume read-only at the service account secrets path.
- mountPath: /var/run/secrets/kubernetes.io/serviceaccount
name: token-volume
readOnly: true
{{- end }}
{{- end }}
2 changes: 1 addition & 1 deletion charts/model-engine/templates/balloon_cpu_deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ spec:
{{- toYaml . | nindent 8 }}
{{- end }}
containers:
- image: public.ecr.aws/ubuntu/ubuntu:latest
- image: {{ $.Values.utilityImages.ubuntu.repository }}:{{ $.Values.utilityImages.ubuntu.tag }}
imagePullPolicy: IfNotPresent
name: main
resources:
Expand Down
7 changes: 6 additions & 1 deletion charts/model-engine/templates/balloon_deployments.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,13 @@ spec:
- key: "nvidia.com/gpu"
operator: "Exists"
effect: "NoSchedule"
{{- range $.Values.gpuTolerations }}
- key: {{ .key | quote }}
operator: "Exists"
effect: "NoSchedule"
{{- end }}
containers:
- image: public.ecr.aws/ubuntu/ubuntu:latest
- image: {{ $.Values.utilityImages.ubuntu.repository }}:{{ $.Values.utilityImages.ubuntu.tag }}
imagePullPolicy: IfNotPresent
name: main
resources:
Expand Down
1 change: 1 addition & 0 deletions charts/model-engine/templates/cacher_deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ spec:
{{- toYaml .Values.resources | nindent 12 }}
{{- include "modelEngine.cacherEnv" . | indent 10 }}
{{- include "modelEngine.volumeMounts" . | indent 10 }}
automountServiceAccountToken: {{ .Values.automountServiceAccountToken }}
serviceAccountName: {{ include "modelEngine.fullname" . }}
{{- include "modelEngine.volumes" . | indent 6 }}
{{- with .Values.nodeSelector }}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
{{ $broker_name = "sqs-message-broker-master" }}
{{- else if eq $message_broker "servicebus" }}
{{ $broker_name = "servicebus-message-broker-master" }}
{{- else if and .Values.config .Values.config.values .Values.config.values.infra (eq (.Values.config.values.infra.cloud_provider | default "") "gcp") }}
{{ $broker_name = "redis-gcp-memorystore-message-broker-master" }}
{{- end }}
apiVersion: apps/v1
kind: StatefulSet
Expand Down Expand Up @@ -99,6 +101,9 @@ spec:
name: config-volume
subPath: config
{{- end }}
{{- if not $.Values.automountServiceAccountToken }}
{{- include "modelEngine.tokenVolumeMount" $ | nindent 8 }}
{{- end }}
{{- if .Values.config.values }}
- name: {{ .Chart.Name }}-service-config-volume
mountPath: /workspace/model-engine/service_configs
Expand All @@ -112,13 +117,17 @@ spec:
operator: Equal
value: 'true'
effect: NoSchedule
automountServiceAccountToken: {{ .Values.automountServiceAccountToken }}
serviceAccountName: {{ include "modelEngine.fullname" $ }}
volumes:
{{- if .Values.aws }}
- configMap:
name: {{ .Values.aws.configMap.name }}
name: config-volume
{{- end }}
{{- if not .Values.automountServiceAccountToken }}
{{- include "modelEngine.tokenVolume" . | nindent 6 }}
{{- end }}
{{- if .Values.config.values }}
- name: {{ .Chart.Name }}-service-config-volume
configMap:
Expand Down
54 changes: 54 additions & 0 deletions charts/model-engine/templates/database_init_job.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
{{- if .Values.db.runDbInitScript }}
# Helm hook Job that runs the model_engine_server.entrypoints.init_database
# module; rendered only when .Values.db.runDbInitScript is truthy.
# NOTE(review): assumes .Values.db is always defined (for example by the
# chart's default values) - confirm, otherwise rendering this file fails.
apiVersion: batch/v1
kind: Job
metadata:
# The release revision is appended, so each install/upgrade gets its own Job name.
name: {{ include "modelEngine.fullname" . }}-database-setup-{{ .Release.Revision }}
labels:
{{- include "modelEngine.labels" . | nindent 4 }}
annotations:
# Runs as a pre-install and pre-upgrade hook with weight -2; an earlier hook
# Job is deleted before a new one is created, and again once it succeeds.
"helm.sh/hook": pre-install,pre-upgrade
"helm.sh/hook-weight": "-2"
"helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded
spec:
# Single attempt (backoffLimit 0) bounded by a 600 second deadline.
backoffLimit: 0
activeDeadlineSeconds: 600
template:
metadata:
labels:
# Label requesting that no Istio sidecar be injected into this pod.
sidecar.istio.io/inject: "false"
{{- include "modelEngine.labels" . | nindent 8 }}
spec:
restartPolicy: Never
{{- with .Values.imagePullSecrets }}
imagePullSecrets:
{{- toYaml . | nindent 8 }}
{{- end }}
automountServiceAccountToken: {{ .Values.automountServiceAccountToken }}
containers:
- name: {{ include "modelEngine.fullname" . }}
# Uses the gateway image repository with the chart-wide tag value.
image: "{{ .Values.image.gatewayRepository }}:{{ .Values.tag}}"
imagePullPolicy: {{ .Values.image.pullPolicy }}
command:
- dumb-init
- --
args:
- python
- -m
- model_engine_server.entrypoints.init_database
# Environment and volume mounts come from the shared chart helpers.
{{- include "modelEngine.serviceEnvGitTagFromHelmVar" . | indent 10 }}
{{- include "modelEngine.volumeMounts" . | indent 10 }}
serviceAccountName: {{ include "modelEngine.fullname" . }}
{{- include "modelEngine.volumes" . | indent 6 }}
{{- with .Values.nodeSelector }}
nodeSelector:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.affinity }}
affinity:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.tolerations }}
tolerations:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- end }}
5 changes: 3 additions & 2 deletions charts/model-engine/templates/database_migration_job.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,13 @@
apiVersion: batch/v1
kind: Job
metadata:
name: {{ include "modelEngine.fullname" . }}-database-migration
name: {{ include "modelEngine.fullname" . }}-database-migration-{{ .Release.Revision }}
labels:
{{- include "modelEngine.labels" . | nindent 4 }}
annotations:
"helm.sh/hook": pre-install,pre-upgrade
"helm.sh/hook-weight": "-1"
"helm.sh/hook-delete-policy": hook-succeeded
"helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded
spec:
backoffLimit: 0
activeDeadlineSeconds: 600
Expand All @@ -35,6 +35,7 @@ spec:
- /workspace/model-engine/model_engine_server/db/migrations/run_database_migration.sh
{{- include "modelEngine.serviceEnvGitTagFromHelmVar" . | indent 10 }}
{{- include "modelEngine.volumeMounts" . | indent 10 }}
automountServiceAccountToken: {{ .Values.automountServiceAccountToken }}
serviceAccountName: {{ include "modelEngine.fullname" . }}
{{- include "modelEngine.volumes" . | indent 6 }}
{{- with .Values.nodeSelector }}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ spec:
{{- toYaml .Values.resources | nindent 12 }}
{{- include "modelEngine.builderEnv" . | indent 10 }}
{{- include "modelEngine.volumeMounts" . | indent 10 }}
automountServiceAccountToken: {{ .Values.automountServiceAccountToken }}
serviceAccountName: {{ include "modelEngine.fullname" . }}
{{- include "modelEngine.volumes" . | indent 6 }}
{{- with .Values.nodeSelector }}
Expand Down
1 change: 1 addition & 0 deletions charts/model-engine/templates/gateway_deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ spec:
{{- toYaml .Values.resources | nindent 12 }}
{{- include "modelEngine.gatewayEnv" . | indent 10 }}
{{- include "modelEngine.volumeMounts" . | indent 10 }}
automountServiceAccountToken: {{ .Values.automountServiceAccountToken }}
serviceAccountName: {{ include "modelEngine.fullname" . }}
{{- include "modelEngine.volumes" . | indent 6 }}
{{- with .Values.nodeSelector }}
Expand Down
14 changes: 7 additions & 7 deletions charts/model-engine/templates/inference_framework_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,10 @@ metadata:
"helm.sh/hook": pre-install
"helm.sh/hook-weight": "-2"
data:
deepspeed: "latest"
text_generation_inference: "latest"
vllm: "latest"
vllm_batch: "latest"
vllm_batch_v2: "latest"
lightllm: "latest"
tensorrt_llm: "latest"
deepspeed: {{ .Values.inferenceFramework.deepspeed | default "latest" | quote }}
text_generation_inference: {{ .Values.inferenceFramework.text_generation_inference | default "latest" | quote }}
vllm: {{ .Values.inferenceFramework.vllm | default "latest" | quote }}
vllm_batch: {{ .Values.inferenceFramework.vllm_batch | default "latest" | quote }}
vllm_batch_v2: {{ .Values.inferenceFramework.vllm_batch_v2 | default "latest" | quote }}
lightllm: {{ .Values.inferenceFramework.lightllm | default "latest" | quote }}
tensorrt_llm: {{ .Values.inferenceFramework.tensorrt_llm | default "latest" | quote }}
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,13 @@
apiVersion: batch/v1
kind: Job
metadata:
name: {{ include "modelEngine.fullname" . }}-populate-fine-tuning-repository
name: {{ include "modelEngine.fullname" . }}-populate-fine-tuning-repository-{{ .Release.Revision }}
labels:
{{- include "modelEngine.labels" . | nindent 4 }}
annotations:
"helm.sh/hook": post-install
"helm.sh/hook-weight": "1"
"helm.sh/hook-delete-policy": hook-succeeded
"helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded
spec:
backoffLimit: 0
activeDeadlineSeconds: 600
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,16 @@ spec:
- --nodelabels=k8s.amazonaws.com/accelerator=nvidia-ampere-a100
- --logtostderr=true
- --v=2
{{- if not $.Values.automountServiceAccountToken }}
volumeMounts:
{{- include "modelEngine.tokenVolumeMount" $ | nindent 12 }}
{{- end }}
priorityClassName: system-cluster-critical
automountServiceAccountToken: {{ .Values.automountServiceAccountToken }}
serviceAccountName: {{ include "modelEngine.fullname" . }}
{{- if not .Values.automountServiceAccountToken }}
volumes:
{{- include "modelEngine.tokenVolume" . | nindent 8 }}
{{- end }}
{{- end }}
{{- end }}
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,16 @@ spec:
- --nodelabels=k8s.amazonaws.com/accelerator=nvidia-ampere-a10
- --logtostderr=true
- --v=2
{{- if not $.Values.automountServiceAccountToken }}
volumeMounts:
{{- include "modelEngine.tokenVolumeMount" $ | nindent 12 }}
{{- end }}
priorityClassName: system-cluster-critical
automountServiceAccountToken: {{ .Values.automountServiceAccountToken }}
serviceAccountName: {{ include "modelEngine.fullname" . }}
{{- if not .Values.automountServiceAccountToken }}
volumes:
{{- include "modelEngine.tokenVolume" . | nindent 8 }}
{{- end }}
{{- end }}
{{- end }}
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,16 @@ spec:
- --nodelabels=k8s.amazonaws.com/accelerator=nvidia-tesla-t4
- --logtostderr=true
- --v=2
{{- if not $.Values.automountServiceAccountToken }}
volumeMounts:
{{- include "modelEngine.tokenVolumeMount" $ | nindent 12 }}
{{- end }}
priorityClassName: system-cluster-critical
automountServiceAccountToken: {{ .Values.automountServiceAccountToken }}
serviceAccountName: {{ include "modelEngine.fullname" . }}
{{- if not .Values.automountServiceAccountToken }}
volumes:
{{- include "modelEngine.tokenVolume" . | nindent 8 }}
{{- end }}
{{- end }}
{{- end }}
4 changes: 2 additions & 2 deletions charts/model-engine/templates/restart_keda_operator.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,13 @@
apiVersion: batch/v1
kind: Job
metadata:
name: {{ include "modelEngine.fullname" . }}-restart-keda-operator
name: {{ include "modelEngine.fullname" . }}-restart-keda-operator-{{ .Release.Revision }}
labels:
{{- include "modelEngine.labels" . | nindent 4 }}
annotations:
"helm.sh/hook": post-install
"helm.sh/hook-weight": "1"
"helm.sh/hook-delete-policy": hook-succeeded
"helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded
spec:
backoffLimit: 0
activeDeadlineSeconds: 600
Expand Down
1 change: 1 addition & 0 deletions charts/model-engine/templates/service_account.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,5 +21,6 @@ metadata:
imagePullSecrets:
- name: egp-ecr-regcred
{{- end }}
automountServiceAccountToken: {{ $.Values.automountServiceAccountToken }}
---
{{- end }}
4 changes: 4 additions & 0 deletions charts/model-engine/templates/service_account_inference.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,12 @@ metadata:
annotations:
{{- toYaml . | nindent 4 }}
{{- if $.Values.azure }}
{{- if $.Values.azure.inference_client_id }}
azure.workload.identity/client-id: {{ $.Values.azure.inference_client_id }}
{{- else }}
azure.workload.identity/client-id: {{ $.Values.azure.client_id }}
{{- end }}
{{- end }}
{{- end }}
{{- if $.Values.azure }}
imagePullSecrets:
Expand Down
Loading