diff --git a/charts/model-engine/Chart.yaml b/charts/model-engine/Chart.yaml index 0074202a..a50ff2b5 100644 --- a/charts/model-engine/Chart.yaml +++ b/charts/model-engine/Chart.yaml @@ -15,7 +15,7 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 0.1.10 +version: 0.2.0 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. Versions are not expected to diff --git a/charts/model-engine/templates/_helpers.tpl b/charts/model-engine/templates/_helpers.tpl index 9a7b113f..41a1b5a8 100644 --- a/charts/model-engine/templates/_helpers.tpl +++ b/charts/model-engine/templates/_helpers.tpl @@ -11,7 +11,9 @@ We truncate at 40 chars because some Kubernetes name fields are limited to 63 (b If release name contains chart name it will be used as a full name. 
*/}} {{- define "modelEngine.fullname" -}} -{{- if .Values.serviceIdentifier }} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 40 | trimSuffix "-" }} +{{- else if .Values.serviceIdentifier }} {{- printf "%s-%s" .Chart.Name .Values.serviceIdentifier | trunc 40 | trimSuffix "-" }} {{- else }} {{- default .Chart.Name | trunc 40 | trimSuffix "-" }} @@ -321,6 +323,8 @@ env: value: {{ .Values.aws.profileName }} - name: S3_WRITE_AWS_PROFILE value: {{ .Values.aws.s3WriteProfileName }} + {{- else }} + {{- /* On-prem: Do NOT set AWS_PROFILE - boto3 uses default credential chain */ -}} {{- end }} {{- with .Values.secrets }} {{- if .kubernetesDatabaseSecretName }} @@ -367,7 +371,13 @@ env: - name: CELERY_RESULT_BACKEND value: {{ .Values.celeryResultBackend | quote }} {{- end }} - {{- if .Values.redis.auth}} + {{- if .Values.redis.authSecretName }} + - name: REDIS_AUTH_TOKEN + valueFrom: + secretKeyRef: + name: {{ .Values.redis.authSecretName | quote }} + key: {{ .Values.redis.authSecretKey | default "auth_token" | quote }} + {{- else if .Values.redis.auth }} - name: REDIS_AUTH_TOKEN value: {{ .Values.redis.auth }} {{- end }} @@ -399,6 +409,9 @@ env: value: {{ .Values.tag }} - name: GIT_TAG value: {{ .Values.tag }} + {{- with .Values.extraEnvVars }} + {{- toYaml . | nindent 2 }} + {{- end }} {{- end }} {{- define "modelEngine.serviceEnvGitTagFromPythonReplace" }} @@ -455,6 +468,10 @@ volumes: - key: infra_service_config path: config.yaml {{- end }} + {{- with .Values.extraVolumes }} + {{- toYaml . | nindent 2 }} + {{- end }} + {{- include "modelEngine.tokenVolume" . | nindent 2 }} {{- end }} {{- define "modelEngine.volumeMounts" }} @@ -474,6 +491,10 @@ volumeMounts: - name: infra-service-config-volume mountPath: /workspace/model-engine/model_engine_server/core/configs {{- end }} + {{- with .Values.extraVolumeMounts }} + {{- toYaml . | nindent 2 }} + {{- end }} + {{- include "modelEngine.tokenVolumeMount" .
| nindent 2 }} {{- end }} {{- define "modelEngine.forwarderVolumeMounts" }} @@ -502,3 +523,35 @@ namespaces: - {{ . }} {{- end }} {{- end }} + +{{- define "modelEngine.tokenVolume" }} +{{- if not .Values.automountServiceAccountToken }} +- name: token-volume + projected: + defaultMode: 0444 + sources: + - serviceAccountToken: + path: token + expirationSeconds: 86400 + # We also need to project the CA cert and namespace files + - configMap: + name: kube-root-ca.crt + items: + - key: ca.crt + path: ca.crt + - downwardAPI: + items: + - path: namespace + fieldRef: + apiVersion: v1 + fieldPath: metadata.namespace +{{- end }} +{{- end }} + +{{- define "modelEngine.tokenVolumeMount" }} +{{- if not .Values.automountServiceAccountToken }} +- mountPath: /var/run/secrets/kubernetes.io/serviceaccount + name: token-volume + readOnly: true +{{- end }} +{{- end }} diff --git a/charts/model-engine/templates/balloon_cpu_deployment.yaml b/charts/model-engine/templates/balloon_cpu_deployment.yaml index 583e3c1e..ddee291d 100644 --- a/charts/model-engine/templates/balloon_cpu_deployment.yaml +++ b/charts/model-engine/templates/balloon_cpu_deployment.yaml @@ -30,7 +30,7 @@ spec: {{- toYaml . 
| nindent 8 }} {{- end }} containers: - - image: public.ecr.aws/ubuntu/ubuntu:latest + - image: {{ $.Values.utilityImages.ubuntu.repository }}:{{ $.Values.utilityImages.ubuntu.tag }} imagePullPolicy: IfNotPresent name: main resources: diff --git a/charts/model-engine/templates/balloon_deployments.yaml b/charts/model-engine/templates/balloon_deployments.yaml index 49a1890f..2b43cf15 100644 --- a/charts/model-engine/templates/balloon_deployments.yaml +++ b/charts/model-engine/templates/balloon_deployments.yaml @@ -34,8 +34,13 @@ spec: - key: "nvidia.com/gpu" operator: "Exists" effect: "NoSchedule" + {{- range $.Values.gpuTolerations }} + - key: {{ .key | quote }} + operator: "Exists" + effect: "NoSchedule" + {{- end }} containers: - - image: public.ecr.aws/ubuntu/ubuntu:latest + - image: {{ $.Values.utilityImages.ubuntu.repository }}:{{ $.Values.utilityImages.ubuntu.tag }} imagePullPolicy: IfNotPresent name: main resources: diff --git a/charts/model-engine/templates/cacher_deployment.yaml b/charts/model-engine/templates/cacher_deployment.yaml index 1c9b230a..0f2db059 100644 --- a/charts/model-engine/templates/cacher_deployment.yaml +++ b/charts/model-engine/templates/cacher_deployment.yaml @@ -64,6 +64,7 @@ spec: {{- toYaml .Values.resources | nindent 12 }} {{- include "modelEngine.cacherEnv" . | indent 10 }} {{- include "modelEngine.volumeMounts" . | indent 10 }} + automountServiceAccountToken: {{ .Values.automountServiceAccountToken }} serviceAccountName: {{ include "modelEngine.fullname" . }} {{- include "modelEngine.volumes" . 
| indent 6 }} {{- with .Values.nodeSelector }} diff --git a/charts/model-engine/templates/celery_autoscaler_stateful_set.yaml b/charts/model-engine/templates/celery_autoscaler_stateful_set.yaml index 349da61b..6183a3b0 100644 --- a/charts/model-engine/templates/celery_autoscaler_stateful_set.yaml +++ b/charts/model-engine/templates/celery_autoscaler_stateful_set.yaml @@ -10,6 +10,8 @@ {{ $broker_name = "sqs-message-broker-master" }} {{- else if eq $message_broker "servicebus" }} {{ $broker_name = "servicebus-message-broker-master" }} +{{- else if and .Values.config .Values.config.values .Values.config.values.infra (eq (.Values.config.values.infra.cloud_provider | default "") "gcp") }} +{{ $broker_name = "redis-gcp-memorystore-message-broker-master" }} {{- end }} apiVersion: apps/v1 kind: StatefulSet @@ -99,6 +101,9 @@ spec: name: config-volume subPath: config {{- end }} + {{- if not $.Values.automountServiceAccountToken }} + {{- include "modelEngine.tokenVolumeMount" $ | nindent 8 }} + {{- end }} {{- if .Values.config.values }} - name: {{ .Chart.Name }}-service-config-volume mountPath: /workspace/model-engine/service_configs @@ -112,6 +117,7 @@ spec: operator: Equal value: 'true' effect: NoSchedule + automountServiceAccountToken: {{ .Values.automountServiceAccountToken }} serviceAccountName: {{ include "modelEngine.fullname" $ }} volumes: {{- if .Values.aws }} @@ -119,6 +125,9 @@ spec: name: {{ .Values.aws.configMap.name }} name: config-volume {{- end }} + {{- if not .Values.automountServiceAccountToken }} + {{- include "modelEngine.tokenVolume" . 
| nindent 6 }} + {{- end }} {{- if .Values.config.values }} - name: {{ .Chart.Name }}-service-config-volume configMap: diff --git a/charts/model-engine/templates/database_init_job.yaml b/charts/model-engine/templates/database_init_job.yaml new file mode 100644 index 00000000..74516223 --- /dev/null +++ b/charts/model-engine/templates/database_init_job.yaml @@ -0,0 +1,54 @@ +{{- if .Values.db.runDbInitScript }} +apiVersion: batch/v1 +kind: Job +metadata: + name: {{ include "modelEngine.fullname" . }}-database-setup-{{ .Release.Revision }} + labels: + {{- include "modelEngine.labels" . | nindent 4 }} + annotations: + "helm.sh/hook": pre-install,pre-upgrade + "helm.sh/hook-weight": "-2" + "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded +spec: + backoffLimit: 0 + activeDeadlineSeconds: 600 + template: + metadata: + labels: + sidecar.istio.io/inject: "false" + {{- include "modelEngine.labels" . | nindent 8 }} + spec: + restartPolicy: Never + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + automountServiceAccountToken: {{ .Values.automountServiceAccountToken }} + containers: + - name: {{ include "modelEngine.fullname" . }} + image: "{{ .Values.image.gatewayRepository }}:{{ .Values.tag}}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + command: + - dumb-init + - -- + args: + - python + - -m + - model_engine_server.entrypoints.init_database + {{- include "modelEngine.serviceEnvGitTagFromHelmVar" . | indent 10 }} + {{- include "modelEngine.volumeMounts" . | indent 10 }} + serviceAccountName: {{ include "modelEngine.fullname" . }} + {{- include "modelEngine.volumes" . | indent 6 }} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . 
| nindent 8 }} + {{- end }} +{{- end }} diff --git a/charts/model-engine/templates/database_migration_job.yaml b/charts/model-engine/templates/database_migration_job.yaml index 183814c6..0f91d7a7 100644 --- a/charts/model-engine/templates/database_migration_job.yaml +++ b/charts/model-engine/templates/database_migration_job.yaml @@ -2,13 +2,13 @@ apiVersion: batch/v1 kind: Job metadata: - name: {{ include "modelEngine.fullname" . }}-database-migration + name: {{ include "modelEngine.fullname" . }}-database-migration-{{ .Release.Revision }} labels: {{- include "modelEngine.labels" . | nindent 4 }} annotations: "helm.sh/hook": pre-install,pre-upgrade "helm.sh/hook-weight": "-1" - "helm.sh/hook-delete-policy": hook-succeeded + "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded spec: backoffLimit: 0 activeDeadlineSeconds: 600 @@ -35,6 +35,7 @@ spec: - /workspace/model-engine/model_engine_server/db/migrations/run_database_migration.sh {{- include "modelEngine.serviceEnvGitTagFromHelmVar" . | indent 10 }} {{- include "modelEngine.volumeMounts" . | indent 10 }} + automountServiceAccountToken: {{ .Values.automountServiceAccountToken }} serviceAccountName: {{ include "modelEngine.fullname" . }} {{- include "modelEngine.volumes" . | indent 6 }} {{- with .Values.nodeSelector }} diff --git a/charts/model-engine/templates/endpoint_builder_deployment.yaml b/charts/model-engine/templates/endpoint_builder_deployment.yaml index ad39332a..7791d405 100644 --- a/charts/model-engine/templates/endpoint_builder_deployment.yaml +++ b/charts/model-engine/templates/endpoint_builder_deployment.yaml @@ -74,6 +74,7 @@ spec: {{- toYaml .Values.resources | nindent 12 }} {{- include "modelEngine.builderEnv" . | indent 10 }} {{- include "modelEngine.volumeMounts" . | indent 10 }} + automountServiceAccountToken: {{ .Values.automountServiceAccountToken }} serviceAccountName: {{ include "modelEngine.fullname" . }} {{- include "modelEngine.volumes" . 
| indent 6 }} {{- with .Values.nodeSelector }} diff --git a/charts/model-engine/templates/gateway_deployment.yaml b/charts/model-engine/templates/gateway_deployment.yaml index 6aad3cc2..726d6e85 100644 --- a/charts/model-engine/templates/gateway_deployment.yaml +++ b/charts/model-engine/templates/gateway_deployment.yaml @@ -75,6 +75,7 @@ spec: {{- toYaml .Values.resources | nindent 12 }} {{- include "modelEngine.gatewayEnv" . | indent 10 }} {{- include "modelEngine.volumeMounts" . | indent 10 }} + automountServiceAccountToken: {{ .Values.automountServiceAccountToken }} serviceAccountName: {{ include "modelEngine.fullname" . }} {{- include "modelEngine.volumes" . | indent 6 }} {{- with .Values.nodeSelector }} diff --git a/charts/model-engine/templates/inference_framework_config.yaml b/charts/model-engine/templates/inference_framework_config.yaml index 45759d77..af2387ea 100644 --- a/charts/model-engine/templates/inference_framework_config.yaml +++ b/charts/model-engine/templates/inference_framework_config.yaml @@ -9,10 +9,10 @@ metadata: "helm.sh/hook": pre-install "helm.sh/hook-weight": "-2" data: - deepspeed: "latest" - text_generation_inference: "latest" - vllm: "latest" - vllm_batch: "latest" - vllm_batch_v2: "latest" - lightllm: "latest" - tensorrt_llm: "latest" + deepspeed: {{ .Values.inferenceFramework.deepspeed | default "latest" | quote }} + text_generation_inference: {{ .Values.inferenceFramework.text_generation_inference | default "latest" | quote }} + vllm: {{ .Values.inferenceFramework.vllm | default "latest" | quote }} + vllm_batch: {{ .Values.inferenceFramework.vllm_batch | default "latest" | quote }} + vllm_batch_v2: {{ .Values.inferenceFramework.vllm_batch_v2 | default "latest" | quote }} + lightllm: {{ .Values.inferenceFramework.lightllm | default "latest" | quote }} + tensorrt_llm: {{ .Values.inferenceFramework.tensorrt_llm | default "latest" | quote }} diff --git a/charts/model-engine/templates/populate_fine_tuning_repository_job.yaml 
b/charts/model-engine/templates/populate_fine_tuning_repository_job.yaml index 080f21e6..499504fe 100644 --- a/charts/model-engine/templates/populate_fine_tuning_repository_job.yaml +++ b/charts/model-engine/templates/populate_fine_tuning_repository_job.yaml @@ -2,13 +2,13 @@ apiVersion: batch/v1 kind: Job metadata: - name: {{ include "modelEngine.fullname" . }}-populate-fine-tuning-repository + name: {{ include "modelEngine.fullname" . }}-populate-fine-tuning-repository-{{ .Release.Revision }} labels: {{- include "modelEngine.labels" . | nindent 4 }} annotations: "helm.sh/hook": post-install "helm.sh/hook-weight": "1" - "helm.sh/hook-delete-policy": hook-succeeded + "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded spec: backoffLimit: 0 activeDeadlineSeconds: 600 diff --git a/charts/model-engine/templates/proportional_a100_autoscaler_deployment.yaml b/charts/model-engine/templates/proportional_a100_autoscaler_deployment.yaml index f89f298e..f9e0fabe 100644 --- a/charts/model-engine/templates/proportional_a100_autoscaler_deployment.yaml +++ b/charts/model-engine/templates/proportional_a100_autoscaler_deployment.yaml @@ -43,7 +43,16 @@ spec: - --nodelabels=k8s.amazonaws.com/accelerator=nvidia-ampere-a100 - --logtostderr=true - --v=2 + {{- if not $.Values.automountServiceAccountToken }} + volumeMounts: + {{- include "modelEngine.tokenVolumeMount" $ | nindent 12 }} + {{- end }} priorityClassName: system-cluster-critical + automountServiceAccountToken: {{ .Values.automountServiceAccountToken }} serviceAccountName: {{ include "modelEngine.fullname" . }} + {{- if not .Values.automountServiceAccountToken }} + volumes: + {{- include "modelEngine.tokenVolume" . 
| nindent 8 }} + {{- end }} {{- end }} {{- end }} diff --git a/charts/model-engine/templates/proportional_a10_autoscaler_deployment.yaml b/charts/model-engine/templates/proportional_a10_autoscaler_deployment.yaml index 70274d26..7d6b4a01 100644 --- a/charts/model-engine/templates/proportional_a10_autoscaler_deployment.yaml +++ b/charts/model-engine/templates/proportional_a10_autoscaler_deployment.yaml @@ -43,7 +43,16 @@ spec: - --nodelabels=k8s.amazonaws.com/accelerator=nvidia-ampere-a10 - --logtostderr=true - --v=2 + {{- if not $.Values.automountServiceAccountToken }} + volumeMounts: + {{- include "modelEngine.tokenVolumeMount" $ | nindent 12 }} + {{- end }} priorityClassName: system-cluster-critical + automountServiceAccountToken: {{ .Values.automountServiceAccountToken }} serviceAccountName: {{ include "modelEngine.fullname" . }} + {{- if not .Values.automountServiceAccountToken }} + volumes: + {{- include "modelEngine.tokenVolume" . | nindent 8 }} + {{- end }} {{- end }} {{- end }} diff --git a/charts/model-engine/templates/proportional_t4_autoscaler_deployment.yaml b/charts/model-engine/templates/proportional_t4_autoscaler_deployment.yaml index 7175d985..9f0214cc 100644 --- a/charts/model-engine/templates/proportional_t4_autoscaler_deployment.yaml +++ b/charts/model-engine/templates/proportional_t4_autoscaler_deployment.yaml @@ -43,7 +43,16 @@ spec: - --nodelabels=k8s.amazonaws.com/accelerator=nvidia-tesla-t4 - --logtostderr=true - --v=2 + {{- if not $.Values.automountServiceAccountToken }} + volumeMounts: + {{- include "modelEngine.tokenVolumeMount" $ | nindent 12 }} + {{- end }} priorityClassName: system-cluster-critical + automountServiceAccountToken: {{ .Values.automountServiceAccountToken }} serviceAccountName: {{ include "modelEngine.fullname" . }} + {{- if not .Values.automountServiceAccountToken }} + volumes: + {{- include "modelEngine.tokenVolume" . 
| nindent 8 }} + {{- end }} {{- end }} {{- end }} diff --git a/charts/model-engine/templates/restart_keda_operator.yaml b/charts/model-engine/templates/restart_keda_operator.yaml index 8937ea82..35e37cb4 100644 --- a/charts/model-engine/templates/restart_keda_operator.yaml +++ b/charts/model-engine/templates/restart_keda_operator.yaml @@ -5,13 +5,13 @@ apiVersion: batch/v1 kind: Job metadata: - name: {{ include "modelEngine.fullname" . }}-restart-keda-operator + name: {{ include "modelEngine.fullname" . }}-restart-keda-operator-{{ .Release.Revision }} labels: {{- include "modelEngine.labels" . | nindent 4 }} annotations: "helm.sh/hook": post-install "helm.sh/hook-weight": "1" - "helm.sh/hook-delete-policy": hook-succeeded + "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded spec: backoffLimit: 0 activeDeadlineSeconds: 600 diff --git a/charts/model-engine/templates/service_account.yaml b/charts/model-engine/templates/service_account.yaml index dc41c998..c226505f 100644 --- a/charts/model-engine/templates/service_account.yaml +++ b/charts/model-engine/templates/service_account.yaml @@ -21,5 +21,6 @@ metadata: imagePullSecrets: - name: egp-ecr-regcred {{- end }} +automountServiceAccountToken: {{ $.Values.automountServiceAccountToken }} --- {{- end }} diff --git a/charts/model-engine/templates/service_account_inference.yaml b/charts/model-engine/templates/service_account_inference.yaml index 9a4a698c..712f9df5 100644 --- a/charts/model-engine/templates/service_account_inference.yaml +++ b/charts/model-engine/templates/service_account_inference.yaml @@ -14,8 +14,12 @@ metadata: annotations: {{- toYaml . 
| nindent 4 }} {{- if $.Values.azure }} + {{- if $.Values.azure.inference_client_id }} + azure.workload.identity/client-id: {{ $.Values.azure.inference_client_id }} + {{- else }} azure.workload.identity/client-id: {{ $.Values.azure.client_id }} {{- end }} + {{- end }} {{- end }} {{- if $.Values.azure }} imagePullSecrets: diff --git a/charts/model-engine/templates/service_config_map.yaml b/charts/model-engine/templates/service_config_map.yaml index 0f2049f9..f879b4a7 100644 --- a/charts/model-engine/templates/service_config_map.yaml +++ b/charts/model-engine/templates/service_config_map.yaml @@ -14,9 +14,13 @@ data: gateway_namespace: {{ .Release.Namespace | quote }} {{- with .Values.config.values.launch }} {{- range $key, $value := . }} + {{- if kindIs "bool" $value }} + {{ $key }}: {{ $value }} + {{- else }} {{ $key }}: {{ $value | quote }} {{- end }} {{- end }} + {{- end }} infra_service_config: |- env: {{ .Values.context | quote }} {{- if .Values.celery_enable_sha256 }} @@ -30,9 +34,13 @@ data: {{- end }} {{- with .Values.config.values.infra }} {{- range $key, $value := . }} + {{- if kindIs "bool" $value }} + {{ $key }}: {{ $value }} + {{- else }} {{ $key }}: {{ $value | quote }} {{- end }} {{- end }} + {{- end }} --- @@ -52,9 +60,13 @@ data: gateway_namespace: {{ .Release.Namespace | quote }} {{- with .Values.config.values.launch }} {{- range $key, $value := . }} + {{- if kindIs "bool" $value }} + {{ $key }}: {{ $value }} + {{- else }} {{ $key }}: {{ $value | quote }} {{- end }} {{- end }} + {{- end }} infra_service_config: |- env: {{ .Values.context | quote }} {{- if .Values.celery_enable_sha256 }} @@ -68,7 +80,11 @@ data: {{- end }} {{- with .Values.config.values.infra }} {{- range $key, $value := . 
}} + {{- if kindIs "bool" $value }} + {{ $key }}: {{ $value }} + {{- else }} {{ $key }}: {{ $value | quote }} {{- end }} {{- end }} + {{- end }} {{- end }} diff --git a/charts/model-engine/templates/service_template_config_map.yaml b/charts/model-engine/templates/service_template_config_map.yaml index 80d210a0..29b360b5 100644 --- a/charts/model-engine/templates/service_template_config_map.yaml +++ b/charts/model-engine/templates/service_template_config_map.yaml @@ -21,6 +21,7 @@ {{- $service_template_aws_config_map_name := .Values.serviceTemplate.awsConfigMapName }} {{- $celery_broker_type := .Values.celeryBrokerType }} {{- $node_selector := .Values.nodeSelector }} +{{- $gpu_tolerations := .Values.gpuTolerations }} {{- $require_aws_config := not (empty .Values.aws) }} {{- $enable_datadog := .Values.datadog.enabled }} {{- $azure_cloud_provider := not (empty .Values.azure) }} @@ -106,6 +107,11 @@ data: - key: "nvidia.com/gpu" operator: "Exists" effect: "NoSchedule" + {{- range $gpu_tolerations }} + - key: {{ .key | quote }} + operator: "Exists" + effect: "NoSchedule" + {{- end }} {{- end }} priorityClassName: ${PRIORITY} containers: @@ -320,6 +326,10 @@ data: imagePullPolicy: IfNotPresent command: ${COMMAND} env: ${MAIN_ENV} + {{- with $.Values.extraPodEnvFrom }} + envFrom: + {{- toYaml . | nindent 16 }} + {{- end }} readinessProbe: httpGet: path: ${HEALTHCHECK_ROUTE} @@ -545,6 +555,11 @@ data: - key: "nvidia.com/gpu" operator: "Exists" effect: "NoSchedule" + {{- range $gpu_tolerations }} + - key: {{ .key | quote }} + operator: "Exists" + effect: "NoSchedule" + {{- end }} {{- end }} priorityClassName: ${PRIORITY} containers: @@ -704,6 +719,10 @@ data: imagePullPolicy: IfNotPresent command: ${COMMAND} env: ${MAIN_ENV} + {{- with $.Values.extraPodEnvFrom }} + envFrom: + {{- toYaml . 
| nindent 18 }} + {{- end }} readinessProbe: httpGet: path: ${HEALTHCHECK_ROUTE} @@ -811,6 +830,11 @@ data: - key: "nvidia.com/gpu" operator: "Exists" effect: "NoSchedule" + {{- range $gpu_tolerations }} + - key: {{ .key | quote }} + operator: "Exists" + effect: "NoSchedule" + {{- end }} {{- end }} priorityClassName: ${PRIORITY} containers: @@ -1017,7 +1041,7 @@ data: apiVersion: batch/v1 kind: Job metadata: - name: ${RESOURCE_NAME} + name: ${RESOURCE_NAME}-{{ $.Release.Revision }} labels: {{- $job_template_labels | nindent 8 }} spec: @@ -1157,6 +1181,11 @@ data: - key: "nvidia.com/gpu" operator: "Exists" effect: "NoSchedule" + {{- range $gpu_tolerations }} + - key: {{ .key | quote }} + operator: "Exists" + effect: "NoSchedule" + {{- end }} {{- end }} {{- if $service_template_service_account_name }} serviceAccountName: {{ $service_template_service_account_name }} @@ -1311,7 +1340,7 @@ data: {{- end }} {{- end }} containers: - - image: public.ecr.aws/docker/library/busybox:latest + - image: {{ $.Values.utilityImages.busybox.repository }}:{{ $.Values.utilityImages.busybox.tag }} imagePullPolicy: IfNotPresent name: busybox command: ["/bin/sh", "-ec", "while : ; do sleep 30 ; done"] @@ -1351,7 +1380,7 @@ data: spec: containers: - name: ${NAME} - image: curlimages/curl:7.72.0 + image: {{ $.Values.utilityImages.curl.repository }}:{{ $.Values.utilityImages.curl.tag }} imagePullPolicy: IfNotPresent command: - curl diff --git a/charts/model-engine/templates/spellbook_init_job.yaml b/charts/model-engine/templates/spellbook_init_job.yaml index ed23f4e6..d70c71dd 100644 --- a/charts/model-engine/templates/spellbook_init_job.yaml +++ b/charts/model-engine/templates/spellbook_init_job.yaml @@ -2,13 +2,13 @@ apiVersion: batch/v1 kind: Job metadata: - name: {{ include "modelEngine.fullname" . }}-spellbook-setup + name: {{ include "modelEngine.fullname" . }}-spellbook-setup-{{ .Release.Revision }} labels: {{- include "modelEngine.labels" .
| nindent 4 }} annotations: "helm.sh/hook": post-install "helm.sh/hook-weight": "0" - "helm.sh/hook-delete-policy": hook-succeeded + "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded spec: backoffLimit: 0 activeDeadlineSeconds: 600 @@ -38,6 +38,7 @@ spec: - '{{- include "modelEngine.gatewayurl" . }}' {{- include "modelEngine.serviceEnvGitTagFromHelmVar" . | indent 10 }} {{- include "modelEngine.volumeMounts" . | indent 10 }} + automountServiceAccountToken: {{ .Values.automountServiceAccountToken }} serviceAccountName: {{ include "modelEngine.fullname" . }} {{- include "modelEngine.volumes" . | indent 6 }} {{- with .Values.nodeSelector }} diff --git a/charts/model-engine/values.yaml b/charts/model-engine/values.yaml index b0335ddb..1af7c20b 100644 --- a/charts/model-engine/values.yaml +++ b/charts/model-engine/values.yaml @@ -27,6 +27,8 @@ celery_broker_type_redis: null redis: auth: + authSecretName: "" + authSecretKey: "" enableTLS: false enableAuth: false kedaSecretName: "" @@ -41,4 +43,38 @@ balloonNodeSelector: nodeSelector: node-lifecycle: normal keda: - cooldownPeriod: 300 \ No newline at end of file + cooldownPeriod: 300 + +# Configurable inference framework image tags +inferenceFramework: + vllm: "latest" + deepspeed: "latest" + text_generation_inference: "latest" + lightllm: "latest" + tensorrt_llm: "latest" + vllm_batch: "latest" + vllm_batch_v2: "latest" + +# Extensibility hooks for wrapper charts (e.g. 
airgapped/on-prem deployments) +extraEnvVars: [] # Extra env vars for all main containers (gateway, builder, cacher) +extraVolumes: [] # Extra volumes for all deployments +extraVolumeMounts: [] # Extra volume mounts for main containers +extraPodEnvFrom: [] # Extra envFrom for inference pods (e.g., MinIO credentials) + +# Service account token control +automountServiceAccountToken: true + +# Configurable utility images (for airgapped environments) +utilityImages: + busybox: + repository: "public.ecr.aws/docker/library/busybox" + tag: "latest" + curl: + repository: "curlimages/curl" + tag: "7.72.0" + ubuntu: + repository: "public.ecr.aws/ubuntu/ubuntu" + tag: "latest" + +# Additional GPU tolerations for endpoint pods +gpuTolerations: []