diff --git a/chart-dependencies/gke-l7-regional-external-managed/install.sh b/chart-dependencies/gke-l7-regional-external-managed/install.sh new file mode 100755 index 0000000..f1f641a --- /dev/null +++ b/chart-dependencies/gke-l7-regional-external-managed/install.sh @@ -0,0 +1 @@ +#!/usr/bin/env bash diff --git a/chart-dependencies/gke-l7-rilb/install.sh b/chart-dependencies/gke-l7-rilb/install.sh new file mode 100755 index 0000000..f1f641a --- /dev/null +++ b/chart-dependencies/gke-l7-rilb/install.sh @@ -0,0 +1 @@ +#!/usr/bin/env bash diff --git a/charts/llm-d/Chart.yaml b/charts/llm-d/Chart.yaml index 7fb2011..5347258 100644 --- a/charts/llm-d/Chart.yaml +++ b/charts/llm-d/Chart.yaml @@ -1,7 +1,7 @@ apiVersion: v2 name: llm-d type: application -version: 1.0.20 +version: 1.0.21 appVersion: "0.1" icon:  description: llm-d is a Kubernetes-native high-performance distributed LLM inference framework diff --git a/charts/llm-d/README.md b/charts/llm-d/README.md index 44109fa..dd29c12 100644 --- a/charts/llm-d/README.md +++ b/charts/llm-d/README.md @@ -1,7 +1,7 @@ # llm-d Helm Chart -![Version: 1.0.20](https://img.shields.io/badge/Version-1.0.20-informational?style=flat-square) +![Version: 1.0.21](https://img.shields.io/badge/Version-1.0.21-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) llm-d is a Kubernetes-native high-performance distributed LLM inference framework @@ -150,7 +150,7 @@ Kubernetes: `>= 1.30.0-0` | gateway.annotations | Additional annotations provided to the Gateway resource | object | `{}` | | gateway.enabled | Deploy resources related to Gateway | bool | `true` | | gateway.fullnameOverride | String to fully override gateway.fullname | string | `""` | -| gateway.gatewayClassName | Gateway class that determines the backend used Currently supported values: "kgateway" or "istio" | string | `"istio"` | +| gateway.gatewayClassName | Gateway class that determines the backend 
used. Currently supported values: "istio", "kgateway", "gke-l7-rilb", or "gke-l7-regional-external-managed" | string | `"istio"` | | gateway.nameOverride | String to partially override gateway.fullname | string | `""` | | gateway.serviceType | Gateway's service type. Ingress is only available if the service type is set to NodePort. Accepted values: ["LoadBalancer", "NodePort"] | string | `"NodePort"` | | global | Global parameters Global Docker image parameters Please, note that this will override the image parameters, including dependencies, configured to use the global value Current available global Docker image parameters: imageRegistry, imagePullSecrets and storageClass | object | See below | diff --git a/charts/llm-d/templates/inference-gateway/_helpers.tpl b/charts/llm-d/templates/inference-gateway/_helpers.tpl index 7f37aa3..913c512 100644 --- a/charts/llm-d/templates/inference-gateway/_helpers.tpl +++ b/charts/llm-d/templates/inference-gateway/_helpers.tpl @@ -9,3 +9,14 @@ Create a default fully qualified app name for inferenceGateway. {{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}} {{- end -}} {{- end -}} + +{{/* +Resolve gateway class name +*/}} +{{- define "gateway.className" -}} + {{- if contains "gke-l7" .Values.gateway.gatewayClassName -}} + {{- print .Values.gateway.gatewayClassName -}} + {{- else -}} + {{- .Values.gateway.gatewayClassName -}} + {{- end -}} +{{- end -}} diff --git a/charts/llm-d/templates/sample-application/httproutes.yaml b/charts/llm-d/templates/sample-application/httproutes.yaml index 671ce2f..cc4d1c3 100644 --- a/charts/llm-d/templates/sample-application/httproutes.yaml +++ b/charts/llm-d/templates/sample-application/httproutes.yaml @@ -25,7 +25,55 @@ spec: kind: InferencePool name: "{{ include "sampleApplication.sanitizedModelName" . 
}}-inference-pool" port: {{ .Values.sampleApplication.inferencePoolPort }} + {{- if not (contains "gke-l7" .Values.gateway.gatewayClassName) }} timeouts: backendRequest: "0s" request: "0s" + {{- end }} +{{- end }} + +{{- if and .Values.gateway.enabled .Values.sampleApplication.enabled (contains "gke-l7" .Values.gateway.gatewayClassName) }} +--- +apiVersion: networking.gke.io/v1 +kind: GCPBackendPolicy +metadata: + name: {{ include "sampleApplication.sanitizedModelName" . }}-backend-policy + namespace: {{ .Release.Namespace }} + labels: {{ include "common.labels.standard" $ | nindent 4 }} + app.kubernetes.io/component: sample-application + {{- if $.Values.commonLabels }} + {{- include "common.tplvalues.render" ( dict "value" $.Values.commonLabels "context" $ ) | nindent 4 }} + {{- end }} + annotations: + {{- if $.Values.commonAnnotations }} + {{- include "common.tplvalues.render" ( dict "value" $.Values.commonAnnotations "context" $ ) | nindent 4 }} + {{- end }} +spec: + default: + logging: + enabled: true + timeoutSec: 300 + targetRef: + group: inference.networking.x-k8s.io + kind: InferencePool + name: "{{ include "sampleApplication.sanitizedModelName" . }}-inference-pool" +--- +apiVersion: networking.gke.io/v1 +kind: HealthCheckPolicy +metadata: + name: {{ include "sampleApplication.sanitizedModelName" . }}-health-check-policy + namespace: {{ .Release.Namespace }} + labels: {{ include "common.labels.standard" $ | nindent 4 }} + app.kubernetes.io/component: sample-application +spec: + targetRef: + group: "inference.networking.x-k8s.io" + kind: InferencePool + name: "{{ include "sampleApplication.sanitizedModelName" . 
}}-inference-pool" + default: + config: + type: HTTP + httpHealthCheck: + requestPath: /health + port: {{ .Values.sampleApplication.inferencePoolPort }} {{- end }} diff --git a/charts/llm-d/values.schema.json b/charts/llm-d/values.schema.json index 47a46fa..4c948a4 100644 --- a/charts/llm-d/values.schema.json +++ b/charts/llm-d/values.schema.json @@ -69,7 +69,7 @@ }, "gatewayClassName": { "default": "istio", - "description": "Gateway class that determines the backend used Currently supported values: \"kgateway\" or \"istio\"", + "description": "Gateway class that determines the backend used. Currently supported values: \"istio\", \"kgateway\", \"gke-l7-rilb\", or \"gke-l7-regional-external-managed\"", "required": [], "title": "gatewayClassName" }, diff --git a/charts/llm-d/values.schema.tmpl.json b/charts/llm-d/values.schema.tmpl.json index f11d4a4..2fd5e3d 100644 --- a/charts/llm-d/values.schema.tmpl.json +++ b/charts/llm-d/values.schema.tmpl.json @@ -69,7 +69,7 @@ }, "gatewayClassName": { "default": "istio", - "description": "Gateway class that determines the backend used Currently supported values: \"kgateway\" or \"istio\"", + "description": "Gateway class that determines the backend used. Currently supported values: \"istio\", \"kgateway\", \"gke-l7-rilb\", or \"gke-l7-regional-external-managed\"", "required": [], "title": "gatewayClassName" }, diff --git a/charts/llm-d/values.yaml b/charts/llm-d/values.yaml index cd62337..1255c5a 100644 --- a/charts/llm-d/values.yaml +++ b/charts/llm-d/values.yaml @@ -191,8 +191,8 @@ gateway: # -- String to partially override gateway.fullname nameOverride: "" - # -- Gateway class that determines the backend used - # Currently supported values: "kgateway" or "istio" + # -- Gateway class that determines the backend used. 
+ # Currently supported values: "istio", "kgateway", "gke-l7-rilb", or "gke-l7-regional-external-managed" gatewayClassName: istio # @schema diff --git a/quickstart/README.md b/quickstart/README.md index 3807dc7..253371f 100644 --- a/quickstart/README.md +++ b/quickstart/README.md @@ -121,6 +121,7 @@ The installer needs to be run from the `llm-d-deployer/quickstart` directory as | `-t`, `--download-timeout` | Timeout for model download job | `./llmd-installer.sh --download-timeout` | | `-D`, `--download-model` | Download the model to PVC from Hugging Face | `./llmd-installer.sh --download-model` | | `-m`, `--disable-metrics-collection` | Disable metrics collection (Prometheus will not be installed) | `./llmd-installer.sh --disable-metrics-collection` | +| `-j`, `--gateway` | Select gateway type (istio, kgateway, gke-l7-rilb, gke-l7-regional-external-managed) (default: istio) | `./llmd-installer.sh --gateway gke-l7-rilb` | | `-h`, `--help` | Show this help and exit | `./llmd-installer.sh --help` | ## Examples diff --git a/quickstart/llmd-installer.sh b/quickstart/llmd-installer.sh index bb921eb..6c44415 100755 --- a/quickstart/llmd-installer.sh +++ b/quickstart/llmd-installer.sh @@ -56,7 +56,7 @@ Options: -t, --download-timeout Timeout for model download job -k, --minikube Deploy on an existing minikube instance with hostPath storage -g, --context Supply a specific Kubernetes context - -j, --gateway Select gateway type (istio or kgateway) + -j, --gateway Select gateway type (istio, kgateway, gke-l7-rilb, gke-l7-regional-external-managed) (default: istio) -r, --release (Helm) Chart release name -h, --help Show this help and exit EOF @@ -226,9 +226,13 @@ validate_hf_token() { } validate_gateway_type() { - if [[ "${GATEWAY_TYPE}" != "istio" && "${GATEWAY_TYPE}" != "kgateway" ]]; then - die "Invalid gateway type: ${GATEWAY_TYPE}. Supported types are: istio, kgateway." 
- fi + case "${GATEWAY_TYPE}" in + istio|kgateway|gke-l7-rilb|gke-l7-regional-external-managed) + ;; # valid + *) + die "Invalid gateway type: ${GATEWAY_TYPE}. Supported types are: istio, kgateway, gke-l7-rilb, gke-l7-regional-external-managed." + ;; + esac log_success "Gateway type validated" }