diff --git a/chart-dependencies/gke-l7-regional-external-managed/install.sh b/chart-dependencies/gke-l7-regional-external-managed/install.sh new file mode 100755 index 0000000..f1f641a --- /dev/null +++ b/chart-dependencies/gke-l7-regional-external-managed/install.sh @@ -0,0 +1 @@ +#!/usr/bin/env bash diff --git a/chart-dependencies/gke-l7-rilb/install.sh b/chart-dependencies/gke-l7-rilb/install.sh new file mode 100755 index 0000000..f1f641a --- /dev/null +++ b/chart-dependencies/gke-l7-rilb/install.sh @@ -0,0 +1 @@ +#!/usr/bin/env bash diff --git a/charts/llm-d/Chart.yaml b/charts/llm-d/Chart.yaml index 7fb2011..5347258 100644 --- a/charts/llm-d/Chart.yaml +++ b/charts/llm-d/Chart.yaml @@ -1,7 +1,7 @@ apiVersion: v2 name: llm-d type: application -version: 1.0.20 +version: 1.0.21 appVersion: "0.1" icon: data:image/svg+xml;base64,PD94bWwgdmVyc2lvbj0iMS4wIiBlbmNvZGluZz0iVVRGLTgiIHN0YW5kYWxvbmU9Im5vIj8+CjwhLS0gQ3JlYXRlZCB3aXRoIElua3NjYXBlIChodHRwOi8vd3d3Lmlua3NjYXBlLm9yZy8pIC0tPgoKPHN2ZwogICB3aWR0aD0iODBtbSIKICAgaGVpZ2h0PSI4MG1tIgogICB2aWV3Qm94PSIwIDAgODAuMDAwMDA0IDgwLjAwMDAwMSIKICAgdmVyc2lvbj0iMS4xIgogICBpZD0ic3ZnMSIKICAgeG1sOnNwYWNlPSJwcmVzZXJ2ZSIKICAgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIgogICB4bWxuczpzdmc9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj48ZGVmcwogICAgIGlkPSJkZWZzMSIgLz48cGF0aAogICAgIHN0eWxlPSJmaWxsOiM0ZDRkNGQ7ZmlsbC1vcGFjaXR5OjE7c3Ryb2tlOiM0ZDRkNGQ7c3Ryb2tlLXdpZHRoOjIuMzQyOTk7c3Ryb2tlLW1pdGVybGltaXQ6MTA7c3Ryb2tlLWRhc2hhcnJheTpub25lIgogICAgIGQ9Im0gNTEuNjI5Nyw0My4wNzY3IGMgLTAuODI1NCwwIC0xLjY1MDgsMC4yMTI4IC0yLjM4ODEsMC42Mzg0IGwgLTEwLjcyNjksNi4xOTI2IGMgLTEuNDc2MywwLjg1MjIgLTIuMzg3MywyLjQzNDUgLTIuMzg3Myw0LjEzNTQgdiAxMi4zODQ3IGMgMCwxLjcwNDEgMC45MTI4LDMuMjg1NCAyLjM4ODUsNC4xMzU4IGwgMTAuNzI1Nyw2LjE5MTggYyAxLjQ3NDcsMC44NTEzIDMuMzAxNSwwLjg1MTMgNC43NzYyLDAgTCA2NC43NDQ3LDcwLjU2MzIgQyA2Ni4yMjEsNjkuNzExIDY3LjEzMiw2OC4xMjg4IDY3LjEzMiw2Ni40Mjc4IFYgNTQuMDQzMSBjIDAsLTEuNzAzNiAtMC45MTIzLC0zLjI4NDggLTIuMzg3MywtNC4xMzU0IGwgLThlLTQsLTRlLTQgLTEwLjcyNjEsLTYuMTkyMiBjIC0wLjczNzQ
sLTAuNDI1NiAtMS41NjI3LC0wLjYzODQgLTIuMzg4MSwtMC42Mzg0IHogbSAwLDMuNzM5NyBjIDAuMTc3NCwwIDAuMzU0NiwwLjA0NyAwLjUxNjcsMC4xNDA2IGwgMTAuNzI3Niw2LjE5MjUgNGUtNCw0ZS00IGMgMC4zMTkzLDAuMTg0IDAuNTE0MywwLjUyMDMgMC41MTQzLDAuODkzMiB2IDEyLjM4NDcgYyAwLDAuMzcyMSAtMC4xOTI3LDAuNzA3MyAtMC41MTU1LDAuODkzNiBsIC0xMC43MjY4LDYuMTkyMiBjIC0wLjMyNDMsMC4xODcyIC0wLjcwOTEsMC4xODcyIC0xLjAzMzQsMCBsIC0xMC43MjcyLC02LjE5MjYgLThlLTQsLTRlLTQgQyA0MC4wNjU3LDY3LjEzNjcgMzkuODcwNyw2Ni44MDA3IDM5Ljg3MDcsNjYuNDI3OCBWIDU0LjA0MzEgYyAwLC0wLjM3MiAwLjE5MjcsLTAuNzA3NyAwLjUxNTUsLTAuODk0IEwgNTEuMTEzLDQ2Ljk1NyBjIDAuMTYyMSwtMC4wOTQgMC4zMzkzLC0wLjE0MDYgMC41MTY3LC0wLjE0MDYgeiIKICAgICBpZD0icGF0aDEyMiIgLz48cGF0aAogICAgIGlkPSJwYXRoMTI0IgogICAgIHN0eWxlPSJmaWxsOiM0ZDRkNGQ7ZmlsbC1vcGFjaXR5OjE7c3Ryb2tlOiM0ZDRkNGQ7c3Ryb2tlLXdpZHRoOjIuMzQyOTk7c3Ryb2tlLWxpbmVjYXA6cm91bmQ7c3Ryb2tlLW1pdGVybGltaXQ6MTA7c3Ryb2tlLWRhc2hhcnJheTpub25lIgogICAgIGQ9Im0gNjMuMzg5MDE4LDM0LjgxOTk1OCB2IDIyLjM0NDE3NSBhIDEuODcxNTQzLDEuODcxNTQzIDAgMCAwIDEuODcxNTQxLDEuODcxNTQxIDEuODcxNTQzLDEuODcxNTQzIDAgMCAwIDEuODcxNTQxLC0xLjg3MTU0MSBWIDMyLjY1ODY0NyBaIiAvPjxwYXRoCiAgICAgc3R5bGU9ImZpbGw6IzdmMzE3ZjtmaWxsLW9wYWNpdHk6MTtzdHJva2U6IzdmMzE3ZjtzdHJva2Utd2lkdGg6Mi4yNDM7c3Ryb2tlLW1pdGVybGltaXQ6MTA7c3Ryb2tlLWRhc2hhcnJheTpub25lO3N0cm9rZS1vcGFjaXR5OjEiCiAgICAgZD0ibSAzNi43MzQyLDI4LjIzNDggYyAwLjQwOTcsMC43MTY1IDEuMDA0MiwxLjMyNzMgMS43Mzk4LDEuNzU2MSBsIDEwLjcwMSw2LjIzNzIgYyAxLjQ3MjcsMC44NTg0IDMuMjk4NCwwLjg2MzcgNC43NzUsMC4wMTkgbCAxMC43NTA2LC02LjE0ODUgYyAxLjQ3OTMsLTAuODQ2IDIuMzk4NywtMi40MjM0IDIuNDA0NCwtNC4xMjY3IGwgMC4wNSwtMTIuMzg0NCBjIDAuMDEsLTEuNzAyOSAtMC45LC0zLjI4ODYgLTIuMzcxMiwtNC4xNDYxIEwgNTQuMDgzMiwzLjIwNCBDIDUyLjYxMDUsMi4zNDU1IDUwLjc4NDcsMi4zNDAyIDQ5LjMwODIsMy4xODUgTCAzOC41NTc1LDkuMzMzNSBjIC0xLjQ3ODksMC44NDU4IC0yLjM5ODQsMi40MjI3IC0yLjQwNDYsNC4xMjU0IGwgMTBlLTUsOGUtNCAtMC4wNSwxMi4zODUgYyAwLDAuODUxNSAwLjIyMTYsMS42NzM1IDAuNjMxNCwyLjM5IHogbSAzLjI0NjMsLTEuODU2NiBjIC0wLjA4OCwtMC4xNTQgLTAuMTM1MywtMC4zMzExIC0wLjEzNDUsLTAuNTE4MyBsIDAuMDUsLTEyLjM4NjYgMmUtNCwtNmUtNCBjIDAsLTAuMzY4NCAwLjE5NjMsLTAuNzA
0NyAwLjUyLC0wLjg4OTkgTCA1MS4xNjY5LDYuNDM0MyBjIDAuMzIyOSwtMC4xODQ3IDAuNzA5NywtMC4xODM4IDEuMDMxNiwwIGwgMTAuNzAwNiw2LjIzNzQgYyAwLjMyMzUsMC4xODg1IDAuNTE0NSwwLjUyMjYgMC41MTMsMC44OTcgbCAtMC4wNSwxMi4zODYyIHYgOWUtNCBjIDAsMC4zNjg0IC0wLjE5NiwwLjcwNDUgLTAuNTE5NywwLjg4OTYgbCAtMTAuNzUwNiw2LjE0ODUgYyAtMC4zMjMsMC4xODQ3IC0wLjcxMDEsMC4xODQgLTEuMDMyLDAgTCA0MC4zNTkyLDI2Ljc1NjcgYyAtMC4xNjE3LC0wLjA5NCAtMC4yOTA1LC0wLjIyNDggLTAuMzc4NSwtMC4zNzg4IHoiCiAgICAgaWQ9InBhdGgxMjYiIC8+PHBhdGgKICAgICBpZD0icGF0aDEyOSIKICAgICBzdHlsZT0iZmlsbDojN2YzMTdmO2ZpbGwtb3BhY2l0eToxO3N0cm9rZTojN2YzMTdmO3N0cm9rZS13aWR0aDoyLjI0MztzdHJva2UtbGluZWNhcDpyb3VuZDtzdHJva2UtbWl0ZXJsaW1pdDoxMDtzdHJva2UtZGFzaGFycmF5Om5vbmU7c3Ryb2tlLW9wYWNpdHk6MSIKICAgICBkPSJNIDIzLjcyODgzNSwyMi4xMjYxODUgNDMuMTI0OTI0LDExLjAzMzIyIEEgMS44NzE1NDMsMS44NzE1NDMgMCAwIDAgNDMuODIwMzkxLDguNDc5NDY2NiAxLjg3MTU0MywxLjg3MTU0MyAwIDAgMCA0MS4yNjY2MzcsNy43ODM5OTk4IEwgMTkuOTk0NDAxLDE5Ljk0OTk2NyBaIiAvPjxwYXRoCiAgICAgc3R5bGU9ImZpbGw6IzdmMzE3ZjtmaWxsLW9wYWNpdHk6MTtzdHJva2U6IzdmMzE3ZjtzdHJva2Utd2lkdGg6Mi4yNDM7c3Ryb2tlLW1pdGVybGltaXQ6MTA7c3Ryb2tlLWRhc2hhcnJheTpub25lO3N0cm9rZS1vcGFjaXR5OjEiCiAgICAgZD0ibSAzMS40NzY2LDQ4LjQ1MDQgYyAwLjQxNDUsLTAuNzEzOCAwLjY0NSwtMS41MzQ0IDAuNjQ3MiwtMi4zODU4IGwgMC4wMzIsLTEyLjM4NiBjIDAsLTEuNzA0NiAtMC45MDY0LC0zLjI4NyAtMi4zNzczLC00LjE0MTIgTCAxOS4wNjg4LDIzLjMxOCBjIC0xLjQ3MzcsLTAuODU1OCAtMy4yOTk1LC0wLjg2MDUgLTQuNzc2LC0wLjAxMSBMIDMuNTUyMSwyOS40NzI3IGMgLTEuNDc2OCwwLjg0NzggLTIuMzk0MiwyLjQyNzUgLTIuMzk4Niw0LjEzMDQgbCAtMC4wMzIsMTIuMzg1NyBjIDAsMS43MDQ3IDAuOTA2MywzLjI4NzEgMi4zNzcyLDQuMTQxMiBsIDEwLjcwOTgsNi4yMTk1IGMgMS40NzMyLDAuODU1NSAzLjI5ODcsMC44NjA2IDQuNzc1LDAuMDEyIGwgNmUtNCwtNGUtNCAxMC43NDEyLC02LjE2NTggYyAwLjczODUsLTAuNDIzOSAxLjMzNjksLTEuMDMwOCAxLjc1MTUsLTEuNzQ0NSB6IG0gLTMuMjM0LC0xLjg3ODEgYyAtMC4wODksMC4xNTM0IC0wLjIxODYsMC4yODMxIC0wLjM4MSwwLjM3NjMgbCAtMTAuNzQyMyw2LjE2NyAtNmUtNCwyZS00IGMgLTAuMzE5NCwwLjE4MzYgLTAuNzA4MiwwLjE4MzQgLTEuMDMwNywwIEwgNS4zNzgyLDQ2Ljg5NjQgQyA1LjA1NjUsNDYuNzA5NiA0Ljg2MzMsNDYuMzc0NSA0Ljg2NDMsNDYuMDAxOSBsIDAuMDMyLC0xMi4zODU4IGMgMCw
tMC4zNzQ0IDAuMTk0MiwtMC43MDcyIDAuNTE4OSwtMC44OTM2IGwgMTAuNzQyMiwtNi4xNjY3IDZlLTQsLTRlLTQgYyAwLjMxOTQsLTAuMTgzNyAwLjcwNzgsLTAuMTgzNyAxLjAzMDMsMCBsIDEwLjcwOTgsNi4yMTk0IGMgMC4zMjE3LDAuMTg2OSAwLjUxNTIsMC41MjIxIDAuNTE0MiwwLjg5NDggbCAtMC4wMzIsMTIuMzg1NiBjIC00ZS00LDAuMTg3MiAtMC4wNDksMC4zNjQxIC0wLjEzNzksMC41MTc0IHoiCiAgICAgaWQ9InBhdGgxMzkiIC8+PHBhdGgKICAgICBpZD0icGF0aDE0MSIKICAgICBzdHlsZT0iZmlsbDojN2YzMTdmO2ZpbGwtb3BhY2l0eToxO3N0cm9rZTojN2YzMTdmO3N0cm9rZS13aWR0aDoyLjI0MztzdHJva2UtbGluZWNhcDpyb3VuZDtzdHJva2UtbWl0ZXJsaW1pdDoxMDtzdHJva2UtZGFzaGFycmF5Om5vbmU7c3Ryb2tlLW9wYWNpdHk6MSIKICAgICBkPSJNIDMyLjcxMTI5OSw2Mi43NjU3NDYgMTMuMzg4OTY5LDUxLjU0NDc5OCBhIDEuODcxNTQzLDEuODcxNTQzIDAgMCAwIC0yLjU1ODI5NSwwLjY3ODU2OCAxLjg3MTU0MywxLjg3MTU0MyAwIDAgMCAwLjY3ODU2OSwyLjU1ODI5NiBsIDIxLjE5MTM0NCwxMi4zMDYzMyB6IiAvPjwvc3ZnPgo= description: llm-d is a Kubernetes-native high-performance distributed LLM inference framework diff --git a/charts/llm-d/README.md b/charts/llm-d/README.md index 44109fa..dd29c12 100644 --- a/charts/llm-d/README.md +++ b/charts/llm-d/README.md @@ -1,7 +1,7 @@ # llm-d Helm Chart -![Version: 1.0.20](https://img.shields.io/badge/Version-1.0.20-informational?style=flat-square) +![Version: 1.0.21](https://img.shields.io/badge/Version-1.0.21-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) llm-d is a Kubernetes-native high-performance distributed LLM inference framework @@ -150,7 +150,7 @@ Kubernetes: `>= 1.30.0-0` | gateway.annotations | Additional annotations provided to the Gateway resource | object | `{}` | | gateway.enabled | Deploy resources related to Gateway | bool | `true` | | gateway.fullnameOverride | String to fully override gateway.fullname | string | `""` | -| gateway.gatewayClassName | Gateway class that determines the backend used Currently supported values: "kgateway" or "istio" | string | `"istio"` | +| gateway.gatewayClassName | Gateway class that determines the backend used. 
Currently supported values: "istio", "kgateway", "gke-l7-rilb", or "gke-l7-regional-external-managed" | string | `"istio"` | | gateway.nameOverride | String to partially override gateway.fullname | string | `""` | | gateway.serviceType | Gateway's service type. Ingress is only available if the service type is set to NodePort. Accepted values: ["LoadBalancer", "NodePort"] | string | `"NodePort"` | | global | Global parameters Global Docker image parameters Please, note that this will override the image parameters, including dependencies, configured to use the global value Current available global Docker image parameters: imageRegistry, imagePullSecrets and storageClass | object | See below | diff --git a/charts/llm-d/templates/inference-gateway/_helpers.tpl b/charts/llm-d/templates/inference-gateway/_helpers.tpl index 7f37aa3..913c512 100644 --- a/charts/llm-d/templates/inference-gateway/_helpers.tpl +++ b/charts/llm-d/templates/inference-gateway/_helpers.tpl @@ -9,3 +9,14 @@ Create a default fully qualified app name for inferenceGateway. {{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}} {{- end -}} {{- end -}} + +{{/* +Resolve gateway class name +*/}} +{{- define "gateway.className" -}} + {{- if contains "gke-l7" .Values.gateway.gatewayClassName -}} + {{- print .Values.gateway.gatewayClassName -}} + {{- else -}} + {{- .Values.gateway.gatewayClassName -}} + {{- end -}} +{{- end -}} diff --git a/charts/llm-d/templates/sample-application/httproutes.yaml b/charts/llm-d/templates/sample-application/httproutes.yaml index 671ce2f..cc4d1c3 100644 --- a/charts/llm-d/templates/sample-application/httproutes.yaml +++ b/charts/llm-d/templates/sample-application/httproutes.yaml @@ -25,7 +25,55 @@ spec: kind: InferencePool name: "{{ include "sampleApplication.sanitizedModelName" . 
}}-inference-pool" port: {{ .Values.sampleApplication.inferencePoolPort }} + {{- if not (contains "gke-l7" .Values.gateway.gatewayClassName) }} timeouts: backendRequest: "0s" request: "0s" + {{- end }} +{{- end }} + +{{- if and .Values.gateway.enabled .Values.sampleApplication.enabled (contains "gke-l7" .Values.gateway.gatewayClassName) }} +--- +apiVersion: networking.gke.io/v1 +kind: GCPBackendPolicy +metadata: + name: {{ include "sampleApplication.sanitizedModelName" . }}-backend-policy + namespace: {{ .Release.Namespace }} + labels: {{ include "common.labels.standard" $ | nindent 4 }} + app.kubernetes.io/component: sample-application + {{- if $.Values.commonLabels }} + {{- include "common.tplvalues.render" ( dict "value" $.Values.commonLabels "context" $ ) | nindent 4 }} + {{- end }} + annotations: + {{- if $.Values.commonAnnotations }} + {{- include "common.tplvalues.render" ( dict "value" $.Values.commonAnnotations "context" $ ) | nindent 4 }} + {{- end }} +spec: + default: + logging: + enabled: true + timeoutSec: 300 + targetRef: + group: inference.networking.x-k8s.io + kind: InferencePool + name: "{{ include "sampleApplication.sanitizedModelName" . }}-inference-pool" +--- +apiVersion: networking.gke.io/v1 +kind: HealthCheckPolicy +metadata: + name: {{ include "sampleApplication.sanitizedModelName" . }}-health-check-policy + namespace: {{ .Release.Namespace }} + labels: {{ include "common.labels.standard" $ | nindent 4 }} + app.kubernetes.io/component: sample-application +spec: + targetRef: + group: "inference.networking.x-k8s.io" + kind: InferencePool + name: "{{ include "sampleApplication.sanitizedModelName" . 
}}-inference-pool" + default: + config: + type: HTTP + httpHealthCheck: + requestPath: /health + port: {{ .Values.sampleApplication.inferencePoolPort }} {{- end }} diff --git a/charts/llm-d/values.schema.json b/charts/llm-d/values.schema.json index 47a46fa..4c948a4 100644 --- a/charts/llm-d/values.schema.json +++ b/charts/llm-d/values.schema.json @@ -69,7 +69,7 @@ }, "gatewayClassName": { "default": "istio", - "description": "Gateway class that determines the backend used Currently supported values: \"kgateway\" or \"istio\"", + "description": "Gateway class that determines the backend used. Currently supported values: \"istio\", \"kgateway\", \"gke-l7-rilb\", or \"gke-l7-regional-external-managed\"", "required": [], "title": "gatewayClassName" }, diff --git a/charts/llm-d/values.schema.tmpl.json b/charts/llm-d/values.schema.tmpl.json index f11d4a4..2fd5e3d 100644 --- a/charts/llm-d/values.schema.tmpl.json +++ b/charts/llm-d/values.schema.tmpl.json @@ -69,7 +69,7 @@ }, "gatewayClassName": { "default": "istio", - "description": "Gateway class that determines the backend used Currently supported values: \"kgateway\" or \"istio\"", + "description": "Gateway class that determines the backend used. Currently supported values: \"istio\", \"kgateway\", \"gke-l7-rilb\", or \"gke-l7-regional-external-managed\"", "required": [], "title": "gatewayClassName" }, diff --git a/charts/llm-d/values.yaml b/charts/llm-d/values.yaml index cd62337..1255c5a 100644 --- a/charts/llm-d/values.yaml +++ b/charts/llm-d/values.yaml @@ -191,8 +191,8 @@ gateway: # -- String to partially override gateway.fullname nameOverride: "" - # -- Gateway class that determines the backend used - # Currently supported values: "kgateway" or "istio" + # -- Gateway class that determines the backend used. 
+ # Currently supported values: "istio", "kgateway", "gke-l7-rilb", or "gke-l7-regional-external-managed" gatewayClassName: istio # @schema diff --git a/quickstart/README.md b/quickstart/README.md index 3807dc7..253371f 100644 --- a/quickstart/README.md +++ b/quickstart/README.md @@ -121,6 +121,7 @@ The installer needs to be run from the `llm-d-deployer/quickstart` directory as | `-t`, `--download-timeout` | Timeout for model download job | `./llmd-installer.sh --download-timeout` | | `-D`, `--download-model` | Download the model to PVC from Hugging Face | `./llmd-installer.sh --download-model` | | `-m`, `--disable-metrics-collection` | Disable metrics collection (Prometheus will not be installed) | `./llmd-installer.sh --disable-metrics-collection` | +| `-j`, `--gateway` | Select gateway type (istio, kgateway, gke-l7-rilb, gke-l7-regional-external-managed) (default: istio) | `./llmd-installer.sh --gateway gke-l7-rilb` | | `-h`, `--help` | Show this help and exit | `./llmd-installer.sh --help` | ## Examples diff --git a/quickstart/llmd-installer.sh b/quickstart/llmd-installer.sh index bb921eb..6c44415 100755 --- a/quickstart/llmd-installer.sh +++ b/quickstart/llmd-installer.sh @@ -56,7 +56,7 @@ Options: -t, --download-timeout Timeout for model download job -k, --minikube Deploy on an existing minikube instance with hostPath storage -g, --context Supply a specific Kubernetes context - -j, --gateway Select gateway type (istio or kgateway) + -j, --gateway Select gateway type (istio, kgateway, gke-l7-rilb, gke-l7-regional-external-managed) (default: istio) -r, --release (Helm) Chart release name -h, --help Show this help and exit EOF @@ -226,9 +226,13 @@ validate_hf_token() { } validate_gateway_type() { - if [[ "${GATEWAY_TYPE}" != "istio" && "${GATEWAY_TYPE}" != "kgateway" ]]; then - die "Invalid gateway type: ${GATEWAY_TYPE}. Supported types are: istio, kgateway." 
- fi + case "${GATEWAY_TYPE}" in + istio|kgateway|gke-l7-rilb|gke-l7-regional-external-managed) + ;; # valid + *) + die "Invalid gateway type: ${GATEWAY_TYPE}. Supported types are: istio, kgateway, gke-l7-rilb, gke-l7-regional-external-managed." + ;; + esac log_success "Gateway type validated" }