Docs Standalone Kubernetes Models Blog Enterprise Community @agentgateway

Agentgateway Model and Provider Cookbook

Route to any LLM through a single gateway. Agentgateway supports any provider with an OpenAI-compatible API.

1002+

Models

44+

LLM Gateway Providers

API Endpoints

Search by Endpoints

1 Secret

2 Backend

3 Route

Native Providers

First-class support with full API translation in agentgateway.

OpenAI

Native

53 models

gpt-5.5 gpt-5.5-pro gpt-5.5-2026-04-23 +50 more

api.openai.com

Auth: $OPENAI_API_KEY

View configuration

OpenAI Configuration

Supported Models (53) — click a model to use it

gpt-5.5 gpt-5.5-pro gpt-5.5-2026-04-23 gpt-5.4 gpt-5.4-mini gpt-5.4-nano gpt-5.4-thinking gpt-5.3-codex gpt-5.2 gpt-5.2-pro gpt-5.2-codex gpt-5.1 gpt-5.1-mini gpt-5.1-codex gpt-5 gpt-5-mini gpt-5-nano gpt-5-codex gpt-4.1 gpt-4.1-mini gpt-4.1-nano gpt-4o gpt-4o-mini gpt-4-turbo gpt-4 gpt-3.5-turbo chatgpt-4o-latest o1 o1-mini o1-preview o3 o3-mini o3-pro o4-mini codex-mini-latest gpt-realtime gpt-realtime-mini gpt-audio gpt-4o-realtime gpt-4o-transcribe gpt-4o-mini-transcribe gpt-4o-transcribe-diarize gpt-4o-mini-tts whisper-1 tts-1 tts-1-hd gpt-image-1 gpt-image-1-mini dall-e-3 sora-2 sora-2-pro text-embedding-3-large text-embedding-3-small

Click any model above to update the configuration below, or edit the model field directly.

Save this as config.yaml and run with agentgateway -f config.yaml

llm:
  models:
  - name: "*"
    provider: openAI
    params:
      model: gpt-4o
      apiKey: "$OPENAI_API_KEY"

Run these kubectl apply commands in order

# Step 1: Gateway
kubectl apply -f- <<EOF
apiVersion: gateway.networking.k8s.io/v1
kind: Gateway
metadata:
  name: agentgateway-proxy
  namespace: agentgateway-system
spec:
  gatewayClassName: agentgateway
  listeners:
  - protocol: HTTP
    port: 8080
    name: http
    allowedRoutes:
      namespaces:
        from: All
EOF


# Step 2: Secret
# Replace the placeholder with your real key. The value is quoted because an
# unquoted <...> would be parsed by the shell as redirections.
export OPENAI_API_KEY="<your-key>"
# The heredoc delimiter is unquoted, so the shell expands the variable before
# kubectl reads the manifest.
kubectl apply -f- <<EOF
apiVersion: v1
kind: Secret
metadata:
  name: openai-secret
  namespace: agentgateway-system
type: Opaque
stringData:
  # Quoted so the expanded key is always read as a YAML string.
  Authorization: "$OPENAI_API_KEY"
EOF

# Step 3: Backend
kubectl apply -f- <<EOF
apiVersion: agentgateway.dev/v1alpha1
kind: AgentgatewayBackend
metadata:
  name: openai
  namespace: agentgateway-system
spec:
  ai:
    provider:
      openai:
        model: gpt-4o
  policies:
    auth:
      secretRef:
        name: openai-secret
EOF

# Step 4: Route
kubectl apply -f- <<EOF
apiVersion: gateway.networking.k8s.io/v1
kind: HTTPRoute
metadata:
  name: openai
  namespace: agentgateway-system
spec:
  parentRefs:
  - name: agentgateway-proxy
    namespace: agentgateway-system
  rules:
  - matches:
    - path:
        type: PathPrefix
        value: /openai
    backendRefs:
    - name: openai
      namespace: agentgateway-system
      group: agentgateway.dev
      kind: AgentgatewayBackend
EOF

# Step 5: Port-forward to test
kubectl port-forward -n agentgateway-system svc/agentgateway-proxy 8080:8080 &

Test it

# Kubernetes flow: port 8080 is the local side of the Step 5 port-forward
# (8080:8080), and /openai is the PathPrefix matched by the HTTPRoute.
curl "localhost:8080/openai" -H content-type:application/json -d '{
  "model": "gpt-4o",
  "messages": [{"role": "user", "content": "Hello!"}]
}' | jq

Baseten

Native

3 models

openai/gpt-oss-120b openai/gpt-oss-20b openai/gpt-oss-safeguard

inference.baseten.co

Auth: $BASETEN_API_KEY

View configuration

Baseten Configuration

Supported Models (3) — click a model to use it

openai/gpt-oss-120b openai/gpt-oss-20b openai/gpt-oss-safeguard

Click any model above to update the configuration below, or edit the model field directly.

Save this as config.yaml and run with agentgateway -f config.yaml

llm:
  models:
  - name: "*"
    provider: baseten
    params:
      model: openai/gpt-oss-120b
      apiKey: "$BASETEN_API_KEY"
      baseUrl: "https://inference.baseten.co/v1"

Run these kubectl apply commands in order

# Step 1: Gateway
kubectl apply -f- <<EOF
apiVersion: gateway.networking.k8s.io/v1
kind: Gateway
metadata:
  name: agentgateway-proxy
  namespace: agentgateway-system
spec:
  gatewayClassName: agentgateway
  listeners:
  - protocol: HTTP
    port: 8080
    name: http
    allowedRoutes:
      namespaces:
        from: All
EOF


# Step 2: Secret
# Replace the placeholder with your real key. The value is quoted because an
# unquoted <...> would be parsed by the shell as redirections.
export BASETEN_API_KEY="<your-key>"
# The heredoc delimiter is unquoted, so the shell expands the variable before
# kubectl reads the manifest.
kubectl apply -f- <<EOF
apiVersion: v1
kind: Secret
metadata:
  name: baseten-secret
  namespace: agentgateway-system
type: Opaque
stringData:
  # Quoted so the expanded key is always read as a YAML string.
  Authorization: "$BASETEN_API_KEY"
EOF

# Step 3: Backend
kubectl apply -f- <<EOF
apiVersion: agentgateway.dev/v1alpha1
kind: AgentgatewayBackend
metadata:
  name: baseten
  namespace: agentgateway-system
spec:
  ai:
    provider:
      openai:
        model: openai/gpt-oss-120b
        host: inference.baseten.co
        port: 443
        path: "/v1/chat/completions"
  policies:
    auth:
      secretRef:
        name: baseten-secret
    tls:
      sni: inference.baseten.co
EOF

# Step 4: Route
# Sends requests under /baseten on the agentgateway-proxy Gateway to the
# baseten AgentgatewayBackend.
kubectl apply -f- <<EOF
apiVersion: gateway.networking.k8s.io/v1
kind: HTTPRoute
metadata:
  name: baseten
  namespace: agentgateway-system
spec:
  parentRefs:
  - name: agentgateway-proxy
    namespace: agentgateway-system
  rules:
  - matches:
    - path:
        type: PathPrefix
        value: /baseten
    # Rewrite the Host header to the upstream host, as the other
    # host-override provider routes (Mistral, xAI, Groq) do.
    filters:
    - type: URLRewrite
      urlRewrite:
        hostname: inference.baseten.co
    backendRefs:
    - name: baseten
      namespace: agentgateway-system
      group: agentgateway.dev
      kind: AgentgatewayBackend
EOF

# Step 5: Port-forward to test
kubectl port-forward -n agentgateway-system svc/agentgateway-proxy 8080:8080 &

Test it

# Kubernetes flow: port 8080 is the local side of the Step 5 port-forward
# (8080:8080), and /baseten is the PathPrefix matched by the HTTPRoute.
curl "localhost:8080/baseten" -H content-type:application/json -d '{
  "model": "openai/gpt-oss-120b",
  "messages": [{"role": "user", "content": "Hello from Baseten!"}]
}' | jq

Anthropic

Native

15 models

claude-opus-4-7 claude-opus-4-6 claude-sonnet-4-6 +12 more

api.anthropic.com

Auth: $ANTHROPIC_API_KEY

View configuration

Anthropic Configuration

Supported Models (15) — click a model to use it

claude-opus-4-7 claude-opus-4-6 claude-sonnet-4-6 claude-opus-4-5 claude-sonnet-4-5 claude-opus-4-1 claude-opus-4-20250514 claude-sonnet-4-20250514 claude-haiku-4-5 claude-haiku-4-5-20251001 claude-3.7-sonnet claude-3.5-sonnet claude-3.5-haiku claude-3-opus claude-3-haiku

Click any model above to update the configuration below, or edit the model field directly.

Save this as config.yaml and run with agentgateway -f config.yaml

llm:
  models:
  - name: "*"
    provider: anthropic
    params:
      model: claude-sonnet-4-20250514
      apiKey: "$ANTHROPIC_API_KEY"

Run these kubectl apply commands in order

# Step 1: Gateway
kubectl apply -f- <<EOF
apiVersion: gateway.networking.k8s.io/v1
kind: Gateway
metadata:
  name: agentgateway-proxy
  namespace: agentgateway-system
spec:
  gatewayClassName: agentgateway
  listeners:
  - protocol: HTTP
    port: 8080
    name: http
    allowedRoutes:
      namespaces:
        from: All
EOF


# Step 2: Secret
# Replace the placeholder with your real key. The value is quoted because an
# unquoted <...> would be parsed by the shell as redirections.
export ANTHROPIC_API_KEY="<your-key>"
# The heredoc delimiter is unquoted, so the shell expands the variable before
# kubectl reads the manifest.
kubectl apply -f- <<EOF
apiVersion: v1
kind: Secret
metadata:
  name: anthropic-secret
  namespace: agentgateway-system
type: Opaque
stringData:
  # Quoted so the expanded key is always read as a YAML string.
  Authorization: "$ANTHROPIC_API_KEY"
EOF

# Step 3: Backend
kubectl apply -f- <<EOF
apiVersion: agentgateway.dev/v1alpha1
kind: AgentgatewayBackend
metadata:
  name: anthropic
  namespace: agentgateway-system
spec:
  ai:
    provider:
      anthropic:
        model: "claude-sonnet-4-20250514"
  policies:
    auth:
      secretRef:
        name: anthropic-secret
EOF

# Step 4: Route
kubectl apply -f- <<EOF
apiVersion: gateway.networking.k8s.io/v1
kind: HTTPRoute
metadata:
  name: anthropic
  namespace: agentgateway-system
spec:
  parentRefs:
  - name: agentgateway-proxy
    namespace: agentgateway-system
  rules:
  - matches:
    - path:
        type: PathPrefix
        value: /anthropic
    backendRefs:
    - name: anthropic
      namespace: agentgateway-system
      group: agentgateway.dev
      kind: AgentgatewayBackend
EOF

# Step 5: Port-forward to test
kubectl port-forward -n agentgateway-system svc/agentgateway-proxy 8080:8080 &

Test it

# Kubernetes flow: port 8080 is the local side of the Step 5 port-forward
# (8080:8080), and /anthropic is the PathPrefix matched by the HTTPRoute.
curl "localhost:8080/anthropic" -H content-type:application/json -d '{
  "model": "claude-sonnet-4-20250514",
  "messages": [{"role": "user", "content": "Hello!"}]
}' | jq

Amazon Bedrock

Native

50 models

anthropic.claude-opus-4.7 anthropic.claude-sonnet-4.6 anthropic.claude-opus-4.6 +47 more

bedrock-runtime.{region}.amazonaws.com

Auth: $AWS_ACCESS_KEY_ID, $AWS_SECRET_ACCESS_KEY, $AWS_SESSION_TOKEN

View configuration

Amazon Bedrock Configuration

Supported Models (50) — click a model to use it

anthropic.claude-opus-4.7 anthropic.claude-sonnet-4.6 anthropic.claude-opus-4.6 anthropic.claude-sonnet-4.5 anthropic.claude-opus-4.5 anthropic.claude-opus-4.1 anthropic.claude-sonnet-4 anthropic.claude-opus-4 anthropic.claude-haiku-4-5 anthropic.claude-3.7-sonnet anthropic.claude-3.5-sonnet anthropic.claude-3.5-haiku anthropic.claude-3-haiku amazon.nova-premier amazon.nova-pro amazon.nova-lite amazon.nova-micro amazon.nova-sonic amazon.nova-2-pro amazon.nova-2-lite amazon.titan-text-premier amazon.titan-text-express amazon.titan-embed-text-v2 meta.llama4-maverick-17b meta.llama4-scout-17b meta.llama3-3-70b-instruct meta.llama3-1-405b-instruct meta.llama3-1-70b-instruct meta.llama3-1-8b-instruct meta.llama3-2-90b-instruct meta.llama3-2-11b-instruct mistral.mistral-large-3 mistral.mistral-large mistral.mixtral-8x7b mistral.pixtral-large cohere.command-r-plus cohere.command-r deepseek.v4-pro deepseek.v4-flash deepseek.v3.2 deepseek.v3.1 deepseek.r1 ai21.jamba-1-5-large ai21.jamba-1-5-mini minimax.minimax-m2.1 qwen.qwen3-235b-a22b qwen.qwen3-32b stability.sd3-5-large google.gemma-3-27b-it google.gemma-3-12b-it

Click any model above to update the configuration below, or edit the model field directly.

Save this as config.yaml and run with agentgateway -f config.yaml

llm:
  models:
  - name: "*"
    provider: bedrock
    params:
      model: us.anthropic.claude-sonnet-4-20250514-v1:0
      region: us-east-1

Run these kubectl apply commands in order

# Step 1: Gateway
kubectl apply -f- <<EOF
apiVersion: gateway.networking.k8s.io/v1
kind: Gateway
metadata:
  name: agentgateway-proxy
  namespace: agentgateway-system
spec:
  gatewayClassName: agentgateway
  listeners:
  - protocol: HTTP
    port: 8080
    name: http
    allowedRoutes:
      namespaces:
        from: All
EOF


# Step 2: Secret (IAM credentials)
# Export the three AWS credential values that are copied into the Secret below.
# Replace each placeholder with a real value before running.
export AWS_ACCESS_KEY_ID="<your-access-key>"
export AWS_SECRET_ACCESS_KEY="<your-secret-key>"
export AWS_SESSION_TOKEN="<your-session-token>"

# Render the Secret manifest client-side (--dry-run=client -o yaml) and pipe it
# into kubectl apply, so the Secret is created or updated from the exported
# values. The keys stored are accessKey, secretKey and sessionToken.
kubectl create secret generic bedrock-secret \
  -n agentgateway-system \
  --from-literal=accessKey="$AWS_ACCESS_KEY_ID" \
  --from-literal=secretKey="$AWS_SECRET_ACCESS_KEY" \
  --from-literal=sessionToken="$AWS_SESSION_TOKEN" \
  --type=Opaque \
  --dry-run=client -o yaml | kubectl apply -f -

# Step 3: Backend
kubectl apply -f- <<EOF
apiVersion: agentgateway.dev/v1alpha1
kind: AgentgatewayBackend
metadata:
  name: bedrock
  namespace: agentgateway-system
spec:
  ai:
    provider:
      bedrock:
        model: "us.anthropic.claude-sonnet-4-20250514-v1:0"
        region: "us-east-1"
  policies:
    auth:
      secretRef:
        name: bedrock-secret
EOF

# Step 4: Route
kubectl apply -f- <<EOF
apiVersion: gateway.networking.k8s.io/v1
kind: HTTPRoute
metadata:
  name: bedrock
  namespace: agentgateway-system
spec:
  parentRefs:
  - name: agentgateway-proxy
    namespace: agentgateway-system
  rules:
  - matches:
    - path:
        type: PathPrefix
        value: /bedrock
    backendRefs:
    - name: bedrock
      namespace: agentgateway-system
      group: agentgateway.dev
      kind: AgentgatewayBackend
EOF

# Step 5: Port-forward to test
kubectl port-forward -n agentgateway-system svc/agentgateway-proxy 8080:8080 &

Test it

# Kubernetes flow: port 8080 is the local side of the Step 5 port-forward
# (8080:8080), and /bedrock is the PathPrefix matched by the HTTPRoute.
# The model matches the one configured on the bedrock Backend in Step 3.
curl "localhost:8080/bedrock" -H content-type:application/json -d '{
  "model": "us.anthropic.claude-sonnet-4-20250514-v1:0",
  "messages": [{"role": "user", "content": "Hello from Bedrock!"}]
}' | jq

Google Gemini

Native

43 models

gemini-3.1-pro-preview gemini-3.1-flash-lite-preview gemini-3.1-flash-image-preview +40 more

generativelanguage.googleapis.com

Auth: $GOOGLE_KEY

View configuration

Google Gemini Configuration

Supported Models (43) — click a model to use it

gemini-3.1-pro-preview gemini-3.1-flash-lite-preview gemini-3.1-flash-image-preview gemini-3.1-flash-live-preview gemini-3.1-flash-tts-preview gemini-3-pro-preview gemini-3-flash-preview gemini-3-pro-image-preview gemini-2.5-pro gemini-2.5-flash gemini-2.5-flash-lite gemini-2.5-flash-image gemini-2.5-computer-use-preview gemini-2.5-flash-preview-tts gemini-2.5-pro-preview-tts gemini-2.5-flash-native-audio-preview-12-2025 gemini-2.0-flash gemini-2.0-flash-lite gemini-1.5-pro gemini-1.5-flash gemini-1.5-flash-8b gemini-deep-research-preview-04-2026 gemini-deep-research-max-preview-04-2026 gemini-robotics-er-1.6-preview gemini-embedding-2 gemini-embedding-001 imagen-4.0-generate-001 veo-3.1-generate-preview veo-3.1-lite-generate-preview lyria-3-pro-preview lyria-3-clip-preview lyria-realtime-exp gemma-4-31b-it gemma-4-26b-it gemma-4-E4B-it gemma-4-E2B-it gemma-3-27b-it gemma-3-12b-it gemma-3-4b-it gemma-3-1b-it gemma-2-27b-it gemma-2-9b-it learnlm-1.5-pro

Click any model above to update the configuration below, or edit the model field directly.

Save this as config.yaml and run with agentgateway -f config.yaml

llm:
  models:
  - name: "*"
    provider: gemini
    params:
      model: gemini-2.5-flash
      apiKey: "$GOOGLE_KEY"

Run these kubectl apply commands in order

# Step 1: Gateway
kubectl apply -f- <<EOF
apiVersion: gateway.networking.k8s.io/v1
kind: Gateway
metadata:
  name: agentgateway-proxy
  namespace: agentgateway-system
spec:
  gatewayClassName: agentgateway
  listeners:
  - protocol: HTTP
    port: 8080
    name: http
    allowedRoutes:
      namespaces:
        from: All
EOF


# Step 2: Secret
# Replace the placeholder with your real key. The value is quoted because an
# unquoted <...> would be parsed by the shell as redirections.
export GOOGLE_KEY="<your-gemini-api-key>"
# The heredoc delimiter is unquoted, so the shell expands the variable before
# kubectl reads the manifest.
kubectl apply -f- <<EOF
apiVersion: v1
kind: Secret
metadata:
  name: google-secret
  namespace: agentgateway-system
type: Opaque
stringData:
  # Quoted so the expanded key is always read as a YAML string.
  Authorization: "$GOOGLE_KEY"
EOF

# Step 3: Backend
kubectl apply -f- <<EOF
apiVersion: agentgateway.dev/v1alpha1
kind: AgentgatewayBackend
metadata:
  name: gemini
  namespace: agentgateway-system
spec:
  ai:
    provider:
      gemini:
        model: gemini-2.5-flash
  policies:
    auth:
      secretRef:
        name: google-secret
EOF

# Step 4: Route
kubectl apply -f- <<EOF
apiVersion: gateway.networking.k8s.io/v1
kind: HTTPRoute
metadata:
  name: gemini
  namespace: agentgateway-system
spec:
  parentRefs:
  - name: agentgateway-proxy
    namespace: agentgateway-system
  rules:
  - matches:
    - path:
        type: PathPrefix
        value: /gemini
    backendRefs:
    - name: gemini
      namespace: agentgateway-system
      group: agentgateway.dev
      kind: AgentgatewayBackend
EOF

# Step 5: Port-forward to test
kubectl port-forward -n agentgateway-system svc/agentgateway-proxy 8080:8080 &

Test it

# Kubernetes flow: port 8080 is the local side of the Step 5 port-forward
# (8080:8080), and /gemini is the PathPrefix matched by the HTTPRoute.
curl "localhost:8080/gemini" -H content-type:application/json -d '{
  "model": "gemini-2.5-flash",
  "messages": [{"role": "user", "content": "Hello from Gemini!"}]
}' | jq

Google Vertex AI

Native

39 models

gemini-3.1-pro-preview gemini-3.1-flash-lite-preview gemini-3-pro-preview +36 more

{region}-aiplatform.googleapis.com

Auth: $VERTEX_AI_API_KEY

View configuration

Google Vertex AI Configuration

Supported Models (39) — click a model to use it

gemini-3.1-pro-preview gemini-3.1-flash-lite-preview gemini-3-pro-preview gemini-3-flash-preview gemini-2.5-pro gemini-2.5-flash gemini-2.5-flash-lite gemini-2.0-flash gemini-2.0-flash-lite gemini-1.5-pro gemini-1.5-flash gemini-pro gemini-embedding-2 gemini-embedding-001 text-embedding-005 imagen-4.0-generate veo-3.1-generate-preview claude-opus-4-7 claude-opus-4.6 claude-sonnet-4.6 claude-opus-4.5 claude-sonnet-4.5 claude-opus-4.1 claude-opus-4 claude-sonnet-4 claude-haiku-4-5@20251001 claude-haiku-4-5 claude-3-opus claude-3.7-sonnet claude-3.5-sonnet-v2 claude-3.5-haiku gemma-4 gemma-3 llama-4-scout llama-4-maverick llama-3.3-70b llama-3.1-405b mistral-large jamba-1.5-large

Click any model above to update the configuration below, or edit the model field directly.

Save this as config.yaml and run with agentgateway -f config.yaml

llm:
  models:
  - name: "*"
    provider: vertexAI
    params:
      model: gemini-pro
      projectId: "my-gcp-project"
      region: "us-central1"
      apiKey: "$VERTEX_AI_API_KEY"

Run these kubectl apply commands in order

# Step 1: Gateway
kubectl apply -f- <<EOF
apiVersion: gateway.networking.k8s.io/v1
kind: Gateway
metadata:
  name: agentgateway-proxy
  namespace: agentgateway-system
spec:
  gatewayClassName: agentgateway
  listeners:
  - protocol: HTTP
    port: 8080
    name: http
    allowedRoutes:
      namespaces:
        from: All
EOF


# Step 2: Secret
# Replace the placeholder with your real key. The value is quoted because an
# unquoted <...> would be parsed by the shell as redirections.
export VERTEX_AI_API_KEY="<your-vertex-api-key>"
# The heredoc delimiter is unquoted, so the shell expands the variable before
# kubectl reads the manifest.
kubectl apply -f- <<EOF
apiVersion: v1
kind: Secret
metadata:
  name: vertex-ai-secret
  namespace: agentgateway-system
type: Opaque
stringData:
  # Quoted so the expanded key is always read as a YAML string.
  Authorization: "$VERTEX_AI_API_KEY"
EOF

# Step 3: Backend
kubectl apply -f- <<EOF
apiVersion: agentgateway.dev/v1alpha1
kind: AgentgatewayBackend
metadata:
  name: vertex-ai
  namespace: agentgateway-system
spec:
  ai:
    provider:
      vertexai:
        model: gemini-pro
        projectId: "my-gcp-project"
        region: "us-central1"
  policies:
    auth:
      secretRef:
        name: vertex-ai-secret
EOF

# Step 4: Route
kubectl apply -f- <<EOF
apiVersion: gateway.networking.k8s.io/v1
kind: HTTPRoute
metadata:
  name: vertex-ai
  namespace: agentgateway-system
spec:
  parentRefs:
  - name: agentgateway-proxy
    namespace: agentgateway-system
  rules:
  - matches:
    - path:
        type: PathPrefix
        value: /vertex
    backendRefs:
    - name: vertex-ai
      namespace: agentgateway-system
      group: agentgateway.dev
      kind: AgentgatewayBackend
EOF

# Step 5: Port-forward to test
kubectl port-forward -n agentgateway-system svc/agentgateway-proxy 8080:8080 &

Test it

# Kubernetes flow: port 8080 is the local side of the Step 5 port-forward
# (8080:8080), and /vertex is the PathPrefix matched by the HTTPRoute.
curl "localhost:8080/vertex" -H content-type:application/json -d '{
  "model": "gemini-pro",
  "messages": [{"role": "user", "content": "Hello from Vertex AI!"}]
}' | jq

Azure OpenAI

Native

54 models

gpt-5.5 gpt-5.5-pro gpt-5.4 +51 more

{resource}.openai.azure.com

Auth: $AZURE_API_KEY

View configuration

Azure OpenAI Configuration

Supported Models (54) — click a model to use it

gpt-5.5 gpt-5.5-pro gpt-5.4 gpt-5.4-mini gpt-5.4-nano gpt-5.4-thinking gpt-5.3-codex gpt-5.2 gpt-5.2-pro gpt-5.1 gpt-5 gpt-5-mini gpt-5-nano gpt-5-codex gpt-4.1 gpt-4.1-mini gpt-4.1-nano gpt-4o gpt-4o-mini gpt-4-turbo gpt-4 gpt-3.5-turbo o1 o1-mini o3 o3-mini o3-pro o4-mini model-router gpt-realtime-1.5 gpt-realtime-1.5-2026-02-23 gpt-realtime-mini-2025-12-15 gpt-realtime-translate gpt-realtime-whisper gpt-audio-1.5 gpt-audio-1.5-2026-02-23 gpt-4o-mini-transcribe-2025-12-15 gpt-4o-mini-tts-2025-12-15 gpt-4o-transcribe-diarize gpt-image-1.5 gpt-image-1 gpt-image-1-mini dall-e-3 sora sora-2 text-embedding-3-large text-embedding-3-small gpt-oss-120b gpt-oss-20b deepseek-r1 deepseek-v4-pro llama-3.3-70b-instruct whisper tts-1

Click any model above to update the configuration below, or edit the model field directly.

Save this as config.yaml and run with agentgateway -f config.yaml

llm:
  models:
  - name: "*"
    provider: openAI
    params:
      model: gpt-4o
      host: your-resource.openai.azure.com
      port: 443
      path: "/openai/deployments/gpt-4o/chat/completions?api-version=2024-10-21"
      apiKey: "$AZURE_API_KEY"

Run these kubectl apply commands in order

# Step 1: Gateway
kubectl apply -f- <<EOF
apiVersion: gateway.networking.k8s.io/v1
kind: Gateway
metadata:
  name: agentgateway-proxy
  namespace: agentgateway-system
spec:
  gatewayClassName: agentgateway
  listeners:
  - protocol: HTTP
    port: 8080
    name: http
    allowedRoutes:
      namespaces:
        from: All
EOF


# Step 2: Secret
# Replace the placeholder with your real key. The value is quoted because an
# unquoted <...> would be parsed by the shell as redirections.
export AZURE_API_KEY="<your-azure-api-key>"
# The heredoc delimiter is unquoted, so the shell expands the variable before
# kubectl reads the manifest.
kubectl apply -f- <<EOF
apiVersion: v1
kind: Secret
metadata:
  name: azure-secret
  namespace: agentgateway-system
type: Opaque
stringData:
  # Quoted so the expanded key is always read as a YAML string.
  Authorization: "$AZURE_API_KEY"
EOF

# Step 3: Backend
kubectl apply -f- <<EOF
apiVersion: agentgateway.dev/v1alpha1
kind: AgentgatewayBackend
metadata:
  name: azure-openai
  namespace: agentgateway-system
spec:
  ai:
    provider:
      openai:
        model: gpt-4o
        host: your-resource.openai.azure.com
        port: 443
        path: "/openai/deployments/gpt-4o/chat/completions?api-version=2024-10-21"
  policies:
    auth:
      secretRef:
        name: azure-secret
    tls:
      sni: your-resource.openai.azure.com
EOF

# Step 4: Route
kubectl apply -f- <<EOF
apiVersion: gateway.networking.k8s.io/v1
kind: HTTPRoute
metadata:
  name: azure-openai
  namespace: agentgateway-system
spec:
  parentRefs:
  - name: agentgateway-proxy
    namespace: agentgateway-system
  rules:
  - matches:
    - path:
        type: PathPrefix
        value: /azure
    backendRefs:
    - name: azure-openai
      namespace: agentgateway-system
      group: agentgateway.dev
      kind: AgentgatewayBackend
EOF

# Step 5: Port-forward to test
kubectl port-forward -n agentgateway-system svc/agentgateway-proxy 8080:8080 &

Test it

# Kubernetes flow: port 8080 is the local side of the Step 5 port-forward
# (8080:8080), and /azure is the PathPrefix matched by the HTTPRoute.
curl "localhost:8080/azure" -H content-type:application/json -d '{
  "model": "gpt-4o",
  "messages": [{"role": "user", "content": "Hello from Azure!"}]
}' | jq

OpenAI-Compatible Providers

These providers expose an OpenAI-compatible API. Built-in providers use first-class provider shortcuts and baseUrl defaults; providers without built-in support use the openai provider type with custom baseUrl or host/path settings.

Mistral AI

Native

32 models

mistral-large-latest mistral-large-2512 mistral-medium-latest +29 more

api.mistral.ai

Auth: $MISTRAL_API_KEY

View configuration

Mistral AI Configuration

Supported Models (32) — click a model to use it

mistral-large-latest mistral-large-2512 mistral-medium-latest mistral-medium-2604 mistral-medium-2508 mistral-small-latest mistral-small-2603 mistral-small-2506 magistral-medium-latest magistral-small-latest ministral-14b-2512 ministral-8b-2512 ministral-3b-2512 codestral-latest codestral-2508 codestral-embed codestral-mamba-latest devstral-latest devstral-medium-latest devstral-small-latest devstral-2512 voxtral-small-2507 voxtral-mini-2507 voxtral-tts-2603 pixtral-large-latest pixtral-12b mistral-nemo mistral-embed mistral-ocr-latest open-mixtral-8x22b open-mixtral-8x7b open-mistral-7b

Click any model above to update the configuration below, or edit the model field directly.

Save this as config.yaml and run with agentgateway -f config.yaml

llm:
  models:
  - name: "*"
    provider: mistral
    params:
      model: mistral-medium-2505
      apiKey: "$MISTRAL_API_KEY"
      baseUrl: "https://api.mistral.ai/v1"

Run these kubectl apply commands in order

# Step 1: Gateway
kubectl apply -f- <<EOF
apiVersion: gateway.networking.k8s.io/v1
kind: Gateway
metadata:
  name: agentgateway-proxy
  namespace: agentgateway-system
spec:
  gatewayClassName: agentgateway
  listeners:
  - protocol: HTTP
    port: 8080
    name: http
    allowedRoutes:
      namespaces:
        from: All
EOF


# Step 2: Secret
# Replace the placeholder with your real key. The value is quoted because an
# unquoted <...> would be parsed by the shell as redirections.
export MISTRAL_API_KEY="<your-key>"
# The heredoc delimiter is unquoted, so the shell expands the variable before
# kubectl reads the manifest.
kubectl apply -f- <<EOF
apiVersion: v1
kind: Secret
metadata:
  name: mistral-secret
  namespace: agentgateway-system
type: Opaque
stringData:
  # Quoted so the expanded key is always read as a YAML string.
  Authorization: "$MISTRAL_API_KEY"
EOF

# Step 3: Backend
kubectl apply -f- <<EOF
apiVersion: agentgateway.dev/v1alpha1
kind: AgentgatewayBackend
metadata:
  name: mistral
  namespace: agentgateway-system
spec:
  ai:
    provider:
      openai:
        model: mistral-medium-2505
        host: api.mistral.ai
        port: 443
        path: "/v1/chat/completions"
  policies:
    auth:
      secretRef:
        name: mistral-secret
    tls:
      sni: api.mistral.ai
EOF

# Step 4: Route
kubectl apply -f- <<EOF
apiVersion: gateway.networking.k8s.io/v1
kind: HTTPRoute
metadata:
  name: mistral
  namespace: agentgateway-system
spec:
  parentRefs:
  - name: agentgateway-proxy
    namespace: agentgateway-system
  rules:
  - matches:
    - path:
        type: PathPrefix
        value: /mistral
    filters:
    - type: URLRewrite
      urlRewrite:
        hostname: api.mistral.ai
    backendRefs:
    - name: mistral
      namespace: agentgateway-system
      group: agentgateway.dev
      kind: AgentgatewayBackend
EOF

# Step 5: Port-forward to test
kubectl port-forward -n agentgateway-system svc/agentgateway-proxy 8080:8080 &

Test it

# Kubernetes flow: port 8080 is the local side of the Step 5 port-forward
# (8080:8080), and /mistral is the PathPrefix matched by the HTTPRoute.
curl "localhost:8080/mistral" -H content-type:application/json -d '{
  "model": "mistral-medium-2505",
  "messages": [{"role": "user", "content": "Hello from Mistral!"}]
}' | jq

DeepSeek

Native

9 models

deepseek-v4-pro deepseek-v4-flash deepseek-chat +6 more

api.deepseek.com

Auth: $DEEPSEEK_API_KEY

View configuration

DeepSeek Configuration

Supported Models (9) — click a model to use it

deepseek-v4-pro deepseek-v4-flash deepseek-chat deepseek-reasoner deepseek-v3.2 deepseek-v3.1 deepseek-v3 deepseek-r1 deepseek-coder

Click any model above to update the configuration below, or edit the model field directly.

Save this as config.yaml and run with agentgateway -f config.yaml

llm:
  models:
  - name: "*"
    provider: deepseek
    params:
      model: deepseek-chat
      apiKey: "$DEEPSEEK_API_KEY"
      baseUrl: "https://api.deepseek.com/v1"

Run these kubectl apply commands in order

# Step 1: Gateway
kubectl apply -f- <<EOF
apiVersion: gateway.networking.k8s.io/v1
kind: Gateway
metadata:
  name: agentgateway-proxy
  namespace: agentgateway-system
spec:
  gatewayClassName: agentgateway
  listeners:
  - protocol: HTTP
    port: 8080
    name: http
    allowedRoutes:
      namespaces:
        from: All
EOF


# Step 2: Secret
# Replace the placeholder with your real key. The value is quoted because an
# unquoted <...> would be parsed by the shell as redirections.
export DEEPSEEK_API_KEY="<your-key>"
# The heredoc delimiter is unquoted, so the shell expands the variable before
# kubectl reads the manifest.
kubectl apply -f- <<EOF
apiVersion: v1
kind: Secret
metadata:
  name: deepseek-secret
  namespace: agentgateway-system
type: Opaque
stringData:
  # Quoted so the expanded key is always read as a YAML string.
  Authorization: "$DEEPSEEK_API_KEY"
EOF

# Step 3: Backend
kubectl apply -f- <<EOF
apiVersion: agentgateway.dev/v1alpha1
kind: AgentgatewayBackend
metadata:
  name: deepseek
  namespace: agentgateway-system
spec:
  ai:
    provider:
      openai:
        model: deepseek-chat
        host: api.deepseek.com
        port: 443
        path: "/v1/chat/completions"
  policies:
    auth:
      secretRef:
        name: deepseek-secret
    tls:
      sni: api.deepseek.com
EOF

# Step 4: Route
# Sends requests under /deepseek on the agentgateway-proxy Gateway to the
# deepseek AgentgatewayBackend.
kubectl apply -f- <<EOF
apiVersion: gateway.networking.k8s.io/v1
kind: HTTPRoute
metadata:
  name: deepseek
  namespace: agentgateway-system
spec:
  parentRefs:
  - name: agentgateway-proxy
    namespace: agentgateway-system
  rules:
  - matches:
    - path:
        type: PathPrefix
        value: /deepseek
    # Rewrite the Host header to the upstream host, as the other
    # host-override provider routes (Mistral, xAI, Groq) do.
    filters:
    - type: URLRewrite
      urlRewrite:
        hostname: api.deepseek.com
    backendRefs:
    - name: deepseek
      namespace: agentgateway-system
      group: agentgateway.dev
      kind: AgentgatewayBackend
EOF

# Step 5: Port-forward to test
kubectl port-forward -n agentgateway-system svc/agentgateway-proxy 8080:8080 &

Test it

# Kubernetes flow: port 8080 is the local side of the Step 5 port-forward
# (8080:8080), and /deepseek is the PathPrefix matched by the HTTPRoute.
curl "localhost:8080/deepseek" -H content-type:application/json -d '{
  "model": "deepseek-chat",
  "messages": [{"role": "user", "content": "Hello from DeepSeek!"}]
}' | jq

xAI (Grok)

Native

14 models

grok-4.3 grok-4-1-fast-reasoning grok-4-1-fast-non-reasoning +11 more

api.x.ai

Auth: $XAI_API_KEY

View configuration

xAI (Grok) Configuration

Supported Models (14) — click a model to use it

grok-4.3 grok-4-1-fast-reasoning grok-4-1-fast-non-reasoning grok-4 grok-4-fast-reasoning grok-4-fast-non-reasoning grok-3 grok-3-fast-latest grok-3-mini grok-3-mini-fast grok-2-latest grok-2-vision-latest grok-code-fast-1 grok-imagine-image-pro

Click any model above to update the configuration below, or edit the model field directly.

Save this as config.yaml and run with agentgateway -f config.yaml

llm:
  models:
  - name: "*"
    provider: xai
    params:
      model: grok-2-latest
      apiKey: "$XAI_API_KEY"
      baseUrl: "https://api.x.ai/v1"

Run these kubectl apply commands in order

# Step 1: Gateway
kubectl apply -f- <<EOF
apiVersion: gateway.networking.k8s.io/v1
kind: Gateway
metadata:
  name: agentgateway-proxy
  namespace: agentgateway-system
spec:
  gatewayClassName: agentgateway
  listeners:
  - protocol: HTTP
    port: 8080
    name: http
    allowedRoutes:
      namespaces:
        from: All
EOF


# Step 2: Secret
# Replace the placeholder with your real key. The value is quoted because an
# unquoted <...> would be parsed by the shell as redirections.
export XAI_API_KEY="<your-key>"
# The heredoc delimiter is unquoted, so the shell expands the variable before
# kubectl reads the manifest.
kubectl apply -f- <<EOF
apiVersion: v1
kind: Secret
metadata:
  name: xai-secret
  namespace: agentgateway-system
type: Opaque
stringData:
  # Quoted so the expanded key is always read as a YAML string.
  Authorization: "$XAI_API_KEY"
EOF

# Step 3: Backend
kubectl apply -f- <<EOF
apiVersion: agentgateway.dev/v1alpha1
kind: AgentgatewayBackend
metadata:
  name: xai
  namespace: agentgateway-system
spec:
  ai:
    provider:
      openai:
        model: grok-2-latest
        host: api.x.ai
        port: 443
        path: "/v1/chat/completions"
  policies:
    auth:
      secretRef:
        name: xai-secret
    tls:
      sni: api.x.ai
EOF

# Step 4: Route
kubectl apply -f- <<EOF
apiVersion: gateway.networking.k8s.io/v1
kind: HTTPRoute
metadata:
  name: xai
  namespace: agentgateway-system
spec:
  parentRefs:
  - name: agentgateway-proxy
    namespace: agentgateway-system
  rules:
  - matches:
    - path:
        type: PathPrefix
        value: /xai
    filters:
    - type: URLRewrite
      urlRewrite:
        hostname: api.x.ai
    backendRefs:
    - name: xai
      namespace: agentgateway-system
      group: agentgateway.dev
      kind: AgentgatewayBackend
EOF

# Step 5: Port-forward to test
kubectl port-forward -n agentgateway-system svc/agentgateway-proxy 8080:8080 &

Test it

# Kubernetes flow: port 8080 is the local side of the Step 5 port-forward
# (8080:8080), and /xai is the PathPrefix matched by the HTTPRoute.
curl "localhost:8080/xai" -H content-type:application/json -d '{
  "model": "grok-2-latest",
  "messages": [{"role": "user", "content": "Hello from Grok!"}]
}' | jq

Groq

Native

20 models

llama-4-maverick-17b-128e-instruct llama-4-scout-17b-16e-instruct llama-3.3-70b-versatile +17 more

api.groq.com

Auth: $GROQ_API_KEY

View configuration

Groq Configuration

Supported Models (20) — click a model to use it

llama-4-maverick-17b-128e-instruct llama-4-scout-17b-16e-instruct llama-3.3-70b-versatile llama-3.1-8b-instant llama-guard-4-12b gemma-7b-it qwen3.5-397b-a17b qwen3-32b gpt-oss-120b gpt-oss-20b deepseek-v4-flash deepseek-r1-distill-llama-70b kimi-k2.6-instruct kimi-k2-instruct minimax-m2.7 glm-5.1 groq/compound groq/compound-mini whisper-large-v3 whisper-large-v3-turbo

Click any model above to update the configuration below, or edit the model field directly.

Save this as config.yaml and run with agentgateway -f config.yaml

llm:
  models:
  - name: "*"
    provider: groq
    params:
      model: llama-3.3-70b-versatile
      apiKey: "$GROQ_API_KEY"
      baseUrl: "https://api.groq.com/openai/v1"

Run these kubectl apply commands in order

# Step 1: Gateway
kubectl apply -f- <<EOF
apiVersion: gateway.networking.k8s.io/v1
kind: Gateway
metadata:
  name: agentgateway-proxy
  namespace: agentgateway-system
spec:
  gatewayClassName: agentgateway
  listeners:
  - protocol: HTTP
    port: 8080
    name: http
    allowedRoutes:
      namespaces:
        from: All
EOF


# Step 2: Secret
# Replace the placeholder with your real key. The value is quoted because an
# unquoted <...> would be parsed by the shell as redirections.
export GROQ_API_KEY="<your-key>"
# The heredoc delimiter is unquoted, so the shell expands the variable before
# kubectl reads the manifest.
kubectl apply -f- <<EOF
apiVersion: v1
kind: Secret
metadata:
  name: groq-secret
  namespace: agentgateway-system
type: Opaque
stringData:
  # Quoted so the expanded key is always read as a YAML string.
  Authorization: "$GROQ_API_KEY"
EOF

# Step 3: Backend
kubectl apply -f- <<EOF
apiVersion: agentgateway.dev/v1alpha1
kind: AgentgatewayBackend
metadata:
  name: groq
  namespace: agentgateway-system
spec:
  ai:
    provider:
      openai:
        model: llama-3.3-70b-versatile
        host: api.groq.com
        port: 443
        path: "/openai/v1/chat/completions"
  policies:
    auth:
      secretRef:
        name: groq-secret
    tls:
      sni: api.groq.com
EOF

# Step 4: Route
kubectl apply -f- <<EOF
apiVersion: gateway.networking.k8s.io/v1
kind: HTTPRoute
metadata:
  name: groq
  namespace: agentgateway-system
spec:
  parentRefs:
  - name: agentgateway-proxy
    namespace: agentgateway-system
  rules:
  - matches:
    - path:
        type: PathPrefix
        value: /groq
    filters:
    - type: URLRewrite
      urlRewrite:
        hostname: api.groq.com
    backendRefs:
    - name: groq
      namespace: agentgateway-system
      group: agentgateway.dev
      kind: AgentgatewayBackend
EOF

# Step 5: Port-forward to test
kubectl port-forward -n agentgateway-system svc/agentgateway-proxy 8080:8080 &

Test it

# Send a chat request to the /groq route via the Step 5 port-forward (local port 8080).
curl "localhost:8080/groq" -H content-type:application/json -d '{
  "model": "llama-3.3-70b-versatile",
  "messages": [{"role": "user", "content": "Hello from Groq!"}]
}' | jq

Cohere

Native

16 models

command-a-reasoning-08-2025 command-a-translate-08-2025 command-a-vision-07-2025 +13 more

api.cohere.ai

Auth: $COHERE_API_KEY

View configuration

Cohere Configuration

Supported Models (16) — click a model to use it

command-a-reasoning-08-2025 command-a-translate-08-2025 command-a-vision-07-2025 command-a-03-2025 command-r-plus command-r command-r7b-12-2024 cohere-transcribe-03-2026 embed-v4.0 embed-english-v3.0 embed-multilingual-v3.0 rerank-v4.0-pro rerank-v4.0-fast rerank-v3.5 c4ai-aya-expanse-32b c4ai-aya-vision-32b

Click any model above to update the configuration below, or edit the model field directly.

Save this as config.yaml and run with agentgateway -f config.yaml

llm:
  models:
  - name: "*"
    provider: cohere
    params:
      model: command-r-plus
      apiKey: "$COHERE_API_KEY"
      baseUrl: "https://api.cohere.ai"

Run these kubectl apply commands in order

# Step 1: Gateway
kubectl apply -f- <<EOF
apiVersion: gateway.networking.k8s.io/v1
kind: Gateway
metadata:
  name: agentgateway-proxy
  namespace: agentgateway-system
spec:
  gatewayClassName: agentgateway
  listeners:
  - protocol: HTTP
    port: 8080
    name: http
    allowedRoutes:
      namespaces:
        from: All
EOF


# Step 2: Secret
export COHERE_API_KEY=<your-key>
kubectl apply -f- <<EOF
apiVersion: v1
kind: Secret
metadata:
  name: cohere-secret
  namespace: agentgateway-system
type: Opaque
stringData:
  Authorization: $COHERE_API_KEY
EOF

# Step 3: Backend
kubectl apply -f- <<EOF
apiVersion: agentgateway.dev/v1alpha1
kind: AgentgatewayBackend
metadata:
  name: cohere
  namespace: agentgateway-system
spec:
  ai:
    provider:
      openai:
        model: command-r-plus
        host: api.cohere.ai
        port: 443
        path: "/compatibility/v1/chat/completions"
  policies:
    auth:
      secretRef:
        name: cohere-secret
    tls:
      sni: api.cohere.ai
EOF

# Step 4: Route
kubectl apply -f- <<EOF
apiVersion: gateway.networking.k8s.io/v1
kind: HTTPRoute
metadata:
  name: cohere
  namespace: agentgateway-system
spec:
  parentRefs:
  - name: agentgateway-proxy
    namespace: agentgateway-system
  rules:
  - matches:
    - path:
        type: PathPrefix
        value: /cohere
    filters:
    - type: URLRewrite
      urlRewrite:
        hostname: api.cohere.ai
    backendRefs:
    - name: cohere
      namespace: agentgateway-system
      group: agentgateway.dev
      kind: AgentgatewayBackend
EOF

# Step 5: Port-forward to test
kubectl port-forward -n agentgateway-system svc/agentgateway-proxy 8080:8080 &

Test it

# Send a chat request to the /cohere route via the Step 5 port-forward (local port 8080).
curl "localhost:8080/cohere" -H content-type:application/json -d '{
  "model": "command-r-plus",
  "messages": [{"role": "user", "content": "Hello from Cohere!"}]
}' | jq

Together AI

Native

37 models

meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 meta-llama/Llama-3.3-70B-Instruct-Turbo meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo +34 more

api.together.xyz

Auth: $TOGETHER_API_KEY

View configuration

Together AI Configuration

Supported Models (37) — click a model to use it

meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 meta-llama/Llama-3.3-70B-Instruct-Turbo meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo meta-llama/Llama-3.1-405B-Instruct-Turbo meta-llama/Llama-3.1-70B-Instruct-Turbo meta-llama/Llama-3.1-8B-Instruct-Turbo meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo meta-llama/Llama-Guard-4-12B Qwen/Qwen3.5-397B-A17B Qwen/Qwen3.5-35B-A3B Qwen/Qwen3.5-9B Qwen/Qwen3-235B-A22B Qwen/Qwen3-235B-A22B-Instruct-2507 Qwen/Qwen3-Coder-480B-A35B-Instruct Qwen/Qwen2.5-72B-Instruct-Turbo deepseek-ai/DeepSeek-V4-Pro deepseek-ai/DeepSeek-V4-Flash deepseek-ai/DeepSeek-R1 deepseek-ai/DeepSeek-V3 deepseek-ai/DeepSeek-V3.1 deepseek-ai/DeepSeek-V3.2 openai/gpt-oss-120b openai/gpt-oss-20b openai/gpt-oss-safeguard moonshotai/Kimi-K2.6-Instruct moonshotai/Kimi-K2-Instruct-0905 google/gemma-4-31b-it google/gemma-4-26b-it google/gemma-3-27b-it google/gemma-2-27b-it google/gemma-3n-E4B-it MiniMaxAI/MiniMax-M2.7 MiniMaxAI/MiniMax-M2.5 zai-org/GLM-5.1 zai-org/GLM-5 mistralai/Mixtral-8x22B-Instruct-v0.1 mistralai/Mistral-Small-24B-Instruct-2501

Click any model above to update the configuration below, or edit the model field directly.

Save this as config.yaml and run with agentgateway -f config.yaml

llm:
  models:
  - name: "*"
    provider: togetherai
    params:
      model: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo
      apiKey: "$TOGETHER_API_KEY"
      baseUrl: "https://api.together.xyz/v1"

Run these kubectl apply commands in order

# Step 1: Gateway
kubectl apply -f- <<EOF
apiVersion: gateway.networking.k8s.io/v1
kind: Gateway
metadata:
  name: agentgateway-proxy
  namespace: agentgateway-system
spec:
  gatewayClassName: agentgateway
  listeners:
  - protocol: HTTP
    port: 8080
    name: http
    allowedRoutes:
      namespaces:
        from: All
EOF


# Step 2: Secret
export TOGETHER_API_KEY=<your-key>
kubectl apply -f- <<EOF
apiVersion: v1
kind: Secret
metadata:
  name: together-secret
  namespace: agentgateway-system
type: Opaque
stringData:
  Authorization: $TOGETHER_API_KEY
EOF

# Step 3: Backend
kubectl apply -f- <<EOF
apiVersion: agentgateway.dev/v1alpha1
kind: AgentgatewayBackend
metadata:
  name: together
  namespace: agentgateway-system
spec:
  ai:
    provider:
      openai:
        model: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo
        host: api.together.xyz
        port: 443
        path: "/v1/chat/completions"
  policies:
    auth:
      secretRef:
        name: together-secret
    tls:
      sni: api.together.xyz
EOF

# Step 4: Route
kubectl apply -f- <<EOF
apiVersion: gateway.networking.k8s.io/v1
kind: HTTPRoute
metadata:
  name: together
  namespace: agentgateway-system
spec:
  parentRefs:
  - name: agentgateway-proxy
    namespace: agentgateway-system
  rules:
  - matches:
    - path:
        type: PathPrefix
        value: /together
    filters:
    - type: URLRewrite
      urlRewrite:
        hostname: api.together.xyz
    backendRefs:
    - name: together
      namespace: agentgateway-system
      group: agentgateway.dev
      kind: AgentgatewayBackend
EOF

# Step 5: Port-forward to test
kubectl port-forward -n agentgateway-system svc/agentgateway-proxy 8080:8080 &

Test it

# Send a chat request to the /together route via the Step 5 port-forward (local port 8080).
curl "localhost:8080/together" -H content-type:application/json -d '{
  "model": "meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo",
  "messages": [{"role": "user", "content": "Hello from Together AI!"}]
}' | jq

Fireworks AI

Native

39 models

llama-v3p3-70b-instruct llama-v3p1-405b-instruct llama-v3p1-70b-instruct +36 more

api.fireworks.ai

Auth: $FIREWORKS_API_KEY

View configuration

Fireworks AI Configuration

Supported Models (39) — click a model to use it

llama-v3p3-70b-instruct llama-v3p1-405b-instruct llama-v3p1-70b-instruct llama-v3p1-8b-instruct llama-v3p2-90b-vision-instruct llama4-maverick-instruct-basic llama4-scout-instruct-basic qwen3p5-397b-a17b qwen3p5-35b-a3b qwen3-235b-a22b qwen3-coder-480b-a35b-instruct qwen3-32b qwen3-8b qwen2p5-72b-instruct deepseek-v4-pro deepseek-v4-flash deepseek-r1 deepseek-v3 deepseek-v3p1 deepseek-v3p2 deepseek-r1-0528 gpt-oss-120b gpt-oss-20b gpt-oss-safeguard kimi-k2p6-instruct kimi-k2-instruct-0905 glm-5p1 glm-5 minimax-m2p7 mixtral-8x22b-instruct gemma2-9b-it gemma-4-31b-instruct gemma-4-26b-instruct gemma-3-27b-instruct gemma-3-12b-instruct mistral-large-3-675b-instruct-2512 phi-4 phi-4-multimodal yi-large

Click any model above to update the configuration below, or edit the model field directly.

Save this as config.yaml and run with agentgateway -f config.yaml

llm:
  models:
  - name: "*"
    provider: fireworks
    params:
      model: accounts/fireworks/models/llama-v3p1-70b-instruct
      apiKey: "$FIREWORKS_API_KEY"
      baseUrl: "https://api.fireworks.ai/inference/v1"

Run these kubectl apply commands in order

# Step 1: Gateway
kubectl apply -f- <<EOF
apiVersion: gateway.networking.k8s.io/v1
kind: Gateway
metadata:
  name: agentgateway-proxy
  namespace: agentgateway-system
spec:
  gatewayClassName: agentgateway
  listeners:
  - protocol: HTTP
    port: 8080
    name: http
    allowedRoutes:
      namespaces:
        from: All
EOF


# Step 2: Secret
export FIREWORKS_API_KEY=<your-key>
kubectl apply -f- <<EOF
apiVersion: v1
kind: Secret
metadata:
  name: fireworks-secret
  namespace: agentgateway-system
type: Opaque
stringData:
  Authorization: $FIREWORKS_API_KEY
EOF

# Step 3: Backend
kubectl apply -f- <<EOF
apiVersion: agentgateway.dev/v1alpha1
kind: AgentgatewayBackend
metadata:
  name: fireworks
  namespace: agentgateway-system
spec:
  ai:
    provider:
      openai:
        model: accounts/fireworks/models/llama-v3p1-70b-instruct
        host: api.fireworks.ai
        port: 443
        path: "/inference/v1/chat/completions"
  policies:
    auth:
      secretRef:
        name: fireworks-secret
    tls:
      sni: api.fireworks.ai
EOF

# Step 4: Route
kubectl apply -f- <<EOF
apiVersion: gateway.networking.k8s.io/v1
kind: HTTPRoute
metadata:
  name: fireworks
  namespace: agentgateway-system
spec:
  parentRefs:
  - name: agentgateway-proxy
    namespace: agentgateway-system
  rules:
  - matches:
    - path:
        type: PathPrefix
        value: /fireworks
    filters:
    - type: URLRewrite
      urlRewrite:
        hostname: api.fireworks.ai
    backendRefs:
    - name: fireworks
      namespace: agentgateway-system
      group: agentgateway.dev
      kind: AgentgatewayBackend
EOF

# Step 5: Port-forward to test
kubectl port-forward -n agentgateway-system svc/agentgateway-proxy 8080:8080 &

Test it

# Send a chat request to the /fireworks route via the Step 5 port-forward (local port 8080).
curl "localhost:8080/fireworks" -H content-type:application/json -d '{
  "model": "accounts/fireworks/models/llama-v3p1-70b-instruct",
  "messages": [{"role": "user", "content": "Hello from Fireworks!"}]
}' | jq

Perplexity AI

OpenAI-compat

9 models

sonar-pro sonar sonar-deep-research +6 more

api.perplexity.ai

Auth: $PERPLEXITY_API_KEY

View configuration

Perplexity AI Configuration

Supported Models (9) — click a model to use it

sonar-pro sonar sonar-deep-research sonar-reasoning-pro sonar-reasoning pplx-embed-v1-4b r1-1776 llama-3.1-sonar-large-128k-online llama-3.1-sonar-huge-128k-online

Click any model above to update the configuration below, or edit the model field directly.

Save this as config.yaml and run with agentgateway -f config.yaml

llm:
  models:
  - name: "*"
    provider: openAI
    params:
      model: sonar-pro
      host: api.perplexity.ai
      port: 443
      path: "/chat/completions"
      apiKey: "$PERPLEXITY_API_KEY"

Run these kubectl apply commands in order

# Step 1: Gateway
kubectl apply -f- <<EOF
apiVersion: gateway.networking.k8s.io/v1
kind: Gateway
metadata:
  name: agentgateway-proxy
  namespace: agentgateway-system
spec:
  gatewayClassName: agentgateway
  listeners:
  - protocol: HTTP
    port: 8080
    name: http
    allowedRoutes:
      namespaces:
        from: All
EOF


# Step 2: Secret
export PERPLEXITY_API_KEY=<your-key>
kubectl apply -f- <<EOF
apiVersion: v1
kind: Secret
metadata:
  name: perplexity-secret
  namespace: agentgateway-system
type: Opaque
stringData:
  Authorization: $PERPLEXITY_API_KEY
EOF

# Step 3: Backend
kubectl apply -f- <<EOF
apiVersion: agentgateway.dev/v1alpha1
kind: AgentgatewayBackend
metadata:
  name: perplexity
  namespace: agentgateway-system
spec:
  ai:
    provider:
      openai:
        model: sonar-pro
        host: api.perplexity.ai
        port: 443
        path: "/chat/completions"
  policies:
    auth:
      secretRef:
        name: perplexity-secret
    tls:
      sni: api.perplexity.ai
EOF

# Step 4: Route
kubectl apply -f- <<EOF
apiVersion: gateway.networking.k8s.io/v1
kind: HTTPRoute
metadata:
  name: perplexity
  namespace: agentgateway-system
spec:
  parentRefs:
  - name: agentgateway-proxy
    namespace: agentgateway-system
  rules:
  - matches:
    - path:
        type: PathPrefix
        value: /perplexity
    filters:
    - type: URLRewrite
      urlRewrite:
        hostname: api.perplexity.ai
    backendRefs:
    - name: perplexity
      namespace: agentgateway-system
      group: agentgateway.dev
      kind: AgentgatewayBackend
EOF

# Step 5: Port-forward to test
kubectl port-forward -n agentgateway-system svc/agentgateway-proxy 8080:8080 &

Test it

# Send a chat request to the /perplexity route via the Step 5 port-forward (local port 8080).
curl "localhost:8080/perplexity" -H content-type:application/json -d '{
  "model": "sonar-pro",
  "messages": [{"role": "user", "content": "Hello from Perplexity!"}]
}' | jq

OpenRouter

Native

66 models

openai/gpt-5.5 openai/gpt-5.5-pro openai/gpt-5.4 +63 more

openrouter.ai

Auth: $OPENROUTER_API_KEY

View configuration

OpenRouter Configuration

Supported Models (66) — click a model to use it

openai/gpt-5.5 openai/gpt-5.5-pro openai/gpt-5.4 openai/gpt-5.4-thinking openai/gpt-5.3-codex openai/gpt-5.2 openai/gpt-5.2-pro openai/gpt-5.1 openai/gpt-5 openai/gpt-5-mini openai/gpt-5-nano openai/gpt-5-codex openai/gpt-4.1 openai/gpt-4.1-mini openai/gpt-4o openai/o3 openai/o3-mini openai/o3-pro openai/o4-mini openai/gpt-oss-120b anthropic/claude-opus-4.7 anthropic/claude-opus-4.6 anthropic/claude-opus-4.5 anthropic/claude-opus-4.1 anthropic/claude-sonnet-4.6 anthropic/claude-sonnet-4.5 anthropic/claude-sonnet-4 anthropic/claude-opus-4 anthropic/claude-haiku-4.5 google/gemini-3.1-pro-preview google/gemini-3.1-flash-lite-preview google/gemini-3-pro-preview google/gemini-3-flash-preview google/gemini-2.5-pro google/gemini-2.5-flash google/gemini-2.5-flash-lite deepseek/deepseek-v4-pro deepseek/deepseek-v4-flash deepseek/deepseek-v3.2 deepseek/deepseek-chat-v3.1 deepseek/deepseek-r1 deepseek/deepseek-r1-0528 meta-llama/llama-4-maverick meta-llama/llama-4-scout meta-llama/llama-3.3-70b-instruct x-ai/grok-4.3 x-ai/grok-4-1-fast x-ai/grok-4 x-ai/grok-3 qwen/qwen3.5-max qwen/qwen3.5-397b-a17b qwen/qwen3-235b-a22b qwen/qwen3-max qwen/qwen3-coder mistralai/mistral-large-2512 mistralai/mistral-large mistralai/mistral-medium-2604 mistralai/mistral-medium-3 moonshotai/kimi-k2.6 moonshotai/kimi-k2.5 zhipu/glm-5.1 zhipu/glm-5 minimax/minimax-m2.7 cohere/command-a-reasoning cohere/command-r-plus nousresearch/hermes-3-llama-3.1-405b

Click any model above to update the configuration below, or edit the model field directly.

Save this as config.yaml and run with agentgateway -f config.yaml

llm:
  models:
  - name: "*"
    provider: openrouter
    params:
      # Default model; use an ID from the OpenRouter supported-models list above
      # ("<vendor>/<model>" slug, no Anthropic-style date suffix).
      model: anthropic/claude-sonnet-4
      apiKey: "$OPENROUTER_API_KEY"
      baseUrl: "https://openrouter.ai/api/v1"

Run these kubectl apply commands in order

# Step 1: Gateway
kubectl apply -f- <<EOF
apiVersion: gateway.networking.k8s.io/v1
kind: Gateway
metadata:
  name: agentgateway-proxy
  namespace: agentgateway-system
spec:
  gatewayClassName: agentgateway
  listeners:
  - protocol: HTTP
    port: 8080
    name: http
    allowedRoutes:
      namespaces:
        from: All
EOF


# Step 2: Secret
export OPENROUTER_API_KEY=<your-key>
kubectl apply -f- <<EOF
apiVersion: v1
kind: Secret
metadata:
  name: openrouter-secret
  namespace: agentgateway-system
type: Opaque
stringData:
  Authorization: $OPENROUTER_API_KEY
EOF

# Step 3: Backend
# Creates the AgentgatewayBackend "openrouter" that targets openrouter.ai:443 and
# authenticates with the openrouter-secret created in Step 2.
kubectl apply -f- <<EOF
apiVersion: agentgateway.dev/v1alpha1
kind: AgentgatewayBackend
metadata:
  name: openrouter
  namespace: agentgateway-system
spec:
  ai:
    provider:
      openai:
        # Use an ID from the OpenRouter supported-models list above
        # ("<vendor>/<model>" slug, no Anthropic-style date suffix).
        model: anthropic/claude-sonnet-4
        host: openrouter.ai
        port: 443
        path: "/api/v1/chat/completions"
  policies:
    auth:
      secretRef:
        name: openrouter-secret
    tls:
      sni: openrouter.ai
EOF

# Step 4: Route
kubectl apply -f- <<EOF
apiVersion: gateway.networking.k8s.io/v1
kind: HTTPRoute
metadata:
  name: openrouter
  namespace: agentgateway-system
spec:
  parentRefs:
  - name: agentgateway-proxy
    namespace: agentgateway-system
  rules:
  - matches:
    - path:
        type: PathPrefix
        value: /openrouter
    filters:
    - type: URLRewrite
      urlRewrite:
        hostname: openrouter.ai
    backendRefs:
    - name: openrouter
      namespace: agentgateway-system
      group: agentgateway.dev
      kind: AgentgatewayBackend
EOF

# Step 5: Port-forward to test
kubectl port-forward -n agentgateway-system svc/agentgateway-proxy 8080:8080 &

Test it

# Send a chat request to the /openrouter route via the Step 5 port-forward (local port 8080).
# The model is an ID from the OpenRouter supported-models list above.
curl "localhost:8080/openrouter" -H content-type:application/json -d '{
  "model": "anthropic/claude-sonnet-4",
  "messages": [{"role": "user", "content": "Hello from OpenRouter!"}]
}' | jq

Cerebras

Native

16 models

llama-4-maverick llama-4-scout llama-3.3-70b +13 more

api.cerebras.ai

Auth: $CEREBRAS_API_KEY

View configuration

Cerebras Configuration

Supported Models (16) — click a model to use it

llama-4-maverick llama-4-scout llama-3.3-70b llama3.1-70b llama3.1-8b qwen-3.6-35b-a3b qwen-3.5-397b-a17b qwen-3-235b-a22b-instruct-2507 qwen-3-32b gpt-oss-120b gpt-oss-20b deepseek-v4-flash kimi-k2.6-instruct minimax-m2.7 zai-glm-5.1 zai-glm-5

Click any model above to update the configuration below, or edit the model field directly.

Save this as config.yaml and run with agentgateway -f config.yaml

llm:
  models:
  - name: "*"
    provider: cerebras
    params:
      model: llama-3.3-70b
      apiKey: "$CEREBRAS_API_KEY"
      baseUrl: "https://api.cerebras.ai/v1"

Run these kubectl apply commands in order

# Step 1: Gateway
kubectl apply -f- <<EOF
apiVersion: gateway.networking.k8s.io/v1
kind: Gateway
metadata:
  name: agentgateway-proxy
  namespace: agentgateway-system
spec:
  gatewayClassName: agentgateway
  listeners:
  - protocol: HTTP
    port: 8080
    name: http
    allowedRoutes:
      namespaces:
        from: All
EOF


# Step 2: Secret
export CEREBRAS_API_KEY=<your-key>
kubectl apply -f- <<EOF
apiVersion: v1
kind: Secret
metadata:
  name: cerebras-secret
  namespace: agentgateway-system
type: Opaque
stringData:
  Authorization: $CEREBRAS_API_KEY
EOF

# Step 3: Backend
kubectl apply -f- <<EOF
apiVersion: agentgateway.dev/v1alpha1
kind: AgentgatewayBackend
metadata:
  name: cerebras
  namespace: agentgateway-system
spec:
  ai:
    provider:
      openai:
        model: llama-3.3-70b
        host: api.cerebras.ai
        port: 443
        path: "/v1/chat/completions"
  policies:
    auth:
      secretRef:
        name: cerebras-secret
    tls:
      sni: api.cerebras.ai
EOF

# Step 4: Route
kubectl apply -f- <<EOF
apiVersion: gateway.networking.k8s.io/v1
kind: HTTPRoute
metadata:
  name: cerebras
  namespace: agentgateway-system
spec:
  parentRefs:
  - name: agentgateway-proxy
    namespace: agentgateway-system
  rules:
  - matches:
    - path:
        type: PathPrefix
        value: /cerebras
    filters:
    - type: URLRewrite
      urlRewrite:
        hostname: api.cerebras.ai
    backendRefs:
    - name: cerebras
      namespace: agentgateway-system
      group: agentgateway.dev
      kind: AgentgatewayBackend
EOF

# Step 5: Port-forward to test
kubectl port-forward -n agentgateway-system svc/agentgateway-proxy 8080:8080 &

Test it

# Send a chat request to the /cerebras route via the Step 5 port-forward (local port 8080).
curl "localhost:8080/cerebras" -H content-type:application/json -d '{
  "model": "llama-3.3-70b",
  "messages": [{"role": "user", "content": "Hello from Cerebras!"}]
}' | jq

SambaNova

OpenAI-compat

23 models

Llama-4-Maverick-17B-128E-Instruct Llama-4-Scout-17B-16E-Instruct Meta-Llama-3.3-70B-Instruct +20 more

api.sambanova.ai

Auth: $SAMBANOVA_API_KEY

View configuration

SambaNova Configuration

Supported Models (23) — click a model to use it

Llama-4-Maverick-17B-128E-Instruct Llama-4-Scout-17B-16E-Instruct Meta-Llama-3.3-70B-Instruct Meta-Llama-3.1-405B-Instruct Meta-Llama-3.1-70B-Instruct Meta-Llama-3.1-8B-Instruct DeepSeek-V4-Pro DeepSeek-V4-Flash DeepSeek-V3.2 DeepSeek-V3.1 DeepSeek-V3-0324 DeepSeek-R1 DeepSeek-R1-0528 QwQ-32B Qwen3.6-35B-A3B Qwen3.5-397B-A17B Qwen3.5-35B-A3B Qwen3-235B-A22B-Instruct-2507 Qwen3-32B Kimi-K2.6-Instruct MiniMax-M2.7 gpt-oss-120b gpt-oss-20b

Click any model above to update the configuration below, or edit the model field directly.

Save this as config.yaml and run with agentgateway -f config.yaml

llm:
  models:
  - name: "*"
    provider: openAI
    params:
      model: Meta-Llama-3.1-70B-Instruct
      host: api.sambanova.ai
      port: 443
      path: "/v1/chat/completions"
      apiKey: "$SAMBANOVA_API_KEY"

Run these kubectl apply commands in order

# Step 1: Gateway
kubectl apply -f- <<EOF
apiVersion: gateway.networking.k8s.io/v1
kind: Gateway
metadata:
  name: agentgateway-proxy
  namespace: agentgateway-system
spec:
  gatewayClassName: agentgateway
  listeners:
  - protocol: HTTP
    port: 8080
    name: http
    allowedRoutes:
      namespaces:
        from: All
EOF


# Step 2: Secret
export SAMBANOVA_API_KEY=<your-key>
kubectl apply -f- <<EOF
apiVersion: v1
kind: Secret
metadata:
  name: sambanova-secret
  namespace: agentgateway-system
type: Opaque
stringData:
  Authorization: $SAMBANOVA_API_KEY
EOF

# Step 3: Backend
kubectl apply -f- <<EOF
apiVersion: agentgateway.dev/v1alpha1
kind: AgentgatewayBackend
metadata:
  name: sambanova
  namespace: agentgateway-system
spec:
  ai:
    provider:
      openai:
        model: Meta-Llama-3.1-70B-Instruct
        host: api.sambanova.ai
        port: 443
        path: "/v1/chat/completions"
  policies:
    auth:
      secretRef:
        name: sambanova-secret
    tls:
      sni: api.sambanova.ai
EOF

# Step 4: Route
kubectl apply -f- <<EOF
apiVersion: gateway.networking.k8s.io/v1
kind: HTTPRoute
metadata:
  name: sambanova
  namespace: agentgateway-system
spec:
  parentRefs:
  - name: agentgateway-proxy
    namespace: agentgateway-system
  rules:
  - matches:
    - path:
        type: PathPrefix
        value: /sambanova
    filters:
    - type: URLRewrite
      urlRewrite:
        hostname: api.sambanova.ai
    backendRefs:
    - name: sambanova
      namespace: agentgateway-system
      group: agentgateway.dev
      kind: AgentgatewayBackend
EOF

# Step 5: Port-forward to test
kubectl port-forward -n agentgateway-system svc/agentgateway-proxy 8080:8080 &

Test it

# Send a chat request to the /sambanova route via the Step 5 port-forward (local port 8080).
curl "localhost:8080/sambanova" -H content-type:application/json -d '{
  "model": "Meta-Llama-3.1-70B-Instruct",
  "messages": [{"role": "user", "content": "Hello from SambaNova!"}]
}' | jq

DeepInfra

Native

39 models

meta-llama/Llama-4-Scout-17B-16E-Instruct meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 meta-llama/Llama-3.3-70B-Instruct-Turbo +36 more

api.deepinfra.com

Auth: $DEEPINFRA_API_KEY

View configuration

DeepInfra Configuration

Supported Models (39) — click a model to use it

meta-llama/Llama-4-Scout-17B-16E-Instruct meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 meta-llama/Llama-3.3-70B-Instruct-Turbo meta-llama/Meta-Llama-3.1-405B-Instruct meta-llama/Meta-Llama-3.1-70B-Instruct meta-llama/Meta-Llama-3.1-8B-Instruct Qwen/Qwen3.6-35B-A3B Qwen/Qwen3.6-27B Qwen/Qwen3.5-397B-A17B Qwen/Qwen3.5-122B-A10B Qwen/Qwen3.5-35B-A3B Qwen/Qwen3.5-9B Qwen/Qwen3-235B-A22B Qwen/Qwen3-235B-A22B-Instruct-2507 Qwen/Qwen3-Coder-480B-A35B-Instruct Qwen/Qwen3-Coder-Next-80B-A3B-Instruct Qwen/Qwen3-32B Qwen/Qwen2.5-72B-Instruct Qwen/QwQ-32B deepseek-ai/DeepSeek-V4-Pro deepseek-ai/DeepSeek-V4-Flash deepseek-ai/DeepSeek-V3.2 deepseek-ai/DeepSeek-V3.1 deepseek-ai/DeepSeek-R1-0528 moonshotai/Kimi-K2.6-Instruct MiniMaxAI/MiniMax-M2.7 zai-org/GLM-5.1 NousResearch/Hermes-3-Llama-3.1-405B google/gemma-4-31b-it google/gemma-4-26b-it google/gemma-3-27b-it google/gemma-3-12b-it google/gemma-2-27b-it nvidia/Nemotron-3-Super nvidia/Nemotron-3-Nano-Omni microsoft/Phi-4 microsoft/Phi-4-multimodal mistralai/Mixtral-8x22B-Instruct-v0.1 microsoft/WizardLM-2-8x22B

Click any model above to update the configuration below, or edit the model field directly.

Save this as config.yaml and run with agentgateway -f config.yaml

llm:
  models:
  - name: "*"
    provider: deepinfra
    params:
      model: meta-llama/Llama-3.3-70B-Instruct-Turbo
      apiKey: "$DEEPINFRA_API_KEY"
      baseUrl: "https://api.deepinfra.com/v1/openai"

Run these kubectl apply commands in order

# Step 1: Gateway
kubectl apply -f- <<EOF
apiVersion: gateway.networking.k8s.io/v1
kind: Gateway
metadata:
  name: agentgateway-proxy
  namespace: agentgateway-system
spec:
  gatewayClassName: agentgateway
  listeners:
  - protocol: HTTP
    port: 8080
    name: http
    allowedRoutes:
      namespaces:
        from: All
EOF


# Step 2: Secret
export DEEPINFRA_API_KEY=<your-key>
kubectl apply -f- <<EOF
apiVersion: v1
kind: Secret
metadata:
  name: deepinfra-secret
  namespace: agentgateway-system
type: Opaque
stringData:
  Authorization: $DEEPINFRA_API_KEY
EOF

# Step 3: Backend
kubectl apply -f- <<EOF
apiVersion: agentgateway.dev/v1alpha1
kind: AgentgatewayBackend
metadata:
  name: deepinfra
  namespace: agentgateway-system
spec:
  ai:
    provider:
      openai:
        model: meta-llama/Llama-3.3-70B-Instruct-Turbo
        host: api.deepinfra.com
        port: 443
        path: "/v1/openai/chat/completions"
  policies:
    auth:
      secretRef:
        name: deepinfra-secret
    tls:
      sni: api.deepinfra.com
EOF

# Step 4: Route
kubectl apply -f- <<EOF
apiVersion: gateway.networking.k8s.io/v1
kind: HTTPRoute
metadata:
  name: deepinfra
  namespace: agentgateway-system
spec:
  parentRefs:
  - name: agentgateway-proxy
    namespace: agentgateway-system
  rules:
  - matches:
    - path:
        type: PathPrefix
        value: /deepinfra
    filters:
    - type: URLRewrite
      urlRewrite:
        hostname: api.deepinfra.com
    backendRefs:
    - name: deepinfra
      namespace: agentgateway-system
      group: agentgateway.dev
      kind: AgentgatewayBackend
EOF

# Step 5: Port-forward to test
kubectl port-forward -n agentgateway-system svc/agentgateway-proxy 8080:8080 &

Test it

# Send a chat request to the /deepinfra route via the Step 5 port-forward (local port 8080).
curl "localhost:8080/deepinfra" -H content-type:application/json -d '{
  "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
  "messages": [{"role": "user", "content": "Hello from DeepInfra!"}]
}' | jq

HuggingFace

Native

37 models

meta-llama/Llama-4-Scout-17B-16E-Instruct meta-llama/Llama-4-Maverick-17B-128E-Instruct meta-llama/Llama-3.1-70B-Instruct +34 more

router.huggingface.co

Auth: $HF_API_KEY

View configuration

HuggingFace Configuration

Supported Models (37) — click a model to use it

meta-llama/Llama-4-Scout-17B-16E-Instruct meta-llama/Llama-4-Maverick-17B-128E-Instruct meta-llama/Llama-3.1-70B-Instruct meta-llama/Llama-3.3-70B-Instruct deepseek-ai/DeepSeek-V4-Pro deepseek-ai/DeepSeek-V4-Flash deepseek-ai/DeepSeek-R1 deepseek-ai/DeepSeek-V3.2 deepseek-ai/DeepSeek-V3.1 Qwen/Qwen3.5-397B-A17B Qwen/Qwen3.5-122B-A10B Qwen/Qwen3.5-35B-A3B Qwen/Qwen3.5-9B Qwen/Qwen3.6-35B-A3B Qwen/Qwen3.6-27B Qwen/Qwen3-235B-A22B Qwen/Qwen3-Coder-480B-A35B-Instruct Qwen/Qwen3-32B Qwen/Qwen2.5-72B-Instruct Qwen/QwQ-32B google/gemma-4-31b-it google/gemma-4-26b-it google/gemma-3-27b-it google/gemma-2-27b-it openai/gpt-oss-120b openai/gpt-oss-safeguard moonshotai/Kimi-K2.6-Instruct MiniMaxAI/MiniMax-M2.7 MiniMaxAI/MiniMax-M2.5 zai-org/GLM-5.1 zai-org/GLM-5 nvidia/Nemotron-3-Super nvidia/Nemotron-3-Nano-Omni microsoft/Phi-4 microsoft/Phi-4-multimodal microsoft/Phi-4-reasoning mistralai/Mixtral-8x7B-Instruct-v0.1

Click any model above to update the configuration below, or edit the model field directly.

Save this as config.yaml and run with agentgateway -f config.yaml

llm:
  models:
  - name: "*"
    provider: huggingface
    params:
      model: meta-llama/Llama-3.1-70B-Instruct
      apiKey: "$HF_API_KEY"
      baseUrl: "https://router.huggingface.co/v1"

Run these kubectl apply commands in order

# Step 1: Gateway
kubectl apply -f- <<EOF
apiVersion: gateway.networking.k8s.io/v1
kind: Gateway
metadata:
  name: agentgateway-proxy
  namespace: agentgateway-system
spec:
  gatewayClassName: agentgateway
  listeners:
  - protocol: HTTP
    port: 8080
    name: http
    allowedRoutes:
      namespaces:
        from: All
EOF


# Step 2: Secret
export HF_API_KEY=<your-key>
kubectl apply -f- <<EOF
apiVersion: v1
kind: Secret
metadata:
  name: huggingface-secret
  namespace: agentgateway-system
type: Opaque
stringData:
  Authorization: $HF_API_KEY
EOF

# Step 3: Backend
kubectl apply -f- <<EOF
apiVersion: agentgateway.dev/v1alpha1
kind: AgentgatewayBackend
metadata:
  name: huggingface
  namespace: agentgateway-system
spec:
  ai:
    provider:
      openai:
        model: meta-llama/Llama-3.1-70B-Instruct
        host: router.huggingface.co
        port: 443
        path: "/v1/chat/completions"
  policies:
    auth:
      secretRef:
        name: huggingface-secret
    tls:
      sni: router.huggingface.co
EOF

# Step 4: Route
kubectl apply -f- <<EOF
apiVersion: gateway.networking.k8s.io/v1
kind: HTTPRoute
metadata:
  name: huggingface
  namespace: agentgateway-system
spec:
  parentRefs:
  - name: agentgateway-proxy
    namespace: agentgateway-system
  rules:
  - matches:
    - path:
        type: PathPrefix
        value: /huggingface
    filters:
    - type: URLRewrite
      urlRewrite:
        hostname: router.huggingface.co
    backendRefs:
    - name: huggingface
      namespace: agentgateway-system
      group: agentgateway.dev
      kind: AgentgatewayBackend
EOF

# Step 5: Port-forward to test
kubectl port-forward -n agentgateway-system svc/agentgateway-proxy 8080:8080 &

Test it

# Send a chat request to the /huggingface route via the Step 5 port-forward (local port 8080).
curl "localhost:8080/huggingface" -H content-type:application/json -d '{
  "model": "meta-llama/Llama-3.1-70B-Instruct",
  "messages": [{"role": "user", "content": "Hello from HuggingFace!"}]
}' | jq

Nvidia NIM

OpenAI-compat

30 models

meta/llama-4-maverick-17b-128e-instruct meta/llama-4-scout-17b-16e-instruct meta/llama-3.3-70b-instruct +27 more

integrate.api.nvidia.com

Auth: $NVIDIA_API_KEY

View configuration

Nvidia NIM Configuration

Supported Models (30) — click a model to use it

meta/llama-4-maverick-17b-128e-instruct meta/llama-4-scout-17b-16e-instruct meta/llama-3.3-70b-instruct meta/llama-3.1-405b-instruct meta/llama-3.1-70b-instruct meta/llama-3.1-8b-instruct deepseek-ai/deepseek-v4-pro deepseek-ai/deepseek-v4-flash deepseek-ai/deepseek-v3.2 deepseek-ai/deepseek-v3.1 mistralai/mistral-large-3-675b-instruct-2512 mistralai/mistral-medium-2604 mistralai/mistral-small-24b-instruct mistralai/mixtral-8x22b-instruct-v0.1 google/gemma-4-31b-it google/gemma-4-26b-it google/gemma-3-27b-it google/gemma-3-12b-it google/gemma-2-27b-it qwen/qwen3.6-35b-a3b qwen/qwen3.5-397b-a17b qwen/qwen3-235b-a22b qwen/qwen3-coder-480b-a35b-instruct microsoft/phi-4 microsoft/phi-4-multimodal microsoft/phi-4-reasoning nvidia/nemotron-3-ultra nvidia/nemotron-3-super nvidia/nemotron-3-nano-omni nvidia/nemotron-4-340b-instruct

Click any model above to update the configuration below, or edit the model field directly.

Save this as config.yaml and run with agentgateway -f config.yaml

llm:
  models:
  - name: "*"
    provider: openAI
    params:
      model: meta/llama-3.1-70b-instruct
      host: integrate.api.nvidia.com
      port: 443
      path: "/v1/chat/completions"
      apiKey: "$NVIDIA_API_KEY"

Run these kubectl apply commands in order

# Step 1: Gateway
kubectl apply -f- <<EOF
apiVersion: gateway.networking.k8s.io/v1
kind: Gateway
metadata:
  name: agentgateway-proxy
  namespace: agentgateway-system
spec:
  gatewayClassName: agentgateway
  listeners:
  - protocol: HTTP
    port: 8080
    name: http
    allowedRoutes:
      namespaces:
        from: All
EOF


# Step 2: Secret
export NVIDIA_API_KEY=<your-key>
kubectl apply -f- <<EOF
apiVersion: v1
kind: Secret
metadata:
  name: nvidia-secret
  namespace: agentgateway-system
type: Opaque
stringData:
  Authorization: $NVIDIA_API_KEY
EOF

# Step 3: Backend
kubectl apply -f- <<EOF
apiVersion: agentgateway.dev/v1alpha1
kind: AgentgatewayBackend
metadata:
  name: nvidia-nim
  namespace: agentgateway-system
spec:
  ai:
    provider:
      openai:
        model: meta/llama-3.1-70b-instruct
        host: integrate.api.nvidia.com
        port: 443
        path: "/v1/chat/completions"
  policies:
    auth:
      secretRef:
        name: nvidia-secret
    tls:
      sni: integrate.api.nvidia.com
EOF

# Step 4: Route
kubectl apply -f- <<EOF
apiVersion: gateway.networking.k8s.io/v1
kind: HTTPRoute
metadata:
  name: nvidia-nim
  namespace: agentgateway-system
spec:
  parentRefs:
  - name: agentgateway-proxy
    namespace: agentgateway-system
  rules:
  - matches:
    - path:
        type: PathPrefix
        value: /nvidia
    filters:
    - type: URLRewrite
      urlRewrite:
        hostname: integrate.api.nvidia.com
    backendRefs:
    - name: nvidia-nim
      namespace: agentgateway-system
      group: agentgateway.dev
      kind: AgentgatewayBackend
EOF

# Step 5: Port-forward to test
kubectl port-forward -n agentgateway-system svc/agentgateway-proxy 8080:8080 &

Test it

curl "localhost:8080/nvidia" -H content-type:application/json -d '{
  "model": "meta/llama-3.1-70b-instruct",
  "messages": [{"role": "user", "content": "Hello from Nvidia NIM!"}]
}' | jq

Replicate

OpenAI-compat

18 models

meta/llama-4-scout-17b-16e-instruct meta/llama-4-maverick-17b-128e-instruct meta/llama-3.1-405b-instruct +15 more

api.replicate.com

Auth: $REPLICATE_API_KEY

View configuration

Replicate Configuration

Supported Models (18) — click a model to use it

meta/llama-4-scout-17b-16e-instruct meta/llama-4-maverick-17b-128e-instruct meta/llama-3.1-405b-instruct meta/llama-3.3-70b-instruct meta/llama-3.2-90b-vision-instruct anthropic/claude-opus-4.7 anthropic/claude-opus-4.6 anthropic/claude-sonnet-4.6 anthropic/claude-4-sonnet anthropic/claude-3.5-sonnet deepseek-ai/deepseek-v4-pro deepseek-ai/deepseek-v4-flash deepseek-ai/deepseek-r1 deepseek-ai/deepseek-v3.2 deepseek-ai/deepseek-v3.1 deepseek-ai/deepseek-v3 google/gemini-2.5-flash mistralai/mixtral-8x7b-instruct-v0.1

Click any model above to update the configuration below, or edit the model field directly.

Save this as config.yaml and run with agentgateway -f config.yaml

llm:
  models:
  - name: "*"
    provider: openAI
    params:
      model: meta/llama-3.1-405b-instruct
      host: api.replicate.com
      port: 443
      path: "/v1/chat/completions"
      apiKey: "$REPLICATE_API_KEY"

Run these kubectl apply commands in order

# Step 1: Gateway
kubectl apply -f- <<EOF
apiVersion: gateway.networking.k8s.io/v1
kind: Gateway
metadata:
  name: agentgateway-proxy
  namespace: agentgateway-system
spec:
  gatewayClassName: agentgateway
  listeners:
  - protocol: HTTP
    port: 8080
    name: http
    allowedRoutes:
      namespaces:
        from: All
EOF


# Step 2: Secret
export REPLICATE_API_KEY=<your-key>
kubectl apply -f- <<EOF
apiVersion: v1
kind: Secret
metadata:
  name: replicate-secret
  namespace: agentgateway-system
type: Opaque
stringData:
  Authorization: $REPLICATE_API_KEY
EOF

# Step 3: Backend
kubectl apply -f- <<EOF
apiVersion: agentgateway.dev/v1alpha1
kind: AgentgatewayBackend
metadata:
  name: replicate
  namespace: agentgateway-system
spec:
  ai:
    provider:
      openai:
        model: meta/llama-3.1-405b-instruct
        host: api.replicate.com
        port: 443
        path: "/v1/chat/completions"
  policies:
    auth:
      secretRef:
        name: replicate-secret
    tls:
      sni: api.replicate.com
EOF

# Step 4: Route
kubectl apply -f- <<EOF
apiVersion: gateway.networking.k8s.io/v1
kind: HTTPRoute
metadata:
  name: replicate
  namespace: agentgateway-system
spec:
  parentRefs:
  - name: agentgateway-proxy
    namespace: agentgateway-system
  rules:
  - matches:
    - path:
        type: PathPrefix
        value: /replicate
    filters:
    - type: URLRewrite
      urlRewrite:
        hostname: api.replicate.com
    backendRefs:
    - name: replicate
      namespace: agentgateway-system
      group: agentgateway.dev
      kind: AgentgatewayBackend
EOF

# Step 5: Port-forward to test
kubectl port-forward -n agentgateway-system svc/agentgateway-proxy 8080:8080 &

Test it

curl "localhost:8080/replicate" -H content-type:application/json -d '{
  "model": "meta/llama-3.1-405b-instruct",
  "messages": [{"role": "user", "content": "Hello from Replicate!"}]
}' | jq

AI21

OpenAI-compat

8 models

jamba-1.5-large jamba-1.5-mini jamba-instruct +5 more

api.ai21.com

Auth: $AI21_API_KEY

View configuration

AI21 Configuration

Supported Models (8) — click a model to use it

jamba-1.5-large jamba-1.5-mini jamba-instruct jamba-1-5-large jamba-1-5-mini j2-ultra j2-mid j2-light

Click any model above to update the configuration below, or edit the model field directly.

Save this as config.yaml and run with agentgateway -f config.yaml

llm:
  models:
  - name: "*"
    provider: openAI
    params:
      model: jamba-1.5-large
      apiKey: "$AI21_API_KEY"

Run these kubectl apply commands in order

# Step 1: Gateway
kubectl apply -f- <<EOF
apiVersion: gateway.networking.k8s.io/v1
kind: Gateway
metadata:
  name: agentgateway-proxy
  namespace: agentgateway-system
spec:
  gatewayClassName: agentgateway
  listeners:
  - protocol: HTTP
    port: 8080
    name: http
    allowedRoutes:
      namespaces:
        from: All
EOF


# Step 2: Secret
export AI21_API_KEY=<your-key>
kubectl apply -f- <<EOF
apiVersion: v1
kind: Secret
metadata:
  name: ai21-secret
  namespace: agentgateway-system
type: Opaque
stringData:
  Authorization: $AI21_API_KEY
EOF

# Step 3: Backend
kubectl apply -f- <<EOF
apiVersion: agentgateway.dev/v1alpha1
kind: AgentgatewayBackend
metadata:
  name: ai21
  namespace: agentgateway-system
spec:
  ai:
    provider:
      openai:
        model: "jamba-1.5-large"
  policies:
    auth:
      secretRef:
        name: ai21-secret
EOF

# Step 4: Route
kubectl apply -f- <<EOF
apiVersion: gateway.networking.k8s.io/v1
kind: HTTPRoute
metadata:
  name: ai21
  namespace: agentgateway-system
spec:
  parentRefs:
  - name: agentgateway-proxy
    namespace: agentgateway-system
  rules:
  - matches:
    - path:
        type: PathPrefix
        value: /ai21
    backendRefs:
    - name: ai21
      namespace: agentgateway-system
      group: agentgateway.dev
      kind: AgentgatewayBackend
EOF

# Step 5: Port-forward to test
kubectl port-forward -n agentgateway-system svc/agentgateway-proxy 8080:8080 &

Test it

curl "localhost:8080/ai21" -H content-type:application/json -d '{
  "model": "jamba-1.5-large",
  "messages": [{"role": "user", "content": "Hello!"}]
}' | jq

Cloudflare Workers AI

OpenAI-compat

9 models

@cf/meta/llama-3.1-8b-instruct @cf/meta/llama-3.1-70b-instruct @cf/meta/llama-3.2-3b-instruct +6 more

api.cloudflare.com

Auth: $CF_API_TOKEN

View configuration

Cloudflare Workers AI Configuration

Supported Models (9) — click a model to use it

@cf/meta/llama-3.1-8b-instruct @cf/meta/llama-3.1-70b-instruct @cf/meta/llama-3.2-3b-instruct @cf/meta/llama-3.3-70b-instruct-fp8-fast @cf/mistral/mistral-7b-instruct-v0.2 @cf/google/gemma-7b-it @cf/qwen/qwen1.5-14b-chat-awq @cf/deepseek-ai/deepseek-r1-distill-qwen-32b @hf/thebloke/deepseek-coder-6.7b-instruct-awq

Click any model above to update the configuration below, or edit the model field directly.

Save this as config.yaml and run with agentgateway -f config.yaml

llm:
  models:
  - name: "*"
    provider: openAI
    params:
      model: "@cf/meta/llama-3.1-8b-instruct"
      apiKey: "$CF_API_TOKEN"

Run these kubectl apply commands in order

# Step 1: Gateway
kubectl apply -f- <<EOF
apiVersion: gateway.networking.k8s.io/v1
kind: Gateway
metadata:
  name: agentgateway-proxy
  namespace: agentgateway-system
spec:
  gatewayClassName: agentgateway
  listeners:
  - protocol: HTTP
    port: 8080
    name: http
    allowedRoutes:
      namespaces:
        from: All
EOF


# Step 2: Secret
export CF_API_TOKEN=<your-key>
kubectl apply -f- <<EOF
apiVersion: v1
kind: Secret
metadata:
  name: cloudflare-secret
  namespace: agentgateway-system
type: Opaque
stringData:
  Authorization: $CF_API_TOKEN
EOF

# Step 3: Backend
kubectl apply -f- <<EOF
apiVersion: agentgateway.dev/v1alpha1
kind: AgentgatewayBackend
metadata:
  name: cloudflare
  namespace: agentgateway-system
spec:
  ai:
    provider:
      openai:
        model: "@cf/meta/llama-3.1-8b-instruct"
  policies:
    auth:
      secretRef:
        name: cloudflare-secret
EOF

# Step 4: Route
kubectl apply -f- <<EOF
apiVersion: gateway.networking.k8s.io/v1
kind: HTTPRoute
metadata:
  name: cloudflare
  namespace: agentgateway-system
spec:
  parentRefs:
  - name: agentgateway-proxy
    namespace: agentgateway-system
  rules:
  - matches:
    - path:
        type: PathPrefix
        value: /cloudflare
    backendRefs:
    - name: cloudflare
      namespace: agentgateway-system
      group: agentgateway.dev
      kind: AgentgatewayBackend
EOF

# Step 5: Port-forward to test
kubectl port-forward -n agentgateway-system svc/agentgateway-proxy 8080:8080 &

Test it

curl "localhost:8080/cloudflare" -H content-type:application/json -d '{
  "model": "@cf/meta/llama-3.1-8b-instruct",
  "messages": [{"role": "user", "content": "Hello!"}]
}' | jq

Lambda AI

OpenAI-compat

7 models

hermes-3-llama-3.1-405b-fp8 hermes-3-llama-3.1-70b-fp8 llama-3.1-405b-instruct +4 more

api.lambdalabs.com

Auth: $LAMBDA_API_KEY

View configuration

Lambda AI Configuration

Supported Models (7) — click a model to use it

hermes-3-llama-3.1-405b-fp8 hermes-3-llama-3.1-70b-fp8 llama-3.1-405b-instruct llama-3.1-70b-instruct llama-3.3-70b-instruct deepseek-llm-67b-chat qwen2.5-72b-instruct

Click any model above to update the configuration below, or edit the model field directly.

Save this as config.yaml and run with agentgateway -f config.yaml

llm:
  models:
  - name: "*"
    provider: openAI
    params:
      model: llama-3.3-70b-instruct
      apiKey: "$LAMBDA_API_KEY"

Run these kubectl apply commands in order

# Step 1: Gateway
kubectl apply -f- <<EOF
apiVersion: gateway.networking.k8s.io/v1
kind: Gateway
metadata:
  name: agentgateway-proxy
  namespace: agentgateway-system
spec:
  gatewayClassName: agentgateway
  listeners:
  - protocol: HTTP
    port: 8080
    name: http
    allowedRoutes:
      namespaces:
        from: All
EOF


# Step 2: Secret
export LAMBDA_API_KEY=<your-key>
kubectl apply -f- <<EOF
apiVersion: v1
kind: Secret
metadata:
  name: lambda-secret
  namespace: agentgateway-system
type: Opaque
stringData:
  Authorization: $LAMBDA_API_KEY
EOF

# Step 3: Backend
kubectl apply -f- <<EOF
apiVersion: agentgateway.dev/v1alpha1
kind: AgentgatewayBackend
metadata:
  name: lambda
  namespace: agentgateway-system
spec:
  ai:
    provider:
      openai:
        model: "llama-3.3-70b-instruct"
  policies:
    auth:
      secretRef:
        name: lambda-secret
EOF

# Step 4: Route
kubectl apply -f- <<EOF
apiVersion: gateway.networking.k8s.io/v1
kind: HTTPRoute
metadata:
  name: lambda
  namespace: agentgateway-system
spec:
  parentRefs:
  - name: agentgateway-proxy
    namespace: agentgateway-system
  rules:
  - matches:
    - path:
        type: PathPrefix
        value: /lambda
    backendRefs:
    - name: lambda
      namespace: agentgateway-system
      group: agentgateway.dev
      kind: AgentgatewayBackend
EOF

# Step 5: Port-forward to test
kubectl port-forward -n agentgateway-system svc/agentgateway-proxy 8080:8080 &

Test it

curl "localhost:8080/lambda" -H content-type:application/json -d '{
  "model": "llama-3.3-70b-instruct",
  "messages": [{"role": "user", "content": "Hello!"}]
}' | jq

Nebius AI Studio

OpenAI-compat

10 models

meta-llama/Llama-3.1-70B-Instruct meta-llama/Llama-3.1-405B-Instruct meta-llama/Llama-3.3-70B-Instruct +7 more

api.studio.nebius.ai

Auth: $NEBIUS_API_KEY

View configuration

Nebius AI Studio Configuration

Supported Models (10) — click a model to use it

meta-llama/Llama-3.1-70B-Instruct meta-llama/Llama-3.1-405B-Instruct meta-llama/Llama-3.3-70B-Instruct meta-llama/Llama-4-Scout-17B-16E-Instruct meta-llama/Llama-4-Maverick-17B-128E-Instruct Qwen/Qwen2.5-72B-Instruct Qwen/Qwen3-235B-A22B deepseek-ai/DeepSeek-R1 deepseek-ai/DeepSeek-V3-0324 mistralai/Mistral-Large-2411

Click any model above to update the configuration below, or edit the model field directly.

Save this as config.yaml and run with agentgateway -f config.yaml

llm:
  models:
  - name: "*"
    provider: openAI
    params:
      model: meta-llama/Llama-3.3-70B-Instruct
      apiKey: "$NEBIUS_API_KEY"

Run these kubectl apply commands in order

# Step 1: Gateway
kubectl apply -f- <<EOF
apiVersion: gateway.networking.k8s.io/v1
kind: Gateway
metadata:
  name: agentgateway-proxy
  namespace: agentgateway-system
spec:
  gatewayClassName: agentgateway
  listeners:
  - protocol: HTTP
    port: 8080
    name: http
    allowedRoutes:
      namespaces:
        from: All
EOF


# Step 2: Secret
export NEBIUS_API_KEY=<your-key>
kubectl apply -f- <<EOF
apiVersion: v1
kind: Secret
metadata:
  name: nebius-secret
  namespace: agentgateway-system
type: Opaque
stringData:
  Authorization: $NEBIUS_API_KEY
EOF

# Step 3: Backend
kubectl apply -f- <<EOF
apiVersion: agentgateway.dev/v1alpha1
kind: AgentgatewayBackend
metadata:
  name: nebius
  namespace: agentgateway-system
spec:
  ai:
    provider:
      openai:
        model: "meta-llama/Llama-3.3-70B-Instruct"
  policies:
    auth:
      secretRef:
        name: nebius-secret
EOF

# Step 4: Route
kubectl apply -f- <<EOF
apiVersion: gateway.networking.k8s.io/v1
kind: HTTPRoute
metadata:
  name: nebius
  namespace: agentgateway-system
spec:
  parentRefs:
  - name: agentgateway-proxy
    namespace: agentgateway-system
  rules:
  - matches:
    - path:
        type: PathPrefix
        value: /nebius
    backendRefs:
    - name: nebius
      namespace: agentgateway-system
      group: agentgateway.dev
      kind: AgentgatewayBackend
EOF

# Step 5: Port-forward to test
kubectl port-forward -n agentgateway-system svc/agentgateway-proxy 8080:8080 &

Test it

curl "localhost:8080/nebius" -H content-type:application/json -d '{
  "model": "meta-llama/Llama-3.3-70B-Instruct",
  "messages": [{"role": "user", "content": "Hello!"}]
}' | jq

Novita AI

OpenAI-compat

8 models

meta-llama/llama-3.1-70b-instruct meta-llama/llama-3.1-405b-instruct meta-llama/llama-3.3-70b-instruct +5 more

api.novita.ai

Auth: $NOVITA_API_KEY

View configuration

Novita AI Configuration

Supported Models (8) — click a model to use it

meta-llama/llama-3.1-70b-instruct meta-llama/llama-3.1-405b-instruct meta-llama/llama-3.3-70b-instruct deepseek/deepseek-r1 deepseek/deepseek-v3-0324 Qwen/Qwen2.5-72B-Instruct mistralai/mistral-large-2411 microsoft/phi-4

Click any model above to update the configuration below, or edit the model field directly.

Save this as config.yaml and run with agentgateway -f config.yaml

llm:
  models:
  - name: "*"
    provider: openAI
    params:
      model: meta-llama/llama-3.3-70b-instruct
      apiKey: "$NOVITA_API_KEY"

Run these kubectl apply commands in order

# Step 1: Gateway
kubectl apply -f- <<EOF
apiVersion: gateway.networking.k8s.io/v1
kind: Gateway
metadata:
  name: agentgateway-proxy
  namespace: agentgateway-system
spec:
  gatewayClassName: agentgateway
  listeners:
  - protocol: HTTP
    port: 8080
    name: http
    allowedRoutes:
      namespaces:
        from: All
EOF


# Step 2: Secret
export NOVITA_API_KEY=<your-key>
kubectl apply -f- <<EOF
apiVersion: v1
kind: Secret
metadata:
  name: novita-secret
  namespace: agentgateway-system
type: Opaque
stringData:
  Authorization: $NOVITA_API_KEY
EOF

# Step 3: Backend
kubectl apply -f- <<EOF
apiVersion: agentgateway.dev/v1alpha1
kind: AgentgatewayBackend
metadata:
  name: novita
  namespace: agentgateway-system
spec:
  ai:
    provider:
      openai:
        model: "meta-llama/llama-3.3-70b-instruct"
  policies:
    auth:
      secretRef:
        name: novita-secret
EOF

# Step 4: Route
kubectl apply -f- <<EOF
apiVersion: gateway.networking.k8s.io/v1
kind: HTTPRoute
metadata:
  name: novita
  namespace: agentgateway-system
spec:
  parentRefs:
  - name: agentgateway-proxy
    namespace: agentgateway-system
  rules:
  - matches:
    - path:
        type: PathPrefix
        value: /novita
    backendRefs:
    - name: novita
      namespace: agentgateway-system
      group: agentgateway.dev
      kind: AgentgatewayBackend
EOF

# Step 5: Port-forward to test
kubectl port-forward -n agentgateway-system svc/agentgateway-proxy 8080:8080 &

Test it

curl "localhost:8080/novita" -H content-type:application/json -d '{
  "model": "meta-llama/llama-3.3-70b-instruct",
  "messages": [{"role": "user", "content": "Hello!"}]
}' | jq

Hyperbolic

OpenAI-compat

8 models

meta-llama/Llama-3.1-70B-Instruct meta-llama/Llama-3.1-405B-Instruct meta-llama/Llama-3.3-70B-Instruct +5 more

api.hyperbolic.xyz

Auth: $HYPERBOLIC_API_KEY

View configuration

Hyperbolic Configuration

Supported Models (8) — click a model to use it

meta-llama/Llama-3.1-70B-Instruct meta-llama/Llama-3.1-405B-Instruct meta-llama/Llama-3.3-70B-Instruct deepseek-ai/DeepSeek-R1 deepseek-ai/DeepSeek-V3 Qwen/Qwen2.5-72B-Instruct Qwen/QwQ-32B mistralai/Mistral-Small-24B-Instruct-2501

Click any model above to update the configuration below, or edit the model field directly.

Save this as config.yaml and run with agentgateway -f config.yaml

llm:
  models:
  - name: "*"
    provider: openAI
    params:
      model: meta-llama/Llama-3.3-70B-Instruct
      apiKey: "$HYPERBOLIC_API_KEY"

Run these kubectl apply commands in order

# Step 1: Gateway
kubectl apply -f- <<EOF
apiVersion: gateway.networking.k8s.io/v1
kind: Gateway
metadata:
  name: agentgateway-proxy
  namespace: agentgateway-system
spec:
  gatewayClassName: agentgateway
  listeners:
  - protocol: HTTP
    port: 8080
    name: http
    allowedRoutes:
      namespaces:
        from: All
EOF


# Step 2: Secret
export HYPERBOLIC_API_KEY=<your-key>
kubectl apply -f- <<EOF
apiVersion: v1
kind: Secret
metadata:
  name: hyperbolic-secret
  namespace: agentgateway-system
type: Opaque
stringData:
  Authorization: $HYPERBOLIC_API_KEY
EOF

# Step 3: Backend
kubectl apply -f- <<EOF
apiVersion: agentgateway.dev/v1alpha1
kind: AgentgatewayBackend
metadata:
  name: hyperbolic
  namespace: agentgateway-system
spec:
  ai:
    provider:
      openai:
        model: "meta-llama/Llama-3.3-70B-Instruct"
  policies:
    auth:
      secretRef:
        name: hyperbolic-secret
EOF

# Step 4: Route
kubectl apply -f- <<EOF
apiVersion: gateway.networking.k8s.io/v1
kind: HTTPRoute
metadata:
  name: hyperbolic
  namespace: agentgateway-system
spec:
  parentRefs:
  - name: agentgateway-proxy
    namespace: agentgateway-system
  rules:
  - matches:
    - path:
        type: PathPrefix
        value: /hyperbolic
    backendRefs:
    - name: hyperbolic
      namespace: agentgateway-system
      group: agentgateway.dev
      kind: AgentgatewayBackend
EOF

# Step 5: Port-forward to test
kubectl port-forward -n agentgateway-system svc/agentgateway-proxy 8080:8080 &

Test it

curl "localhost:8080/hyperbolic" -H content-type:application/json -d '{
  "model": "meta-llama/Llama-3.3-70B-Instruct",
  "messages": [{"role": "user", "content": "Hello!"}]
}' | jq

Enterprise & Regional Providers

Enterprise cloud platforms and regional AI providers with OpenAI-compatible APIs.

Databricks

OpenAI-compat

31 models

databricks-llama-4-maverick databricks-llama-4-scout databricks-meta-llama-3-3-70b-instruct +28 more

<your-workspace>.cloud.databricks.com

Auth: $DATABRICKS_TOKEN

View configuration

Databricks Configuration

Supported Models (31) — click a model to use it

databricks-llama-4-maverick databricks-llama-4-scout databricks-meta-llama-3-3-70b-instruct databricks-meta-llama-3-1-70b-instruct databricks-meta-llama-3-1-405b-instruct databricks-claude-opus-4-7 databricks-claude-opus-4-6 databricks-claude-opus-4-5 databricks-claude-opus-4-1 databricks-claude-opus-4 databricks-claude-sonnet-4-6 databricks-claude-sonnet-4-5 databricks-claude-sonnet-4 databricks-claude-haiku-4-5 databricks-gpt-5-5 databricks-gpt-5-4 databricks-gpt-5-2 databricks-gpt-5-1 databricks-gpt-5 databricks-gpt-5-mini databricks-gpt-5-nano databricks-gpt-oss-120b databricks-gpt-oss-20b databricks-gemini-3-1-pro databricks-gemini-3-pro databricks-gemini-3-flash databricks-gemini-2-5-pro databricks-gemini-2-5-flash databricks-deepseek-v4-pro databricks-qwen3-5-397b databricks-qwen3-235b

Click any model above to update the configuration below, or edit the model field directly.

Save this as config.yaml and run with agentgateway -f config.yaml

llm:
  models:
  - name: "*"
    provider: openAI
    params:
      model: databricks-meta-llama-3-1-70b-instruct
      host: <your-workspace>.cloud.databricks.com
      port: 443
      path: "/serving-endpoints/databricks-meta-llama-3-1-70b-instruct/invocations"
      apiKey: "$DATABRICKS_TOKEN"

Run these kubectl apply commands in order

# Step 1: Gateway
kubectl apply -f- <<EOF
apiVersion: gateway.networking.k8s.io/v1
kind: Gateway
metadata:
  name: agentgateway-proxy
  namespace: agentgateway-system
spec:
  gatewayClassName: agentgateway
  listeners:
  - protocol: HTTP
    port: 8080
    name: http
    allowedRoutes:
      namespaces:
        from: All
EOF


# Step 2: Secret
export DATABRICKS_TOKEN=<your-token>
kubectl apply -f- <<EOF
apiVersion: v1
kind: Secret
metadata:
  name: databricks-secret
  namespace: agentgateway-system
type: Opaque
stringData:
  Authorization: $DATABRICKS_TOKEN
EOF

# Step 3: Backend
kubectl apply -f- <<EOF
apiVersion: agentgateway.dev/v1alpha1
kind: AgentgatewayBackend
metadata:
  name: databricks
  namespace: agentgateway-system
spec:
  ai:
    provider:
      openai:
        model: databricks-meta-llama-3-1-70b-instruct
        host: <your-workspace>.cloud.databricks.com
        port: 443
        path: "/serving-endpoints/databricks-meta-llama-3-1-70b-instruct/invocations"
  policies:
    auth:
      secretRef:
        name: databricks-secret
    tls:
      sni: <your-workspace>.cloud.databricks.com
EOF

# Step 4: Route
kubectl apply -f- <<EOF
apiVersion: gateway.networking.k8s.io/v1
kind: HTTPRoute
metadata:
  name: databricks
  namespace: agentgateway-system
spec:
  parentRefs:
  - name: agentgateway-proxy
    namespace: agentgateway-system
  rules:
  - matches:
    - path:
        type: PathPrefix
        value: /databricks
    filters:
    - type: URLRewrite
      urlRewrite:
        hostname: <your-workspace>.cloud.databricks.com
    backendRefs:
    - name: databricks
      namespace: agentgateway-system
      group: agentgateway.dev
      kind: AgentgatewayBackend
EOF

# Step 5: Port-forward to test
kubectl port-forward -n agentgateway-system svc/agentgateway-proxy 8080:8080 &

Test it

curl "localhost:8080/databricks" -H content-type:application/json -d '{
  "model": "databricks-meta-llama-3-1-70b-instruct",
  "messages": [{"role": "user", "content": "Hello from Databricks!"}]
}' | jq

GitHub Models

OpenAI-compat

39 models

gpt-5.5 gpt-5.5-pro gpt-5.4 +36 more

models.inference.ai.azure.com

Auth: $GITHUB_TOKEN

View configuration

GitHub Models Configuration

Supported Models (39) — click a model to use it

gpt-5.5 gpt-5.5-pro gpt-5.4 gpt-5.2 gpt-5.1 gpt-5 gpt-5-mini gpt-5-nano gpt-4.1 gpt-4.1-mini gpt-4o gpt-4o-mini o1 o3 o3-mini o4-mini Phi-4 Phi-4-mini-instruct Phi-4-multimodal Phi-4-reasoning Llama-4-Maverick-17B-128E-Instruct-FP8 Llama-4-Scout-17B-16E-Instruct Llama-3.3-70B-Instruct Llama-3.1-405B-Instruct DeepSeek-V4-Pro DeepSeek-V4-Flash DeepSeek-R1 DeepSeek-V3-0324 Mistral-Large Mistral-Medium-3 Mistral-Small-3.1 Grok-4.3 Grok-4 Grok-3 Grok-3-Mini Cohere-Command-A-Reasoning Cohere-Command-A Cohere-command-r-plus AI21-Jamba-1.5-Large

Click any model above to update the configuration below, or edit the model field directly.

Save this as config.yaml and run with agentgateway -f config.yaml

llm:
  models:
  - name: "*"
    provider: openAI
    params:
      model: gpt-4o
      host: models.inference.ai.azure.com
      port: 443
      path: "/chat/completions"
      apiKey: "$GITHUB_TOKEN"

Run these kubectl apply commands in order

# Step 1: Gateway
kubectl apply -f- <<EOF
apiVersion: gateway.networking.k8s.io/v1
kind: Gateway
metadata:
  name: agentgateway-proxy
  namespace: agentgateway-system
spec:
  gatewayClassName: agentgateway
  listeners:
  - protocol: HTTP
    port: 8080
    name: http
    allowedRoutes:
      namespaces:
        from: All
EOF


# Step 2: Secret
export GITHUB_TOKEN=<your-github-pat>
kubectl apply -f- <<EOF
apiVersion: v1
kind: Secret
metadata:
  name: github-models-secret
  namespace: agentgateway-system
type: Opaque
stringData:
  Authorization: $GITHUB_TOKEN
EOF

# Step 3: Backend
kubectl apply -f- <<EOF
apiVersion: agentgateway.dev/v1alpha1
kind: AgentgatewayBackend
metadata:
  name: github-models
  namespace: agentgateway-system
spec:
  ai:
    provider:
      openai:
        model: gpt-4o
        host: models.inference.ai.azure.com
        port: 443
        path: "/chat/completions"
  policies:
    auth:
      secretRef:
        name: github-models-secret
    tls:
      sni: models.inference.ai.azure.com
EOF

# Step 4: Route
kubectl apply -f- <<EOF
apiVersion: gateway.networking.k8s.io/v1
kind: HTTPRoute
metadata:
  name: github-models
  namespace: agentgateway-system
spec:
  parentRefs:
  - name: agentgateway-proxy
    namespace: agentgateway-system
  rules:
  - matches:
    - path:
        type: PathPrefix
        value: /github-models
    filters:
    - type: URLRewrite
      urlRewrite:
        hostname: models.inference.ai.azure.com
    backendRefs:
    - name: github-models
      namespace: agentgateway-system
      group: agentgateway.dev
      kind: AgentgatewayBackend
EOF

# Step 5: Port-forward to test
kubectl port-forward -n agentgateway-system svc/agentgateway-proxy 8080:8080 &

Test it

curl "localhost:8080/github-models" -H content-type:application/json -d '{
  "model": "gpt-4o",
  "messages": [{"role": "user", "content": "Hello from GitHub Models!"}]
}' | jq

Scaleway

OpenAI-compat

8 models

llama-3.1-70b-instruct llama-3.3-70b-instruct mistral-nemo-instruct +5 more

api.scaleway.ai

Auth: $SCALEWAY_API_KEY

View configuration

Scaleway Configuration

Supported Models (8) — click a model to use it

llama-3.1-70b-instruct llama-3.3-70b-instruct mistral-nemo-instruct mixtral-8x7b-instruct qwen2.5-72b-instruct qwen3-32b-instruct deepseek-r1-distill-llama-70b deepseek-r1-distill-qwen-32b

Click any model above to update the configuration below, or edit the model field directly.

Save this as config.yaml and run with agentgateway -f config.yaml

llm:
  models:
  - name: "*"
    provider: openAI
    params:
      model: llama-3.1-70b-instruct
      host: api.scaleway.ai
      port: 443
      path: "/v1/chat/completions"
      apiKey: "$SCALEWAY_API_KEY"

Run these kubectl apply commands in order

# Step 1: Gateway
kubectl apply -f- <<EOF
apiVersion: gateway.networking.k8s.io/v1
kind: Gateway
metadata:
  name: agentgateway-proxy
  namespace: agentgateway-system
spec:
  gatewayClassName: agentgateway
  listeners:
  - protocol: HTTP
    port: 8080
    name: http
    allowedRoutes:
      namespaces:
        from: All
EOF


# Step 2: Secret
export SCALEWAY_API_KEY=<your-key>
kubectl apply -f- <<EOF
apiVersion: v1
kind: Secret
metadata:
  name: scaleway-secret
  namespace: agentgateway-system
type: Opaque
stringData:
  Authorization: $SCALEWAY_API_KEY
EOF

# Step 3: Backend
kubectl apply -f- <<EOF
apiVersion: agentgateway.dev/v1alpha1
kind: AgentgatewayBackend
metadata:
  name: scaleway
  namespace: agentgateway-system
spec:
  ai:
    provider:
      openai:
        model: llama-3.1-70b-instruct
        host: api.scaleway.ai
        port: 443
        path: "/v1/chat/completions"
  policies:
    auth:
      secretRef:
        name: scaleway-secret
    tls:
      sni: api.scaleway.ai
EOF

# Step 4: Route
kubectl apply -f- <<EOF
apiVersion: gateway.networking.k8s.io/v1
kind: HTTPRoute
metadata:
  name: scaleway
  namespace: agentgateway-system
spec:
  parentRefs:
  - name: agentgateway-proxy
    namespace: agentgateway-system
  rules:
  - matches:
    - path:
        type: PathPrefix
        value: /scaleway
    filters:
    - type: URLRewrite
      urlRewrite:
        hostname: api.scaleway.ai
    backendRefs:
    - name: scaleway
      namespace: agentgateway-system
      group: agentgateway.dev
      kind: AgentgatewayBackend
EOF

# Step 5: Port-forward to test
kubectl port-forward -n agentgateway-system svc/agentgateway-proxy 8080:8080 &

Test it

curl "localhost:8080/scaleway" -H content-type:application/json -d '{
  "model": "llama-3.1-70b-instruct",
  "messages": [{"role": "user", "content": "Hello from Scaleway!"}]
}' | jq

Dashscope (Qwen / Alibaba)

OpenAI-compat

26 models

qwen3.5-max qwen3.5-plus qwen3.5-flash +23 more

dashscope.aliyuncs.com

Auth: $DASHSCOPE_API_KEY

View configuration

Dashscope (Qwen / Alibaba) Configuration

Supported Models (26) — click a model to use it

qwen3.5-max qwen3.5-plus qwen3.5-flash qwen3.5-omni-plus qwen3-max qwen3-coder-plus qwen3-coder-flash qwen3-vl-max qwen3-vl-plus qwen3-vl-flash qwen-turbo qwen-plus qwen-max qwen-long qwen-flash qwq-plus qwen-deep-research qwen3-235b-a22b qwen3-30b-a3b qwen2.5-72b-instruct qwen2.5-32b-instruct qwen2.5-14b-instruct qwen2.5-7b-instruct qwen-vl-max qwen-vl-plus qwen-coder-turbo

Click any model above to update the configuration below, or edit the model field directly.

Save this as config.yaml and run with agentgateway -f config.yaml

llm:
  models:
  - name: "*"
    provider: openAI
    params:
      model: qwen-max
      apiKey: "$DASHSCOPE_API_KEY"

Run these kubectl apply commands in order

# Step 1: Gateway
kubectl apply -f- <<EOF
apiVersion: gateway.networking.k8s.io/v1
kind: Gateway
metadata:
  name: agentgateway-proxy
  namespace: agentgateway-system
spec:
  gatewayClassName: agentgateway
  listeners:
  - protocol: HTTP
    port: 8080
    name: http
    allowedRoutes:
      namespaces:
        from: All
EOF


# Step 2: Secret
export DASHSCOPE_API_KEY=<your-key>
kubectl apply -f- <<EOF
apiVersion: v1
kind: Secret
metadata:
  name: dashscope-secret
  namespace: agentgateway-system
type: Opaque
stringData:
  Authorization: $DASHSCOPE_API_KEY
EOF

# Step 3: Backend
kubectl apply -f- <<EOF
apiVersion: agentgateway.dev/v1alpha1
kind: AgentgatewayBackend
metadata:
  name: dashscope
  namespace: agentgateway-system
spec:
  ai:
    provider:
      openai:
        model: "qwen-max"
  policies:
    auth:
      secretRef:
        name: dashscope-secret
EOF

# Step 4: Route
kubectl apply -f- <<EOF
apiVersion: gateway.networking.k8s.io/v1
kind: HTTPRoute
metadata:
  name: dashscope
  namespace: agentgateway-system
spec:
  parentRefs:
  - name: agentgateway-proxy
    namespace: agentgateway-system
  rules:
  - matches:
    - path:
        type: PathPrefix
        value: /dashscope
    backendRefs:
    - name: dashscope
      namespace: agentgateway-system
      group: agentgateway.dev
      kind: AgentgatewayBackend
EOF

# Step 5: Port-forward to test
kubectl port-forward -n agentgateway-system svc/agentgateway-proxy 8080:8080 &

Test it

curl "localhost:8080/dashscope" -H content-type:application/json -d '{
  "model": "qwen-max",
  "messages": [{"role": "user", "content": "Hello!"}]
}' | jq

Moonshot AI

OpenAI-compat

8 models

kimi-latest kimi-k2.6 kimi-k2.5 +5 more

api.moonshot.cn

Auth: $MOONSHOT_API_KEY

View configuration

Moonshot AI Configuration

Supported Models (8) — click a model to use it

kimi-latest kimi-k2.6 kimi-k2.5 kimi-k2 moonshot-v1-auto moonshot-v1-128k moonshot-v1-32k moonshot-v1-8k

Click any model above to update the configuration below, or edit the model field directly.

Save this as config.yaml and run with agentgateway -f config.yaml

llm:
  models:
  - name: "*"
    provider: openAI
    params:
      model: kimi-latest
      apiKey: "$MOONSHOT_API_KEY"

Run these kubectl apply commands in order

# Step 1: Gateway
# Declares the shared agentgateway-proxy Gateway: a single HTTP listener on
# port 8080 that admits routes from every namespace.
kubectl apply -f- <<EOF
apiVersion: gateway.networking.k8s.io/v1
kind: Gateway
metadata:
  namespace: agentgateway-system
  name: agentgateway-proxy
spec:
  gatewayClassName: agentgateway
  listeners:
  - name: http
    protocol: HTTP
    port: 8080
    allowedRoutes:
      namespaces:
        from: All
EOF


# Step 2: Secret
# The heredoc delimiter is unquoted, so the shell expands the exported key
# into the manifest before kubectl receives it.
export MOONSHOT_API_KEY=<your-key>
kubectl apply -f- <<EOF
apiVersion: v1
kind: Secret
type: Opaque
metadata:
  namespace: agentgateway-system
  name: moonshot-secret
stringData:
  Authorization: $MOONSHOT_API_KEY
EOF

# Step 3: Backend
# AgentgatewayBackend using the openai provider with model kimi-latest,
# reading its credentials from moonshot-secret.
# NOTE(review): no host override appears in this manifest; confirm how
# traffic is directed to api.moonshot.cn rather than the provider default.
kubectl apply -f- <<EOF
apiVersion: agentgateway.dev/v1alpha1
kind: AgentgatewayBackend
metadata:
  namespace: agentgateway-system
  name: moonshot
spec:
  policies:
    auth:
      secretRef:
        name: moonshot-secret
  ai:
    provider:
      openai:
        model: 'kimi-latest'
EOF

# Step 4: Route
# Requests whose path starts with /moonshot are sent through the
# agentgateway-proxy Gateway to the moonshot backend.
kubectl apply -f- <<EOF
apiVersion: gateway.networking.k8s.io/v1
kind: HTTPRoute
metadata:
  namespace: agentgateway-system
  name: moonshot
spec:
  parentRefs:
  - namespace: agentgateway-system
    name: agentgateway-proxy
  rules:
  - backendRefs:
    - group: agentgateway.dev
      kind: AgentgatewayBackend
      namespace: agentgateway-system
      name: moonshot
    matches:
    - path:
        value: /moonshot
        type: PathPrefix
EOF

# Step 5: Port-forward to test
# Backgrounded with the trailing &; maps local port 8080 to port 8080 of the
# agentgateway-proxy Service.
kubectl port-forward --namespace agentgateway-system svc/agentgateway-proxy 8080:8080 &

Test it

# The port-forward step exposes the gateway on local port 8080, so the test
# request targets that port; /moonshot matches the HTTPRoute path prefix.
curl "localhost:8080/moonshot" -H content-type:application/json -d '{
  "model": "kimi-latest",
  "messages": [{"role": "user", "content": "Hello!"}]
}' | jq

Zhipu AI (Z.AI)

OpenAI-compat

13 models

glm-5.1 glm-5 glm-4.6 +10 more

open.bigmodel.cn

Auth: $ZHIPU_API_KEY

View configuration

Zhipu AI (Z.AI) Configuration

Supported Models (13) — click a model to use it

glm-5.1 glm-5 glm-4.6 glm-4-plus glm-4-air glm-4-airx glm-4-flash glm-4-flashx glm-4-long glm-4 glm-4v-plus glm-4v codegeex-4

Click any model above to update the configuration below, or edit the model field directly.

Save this as config.yaml and run with agentgateway -f config.yaml

# Standalone config: one wildcard ("*") model entry served by the openAI
# provider with model glm-4-plus, authenticated via ZHIPU_API_KEY.
# NOTE(review): no endpoint override is set in this snippet; confirm how
# requests reach open.bigmodel.cn rather than the provider's default host.
llm:
  models:
  - provider: openAI
    name: '*'
    params:
      apiKey: "$ZHIPU_API_KEY"
      model: glm-4-plus

Run these kubectl apply commands in order

# Step 1: Gateway
# Declares the shared agentgateway-proxy Gateway: a single HTTP listener on
# port 8080 that admits routes from every namespace.
kubectl apply -f- <<EOF
apiVersion: gateway.networking.k8s.io/v1
kind: Gateway
metadata:
  namespace: agentgateway-system
  name: agentgateway-proxy
spec:
  gatewayClassName: agentgateway
  listeners:
  - name: http
    protocol: HTTP
    port: 8080
    allowedRoutes:
      namespaces:
        from: All
EOF


# Step 2: Secret
# The heredoc delimiter is unquoted, so the shell expands the exported key
# into the manifest before kubectl receives it.
export ZHIPU_API_KEY=<your-key>
kubectl apply -f- <<EOF
apiVersion: v1
kind: Secret
type: Opaque
metadata:
  namespace: agentgateway-system
  name: zhipu-secret
stringData:
  Authorization: $ZHIPU_API_KEY
EOF

# Step 3: Backend
# AgentgatewayBackend using the openai provider with model glm-4-plus,
# reading its credentials from zhipu-secret.
# NOTE(review): no host override appears in this manifest; confirm how
# traffic is directed to open.bigmodel.cn rather than the provider default.
kubectl apply -f- <<EOF
apiVersion: agentgateway.dev/v1alpha1
kind: AgentgatewayBackend
metadata:
  namespace: agentgateway-system
  name: zhipu
spec:
  policies:
    auth:
      secretRef:
        name: zhipu-secret
  ai:
    provider:
      openai:
        model: 'glm-4-plus'
EOF

# Step 4: Route
# Requests whose path starts with /zhipu are sent through the
# agentgateway-proxy Gateway to the zhipu backend.
kubectl apply -f- <<EOF
apiVersion: gateway.networking.k8s.io/v1
kind: HTTPRoute
metadata:
  namespace: agentgateway-system
  name: zhipu
spec:
  parentRefs:
  - namespace: agentgateway-system
    name: agentgateway-proxy
  rules:
  - backendRefs:
    - group: agentgateway.dev
      kind: AgentgatewayBackend
      namespace: agentgateway-system
      name: zhipu
    matches:
    - path:
        value: /zhipu
        type: PathPrefix
EOF

# Step 5: Port-forward to test
# Backgrounded with the trailing &; maps local port 8080 to port 8080 of the
# agentgateway-proxy Service.
kubectl port-forward --namespace agentgateway-system svc/agentgateway-proxy 8080:8080 &

Test it

# The port-forward step exposes the gateway on local port 8080, so the test
# request targets that port; /zhipu matches the HTTPRoute path prefix.
curl "localhost:8080/zhipu" -H content-type:application/json -d '{
  "model": "glm-4-plus",
  "messages": [{"role": "user", "content": "Hello!"}]
}' | jq

Volcano Engine (ByteDance)

OpenAI-compat

13 models

doubao-seed-2.0-pro doubao-seed-2.0-lite doubao-seed-2.0-mini +10 more

maas-api.ml-platform-cn.volces.com

Auth: $VOLC_API_KEY

View configuration

Volcano Engine (ByteDance) Configuration

Supported Models (13) — click a model to use it

doubao-seed-2.0-pro doubao-seed-2.0-lite doubao-seed-2.0-mini doubao-seed-2.0-code seed3d-2.0 doubao-pro-256k doubao-pro-128k doubao-pro-32k doubao-lite-128k doubao-lite-32k doubao-vision-pro-32k doubao-character-pro-32k doubao-embedding

Click any model above to update the configuration below, or edit the model field directly.

Save this as config.yaml and run with agentgateway -f config.yaml

# Standalone config: one wildcard ("*") model entry served by the openAI
# provider with model doubao-pro-32k, authenticated via VOLC_API_KEY.
# NOTE(review): no endpoint override is set in this snippet; confirm how
# requests reach maas-api.ml-platform-cn.volces.com rather than the
# provider's default host.
llm:
  models:
  - provider: openAI
    name: '*'
    params:
      apiKey: "$VOLC_API_KEY"
      model: doubao-pro-32k

Run these kubectl apply commands in order

# Step 1: Gateway
# Declares the shared agentgateway-proxy Gateway: a single HTTP listener on
# port 8080 that admits routes from every namespace.
kubectl apply -f- <<EOF
apiVersion: gateway.networking.k8s.io/v1
kind: Gateway
metadata:
  namespace: agentgateway-system
  name: agentgateway-proxy
spec:
  gatewayClassName: agentgateway
  listeners:
  - name: http
    protocol: HTTP
    port: 8080
    allowedRoutes:
      namespaces:
        from: All
EOF


# Step 2: Secret
# The heredoc delimiter is unquoted, so the shell expands the exported key
# into the manifest before kubectl receives it.
export VOLC_API_KEY=<your-key>
kubectl apply -f- <<EOF
apiVersion: v1
kind: Secret
type: Opaque
metadata:
  namespace: agentgateway-system
  name: volcengine-secret
stringData:
  Authorization: $VOLC_API_KEY
EOF

# Step 3: Backend
# AgentgatewayBackend using the openai provider with model doubao-pro-32k,
# reading its credentials from volcengine-secret.
# NOTE(review): no host override appears in this manifest; confirm how
# traffic is directed to maas-api.ml-platform-cn.volces.com rather than the
# provider default.
kubectl apply -f- <<EOF
apiVersion: agentgateway.dev/v1alpha1
kind: AgentgatewayBackend
metadata:
  namespace: agentgateway-system
  name: volcengine
spec:
  policies:
    auth:
      secretRef:
        name: volcengine-secret
  ai:
    provider:
      openai:
        model: 'doubao-pro-32k'
EOF

# Step 4: Route
# Requests whose path starts with /volcengine are sent through the
# agentgateway-proxy Gateway to the volcengine backend.
kubectl apply -f- <<EOF
apiVersion: gateway.networking.k8s.io/v1
kind: HTTPRoute
metadata:
  namespace: agentgateway-system
  name: volcengine
spec:
  parentRefs:
  - namespace: agentgateway-system
    name: agentgateway-proxy
  rules:
  - backendRefs:
    - group: agentgateway.dev
      kind: AgentgatewayBackend
      namespace: agentgateway-system
      name: volcengine
    matches:
    - path:
        value: /volcengine
        type: PathPrefix
EOF

# Step 5: Port-forward to test
# Backgrounded with the trailing &; maps local port 8080 to port 8080 of the
# agentgateway-proxy Service.
kubectl port-forward --namespace agentgateway-system svc/agentgateway-proxy 8080:8080 &

Test it

# The port-forward step exposes the gateway on local port 8080, so the test
# request targets that port; /volcengine matches the HTTPRoute path prefix.
curl "localhost:8080/volcengine" -H content-type:application/json -d '{
  "model": "doubao-pro-32k",
  "messages": [{"role": "user", "content": "Hello!"}]
}' | jq

IBM watsonx

OpenAI-compat

19 models

ibm/granite-3-8b-instruct ibm/granite-3-2b-instruct ibm/granite-3.1-8b-instruct +16 more

{region}.ml.cloud.ibm.com

Auth: $WATSONX_API_KEY

View configuration

IBM watsonx Configuration

Supported Models (19) — click a model to use it

ibm/granite-3-8b-instruct ibm/granite-3-2b-instruct ibm/granite-3.1-8b-instruct ibm/granite-3.1-2b-instruct ibm/granite-3-3-8b-instruct ibm/granite-3-2-8b-instruct ibm/granite-guardian-3-8b ibm/granite-vision-3.1-8b ibm/granite-vision-3-2-2b ibm/granite-20b-multilingual ibm/granite-embedding-125m-english ibm/granite-embedding-278m-multilingual meta-llama/llama-3-1-70b-instruct meta-llama/llama-3-1-8b-instruct meta-llama/llama-3-3-70b-instruct meta-llama/llama-4-maverick-17b-128e-instruct-fp8 meta-llama/llama-3-2-90b-vision-instruct mistralai/mistral-large openai/gpt-oss-120b

Click any model above to update the configuration below, or edit the model field directly.

Save this as config.yaml and run with agentgateway -f config.yaml

# Standalone config: one wildcard ("*") model entry served by the openAI
# provider with model ibm/granite-3.1-8b-instruct, authenticated via
# WATSONX_API_KEY.
# NOTE(review): no endpoint override is set in this snippet; confirm how
# requests reach {region}.ml.cloud.ibm.com rather than the provider's
# default host.
llm:
  models:
  - provider: openAI
    name: '*'
    params:
      apiKey: "$WATSONX_API_KEY"
      model: ibm/granite-3.1-8b-instruct

Run these kubectl apply commands in order

# Step 1: Gateway
# Declares the shared agentgateway-proxy Gateway: a single HTTP listener on
# port 8080 that admits routes from every namespace.
kubectl apply -f- <<EOF
apiVersion: gateway.networking.k8s.io/v1
kind: Gateway
metadata:
  namespace: agentgateway-system
  name: agentgateway-proxy
spec:
  gatewayClassName: agentgateway
  listeners:
  - name: http
    protocol: HTTP
    port: 8080
    allowedRoutes:
      namespaces:
        from: All
EOF


# Step 2: Secret
# The heredoc delimiter is unquoted, so the shell expands the exported key
# into the manifest before kubectl receives it.
export WATSONX_API_KEY=<your-key>
kubectl apply -f- <<EOF
apiVersion: v1
kind: Secret
type: Opaque
metadata:
  namespace: agentgateway-system
  name: watsonx-secret
stringData:
  Authorization: $WATSONX_API_KEY
EOF

# Step 3: Backend
# AgentgatewayBackend using the openai provider with model
# ibm/granite-3.1-8b-instruct, reading its credentials from watsonx-secret.
# NOTE(review): no host override appears in this manifest; confirm how
# traffic is directed to {region}.ml.cloud.ibm.com rather than the provider
# default.
kubectl apply -f- <<EOF
apiVersion: agentgateway.dev/v1alpha1
kind: AgentgatewayBackend
metadata:
  namespace: agentgateway-system
  name: watsonx
spec:
  policies:
    auth:
      secretRef:
        name: watsonx-secret
  ai:
    provider:
      openai:
        model: 'ibm/granite-3.1-8b-instruct'
EOF

# Step 4: Route
# Requests whose path starts with /watsonx are sent through the
# agentgateway-proxy Gateway to the watsonx backend.
kubectl apply -f- <<EOF
apiVersion: gateway.networking.k8s.io/v1
kind: HTTPRoute
metadata:
  namespace: agentgateway-system
  name: watsonx
spec:
  parentRefs:
  - namespace: agentgateway-system
    name: agentgateway-proxy
  rules:
  - backendRefs:
    - group: agentgateway.dev
      kind: AgentgatewayBackend
      namespace: agentgateway-system
      name: watsonx
    matches:
    - path:
        value: /watsonx
        type: PathPrefix
EOF

# Step 5: Port-forward to test
# Backgrounded with the trailing &; maps local port 8080 to port 8080 of the
# agentgateway-proxy Service.
kubectl port-forward --namespace agentgateway-system svc/agentgateway-proxy 8080:8080 &

Test it

# The port-forward step exposes the gateway on local port 8080, so the test
# request targets that port; /watsonx matches the HTTPRoute path prefix.
curl "localhost:8080/watsonx" -H content-type:application/json -d '{
  "model": "ibm/granite-3.1-8b-instruct",
  "messages": [{"role": "user", "content": "Hello!"}]
}' | jq

Snowflake Cortex

OpenAI-compat

25 models

claude-opus-4-7 claude-sonnet-4-6 claude-sonnet-4-5 +22 more

{account}.snowflakecomputing.com

Auth: No API key needed

View configuration

Snowflake Cortex Configuration

Supported Models (25) — click a model to use it

claude-opus-4-7 claude-sonnet-4-6 claude-sonnet-4-5 claude-haiku-4-5 claude-4-sonnet claude-3-5-sonnet llama4-maverick llama4-scout llama3.3-70b snowflake-llama-3.3-70b llama3.1-405b llama3.1-70b llama3.1-8b mistral-large2 mixtral-8x7b deepseek-v4-pro deepseek-r1 openai-gpt-5-5 openai-gpt-5 openai-gpt-4.1 reka-core reka-flash jamba-1.5-large snowflake-arctic gemma-7b

Click any model above to update the configuration below, or edit the model field directly.

Save this as config.yaml and run with agentgateway -f config.yaml

# Standalone config: one wildcard ("*") model entry served by the openAI
# provider with model llama3.3-70b; no API key is configured.
# NOTE(review): neither an endpoint override nor credentials are set here;
# confirm how requests reach and authenticate against
# {account}.snowflakecomputing.com.
llm:
  models:
  - provider: openAI
    name: '*'
    params:
      model: llama3.3-70b

Run these kubectl apply commands in order

# Step 1: Gateway
# Declares the shared agentgateway-proxy Gateway: a single HTTP listener on
# port 8080 that admits routes from every namespace.
kubectl apply -f- <<EOF
apiVersion: gateway.networking.k8s.io/v1
kind: Gateway
metadata:
  namespace: agentgateway-system
  name: agentgateway-proxy
spec:
  gatewayClassName: agentgateway
  listeners:
  - name: http
    protocol: HTTP
    port: 8080
    allowedRoutes:
      namespaces:
        from: All
EOF

# Step 2: Backend
# AgentgatewayBackend using the openai provider with model llama3.3-70b; no
# auth policy is attached.
# NOTE(review): no host override or credentials appear in this manifest;
# confirm how traffic is directed to {account}.snowflakecomputing.com.
kubectl apply -f- <<EOF
apiVersion: agentgateway.dev/v1alpha1
kind: AgentgatewayBackend
metadata:
  namespace: agentgateway-system
  name: snowflake
spec:
  ai:
    provider:
      openai:
        model: 'llama3.3-70b'
EOF

# Step 3: Route
# Requests whose path starts with /snowflake are sent through the
# agentgateway-proxy Gateway to the snowflake backend.
kubectl apply -f- <<EOF
apiVersion: gateway.networking.k8s.io/v1
kind: HTTPRoute
metadata:
  namespace: agentgateway-system
  name: snowflake
spec:
  parentRefs:
  - namespace: agentgateway-system
    name: agentgateway-proxy
  rules:
  - backendRefs:
    - group: agentgateway.dev
      kind: AgentgatewayBackend
      namespace: agentgateway-system
      name: snowflake
    matches:
    - path:
        value: /snowflake
        type: PathPrefix
EOF

# Step 4: Port-forward to test
# Backgrounded with the trailing &; maps local port 8080 to port 8080 of the
# agentgateway-proxy Service.
kubectl port-forward --namespace agentgateway-system svc/agentgateway-proxy 8080:8080 &

Test it

# The port-forward step exposes the gateway on local port 8080, so the test
# request targets that port; /snowflake matches the HTTPRoute path prefix.
curl "localhost:8080/snowflake" -H content-type:application/json -d '{
  "model": "llama3.3-70b",
  "messages": [{"role": "user", "content": "Hello!"}]
}' | jq

OVHcloud AI

OpenAI-compat

8 models

DeepSeek-R1-Distill-Llama-70B Llama-3.3-70B-Instruct Llama-3.1-70B-Instruct +5 more

llama-3-3-70b-instruct.endpoints.kepler.ai.cloud.ovh.net

Auth: $OVH_API_KEY

View configuration

OVHcloud AI Configuration

Supported Models (8) — click a model to use it

DeepSeek-R1-Distill-Llama-70B Llama-3.3-70B-Instruct Llama-3.1-70B-Instruct Mistral-Large-Instruct-2411 Mixtral-8x22B-Instruct-v0.1 Mixtral-8x7B-Instruct-v0.1 Qwen2.5-72B-Instruct Phi-3-mini-4k-instruct

Click any model above to update the configuration below, or edit the model field directly.

Save this as config.yaml and run with agentgateway -f config.yaml

# Standalone config: one wildcard ("*") model entry served by the openAI
# provider with model Llama-3.3-70B-Instruct, authenticated via OVH_API_KEY.
# NOTE(review): no endpoint override is set in this snippet; confirm how
# requests reach llama-3-3-70b-instruct.endpoints.kepler.ai.cloud.ovh.net
# rather than the provider's default host.
llm:
  models:
  - provider: openAI
    name: '*'
    params:
      apiKey: "$OVH_API_KEY"
      model: Llama-3.3-70B-Instruct

Run these kubectl apply commands in order

# Step 1: Gateway
# Declares the shared agentgateway-proxy Gateway: a single HTTP listener on
# port 8080 that admits routes from every namespace.
kubectl apply -f- <<EOF
apiVersion: gateway.networking.k8s.io/v1
kind: Gateway
metadata:
  namespace: agentgateway-system
  name: agentgateway-proxy
spec:
  gatewayClassName: agentgateway
  listeners:
  - name: http
    protocol: HTTP
    port: 8080
    allowedRoutes:
      namespaces:
        from: All
EOF


# Step 2: Secret
# The heredoc delimiter is unquoted, so the shell expands the exported key
# into the manifest before kubectl receives it.
export OVH_API_KEY=<your-key>
kubectl apply -f- <<EOF
apiVersion: v1
kind: Secret
type: Opaque
metadata:
  namespace: agentgateway-system
  name: ovhcloud-secret
stringData:
  Authorization: $OVH_API_KEY
EOF

# Step 3: Backend
# AgentgatewayBackend using the openai provider with model
# Llama-3.3-70B-Instruct, reading its credentials from ovhcloud-secret.
# NOTE(review): no host override appears in this manifest; confirm how
# traffic is directed to the OVHcloud endpoint rather than the provider
# default.
kubectl apply -f- <<EOF
apiVersion: agentgateway.dev/v1alpha1
kind: AgentgatewayBackend
metadata:
  namespace: agentgateway-system
  name: ovhcloud
spec:
  policies:
    auth:
      secretRef:
        name: ovhcloud-secret
  ai:
    provider:
      openai:
        model: 'Llama-3.3-70B-Instruct'
EOF

# Step 4: Route
# Requests whose path starts with /ovhcloud are sent through the
# agentgateway-proxy Gateway to the ovhcloud backend.
kubectl apply -f- <<EOF
apiVersion: gateway.networking.k8s.io/v1
kind: HTTPRoute
metadata:
  namespace: agentgateway-system
  name: ovhcloud
spec:
  parentRefs:
  - namespace: agentgateway-system
    name: agentgateway-proxy
  rules:
  - backendRefs:
    - group: agentgateway.dev
      kind: AgentgatewayBackend
      namespace: agentgateway-system
      name: ovhcloud
    matches:
    - path:
        value: /ovhcloud
        type: PathPrefix
EOF

# Step 5: Port-forward to test
# Backgrounded with the trailing &; maps local port 8080 to port 8080 of the
# agentgateway-proxy Service.
kubectl port-forward --namespace agentgateway-system svc/agentgateway-proxy 8080:8080 &

Test it

# The port-forward step exposes the gateway on local port 8080, so the test
# request targets that port; /ovhcloud matches the HTTPRoute path prefix.
curl "localhost:8080/ovhcloud" -H content-type:application/json -d '{
  "model": "Llama-3.3-70B-Instruct",
  "messages": [{"role": "user", "content": "Hello!"}]
}' | jq

Oracle Cloud OCI

OpenAI-compat

6 models

meta.llama-3.1-405b-instruct meta.llama-3.1-70b-instruct meta.llama-3.3-70b-instruct +3 more

inference.generativeai.{region}.oci.oraclecloud.com

Auth: $OCI_API_KEY

View configuration

Oracle Cloud OCI Configuration

Supported Models (6) — click a model to use it

meta.llama-3.1-405b-instruct meta.llama-3.1-70b-instruct meta.llama-3.3-70b-instruct cohere.command-r-plus cohere.command-r meta.llama-3.2-90b-vision-instruct

Click any model above to update the configuration below, or edit the model field directly.

Save this as config.yaml and run with agentgateway -f config.yaml

# Standalone config: one wildcard ("*") model entry served by the openAI
# provider with model meta.llama-3.3-70b-instruct, authenticated via
# OCI_API_KEY.
# NOTE(review): no endpoint override is set in this snippet; confirm how
# requests reach inference.generativeai.{region}.oci.oraclecloud.com rather
# than the provider's default host.
llm:
  models:
  - provider: openAI
    name: '*'
    params:
      apiKey: "$OCI_API_KEY"
      model: meta.llama-3.3-70b-instruct

Run these kubectl apply commands in order

# Step 1: Gateway
# Declares the shared agentgateway-proxy Gateway: a single HTTP listener on
# port 8080 that admits routes from every namespace.
kubectl apply -f- <<EOF
apiVersion: gateway.networking.k8s.io/v1
kind: Gateway
metadata:
  namespace: agentgateway-system
  name: agentgateway-proxy
spec:
  gatewayClassName: agentgateway
  listeners:
  - name: http
    protocol: HTTP
    port: 8080
    allowedRoutes:
      namespaces:
        from: All
EOF


# Step 2: Secret
# The heredoc delimiter is unquoted, so the shell expands the exported key
# into the manifest before kubectl receives it.
export OCI_API_KEY=<your-key>
kubectl apply -f- <<EOF
apiVersion: v1
kind: Secret
type: Opaque
metadata:
  namespace: agentgateway-system
  name: oci-secret
stringData:
  Authorization: $OCI_API_KEY
EOF

# Step 3: Backend
# AgentgatewayBackend using the openai provider with model
# meta.llama-3.3-70b-instruct, reading its credentials from oci-secret.
# NOTE(review): no host override appears in this manifest; confirm how
# traffic is directed to the OCI inference endpoint rather than the provider
# default.
kubectl apply -f- <<EOF
apiVersion: agentgateway.dev/v1alpha1
kind: AgentgatewayBackend
metadata:
  namespace: agentgateway-system
  name: oci
spec:
  policies:
    auth:
      secretRef:
        name: oci-secret
  ai:
    provider:
      openai:
        model: 'meta.llama-3.3-70b-instruct'
EOF

# Step 4: Route
# Requests whose path starts with /oci are sent through the
# agentgateway-proxy Gateway to the oci backend.
kubectl apply -f- <<EOF
apiVersion: gateway.networking.k8s.io/v1
kind: HTTPRoute
metadata:
  namespace: agentgateway-system
  name: oci
spec:
  parentRefs:
  - namespace: agentgateway-system
    name: agentgateway-proxy
  rules:
  - backendRefs:
    - group: agentgateway.dev
      kind: AgentgatewayBackend
      namespace: agentgateway-system
      name: oci
    matches:
    - path:
        value: /oci
        type: PathPrefix
EOF

# Step 5: Port-forward to test
# Backgrounded with the trailing &; maps local port 8080 to port 8080 of the
# agentgateway-proxy Service.
kubectl port-forward --namespace agentgateway-system svc/agentgateway-proxy 8080:8080 &

Test it

# The port-forward step exposes the gateway on local port 8080, so the test
# request targets that port; /oci matches the HTTPRoute path prefix.
curl "localhost:8080/oci" -H content-type:application/json -d '{
  "model": "meta.llama-3.3-70b-instruct",
  "messages": [{"role": "user", "content": "Hello!"}]
}' | jq

Anyscale

OpenAI-compat

7 models

meta-llama/Llama-3-70b-chat-hf meta-llama/Llama-3-8b-chat-hf mistralai/Mixtral-8x22B-Instruct-v0.1 +4 more

api.endpoints.anyscale.com

Auth: $ANYSCALE_API_KEY

View configuration

Anyscale Configuration

Supported Models (7) — click a model to use it

meta-llama/Llama-3-70b-chat-hf meta-llama/Llama-3-8b-chat-hf mistralai/Mixtral-8x22B-Instruct-v0.1 mistralai/Mixtral-8x7B-Instruct-v0.1 mistralai/Mistral-7B-Instruct-v0.1 google/gemma-7b-it codellama/CodeLlama-70b-Instruct-hf

Click any model above to update the configuration below, or edit the model field directly.

Save this as config.yaml and run with agentgateway -f config.yaml

# Standalone config: one wildcard ("*") model entry served by the openAI
# provider with model meta-llama/Llama-3-70b-chat-hf, authenticated via
# ANYSCALE_API_KEY.
# NOTE(review): no endpoint override is set in this snippet; confirm how
# requests reach api.endpoints.anyscale.com rather than the provider's
# default host.
llm:
  models:
  - provider: openAI
    name: '*'
    params:
      apiKey: "$ANYSCALE_API_KEY"
      model: meta-llama/Llama-3-70b-chat-hf

Run these kubectl apply commands in order

# Step 1: Gateway
# Declares the shared agentgateway-proxy Gateway: a single HTTP listener on
# port 8080 that admits routes from every namespace.
kubectl apply -f- <<EOF
apiVersion: gateway.networking.k8s.io/v1
kind: Gateway
metadata:
  namespace: agentgateway-system
  name: agentgateway-proxy
spec:
  gatewayClassName: agentgateway
  listeners:
  - name: http
    protocol: HTTP
    port: 8080
    allowedRoutes:
      namespaces:
        from: All
EOF


# Step 2: Secret
# The heredoc delimiter is unquoted, so the shell expands the exported key
# into the manifest before kubectl receives it.
export ANYSCALE_API_KEY=<your-key>
kubectl apply -f- <<EOF
apiVersion: v1
kind: Secret
type: Opaque
metadata:
  namespace: agentgateway-system
  name: anyscale-secret
stringData:
  Authorization: $ANYSCALE_API_KEY
EOF

# Step 3: Backend
# AgentgatewayBackend using the openai provider with model
# meta-llama/Llama-3-70b-chat-hf, reading its credentials from
# anyscale-secret.
# NOTE(review): no host override appears in this manifest; confirm how
# traffic is directed to api.endpoints.anyscale.com rather than the provider
# default.
kubectl apply -f- <<EOF
apiVersion: agentgateway.dev/v1alpha1
kind: AgentgatewayBackend
metadata:
  namespace: agentgateway-system
  name: anyscale
spec:
  policies:
    auth:
      secretRef:
        name: anyscale-secret
  ai:
    provider:
      openai:
        model: 'meta-llama/Llama-3-70b-chat-hf'
EOF

# Step 4: Route
# Requests whose path starts with /anyscale are sent through the
# agentgateway-proxy Gateway to the anyscale backend.
kubectl apply -f- <<EOF
apiVersion: gateway.networking.k8s.io/v1
kind: HTTPRoute
metadata:
  namespace: agentgateway-system
  name: anyscale
spec:
  parentRefs:
  - namespace: agentgateway-system
    name: agentgateway-proxy
  rules:
  - backendRefs:
    - group: agentgateway.dev
      kind: AgentgatewayBackend
      namespace: agentgateway-system
      name: anyscale
    matches:
    - path:
        value: /anyscale
        type: PathPrefix
EOF

# Step 5: Port-forward to test
# Backgrounded with the trailing &; maps local port 8080 to port 8080 of the
# agentgateway-proxy Service.
kubectl port-forward --namespace agentgateway-system svc/agentgateway-proxy 8080:8080 &

Test it

# The port-forward step exposes the gateway on local port 8080, so the test
# request targets that port; /anyscale matches the HTTPRoute path prefix.
curl "localhost:8080/anyscale" -H content-type:application/json -d '{
  "model": "meta-llama/Llama-3-70b-chat-hf",
  "messages": [{"role": "user", "content": "Hello!"}]
}' | jq

Local & Self-Hosted

Run models locally or in-cluster. No TLS or external API keys required.

Ollama

Local

46 models

llama4 llama3.3 llama3.2 +43 more

localhost / in-cluster

Auth: No API key needed

View configuration

Ollama Configuration

Supported Models (46) — click a model to use it

llama4 llama3.3 llama3.2 llama3.2-vision llama3.1 llama3.1:70b mistral mistral-small mixtral magistral devstral gemma4 gemma3 gemma3n gemma2 qwen3 qwen3-coder qwen3.5 qwen3.6 qwen2.5 qwen2.5-coder qwq phi4 phi4-mini phi4-multimodal phi4-reasoning phi3 deepseek-v4 deepseek-v3.2 deepseek-v3.1 deepseek-v3 deepseek-r1 kimi-k2.6 kimi-k2 minimax-m2.7 glm-5.1 glm-5 gpt-oss:120b gpt-oss:20b gpt-oss-safeguard command-r codellama codegemma cogito llava nomic-embed-text

Click any model above to update the configuration below, or edit the model field directly.

Save this as config.yaml and run with agentgateway -f config.yaml

# Standalone config: one wildcard ("*") model entry served by the ollama
# provider with model llama3.2, pointed at a local Ollama endpoint on port
# 11434. No API key is configured.
llm:
  models:
  - provider: ollama
    name: '*'
    params:
      baseUrl: "http://localhost:11434/v1"
      model: llama3.2

Run these kubectl apply commands in order

# Step 1: Gateway
# Declares the shared agentgateway-proxy Gateway: a single HTTP listener on
# port 8080 that admits routes from every namespace.
kubectl apply -f- <<EOF
apiVersion: gateway.networking.k8s.io/v1
kind: Gateway
metadata:
  namespace: agentgateway-system
  name: agentgateway-proxy
spec:
  gatewayClassName: agentgateway
  listeners:
  - name: http
    protocol: HTTP
    port: 8080
    allowedRoutes:
      namespaces:
        from: All
EOF


# Step 2: Deploy Ollama
# One-replica Deployment of the ollama/ollama image plus a Service that
# exposes container port 11434 inside the cluster.
# NOTE(review): none of these steps pulls the llama3.2 model into the pod;
# confirm the model is available before running the test request.
kubectl apply -f- <<EOF
apiVersion: apps/v1
kind: Deployment
metadata:
  namespace: agentgateway-system
  name: ollama
spec:
  replicas: 1
  selector:
    matchLabels:
      app: ollama
  template:
    metadata:
      labels:
        app: ollama
    spec:
      containers:
      - image: ollama/ollama:latest
        name: ollama
        ports:
        - containerPort: 11434
---
apiVersion: v1
kind: Service
metadata:
  namespace: agentgateway-system
  name: ollama
spec:
  ports:
  - targetPort: 11434
    port: 11434
  selector:
    app: ollama
EOF

# Step 3: Backend (no TLS, no auth)
# Points the openai provider at the in-cluster Ollama Service created in
# Step 2 (host, port and chat-completions path are set explicitly).
kubectl apply -f- <<EOF
apiVersion: agentgateway.dev/v1alpha1
kind: AgentgatewayBackend
metadata:
  namespace: agentgateway-system
  name: ollama
spec:
  ai:
    provider:
      openai:
        host: ollama.agentgateway-system.svc.cluster.local
        port: 11434
        path: '/v1/chat/completions'
        model: 'llama3.2'
EOF

# Step 4: Route
# Requests whose path starts with /ollama are sent through the
# agentgateway-proxy Gateway to the ollama backend.
kubectl apply -f- <<EOF
apiVersion: gateway.networking.k8s.io/v1
kind: HTTPRoute
metadata:
  namespace: agentgateway-system
  name: ollama
spec:
  parentRefs:
  - namespace: agentgateway-system
    name: agentgateway-proxy
  rules:
  - backendRefs:
    - group: agentgateway.dev
      kind: AgentgatewayBackend
      namespace: agentgateway-system
      name: ollama
    matches:
    - path:
        value: /ollama
        type: PathPrefix
EOF

# Step 5: Port-forward to test
# Backgrounded with the trailing &; maps local port 8080 to port 8080 of the
# agentgateway-proxy Service.
kubectl port-forward --namespace agentgateway-system svc/agentgateway-proxy 8080:8080 &

Test it

# The port-forward step exposes the gateway on local port 8080, so the test
# request targets that port; /ollama matches the HTTPRoute path prefix.
curl "localhost:8080/ollama" -H content-type:application/json -d '{
  "model": "llama3.2",
  "messages": [{"role": "user", "content": "Hello from Ollama!"}]
}' | jq

vLLM

Local

25 models

meta-llama/Llama-4-Maverick-17B-128E-Instruct meta-llama/Llama-4-Scout-17B-16E-Instruct meta-llama/Llama-3.3-70B-Instruct +22 more

localhost / in-cluster

Auth: No API key needed

View configuration

vLLM Configuration

Supported Models (25) — click a model to use it

meta-llama/Llama-4-Maverick-17B-128E-Instruct meta-llama/Llama-4-Scout-17B-16E-Instruct meta-llama/Llama-3.3-70B-Instruct meta-llama/Llama-3.1-70B-Instruct meta-llama/Llama-3.1-8B-Instruct Qwen/Qwen3.6-35B-A3B Qwen/Qwen3.5-397B-A17B Qwen/Qwen3-32B Qwen/Qwen2.5-72B-Instruct deepseek-ai/DeepSeek-V4-Pro deepseek-ai/DeepSeek-V4-Flash deepseek-ai/DeepSeek-V3 moonshotai/Kimi-K2.6-Instruct MiniMaxAI/MiniMax-M2.7 zai-org/GLM-5.1 google/gemma-4-31b-it google/gemma-4-26b-it google/gemma-3-27b-it google/gemma-2-27b-it microsoft/Phi-4 microsoft/Phi-4-multimodal microsoft/Phi-4-reasoning mistralai/Mixtral-8x7B-Instruct-v0.1 mistralai/Mistral-7B-Instruct-v0.3 Any HuggingFace model

Click any model above to update the configuration below, or edit the model field directly.

Save this as config.yaml and run with agentgateway -f config.yaml

# Standalone config: one wildcard ("*") model entry served by the openAI
# provider with model meta-llama/Llama-3.1-8B-Instruct, pointed at a local
# server on port 8000 via explicit host, port and path. No API key is
# configured.
llm:
  models:
  - provider: openAI
    name: '*'
    params:
      host: localhost
      port: 8000
      path: '/v1/chat/completions'
      model: meta-llama/Llama-3.1-8B-Instruct

Run these kubectl apply commands in order

# Step 1: Gateway
# Declares the shared agentgateway-proxy Gateway: a single HTTP listener on
# port 8080 that admits routes from every namespace.
kubectl apply -f- <<EOF
apiVersion: gateway.networking.k8s.io/v1
kind: Gateway
metadata:
  namespace: agentgateway-system
  name: agentgateway-proxy
spec:
  gatewayClassName: agentgateway
  listeners:
  - name: http
    protocol: HTTP
    port: 8080
    allowedRoutes:
      namespaces:
        from: All
EOF


# Step 2: Deploy vLLM
# One-replica Deployment of the vllm/vllm-openai image started with
# --model meta-llama/Llama-3.1-8B-Instruct and a limit of one nvidia.com/gpu,
# plus a Service exposing container port 8000 inside the cluster.
kubectl apply -f- <<EOF
apiVersion: apps/v1
kind: Deployment
metadata:
  namespace: agentgateway-system
  name: vllm
spec:
  replicas: 1
  selector:
    matchLabels:
      app: vllm
  template:
    metadata:
      labels:
        app: vllm
    spec:
      containers:
      - image: vllm/vllm-openai:latest
        name: vllm
        args:
        - "--model"
        - "meta-llama/Llama-3.1-8B-Instruct"
        resources:
          limits:
            nvidia.com/gpu: 1
        ports:
        - containerPort: 8000
---
apiVersion: v1
kind: Service
metadata:
  namespace: agentgateway-system
  name: vllm
spec:
  ports:
  - targetPort: 8000
    port: 8000
  selector:
    app: vllm
EOF

# Step 3: Backend
# Points the openai provider at the in-cluster vLLM Service created in
# Step 2 (host, port and chat-completions path are set explicitly).
kubectl apply -f- <<EOF
apiVersion: agentgateway.dev/v1alpha1
kind: AgentgatewayBackend
metadata:
  namespace: agentgateway-system
  name: vllm
spec:
  ai:
    provider:
      openai:
        host: vllm.agentgateway-system.svc.cluster.local
        port: 8000
        path: '/v1/chat/completions'
        model: 'meta-llama/Llama-3.1-8B-Instruct'
EOF

# Step 4: Route
# Requests whose path starts with /vllm are sent through the
# agentgateway-proxy Gateway to the vllm backend.
kubectl apply -f- <<EOF
apiVersion: gateway.networking.k8s.io/v1
kind: HTTPRoute
metadata:
  namespace: agentgateway-system
  name: vllm
spec:
  parentRefs:
  - namespace: agentgateway-system
    name: agentgateway-proxy
  rules:
  - backendRefs:
    - group: agentgateway.dev
      kind: AgentgatewayBackend
      namespace: agentgateway-system
      name: vllm
    matches:
    - path:
        value: /vllm
        type: PathPrefix
EOF

# Step 5: Port-forward to test
# Backgrounded with the trailing &; maps local port 8080 to port 8080 of the
# agentgateway-proxy Service.
kubectl port-forward --namespace agentgateway-system svc/agentgateway-proxy 8080:8080 &

Test it

# The port-forward step exposes the gateway on local port 8080, so the test
# request targets that port; /vllm matches the HTTPRoute path prefix.
curl "localhost:8080/vllm" -H content-type:application/json -d '{
  "model": "meta-llama/Llama-3.1-8B-Instruct",
  "messages": [{"role": "user", "content": "Hello from vLLM!"}]
}' | jq

llama.cpp

Local

12 models

Any GGUF model Llama 3.x / 4.x Mistral / Mixtral / Magistral / Devstral +9 more

localhost / in-cluster

Auth: No API key needed

View configuration

llama.cpp Configuration

Supported Models (12) — click a model to use it

Any GGUF model Llama 3.x / 4.x Mistral / Mixtral / Magistral / Devstral Qwen 2.5 / 3 / 3.5 / 3.6 Phi-3 / Phi-4 Gemma 2 / 3 / 4 DeepSeek R1 / V3 / V4 distills Kimi K2 / K2.6 GLM 5 / 5.1 MiniMax M2.7 gpt-oss CodeLlama

Click any model above to update the configuration below, or edit the model field directly.

Save this as config.yaml and run with agentgateway -f config.yaml

# Standalone config: one wildcard ("*") model entry served by the openAI
# provider, pointed at a local server on port 8080 via explicit host, port
# and path. No model name or API key is configured.
llm:
  models:
  - provider: openAI
    name: '*'
    params:
      path: '/v1/chat/completions'
      port: 8080
      host: localhost

Run these kubectl apply commands in order

# Step 1: Gateway
# Declares the shared agentgateway-proxy Gateway: a single HTTP listener on
# port 8080 that admits routes from every namespace.
kubectl apply -f- <<EOF
apiVersion: gateway.networking.k8s.io/v1
kind: Gateway
metadata:
  namespace: agentgateway-system
  name: agentgateway-proxy
spec:
  gatewayClassName: agentgateway
  listeners:
  - name: http
    protocol: HTTP
    port: 8080
    allowedRoutes:
      namespaces:
        from: All
EOF

# Step 2: Backend
# Points the openai provider at an in-cluster llamacpp Service on port 8080
# with an explicit chat-completions path; no model or auth is set.
# NOTE(review): none of these steps creates the llamacpp Deployment or
# Service; confirm it exists in agentgateway-system before testing.
kubectl apply -f- <<EOF
apiVersion: agentgateway.dev/v1alpha1
kind: AgentgatewayBackend
metadata:
  namespace: agentgateway-system
  name: llamacpp
spec:
  ai:
    provider:
      openai:
        path: '/v1/chat/completions'
        port: 8080
        host: llamacpp.agentgateway-system.svc.cluster.local
EOF

# Step 3: Route
# Requests whose path starts with /llamacpp are sent through the
# agentgateway-proxy Gateway to the llamacpp backend.
kubectl apply -f- <<EOF
apiVersion: gateway.networking.k8s.io/v1
kind: HTTPRoute
metadata:
  namespace: agentgateway-system
  name: llamacpp
spec:
  parentRefs:
  - namespace: agentgateway-system
    name: agentgateway-proxy
  rules:
  - backendRefs:
    - group: agentgateway.dev
      kind: AgentgatewayBackend
      namespace: agentgateway-system
      name: llamacpp
    matches:
    - path:
        value: /llamacpp
        type: PathPrefix
EOF

# Step 4: Port-forward to test
# Backgrounded with the trailing &; maps local port 8080 to port 8080 of the
# agentgateway-proxy Service.
kubectl port-forward --namespace agentgateway-system svc/agentgateway-proxy 8080:8080 &

Test it

# The port-forward step exposes the gateway on local port 8080, so the test
# request targets that port; /llamacpp matches the HTTPRoute path prefix.
curl "localhost:8080/llamacpp" -H content-type:application/json -d '{
  "messages": [{"role": "user", "content": "Hello from llama.cpp!"}]
}' | jq

Triton Inference Server

Local

4 models

Any TensorRT-LLM model Any vLLM backend model Any Python backend model +1 more

localhost / in-cluster

Auth: No API key needed

View configuration

Triton Inference Server Configuration

Supported Models (4) — click a model to use it

Any TensorRT-LLM model Any vLLM backend model Any Python backend model Custom ONNX models

Click any model above to update the configuration below, or edit the model field directly.

Save this as config.yaml and run with agentgateway -f config.yaml

# Standalone config: one wildcard ("*") model entry served by the openAI
# provider, pointed at a local server on port 8000 via explicit host, port
# and path. No model name or API key is configured.
llm:
  models:
  - provider: openAI
    name: '*'
    params:
      path: '/v1/chat/completions'
      port: 8000
      host: localhost

Run these kubectl apply commands in order

# Step 1: Gateway
# Declares the shared agentgateway-proxy Gateway: a single HTTP listener on
# port 8080 that admits routes from every namespace.
kubectl apply -f- <<EOF
apiVersion: gateway.networking.k8s.io/v1
kind: Gateway
metadata:
  namespace: agentgateway-system
  name: agentgateway-proxy
spec:
  gatewayClassName: agentgateway
  listeners:
  - name: http
    protocol: HTTP
    port: 8080
    allowedRoutes:
      namespaces:
        from: All
EOF

# Step 2: Backend
# Points the openai provider at an in-cluster triton Service on port 8000
# with an explicit chat-completions path; no model or auth is set.
# NOTE(review): none of these steps creates the triton Deployment or
# Service; confirm it exists in agentgateway-system before testing.
kubectl apply -f- <<EOF
apiVersion: agentgateway.dev/v1alpha1
kind: AgentgatewayBackend
metadata:
  namespace: agentgateway-system
  name: triton
spec:
  ai:
    provider:
      openai:
        path: '/v1/chat/completions'
        port: 8000
        host: triton.agentgateway-system.svc.cluster.local
EOF

# Step 3: Route
# Requests whose path starts with /triton are sent through the
# agentgateway-proxy Gateway to the triton backend.
kubectl apply -f- <<EOF
apiVersion: gateway.networking.k8s.io/v1
kind: HTTPRoute
metadata:
  namespace: agentgateway-system
  name: triton
spec:
  parentRefs:
  - namespace: agentgateway-system
    name: agentgateway-proxy
  rules:
  - backendRefs:
    - group: agentgateway.dev
      kind: AgentgatewayBackend
      namespace: agentgateway-system
      name: triton
    matches:
    - path:
        value: /triton
        type: PathPrefix
EOF

# Step 4: Port-forward to test
# Backgrounded with the trailing &; maps local port 8080 to port 8080 of the
# agentgateway-proxy Service.
kubectl port-forward --namespace agentgateway-system svc/agentgateway-proxy 8080:8080 &

Test it

# The port-forward step exposes the gateway on local port 8080, so the test
# request targets that port; /triton matches the HTTPRoute path prefix.
curl "localhost:8080/triton" -H content-type:application/json -d '{
  "messages": [{"role": "user", "content": "Hello from Triton!"}]
}' | jq

Browse by Endpoint

See which providers support each API endpoint type

Browse by Endpoint

chat completions embeddings fim rerank responses messages models

Triton Inference Server

Local

localhost / in-cluster

chat models

Agentgateway Config /chat/completions

Save as config.yaml and run with agentgateway -f config.yaml

Run these kubectl apply commands in order

Test it