# File-viewer header (not part of the manifest): 169 lines, 4.0 KiB, YAML
---
# Deployment for the llm-gateway service.
# Runs three replicas of a single "gateway" container that listens on port
# 8080, reads provider API keys from the llm-gateway-secrets Secret, and
# mounts the llm-gateway-config ConfigMap read-only at /app/config.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: llm-gateway
  namespace: llm-gateway
  labels:
    app: llm-gateway
    version: v1
spec:
  replicas: 3
  # Roll out by adding one new pod at a time and never removing an available
  # pod before its replacement is ready (maxUnavailable: 0).
  strategy:
    type: RollingUpdate
    rollingUpdate:
      maxSurge: 1
      maxUnavailable: 0
  # Must match the pod template labels below.
  selector:
    matchLabels:
      app: llm-gateway
  template:
    metadata:
      labels:
        app: llm-gateway
        version: v1
      # Annotation values must be strings, hence the quoting.
      annotations:
        prometheus.io/scrape: "true"
        prometheus.io/port: "8080"
        prometheus.io/path: "/metrics"
    spec:
      serviceAccountName: llm-gateway
      # Pod-level defaults: non-root UID/GID 1000 and the runtime's default
      # seccomp profile.
      securityContext:
        runAsNonRoot: true
        runAsUser: 1000
        runAsGroup: 1000
        fsGroup: 1000
        seccompProfile:
          type: RuntimeDefault

      containers:
        - name: gateway
          # NOTE(review): a mutable ":latest" tag combined with IfNotPresent
          # lets nodes keep running whatever image they cached first, so
          # replicas can diverge. Pin an immutable tag or digest when
          # replacing the placeholder below.
          image: llm-gateway:latest  # Replace with your registry/image:tag
          imagePullPolicy: IfNotPresent

          ports:
            - name: http
              containerPort: 8080
              protocol: TCP

          env:
            # Provider API Keys from Secret
            - name: GOOGLE_API_KEY
              valueFrom:
                secretKeyRef:
                  name: llm-gateway-secrets
                  key: GOOGLE_API_KEY
            - name: ANTHROPIC_API_KEY
              valueFrom:
                secretKeyRef:
                  name: llm-gateway-secrets
                  key: ANTHROPIC_API_KEY
            - name: OPENAI_API_KEY
              valueFrom:
                secretKeyRef:
                  name: llm-gateway-secrets
                  key: OPENAI_API_KEY
            - name: OIDC_AUDIENCE
              valueFrom:
                secretKeyRef:
                  name: llm-gateway-secrets
                  key: OIDC_AUDIENCE

            # Optional: Pod metadata
            - name: POD_NAME
              valueFrom:
                fieldRef:
                  fieldPath: metadata.name
            - name: POD_NAMESPACE
              valueFrom:
                fieldRef:
                  fieldPath: metadata.namespace
            - name: POD_IP
              valueFrom:
                fieldRef:
                  fieldPath: status.podIP

          resources:
            requests:
              cpu: 100m
              memory: 128Mi
            limits:
              cpu: 1000m
              memory: 512Mi

          # Restart the container after 3 consecutive /health failures,
          # checked every 30s.
          livenessProbe:
            httpGet:
              path: /health
              port: http
              scheme: HTTP
            initialDelaySeconds: 10
            periodSeconds: 30
            timeoutSeconds: 5
            successThreshold: 1
            failureThreshold: 3

          # Mark the pod not-ready after 3 consecutive /ready failures,
          # checked every 10s.
          readinessProbe:
            httpGet:
              path: /ready
              port: http
              scheme: HTTP
            initialDelaySeconds: 5
            periodSeconds: 10
            timeoutSeconds: 5
            successThreshold: 1
            failureThreshold: 3

          # Allows up to 30 x 5s = 150s for startup; the liveness and
          # readiness probes do not run until this probe succeeds.
          startupProbe:
            httpGet:
              path: /health
              port: http
              scheme: HTTP
            initialDelaySeconds: 0
            periodSeconds: 5
            timeoutSeconds: 3
            successThreshold: 1
            failureThreshold: 30

          volumeMounts:
            - name: config
              mountPath: /app/config
              readOnly: true
            # Writable scratch space; the root filesystem is read-only
            # (see readOnlyRootFilesystem below).
            - name: tmp
              mountPath: /tmp

          # Container-level hardening: no privilege escalation, read-only
          # root filesystem, non-root UID 1000, all capabilities dropped.
          securityContext:
            allowPrivilegeEscalation: false
            readOnlyRootFilesystem: true
            runAsNonRoot: true
            runAsUser: 1000
            capabilities:
              drop:
                - ALL

      volumes:
        - name: config
          configMap:
            name: llm-gateway-config
        - name: tmp
          emptyDir: {}

      # Affinity rules for better distribution
      # Soft preference only: replicas are spread across nodes when possible,
      # but scheduling still proceeds if they cannot be.
      affinity:
        podAntiAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 100
              podAffinityTerm:
                labelSelector:
                  matchExpressions:
                    - key: app
                      operator: In
                      values:
                        - llm-gateway
                topologyKey: kubernetes.io/hostname

      # Tolerations (if needed for specific node pools)
      # tolerations:
      #   - key: "workload-type"
      #     operator: "Equal"
      #     value: "llm"
      #     effect: "NoSchedule"