---
# Deployment for the llm-gateway service.
#
# Runs 3 replicas of a single "gateway" container listening on port 8080.
# Configuration is mounted read-only from the `llm-gateway-config` ConfigMap
# and credentials are injected as environment variables from the
# `llm-gateway-secrets` Secret. Both objects, as well as the `llm-gateway`
# ServiceAccount and namespace, must already exist in the cluster.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: llm-gateway
  namespace: llm-gateway
  labels:
    app: llm-gateway
    version: v1
spec:
  replicas: 3
  # Surge one extra pod at a time and never take an existing pod down before
  # its replacement is Ready, so capacity does not drop below `replicas`
  # during a rollout.
  strategy:
    type: RollingUpdate
    rollingUpdate:
      maxSurge: 1
      maxUnavailable: 0
  # Must match the pod template labels below.
  selector:
    matchLabels:
      app: llm-gateway
  template:
    metadata:
      labels:
        app: llm-gateway
        version: v1
      # Annotation values must be strings, hence the quotes around
      # "true" and "8080".
      annotations:
        prometheus.io/scrape: "true"
        prometheus.io/port: "8080"
        prometheus.io/path: "/metrics"
    spec:
      serviceAccountName: llm-gateway
      # Pod-level hardening: non-root UID/GID 1000 and the container
      # runtime's default seccomp profile.
      securityContext:
        runAsNonRoot: true
        runAsUser: 1000
        runAsGroup: 1000
        fsGroup: 1000
        seccompProfile:
          type: RuntimeDefault
      containers:
        - name: gateway
          # NOTE(review): a mutable `latest` tag combined with IfNotPresent
          # means a node that already cached the image will not re-pull it.
          # Pin an immutable tag or digest when replacing the image below.
          image: llm-gateway:latest  # Replace with your registry/image:tag
          imagePullPolicy: IfNotPresent
          ports:
            - name: http
              containerPort: 8080
              protocol: TCP
          env:
            # Provider API Keys from Secret
            - name: GOOGLE_API_KEY
              valueFrom:
                secretKeyRef:
                  name: llm-gateway-secrets
                  key: GOOGLE_API_KEY
            - name: ANTHROPIC_API_KEY
              valueFrom:
                secretKeyRef:
                  name: llm-gateway-secrets
                  key: ANTHROPIC_API_KEY
            - name: OPENAI_API_KEY
              valueFrom:
                secretKeyRef:
                  name: llm-gateway-secrets
                  key: OPENAI_API_KEY
            # OIDC audience, read from the same Secret as the API keys
            - name: OIDC_AUDIENCE
              valueFrom:
                secretKeyRef:
                  name: llm-gateway-secrets
                  key: OIDC_AUDIENCE
            # Optional: Pod metadata
            - name: POD_NAME
              valueFrom:
                fieldRef:
                  fieldPath: metadata.name
            - name: POD_NAMESPACE
              valueFrom:
                fieldRef:
                  fieldPath: metadata.namespace
            - name: POD_IP
              valueFrom:
                fieldRef:
                  fieldPath: status.podIP
          resources:
            requests:
              cpu: 100m
              memory: 128Mi
            limits:
              cpu: 1000m
              memory: 512Mi
          # Restart the container after 3 consecutive failed /health checks.
          livenessProbe:
            httpGet:
              path: /health
              port: http
              scheme: HTTP
            initialDelaySeconds: 10
            periodSeconds: 30
            timeoutSeconds: 5
            successThreshold: 1
            failureThreshold: 3
          # Remove the pod from Service endpoints while /ready is failing.
          readinessProbe:
            httpGet:
              path: /ready
              port: http
              scheme: HTTP
            initialDelaySeconds: 5
            periodSeconds: 10
            timeoutSeconds: 5
            successThreshold: 1
            failureThreshold: 3
          # Liveness/readiness checks are held off until this probe succeeds.
          # 30 failures x 5s period allows up to 150s for startup.
          startupProbe:
            httpGet:
              path: /health
              port: http
              scheme: HTTP
            initialDelaySeconds: 0
            periodSeconds: 5
            timeoutSeconds: 3
            successThreshold: 1
            failureThreshold: 30
          volumeMounts:
            - name: config
              mountPath: /app/config
              readOnly: true
            # The root filesystem is read-only (see securityContext below),
            # so /tmp is backed by an emptyDir to stay writable.
            - name: tmp
              mountPath: /tmp
          # Container-level hardening: no privilege escalation, read-only
          # root filesystem, and all Linux capabilities dropped.
          securityContext:
            allowPrivilegeEscalation: false
            readOnlyRootFilesystem: true
            runAsNonRoot: true
            runAsUser: 1000
            capabilities:
              drop:
                - ALL
      volumes:
        - name: config
          configMap:
            name: llm-gateway-config
        - name: tmp
          emptyDir: {}
      # Affinity rules for better distribution: prefer (but do not require)
      # placing replicas on different nodes.
      affinity:
        podAntiAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 100
              podAffinityTerm:
                labelSelector:
                  matchExpressions:
                    - key: app
                      operator: In
                      values:
                        - llm-gateway
                topologyKey: kubernetes.io/hostname
      # Tolerations (if needed for specific node pools)
      # tolerations:
      #   - key: "workload-type"
      #     operator: "Equal"
      #     value: "llm"
      #     effect: "NoSchedule"