# File: latticelm/k8s/deployment.yaml
# (File-viewer listing at time of capture: 169 lines, 4.0 KiB, YAML)

---
# Deployment for the llm-gateway service.
#
# Runs three replicas of a single "gateway" container listening on port 8080,
# with provider credentials injected from the `llm-gateway-secrets` Secret and
# application configuration mounted from the `llm-gateway-config` ConfigMap.
# Both objects, and the `llm-gateway` ServiceAccount, are referenced here but
# are not defined in this manifest.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: llm-gateway
  namespace: llm-gateway
  labels:
    app: llm-gateway
    version: v1
spec:
  replicas: 3
  # Roll out one extra pod at a time and never drop below the desired replica
  # count while updating.
  strategy:
    type: RollingUpdate
    rollingUpdate:
      maxSurge: 1
      maxUnavailable: 0
  # Must match the pod template labels below.
  selector:
    matchLabels:
      app: llm-gateway
  template:
    metadata:
      labels:
        app: llm-gateway
        version: v1
      # Annotation values must be strings, hence the quotes around the
      # boolean-looking and numeric-looking values.
      annotations:
        prometheus.io/scrape: "true"
        prometheus.io/port: "8080"
        prometheus.io/path: "/metrics"
    spec:
      serviceAccountName: llm-gateway
      # Pod-level security defaults: non-root UID/GID 1000 and the container
      # runtime's default seccomp profile.
      securityContext:
        runAsNonRoot: true
        runAsUser: 1000
        runAsGroup: 1000
        fsGroup: 1000
        seccompProfile:
          type: RuntimeDefault
      containers:
        - name: gateway
          # Replace with your registry/image:tag.
          # NOTE(review): a mutable `:latest` tag combined with
          # `IfNotPresent` lets each node keep whatever image it cached
          # first, so replicas can run different builds. Pin an immutable
          # tag or digest for real deployments.
          image: llm-gateway:latest
          imagePullPolicy: IfNotPresent
          ports:
            - name: http
              containerPort: 8080
              protocol: TCP
          env:
            # Provider API keys from Secret
            - name: GOOGLE_API_KEY
              valueFrom:
                secretKeyRef:
                  name: llm-gateway-secrets
                  key: GOOGLE_API_KEY
            - name: ANTHROPIC_API_KEY
              valueFrom:
                secretKeyRef:
                  name: llm-gateway-secrets
                  key: ANTHROPIC_API_KEY
            - name: OPENAI_API_KEY
              valueFrom:
                secretKeyRef:
                  name: llm-gateway-secrets
                  key: OPENAI_API_KEY
            - name: OIDC_AUDIENCE
              valueFrom:
                secretKeyRef:
                  name: llm-gateway-secrets
                  key: OIDC_AUDIENCE
            # Optional: pod metadata exposed through the downward API
            - name: POD_NAME
              valueFrom:
                fieldRef:
                  fieldPath: metadata.name
            - name: POD_NAMESPACE
              valueFrom:
                fieldRef:
                  fieldPath: metadata.namespace
            - name: POD_IP
              valueFrom:
                fieldRef:
                  fieldPath: status.podIP
          resources:
            requests:
              cpu: 100m
              memory: 128Mi
            limits:
              cpu: 1000m
              memory: 512Mi
          # Restart the container after 3 consecutive failed /health checks
          # (checked every 30s).
          livenessProbe:
            httpGet:
              path: /health
              port: http
              scheme: HTTP
            initialDelaySeconds: 10
            periodSeconds: 30
            timeoutSeconds: 5
            successThreshold: 1
            failureThreshold: 3
          # Remove the pod from Service endpoints after 3 consecutive failed
          # /ready checks (checked every 10s).
          readinessProbe:
            httpGet:
              path: /ready
              port: http
              scheme: HTTP
            initialDelaySeconds: 5
            periodSeconds: 10
            timeoutSeconds: 5
            successThreshold: 1
            failureThreshold: 3
          # Allow up to 30 x 5s = 150s for start-up; the liveness and
          # readiness probes do not run until this probe has succeeded.
          startupProbe:
            httpGet:
              path: /health
              port: http
              scheme: HTTP
            initialDelaySeconds: 0
            periodSeconds: 5
            timeoutSeconds: 3
            successThreshold: 1
            failureThreshold: 30
          volumeMounts:
            - name: config
              mountPath: /app/config
              readOnly: true
            # Writable scratch space; the root filesystem is read-only (see
            # the container securityContext below).
            - name: tmp
              mountPath: /tmp
          # Container-level hardening: no privilege escalation, read-only
          # root filesystem, and every Linux capability dropped.
          securityContext:
            allowPrivilegeEscalation: false
            readOnlyRootFilesystem: true
            runAsNonRoot: true
            runAsUser: 1000
            capabilities:
              drop:
                - ALL
      volumes:
        - name: config
          configMap:
            name: llm-gateway-config
        - name: tmp
          emptyDir: {}
      # Affinity rules for better distribution: prefer (but do not require)
      # scheduling replicas onto different nodes.
      affinity:
        podAntiAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 100
              podAffinityTerm:
                labelSelector:
                  matchExpressions:
                    - key: app
                      operator: In
                      values:
                        - llm-gateway
                topologyKey: kubernetes.io/hostname
      # Tolerations (if needed for specific node pools)
      # tolerations:
      #   - key: "workload-type"
      #     operator: "Equal"
      #     value: "llm"
      #     effect: "NoSchedule"