Add Dockerfile and Manifests

This commit is contained in:
2026-03-05 06:13:50 +00:00
parent b56c78fa07
commit df6b677a15
21 changed files with 1952 additions and 0 deletions

63
k8s/hpa.yaml Normal file
View File

@@ -0,0 +1,63 @@
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
name: llm-gateway
namespace: llm-gateway
labels:
app: llm-gateway
spec:
scaleTargetRef:
apiVersion: apps/v1
kind: Deployment
name: llm-gateway
minReplicas: 3
maxReplicas: 20
behavior:
scaleDown:
stabilizationWindowSeconds: 300
policies:
- type: Percent
value: 50
periodSeconds: 60
- type: Pods
value: 2
periodSeconds: 60
selectPolicy: Min
scaleUp:
stabilizationWindowSeconds: 0
policies:
- type: Percent
value: 100
periodSeconds: 30
- type: Pods
value: 4
periodSeconds: 30
selectPolicy: Max
metrics:
# CPU-based scaling
- type: Resource
resource:
name: cpu
target:
type: Utilization
averageUtilization: 70
# Memory-based scaling
- type: Resource
resource:
name: memory
target:
type: Utilization
averageUtilization: 80
# Custom metrics (requires metrics-server and custom metrics API)
# - type: Pods
# pods:
# metric:
# name: http_requests_per_second
# target:
# type: AverageValue
# averageValue: "1000"