Add Dockerfile and Manifests
This commit is contained in:
63
k8s/hpa.yaml
Normal file
63
k8s/hpa.yaml
Normal file
@@ -0,0 +1,63 @@
|
||||
apiVersion: autoscaling/v2
|
||||
kind: HorizontalPodAutoscaler
|
||||
metadata:
|
||||
name: llm-gateway
|
||||
namespace: llm-gateway
|
||||
labels:
|
||||
app: llm-gateway
|
||||
spec:
|
||||
scaleTargetRef:
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
name: llm-gateway
|
||||
|
||||
minReplicas: 3
|
||||
maxReplicas: 20
|
||||
|
||||
behavior:
|
||||
scaleDown:
|
||||
stabilizationWindowSeconds: 300
|
||||
policies:
|
||||
- type: Percent
|
||||
value: 50
|
||||
periodSeconds: 60
|
||||
- type: Pods
|
||||
value: 2
|
||||
periodSeconds: 60
|
||||
selectPolicy: Min
|
||||
scaleUp:
|
||||
stabilizationWindowSeconds: 0
|
||||
policies:
|
||||
- type: Percent
|
||||
value: 100
|
||||
periodSeconds: 30
|
||||
- type: Pods
|
||||
value: 4
|
||||
periodSeconds: 30
|
||||
selectPolicy: Max
|
||||
|
||||
metrics:
|
||||
# CPU-based scaling
|
||||
- type: Resource
|
||||
resource:
|
||||
name: cpu
|
||||
target:
|
||||
type: Utilization
|
||||
averageUtilization: 70
|
||||
|
||||
# Memory-based scaling
|
||||
- type: Resource
|
||||
resource:
|
||||
name: memory
|
||||
target:
|
||||
type: Utilization
|
||||
averageUtilization: 80
|
||||
|
||||
# Custom metrics (requires metrics-server and custom metrics API)
|
||||
# - type: Pods
|
||||
# pods:
|
||||
# metric:
|
||||
# name: http_requests_per_second
|
||||
# target:
|
||||
# type: AverageValue
|
||||
# averageValue: "1000"
|
||||
Reference in New Issue
Block a user