Files
latticelm/k8s/hpa.yaml

64 lines
1.2 KiB
YAML

---
# HorizontalPodAutoscaler for the llm-gateway Deployment.
# Scales between 3 and 20 replicas on CPU (70%) and memory (80%) utilization,
# with conservative scale-down (5 min stabilization, slowest policy wins) and
# aggressive scale-up (no stabilization, fastest policy wins).
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: llm-gateway
  namespace: llm-gateway
  labels:
    app: llm-gateway
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: llm-gateway
  minReplicas: 3
  maxReplicas: 20
  behavior:
    scaleDown:
      # Wait 5 minutes of sustained low load before removing pods,
      # to avoid flapping on bursty traffic.
      stabilizationWindowSeconds: 300
      policies:
        - type: Percent
          value: 50
          periodSeconds: 60
        - type: Pods
          value: 2
          periodSeconds: 60
      # Min: apply whichever policy removes the FEWEST pods per period.
      selectPolicy: Min
    scaleUp:
      # React immediately to load spikes.
      stabilizationWindowSeconds: 0
      policies:
        - type: Percent
          value: 100
          periodSeconds: 30
        - type: Pods
          value: 4
          periodSeconds: 30
      # Max: apply whichever policy adds the MOST pods per period.
      selectPolicy: Max
  metrics:
    # CPU-based scaling (utilization is relative to pod resource requests;
    # served by metrics-server)
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: 70
    # Memory-based scaling
    - type: Resource
      resource:
        name: memory
        target:
          type: Utilization
          averageUtilization: 80
    # Custom metrics (requires a custom-metrics API adapter,
    # e.g. prometheus-adapter — metrics-server only serves Resource metrics)
    # - type: Pods
    #   pods:
    #     metric:
    #       name: http_requests_per_second
    #     target:
    #       type: AverageValue
    #       averageValue: "1000"