# File viewer metadata from the original listing: 64 lines, 1.2 KiB, YAML
---
# HorizontalPodAutoscaler for the llm-gateway Deployment.
# Keeps between 3 and 20 replicas, scaling on average CPU and memory
# utilization. Scale-up reacts immediately; scale-down is damped.
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: llm-gateway
  namespace: llm-gateway
  labels:
    app: llm-gateway
spec:
  # Workload whose replica count this autoscaler controls.
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: llm-gateway

  # Hard bounds on the replica count.
  minReplicas: 3
  maxReplicas: 20

  behavior:
    scaleDown:
      # Act on the highest recommendation seen over the last 5 minutes,
      # so a brief dip in load does not remove pods.
      stabilizationWindowSeconds: 300
      policies:
        # Remove at most 50% of current replicas per 60s ...
        - type: Percent
          value: 50
          periodSeconds: 60
        # ... or at most 2 pods per 60s.
        - type: Pods
          value: 2
          periodSeconds: 60
      # Min: apply whichever policy permits the smaller change.
      selectPolicy: Min
    scaleUp:
      # No stabilization window: scale up as soon as a metric demands it.
      stabilizationWindowSeconds: 0
      policies:
        # Add at most 100% of current replicas per 30s ...
        - type: Percent
          value: 100
          periodSeconds: 30
        # ... or at most 4 pods per 30s.
        - type: Pods
          value: 4
          periodSeconds: 30
      # Max: apply whichever policy permits the larger change.
      selectPolicy: Max

  # With several metrics, the HPA uses the largest replica count that any
  # single metric proposes. Utilization targets are a percentage of the
  # containers' resource *requests*, so the target pods must declare
  # cpu and memory requests for these metrics to be computable.
  metrics:
    # CPU-based scaling
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: 70

    # Memory-based scaling
    - type: Resource
      resource:
        name: memory
        target:
          type: Utilization
          averageUtilization: 80

    # Custom metrics (disabled). Requires a custom metrics API adapter
    # serving custom.metrics.k8s.io (e.g. prometheus-adapter); metrics-server
    # alone only provides the cpu/memory resource metrics used above.
    # - type: Pods
    #   pods:
    #     metric:
    #       name: http_requests_per_second
    #     target:
    #       type: AverageValue
    #       averageValue: "1000"