Add Dockerfile and Manifests

This commit is contained in:
2026-03-05 06:13:50 +00:00
parent b56c78fa07
commit df6b677a15
21 changed files with 1952 additions and 0 deletions

65
.dockerignore Normal file
View File

@@ -0,0 +1,65 @@
# .dockerignore — keeps the Docker build context small and prevents secrets
# and local state from leaking into image layers.
# Git
.git
.gitignore
.github
# Documentation
*.md
docs/
# IDE
.vscode/
.idea/
*.swp
*.swo
*~
# Build artifacts
/bin/
/dist/
/build/
/gateway
/cmd/gateway/gateway
*.exe
*.dll
*.so
*.dylib
*.test
*.out
# Configuration files with secrets
config.yaml
config.json
*-local.yaml
*-local.json
.env
.env.local
*.key
*.pem
# Test and coverage
coverage.out
*.log
logs/
# OS
.DS_Store
Thumbs.db
# Dependencies (will be downloaded during build)
vendor/
# Python
__pycache__/
*.py[cod]
tests/node_modules/
# Jujutsu
.jj/
# Claude
.claude/
# Data directories
data/
*.db

181
.github/workflows/ci.yaml vendored Normal file
View File

@@ -0,0 +1,181 @@
# CI pipeline: test, lint, security scan, build, then (on main/develop
# pushes) build and push a multi-arch container image and scan it.
name: CI

on:
  push:
    branches: [main, develop]
  pull_request:
    branches: [main, develop]

env:
  GO_VERSION: '1.23'
  REGISTRY: ghcr.io
  IMAGE_NAME: ${{ github.repository }}

jobs:
  test:
    name: Test
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
      - name: Set up Go
        uses: actions/setup-go@v5
        with:
          go-version: ${{ env.GO_VERSION }}
          cache: true
      - name: Download dependencies
        run: go mod download
      - name: Verify dependencies
        run: go mod verify
      - name: Run tests
        run: go test -v -race -coverprofile=coverage.out ./...
      - name: Upload coverage to Codecov
        uses: codecov/codecov-action@v4
        with:
          file: ./coverage.out
          flags: unittests
          name: codecov-umbrella
      - name: Generate coverage report
        run: go tool cover -html=coverage.out -o coverage.html
      - name: Upload coverage report
        uses: actions/upload-artifact@v4
        with:
          name: coverage-report
          path: coverage.html

  lint:
    name: Lint
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
      - name: Set up Go
        uses: actions/setup-go@v5
        with:
          go-version: ${{ env.GO_VERSION }}
          cache: true
      - name: Run golangci-lint
        uses: golangci/golangci-lint-action@v4
        with:
          version: latest
          args: --timeout=5m

  security:
    name: Security Scan
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
      - name: Set up Go
        uses: actions/setup-go@v5
        with:
          go-version: ${{ env.GO_VERSION }}
          cache: true
      - name: Run Gosec Security Scanner
        uses: securego/gosec@master
        with:
          args: '-no-fail -fmt sarif -out results.sarif ./...'
      - name: Upload SARIF file
        uses: github/codeql-action/upload-sarif@v3
        with:
          sarif_file: results.sarif

  build:
    name: Build
    runs-on: ubuntu-latest
    needs: [test, lint]
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
      - name: Set up Go
        uses: actions/setup-go@v5
        with:
          go-version: ${{ env.GO_VERSION }}
          cache: true
      - name: Build binary
        run: |
          CGO_ENABLED=1 go build -v -o bin/gateway ./cmd/gateway
      - name: Upload binary
        uses: actions/upload-artifact@v4
        with:
          name: gateway-binary
          path: bin/gateway

  docker:
    name: Build and Push Docker Image
    runs-on: ubuntu-latest
    needs: [test, lint, security]
    if: github.event_name == 'push' && (github.ref == 'refs/heads/main' || github.ref == 'refs/heads/develop')
    permissions:
      contents: read
      packages: write
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      - name: Log in to Container Registry
        uses: docker/login-action@v3
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}
      - name: Extract metadata
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
          tags: |
            type=ref,event=branch
            type=ref,event=pr
            type=semver,pattern={{version}}
            type=semver,pattern={{major}}.{{minor}}
            type=sha,prefix={{branch}}-
            type=raw,value=latest,enable=${{ github.ref == 'refs/heads/main' }}
      - name: Build and push Docker image
        id: build
        uses: docker/build-push-action@v5
        with:
          context: .
          push: true
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          cache-from: type=gha
          cache-to: type=gha,mode=max
          platforms: linux/amd64,linux/arm64
      - name: Run Trivy vulnerability scanner
        uses: aquasecurity/trivy-action@master
        with:
          # FIX: the metadata step never produces a `:<full-sha>` tag (it
          # emits `type=sha` with a branch prefix and a short sha), so
          # scanning `:${{ github.sha }}` referenced a nonexistent tag.
          # Scan the image that was actually pushed, by digest.
          image-ref: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}@${{ steps.build.outputs.digest }}
          format: 'sarif'
          output: 'trivy-results.sarif'
      - name: Upload Trivy results to GitHub Security
        uses: github/codeql-action/upload-sarif@v3
        with:
          sarif_file: 'trivy-results.sarif'

129
.github/workflows/release.yaml vendored Normal file
View File

@@ -0,0 +1,129 @@
# Release pipeline: on a v* tag, test, cross-build binaries, push a
# multi-arch image, and publish a GitHub release with a changelog.
name: Release

on:
  push:
    tags:
      - 'v*'

env:
  GO_VERSION: '1.23'
  REGISTRY: ghcr.io
  IMAGE_NAME: ${{ github.repository }}

jobs:
  release:
    name: Create Release
    runs-on: ubuntu-latest
    permissions:
      contents: write
      packages: write
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # Full history is required for `git describe`/changelog generation.
          fetch-depth: 0
      - name: Set up Go
        uses: actions/setup-go@v5
        with:
          go-version: ${{ env.GO_VERSION }}
      - name: Run tests
        run: go test -v ./...
      - name: Build binaries
        # NOTE(review): CGO_ENABLED=1 cross-compilation requires a matching C
        # cross-toolchain. The darwin targets cannot link with the default
        # ubuntu-latest toolchain, and linux/arm64 needs aarch64-linux-gnu-gcc.
        # Confirm toolchains are installed here, or move these builds to
        # native runners.
        run: |
          # Linux amd64
          GOOS=linux GOARCH=amd64 CGO_ENABLED=1 go build -o bin/gateway-linux-amd64 ./cmd/gateway
          # Linux arm64
          GOOS=linux GOARCH=arm64 CGO_ENABLED=1 go build -o bin/gateway-linux-arm64 ./cmd/gateway
          # macOS amd64
          GOOS=darwin GOARCH=amd64 CGO_ENABLED=1 go build -o bin/gateway-darwin-amd64 ./cmd/gateway
          # macOS arm64
          GOOS=darwin GOARCH=arm64 CGO_ENABLED=1 go build -o bin/gateway-darwin-arm64 ./cmd/gateway
      - name: Create checksums
        run: |
          cd bin
          sha256sum gateway-* > checksums.txt
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      - name: Log in to Container Registry
        uses: docker/login-action@v3
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}
      - name: Extract metadata
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
          tags: |
            type=semver,pattern={{version}}
            type=semver,pattern={{major}}.{{minor}}
            type=semver,pattern={{major}}
            type=raw,value=latest
      - name: Build and push Docker image
        uses: docker/build-push-action@v5
        with:
          context: .
          push: true
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          platforms: linux/amd64,linux/arm64
          cache-from: type=gha
          cache-to: type=gha,mode=max
      - name: Generate changelog
        id: changelog
        run: |
          # FIX: `git describe --tags --abbrev=0 HEAD^` hard-fails when this
          # is the first tag in the repository. Fall back to the full history
          # in that case.
          PREV_TAG=$(git describe --tags --abbrev=0 HEAD^ 2>/dev/null || true)
          if [ -n "$PREV_TAG" ]; then
            git log "${PREV_TAG}..HEAD" --pretty=format:"* %s (%h)" > CHANGELOG.txt
          else
            git log --pretty=format:"* %s (%h)" > CHANGELOG.txt
          fi
          echo "changelog<<EOF" >> $GITHUB_OUTPUT
          cat CHANGELOG.txt >> $GITHUB_OUTPUT
          echo "EOF" >> $GITHUB_OUTPUT
      - name: Create Release
        uses: softprops/action-gh-release@v1
        with:
          body: |
            ## Changes
            ${{ steps.changelog.outputs.changelog }}

            ## Docker Images
            ```
            docker pull ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ github.ref_name }}
            docker pull ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:latest
            ```

            ## Installation

            ### Kubernetes
            ```bash
            kubectl apply -k k8s/
            ```

            ### Docker
            ```bash
            docker run -p 8080:8080 \
              -e GOOGLE_API_KEY=your-key \
              -e ANTHROPIC_API_KEY=your-key \
              -e OPENAI_API_KEY=your-key \
              ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ github.ref_name }}
            ```
          files: |
            bin/gateway-*
            bin/checksums.txt
          draft: false
          prerelease: ${{ contains(github.ref, 'alpha') || contains(github.ref, 'beta') || contains(github.ref, 'rc') }}
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

62
Dockerfile Normal file
View File

@@ -0,0 +1,62 @@
# Multi-stage build for Go LLM Gateway
# Stage 1: Build the Go binary
# Pinned to match CI's GO_VERSION ('1.23') instead of a floating tag,
# so local and CI builds use the same toolchain.
FROM golang:1.23-alpine AS builder

# Install build dependencies up front (single layer).
# gcc/musl-dev are required because CGO is needed for SQLite support.
RUN apk add --no-cache git ca-certificates tzdata gcc musl-dev

WORKDIR /build

# Copy go mod files first for better layer caching
COPY go.mod go.sum ./
RUN go mod download

# Copy source code
COPY . .

# Build a static binary with optimizations.
# FIX: do NOT force GOOS/GOARCH here — CI builds this image for both
# linux/amd64 and linux/arm64 (buildx `platforms:`), and each platform's
# builder stage compiles natively under emulation. Hard-coding
# GOARCH=amd64 would put an amd64 binary inside the arm64 image.
RUN CGO_ENABLED=1 go build \
    -ldflags='-w -s -extldflags "-static"' \
    -a -installsuffix cgo \
    -o gateway \
    ./cmd/gateway

# Stage 2: Create minimal runtime image
FROM alpine:3.19

# Install runtime dependencies
RUN apk add --no-cache ca-certificates tzdata

# Create non-root user
RUN addgroup -g 1000 gateway && \
    adduser -D -u 1000 -G gateway gateway

# Create necessary directories
RUN mkdir -p /app /app/data && \
    chown -R gateway:gateway /app

WORKDIR /app

# Copy binary from builder
COPY --from=builder /build/gateway /app/gateway

# Copy example config (optional, mainly for documentation)
COPY config.example.yaml /app/config.example.yaml

# Switch to non-root user
USER gateway

# Expose port
EXPOSE 8080

# Health check (busybox wget is available in alpine)
HEALTHCHECK --interval=30s --timeout=5s --start-period=5s --retries=3 \
    CMD wget --no-verbose --tries=1 --spider http://localhost:8080/health || exit 1

# Set entrypoint
ENTRYPOINT ["/app/gateway"]

# Default command (can be overridden).
# NOTE(review): /app/config/config.yaml is not baked into the image; it is
# expected to be mounted (docker-compose volume / k8s ConfigMap) — confirm.
CMD ["--config", "/app/config/config.yaml"]

151
Makefile Normal file
View File

@@ -0,0 +1,151 @@
# Makefile for LLM Gateway
# NOTE: recipe lines must be indented with hard tabs (a make requirement).

# Declare all non-file targets as phony so stale files never shadow them.
.PHONY: help build build-static test test-coverage fmt lint tidy clean \
	docker-build docker-push docker-run docker-compose-up docker-compose-down \
	docker-compose-logs k8s-namespace k8s-secrets k8s-deploy k8s-delete \
	k8s-status k8s-logs k8s-describe k8s-port-forward ci security-scan run version

# Variables
APP_NAME := llm-gateway
VERSION := $(shell git describe --tags --always --dirty 2>/dev/null || echo "dev")
REGISTRY ?= your-registry
IMAGE := $(REGISTRY)/$(APP_NAME)
DOCKER_TAG := $(IMAGE):$(VERSION)
LATEST_TAG := $(IMAGE):latest

# Go variables
GOCMD := go
GOBUILD := $(GOCMD) build
GOTEST := $(GOCMD) test
GOMOD := $(GOCMD) mod
GOFMT := $(GOCMD) fmt

# Build directory
BUILD_DIR := bin

# Help target (parses the "## description" suffixes below)
help: ## Show this help message
	@echo "Usage: make [target]"
	@echo ""
	@echo "Targets:"
	@awk 'BEGIN {FS = ":.*##"; printf "\n"} /^[a-zA-Z_-]+:.*?##/ { printf "  %-20s %s\n", $$1, $$2 }' $(MAKEFILE_LIST)

# Development targets
build: ## Build the binary
	@echo "Building $(APP_NAME)..."
	CGO_ENABLED=1 $(GOBUILD) -o $(BUILD_DIR)/$(APP_NAME) ./cmd/gateway

build-static: ## Build static binary
	@echo "Building static binary..."
	CGO_ENABLED=1 $(GOBUILD) -ldflags='-w -s -extldflags "-static"' -a -installsuffix cgo -o $(BUILD_DIR)/$(APP_NAME) ./cmd/gateway

test: ## Run tests
	@echo "Running tests..."
	$(GOTEST) -v -race -coverprofile=coverage.out ./...

test-coverage: test ## Run tests with coverage report
	@echo "Generating coverage report..."
	$(GOCMD) tool cover -html=coverage.out -o coverage.html
	@echo "Coverage report saved to coverage.html"

fmt: ## Format Go code
	@echo "Formatting code..."
	$(GOFMT) ./...

lint: ## Run linter
	@echo "Running linter..."
	@which golangci-lint > /dev/null || (echo "golangci-lint not installed. Run: go install github.com/golangci/golangci-lint/cmd/golangci-lint@latest" && exit 1)
	golangci-lint run ./...

tidy: ## Tidy go modules
	@echo "Tidying go modules..."
	$(GOMOD) tidy

clean: ## Clean build artifacts
	@echo "Cleaning..."
	rm -rf $(BUILD_DIR)
	rm -f coverage.out coverage.html

# Docker targets
docker-build: ## Build Docker image
	@echo "Building Docker image $(DOCKER_TAG)..."
	docker build -t $(DOCKER_TAG) -t $(LATEST_TAG) .

docker-push: docker-build ## Push Docker image to registry
	@echo "Pushing Docker image..."
	docker push $(DOCKER_TAG)
	docker push $(LATEST_TAG)

docker-run: ## Run Docker container locally
	@echo "Running Docker container..."
	docker run --rm -p 8080:8080 \
		-e GOOGLE_API_KEY="$(GOOGLE_API_KEY)" \
		-e ANTHROPIC_API_KEY="$(ANTHROPIC_API_KEY)" \
		-e OPENAI_API_KEY="$(OPENAI_API_KEY)" \
		-v $(PWD)/config.yaml:/app/config/config.yaml:ro \
		$(DOCKER_TAG)

# NOTE(review): these use the legacy `docker-compose` v1 binary; consider
# `docker compose` (v2 plugin) if v1 is not installed on dev machines.
docker-compose-up: ## Start services with docker-compose
	@echo "Starting services with docker-compose..."
	docker-compose up -d

docker-compose-down: ## Stop services with docker-compose
	@echo "Stopping services with docker-compose..."
	docker-compose down

docker-compose-logs: ## View docker-compose logs
	docker-compose logs -f

# Kubernetes targets
k8s-namespace: ## Create Kubernetes namespace
	kubectl create namespace llm-gateway --dry-run=client -o yaml | kubectl apply -f -

k8s-secrets: ## Create Kubernetes secrets (requires env vars)
	@echo "Creating secrets..."
	@if [ -z "$(GOOGLE_API_KEY)" ] || [ -z "$(ANTHROPIC_API_KEY)" ] || [ -z "$(OPENAI_API_KEY)" ]; then \
		echo "Error: Please set GOOGLE_API_KEY, ANTHROPIC_API_KEY, and OPENAI_API_KEY environment variables"; \
		exit 1; \
	fi
	kubectl create secret generic llm-gateway-secrets \
		--from-literal=GOOGLE_API_KEY="$(GOOGLE_API_KEY)" \
		--from-literal=ANTHROPIC_API_KEY="$(ANTHROPIC_API_KEY)" \
		--from-literal=OPENAI_API_KEY="$(OPENAI_API_KEY)" \
		--from-literal=OIDC_AUDIENCE="$(OIDC_AUDIENCE)" \
		-n llm-gateway \
		--dry-run=client -o yaml | kubectl apply -f -

k8s-deploy: k8s-namespace k8s-secrets ## Deploy to Kubernetes
	@echo "Deploying to Kubernetes..."
	kubectl apply -k k8s/

k8s-delete: ## Delete from Kubernetes
	@echo "Deleting from Kubernetes..."
	kubectl delete -k k8s/

k8s-status: ## Check Kubernetes deployment status
	@echo "Checking deployment status..."
	kubectl get all -n llm-gateway

k8s-logs: ## View Kubernetes logs
	kubectl logs -n llm-gateway -l app=llm-gateway --tail=100 -f

k8s-describe: ## Describe Kubernetes deployment
	kubectl describe deployment llm-gateway -n llm-gateway

k8s-port-forward: ## Port forward to local machine
	kubectl port-forward -n llm-gateway svc/llm-gateway 8080:80

# CI/CD targets
ci: lint test ## Run CI checks

security-scan: ## Run security scan
	@echo "Running security scan..."
	@which gosec > /dev/null || (echo "gosec not installed. Run: go install github.com/securego/gosec/v2/cmd/gosec@latest" && exit 1)
	gosec ./...

# Run target
run: ## Run locally
	@echo "Running $(APP_NAME) locally..."
	$(GOCMD) run ./cmd/gateway --config config.yaml

# Version info
version: ## Show version
	@echo "Version: $(VERSION)"
	@echo "Image: $(DOCKER_TAG)"

102
docker-compose.yaml Normal file
View File

@@ -0,0 +1,102 @@
# Docker Compose for local development and testing
# Not recommended for production - use Kubernetes instead
# NOTE: the obsolete top-level `version:` key was removed — Compose v2
# ignores it and prints a deprecation warning.
services:
  gateway:
    build:
      context: .
      dockerfile: Dockerfile
    image: llm-gateway:latest
    container_name: llm-gateway
    ports:
      - "8080:8080"
    environment:
      # Provider API keys (taken from the host environment / .env file)
      GOOGLE_API_KEY: ${GOOGLE_API_KEY}
      ANTHROPIC_API_KEY: ${ANTHROPIC_API_KEY}
      OPENAI_API_KEY: ${OPENAI_API_KEY}
      OIDC_AUDIENCE: ${OIDC_AUDIENCE:-}
    volumes:
      # Mounted to the path the image's default CMD expects
      - ./config.yaml:/app/config/config.yaml:ro
    depends_on:
      redis:
        condition: service_healthy
    networks:
      - llm-network
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:8080/health"]
      interval: 30s
      timeout: 5s
      retries: 3
      start_period: 10s

  redis:
    image: redis:7.2-alpine
    container_name: llm-gateway-redis
    ports:
      - "6379:6379"
    command: redis-server --maxmemory 256mb --maxmemory-policy allkeys-lru
    volumes:
      - redis-data:/data
    networks:
      - llm-network
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "redis-cli", "ping"]
      interval: 10s
      timeout: 3s
      retries: 3

  # Optional: Prometheus for metrics (enable with `--profile monitoring`)
  prometheus:
    image: prom/prometheus:latest
    container_name: llm-gateway-prometheus
    ports:
      - "9090:9090"
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'
      - '--web.console.libraries=/usr/share/prometheus/console_libraries'
      - '--web.console.templates=/usr/share/prometheus/consoles'
    volumes:
      - ./monitoring/prometheus.yml:/etc/prometheus/prometheus.yml:ro
      - prometheus-data:/prometheus
    networks:
      - llm-network
    restart: unless-stopped
    profiles:
      - monitoring

  # Optional: Grafana for visualization (enable with `--profile monitoring`)
  grafana:
    image: grafana/grafana:latest
    container_name: llm-gateway-grafana
    ports:
      - "3000:3000"
    environment:
      # Local-dev credentials only — never use this password in production.
      - GF_SECURITY_ADMIN_PASSWORD=admin
      - GF_USERS_ALLOW_SIGN_UP=false
    volumes:
      - ./monitoring/grafana-datasources.yml:/etc/grafana/provisioning/datasources/datasources.yml:ro
      - ./monitoring/grafana-dashboards.yml:/etc/grafana/provisioning/dashboards/dashboards.yml:ro
      - ./monitoring/dashboards:/var/lib/grafana/dashboards:ro
      - grafana-data:/var/lib/grafana
    depends_on:
      - prometheus
    networks:
      - llm-network
    restart: unless-stopped
    profiles:
      - monitoring

networks:
  llm-network:
    driver: bridge

volumes:
  redis-data:
  prometheus-data:
  grafana-data:

352
k8s/README.md Normal file
View File

@@ -0,0 +1,352 @@
# Kubernetes Deployment Guide
This directory contains Kubernetes manifests for deploying the LLM Gateway to production.
## Prerequisites
- Kubernetes cluster (v1.24+)
- `kubectl` configured
- Container registry access
- (Optional) Prometheus Operator for monitoring
- (Optional) cert-manager for TLS certificates
- (Optional) nginx-ingress-controller or cloud load balancer
## Quick Start
### 1. Build and Push Docker Image
```bash
# Build the image
docker build -t your-registry/llm-gateway:v1.0.0 .
# Push to registry
docker push your-registry/llm-gateway:v1.0.0
```
### 2. Configure Secrets
**Option A: Using kubectl**
```bash
kubectl create namespace llm-gateway
kubectl create secret generic llm-gateway-secrets \
--from-literal=GOOGLE_API_KEY="your-key" \
--from-literal=ANTHROPIC_API_KEY="your-key" \
--from-literal=OPENAI_API_KEY="your-key" \
--from-literal=OIDC_AUDIENCE="your-client-id" \
-n llm-gateway
```
**Option B: Using External Secrets Operator (Recommended)**
- Uncomment the ExternalSecret in `secret.yaml`
- Configure your SecretStore (AWS Secrets Manager, Vault, etc.)
### 3. Update Configuration
Edit `configmap.yaml`:
- Update Redis connection string if using external Redis
- Configure observability endpoints (Tempo, Prometheus)
- Adjust rate limits as needed
- Set OIDC issuer and audience
Edit `ingress.yaml`:
- Replace `llm-gateway.example.com` with your domain
- Configure TLS certificate annotations
Edit `kustomization.yaml`:
- Update image registry and tag
### 4. Deploy
**Using Kustomize (Recommended):**
```bash
kubectl apply -k k8s/
```
**Using kubectl directly:**
```bash
kubectl apply -f k8s/namespace.yaml
kubectl apply -f k8s/serviceaccount.yaml
kubectl apply -f k8s/secret.yaml
kubectl apply -f k8s/configmap.yaml
kubectl apply -f k8s/redis.yaml
kubectl apply -f k8s/deployment.yaml
kubectl apply -f k8s/service.yaml
kubectl apply -f k8s/ingress.yaml
kubectl apply -f k8s/hpa.yaml
kubectl apply -f k8s/pdb.yaml
kubectl apply -f k8s/networkpolicy.yaml
```
**With Prometheus Operator:**
```bash
kubectl apply -f k8s/servicemonitor.yaml
kubectl apply -f k8s/prometheusrule.yaml
```
### 5. Verify Deployment
```bash
# Check pods
kubectl get pods -n llm-gateway
# Check services
kubectl get svc -n llm-gateway
# Check ingress
kubectl get ingress -n llm-gateway
# View logs
kubectl logs -n llm-gateway -l app=llm-gateway --tail=100 -f
# Check health
kubectl port-forward -n llm-gateway svc/llm-gateway 8080:80
curl http://localhost:8080/health
```
## Architecture Overview
```
┌─────────────────────────────────────────────────────────┐
│ Internet/Clients │
└───────────────────────┬─────────────────────────────────┘
┌─────────────────────────────────────────────────────────┐
│ Ingress Controller │
│ (nginx/ALB/GCE with TLS) │
└───────────────────────┬─────────────────────────────────┘
┌─────────────────────────────────────────────────────────┐
│ LLM Gateway Service │
│ (LoadBalancer) │
└───────────────────────┬─────────────────────────────────┘
┌───────────────┼───────────────┐
▼ ▼ ▼
┌──────────────┐ ┌──────────────┐ ┌──────────────┐
│ Gateway │ │ Gateway │ │ Gateway │
│ Pod 1 │ │ Pod 2 │ │ Pod 3 │
└──────┬───────┘ └──────┬───────┘ └──────┬───────┘
│ │ │
└────────────────┼────────────────┘
┌───────────────┼───────────────┐
▼ ▼ ▼
┌──────────────┐ ┌──────────────┐ ┌──────────────┐
│ Redis │ │ Prometheus │ │ Tempo │
│ (Persistent) │ │ (Metrics) │ │ (Traces) │
└──────────────┘ └──────────────┘ └──────────────┘
```
## Resource Specifications
### Default Resources
- **Requests**: 100m CPU, 128Mi memory
- **Limits**: 1000m CPU, 512Mi memory
- **Replicas**: 3 (min), 20 (max with HPA)
### Scaling
- HPA scales based on CPU (70%) and memory (80%)
- PodDisruptionBudget ensures minimum 2 replicas during disruptions
## Configuration Options
### Environment Variables (from Secret)
- `GOOGLE_API_KEY`: Google AI API key
- `ANTHROPIC_API_KEY`: Anthropic API key
- `OPENAI_API_KEY`: OpenAI API key
- `OIDC_AUDIENCE`: OIDC client ID for authentication
### ConfigMap Settings
See `configmap.yaml` for full configuration options:
- Server address
- Logging format and level
- Rate limiting
- Observability (metrics/tracing)
- Provider endpoints
- Conversation storage
- Authentication
## Security
### Security Features
- Non-root container execution (UID 1000)
- Read-only root filesystem
- No privilege escalation
- All capabilities dropped
- Network policies for ingress/egress control
- SeccompProfile: RuntimeDefault
### TLS/HTTPS
- Ingress configured with TLS
- Uses cert-manager for automatic certificate provisioning
- Force SSL redirect enabled
### Secrets Management
**Never commit secrets to git!**
Production options:
1. **External Secrets Operator** (Recommended)
- AWS Secrets Manager
- HashiCorp Vault
- Google Secret Manager
2. **Sealed Secrets**
- Encrypted secrets in git
3. **Manual kubectl secrets**
- Created outside of git
## Monitoring
### Metrics
- Exposed on `/metrics` endpoint
- Scraped by Prometheus via ServiceMonitor
- Key metrics:
- HTTP request rate, latency, errors
- Provider request rate, latency, token usage
- Conversation store operations
- Rate limiting hits
### Alerts
See `prometheusrule.yaml` for configured alerts:
- High error rate
- High latency
- Provider failures
- Pod down
- High memory usage
- Rate limit threshold exceeded
- Conversation store errors
### Logs
Structured JSON logs with:
- Request IDs
- Trace context (trace_id, span_id)
- Log levels (debug/info/warn/error)
View logs:
```bash
kubectl logs -n llm-gateway -l app=llm-gateway --tail=100 -f
```
## Maintenance
### Rolling Updates
```bash
# Update image
kubectl set image deployment/llm-gateway gateway=your-registry/llm-gateway:v1.0.1 -n llm-gateway
# Check rollout status
kubectl rollout status deployment/llm-gateway -n llm-gateway
# Rollback if needed
kubectl rollout undo deployment/llm-gateway -n llm-gateway
```
### Scaling
```bash
# Manual scale
kubectl scale deployment/llm-gateway --replicas=5 -n llm-gateway
# HPA will auto-scale within min/max bounds (3-20)
```
### Configuration Updates
```bash
# Edit ConfigMap
kubectl edit configmap llm-gateway-config -n llm-gateway
# Restart pods to pick up changes
kubectl rollout restart deployment/llm-gateway -n llm-gateway
```
### Debugging
```bash
# Exec into pod
kubectl exec -it -n llm-gateway deployment/llm-gateway -- /bin/sh
# Port forward for local access
kubectl port-forward -n llm-gateway svc/llm-gateway 8080:80
# Check events
kubectl get events -n llm-gateway --sort-by='.lastTimestamp'
```
## Production Considerations
### High Availability
- Minimum 3 replicas across availability zones
- Pod anti-affinity rules spread pods across nodes
- PodDisruptionBudget ensures service availability during disruptions
### Performance
- Adjust resource limits based on load testing
- Configure HPA thresholds based on traffic patterns
- Use node affinity for GPU nodes if needed
### Cost Optimization
- Use spot/preemptible instances for non-critical workloads
- Set appropriate resource requests/limits
- Monitor token usage and implement quotas
### Disaster Recovery
- Redis persistence (if using StatefulSet)
- Regular backups of conversation data
- Multi-region deployment for geo-redundancy
- Document runbooks for incident response
## Cloud-Specific Notes
### AWS EKS
- Use AWS Load Balancer Controller for ALB
- Configure IRSA for service account
- Use ElastiCache for Redis
- Store secrets in AWS Secrets Manager
### GCP GKE
- Use GKE Ingress for GCLB
- Configure Workload Identity
- Use Memorystore for Redis
- Store secrets in Google Secret Manager
### Azure AKS
- Use Azure Application Gateway Ingress Controller
- Configure Azure AD Workload Identity
- Use Azure Cache for Redis
- Store secrets in Azure Key Vault
## Troubleshooting
### Common Issues
**Pods not starting:**
```bash
kubectl describe pod -n llm-gateway -l app=llm-gateway
kubectl logs -n llm-gateway -l app=llm-gateway --previous
```
**Health check failures:**
```bash
kubectl port-forward -n llm-gateway deployment/llm-gateway 8080:8080
curl http://localhost:8080/health
curl http://localhost:8080/ready
```
**Provider connection issues:**
- Verify API keys in secrets
- Check network policies allow egress
- Verify provider endpoints are accessible
**Redis connection issues:**
```bash
kubectl exec -it -n llm-gateway redis-0 -- redis-cli ping
```
## Additional Resources
- [Kubernetes Documentation](https://kubernetes.io/docs/)
- [Prometheus Operator](https://github.com/prometheus-operator/prometheus-operator)
- [cert-manager](https://cert-manager.io/)
- [External Secrets Operator](https://external-secrets.io/)

76
k8s/configmap.yaml Normal file
View File

@@ -0,0 +1,76 @@
# Application configuration for the gateway, mounted at /app/config by the
# Deployment. Secret values are referenced as ${VAR} placeholders —
# presumably expanded from the environment by the gateway at load time
# (TODO confirm against the config loader).
apiVersion: v1
kind: ConfigMap
metadata:
  name: llm-gateway-config
  namespace: llm-gateway
  labels:
    app: llm-gateway
data:
  config.yaml: |
    server:
      address: ":8080"
    logging:
      format: "json"
      level: "info"
    rate_limit:
      enabled: true
      requests_per_second: 10
      burst: 20
    observability:
      enabled: true
      metrics:
        enabled: true
        path: "/metrics"
      tracing:
        enabled: true
        service_name: "llm-gateway"
        sampler:
          type: "probability"
          rate: 0.1
        exporter:
          type: "otlp"
          endpoint: "tempo.observability.svc.cluster.local:4317"
          insecure: true
    providers:
      google:
        type: "google"
        api_key: "${GOOGLE_API_KEY}"
        endpoint: "https://generativelanguage.googleapis.com"
      anthropic:
        type: "anthropic"
        api_key: "${ANTHROPIC_API_KEY}"
        endpoint: "https://api.anthropic.com"
      openai:
        type: "openai"
        api_key: "${OPENAI_API_KEY}"
        endpoint: "https://api.openai.com"
    conversations:
      store: "redis"
      ttl: "1h"
      dsn: "redis://redis.llm-gateway.svc.cluster.local:6379/0"
    auth:
      enabled: true
      issuer: "https://accounts.google.com"
      audience: "${OIDC_AUDIENCE}"
    models:
      - name: "gemini-1.5-flash"
        provider: "google"
      - name: "gemini-1.5-pro"
        provider: "google"
      - name: "claude-3-5-sonnet-20241022"
        provider: "anthropic"
      - name: "claude-3-5-haiku-20241022"
        provider: "anthropic"
      - name: "gpt-4o"
        provider: "openai"
      - name: "gpt-4o-mini"
        provider: "openai"

168
k8s/deployment.yaml Normal file
View File

@@ -0,0 +1,168 @@
# Gateway Deployment: 3 replicas, zero-downtime rolling updates, hardened
# pod/container security context, config mounted from the ConfigMap and
# secrets injected as environment variables.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: llm-gateway
  namespace: llm-gateway
  labels:
    app: llm-gateway
    version: v1
spec:
  replicas: 3
  strategy:
    type: RollingUpdate
    rollingUpdate:
      maxSurge: 1
      maxUnavailable: 0  # never drop below the desired replica count
  selector:
    matchLabels:
      app: llm-gateway
  template:
    metadata:
      labels:
        app: llm-gateway
        version: v1
      annotations:
        # Annotation-based scrape config (ServiceMonitor is the alternative)
        prometheus.io/scrape: "true"
        prometheus.io/port: "8080"
        prometheus.io/path: "/metrics"
    spec:
      serviceAccountName: llm-gateway
      securityContext:
        runAsNonRoot: true
        runAsUser: 1000
        runAsGroup: 1000
        fsGroup: 1000
        seccompProfile:
          type: RuntimeDefault
      containers:
        - name: gateway
          image: llm-gateway:latest  # Replace with your registry/image:tag
          imagePullPolicy: IfNotPresent
          ports:
            - name: http
              containerPort: 8080
              protocol: TCP
          env:
            # Provider API Keys from Secret
            - name: GOOGLE_API_KEY
              valueFrom:
                secretKeyRef:
                  name: llm-gateway-secrets
                  key: GOOGLE_API_KEY
            - name: ANTHROPIC_API_KEY
              valueFrom:
                secretKeyRef:
                  name: llm-gateway-secrets
                  key: ANTHROPIC_API_KEY
            - name: OPENAI_API_KEY
              valueFrom:
                secretKeyRef:
                  name: llm-gateway-secrets
                  key: OPENAI_API_KEY
            - name: OIDC_AUDIENCE
              valueFrom:
                secretKeyRef:
                  name: llm-gateway-secrets
                  key: OIDC_AUDIENCE
            # Optional: Pod metadata via the downward API
            - name: POD_NAME
              valueFrom:
                fieldRef:
                  fieldPath: metadata.name
            - name: POD_NAMESPACE
              valueFrom:
                fieldRef:
                  fieldPath: metadata.namespace
            - name: POD_IP
              valueFrom:
                fieldRef:
                  fieldPath: status.podIP
          resources:
            requests:
              cpu: 100m
              memory: 128Mi
            limits:
              cpu: 1000m
              memory: 512Mi
          livenessProbe:
            httpGet:
              path: /health
              port: http
              scheme: HTTP
            initialDelaySeconds: 10
            periodSeconds: 30
            timeoutSeconds: 5
            successThreshold: 1
            failureThreshold: 3
          readinessProbe:
            httpGet:
              path: /ready
              port: http
              scheme: HTTP
            initialDelaySeconds: 5
            periodSeconds: 10
            timeoutSeconds: 5
            successThreshold: 1
            failureThreshold: 3
          # Allow up to 30 * 5s = 150s for slow cold starts before the
          # liveness probe takes over.
          startupProbe:
            httpGet:
              path: /health
              port: http
              scheme: HTTP
            initialDelaySeconds: 0
            periodSeconds: 5
            timeoutSeconds: 3
            successThreshold: 1
            failureThreshold: 30
          volumeMounts:
            - name: config
              mountPath: /app/config
              readOnly: true
            # Writable scratch space — required because the root FS is read-only
            - name: tmp
              mountPath: /tmp
          securityContext:
            allowPrivilegeEscalation: false
            readOnlyRootFilesystem: true
            runAsNonRoot: true
            runAsUser: 1000
            capabilities:
              drop:
                - ALL
      volumes:
        - name: config
          configMap:
            name: llm-gateway-config
        - name: tmp
          emptyDir: {}
      # Prefer spreading replicas across nodes (soft anti-affinity)
      affinity:
        podAntiAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 100
              podAffinityTerm:
                labelSelector:
                  matchExpressions:
                    - key: app
                      operator: In
                      values:
                        - llm-gateway
                topologyKey: kubernetes.io/hostname
      # Tolerations (if needed for specific node pools)
      # tolerations:
      #   - key: "workload-type"
      #     operator: "Equal"
      #     value: "llm"
      #     effect: "NoSchedule"

63
k8s/hpa.yaml Normal file
View File

@@ -0,0 +1,63 @@
# Autoscaling: 3-20 replicas, scale up aggressively, scale down slowly
# (5-minute stabilization) to avoid flapping under bursty LLM traffic.
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: llm-gateway
  namespace: llm-gateway
  labels:
    app: llm-gateway
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: llm-gateway
  minReplicas: 3
  maxReplicas: 20
  behavior:
    scaleDown:
      stabilizationWindowSeconds: 300
      policies:
        - type: Percent
          value: 50
          periodSeconds: 60
        - type: Pods
          value: 2
          periodSeconds: 60
      selectPolicy: Min  # remove pods at the gentler of the two rates
    scaleUp:
      stabilizationWindowSeconds: 0
      policies:
        - type: Percent
          value: 100
          periodSeconds: 30
        - type: Pods
          value: 4
          periodSeconds: 30
      selectPolicy: Max  # add pods at the faster of the two rates
  metrics:
    # CPU-based scaling
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: 70
    # Memory-based scaling
    - type: Resource
      resource:
        name: memory
        target:
          type: Utilization
          averageUtilization: 80
    # Custom metrics (requires metrics-server and custom metrics API)
    # - type: Pods
    #   pods:
    #     metric:
    #       name: http_requests_per_second
    #     target:
    #       type: AverageValue
    #       averageValue: "1000"

66
k8s/ingress.yaml Normal file
View File

@@ -0,0 +1,66 @@
# Public entry point for the gateway with TLS, rate limiting and timeouts.
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: llm-gateway
  namespace: llm-gateway
  labels:
    app: llm-gateway
  annotations:
    # TLS configuration
    cert-manager.io/cluster-issuer: "letsencrypt-prod"
    # Security headers
    nginx.ingress.kubernetes.io/force-ssl-redirect: "true"
    nginx.ingress.kubernetes.io/ssl-protocols: "TLSv1.2 TLSv1.3"
    # Rate limiting (supplement application-level rate limiting)
    nginx.ingress.kubernetes.io/limit-rps: "100"
    nginx.ingress.kubernetes.io/limit-connections: "50"
    # Request size limit (10MB)
    nginx.ingress.kubernetes.io/proxy-body-size: "10m"
    # Timeouts (generous read/send timeouts for streaming LLM responses)
    nginx.ingress.kubernetes.io/proxy-connect-timeout: "60"
    nginx.ingress.kubernetes.io/proxy-send-timeout: "120"
    nginx.ingress.kubernetes.io/proxy-read-timeout: "120"
    # CORS (if needed)
    # nginx.ingress.kubernetes.io/enable-cors: "true"
    # nginx.ingress.kubernetes.io/cors-allow-origin: "https://yourdomain.com"
    # nginx.ingress.kubernetes.io/cors-allow-methods: "GET, POST, OPTIONS"
    # nginx.ingress.kubernetes.io/cors-allow-credentials: "true"
    # For AWS ALB Ingress Controller (alternative to nginx; also set
    # spec.ingressClassName: alb)
    # alb.ingress.kubernetes.io/scheme: "internet-facing"
    # alb.ingress.kubernetes.io/target-type: "ip"
    # alb.ingress.kubernetes.io/listen-ports: '[{"HTTP": 80}, {"HTTPS": 443}]'
    # alb.ingress.kubernetes.io/ssl-redirect: '443'
    # alb.ingress.kubernetes.io/certificate-arn: "arn:aws:acm:region:account:certificate/xxx"
    # For GKE Ingress (alternative; set spec.ingressClassName: gce)
    # kubernetes.io/ingress.global-static-ip-name: "llm-gateway-ip"
    # ingress.gcp.kubernetes.io/pre-shared-cert: "llm-gateway-cert"
spec:
  # FIX: the `kubernetes.io/ingress.class` annotation is deprecated since
  # Kubernetes 1.18; use the spec field instead.
  ingressClassName: nginx
  tls:
    - hosts:
        - llm-gateway.example.com  # Replace with your domain
      secretName: llm-gateway-tls
  rules:
    - host: llm-gateway.example.com  # Replace with your domain
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: llm-gateway
                port:
                  number: 80

46
k8s/kustomization.yaml Normal file
View File

@@ -0,0 +1,46 @@
# Kustomize configuration for easy deployment
# Usage: kubectl apply -k k8s/
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization

namespace: llm-gateway

resources:
  - namespace.yaml
  - serviceaccount.yaml
  - configmap.yaml
  - secret.yaml
  - deployment.yaml
  - service.yaml
  - ingress.yaml
  - hpa.yaml
  - pdb.yaml
  - networkpolicy.yaml
  - redis.yaml
  - servicemonitor.yaml
  - prometheusrule.yaml

# Common labels applied to all resources.
# NOTE(review): `commonLabels` is deprecated in newer kustomize releases in
# favor of `labels:` — keep an eye on the kustomize version in use.
commonLabels:
  app.kubernetes.io/name: llm-gateway
  app.kubernetes.io/component: api-gateway
  app.kubernetes.io/part-of: llm-platform

# Images to be used (customize for your registry)
images:
  - name: llm-gateway
    newName: your-registry/llm-gateway
    newTag: latest

# ConfigMap generator (alternative to configmap.yaml)
# configMapGenerator:
#   - name: llm-gateway-config
#     files:
#       - config.yaml

# Secret generator (for local development only)
# secretGenerator:
#   - name: llm-gateway-secrets
#     envs:
#       - secrets.env

7
k8s/namespace.yaml Normal file
View File

@@ -0,0 +1,7 @@
# Dedicated namespace for the LLM gateway and its supporting services
# (Redis, monitoring objects). Every other manifest in k8s/ targets it.
apiVersion: v1
kind: Namespace
metadata:
  name: llm-gateway
  labels:
    app: llm-gateway
    environment: production
    # NOTE(review): consider adding Pod Security Admission labels here
    # (e.g. pod-security.kubernetes.io/enforce: restricted) — the workloads
    # in this repo look restricted-compliant, but verify before enforcing.

83
k8s/networkpolicy.yaml Normal file
View File

@@ -0,0 +1,83 @@
# Default-deny-style policy for the gateway pods: only the ingress and
# egress flows listed below are permitted.
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
  name: llm-gateway
  namespace: llm-gateway
  labels:
    app: llm-gateway
spec:
  podSelector:
    matchLabels:
      app: llm-gateway
  policyTypes:
    - Ingress
    - Egress
  ingress:
    # Allow traffic from ingress controller
    - from:
        - namespaceSelector:
            matchLabels:
              name: ingress-nginx
      ports:
        - protocol: TCP
          port: 8080
    # Allow traffic from within the namespace (for debugging/testing)
    - from:
        - podSelector: {}
      ports:
        - protocol: TCP
          port: 8080
    # Allow Prometheus scraping
    - from:
        - namespaceSelector:
            matchLabels:
              name: observability
          podSelector:
            matchLabels:
              app: prometheus
      ports:
        - protocol: TCP
          port: 8080
  egress:
    # Allow DNS. TCP 53 is allowed as well as UDP: resolvers fall back to
    # TCP for truncated responses, and some cluster DNS setups require it.
    - to:
        - namespaceSelector: {}
          podSelector:
            matchLabels:
              k8s-app: kube-dns
      ports:
        - protocol: UDP
          port: 53
        - protocol: TCP
          port: 53
    # Allow Redis access
    - to:
        - podSelector:
            matchLabels:
              app: redis
      ports:
        - protocol: TCP
          port: 6379
    # Allow external provider API access (OpenAI, Anthropic, Google).
    # FIX: a namespaceSelector only ever matches in-cluster pods, so the
    # previous rule (namespaceSelector: {}) did NOT permit egress to the
    # public internet and provider API calls would have been blocked.
    # External destinations require an ipBlock. The in-cluster 443
    # allowance is kept for backward compatibility.
    - to:
        - ipBlock:
            cidr: 0.0.0.0/0
            except:
              # Keep application pods away from the cloud metadata endpoint.
              - 169.254.169.254/32
        - namespaceSelector: {}
      ports:
        - protocol: TCP
          port: 443
    # Allow OTLP tracing export
    - to:
        - namespaceSelector:
            matchLabels:
              name: observability
          podSelector:
            matchLabels:
              app: tempo
      ports:
        - protocol: TCP
          port: 4317

13
k8s/pdb.yaml Normal file
View File

@@ -0,0 +1,13 @@
# Keeps at least 2 gateway pods available during voluntary disruptions
# (node drains, cluster upgrades). For drains to make progress the
# Deployment must normally run 3+ replicas — TODO confirm against
# deployment.yaml / hpa.yaml minReplicas.
apiVersion: policy/v1
kind: PodDisruptionBudget
metadata:
  name: llm-gateway
  namespace: llm-gateway
  labels:
    app: llm-gateway
spec:
  # Voluntary evictions are refused whenever they would leave fewer than
  # 2 ready pods matching the selector below.
  minAvailable: 2
  selector:
    matchLabels:
      app: llm-gateway
  # Pods that are Running but never became Ready may always be evicted,
  # so crash-looping pods cannot block node drains (field requires
  # Kubernetes v1.26+).
  unhealthyPodEvictionPolicy: AlwaysAllow

122
k8s/prometheusrule.yaml Normal file
View File

@@ -0,0 +1,122 @@
# PrometheusRule for alerting
# Requires Prometheus Operator to be installed
# NOTE(review): the ratio-style expressions below yield no samples when the
# denominator is zero (no traffic), so those alerts can only fire while the
# gateway is actually receiving requests. Metric names must match the
# gateway's instrumentation — confirm against the application code.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: llm-gateway
  namespace: llm-gateway
  labels:
    app: llm-gateway
    # Must match the ruleSelector of the Prometheus instance that should
    # load these rules.
    prometheus: kube-prometheus
spec:
  groups:
    - name: llm-gateway.rules
      interval: 30s
      rules:
        # High error rate: 5xx responses exceed 5% of all requests for 5m.
        - alert: LLMGatewayHighErrorRate
          expr: |
            (
              sum(rate(http_requests_total{namespace="llm-gateway",status_code=~"5.."}[5m]))
              /
              sum(rate(http_requests_total{namespace="llm-gateway"}[5m]))
            ) > 0.05
          for: 5m
          labels:
            severity: warning
            component: llm-gateway
          annotations:
            summary: "High error rate in LLM Gateway"
            description: "Error rate is {{ $value | humanizePercentage }} (threshold: 5%)"
        # High latency: p95 request duration above 10s (generous threshold,
        # presumably because upstream LLM calls are slow — confirm SLO).
        - alert: LLMGatewayHighLatency
          expr: |
            histogram_quantile(0.95,
              sum(rate(http_request_duration_seconds_bucket{namespace="llm-gateway"}[5m])) by (le)
            ) > 10
          for: 5m
          labels:
            severity: warning
            component: llm-gateway
          annotations:
            summary: "High latency in LLM Gateway"
            description: "P95 latency is {{ $value }}s (threshold: 10s)"
        # Provider errors: per-provider upstream error ratio above 10%.
        - alert: LLMProviderHighErrorRate
          expr: |
            (
              sum(rate(provider_requests_total{namespace="llm-gateway",status="error"}[5m])) by (provider)
              /
              sum(rate(provider_requests_total{namespace="llm-gateway"}[5m])) by (provider)
            ) > 0.10
          for: 5m
          labels:
            severity: warning
            component: llm-gateway
          annotations:
            summary: "High error rate for provider {{ $labels.provider }}"
            description: "Error rate is {{ $value | humanizePercentage }} (threshold: 10%)"
        # Pod down: scrape target unreachable for 2m (relies on the
        # ServiceMonitor job name being "llm-gateway").
        - alert: LLMGatewayPodDown
          expr: |
            up{job="llm-gateway",namespace="llm-gateway"} == 0
          for: 2m
          labels:
            severity: critical
            component: llm-gateway
          annotations:
            summary: "LLM Gateway pod is down"
            description: "Pod {{ $labels.pod }} has been down for more than 2 minutes"
        # High memory usage: working set above 85% of the container limit
        # (requires cAdvisor metrics and a memory limit to be set).
        - alert: LLMGatewayHighMemoryUsage
          expr: |
            (
              container_memory_working_set_bytes{namespace="llm-gateway",container="gateway"}
              /
              container_spec_memory_limit_bytes{namespace="llm-gateway",container="gateway"}
            ) > 0.85
          for: 5m
          labels:
            severity: warning
            component: llm-gateway
          annotations:
            summary: "High memory usage in LLM Gateway"
            description: "Memory usage is {{ $value | humanizePercentage }} (threshold: 85%)"
        # Rate limit threshold: more than 20% of requests answered 429 for
        # 10m — informational; may indicate limits are set too low.
        - alert: LLMGatewayHighRateLimitHitRate
          expr: |
            (
              sum(rate(http_requests_total{namespace="llm-gateway",status_code="429"}[5m]))
              /
              sum(rate(http_requests_total{namespace="llm-gateway"}[5m]))
            ) > 0.20
          for: 10m
          labels:
            severity: info
            component: llm-gateway
          annotations:
            summary: "High rate limit hit rate"
            description: "{{ $value | humanizePercentage }} of requests are being rate limited"
        # Conversation store errors: Redis-backed store error ratio above 5%.
        - alert: LLMGatewayConversationStoreErrors
          expr: |
            (
              sum(rate(conversation_store_operations_total{namespace="llm-gateway",status="error"}[5m]))
              /
              sum(rate(conversation_store_operations_total{namespace="llm-gateway"}[5m]))
            ) > 0.05
          for: 5m
          labels:
            severity: warning
            component: llm-gateway
          annotations:
            summary: "High error rate in conversation store"
            description: "Error rate is {{ $value | humanizePercentage }} (threshold: 5%)"

131
k8s/redis.yaml Normal file
View File

@@ -0,0 +1,131 @@
# Simple Redis deployment for conversation storage
# For production, consider using:
# - Redis Operator (e.g., Redis Enterprise Operator)
# - Managed Redis (AWS ElastiCache, GCP Memorystore, Azure Cache for Redis)
# - Redis Cluster for high availability
apiVersion: v1
kind: ConfigMap
metadata:
  name: redis-config
  namespace: llm-gateway
  labels:
    app: redis
data:
  redis.conf: |
    maxmemory 256mb
    maxmemory-policy allkeys-lru
    # FIX: persistence was fully disabled (save "" + appendonly no), which
    # made the StatefulSet's 10Gi volumeClaimTemplate pointless and lost
    # every conversation on pod restart. Enable AOF on the PVC mounted at
    # /data; RDB snapshots stay off (save "").
    dir /data
    save ""
    appendonly yes
    appendfsync everysec
---
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: redis
  namespace: llm-gateway
  labels:
    app: redis
spec:
  # Must match the headless Service below; gives the pod a stable DNS name
  # (redis-0.redis.llm-gateway.svc).
  serviceName: redis
  # Single instance — no HA. See the header comment in this file for
  # production alternatives.
  replicas: 1
  selector:
    matchLabels:
      app: redis
  template:
    metadata:
      labels:
        app: redis
    spec:
      # Run the whole pod as the unprivileged redis user (uid 999 in the
      # official image); fsGroup 999 makes the PVC writable by that user.
      securityContext:
        runAsNonRoot: true
        runAsUser: 999
        fsGroup: 999
        seccompProfile:
          type: RuntimeDefault
      containers:
        - name: redis
          image: redis:7.2-alpine
          imagePullPolicy: IfNotPresent
          # Load the ConfigMap-provided config instead of image defaults.
          command:
            - redis-server
            - /etc/redis/redis.conf
          ports:
            - name: redis
              containerPort: 6379
              protocol: TCP
          resources:
            requests:
              cpu: 100m
              memory: 128Mi
            # NOTE(review): redis.conf caps the dataset at maxmemory 256mb;
            # the 512Mi limit leaves headroom for Redis overhead.
            limits:
              cpu: 500m
              memory: 512Mi
          # Liveness: only check that the port accepts TCP connections.
          livenessProbe:
            tcpSocket:
              port: redis
            initialDelaySeconds: 10
            periodSeconds: 10
            timeoutSeconds: 5
            failureThreshold: 3
          # Readiness: require an actual PING/PONG round trip via redis-cli.
          readinessProbe:
            exec:
              command:
                - redis-cli
                - ping
            initialDelaySeconds: 5
            periodSeconds: 5
            timeoutSeconds: 3
            failureThreshold: 3
          volumeMounts:
            - name: config
              mountPath: /etc/redis
            - name: data
              mountPath: /data
          # Hardened container: no privilege escalation, read-only root
          # filesystem (only /data is writable), all capabilities dropped.
          securityContext:
            allowPrivilegeEscalation: false
            readOnlyRootFilesystem: true
            runAsNonRoot: true
            runAsUser: 999
            capabilities:
              drop:
                - ALL
      volumes:
        - name: config
          configMap:
            name: redis-config
  # One PVC per replica, retained across pod restarts and rescheduling.
  # NOTE(review): verify the persistence directives in redis-config
  # (save/appendonly/dir) actually write to this volume — otherwise the
  # 10Gi claim is unused.
  volumeClaimTemplates:
    - metadata:
        name: data
      spec:
        accessModes: ["ReadWriteOnce"]
        resources:
          requests:
            storage: 10Gi
---
# Headless Service for the Redis StatefulSet: clusterIP None gives each
# replica a stable per-pod DNS record (redis-0.redis.llm-gateway.svc) and
# matches the StatefulSet's serviceName.
apiVersion: v1
kind: Service
metadata:
  name: redis
  namespace: llm-gateway
  labels:
    app: redis
spec:
  type: ClusterIP
  clusterIP: None
  selector:
    app: redis
  ports:
    - name: redis
      port: 6379
      # Resolves against the container port named "redis" in the StatefulSet.
      targetPort: redis
      protocol: TCP

46
k8s/secret.yaml Normal file
View File

@@ -0,0 +1,46 @@
apiVersion: v1
kind: Secret
metadata:
  name: llm-gateway-secrets
  namespace: llm-gateway
  labels:
    app: llm-gateway
type: Opaque
# stringData accepts plain text; the API server base64-encodes it into
# `data` on write. The values below are placeholders only — never commit
# real keys to version control.
stringData:
  # IMPORTANT: Replace these with actual values or use external secret management
  # For production, use:
  # - kubectl create secret generic llm-gateway-secrets --from-literal=...
  # - External Secrets Operator with AWS Secrets Manager/HashiCorp Vault
  # - Sealed Secrets
  GOOGLE_API_KEY: "your-google-api-key-here"
  ANTHROPIC_API_KEY: "your-anthropic-api-key-here"
  OPENAI_API_KEY: "your-openai-api-key-here"
  # Expected OIDC `aud` claim for incoming tokens — presumably a Google
  # OAuth client ID; confirm against the gateway's auth configuration.
  OIDC_AUDIENCE: "your-client-id.apps.googleusercontent.com"
---
# Example using External Secrets Operator (commented out)
# apiVersion: external-secrets.io/v1beta1
# kind: ExternalSecret
# metadata:
#   name: llm-gateway-secrets
#   namespace: llm-gateway
# spec:
#   refreshInterval: 1h
#   secretStoreRef:
#     name: aws-secrets-manager
#     kind: SecretStore
#   target:
#     name: llm-gateway-secrets
#     creationPolicy: Owner
#   data:
#     - secretKey: GOOGLE_API_KEY
#       remoteRef:
#         key: prod/llm-gateway/google-api-key
#     - secretKey: ANTHROPIC_API_KEY
#       remoteRef:
#         key: prod/llm-gateway/anthropic-api-key
#     - secretKey: OPENAI_API_KEY
#       remoteRef:
#         key: prod/llm-gateway/openai-api-key
#     - secretKey: OIDC_AUDIENCE
#       remoteRef:
#         key: prod/llm-gateway/oidc-audience

40
k8s/service.yaml Normal file
View File

@@ -0,0 +1,40 @@
# ClusterIP Service fronting the gateway pods; the Ingress routes to this
# Service on port 80, which forwards to the pods' named "http" port.
apiVersion: v1
kind: Service
metadata:
  name: llm-gateway
  namespace: llm-gateway
  labels:
    app: llm-gateway
  annotations:
    # For cloud load balancers (uncomment as needed)
    # service.beta.kubernetes.io/aws-load-balancer-type: "nlb"
    # cloud.google.com/neg: '{"ingress": true}'
spec:
  type: ClusterIP
  selector:
    app: llm-gateway
  ports:
    - name: http
      port: 80
      # Resolves against the container port named "http" in the Deployment.
      targetPort: http
      protocol: TCP
  sessionAffinity: None
---
# Headless service for pod-to-pod communication (if needed)
# clusterIP: None yields per-pod DNS records instead of a virtual IP.
apiVersion: v1
kind: Service
metadata:
  name: llm-gateway-headless
  namespace: llm-gateway
  labels:
    app: llm-gateway
spec:
  type: ClusterIP
  clusterIP: None
  selector:
    app: llm-gateway
  ports:
    - name: http
      port: 8080
      targetPort: http
      protocol: TCP

14
k8s/serviceaccount.yaml Normal file
View File

@@ -0,0 +1,14 @@
# Identity for the gateway pods; referenced by the Deployment and usable
# for cloud IAM federation via the annotations below.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: llm-gateway
  namespace: llm-gateway
  labels:
    app: llm-gateway
  annotations:
    # For GKE Workload Identity
    # iam.gke.io/gcp-service-account: llm-gateway@PROJECT_ID.iam.gserviceaccount.com
    # For EKS IRSA (IAM Roles for Service Accounts)
    # eks.amazonaws.com/role-arn: arn:aws:iam::ACCOUNT_ID:role/llm-gateway-role
# NOTE(review): consider setting this to false — nothing in these manifests
# shows the gateway calling the Kubernetes API, and not mounting a token
# shrinks the attack surface. Confirm against the application code first.
automountServiceAccountToken: true

35
k8s/servicemonitor.yaml Normal file
View File

@@ -0,0 +1,35 @@
# ServiceMonitor for Prometheus Operator
# Requires Prometheus Operator to be installed
# https://github.com/prometheus-operator/prometheus-operator
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
name: llm-gateway
namespace: llm-gateway
labels:
app: llm-gateway
prometheus: kube-prometheus
spec:
selector:
matchLabels:
app: llm-gateway
endpoints:
- port: http
path: /metrics
interval: 30s
scrapeTimeout: 10s
relabelings:
# Add namespace label
- sourceLabels: [__meta_kubernetes_namespace]
targetLabel: namespace
# Add pod label
- sourceLabels: [__meta_kubernetes_pod_name]
targetLabel: pod
# Add service label
- sourceLabels: [__meta_kubernetes_service_name]
targetLabel: service