diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..bacc824 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,65 @@ +# Git +.git +.gitignore +.github + +# Documentation +*.md +docs/ + +# IDE +.vscode/ +.idea/ +*.swp +*.swo +*~ + +# Build artifacts +/bin/ +/dist/ +/build/ +/gateway +/cmd/gateway/gateway +*.exe +*.dll +*.so +*.dylib +*.test +*.out + +# Configuration files with secrets +config.yaml +config.json +*-local.yaml +*-local.json +.env +.env.local +*.key +*.pem + +# Test and coverage +coverage.out +*.log +logs/ + +# OS +.DS_Store +Thumbs.db + +# Dependencies (will be downloaded during build) +vendor/ + +# Python +__pycache__/ +*.py[cod] +tests/node_modules/ + +# Jujutsu +.jj/ + +# Claude +.claude/ + +# Data directories +data/ +*.db diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml new file mode 100644 index 0000000..99800bd --- /dev/null +++ b/.github/workflows/ci.yaml @@ -0,0 +1,181 @@ +name: CI + +on: + push: + branches: [ main, develop ] + pull_request: + branches: [ main, develop ] + +env: + GO_VERSION: '1.23' + REGISTRY: ghcr.io + IMAGE_NAME: ${{ github.repository }} + +jobs: + test: + name: Test + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up Go + uses: actions/setup-go@v5 + with: + go-version: ${{ env.GO_VERSION }} + cache: true + + - name: Download dependencies + run: go mod download + + - name: Verify dependencies + run: go mod verify + + - name: Run tests + run: go test -v -race -coverprofile=coverage.out ./... 
+ + - name: Upload coverage to Codecov + uses: codecov/codecov-action@v4 + with: + file: ./coverage.out + flags: unittests + name: codecov-umbrella + + - name: Generate coverage report + run: go tool cover -html=coverage.out -o coverage.html + + - name: Upload coverage report + uses: actions/upload-artifact@v4 + with: + name: coverage-report + path: coverage.html + + lint: + name: Lint + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up Go + uses: actions/setup-go@v5 + with: + go-version: ${{ env.GO_VERSION }} + cache: true + + - name: Run golangci-lint + uses: golangci/golangci-lint-action@v4 + with: + version: latest + args: --timeout=5m + + security: + name: Security Scan + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up Go + uses: actions/setup-go@v5 + with: + go-version: ${{ env.GO_VERSION }} + cache: true + + - name: Run Gosec Security Scanner + uses: securego/gosec@master + with: + args: '-no-fail -fmt sarif -out results.sarif ./...' 
+ + - name: Upload SARIF file + uses: github/codeql-action/upload-sarif@v3 + with: + sarif_file: results.sarif + + build: + name: Build + runs-on: ubuntu-latest + needs: [test, lint] + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up Go + uses: actions/setup-go@v5 + with: + go-version: ${{ env.GO_VERSION }} + cache: true + + - name: Build binary + run: | + CGO_ENABLED=1 go build -v -o bin/gateway ./cmd/gateway + + - name: Upload binary + uses: actions/upload-artifact@v4 + with: + name: gateway-binary + path: bin/gateway + + docker: + name: Build and Push Docker Image + runs-on: ubuntu-latest + needs: [test, lint, security] + if: github.event_name == 'push' && (github.ref == 'refs/heads/main' || github.ref == 'refs/heads/develop') + + permissions: + contents: read + packages: write + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Log in to Container Registry + uses: docker/login-action@v3 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Extract metadata + id: meta + uses: docker/metadata-action@v5 + with: + images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} + tags: | + type=ref,event=branch + type=ref,event=pr + type=semver,pattern={{version}} + type=semver,pattern={{major}}.{{minor}} + type=sha,prefix={{branch}}- + type=raw,value=latest,enable=${{ github.ref == 'refs/heads/main' }} + + - name: Build and push Docker image + uses: docker/build-push-action@v5 + with: + context: . 
+ push: true + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} + cache-from: type=gha + cache-to: type=gha,mode=max + platforms: linux/amd64,linux/arm64 + + - name: Run Trivy vulnerability scanner + uses: aquasecurity/trivy-action@master + with: + image-ref: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ github.ref_name }} + format: 'sarif' + output: 'trivy-results.sarif' + + - name: Upload Trivy results to GitHub Security + uses: github/codeql-action/upload-sarif@v3 + with: + sarif_file: 'trivy-results.sarif' diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml new file mode 100644 index 0000000..c680643 --- /dev/null +++ b/.github/workflows/release.yaml @@ -0,0 +1,129 @@ +name: Release + +on: + push: + tags: + - 'v*' + +env: + GO_VERSION: '1.23' + REGISTRY: ghcr.io + IMAGE_NAME: ${{ github.repository }} + +jobs: + release: + name: Create Release + runs-on: ubuntu-latest + + permissions: + contents: write + packages: write + + steps: + - name: Checkout code + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Set up Go + uses: actions/setup-go@v5 + with: + go-version: ${{ env.GO_VERSION }} + + - name: Run tests + run: go test -v ./... 
+ + - name: Build binaries + run: | + # Linux amd64 + GOOS=linux GOARCH=amd64 CGO_ENABLED=1 go build -o bin/gateway-linux-amd64 ./cmd/gateway + + # Linux arm64 + GOOS=linux GOARCH=arm64 CGO_ENABLED=1 go build -o bin/gateway-linux-arm64 ./cmd/gateway + + # macOS amd64 + GOOS=darwin GOARCH=amd64 CGO_ENABLED=1 go build -o bin/gateway-darwin-amd64 ./cmd/gateway + + # macOS arm64 + GOOS=darwin GOARCH=arm64 CGO_ENABLED=1 go build -o bin/gateway-darwin-arm64 ./cmd/gateway + + - name: Create checksums + run: | + cd bin + sha256sum gateway-* > checksums.txt + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Log in to Container Registry + uses: docker/login-action@v3 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Extract metadata + id: meta + uses: docker/metadata-action@v5 + with: + images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} + tags: | + type=semver,pattern={{version}} + type=semver,pattern={{major}}.{{minor}} + type=semver,pattern={{major}} + type=raw,value=latest + + - name: Build and push Docker image + uses: docker/build-push-action@v5 + with: + context: . 
+ push: true + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} + platforms: linux/amd64,linux/arm64 + cache-from: type=gha + cache-to: type=gha,mode=max + + - name: Generate changelog + id: changelog + run: | + git log $(git describe --tags --abbrev=0 HEAD^)..HEAD --pretty=format:"* %s (%h)" > CHANGELOG.txt + echo "changelog<<EOF" >> $GITHUB_OUTPUT + cat CHANGELOG.txt >> $GITHUB_OUTPUT + echo "EOF" >> $GITHUB_OUTPUT + + - name: Create Release + uses: softprops/action-gh-release@v1 + with: + body: | + ## Changes + ${{ steps.changelog.outputs.changelog }} + + ## Docker Images + ``` + docker pull ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ github.ref_name }} + docker pull ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:latest + ``` + + ## Installation + + ### Kubernetes + ```bash + kubectl apply -k k8s/ + ``` + + ### Docker + ```bash + docker run -p 8080:8080 \ + -e GOOGLE_API_KEY=your-key \ + -e ANTHROPIC_API_KEY=your-key \ + -e OPENAI_API_KEY=your-key \ + ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ github.ref_name }} + ``` + files: | + bin/gateway-* + bin/checksums.txt + draft: false + prerelease: ${{ contains(github.ref, 'alpha') || contains(github.ref, 'beta') || contains(github.ref, 'rc') }} + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..51d348e --- /dev/null +++ b/Dockerfile @@ -0,0 +1,62 @@ +# Multi-stage build for Go LLM Gateway +# Stage 1: Build the Go binary +FROM golang:alpine AS builder + +# Install build dependencies +RUN apk add --no-cache git ca-certificates tzdata + +WORKDIR /build + +# Copy go mod files first for better caching +COPY go.mod go.sum ./ +RUN go mod download + +# Copy source code +COPY . . 
+ +# Build the binary with optimizations +# CGO is required for SQLite support +# NOTE: no hard-coded GOOS/GOARCH — buildx runs this stage natively per +# platform (linux/amd64, linux/arm64), so forcing amd64 would break arm64 +RUN apk add --no-cache gcc musl-dev && \ + CGO_ENABLED=1 go build \ + -ldflags='-w -s -extldflags "-static"' \ + -a -installsuffix cgo \ + -o gateway \ + ./cmd/gateway + +# Stage 2: Create minimal runtime image +FROM alpine:3.19 + +# Install runtime dependencies +RUN apk add --no-cache ca-certificates tzdata + +# Create non-root user +RUN addgroup -g 1000 gateway && \ + adduser -D -u 1000 -G gateway gateway + +# Create necessary directories +RUN mkdir -p /app /app/data && \ + chown -R gateway:gateway /app + +WORKDIR /app + +# Copy binary from builder +COPY --from=builder /build/gateway /app/gateway + +# Copy example config (optional, mainly for documentation) +COPY config.example.yaml /app/config.example.yaml + +# Switch to non-root user +USER gateway + +# Expose port +EXPOSE 8080 + +# Health check +HEALTHCHECK --interval=30s --timeout=5s --start-period=5s --retries=3 \ + CMD wget --no-verbose --tries=1 --spider http://localhost:8080/health || exit 1 + +# Set entrypoint +ENTRYPOINT ["/app/gateway"] + +# Default command (can be overridden) +CMD ["--config", "/app/config/config.yaml"] diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..fdc6346 --- /dev/null +++ b/Makefile @@ -0,0 +1,151 @@ +# Makefile for LLM Gateway + +.PHONY: help build test docker-build docker-push k8s-deploy k8s-delete clean + +# Variables +APP_NAME := llm-gateway +VERSION := $(shell git describe --tags --always --dirty 2>/dev/null || echo "dev") +REGISTRY ?= your-registry +IMAGE := $(REGISTRY)/$(APP_NAME) +DOCKER_TAG := $(IMAGE):$(VERSION) +LATEST_TAG := $(IMAGE):latest + +# Go variables +GOCMD := go +GOBUILD := $(GOCMD) build +GOTEST := $(GOCMD) test +GOMOD := $(GOCMD) mod +GOFMT := $(GOCMD) fmt + +# Build directory +BUILD_DIR := bin + +# Help target +help: ## Show this help message + @echo "Usage: make [target]" + @echo "" + @echo "Targets:" + @awk 'BEGIN {FS = ":.*##"; 
printf "\n"} /^[a-zA-Z_-]+:.*?##/ { printf " %-20s %s\n", $$1, $$2 }' $(MAKEFILE_LIST) + +# Development targets +build: ## Build the binary + @echo "Building $(APP_NAME)..." + CGO_ENABLED=1 $(GOBUILD) -o $(BUILD_DIR)/$(APP_NAME) ./cmd/gateway + +build-static: ## Build static binary + @echo "Building static binary..." + CGO_ENABLED=1 $(GOBUILD) -ldflags='-w -s -extldflags "-static"' -a -installsuffix cgo -o $(BUILD_DIR)/$(APP_NAME) ./cmd/gateway + +test: ## Run tests + @echo "Running tests..." + $(GOTEST) -v -race -coverprofile=coverage.out ./... + +test-coverage: test ## Run tests with coverage report + @echo "Generating coverage report..." + $(GOCMD) tool cover -html=coverage.out -o coverage.html + @echo "Coverage report saved to coverage.html" + +fmt: ## Format Go code + @echo "Formatting code..." + $(GOFMT) ./... + +lint: ## Run linter + @echo "Running linter..." + @which golangci-lint > /dev/null || (echo "golangci-lint not installed. Run: go install github.com/golangci/golangci-lint/cmd/golangci-lint@latest" && exit 1) + golangci-lint run ./... + +tidy: ## Tidy go modules + @echo "Tidying go modules..." + $(GOMOD) tidy + +clean: ## Clean build artifacts + @echo "Cleaning..." + rm -rf $(BUILD_DIR) + rm -f coverage.out coverage.html + +# Docker targets +docker-build: ## Build Docker image + @echo "Building Docker image $(DOCKER_TAG)..." + docker build -t $(DOCKER_TAG) -t $(LATEST_TAG) . + +docker-push: docker-build ## Push Docker image to registry + @echo "Pushing Docker image..." + docker push $(DOCKER_TAG) + docker push $(LATEST_TAG) + +docker-run: ## Run Docker container locally + @echo "Running Docker container..." + docker run --rm -p 8080:8080 \ + -e GOOGLE_API_KEY="$(GOOGLE_API_KEY)" \ + -e ANTHROPIC_API_KEY="$(ANTHROPIC_API_KEY)" \ + -e OPENAI_API_KEY="$(OPENAI_API_KEY)" \ + -v $(PWD)/config.yaml:/app/config/config.yaml:ro \ + $(DOCKER_TAG) + +docker-compose-up: ## Start services with docker-compose + @echo "Starting services with docker-compose..." 
+ docker-compose up -d + +docker-compose-down: ## Stop services with docker-compose + @echo "Stopping services with docker-compose..." + docker-compose down + +docker-compose-logs: ## View docker-compose logs + docker-compose logs -f + +# Kubernetes targets +k8s-namespace: ## Create Kubernetes namespace + kubectl create namespace llm-gateway --dry-run=client -o yaml | kubectl apply -f - + +k8s-secrets: ## Create Kubernetes secrets (requires env vars) + @echo "Creating secrets..." + @if [ -z "$(GOOGLE_API_KEY)" ] || [ -z "$(ANTHROPIC_API_KEY)" ] || [ -z "$(OPENAI_API_KEY)" ]; then \ + echo "Error: Please set GOOGLE_API_KEY, ANTHROPIC_API_KEY, and OPENAI_API_KEY environment variables"; \ + exit 1; \ + fi + kubectl create secret generic llm-gateway-secrets \ + --from-literal=GOOGLE_API_KEY="$(GOOGLE_API_KEY)" \ + --from-literal=ANTHROPIC_API_KEY="$(ANTHROPIC_API_KEY)" \ + --from-literal=OPENAI_API_KEY="$(OPENAI_API_KEY)" \ + --from-literal=OIDC_AUDIENCE="$(OIDC_AUDIENCE)" \ + -n llm-gateway \ + --dry-run=client -o yaml | kubectl apply -f - + +k8s-deploy: k8s-namespace k8s-secrets ## Deploy to Kubernetes + @echo "Deploying to Kubernetes..." + kubectl apply -k k8s/ + +k8s-delete: ## Delete from Kubernetes + @echo "Deleting from Kubernetes..." + kubectl delete -k k8s/ + +k8s-status: ## Check Kubernetes deployment status + @echo "Checking deployment status..." + kubectl get all -n llm-gateway + +k8s-logs: ## View Kubernetes logs + kubectl logs -n llm-gateway -l app=llm-gateway --tail=100 -f + +k8s-describe: ## Describe Kubernetes deployment + kubectl describe deployment llm-gateway -n llm-gateway + +k8s-port-forward: ## Port forward to local machine + kubectl port-forward -n llm-gateway svc/llm-gateway 8080:80 + +# CI/CD targets +ci: lint test ## Run CI checks + +security-scan: ## Run security scan + @echo "Running security scan..." + @which gosec > /dev/null || (echo "gosec not installed. 
Run: go install github.com/securego/gosec/v2/cmd/gosec@latest" && exit 1) + gosec ./... + +# Run target +run: ## Run locally + @echo "Running $(APP_NAME) locally..." + $(GOCMD) run ./cmd/gateway --config config.yaml + +# Version info +version: ## Show version + @echo "Version: $(VERSION)" + @echo "Image: $(DOCKER_TAG)" diff --git a/docker-compose.yaml b/docker-compose.yaml new file mode 100644 index 0000000..2cf90e5 --- /dev/null +++ b/docker-compose.yaml @@ -0,0 +1,102 @@ +# Docker Compose for local development and testing +# Not recommended for production - use Kubernetes instead + +version: '3.9' + +services: + gateway: + build: + context: . + dockerfile: Dockerfile + image: llm-gateway:latest + container_name: llm-gateway + ports: + - "8080:8080" + environment: + # Provider API keys + GOOGLE_API_KEY: ${GOOGLE_API_KEY} + ANTHROPIC_API_KEY: ${ANTHROPIC_API_KEY} + OPENAI_API_KEY: ${OPENAI_API_KEY} + OIDC_AUDIENCE: ${OIDC_AUDIENCE:-} + volumes: + - ./config.yaml:/app/config/config.yaml:ro + depends_on: + redis: + condition: service_healthy + networks: + - llm-network + restart: unless-stopped + healthcheck: + test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:8080/health"] + interval: 30s + timeout: 5s + retries: 3 + start_period: 10s + + redis: + image: redis:7.2-alpine + container_name: llm-gateway-redis + ports: + - "6379:6379" + command: redis-server --maxmemory 256mb --maxmemory-policy allkeys-lru + volumes: + - redis-data:/data + networks: + - llm-network + restart: unless-stopped + healthcheck: + test: ["CMD", "redis-cli", "ping"] + interval: 10s + timeout: 3s + retries: 3 + + # Optional: Prometheus for metrics + prometheus: + image: prom/prometheus:latest + container_name: llm-gateway-prometheus + ports: + - "9090:9090" + command: + - '--config.file=/etc/prometheus/prometheus.yml' + - '--storage.tsdb.path=/prometheus' + - '--web.console.libraries=/usr/share/prometheus/console_libraries' + - 
'--web.console.templates=/usr/share/prometheus/consoles' + volumes: + - ./monitoring/prometheus.yml:/etc/prometheus/prometheus.yml:ro + - prometheus-data:/prometheus + networks: + - llm-network + restart: unless-stopped + profiles: + - monitoring + + # Optional: Grafana for visualization + grafana: + image: grafana/grafana:latest + container_name: llm-gateway-grafana + ports: + - "3000:3000" + environment: + - GF_SECURITY_ADMIN_PASSWORD=admin + - GF_USERS_ALLOW_SIGN_UP=false + volumes: + - ./monitoring/grafana-datasources.yml:/etc/grafana/provisioning/datasources/datasources.yml:ro + - ./monitoring/grafana-dashboards.yml:/etc/grafana/provisioning/dashboards/dashboards.yml:ro + - ./monitoring/dashboards:/var/lib/grafana/dashboards:ro + - grafana-data:/var/lib/grafana + depends_on: + - prometheus + networks: + - llm-network + restart: unless-stopped + profiles: + - monitoring + +networks: + llm-network: + driver: bridge + +volumes: + redis-data: + prometheus-data: + grafana-data: diff --git a/k8s/README.md b/k8s/README.md new file mode 100644 index 0000000..3fa3641 --- /dev/null +++ b/k8s/README.md @@ -0,0 +1,352 @@ +# Kubernetes Deployment Guide + +This directory contains Kubernetes manifests for deploying the LLM Gateway to production. + +## Prerequisites + +- Kubernetes cluster (v1.24+) +- `kubectl` configured +- Container registry access +- (Optional) Prometheus Operator for monitoring +- (Optional) cert-manager for TLS certificates +- (Optional) nginx-ingress-controller or cloud load balancer + +## Quick Start + +### 1. Build and Push Docker Image + +```bash +# Build the image +docker build -t your-registry/llm-gateway:v1.0.0 . + +# Push to registry +docker push your-registry/llm-gateway:v1.0.0 +``` + +### 2. 
Configure Secrets + +**Option A: Using kubectl** +```bash +kubectl create namespace llm-gateway + +kubectl create secret generic llm-gateway-secrets \ + --from-literal=GOOGLE_API_KEY="your-key" \ + --from-literal=ANTHROPIC_API_KEY="your-key" \ + --from-literal=OPENAI_API_KEY="your-key" \ + --from-literal=OIDC_AUDIENCE="your-client-id" \ + -n llm-gateway +``` + +**Option B: Using External Secrets Operator (Recommended)** +- Uncomment the ExternalSecret in `secret.yaml` +- Configure your SecretStore (AWS Secrets Manager, Vault, etc.) + +### 3. Update Configuration + +Edit `configmap.yaml`: +- Update Redis connection string if using external Redis +- Configure observability endpoints (Tempo, Prometheus) +- Adjust rate limits as needed +- Set OIDC issuer and audience + +Edit `ingress.yaml`: +- Replace `llm-gateway.example.com` with your domain +- Configure TLS certificate annotations + +Edit `kustomization.yaml`: +- Update image registry and tag + +### 4. Deploy + +**Using Kustomize (Recommended):** +```bash +kubectl apply -k k8s/ +``` + +**Using kubectl directly:** +```bash +kubectl apply -f k8s/namespace.yaml +kubectl apply -f k8s/serviceaccount.yaml +kubectl apply -f k8s/secret.yaml +kubectl apply -f k8s/configmap.yaml +kubectl apply -f k8s/redis.yaml +kubectl apply -f k8s/deployment.yaml +kubectl apply -f k8s/service.yaml +kubectl apply -f k8s/ingress.yaml +kubectl apply -f k8s/hpa.yaml +kubectl apply -f k8s/pdb.yaml +kubectl apply -f k8s/networkpolicy.yaml +``` + +**With Prometheus Operator:** +```bash +kubectl apply -f k8s/servicemonitor.yaml +kubectl apply -f k8s/prometheusrule.yaml +``` + +### 5. 
Verify Deployment + +```bash +# Check pods +kubectl get pods -n llm-gateway + +# Check services +kubectl get svc -n llm-gateway + +# Check ingress +kubectl get ingress -n llm-gateway + +# View logs +kubectl logs -n llm-gateway -l app=llm-gateway --tail=100 -f + +# Check health +kubectl port-forward -n llm-gateway svc/llm-gateway 8080:80 +curl http://localhost:8080/health +``` + +## Architecture Overview + +``` +┌─────────────────────────────────────────────────────────┐ +│ Internet/Clients │ +└───────────────────────┬─────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────┐ +│ Ingress Controller │ +│ (nginx/ALB/GCE with TLS) │ +└───────────────────────┬─────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────┐ +│ LLM Gateway Service │ +│ (LoadBalancer) │ +└───────────────────────┬─────────────────────────────────┘ + │ + ┌───────────────┼───────────────┐ + ▼ ▼ ▼ +┌──────────────┐ ┌──────────────┐ ┌──────────────┐ +│ Gateway │ │ Gateway │ │ Gateway │ +│ Pod 1 │ │ Pod 2 │ │ Pod 3 │ +└──────┬───────┘ └──────┬───────┘ └──────┬───────┘ + │ │ │ + └────────────────┼────────────────┘ + │ + ┌───────────────┼───────────────┐ + ▼ ▼ ▼ +┌──────────────┐ ┌──────────────┐ ┌──────────────┐ +│ Redis │ │ Prometheus │ │ Tempo │ +│ (Persistent) │ │ (Metrics) │ │ (Traces) │ +└──────────────┘ └──────────────┘ └──────────────┘ +``` + +## Resource Specifications + +### Default Resources +- **Requests**: 100m CPU, 128Mi memory +- **Limits**: 1000m CPU, 512Mi memory +- **Replicas**: 3 (min), 20 (max with HPA) + +### Scaling +- HPA scales based on CPU (70%) and memory (80%) +- PodDisruptionBudget ensures minimum 2 replicas during disruptions + +## Configuration Options + +### Environment Variables (from Secret) +- `GOOGLE_API_KEY`: Google AI API key +- `ANTHROPIC_API_KEY`: Anthropic API key +- `OPENAI_API_KEY`: OpenAI API key +- `OIDC_AUDIENCE`: OIDC client ID for authentication + +### ConfigMap 
Settings +See `configmap.yaml` for full configuration options: +- Server address +- Logging format and level +- Rate limiting +- Observability (metrics/tracing) +- Provider endpoints +- Conversation storage +- Authentication + +## Security + +### Security Features +- Non-root container execution (UID 1000) +- Read-only root filesystem +- No privilege escalation +- All capabilities dropped +- Network policies for ingress/egress control +- SeccompProfile: RuntimeDefault + +### TLS/HTTPS +- Ingress configured with TLS +- Uses cert-manager for automatic certificate provisioning +- Force SSL redirect enabled + +### Secrets Management +**Never commit secrets to git!** + +Production options: +1. **External Secrets Operator** (Recommended) + - AWS Secrets Manager + - HashiCorp Vault + - Google Secret Manager + +2. **Sealed Secrets** + - Encrypted secrets in git + +3. **Manual kubectl secrets** + - Created outside of git + +## Monitoring + +### Metrics +- Exposed on `/metrics` endpoint +- Scraped by Prometheus via ServiceMonitor +- Key metrics: + - HTTP request rate, latency, errors + - Provider request rate, latency, token usage + - Conversation store operations + - Rate limiting hits + +### Alerts +See `prometheusrule.yaml` for configured alerts: +- High error rate +- High latency +- Provider failures +- Pod down +- High memory usage +- Rate limit threshold exceeded +- Conversation store errors + +### Logs +Structured JSON logs with: +- Request IDs +- Trace context (trace_id, span_id) +- Log levels (debug/info/warn/error) + +View logs: +```bash +kubectl logs -n llm-gateway -l app=llm-gateway --tail=100 -f +``` + +## Maintenance + +### Rolling Updates +```bash +# Update image +kubectl set image deployment/llm-gateway gateway=your-registry/llm-gateway:v1.0.1 -n llm-gateway + +# Check rollout status +kubectl rollout status deployment/llm-gateway -n llm-gateway + +# Rollback if needed +kubectl rollout undo deployment/llm-gateway -n llm-gateway +``` + +### Scaling +```bash +# 
Manual scale +kubectl scale deployment/llm-gateway --replicas=5 -n llm-gateway + +# HPA will auto-scale within min/max bounds (3-20) +``` + +### Configuration Updates +```bash +# Edit ConfigMap +kubectl edit configmap llm-gateway-config -n llm-gateway + +# Restart pods to pick up changes +kubectl rollout restart deployment/llm-gateway -n llm-gateway +``` + +### Debugging +```bash +# Exec into pod +kubectl exec -it -n llm-gateway deployment/llm-gateway -- /bin/sh + +# Port forward for local access +kubectl port-forward -n llm-gateway svc/llm-gateway 8080:80 + +# Check events +kubectl get events -n llm-gateway --sort-by='.lastTimestamp' +``` + +## Production Considerations + +### High Availability +- Minimum 3 replicas across availability zones +- Pod anti-affinity rules spread pods across nodes +- PodDisruptionBudget ensures service availability during disruptions + +### Performance +- Adjust resource limits based on load testing +- Configure HPA thresholds based on traffic patterns +- Use node affinity for GPU nodes if needed + +### Cost Optimization +- Use spot/preemptible instances for non-critical workloads +- Set appropriate resource requests/limits +- Monitor token usage and implement quotas + +### Disaster Recovery +- Redis persistence (if using StatefulSet) +- Regular backups of conversation data +- Multi-region deployment for geo-redundancy +- Document runbooks for incident response + +## Cloud-Specific Notes + +### AWS EKS +- Use AWS Load Balancer Controller for ALB +- Configure IRSA for service account +- Use ElastiCache for Redis +- Store secrets in AWS Secrets Manager + +### GCP GKE +- Use GKE Ingress for GCLB +- Configure Workload Identity +- Use Memorystore for Redis +- Store secrets in Google Secret Manager + +### Azure AKS +- Use Azure Application Gateway Ingress Controller +- Configure Azure AD Workload Identity +- Use Azure Cache for Redis +- Store secrets in Azure Key Vault + +## Troubleshooting + +### Common Issues + +**Pods not starting:** 
+```bash +kubectl describe pod -n llm-gateway -l app=llm-gateway +kubectl logs -n llm-gateway -l app=llm-gateway --previous +``` + +**Health check failures:** +```bash +kubectl port-forward -n llm-gateway deployment/llm-gateway 8080:8080 +curl http://localhost:8080/health +curl http://localhost:8080/ready +``` + +**Provider connection issues:** +- Verify API keys in secrets +- Check network policies allow egress +- Verify provider endpoints are accessible + +**Redis connection issues:** +```bash +kubectl exec -it -n llm-gateway redis-0 -- redis-cli ping +``` + +## Additional Resources + +- [Kubernetes Documentation](https://kubernetes.io/docs/) +- [Prometheus Operator](https://github.com/prometheus-operator/prometheus-operator) +- [cert-manager](https://cert-manager.io/) +- [External Secrets Operator](https://external-secrets.io/) diff --git a/k8s/configmap.yaml b/k8s/configmap.yaml new file mode 100644 index 0000000..e5dd06e --- /dev/null +++ b/k8s/configmap.yaml @@ -0,0 +1,76 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: llm-gateway-config + namespace: llm-gateway + labels: + app: llm-gateway +data: + config.yaml: | + server: + address: ":8080" + + logging: + format: "json" + level: "info" + + rate_limit: + enabled: true + requests_per_second: 10 + burst: 20 + + observability: + enabled: true + + metrics: + enabled: true + path: "/metrics" + + tracing: + enabled: true + service_name: "llm-gateway" + sampler: + type: "probability" + rate: 0.1 + exporter: + type: "otlp" + endpoint: "tempo.observability.svc.cluster.local:4317" + insecure: true + + providers: + google: + type: "google" + api_key: "${GOOGLE_API_KEY}" + endpoint: "https://generativelanguage.googleapis.com" + anthropic: + type: "anthropic" + api_key: "${ANTHROPIC_API_KEY}" + endpoint: "https://api.anthropic.com" + openai: + type: "openai" + api_key: "${OPENAI_API_KEY}" + endpoint: "https://api.openai.com" + + conversations: + store: "redis" + ttl: "1h" + dsn: 
"redis://redis.llm-gateway.svc.cluster.local:6379/0" + + auth: + enabled: true + issuer: "https://accounts.google.com" + audience: "${OIDC_AUDIENCE}" + + models: + - name: "gemini-1.5-flash" + provider: "google" + - name: "gemini-1.5-pro" + provider: "google" + - name: "claude-3-5-sonnet-20241022" + provider: "anthropic" + - name: "claude-3-5-haiku-20241022" + provider: "anthropic" + - name: "gpt-4o" + provider: "openai" + - name: "gpt-4o-mini" + provider: "openai" diff --git a/k8s/deployment.yaml b/k8s/deployment.yaml new file mode 100644 index 0000000..baede2f --- /dev/null +++ b/k8s/deployment.yaml @@ -0,0 +1,168 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-gateway + namespace: llm-gateway + labels: + app: llm-gateway + version: v1 +spec: + replicas: 3 + strategy: + type: RollingUpdate + rollingUpdate: + maxSurge: 1 + maxUnavailable: 0 + selector: + matchLabels: + app: llm-gateway + template: + metadata: + labels: + app: llm-gateway + version: v1 + annotations: + prometheus.io/scrape: "true" + prometheus.io/port: "8080" + prometheus.io/path: "/metrics" + spec: + serviceAccountName: llm-gateway + securityContext: + runAsNonRoot: true + runAsUser: 1000 + runAsGroup: 1000 + fsGroup: 1000 + seccompProfile: + type: RuntimeDefault + + containers: + - name: gateway + image: llm-gateway:latest # Replace with your registry/image:tag + imagePullPolicy: IfNotPresent + + ports: + - name: http + containerPort: 8080 + protocol: TCP + + env: + # Provider API Keys from Secret + - name: GOOGLE_API_KEY + valueFrom: + secretKeyRef: + name: llm-gateway-secrets + key: GOOGLE_API_KEY + - name: ANTHROPIC_API_KEY + valueFrom: + secretKeyRef: + name: llm-gateway-secrets + key: ANTHROPIC_API_KEY + - name: OPENAI_API_KEY + valueFrom: + secretKeyRef: + name: llm-gateway-secrets + key: OPENAI_API_KEY + - name: OIDC_AUDIENCE + valueFrom: + secretKeyRef: + name: llm-gateway-secrets + key: OIDC_AUDIENCE + + # Optional: Pod metadata + - name: POD_NAME + valueFrom: + 
fieldRef: + fieldPath: metadata.name + - name: POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: POD_IP + valueFrom: + fieldRef: + fieldPath: status.podIP + + resources: + requests: + cpu: 100m + memory: 128Mi + limits: + cpu: 1000m + memory: 512Mi + + livenessProbe: + httpGet: + path: /health + port: http + scheme: HTTP + initialDelaySeconds: 10 + periodSeconds: 30 + timeoutSeconds: 5 + successThreshold: 1 + failureThreshold: 3 + + readinessProbe: + httpGet: + path: /ready + port: http + scheme: HTTP + initialDelaySeconds: 5 + periodSeconds: 10 + timeoutSeconds: 5 + successThreshold: 1 + failureThreshold: 3 + + startupProbe: + httpGet: + path: /health + port: http + scheme: HTTP + initialDelaySeconds: 0 + periodSeconds: 5 + timeoutSeconds: 3 + successThreshold: 1 + failureThreshold: 30 + + volumeMounts: + - name: config + mountPath: /app/config + readOnly: true + - name: tmp + mountPath: /tmp + + securityContext: + allowPrivilegeEscalation: false + readOnlyRootFilesystem: true + runAsNonRoot: true + runAsUser: 1000 + capabilities: + drop: + - ALL + + volumes: + - name: config + configMap: + name: llm-gateway-config + - name: tmp + emptyDir: {} + + # Affinity rules for better distribution + affinity: + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + podAffinityTerm: + labelSelector: + matchExpressions: + - key: app + operator: In + values: + - llm-gateway + topologyKey: kubernetes.io/hostname + + # Tolerations (if needed for specific node pools) + # tolerations: + # - key: "workload-type" + # operator: "Equal" + # value: "llm" + # effect: "NoSchedule" diff --git a/k8s/hpa.yaml b/k8s/hpa.yaml new file mode 100644 index 0000000..e21f7d2 --- /dev/null +++ b/k8s/hpa.yaml @@ -0,0 +1,63 @@ +apiVersion: autoscaling/v2 +kind: HorizontalPodAutoscaler +metadata: + name: llm-gateway + namespace: llm-gateway + labels: + app: llm-gateway +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: 
llm-gateway + + minReplicas: 3 + maxReplicas: 20 + + behavior: + scaleDown: + stabilizationWindowSeconds: 300 + policies: + - type: Percent + value: 50 + periodSeconds: 60 + - type: Pods + value: 2 + periodSeconds: 60 + selectPolicy: Min + scaleUp: + stabilizationWindowSeconds: 0 + policies: + - type: Percent + value: 100 + periodSeconds: 30 + - type: Pods + value: 4 + periodSeconds: 30 + selectPolicy: Max + + metrics: + # CPU-based scaling + - type: Resource + resource: + name: cpu + target: + type: Utilization + averageUtilization: 70 + + # Memory-based scaling + - type: Resource + resource: + name: memory + target: + type: Utilization + averageUtilization: 80 + + # Custom metrics (requires metrics-server and custom metrics API) + # - type: Pods + # pods: + # metric: + # name: http_requests_per_second + # target: + # type: AverageValue + # averageValue: "1000" diff --git a/k8s/ingress.yaml b/k8s/ingress.yaml new file mode 100644 index 0000000..2655ba3 --- /dev/null +++ b/k8s/ingress.yaml @@ -0,0 +1,66 @@ +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: llm-gateway + namespace: llm-gateway + labels: + app: llm-gateway + annotations: + # General annotations + kubernetes.io/ingress.class: "nginx" + + # TLS configuration + cert-manager.io/cluster-issuer: "letsencrypt-prod" + + # Security headers + nginx.ingress.kubernetes.io/force-ssl-redirect: "true" + nginx.ingress.kubernetes.io/ssl-protocols: "TLSv1.2 TLSv1.3" + + # Rate limiting (supplement application-level rate limiting) + nginx.ingress.kubernetes.io/limit-rps: "100" + nginx.ingress.kubernetes.io/limit-connections: "50" + + # Request size limit (10MB) + nginx.ingress.kubernetes.io/proxy-body-size: "10m" + + # Timeouts + nginx.ingress.kubernetes.io/proxy-connect-timeout: "60" + nginx.ingress.kubernetes.io/proxy-send-timeout: "120" + nginx.ingress.kubernetes.io/proxy-read-timeout: "120" + + # CORS (if needed) + # nginx.ingress.kubernetes.io/enable-cors: "true" + # 
nginx.ingress.kubernetes.io/cors-allow-origin: "https://yourdomain.com" + # nginx.ingress.kubernetes.io/cors-allow-methods: "GET, POST, OPTIONS" + # nginx.ingress.kubernetes.io/cors-allow-credentials: "true" + + # For AWS ALB Ingress Controller (alternative to nginx) + # kubernetes.io/ingress.class: "alb" + # alb.ingress.kubernetes.io/scheme: "internet-facing" + # alb.ingress.kubernetes.io/target-type: "ip" + # alb.ingress.kubernetes.io/listen-ports: '[{"HTTP": 80}, {"HTTPS": 443}]' + # alb.ingress.kubernetes.io/ssl-redirect: '443' + # alb.ingress.kubernetes.io/certificate-arn: "arn:aws:acm:region:account:certificate/xxx" + + # For GKE Ingress (alternative to nginx) + # kubernetes.io/ingress.class: "gce" + # kubernetes.io/ingress.global-static-ip-name: "llm-gateway-ip" + # ingress.gcp.kubernetes.io/pre-shared-cert: "llm-gateway-cert" + +spec: + tls: + - hosts: + - llm-gateway.example.com # Replace with your domain + secretName: llm-gateway-tls + + rules: + - host: llm-gateway.example.com # Replace with your domain + http: + paths: + - path: / + pathType: Prefix + backend: + service: + name: llm-gateway + port: + number: 80 diff --git a/k8s/kustomization.yaml b/k8s/kustomization.yaml new file mode 100644 index 0000000..e5c5ce7 --- /dev/null +++ b/k8s/kustomization.yaml @@ -0,0 +1,46 @@ +# Kustomize configuration for easy deployment +# Usage: kubectl apply -k k8s/ + +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +namespace: llm-gateway + +resources: +- namespace.yaml +- serviceaccount.yaml +- configmap.yaml +- secret.yaml +- deployment.yaml +- service.yaml +- ingress.yaml +- hpa.yaml +- pdb.yaml +- networkpolicy.yaml +- redis.yaml +- servicemonitor.yaml +- prometheusrule.yaml + +# Common labels applied to all resources +commonLabels: + app.kubernetes.io/name: llm-gateway + app.kubernetes.io/component: api-gateway + app.kubernetes.io/part-of: llm-platform + +# Images to be used (customize for your registry) +images: +- name: llm-gateway + newName: 
your-registry/llm-gateway + newTag: latest + +# ConfigMap generator (alternative to configmap.yaml) +# configMapGenerator: +# - name: llm-gateway-config +# files: +# - config.yaml + +# Secret generator (for local development only) +# secretGenerator: +# - name: llm-gateway-secrets +# envs: +# - secrets.env diff --git a/k8s/namespace.yaml b/k8s/namespace.yaml new file mode 100644 index 0000000..8ad84fd --- /dev/null +++ b/k8s/namespace.yaml @@ -0,0 +1,7 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: llm-gateway + labels: + app: llm-gateway + environment: production diff --git a/k8s/networkpolicy.yaml b/k8s/networkpolicy.yaml new file mode 100644 index 0000000..2d92e50 --- /dev/null +++ b/k8s/networkpolicy.yaml @@ -0,0 +1,83 @@ +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: llm-gateway + namespace: llm-gateway + labels: + app: llm-gateway +spec: + podSelector: + matchLabels: + app: llm-gateway + + policyTypes: + - Ingress + - Egress + + ingress: + # Allow traffic from ingress controller + - from: + - namespaceSelector: + matchLabels: + name: ingress-nginx + ports: + - protocol: TCP + port: 8080 + + # Allow traffic from within the namespace (for debugging/testing) + - from: + - podSelector: {} + ports: + - protocol: TCP + port: 8080 + + # Allow Prometheus scraping + - from: + - namespaceSelector: + matchLabels: + name: observability + podSelector: + matchLabels: + app: prometheus + ports: + - protocol: TCP + port: 8080 + + egress: + # Allow DNS + - to: + - namespaceSelector: {} + podSelector: + matchLabels: + k8s-app: kube-dns + ports: + - protocol: UDP + port: 53 + + # Allow Redis access + - to: + - podSelector: + matchLabels: + app: redis + ports: + - protocol: TCP + port: 6379 + + # Allow external provider API access (OpenAI, Anthropic, Google) + - to: + - namespaceSelector: {} + ports: + - protocol: TCP + port: 443 + + # Allow OTLP tracing export + - to: + - namespaceSelector: + matchLabels: + name: observability + podSelector: + 
matchLabels: + app: tempo + ports: + - protocol: TCP + port: 4317 diff --git a/k8s/pdb.yaml b/k8s/pdb.yaml new file mode 100644 index 0000000..62f5349 --- /dev/null +++ b/k8s/pdb.yaml @@ -0,0 +1,13 @@ +apiVersion: policy/v1 +kind: PodDisruptionBudget +metadata: + name: llm-gateway + namespace: llm-gateway + labels: + app: llm-gateway +spec: + minAvailable: 2 + selector: + matchLabels: + app: llm-gateway + unhealthyPodEvictionPolicy: AlwaysAllow diff --git a/k8s/prometheusrule.yaml b/k8s/prometheusrule.yaml new file mode 100644 index 0000000..35a0808 --- /dev/null +++ b/k8s/prometheusrule.yaml @@ -0,0 +1,122 @@ +# PrometheusRule for alerting +# Requires Prometheus Operator to be installed + +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: llm-gateway + namespace: llm-gateway + labels: + app: llm-gateway + prometheus: kube-prometheus +spec: + groups: + - name: llm-gateway.rules + interval: 30s + rules: + + # High error rate + - alert: LLMGatewayHighErrorRate + expr: | + ( + sum(rate(http_requests_total{namespace="llm-gateway",status_code=~"5.."}[5m])) + / + sum(rate(http_requests_total{namespace="llm-gateway"}[5m])) + ) > 0.05 + for: 5m + labels: + severity: warning + component: llm-gateway + annotations: + summary: "High error rate in LLM Gateway" + description: "Error rate is {{ $value | humanizePercentage }} (threshold: 5%)" + + # High latency + - alert: LLMGatewayHighLatency + expr: | + histogram_quantile(0.95, + sum(rate(http_request_duration_seconds_bucket{namespace="llm-gateway"}[5m])) by (le) + ) > 10 + for: 5m + labels: + severity: warning + component: llm-gateway + annotations: + summary: "High latency in LLM Gateway" + description: "P95 latency is {{ $value }}s (threshold: 10s)" + + # Provider errors + - alert: LLMProviderHighErrorRate + expr: | + ( + sum(rate(provider_requests_total{namespace="llm-gateway",status="error"}[5m])) by (provider) + / + sum(rate(provider_requests_total{namespace="llm-gateway"}[5m])) by (provider) + 
) > 0.10 + for: 5m + labels: + severity: warning + component: llm-gateway + annotations: + summary: "High error rate for provider {{ $labels.provider }}" + description: "Error rate is {{ $value | humanizePercentage }} (threshold: 10%)" + + # Pod down + - alert: LLMGatewayPodDown + expr: | + up{job="llm-gateway",namespace="llm-gateway"} == 0 + for: 2m + labels: + severity: critical + component: llm-gateway + annotations: + summary: "LLM Gateway pod is down" + description: "Pod {{ $labels.pod }} has been down for more than 2 minutes" + + # High memory usage + - alert: LLMGatewayHighMemoryUsage + expr: | + ( + container_memory_working_set_bytes{namespace="llm-gateway",container="gateway"} + / + container_spec_memory_limit_bytes{namespace="llm-gateway",container="gateway"} + ) > 0.85 + for: 5m + labels: + severity: warning + component: llm-gateway + annotations: + summary: "High memory usage in LLM Gateway" + description: "Memory usage is {{ $value | humanizePercentage }} (threshold: 85%)" + + # Rate limit threshold + - alert: LLMGatewayHighRateLimitHitRate + expr: | + ( + sum(rate(http_requests_total{namespace="llm-gateway",status_code="429"}[5m])) + / + sum(rate(http_requests_total{namespace="llm-gateway"}[5m])) + ) > 0.20 + for: 10m + labels: + severity: info + component: llm-gateway + annotations: + summary: "High rate limit hit rate" + description: "{{ $value | humanizePercentage }} of requests are being rate limited" + + # Conversation store errors + - alert: LLMGatewayConversationStoreErrors + expr: | + ( + sum(rate(conversation_store_operations_total{namespace="llm-gateway",status="error"}[5m])) + / + sum(rate(conversation_store_operations_total{namespace="llm-gateway"}[5m])) + ) > 0.05 + for: 5m + labels: + severity: warning + component: llm-gateway + annotations: + summary: "High error rate in conversation store" + description: "Error rate is {{ $value | humanizePercentage }} (threshold: 5%)" diff --git a/k8s/redis.yaml b/k8s/redis.yaml new file mode 100644 
index 0000000..7257d20 --- /dev/null +++ b/k8s/redis.yaml @@ -0,0 +1,131 @@ +# Simple Redis deployment for conversation storage +# For production, consider using: +# - Redis Operator (e.g., Redis Enterprise Operator) +# - Managed Redis (AWS ElastiCache, GCP Memorystore, Azure Cache for Redis) +# - Redis Cluster for high availability + +apiVersion: v1 +kind: ConfigMap +metadata: + name: redis-config + namespace: llm-gateway + labels: + app: redis +data: + redis.conf: | + maxmemory 256mb + maxmemory-policy allkeys-lru + save "" + appendonly no +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: redis + namespace: llm-gateway + labels: + app: redis +spec: + serviceName: redis + replicas: 1 + selector: + matchLabels: + app: redis + template: + metadata: + labels: + app: redis + spec: + securityContext: + runAsNonRoot: true + runAsUser: 999 + fsGroup: 999 + seccompProfile: + type: RuntimeDefault + + containers: + - name: redis + image: redis:7.2-alpine + imagePullPolicy: IfNotPresent + + command: + - redis-server + - /etc/redis/redis.conf + + ports: + - name: redis + containerPort: 6379 + protocol: TCP + + resources: + requests: + cpu: 100m + memory: 128Mi + limits: + cpu: 500m + memory: 512Mi + + livenessProbe: + tcpSocket: + port: redis + initialDelaySeconds: 10 + periodSeconds: 10 + timeoutSeconds: 5 + failureThreshold: 3 + + readinessProbe: + exec: + command: + - redis-cli + - ping + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 3 + failureThreshold: 3 + + volumeMounts: + - name: config + mountPath: /etc/redis + - name: data + mountPath: /data + + securityContext: + allowPrivilegeEscalation: false + readOnlyRootFilesystem: true + runAsNonRoot: true + runAsUser: 999 + capabilities: + drop: + - ALL + + volumes: + - name: config + configMap: + name: redis-config + + volumeClaimTemplates: + - metadata: + name: data + spec: + accessModes: ["ReadWriteOnce"] + resources: + requests: + storage: 10Gi +--- +apiVersion: v1 +kind: Service +metadata: + 
name: redis + namespace: llm-gateway + labels: + app: redis +spec: + type: ClusterIP + clusterIP: None + selector: + app: redis + ports: + - name: redis + port: 6379 + targetPort: redis + protocol: TCP diff --git a/k8s/secret.yaml b/k8s/secret.yaml new file mode 100644 index 0000000..514b538 --- /dev/null +++ b/k8s/secret.yaml @@ -0,0 +1,46 @@ +apiVersion: v1 +kind: Secret +metadata: + name: llm-gateway-secrets + namespace: llm-gateway + labels: + app: llm-gateway +type: Opaque +stringData: + # IMPORTANT: Replace these with actual values or use external secret management + # For production, use: + # - kubectl create secret generic llm-gateway-secrets --from-literal=... + # - External Secrets Operator with AWS Secrets Manager/HashiCorp Vault + # - Sealed Secrets + GOOGLE_API_KEY: "your-google-api-key-here" + ANTHROPIC_API_KEY: "your-anthropic-api-key-here" + OPENAI_API_KEY: "your-openai-api-key-here" + OIDC_AUDIENCE: "your-client-id.apps.googleusercontent.com" +--- +# Example using External Secrets Operator (commented out) +# apiVersion: external-secrets.io/v1beta1 +# kind: ExternalSecret +# metadata: +# name: llm-gateway-secrets +# namespace: llm-gateway +# spec: +# refreshInterval: 1h +# secretStoreRef: +# name: aws-secrets-manager +# kind: SecretStore +# target: +# name: llm-gateway-secrets +# creationPolicy: Owner +# data: +# - secretKey: GOOGLE_API_KEY +# remoteRef: +# key: prod/llm-gateway/google-api-key +# - secretKey: ANTHROPIC_API_KEY +# remoteRef: +# key: prod/llm-gateway/anthropic-api-key +# - secretKey: OPENAI_API_KEY +# remoteRef: +# key: prod/llm-gateway/openai-api-key +# - secretKey: OIDC_AUDIENCE +# remoteRef: +# key: prod/llm-gateway/oidc-audience diff --git a/k8s/service.yaml b/k8s/service.yaml new file mode 100644 index 0000000..d9f4da6 --- /dev/null +++ b/k8s/service.yaml @@ -0,0 +1,40 @@ +apiVersion: v1 +kind: Service +metadata: + name: llm-gateway + namespace: llm-gateway + labels: + app: llm-gateway + annotations: + # For cloud load balancers 
(uncomment as needed) + # service.beta.kubernetes.io/aws-load-balancer-type: "nlb" + # cloud.google.com/neg: '{"ingress": true}' +spec: + type: ClusterIP + selector: + app: llm-gateway + ports: + - name: http + port: 80 + targetPort: http + protocol: TCP + sessionAffinity: None +--- +# Headless service for pod-to-pod communication (if needed) +apiVersion: v1 +kind: Service +metadata: + name: llm-gateway-headless + namespace: llm-gateway + labels: + app: llm-gateway +spec: + type: ClusterIP + clusterIP: None + selector: + app: llm-gateway + ports: + - name: http + port: 8080 + targetPort: http + protocol: TCP diff --git a/k8s/serviceaccount.yaml b/k8s/serviceaccount.yaml new file mode 100644 index 0000000..35d6876 --- /dev/null +++ b/k8s/serviceaccount.yaml @@ -0,0 +1,14 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: llm-gateway + namespace: llm-gateway + labels: + app: llm-gateway + annotations: + # For GKE Workload Identity + # iam.gke.io/gcp-service-account: llm-gateway@PROJECT_ID.iam.gserviceaccount.com + + # For EKS IRSA (IAM Roles for Service Accounts) + # eks.amazonaws.com/role-arn: arn:aws:iam::ACCOUNT_ID:role/llm-gateway-role +automountServiceAccountToken: true diff --git a/k8s/servicemonitor.yaml b/k8s/servicemonitor.yaml new file mode 100644 index 0000000..9be94d7 --- /dev/null +++ b/k8s/servicemonitor.yaml @@ -0,0 +1,35 @@ +# ServiceMonitor for Prometheus Operator +# Requires Prometheus Operator to be installed +# https://github.com/prometheus-operator/prometheus-operator + +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: llm-gateway + namespace: llm-gateway + labels: + app: llm-gateway + prometheus: kube-prometheus +spec: + selector: + matchLabels: + app: llm-gateway + + endpoints: + - port: http + path: /metrics + interval: 30s + scrapeTimeout: 10s + + relabelings: + # Add namespace label + - sourceLabels: [__meta_kubernetes_namespace] + targetLabel: namespace + + # Add pod label + - sourceLabels: 
[__meta_kubernetes_pod_name] + targetLabel: pod + + # Add service label + - sourceLabels: [__meta_kubernetes_service_name] + targetLabel: service