2026-03-05 23:09:11 +00:00
21 changed files with 1952 additions and 0 deletions
--- a/.dockerignore
+++ b/.dockerignore
@@ -0,0 +1,65 @@
 # Build-context exclusions. Anything matched here is never sent to the Docker
 # daemon, so `COPY . .` cannot pull it into an image layer.
 # Version control
 .git
 .gitignore
 .github
 # Documentation
 *.md
 docs/
 # IDE
 .vscode/
 .idea/
 *.swp
 *.swo
 *~
 # Build artifacts
 /bin/
 /dist/
 /build/
 /gateway
 /cmd/gateway/gateway
 *.exe
 *.dll
 *.so
 *.dylib
 *.test
 *.out
 # Configuration files with secrets
 config.yaml
 config.json
 *-local.yaml
 *-local.json
 .env
 # Covers .env.local and any other per-environment variant
 .env.*
 *.key
 *.pem
 # Test and coverage output (coverage.html is produced by `go tool cover -html`)
 coverage.out
 coverage.html
 # Logs
 *.log
 logs/
 # OS
 .DS_Store
 Thumbs.db
 # Dependencies (will be downloaded during build)
 vendor/
 # Python
 __pycache__/
 *.py[cod]
 # Node
 tests/node_modules/
 # Jujutsu
 .jj/
 # Claude
 .claude/
 # Data directories
 data/
 *.db
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -0,0 +1,181 @@
 # Runs for pushes to, and pull requests against, the main and develop branches.
 name: CI
 on:
  pull_request:
    branches:
      - main
      - develop
  push:
    branches:
      - main
      - develop
 # Workflow-wide values read by the jobs through the env context.
 env:
  IMAGE_NAME: ${{ github.repository }}
  REGISTRY: ghcr.io
  GO_VERSION: "1.23"
 jobs:
  # Unit tests with the race detector; the coverage profile is sent to Codecov
  # and also rendered to HTML and stored as a build artifact.
  test:
    runs-on: ubuntu-latest
    name: Test
    steps:
    - name: Checkout code
      uses: actions/checkout@v4
    - name: Set up Go
      uses: actions/setup-go@v5
      with:
        cache: true
        go-version: ${{ env.GO_VERSION }}
    - name: Download dependencies
      run: go mod download
    - name: Verify dependencies
      run: go mod verify
    # Writes coverage.out, which the three steps below consume.
    - name: Run tests
      run: go test -v -race -coverprofile=coverage.out ./...
    - name: Upload coverage to Codecov
      uses: codecov/codecov-action@v4
      with:
        name: codecov-umbrella
        flags: unittests
        file: ./coverage.out
    - name: Generate coverage report
      run: go tool cover -html=coverage.out -o coverage.html
    - name: Upload coverage report
      uses: actions/upload-artifact@v4
      with:
        path: coverage.html
        name: coverage-report
  # Static analysis with golangci-lint, given up to five minutes to finish.
  lint:
    runs-on: ubuntu-latest
    name: Lint
    steps:
    - name: Checkout code
      uses: actions/checkout@v4
    - name: Set up Go
      uses: actions/setup-go@v5
      with:
        cache: true
        go-version: ${{ env.GO_VERSION }}
    - name: Run golangci-lint
      uses: golangci/golangci-lint-action@v4
      with:
        args: --timeout=5m
        version: latest
  # Runs gosec over the module and publishes the findings to GitHub code
  # scanning as SARIF.
  security:
    name: Security Scan
    runs-on: ubuntu-latest
    # The SARIF upload writes to the code-scanning API. Granting the scopes
    # explicitly keeps the job working when the repository's default token is
    # read-only.
    permissions:
      contents: read
      security-events: write
      # Only needed for private repositories; harmless otherwise.
      actions: read
    steps:
    - name: Checkout code
      uses: actions/checkout@v4
    - name: Set up Go
      uses: actions/setup-go@v5
      with:
        go-version: ${{ env.GO_VERSION }}
        cache: true
    # -no-fail keeps the job green when findings exist; they are reported
    # through the uploaded SARIF file instead of the exit code.
    - name: Run Gosec Security Scanner
      uses: securego/gosec@master
      with:
        args: '-no-fail -fmt sarif -out results.sarif ./...'
    - name: Upload SARIF file
      uses: github/codeql-action/upload-sarif@v3
      with:
        sarif_file: results.sarif
  # Compiles the gateway once tests and lint have passed and keeps the binary
  # as a workflow artifact.
  build:
    runs-on: ubuntu-latest
    name: Build
    needs:
      - test
      - lint
    steps:
    - name: Checkout code
      uses: actions/checkout@v4
    - name: Set up Go
      uses: actions/setup-go@v5
      with:
        cache: true
        go-version: ${{ env.GO_VERSION }}
    # cgo is switched on explicitly for this build.
    - name: Build binary
      run: |
        CGO_ENABLED=1 go build -v -o bin/gateway ./cmd/gateway
    - name: Upload binary
      uses: actions/upload-artifact@v4
      with:
        path: bin/gateway
        name: gateway-binary
  # Builds and pushes the multi-arch image for pushes to main/develop, then
  # scans the pushed image with Trivy and reports the findings as SARIF.
  docker:
    name: Build and Push Docker Image
    runs-on: ubuntu-latest
    needs: [test, lint, security]
    if: github.event_name == 'push' && (github.ref == 'refs/heads/main' || github.ref == 'refs/heads/develop')
    permissions:
      contents: read
      packages: write
      # Needed by the final upload-sarif step. Because this block replaces the
      # token's default scopes, omitting it makes that upload fail.
      security-events: write
    steps:
    - name: Checkout code
      uses: actions/checkout@v4
    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v3
    - name: Log in to Container Registry
      uses: docker/login-action@v3
      with:
        registry: ${{ env.REGISTRY }}
        username: ${{ github.actor }}
        password: ${{ secrets.GITHUB_TOKEN }}
    - name: Extract metadata
      id: meta
      uses: docker/metadata-action@v5
      with:
        images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
        tags: |
          type=ref,event=branch
          type=ref,event=pr
          type=semver,pattern={{version}}
          type=semver,pattern={{major}}.{{minor}}
          type=sha,prefix={{branch}}-
          type=raw,value=latest,enable=${{ github.ref == 'refs/heads/main' }}
    - name: Build and push Docker image
      uses: docker/build-push-action@v5
      with:
        context: .
        push: true
        tags: ${{ steps.meta.outputs.tags }}
        labels: ${{ steps.meta.outputs.labels }}
        cache-from: type=gha
        cache-to: type=gha,mode=max
        platforms: linux/amd64,linux/arm64
    - name: Run Trivy vulnerability scanner
      uses: aquasecurity/trivy-action@master
      with:
        # Scan a reference that was actually pushed. No tag equal to the full
        # commit SHA is generated above, so the first tag computed by the
        # metadata step (already lower-cased) is used instead.
        image-ref: ${{ fromJSON(steps.meta.outputs.json).tags[0] }}
        format: 'sarif'
        output: 'trivy-results.sarif'
    - name: Upload Trivy results to GitHub Security
      uses: github/codeql-action/upload-sarif@v3
      with:
        sarif_file: 'trivy-results.sarif'
--- a/.github/workflows/release.yaml
+++ b/.github/workflows/release.yaml
@@ -0,0 +1,129 @@
 # Triggered by pushing any tag whose name starts with "v".
 name: Release
 on:
  push:
    tags:
      - "v*"
 # Workflow-wide values read by the job through the env context.
 env:
  IMAGE_NAME: ${{ github.repository }}
  REGISTRY: ghcr.io
  GO_VERSION: "1.23"
 jobs:
  # Tests, builds per-platform binaries with checksums, pushes the multi-arch
  # image, and publishes a GitHub release with a generated changelog.
  release:
    name: Create Release
    runs-on: ubuntu-latest
    permissions:
      contents: write
      packages: write
    steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # Full history and tags are required by `git describe` in the
        # changelog step.
        fetch-depth: 0
    - name: Set up Go
      uses: actions/setup-go@v5
      with:
        go-version: ${{ env.GO_VERSION }}
    - name: Run tests
      run: go test -v ./...
    # A cgo build for another architecture needs a C compiler for that
    # target, which has to be installed before the arm64 build below.
    - name: Install arm64 cross-compiler
      run: |
        sudo apt-get update
        sudo apt-get install -y gcc-aarch64-linux-gnu
    - name: Build binaries
      run: |
        # Linux amd64
        GOOS=linux GOARCH=amd64 CGO_ENABLED=1 go build -o bin/gateway-linux-amd64 ./cmd/gateway
        # Linux arm64 (CC selects the cross-compiler installed above)
        CC=aarch64-linux-gnu-gcc GOOS=linux GOARCH=arm64 CGO_ENABLED=1 go build -o bin/gateway-linux-arm64 ./cmd/gateway
        # NOTE(review): the darwin builds also enable cgo, but no darwin C
        # toolchain is installed on this Linux runner. Confirm they succeed,
        # or move them to a macOS runner.
        # macOS amd64
        GOOS=darwin GOARCH=amd64 CGO_ENABLED=1 go build -o bin/gateway-darwin-amd64 ./cmd/gateway
        # macOS arm64
        GOOS=darwin GOARCH=arm64 CGO_ENABLED=1 go build -o bin/gateway-darwin-arm64 ./cmd/gateway
    - name: Create checksums
      run: |
        cd bin
        sha256sum gateway-* > checksums.txt
    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v3
    - name: Log in to Container Registry
      uses: docker/login-action@v3
      with:
        registry: ${{ env.REGISTRY }}
        username: ${{ github.actor }}
        password: ${{ secrets.GITHUB_TOKEN }}
    - name: Extract metadata
      id: meta
      uses: docker/metadata-action@v5
      with:
        images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
        tags: |
          type=semver,pattern={{version}}
          type=semver,pattern={{major}}.{{minor}}
          type=semver,pattern={{major}}
          type=raw,value=latest,enable=${{ !(contains(github.ref, 'alpha') || contains(github.ref, 'beta') || contains(github.ref, 'rc')) }}
    - name: Build and push Docker image
      uses: docker/build-push-action@v5
      with:
        context: .
        push: true
        tags: ${{ steps.meta.outputs.tags }}
        labels: ${{ steps.meta.outputs.labels }}
        platforms: linux/amd64,linux/arm64
        cache-from: type=gha
        cache-to: type=gha,mode=max
    - name: Generate changelog
      id: changelog
      run: |
        # Use the whole history when there is no earlier tag (first release).
        prev_tag=$(git describe --tags --abbrev=0 HEAD^ 2>/dev/null || true)
        if [ -n "$prev_tag" ]; then
          range="${prev_tag}..HEAD"
        else
          range="HEAD"
        fi
        # tformat (unlike format) ends the last entry with a newline, so the
        # closing delimiter below is written on a line of its own.
        git log "$range" --pretty=tformat:"* %s (%h)" > CHANGELOG.txt
        {
          echo "changelog<<CHANGELOG_EOF"
          cat CHANGELOG.txt
          echo "CHANGELOG_EOF"
        } >> "$GITHUB_OUTPUT"
    - name: Create Release
      uses: softprops/action-gh-release@v1
      with:
        # Image references use the metadata step's version output because the
        # semver tags are pushed without the leading "v" of the git tag.
        body: |
          ## Changes
          ${{ steps.changelog.outputs.changelog }}
          ## Docker Images
          ```
          docker pull ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ steps.meta.outputs.version }}
          docker pull ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:latest
          ```
          ## Installation
          ### Kubernetes
          ```bash
          kubectl apply -k k8s/
          ```
          ### Docker
          ```bash
          docker run -p 8080:8080 \
            -e GOOGLE_API_KEY=your-key \
            -e ANTHROPIC_API_KEY=your-key \
            -e OPENAI_API_KEY=your-key \
            ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ steps.meta.outputs.version }}
          ```
        files: |
          bin/gateway-*
          bin/checksums.txt
        draft: false
        prerelease: ${{ contains(github.ref, 'alpha') || contains(github.ref, 'beta') || contains(github.ref, 'rc') }}
      env:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
--- a/Dockerfile
+++ b/Dockerfile
@@ -0,0 +1,62 @@
 # Multi-stage build for Go LLM Gateway
 # Stage 1: Build the Go binary
 # Pinned to the Go release line used by the CI workflows (GO_VERSION 1.23) so
 # image builds do not drift to whatever golang:alpine currently points at.
 FROM golang:1.23-alpine AS builder
 # Install build dependencies; gcc and musl-dev are needed because the build
 # below enables CGO.
 RUN apk add --no-cache git ca-certificates tzdata gcc musl-dev
 WORKDIR /build
 # Copy go mod files first for better caching
 COPY go.mod go.sum ./
 RUN go mod download
 # Copy source code
 COPY . .
 # Build the binary with optimizations
 # CGO is required for SQLite support
 # GOOS/GOARCH are deliberately left unset so the binary is compiled for the
 # platform this stage runs on. Hard-coding amd64 would place an amd64 binary
 # inside the linux/arm64 image that the workflows also build.
 RUN CGO_ENABLED=1 go build \
     -ldflags='-w -s -extldflags "-static"' \
     -a -installsuffix cgo \
     -o gateway \
     ./cmd/gateway
 # Stage 2: Create minimal runtime image
 FROM alpine:3.19
 # Install runtime dependencies
 RUN apk add --no-cache ca-certificates tzdata
 # Create non-root user
 RUN addgroup -g 1000 gateway && \
    adduser -D -u 1000 -G gateway gateway
 # Create necessary directories
 RUN mkdir -p /app /app/data && \
    chown -R gateway:gateway /app
 WORKDIR /app
 # Copy binary from builder
 COPY --from=builder /build/gateway /app/gateway
 # Copy example config (optional, mainly for documentation)
 COPY config.example.yaml /app/config.example.yaml
 # Switch to non-root user
 USER gateway
 # Expose port
 EXPOSE 8080
 # Health check
 HEALTHCHECK --interval=30s --timeout=5s --start-period=5s --retries=3 \
    CMD wget --no-verbose --tries=1 --spider http://localhost:8080/health || exit 1
 # Set entrypoint
 ENTRYPOINT ["/app/gateway"]
 # Default command (can be overridden)
 # The config file is not baked into the image; it must be mounted at this path.
 CMD ["--config", "/app/config/config.yaml"]
--- a/Makefile
+++ b/Makefile
@@ -0,0 +1,151 @@
 # Makefile for LLM Gateway
 # Every target in this file runs commands and produces no file of its own
 # name, so all of them are declared phony. Without this, a file or directory
 # named like a target (e.g. `run` or `lint`) would make it appear up to date
 # and its recipe would be skipped.
 .PHONY: help build build-static test test-coverage fmt lint tidy clean \
         docker-build docker-push docker-run \
         docker-compose-up docker-compose-down docker-compose-logs \
         k8s-namespace k8s-secrets k8s-deploy k8s-delete k8s-status k8s-logs \
         k8s-describe k8s-port-forward \
         ci security-scan run version
 # Variables
 APP_NAME := llm-gateway
 # Falls back to "dev" when git describe fails (for example outside a checkout).
 VERSION := $(shell git describe --tags --always --dirty 2>/dev/null || echo "dev")
 # ?= lets the caller override the registry from the environment or command line.
 REGISTRY ?= your-registry
 IMAGE := $(REGISTRY)/$(APP_NAME)
 DOCKER_TAG := $(IMAGE):$(VERSION)
 LATEST_TAG := $(IMAGE):latest
 # Go variables
 GOCMD := go
 GOBUILD := $(GOCMD) build
 GOTEST := $(GOCMD) test
 GOMOD := $(GOCMD) mod
 GOFMT := $(GOCMD) fmt
 # Build directory
 BUILD_DIR := bin
 # Help target
 # Lists every target that carries a trailing `## description`. The name
 # pattern includes digits so that targets such as k8s-deploy are listed too.
 help: ## Show this help message
 	@echo "Usage: make [target]"
 	@echo ""
 	@echo "Targets:"
 	@awk 'BEGIN {FS = ":.*##"; printf "\n"} /^[a-zA-Z0-9_-]+:.*##/ { printf "  %-20s %s\n", $$1, $$2 }' $(MAKEFILE_LIST)
 # Development targets
 # Both build targets enable cgo and write to $(BUILD_DIR)/$(APP_NAME).
 build: ## Build the binary
 	@echo "Building $(APP_NAME)..."
 	CGO_ENABLED=1 $(GOBUILD) -o $(BUILD_DIR)/$(APP_NAME) ./cmd/gateway
 # Same output path as `build`, linked with the flags the Dockerfile uses.
 build-static: ## Build static binary
 	@echo "Building static binary..."
 	CGO_ENABLED=1 $(GOBUILD) -ldflags='-w -s -extldflags "-static"' -a -installsuffix cgo -o $(BUILD_DIR)/$(APP_NAME) ./cmd/gateway
 # Runs with the race detector and writes coverage.out for test-coverage.
 test: ## Run tests
 	@echo "Running tests..."
 	$(GOTEST) -v -race -coverprofile=coverage.out ./...
 # Depends on `test`, which produces the coverage.out rendered here.
 test-coverage: test ## Run tests with coverage report
 	@echo "Generating coverage report..."
 	$(GOCMD) tool cover -html=coverage.out -o coverage.html
 	@echo "Coverage report saved to coverage.html"
 fmt: ## Format Go code
 	@echo "Formatting code..."
 	$(GOFMT) ./...
 # Fails early with an install hint when golangci-lint is not on PATH.
 lint: ## Run linter
 	@echo "Running linter..."
 	@which golangci-lint > /dev/null || (echo "golangci-lint not installed. Run: go install github.com/golangci/golangci-lint/cmd/golangci-lint@latest" && exit 1)
 	golangci-lint run ./...
 tidy: ## Tidy go modules
 	@echo "Tidying go modules..."
 	$(GOMOD) tidy
 # Removes the build directory and both coverage files.
 clean: ## Clean build artifacts
 	@echo "Cleaning..."
 	rm -rf $(BUILD_DIR)
 	rm -f coverage.out coverage.html
 # Docker targets
 # The image is tagged with both the computed version and `latest`.
 docker-build: ## Build Docker image
 	@echo "Building Docker image $(DOCKER_TAG)..."
 	docker build -t $(DOCKER_TAG) -t $(LATEST_TAG) .
 docker-push: docker-build ## Push Docker image to registry
 	@echo "Pushing Docker image..."
 	docker push $(DOCKER_TAG)
 	docker push $(LATEST_TAG)
 # The docker run command is silenced with `@` because make would otherwise
 # print it with the API keys expanded. $(CURDIR) is used for the bind mount so
 # the path follows the Makefile's directory even under `make -C`.
 docker-run: ## Run Docker container locally
 	@echo "Running Docker container..."
 	@docker run --rm -p 8080:8080 \
 		-e GOOGLE_API_KEY="$(GOOGLE_API_KEY)" \
 		-e ANTHROPIC_API_KEY="$(ANTHROPIC_API_KEY)" \
 		-e OPENAI_API_KEY="$(OPENAI_API_KEY)" \
 		-v $(CURDIR)/config.yaml:/app/config/config.yaml:ro \
 		$(DOCKER_TAG)
 docker-compose-up: ## Start services with docker-compose
 	@echo "Starting services with docker-compose..."
 	docker-compose up -d
 docker-compose-down: ## Stop services with docker-compose
 	@echo "Stopping services with docker-compose..."
 	docker-compose down
 docker-compose-logs: ## View docker-compose logs
 	docker-compose logs -f
 # Kubernetes targets
 # create --dry-run piped into apply makes the namespace creation repeatable.
 k8s-namespace: ## Create Kubernetes namespace
 	kubectl create namespace llm-gateway --dry-run=client -o yaml | kubectl apply -f -
 # Requires the three provider keys; OIDC_AUDIENCE is passed through and may be
 # empty. The kubectl command is silenced with `@` because make would otherwise
 # print it with the secret values expanded.
 k8s-secrets: ## Create Kubernetes secrets (requires env vars)
 	@echo "Creating secrets..."
 	@if [ -z "$(GOOGLE_API_KEY)" ] || [ -z "$(ANTHROPIC_API_KEY)" ] || [ -z "$(OPENAI_API_KEY)" ]; then \
 		echo "Error: Please set GOOGLE_API_KEY, ANTHROPIC_API_KEY, and OPENAI_API_KEY environment variables"; \
 		exit 1; \
 	fi
 	@kubectl create secret generic llm-gateway-secrets \
 		--from-literal=GOOGLE_API_KEY="$(GOOGLE_API_KEY)" \
 		--from-literal=ANTHROPIC_API_KEY="$(ANTHROPIC_API_KEY)" \
 		--from-literal=OPENAI_API_KEY="$(OPENAI_API_KEY)" \
 		--from-literal=OIDC_AUDIENCE="$(OIDC_AUDIENCE)" \
 		-n llm-gateway \
 		--dry-run=client -o yaml | kubectl apply -f -
 # Namespace and secrets are created first, then the kustomization is applied.
 k8s-deploy: k8s-namespace k8s-secrets ## Deploy to Kubernetes
 	@echo "Deploying to Kubernetes..."
 	kubectl apply -k k8s/
 k8s-delete: ## Delete from Kubernetes
 	@echo "Deleting from Kubernetes..."
 	kubectl delete -k k8s/
 k8s-status: ## Check Kubernetes deployment status
 	@echo "Checking deployment status..."
 	kubectl get all -n llm-gateway
 k8s-logs: ## View Kubernetes logs
 	kubectl logs -n llm-gateway -l app=llm-gateway --tail=100 -f
 k8s-describe: ## Describe Kubernetes deployment
 	kubectl describe deployment llm-gateway -n llm-gateway
 # Forwards local port 8080 to port 80 of the llm-gateway Service.
 k8s-port-forward: ## Port forward to local machine
 	kubectl port-forward -n llm-gateway svc/llm-gateway 8080:80
 # CI/CD targets
 # Aggregate target: it has no recipe and only runs the lint and test targets.
 ci: lint test ## Run CI checks
 # Fails early with an install hint when gosec is not on PATH.
 security-scan: ## Run security scan
 	@echo "Running security scan..."
 	@which gosec > /dev/null || (echo "gosec not installed. Run: go install github.com/securego/gosec/v2/cmd/gosec@latest" && exit 1)
 	gosec ./...
 # Run target
 # Passes config.yaml from the current directory via --config.
 run: ## Run locally
 	@echo "Running $(APP_NAME) locally..."
 	$(GOCMD) run ./cmd/gateway --config config.yaml
 # Version info
 # Prints the computed VERSION and the image tag derived from it.
 version: ## Show version
 	@echo "Version: $(VERSION)"
 	@echo "Image: $(DOCKER_TAG)"
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -0,0 +1,102 @@
 # Docker Compose for local development and testing
 # Not recommended for production - use Kubernetes instead
 version: "3.9"
 services:
  # Gateway built from the local Dockerfile; it is started only after the
  # redis service reports healthy.
  gateway:
    image: llm-gateway:latest
    container_name: llm-gateway
    build:
      dockerfile: Dockerfile
      context: .
    restart: unless-stopped
    networks:
      - llm-network
    ports:
      - "8080:8080"
    depends_on:
      redis:
        condition: service_healthy
    volumes:
      # Read-only bind mount of the local config file.
      - ./config.yaml:/app/config/config.yaml:ro
    environment:
      # Provider API keys
      GOOGLE_API_KEY: ${GOOGLE_API_KEY}
      ANTHROPIC_API_KEY: ${ANTHROPIC_API_KEY}
      OPENAI_API_KEY: ${OPENAI_API_KEY}
      # Resolves to an empty string when the variable is unset.
      OIDC_AUDIENCE: ${OIDC_AUDIENCE:-}
    healthcheck:
      test:
        - "CMD"
        - "wget"
        - "--no-verbose"
        - "--tries=1"
        - "--spider"
        - "http://localhost:8080/health"
      start_period: 10s
      interval: 30s
      timeout: 5s
      retries: 3
  # Redis for the gateway, capped at 256 MB with LRU eviction across all keys.
  redis:
    image: redis:7.2-alpine
    container_name: llm-gateway-redis
    ports:
      # Published on loopback only: this instance has no password, so it
      # should not be reachable from other machines. Containers on
      # llm-network still reach it at redis:6379.
      - "127.0.0.1:6379:6379"
    command: redis-server --maxmemory 256mb --maxmemory-policy allkeys-lru
    volumes:
      - redis-data:/data
    networks:
      - llm-network
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "redis-cli", "ping"]
      interval: 10s
      timeout: 3s
      retries: 3
  # Optional: Prometheus for metrics
  # Only started when the "monitoring" profile is enabled.
  prometheus:
    image: prom/prometheus:latest
    container_name: llm-gateway-prometheus
    profiles:
      - monitoring
    restart: unless-stopped
    networks:
      - llm-network
    ports:
      - "9090:9090"
    volumes:
      - ./monitoring/prometheus.yml:/etc/prometheus/prometheus.yml:ro
      - prometheus-data:/prometheus
    command:
      - "--config.file=/etc/prometheus/prometheus.yml"
      - "--storage.tsdb.path=/prometheus"
      - "--web.console.libraries=/usr/share/prometheus/console_libraries"
      - "--web.console.templates=/usr/share/prometheus/consoles"
  # Optional: Grafana for visualization
  # Only started when the "monitoring" profile is enabled.
  grafana:
    image: grafana/grafana:latest
    container_name: llm-gateway-grafana
    ports:
      - "3000:3000"
    environment:
      # Override with GRAFANA_ADMIN_PASSWORD; "admin" is kept only as the
      # default so existing local setups keep working.
      - GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_ADMIN_PASSWORD:-admin}
      - GF_USERS_ALLOW_SIGN_UP=false
    volumes:
      - ./monitoring/grafana-datasources.yml:/etc/grafana/provisioning/datasources/datasources.yml:ro
      - ./monitoring/grafana-dashboards.yml:/etc/grafana/provisioning/dashboards/dashboards.yml:ro
      - ./monitoring/dashboards:/var/lib/grafana/dashboards:ro
      - grafana-data:/var/lib/grafana
    depends_on:
      - prometheus
    networks:
      - llm-network
    restart: unless-stopped
    profiles:
      - monitoring
 # Single bridge network shared by every service in this file.
 networks:
  llm-network:
    driver: bridge
 # Named volumes with default settings, one per stateful service.
 volumes:
  redis-data: {}
  prometheus-data: {}
  grafana-data: {}
--- a/k8s/README.md
+++ b/k8s/README.md
@@ -0,0 +1,352 @@
 # Kubernetes Deployment Guide
 This directory contains Kubernetes manifests for deploying the LLM Gateway to production.
 ## Prerequisites
 - Kubernetes cluster (v1.24+)
 - `kubectl` configured
 - Container registry access
 - (Optional) Prometheus Operator for monitoring
 - (Optional) cert-manager for TLS certificates
 - (Optional) nginx-ingress-controller or cloud load balancer
 ## Quick Start
 ### 1. Build and Push Docker Image
 ```bash
 # Build the image
 docker build -t your-registry/llm-gateway:v1.0.0 .
 # Push to registry
 docker push your-registry/llm-gateway:v1.0.0
 ```
 ### 2. Configure Secrets
 **Option A: Using kubectl**
 ```bash
 kubectl create namespace llm-gateway
 kubectl create secret generic llm-gateway-secrets \
  --from-literal=GOOGLE_API_KEY="your-key" \
  --from-literal=ANTHROPIC_API_KEY="your-key" \
  --from-literal=OPENAI_API_KEY="your-key" \
  --from-literal=OIDC_AUDIENCE="your-client-id" \
  -n llm-gateway
 ```
 **Option B: Using External Secrets Operator (Recommended)**
 - Uncomment the ExternalSecret in `secret.yaml`
 - Configure your SecretStore (AWS Secrets Manager, Vault, etc.)
 ### 3. Update Configuration
 Edit `configmap.yaml`:
 - Update Redis connection string if using external Redis
 - Configure observability endpoints (Tempo, Prometheus)
 - Adjust rate limits as needed
 - Set OIDC issuer and audience
 Edit `ingress.yaml`:
 - Replace `llm-gateway.example.com` with your domain
 - Configure TLS certificate annotations
 Edit `kustomization.yaml`:
 - Update image registry and tag
 ### 4. Deploy
 **Using Kustomize (Recommended):**
 ```bash
 kubectl apply -k k8s/
 ```
 **Using kubectl directly:**
 ```bash
 kubectl apply -f k8s/namespace.yaml
 kubectl apply -f k8s/serviceaccount.yaml
 kubectl apply -f k8s/secret.yaml
 kubectl apply -f k8s/configmap.yaml
 kubectl apply -f k8s/redis.yaml
 kubectl apply -f k8s/deployment.yaml
 kubectl apply -f k8s/service.yaml
 kubectl apply -f k8s/ingress.yaml
 kubectl apply -f k8s/hpa.yaml
 kubectl apply -f k8s/pdb.yaml
 kubectl apply -f k8s/networkpolicy.yaml
 ```
 **With Prometheus Operator:**
 ```bash
 kubectl apply -f k8s/servicemonitor.yaml
 kubectl apply -f k8s/prometheusrule.yaml
 ```
 ### 5. Verify Deployment
 ```bash
 # Check pods
 kubectl get pods -n llm-gateway
 # Check services
 kubectl get svc -n llm-gateway
 # Check ingress
 kubectl get ingress -n llm-gateway
 # View logs
 kubectl logs -n llm-gateway -l app=llm-gateway --tail=100 -f
 # Check health (port-forward stays in the foreground, so run curl from a second terminal)
 kubectl port-forward -n llm-gateway svc/llm-gateway 8080:80
 curl http://localhost:8080/health
 ```
 ## Architecture Overview
 ```
 ┌─────────────────────────────────────────────────────────┐
 │                    Internet/Clients                      │
 └───────────────────────┬─────────────────────────────────┘
                        │
                        ▼
 ┌─────────────────────────────────────────────────────────┐
 │                  Ingress Controller                      │
 │            (nginx/ALB/GCE with TLS)                     │
 └───────────────────────┬─────────────────────────────────┘
                        │
                        ▼
 ┌─────────────────────────────────────────────────────────┐
 │                  LLM Gateway Service                     │
 │                    (LoadBalancer)                        │
 └───────────────────────┬─────────────────────────────────┘
                        │
        ┌───────────────┼───────────────┐
        ▼               ▼               ▼
 ┌──────────────┐ ┌──────────────┐ ┌──────────────┐
 │   Gateway    │ │   Gateway    │ │   Gateway    │
 │   Pod 1      │ │   Pod 2      │ │   Pod 3      │
 └──────┬───────┘ └──────┬───────┘ └──────┬───────┘
       │                │                │
       └────────────────┼────────────────┘
                        │
        ┌───────────────┼───────────────┐
        ▼               ▼               ▼
 ┌──────────────┐ ┌──────────────┐ ┌──────────────┐
 │    Redis     │ │  Prometheus  │ │    Tempo     │
 │ (Persistent) │ │  (Metrics)   │ │  (Traces)    │
 └──────────────┘ └──────────────┘ └──────────────┘
 ```
 ## Resource Specifications
 ### Default Resources
 - **Requests**: 100m CPU, 128Mi memory
 - **Limits**: 1000m CPU, 512Mi memory
 - **Replicas**: 3 (min), 20 (max with HPA)
 ### Scaling
 - HPA scales based on CPU (70%) and memory (80%)
 - PodDisruptionBudget ensures minimum 2 replicas during disruptions
 ## Configuration Options
 ### Environment Variables (from Secret)
 - `GOOGLE_API_KEY`: Google AI API key
 - `ANTHROPIC_API_KEY`: Anthropic API key
 - `OPENAI_API_KEY`: OpenAI API key
 - `OIDC_AUDIENCE`: OIDC client ID for authentication
 ### ConfigMap Settings
 See `configmap.yaml` for full configuration options:
 - Server address
 - Logging format and level
 - Rate limiting
 - Observability (metrics/tracing)
 - Provider endpoints
 - Conversation storage
 - Authentication
 ## Security
 ### Security Features
 - Non-root container execution (UID 1000)
 - Read-only root filesystem
 - No privilege escalation
 - All capabilities dropped
 - Network policies for ingress/egress control
 - SeccompProfile: RuntimeDefault
 ### TLS/HTTPS
 - Ingress configured with TLS
 - Uses cert-manager for automatic certificate provisioning
 - Force SSL redirect enabled
 ### Secrets Management
 **Never commit secrets to git!**
 Production options:
 1. **External Secrets Operator** (Recommended)
   - AWS Secrets Manager
   - HashiCorp Vault
   - Google Secret Manager
 2. **Sealed Secrets**
   - Encrypted secrets in git
 3. **Manual kubectl secrets**
   - Created outside of git
 ## Monitoring
 ### Metrics
 - Exposed on `/metrics` endpoint
 - Scraped by Prometheus via ServiceMonitor
 - Key metrics:
  - HTTP request rate, latency, errors
  - Provider request rate, latency, token usage
  - Conversation store operations
  - Rate limiting hits
 ### Alerts
 See `prometheusrule.yaml` for configured alerts:
 - High error rate
 - High latency
 - Provider failures
 - Pod down
 - High memory usage
 - Rate limit threshold exceeded
 - Conversation store errors
 ### Logs
 Structured JSON logs with:
 - Request IDs
 - Trace context (trace_id, span_id)
 - Log levels (debug/info/warn/error)
 View logs:
 ```bash
 kubectl logs -n llm-gateway -l app=llm-gateway --tail=100 -f
 ```
 ## Maintenance
 ### Rolling Updates
 ```bash
 # Update image
 kubectl set image deployment/llm-gateway gateway=your-registry/llm-gateway:v1.0.1 -n llm-gateway
 # Check rollout status
 kubectl rollout status deployment/llm-gateway -n llm-gateway
 # Rollback if needed
 kubectl rollout undo deployment/llm-gateway -n llm-gateway
 ```
 ### Scaling
 ```bash
 # Manual scale (while the HPA is active it may change this count again on its next evaluation)
 kubectl scale deployment/llm-gateway --replicas=5 -n llm-gateway
 # HPA will auto-scale within min/max bounds (3-20)
 ```
 ### Configuration Updates
 ```bash
 # Edit ConfigMap
 kubectl edit configmap llm-gateway-config -n llm-gateway
 # Restart pods to pick up changes
 kubectl rollout restart deployment/llm-gateway -n llm-gateway
 ```
 ### Debugging
 ```bash
 # Exec into pod
 kubectl exec -it -n llm-gateway deployment/llm-gateway -- /bin/sh
 # Port forward for local access
 kubectl port-forward -n llm-gateway svc/llm-gateway 8080:80
 # Check events
 kubectl get events -n llm-gateway --sort-by='.lastTimestamp'
 ```
 ## Production Considerations
 ### High Availability
 - Minimum 3 replicas across availability zones
 - Pod anti-affinity rules spread pods across nodes
 - PodDisruptionBudget ensures service availability during disruptions
 ### Performance
 - Adjust resource limits based on load testing
 - Configure HPA thresholds based on traffic patterns
 - Use node affinity for GPU nodes if needed
 ### Cost Optimization
 - Use spot/preemptible instances for non-critical workloads
 - Set appropriate resource requests/limits
 - Monitor token usage and implement quotas
 ### Disaster Recovery
 - Redis persistence (if using StatefulSet)
 - Regular backups of conversation data
 - Multi-region deployment for geo-redundancy
 - Document runbooks for incident response
 ## Cloud-Specific Notes
 ### AWS EKS
 - Use AWS Load Balancer Controller for ALB
 - Configure IRSA for service account
 - Use ElastiCache for Redis
 - Store secrets in AWS Secrets Manager
 ### GCP GKE
 - Use GKE Ingress for GCLB
 - Configure Workload Identity
 - Use Memorystore for Redis
 - Store secrets in Google Secret Manager
 ### Azure AKS
 - Use Azure Application Gateway Ingress Controller
 - Configure Azure AD Workload Identity
 - Use Azure Cache for Redis
 - Store secrets in Azure Key Vault
 ## Troubleshooting
 ### Common Issues
 **Pods not starting:**
 ```bash
 kubectl describe pod -n llm-gateway -l app=llm-gateway
 kubectl logs -n llm-gateway -l app=llm-gateway --previous
 ```
 **Health check failures:**
 ```bash
 # Run the port-forward in one terminal and the curl checks in another
 kubectl port-forward -n llm-gateway deployment/llm-gateway 8080:8080
 curl http://localhost:8080/health
 curl http://localhost:8080/ready
 ```
 **Provider connection issues:**
 - Verify API keys in secrets
 - Check network policies allow egress
 - Verify provider endpoints are accessible
 **Redis connection issues:**
 ```bash
 kubectl exec -it -n llm-gateway redis-0 -- redis-cli ping
 ```
 ## Additional Resources
 - [Kubernetes Documentation](https://kubernetes.io/docs/)
 - [Prometheus Operator](https://github.com/prometheus-operator/prometheus-operator)
 - [cert-manager](https://cert-manager.io/)
 - [External Secrets Operator](https://external-secrets.io/)
--- a/k8s/configmap.yaml
+++ b/k8s/configmap.yaml
@@ -0,0 +1,76 @@
 # Gateway configuration, delivered as a single config.yaml entry.
 apiVersion: v1
 kind: ConfigMap
 metadata:
  name: llm-gateway-config
  namespace: llm-gateway
  labels:
    app: llm-gateway
 data:
  # Everything under this key is one literal string (block scalar), so any
  # text added inside it becomes part of the delivered file.
  # The ${...} placeholders carry the names of the environment variables the
  # Deployment injects from llm-gateway-secrets.
  # NOTE(review): presumably the gateway expands them when it loads the file;
  # confirm against the config loader.
  config.yaml: |
    server:
      address: ":8080"
    logging:
      format: "json"
      level: "info"
    rate_limit:
      enabled: true
      requests_per_second: 10
      burst: 20
    observability:
      enabled: true
      metrics:
        enabled: true
        path: "/metrics"
      tracing:
        enabled: true
        service_name: "llm-gateway"
        sampler:
          type: "probability"
          rate: 0.1
        exporter:
          type: "otlp"
          endpoint: "tempo.observability.svc.cluster.local:4317"
          insecure: true
    providers:
      google:
        type: "google"
        api_key: "${GOOGLE_API_KEY}"
        endpoint: "https://generativelanguage.googleapis.com"
      anthropic:
        type: "anthropic"
        api_key: "${ANTHROPIC_API_KEY}"
        endpoint: "https://api.anthropic.com"
      openai:
        type: "openai"
        api_key: "${OPENAI_API_KEY}"
        endpoint: "https://api.openai.com"
    conversations:
      store: "redis"
      ttl: "1h"
      dsn: "redis://redis.llm-gateway.svc.cluster.local:6379/0"
    auth:
      enabled: true
      issuer: "https://accounts.google.com"
      audience: "${OIDC_AUDIENCE}"
    models:
      - name: "gemini-1.5-flash"
        provider: "google"
      - name: "gemini-1.5-pro"
        provider: "google"
      - name: "claude-3-5-sonnet-20241022"
        provider: "anthropic"
      - name: "claude-3-5-haiku-20241022"
        provider: "anthropic"
      - name: "gpt-4o"
        provider: "openai"
      - name: "gpt-4o-mini"
        provider: "openai"
--- a/k8s/deployment.yaml
+++ b/k8s/deployment.yaml
@@ -0,0 +1,168 @@
 # Deployment for the gateway pods.
 apiVersion: apps/v1
 kind: Deployment
 metadata:
  name: llm-gateway
  namespace: llm-gateway
  labels:
    app: llm-gateway
    version: v1
 spec:
  # spec.replicas is intentionally not set. The llm-gateway
  # HorizontalPodAutoscaler (minReplicas 3, maxReplicas 20) owns the replica
  # count; a fixed value here would reset it to that number every time this
  # manifest is applied.
  strategy:
    type: RollingUpdate
    rollingUpdate:
      # Start one extra pod at a time and take none away until it is ready.
      maxSurge: 1
      maxUnavailable: 0
  selector:
    matchLabels:
      app: llm-gateway
  # Pod template: one gateway container running as a non-root user on a
  # read-only root filesystem, configured from a ConfigMap and a Secret.
  template:
    metadata:
      labels:
        app: llm-gateway
        version: v1
      annotations:
        # Values are quoted because annotation values must be strings.
        prometheus.io/scrape: "true"
        prometheus.io/port: "8080"
        prometheus.io/path: "/metrics"
    spec:
      serviceAccountName: llm-gateway
      # Pod-level identity: UID/GID 1000 for the process and for volume ownership.
      securityContext:
        runAsNonRoot: true
        runAsUser: 1000
        runAsGroup: 1000
        fsGroup: 1000
        seccompProfile:
          type: RuntimeDefault
      containers:
      - name: gateway
        image: llm-gateway:latest  # Replace with your registry/image:tag
        imagePullPolicy: IfNotPresent
        ports:
        # The probes below refer to this port by its name, "http".
        - name: http
          containerPort: 8080
          protocol: TCP
        env:
        # Provider API Keys from Secret
        - name: GOOGLE_API_KEY
          valueFrom:
            secretKeyRef:
              name: llm-gateway-secrets
              key: GOOGLE_API_KEY
        - name: ANTHROPIC_API_KEY
          valueFrom:
            secretKeyRef:
              name: llm-gateway-secrets
              key: ANTHROPIC_API_KEY
        - name: OPENAI_API_KEY
          valueFrom:
            secretKeyRef:
              name: llm-gateway-secrets
              key: OPENAI_API_KEY
        - name: OIDC_AUDIENCE
          valueFrom:
            secretKeyRef:
              name: llm-gateway-secrets
              key: OIDC_AUDIENCE
        # Optional: Pod metadata
        - name: POD_NAME
          valueFrom:
            fieldRef:
              fieldPath: metadata.name
        - name: POD_NAMESPACE
          valueFrom:
            fieldRef:
              fieldPath: metadata.namespace
        - name: POD_IP
          valueFrom:
            fieldRef:
              fieldPath: status.podIP
        resources:
          requests:
            cpu: 100m
            memory: 128Mi
          limits:
            cpu: 1000m
            memory: 512Mi
        # Restarts the container after 3 consecutive failed /health checks,
        # probed every 30 seconds.
        livenessProbe:
          httpGet:
            path: /health
            port: http
            scheme: HTTP
          initialDelaySeconds: 10
          periodSeconds: 30
          timeoutSeconds: 5
          successThreshold: 1
          failureThreshold: 3
        # Removes the pod from Service endpoints while /ready is failing.
        readinessProbe:
          httpGet:
            path: /ready
            port: http
            scheme: HTTP
          initialDelaySeconds: 5
          periodSeconds: 10
          timeoutSeconds: 5
          successThreshold: 1
          failureThreshold: 3
        # Allows up to 30 checks 5 seconds apart (about 150s) for startup;
        # the other probes do not run until this one succeeds.
        startupProbe:
          httpGet:
            path: /health
            port: http
            scheme: HTTP
          initialDelaySeconds: 0
          periodSeconds: 5
          timeoutSeconds: 3
          successThreshold: 1
          failureThreshold: 30
        volumeMounts:
        # The llm-gateway-config ConfigMap, mounted as files under /app/config.
        - name: config
          mountPath: /app/config
          readOnly: true
        # Writable scratch space; the root filesystem is read-only (see below).
        - name: tmp
          mountPath: /tmp
        securityContext:
          allowPrivilegeEscalation: false
          readOnlyRootFilesystem: true
          runAsNonRoot: true
          runAsUser: 1000
          capabilities:
            drop:
            - ALL
      volumes:
      - name: config
        configMap:
          name: llm-gateway-config
      - name: tmp
        emptyDir: {}
      # Affinity rules for better distribution
      # A soft preference (not a requirement) to place gateway pods on
      # different nodes.
      affinity:
        podAntiAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
          - weight: 100
            podAffinityTerm:
              labelSelector:
                matchExpressions:
                - key: app
                  operator: In
                  values:
                  - llm-gateway
              topologyKey: kubernetes.io/hostname
      # Tolerations (if needed for specific node pools)
      # tolerations:
      # - key: "workload-type"
      #   operator: "Equal"
      #   value: "llm"
      #   effect: "NoSchedule"
--- a/k8s/hpa.yaml
+++ b/k8s/hpa.yaml
@@ -0,0 +1,63 @@
 # HorizontalPodAutoscaler for the llm-gateway Deployment.
 # Keeps between 3 and 20 replicas based on average CPU and memory utilization.
 apiVersion: autoscaling/v2
 kind: HorizontalPodAutoscaler
 metadata:
  namespace: llm-gateway
  name: llm-gateway
  labels:
    app: llm-gateway
 spec:
  minReplicas: 3
  maxReplicas: 20
  scaleTargetRef:
    kind: Deployment
    name: llm-gateway
    apiVersion: apps/v1
  behavior:
    # Scale up without a stabilization delay. Per 30s window, the policy that
    # permits the larger change wins: +100% of current replicas or +4 pods.
    scaleUp:
      selectPolicy: Max
      stabilizationWindowSeconds: 0
      policies:
      - type: Percent
        periodSeconds: 30
        value: 100
      - type: Pods
        periodSeconds: 30
        value: 4
    # Scale down only after a 300s stabilization window. Per 60s window, the
    # policy that permits the smaller change wins: -50% of replicas or -2 pods.
    scaleDown:
      selectPolicy: Min
      stabilizationWindowSeconds: 300
      policies:
      - type: Percent
        periodSeconds: 60
        value: 50
      - type: Pods
        periodSeconds: 60
        value: 2
  metrics:
  # Target 70% average CPU utilization
  - type: Resource
    resource:
      name: cpu
      target:
        averageUtilization: 70
        type: Utilization
  # Target 80% average memory utilization
  - type: Resource
    resource:
      name: memory
      target:
        averageUtilization: 80
        type: Utilization
  # Custom metrics (requires a custom metrics API adapter such as prometheus-adapter; metrics-server only provides CPU/memory)
  # - type: Pods
  #   pods:
  #     metric:
  #       name: http_requests_per_second
  #     target:
  #       type: AverageValue
  #       averageValue: "1000"
--- a/k8s/ingress.yaml
+++ b/k8s/ingress.yaml
@@ -0,0 +1,66 @@
 # Public HTTPS entry point for the gateway: routes llm-gateway.example.com to
 # the llm-gateway Service on port 80, with TLS from the llm-gateway-tls secret.
 apiVersion: networking.k8s.io/v1
 kind: Ingress
 metadata:
  name: llm-gateway
  namespace: llm-gateway
  labels:
    app: llm-gateway
  annotations:
    # The ingress class is selected with spec.ingressClassName (see below).
    # The kubernetes.io/ingress.class annotation is deprecated for
    # networking.k8s.io/v1 Ingress objects and must not be combined with it.
    # TLS certificate issuance (cert-manager)
    cert-manager.io/cluster-issuer: "letsencrypt-prod"
    # TLS enforcement
    nginx.ingress.kubernetes.io/force-ssl-redirect: "true"
    # NOTE(review): ssl-protocols appears to be a controller-wide ConfigMap
    # option rather than a per-Ingress annotation - confirm it has any effect.
    nginx.ingress.kubernetes.io/ssl-protocols: "TLSv1.2 TLSv1.3"
    # Rate limiting (supplement application-level rate limiting)
    nginx.ingress.kubernetes.io/limit-rps: "100"
    nginx.ingress.kubernetes.io/limit-connections: "50"
    # Request size limit (10MB)
    nginx.ingress.kubernetes.io/proxy-body-size: "10m"
    # Timeouts (seconds)
    nginx.ingress.kubernetes.io/proxy-connect-timeout: "60"
    nginx.ingress.kubernetes.io/proxy-send-timeout: "120"
    nginx.ingress.kubernetes.io/proxy-read-timeout: "120"
    # CORS (if needed)
    # nginx.ingress.kubernetes.io/enable-cors: "true"
    # nginx.ingress.kubernetes.io/cors-allow-origin: "https://yourdomain.com"
    # nginx.ingress.kubernetes.io/cors-allow-methods: "GET, POST, OPTIONS"
    # nginx.ingress.kubernetes.io/cors-allow-credentials: "true"
    # For AWS ALB Ingress Controller (alternative to nginx)
    # (when switching controllers, also change or remove spec.ingressClassName)
    # kubernetes.io/ingress.class: "alb"
    # alb.ingress.kubernetes.io/scheme: "internet-facing"
    # alb.ingress.kubernetes.io/target-type: "ip"
    # alb.ingress.kubernetes.io/listen-ports: '[{"HTTP": 80}, {"HTTPS": 443}]'
    # alb.ingress.kubernetes.io/ssl-redirect: '443'
    # alb.ingress.kubernetes.io/certificate-arn: "arn:aws:acm:region:account:certificate/xxx"
    # For GKE Ingress (alternative to nginx)
    # (when switching controllers, also change or remove spec.ingressClassName)
    # kubernetes.io/ingress.class: "gce"
    # kubernetes.io/ingress.global-static-ip-name: "llm-gateway-ip"
    # ingress.gcp.kubernetes.io/pre-shared-cert: "llm-gateway-cert"
 spec:
  # Requires an IngressClass named "nginx" in the cluster
  ingressClassName: nginx
  tls:
  - hosts:
    - llm-gateway.example.com  # Replace with your domain
    secretName: llm-gateway-tls
  rules:
  - host: llm-gateway.example.com  # Replace with your domain
    http:
      paths:
      - path: /
        pathType: Prefix
        backend:
          service:
            name: llm-gateway
            port:
              number: 80
--- a/k8s/kustomization.yaml
+++ b/k8s/kustomization.yaml
@@ -0,0 +1,46 @@
 # Kustomize configuration for easy deployment
 # Usage: kubectl apply -k k8s/
 apiVersion: kustomize.config.k8s.io/v1beta1
 kind: Kustomization
 # Namespace applied to every namespaced resource listed below
 namespace: llm-gateway
 resources:
 - namespace.yaml
 - serviceaccount.yaml
 - configmap.yaml
 - secret.yaml
 - deployment.yaml
 - service.yaml
 - ingress.yaml
 - hpa.yaml
 - pdb.yaml
 - networkpolicy.yaml
 - redis.yaml
 - servicemonitor.yaml
 - prometheusrule.yaml
 # Labels added to the metadata of every resource.
 # The `labels` field with includeSelectors: false replaces the deprecated
 # `commonLabels`, which also injects the labels into selectors. That includes
 # the NetworkPolicy peer podSelectors (kube-dns, redis, prometheus, tempo),
 # which would then stop matching the pods they are meant to allow.
 # Optionally add `includeTemplates: true` to label pod templates as well,
 # if your kustomize version supports it.
 labels:
 - pairs:
    app.kubernetes.io/name: llm-gateway
    app.kubernetes.io/component: api-gateway
    app.kubernetes.io/part-of: llm-platform
  includeSelectors: false
 # Images to be used (customize for your registry)
 images:
 - name: llm-gateway
  newName: your-registry/llm-gateway
  newTag: latest
 # ConfigMap generator (alternative to configmap.yaml)
 # configMapGenerator:
 # - name: llm-gateway-config
 #   files:
 #   - config.yaml
 # Secret generator (for local development only)
 # secretGenerator:
 # - name: llm-gateway-secrets
 #   envs:
 #   - secrets.env
--- a/k8s/namespace.yaml
+++ b/k8s/namespace.yaml
@@ -0,0 +1,7 @@
 # Namespace that holds every llm-gateway resource
 kind: Namespace
 apiVersion: v1
 metadata:
  name: llm-gateway
  labels:
    environment: production
    app: llm-gateway
--- a/k8s/networkpolicy.yaml
+++ b/k8s/networkpolicy.yaml
@@ -0,0 +1,83 @@
 # Restricts ingress to and egress from the llm-gateway pods.
 apiVersion: networking.k8s.io/v1
 kind: NetworkPolicy
 metadata:
  name: llm-gateway
  namespace: llm-gateway
  labels:
    app: llm-gateway
 spec:
  podSelector:
    matchLabels:
      app: llm-gateway
  policyTypes:
  - Ingress
  - Egress
  ingress:
  # Allow traffic from ingress controller.
  # Namespaces are matched on kubernetes.io/metadata.name, which Kubernetes
  # sets on every namespace automatically; a plain "name" label exists only
  # if someone added it by hand.
  - from:
    - namespaceSelector:
        matchLabels:
          kubernetes.io/metadata.name: ingress-nginx
    ports:
    - protocol: TCP
      port: 8080
  # Allow traffic from within the namespace (for debugging/testing)
  - from:
    - podSelector: {}
    ports:
    - protocol: TCP
      port: 8080
  # Allow Prometheus scraping
  - from:
    - namespaceSelector:
        matchLabels:
          kubernetes.io/metadata.name: observability
      podSelector:
        matchLabels:
          app: prometheus
    ports:
    - protocol: TCP
      port: 8080
  egress:
  # Allow DNS (UDP, plus TCP for responses that do not fit in a UDP packet)
  - to:
    - namespaceSelector: {}
      podSelector:
        matchLabels:
          k8s-app: kube-dns
    ports:
    - protocol: UDP
      port: 53
    - protocol: TCP
      port: 53
  # Allow Redis access
  - to:
    - podSelector:
        matchLabels:
          app: redis
    ports:
    - protocol: TCP
      port: 6379
  # Allow external provider API access (OpenAI, Anthropic, Google)
  - to:
    # HTTPS to pods in any namespace of this cluster
    - namespaceSelector: {}
    # HTTPS to addresses outside the cluster. A namespaceSelector only
    # matches pods, so public endpoints need an ipBlock peer.
    - ipBlock:
        cidr: 0.0.0.0/0
    ports:
    - protocol: TCP
      port: 443
  # Allow OTLP tracing export
  - to:
    - namespaceSelector:
        matchLabels:
          kubernetes.io/metadata.name: observability
      podSelector:
        matchLabels:
          app: tempo
    ports:
    - protocol: TCP
      port: 4317
--- a/k8s/pdb.yaml
+++ b/k8s/pdb.yaml
@@ -0,0 +1,13 @@
 # Keeps at least two llm-gateway pods available during voluntary disruptions
 kind: PodDisruptionBudget
 apiVersion: policy/v1
 metadata:
  namespace: llm-gateway
  name: llm-gateway
  labels:
    app: llm-gateway
 spec:
  selector:
    matchLabels:
      app: llm-gateway
  minAvailable: 2
  # Pods that are running but not healthy may always be evicted
  unhealthyPodEvictionPolicy: AlwaysAllow
--- a/k8s/prometheusrule.yaml
+++ b/k8s/prometheusrule.yaml
@@ -0,0 +1,122 @@
 # PrometheusRule for alerting
 # Requires Prometheus Operator to be installed
 kind: PrometheusRule
 apiVersion: monitoring.coreos.com/v1
 metadata:
  namespace: llm-gateway
  name: llm-gateway
  labels:
    prometheus: kube-prometheus
    app: llm-gateway
 spec:
  groups:
  - name: llm-gateway.rules
    interval: 30s
    rules:
    # Share of HTTP 5xx responses above 5% for 5 minutes
    - alert: LLMGatewayHighErrorRate
      for: 5m
      expr: |
        (
          sum(rate(http_requests_total{namespace="llm-gateway",status_code=~"5.."}[5m]))
          /
          sum(rate(http_requests_total{namespace="llm-gateway"}[5m]))
        ) > 0.05
      labels:
        component: llm-gateway
        severity: warning
      annotations:
        description: 'Error rate is {{ $value | humanizePercentage }} (threshold: 5%)'
        summary: 'High error rate in LLM Gateway'
    # P95 request duration above 10 seconds for 5 minutes
    - alert: LLMGatewayHighLatency
      for: 5m
      expr: |
        histogram_quantile(0.95,
          sum(rate(http_request_duration_seconds_bucket{namespace="llm-gateway"}[5m])) by (le)
        ) > 10
      labels:
        component: llm-gateway
        severity: warning
      annotations:
        description: 'P95 latency is {{ $value }}s (threshold: 10s)'
        summary: 'High latency in LLM Gateway'
    # Per-provider share of failed upstream requests above 10% for 5 minutes
    - alert: LLMProviderHighErrorRate
      for: 5m
      expr: |
        (
          sum(rate(provider_requests_total{namespace="llm-gateway",status="error"}[5m])) by (provider)
          /
          sum(rate(provider_requests_total{namespace="llm-gateway"}[5m])) by (provider)
        ) > 0.10
      labels:
        component: llm-gateway
        severity: warning
      annotations:
        description: 'Error rate is {{ $value | humanizePercentage }} (threshold: 10%)'
        summary: 'High error rate for provider {{ $labels.provider }}'
    # A scrape target of the llm-gateway job has been down for 2 minutes
    - alert: LLMGatewayPodDown
      for: 2m
      expr: |
        up{job="llm-gateway",namespace="llm-gateway"} == 0
      labels:
        component: llm-gateway
        severity: critical
      annotations:
        description: 'Pod {{ $labels.pod }} has been down for more than 2 minutes'
        summary: 'LLM Gateway pod is down'
    # Working-set memory above 85% of the container limit for 5 minutes
    - alert: LLMGatewayHighMemoryUsage
      for: 5m
      expr: |
        (
          container_memory_working_set_bytes{namespace="llm-gateway",container="gateway"}
          /
          container_spec_memory_limit_bytes{namespace="llm-gateway",container="gateway"}
        ) > 0.85
      labels:
        component: llm-gateway
        severity: warning
      annotations:
        description: 'Memory usage is {{ $value | humanizePercentage }} (threshold: 85%)'
        summary: 'High memory usage in LLM Gateway'
    # More than 20% of requests answered with HTTP 429 for 10 minutes
    - alert: LLMGatewayHighRateLimitHitRate
      for: 10m
      expr: |
        (
          sum(rate(http_requests_total{namespace="llm-gateway",status_code="429"}[5m]))
          /
          sum(rate(http_requests_total{namespace="llm-gateway"}[5m]))
        ) > 0.20
      labels:
        component: llm-gateway
        severity: info
      annotations:
        description: '{{ $value | humanizePercentage }} of requests are being rate limited'
        summary: 'High rate limit hit rate'
    # Share of failed conversation store operations above 5% for 5 minutes
    - alert: LLMGatewayConversationStoreErrors
      for: 5m
      expr: |
        (
          sum(rate(conversation_store_operations_total{namespace="llm-gateway",status="error"}[5m]))
          /
          sum(rate(conversation_store_operations_total{namespace="llm-gateway"}[5m]))
        ) > 0.05
      labels:
        component: llm-gateway
        severity: warning
      annotations:
        description: 'Error rate is {{ $value | humanizePercentage }} (threshold: 5%)'
        summary: 'High error rate in conversation store'
--- a/k8s/redis.yaml
+++ b/k8s/redis.yaml
@@ -0,0 +1,131 @@
 # Simple Redis deployment for conversation storage
 # For production, consider using:
 # - Redis Operator (e.g., Redis Enterprise Operator)
 # - Managed Redis (AWS ElastiCache, GCP Memorystore, Azure Cache for Redis)
 # - Redis Cluster for high availability
 # redis.conf mounted into the redis StatefulSet at /etc/redis
 kind: ConfigMap
 apiVersion: v1
 metadata:
  namespace: llm-gateway
  name: redis-config
  labels:
    app: redis
 data:
  # Caps memory at 256mb with LRU eviction across all keys, and turns off both
  # RDB snapshots (save "") and the append-only file.
  # NOTE(review): persistence is disabled here although the StatefulSet
  # provisions a data volume - confirm that is intended.
  redis.conf: |
    maxmemory 256mb
    maxmemory-policy allkeys-lru
    save ""
    appendonly no
 ---
 # Single-replica Redis, started with the config file from the redis-config
 # ConfigMap and a per-pod data volume mounted at /data.
 kind: StatefulSet
 apiVersion: apps/v1
 metadata:
  namespace: llm-gateway
  name: redis
  labels:
    app: redis
 spec:
  replicas: 1
  serviceName: redis
  selector:
    matchLabels:
      app: redis
  template:
    metadata:
      labels:
        app: redis
    spec:
      # Pod-level security: non-root UID 999, volumes group-owned by GID 999
      securityContext:
        fsGroup: 999
        runAsUser: 999
        runAsNonRoot: true
        seccompProfile:
          type: RuntimeDefault
      volumes:
      - name: config
        configMap:
          name: redis-config
      containers:
      - name: redis
        image: redis:7.2-alpine
        imagePullPolicy: IfNotPresent
        command: ["redis-server", "/etc/redis/redis.conf"]
        ports:
        - name: redis
          protocol: TCP
          containerPort: 6379
        resources:
          limits:
            cpu: 500m
            memory: 512Mi
          requests:
            cpu: 100m
            memory: 128Mi
        # Ready once redis-cli ping succeeds
        readinessProbe:
          exec:
            command: ["redis-cli", "ping"]
          initialDelaySeconds: 5
          periodSeconds: 5
          timeoutSeconds: 3
          failureThreshold: 3
        # Alive while the redis port accepts TCP connections
        livenessProbe:
          tcpSocket:
            port: redis
          initialDelaySeconds: 10
          periodSeconds: 10
          timeoutSeconds: 5
          failureThreshold: 3
        # Root filesystem is read-only; /data is the writable volume mount
        securityContext:
          runAsUser: 999
          runAsNonRoot: true
          readOnlyRootFilesystem: true
          allowPrivilegeEscalation: false
          capabilities:
            drop:
            - ALL
        volumeMounts:
        - mountPath: /etc/redis
          name: config
        - mountPath: /data
          name: data
  volumeClaimTemplates:
  - metadata:
      name: data
    spec:
      accessModes:
      - ReadWriteOnce
      resources:
        requests:
          storage: 10Gi
 ---
 # Headless Service (clusterIP: None) for the redis StatefulSet
 kind: Service
 apiVersion: v1
 metadata:
  namespace: llm-gateway
  name: redis
  labels:
    app: redis
 spec:
  clusterIP: None
  type: ClusterIP
  selector:
    app: redis
  ports:
  - name: redis
    protocol: TCP
    port: 6379
    targetPort: redis
--- a/k8s/secret.yaml
+++ b/k8s/secret.yaml
@@ -0,0 +1,46 @@
 # Placeholder Secret holding provider API keys and the OIDC audience
 kind: Secret
 apiVersion: v1
 type: Opaque
 metadata:
  namespace: llm-gateway
  name: llm-gateway-secrets
  labels:
    app: llm-gateway
 stringData:
  # IMPORTANT: the values below are placeholders. Replace them with real
  # values, or manage this Secret outside the repository.
  # Options for production:
  # - kubectl create secret generic llm-gateway-secrets --from-literal=...
  # - External Secrets Operator with AWS Secrets Manager/HashiCorp Vault
  # - Sealed Secrets
  GOOGLE_API_KEY: 'your-google-api-key-here'
  ANTHROPIC_API_KEY: 'your-anthropic-api-key-here'
  OPENAI_API_KEY: 'your-openai-api-key-here'
  OIDC_AUDIENCE: 'your-client-id.apps.googleusercontent.com'
 ---
 # Example using External Secrets Operator (commented out)
 # apiVersion: external-secrets.io/v1beta1
 # kind: ExternalSecret
 # metadata:
 #   name: llm-gateway-secrets
 #   namespace: llm-gateway
 # spec:
 #   refreshInterval: 1h
 #   secretStoreRef:
 #     name: aws-secrets-manager
 #     kind: SecretStore
 #   target:
 #     name: llm-gateway-secrets
 #     creationPolicy: Owner
 #   data:
 #     - secretKey: GOOGLE_API_KEY
 #       remoteRef:
 #         key: prod/llm-gateway/google-api-key
 #     - secretKey: ANTHROPIC_API_KEY
 #       remoteRef:
 #         key: prod/llm-gateway/anthropic-api-key
 #     - secretKey: OPENAI_API_KEY
 #       remoteRef:
 #         key: prod/llm-gateway/openai-api-key
 #     - secretKey: OIDC_AUDIENCE
 #       remoteRef:
 #         key: prod/llm-gateway/oidc-audience
--- a/k8s/service.yaml
+++ b/k8s/service.yaml
@@ -0,0 +1,40 @@
 # ClusterIP Service exposing the gateway pods' "http" port on port 80
 kind: Service
 apiVersion: v1
 metadata:
  namespace: llm-gateway
  name: llm-gateway
  labels:
    app: llm-gateway
  annotations:
    # Cloud load balancer integrations (uncomment as needed)
    # service.beta.kubernetes.io/aws-load-balancer-type: "nlb"
    # cloud.google.com/neg: '{"ingress": true}'
 spec:
  type: ClusterIP
  sessionAffinity: None
  selector:
    app: llm-gateway
  ports:
  - name: http
    protocol: TCP
    port: 80
    targetPort: http
 ---
 # Headless service for pod-to-pod communication (if needed)
 kind: Service
 apiVersion: v1
 metadata:
  namespace: llm-gateway
  name: llm-gateway-headless
  labels:
    app: llm-gateway
 spec:
  # clusterIP: None makes this a headless Service
  clusterIP: None
  type: ClusterIP
  selector:
    app: llm-gateway
  ports:
  - name: http
    protocol: TCP
    port: 8080
    targetPort: http
--- a/k8s/serviceaccount.yaml
+++ b/k8s/serviceaccount.yaml
@@ -0,0 +1,14 @@
 # ServiceAccount for the llm-gateway workload
 kind: ServiceAccount
 apiVersion: v1
 # The service account token is mounted into pods that use this account
 automountServiceAccountToken: true
 metadata:
  namespace: llm-gateway
  name: llm-gateway
  labels:
    app: llm-gateway
  annotations:
    # GKE Workload Identity:
    # iam.gke.io/gcp-service-account: llm-gateway@PROJECT_ID.iam.gserviceaccount.com
    # EKS IRSA (IAM Roles for Service Accounts):
    # eks.amazonaws.com/role-arn: arn:aws:iam::ACCOUNT_ID:role/llm-gateway-role
--- a/k8s/servicemonitor.yaml
+++ b/k8s/servicemonitor.yaml
@@ -0,0 +1,35 @@
 # ServiceMonitor for Prometheus Operator
 # Requires Prometheus Operator to be installed
 # https://github.com/prometheus-operator/prometheus-operator
 # Scrapes /metrics on the "http" port of the llm-gateway Service every 30s.
 apiVersion: monitoring.coreos.com/v1
 kind: ServiceMonitor
 metadata:
  name: llm-gateway
  namespace: llm-gateway
  labels:
    app: llm-gateway
    prometheus: kube-prometheus
 spec:
  selector:
    matchLabels:
      app: llm-gateway
  endpoints:
  - port: http
    path: /metrics
    interval: 30s
    scrapeTimeout: 10s
    relabelings:
    # The selector above matches both the llm-gateway and the
    # llm-gateway-headless Service (same app label, both with an "http" port),
    # which would scrape every pod twice. Keep only targets discovered through
    # the llm-gateway Service; the regex is fully anchored.
    - sourceLabels: [__meta_kubernetes_service_name]
      regex: llm-gateway
      action: keep
    # Add namespace label
    - sourceLabels: [__meta_kubernetes_namespace]
      targetLabel: namespace
    # Add pod label
    - sourceLabels: [__meta_kubernetes_pod_name]
      targetLabel: pod
    # Add service label
    - sourceLabels: [__meta_kubernetes_service_name]
      targetLabel: service