Add Dockerfile and Manifests

This commit is contained in:
2026-03-05 06:13:50 +00:00
parent b56c78fa07
commit df6b677a15
21 changed files with 1952 additions and 0 deletions

65
.dockerignore Normal file
View File

@@ -0,0 +1,65 @@
# .dockerignore — keeps the Docker build context small and prevents secrets
# and local state from leaking into image layers.
# Git
.git
.gitignore
.github
# Documentation
*.md
docs/
# IDE
.vscode/
.idea/
*.swp
*.swo
*~
# Build artifacts
/bin/
/dist/
/build/
/gateway
/cmd/gateway/gateway
*.exe
*.dll
*.so
*.dylib
*.test
*.out
# Configuration files with secrets
config.yaml
config.json
*-local.yaml
*-local.json
.env
.env.local
*.key
*.pem
# Test and coverage
coverage.out
*.log
logs/
# OS
.DS_Store
Thumbs.db
# Dependencies (will be downloaded during build)
vendor/
# Python
__pycache__/
*.py[cod]
tests/node_modules/
# Jujutsu
.jj/
# Claude
.claude/
# Data directories
data/
*.db

181
.github/workflows/ci.yaml vendored Normal file
View File

@@ -0,0 +1,181 @@
# CI pipeline: test, lint, security scan, build, then (on main/develop
# pushes) build and push a multi-arch container image and scan it.
name: CI

on:
  push:
    branches: [main, develop]
  pull_request:
    branches: [main, develop]

env:
  GO_VERSION: '1.23'
  REGISTRY: ghcr.io
  IMAGE_NAME: ${{ github.repository }}

jobs:
  test:
    name: Test
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
      - name: Set up Go
        uses: actions/setup-go@v5
        with:
          go-version: ${{ env.GO_VERSION }}
          cache: true
      - name: Download dependencies
        run: go mod download
      - name: Verify dependencies
        run: go mod verify
      - name: Run tests
        run: go test -v -race -coverprofile=coverage.out ./...
      - name: Upload coverage to Codecov
        uses: codecov/codecov-action@v4
        with:
          file: ./coverage.out
          flags: unittests
          name: codecov-umbrella
      - name: Generate coverage report
        run: go tool cover -html=coverage.out -o coverage.html
      - name: Upload coverage report
        uses: actions/upload-artifact@v4
        with:
          name: coverage-report
          path: coverage.html

  lint:
    name: Lint
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
      - name: Set up Go
        uses: actions/setup-go@v5
        with:
          go-version: ${{ env.GO_VERSION }}
          cache: true
      - name: Run golangci-lint
        uses: golangci/golangci-lint-action@v4
        with:
          version: latest
          args: --timeout=5m

  security:
    name: Security Scan
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
      - name: Set up Go
        uses: actions/setup-go@v5
        with:
          go-version: ${{ env.GO_VERSION }}
          cache: true
      - name: Run Gosec Security Scanner
        uses: securego/gosec@master
        with:
          args: '-no-fail -fmt sarif -out results.sarif ./...'
      - name: Upload SARIF file
        uses: github/codeql-action/upload-sarif@v3
        with:
          sarif_file: results.sarif

  build:
    name: Build
    runs-on: ubuntu-latest
    needs: [test, lint]
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
      - name: Set up Go
        uses: actions/setup-go@v5
        with:
          go-version: ${{ env.GO_VERSION }}
          cache: true
      - name: Build binary
        run: |
          CGO_ENABLED=1 go build -v -o bin/gateway ./cmd/gateway
      - name: Upload binary
        uses: actions/upload-artifact@v4
        with:
          name: gateway-binary
          path: bin/gateway

  docker:
    name: Build and Push Docker Image
    runs-on: ubuntu-latest
    needs: [test, lint, security]
    if: github.event_name == 'push' && (github.ref == 'refs/heads/main' || github.ref == 'refs/heads/develop')
    permissions:
      contents: read
      packages: write
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      - name: Log in to Container Registry
        uses: docker/login-action@v3
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}
      - name: Extract metadata
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
          tags: |
            type=ref,event=branch
            type=ref,event=pr
            type=semver,pattern={{version}}
            type=semver,pattern={{major}}.{{minor}}
            type=sha,prefix={{branch}}-
            type=raw,value=latest,enable=${{ github.ref == 'refs/heads/main' }}
      - name: Build and push Docker image
        id: build
        uses: docker/build-push-action@v5
        with:
          context: .
          push: true
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          cache-from: type=gha
          cache-to: type=gha,mode=max
          platforms: linux/amd64,linux/arm64
      - name: Run Trivy vulnerability scanner
        uses: aquasecurity/trivy-action@master
        with:
          # FIX: the metadata step never produces a `:<full-sha>` tag (it
          # emits `type=sha` with a branch prefix and a short sha), so
          # scanning `:${{ github.sha }}` referenced a nonexistent tag.
          # Scan the image that was actually pushed, by digest.
          image-ref: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}@${{ steps.build.outputs.digest }}
          format: 'sarif'
          output: 'trivy-results.sarif'
      - name: Upload Trivy results to GitHub Security
        uses: github/codeql-action/upload-sarif@v3
        with:
          sarif_file: 'trivy-results.sarif'

129
.github/workflows/release.yaml vendored Normal file
View File

@@ -0,0 +1,129 @@
# Release pipeline: on a v* tag, test, cross-build binaries, push a
# multi-arch image, and publish a GitHub release with a changelog.
name: Release

on:
  push:
    tags:
      - 'v*'

env:
  GO_VERSION: '1.23'
  REGISTRY: ghcr.io
  IMAGE_NAME: ${{ github.repository }}

jobs:
  release:
    name: Create Release
    runs-on: ubuntu-latest
    permissions:
      contents: write
      packages: write
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # Full history is required for `git describe`/changelog generation.
          fetch-depth: 0
      - name: Set up Go
        uses: actions/setup-go@v5
        with:
          go-version: ${{ env.GO_VERSION }}
      - name: Run tests
        run: go test -v ./...
      - name: Build binaries
        # NOTE(review): CGO_ENABLED=1 cross-compilation requires a matching C
        # cross-toolchain. The darwin targets cannot link with the default
        # ubuntu-latest toolchain, and linux/arm64 needs aarch64-linux-gnu-gcc.
        # Confirm toolchains are installed here, or move these builds to
        # native runners.
        run: |
          # Linux amd64
          GOOS=linux GOARCH=amd64 CGO_ENABLED=1 go build -o bin/gateway-linux-amd64 ./cmd/gateway
          # Linux arm64
          GOOS=linux GOARCH=arm64 CGO_ENABLED=1 go build -o bin/gateway-linux-arm64 ./cmd/gateway
          # macOS amd64
          GOOS=darwin GOARCH=amd64 CGO_ENABLED=1 go build -o bin/gateway-darwin-amd64 ./cmd/gateway
          # macOS arm64
          GOOS=darwin GOARCH=arm64 CGO_ENABLED=1 go build -o bin/gateway-darwin-arm64 ./cmd/gateway
      - name: Create checksums
        run: |
          cd bin
          sha256sum gateway-* > checksums.txt
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      - name: Log in to Container Registry
        uses: docker/login-action@v3
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}
      - name: Extract metadata
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
          tags: |
            type=semver,pattern={{version}}
            type=semver,pattern={{major}}.{{minor}}
            type=semver,pattern={{major}}
            type=raw,value=latest
      - name: Build and push Docker image
        uses: docker/build-push-action@v5
        with:
          context: .
          push: true
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          platforms: linux/amd64,linux/arm64
          cache-from: type=gha
          cache-to: type=gha,mode=max
      - name: Generate changelog
        id: changelog
        run: |
          # FIX: `git describe --tags --abbrev=0 HEAD^` hard-fails when this
          # is the first tag in the repository. Fall back to the full history
          # in that case.
          PREV_TAG=$(git describe --tags --abbrev=0 HEAD^ 2>/dev/null || true)
          if [ -n "$PREV_TAG" ]; then
            git log "${PREV_TAG}..HEAD" --pretty=format:"* %s (%h)" > CHANGELOG.txt
          else
            git log --pretty=format:"* %s (%h)" > CHANGELOG.txt
          fi
          echo "changelog<<EOF" >> $GITHUB_OUTPUT
          cat CHANGELOG.txt >> $GITHUB_OUTPUT
          echo "EOF" >> $GITHUB_OUTPUT
      - name: Create Release
        uses: softprops/action-gh-release@v1
        with:
          body: |
            ## Changes
            ${{ steps.changelog.outputs.changelog }}

            ## Docker Images
            ```
            docker pull ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ github.ref_name }}
            docker pull ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:latest
            ```

            ## Installation

            ### Kubernetes
            ```bash
            kubectl apply -k k8s/
            ```

            ### Docker
            ```bash
            docker run -p 8080:8080 \
              -e GOOGLE_API_KEY=your-key \
              -e ANTHROPIC_API_KEY=your-key \
              -e OPENAI_API_KEY=your-key \
              ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ github.ref_name }}
            ```
          files: |
            bin/gateway-*
            bin/checksums.txt
          draft: false
          prerelease: ${{ contains(github.ref, 'alpha') || contains(github.ref, 'beta') || contains(github.ref, 'rc') }}
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

62
Dockerfile Normal file
View File

@@ -0,0 +1,62 @@
# Multi-stage build for Go LLM Gateway
# Stage 1: Build the Go binary
# Pinned to match CI's GO_VERSION ('1.23') instead of a floating tag,
# so local and CI builds use the same toolchain.
FROM golang:1.23-alpine AS builder

# Install build dependencies up front (single layer).
# gcc/musl-dev are required because CGO is needed for SQLite support.
RUN apk add --no-cache git ca-certificates tzdata gcc musl-dev

WORKDIR /build

# Copy go mod files first for better layer caching
COPY go.mod go.sum ./
RUN go mod download

# Copy source code
COPY . .

# Build a static binary with optimizations.
# FIX: do NOT force GOOS/GOARCH here — CI builds this image for both
# linux/amd64 and linux/arm64 (buildx `platforms:`), and each platform's
# builder stage compiles natively under emulation. Hard-coding
# GOARCH=amd64 would put an amd64 binary inside the arm64 image.
RUN CGO_ENABLED=1 go build \
    -ldflags='-w -s -extldflags "-static"' \
    -a -installsuffix cgo \
    -o gateway \
    ./cmd/gateway

# Stage 2: Create minimal runtime image
FROM alpine:3.19

# Install runtime dependencies
RUN apk add --no-cache ca-certificates tzdata

# Create non-root user
RUN addgroup -g 1000 gateway && \
    adduser -D -u 1000 -G gateway gateway

# Create necessary directories
RUN mkdir -p /app /app/data && \
    chown -R gateway:gateway /app

WORKDIR /app

# Copy binary from builder
COPY --from=builder /build/gateway /app/gateway

# Copy example config (optional, mainly for documentation)
COPY config.example.yaml /app/config.example.yaml

# Switch to non-root user
USER gateway

# Expose port
EXPOSE 8080

# Health check (busybox wget is available in alpine)
HEALTHCHECK --interval=30s --timeout=5s --start-period=5s --retries=3 \
    CMD wget --no-verbose --tries=1 --spider http://localhost:8080/health || exit 1

# Set entrypoint
ENTRYPOINT ["/app/gateway"]

# Default command (can be overridden).
# NOTE(review): /app/config/config.yaml is not baked into the image; it is
# expected to be mounted (docker-compose volume / k8s ConfigMap) — confirm.
CMD ["--config", "/app/config/config.yaml"]

151
Makefile Normal file
View File

@@ -0,0 +1,151 @@
# Makefile for LLM Gateway
# NOTE: recipe lines must be indented with hard tabs (a make requirement).

# Declare all non-file targets as phony so stale files never shadow them.
.PHONY: help build build-static test test-coverage fmt lint tidy clean \
	docker-build docker-push docker-run docker-compose-up docker-compose-down \
	docker-compose-logs k8s-namespace k8s-secrets k8s-deploy k8s-delete \
	k8s-status k8s-logs k8s-describe k8s-port-forward ci security-scan run version

# Variables
APP_NAME := llm-gateway
VERSION := $(shell git describe --tags --always --dirty 2>/dev/null || echo "dev")
REGISTRY ?= your-registry
IMAGE := $(REGISTRY)/$(APP_NAME)
DOCKER_TAG := $(IMAGE):$(VERSION)
LATEST_TAG := $(IMAGE):latest

# Go variables
GOCMD := go
GOBUILD := $(GOCMD) build
GOTEST := $(GOCMD) test
GOMOD := $(GOCMD) mod
GOFMT := $(GOCMD) fmt

# Build directory
BUILD_DIR := bin

# Help target (parses the "## description" suffixes below)
help: ## Show this help message
	@echo "Usage: make [target]"
	@echo ""
	@echo "Targets:"
	@awk 'BEGIN {FS = ":.*##"; printf "\n"} /^[a-zA-Z_-]+:.*?##/ { printf "  %-20s %s\n", $$1, $$2 }' $(MAKEFILE_LIST)

# Development targets
build: ## Build the binary
	@echo "Building $(APP_NAME)..."
	CGO_ENABLED=1 $(GOBUILD) -o $(BUILD_DIR)/$(APP_NAME) ./cmd/gateway

build-static: ## Build static binary
	@echo "Building static binary..."
	CGO_ENABLED=1 $(GOBUILD) -ldflags='-w -s -extldflags "-static"' -a -installsuffix cgo -o $(BUILD_DIR)/$(APP_NAME) ./cmd/gateway

test: ## Run tests
	@echo "Running tests..."
	$(GOTEST) -v -race -coverprofile=coverage.out ./...

test-coverage: test ## Run tests with coverage report
	@echo "Generating coverage report..."
	$(GOCMD) tool cover -html=coverage.out -o coverage.html
	@echo "Coverage report saved to coverage.html"

fmt: ## Format Go code
	@echo "Formatting code..."
	$(GOFMT) ./...

lint: ## Run linter
	@echo "Running linter..."
	@which golangci-lint > /dev/null || (echo "golangci-lint not installed. Run: go install github.com/golangci/golangci-lint/cmd/golangci-lint@latest" && exit 1)
	golangci-lint run ./...

tidy: ## Tidy go modules
	@echo "Tidying go modules..."
	$(GOMOD) tidy

clean: ## Clean build artifacts
	@echo "Cleaning..."
	rm -rf $(BUILD_DIR)
	rm -f coverage.out coverage.html

# Docker targets
docker-build: ## Build Docker image
	@echo "Building Docker image $(DOCKER_TAG)..."
	docker build -t $(DOCKER_TAG) -t $(LATEST_TAG) .

docker-push: docker-build ## Push Docker image to registry
	@echo "Pushing Docker image..."
	docker push $(DOCKER_TAG)
	docker push $(LATEST_TAG)

docker-run: ## Run Docker container locally
	@echo "Running Docker container..."
	docker run --rm -p 8080:8080 \
		-e GOOGLE_API_KEY="$(GOOGLE_API_KEY)" \
		-e ANTHROPIC_API_KEY="$(ANTHROPIC_API_KEY)" \
		-e OPENAI_API_KEY="$(OPENAI_API_KEY)" \
		-v $(PWD)/config.yaml:/app/config/config.yaml:ro \
		$(DOCKER_TAG)

# NOTE(review): these use the legacy `docker-compose` v1 binary; consider
# `docker compose` (v2 plugin) if v1 is not installed on dev machines.
docker-compose-up: ## Start services with docker-compose
	@echo "Starting services with docker-compose..."
	docker-compose up -d

docker-compose-down: ## Stop services with docker-compose
	@echo "Stopping services with docker-compose..."
	docker-compose down

docker-compose-logs: ## View docker-compose logs
	docker-compose logs -f

# Kubernetes targets
k8s-namespace: ## Create Kubernetes namespace
	kubectl create namespace llm-gateway --dry-run=client -o yaml | kubectl apply -f -

k8s-secrets: ## Create Kubernetes secrets (requires env vars)
	@echo "Creating secrets..."
	@if [ -z "$(GOOGLE_API_KEY)" ] || [ -z "$(ANTHROPIC_API_KEY)" ] || [ -z "$(OPENAI_API_KEY)" ]; then \
		echo "Error: Please set GOOGLE_API_KEY, ANTHROPIC_API_KEY, and OPENAI_API_KEY environment variables"; \
		exit 1; \
	fi
	kubectl create secret generic llm-gateway-secrets \
		--from-literal=GOOGLE_API_KEY="$(GOOGLE_API_KEY)" \
		--from-literal=ANTHROPIC_API_KEY="$(ANTHROPIC_API_KEY)" \
		--from-literal=OPENAI_API_KEY="$(OPENAI_API_KEY)" \
		--from-literal=OIDC_AUDIENCE="$(OIDC_AUDIENCE)" \
		-n llm-gateway \
		--dry-run=client -o yaml | kubectl apply -f -

k8s-deploy: k8s-namespace k8s-secrets ## Deploy to Kubernetes
	@echo "Deploying to Kubernetes..."
	kubectl apply -k k8s/

k8s-delete: ## Delete from Kubernetes
	@echo "Deleting from Kubernetes..."
	kubectl delete -k k8s/

k8s-status: ## Check Kubernetes deployment status
	@echo "Checking deployment status..."
	kubectl get all -n llm-gateway

k8s-logs: ## View Kubernetes logs
	kubectl logs -n llm-gateway -l app=llm-gateway --tail=100 -f

k8s-describe: ## Describe Kubernetes deployment
	kubectl describe deployment llm-gateway -n llm-gateway

k8s-port-forward: ## Port forward to local machine
	kubectl port-forward -n llm-gateway svc/llm-gateway 8080:80

# CI/CD targets
ci: lint test ## Run CI checks

security-scan: ## Run security scan
	@echo "Running security scan..."
	@which gosec > /dev/null || (echo "gosec not installed. Run: go install github.com/securego/gosec/v2/cmd/gosec@latest" && exit 1)
	gosec ./...

# Run target
run: ## Run locally
	@echo "Running $(APP_NAME) locally..."
	$(GOCMD) run ./cmd/gateway --config config.yaml

# Version info
version: ## Show version
	@echo "Version: $(VERSION)"
	@echo "Image: $(DOCKER_TAG)"

102
docker-compose.yaml Normal file
View File

@@ -0,0 +1,102 @@
# Docker Compose for local development and testing
# Not recommended for production - use Kubernetes instead
# NOTE: the obsolete top-level `version:` key was removed — Compose v2
# ignores it and prints a deprecation warning.
services:
  gateway:
    build:
      context: .
      dockerfile: Dockerfile
    image: llm-gateway:latest
    container_name: llm-gateway
    ports:
      - "8080:8080"
    environment:
      # Provider API keys (taken from the host environment / .env file)
      GOOGLE_API_KEY: ${GOOGLE_API_KEY}
      ANTHROPIC_API_KEY: ${ANTHROPIC_API_KEY}
      OPENAI_API_KEY: ${OPENAI_API_KEY}
      OIDC_AUDIENCE: ${OIDC_AUDIENCE:-}
    volumes:
      # Mounted to the path the image's default CMD expects
      - ./config.yaml:/app/config/config.yaml:ro
    depends_on:
      redis:
        condition: service_healthy
    networks:
      - llm-network
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:8080/health"]
      interval: 30s
      timeout: 5s
      retries: 3
      start_period: 10s

  redis:
    image: redis:7.2-alpine
    container_name: llm-gateway-redis
    ports:
      - "6379:6379"
    command: redis-server --maxmemory 256mb --maxmemory-policy allkeys-lru
    volumes:
      - redis-data:/data
    networks:
      - llm-network
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "redis-cli", "ping"]
      interval: 10s
      timeout: 3s
      retries: 3

  # Optional: Prometheus for metrics (enable with `--profile monitoring`)
  prometheus:
    image: prom/prometheus:latest
    container_name: llm-gateway-prometheus
    ports:
      - "9090:9090"
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'
      - '--web.console.libraries=/usr/share/prometheus/console_libraries'
      - '--web.console.templates=/usr/share/prometheus/consoles'
    volumes:
      - ./monitoring/prometheus.yml:/etc/prometheus/prometheus.yml:ro
      - prometheus-data:/prometheus
    networks:
      - llm-network
    restart: unless-stopped
    profiles:
      - monitoring

  # Optional: Grafana for visualization (enable with `--profile monitoring`)
  grafana:
    image: grafana/grafana:latest
    container_name: llm-gateway-grafana
    ports:
      - "3000:3000"
    environment:
      # Local-dev credentials only — never use this password in production.
      - GF_SECURITY_ADMIN_PASSWORD=admin
      - GF_USERS_ALLOW_SIGN_UP=false
    volumes:
      - ./monitoring/grafana-datasources.yml:/etc/grafana/provisioning/datasources/datasources.yml:ro
      - ./monitoring/grafana-dashboards.yml:/etc/grafana/provisioning/dashboards/dashboards.yml:ro
      - ./monitoring/dashboards:/var/lib/grafana/dashboards:ro
      - grafana-data:/var/lib/grafana
    depends_on:
      - prometheus
    networks:
      - llm-network
    restart: unless-stopped
    profiles:
      - monitoring

networks:
  llm-network:
    driver: bridge

volumes:
  redis-data:
  prometheus-data:
  grafana-data:

352
k8s/README.md Normal file
View File

@@ -0,0 +1,352 @@
# Kubernetes Deployment Guide
This directory contains Kubernetes manifests for deploying the LLM Gateway to production.
## Prerequisites
- Kubernetes cluster (v1.24+)
- `kubectl` configured
- Container registry access
- (Optional) Prometheus Operator for monitoring
- (Optional) cert-manager for TLS certificates
- (Optional) nginx-ingress-controller or cloud load balancer
## Quick Start
### 1. Build and Push Docker Image
```bash
# Build the image
docker build -t your-registry/llm-gateway:v1.0.0 .
# Push to registry
docker push your-registry/llm-gateway:v1.0.0
```
### 2. Configure Secrets
**Option A: Using kubectl**
```bash
kubectl create namespace llm-gateway
kubectl create secret generic llm-gateway-secrets \
--from-literal=GOOGLE_API_KEY="your-key" \
--from-literal=ANTHROPIC_API_KEY="your-key" \
--from-literal=OPENAI_API_KEY="your-key" \
--from-literal=OIDC_AUDIENCE="your-client-id" \
-n llm-gateway
```
**Option B: Using External Secrets Operator (Recommended)**
- Uncomment the ExternalSecret in `secret.yaml`
- Configure your SecretStore (AWS Secrets Manager, Vault, etc.)
### 3. Update Configuration
Edit `configmap.yaml`:
- Update Redis connection string if using external Redis
- Configure observability endpoints (Tempo, Prometheus)
- Adjust rate limits as needed
- Set OIDC issuer and audience
Edit `ingress.yaml`:
- Replace `llm-gateway.example.com` with your domain
- Configure TLS certificate annotations
Edit `kustomization.yaml`:
- Update image registry and tag
### 4. Deploy
**Using Kustomize (Recommended):**
```bash
kubectl apply -k k8s/
```
**Using kubectl directly:**
```bash
kubectl apply -f k8s/namespace.yaml
kubectl apply -f k8s/serviceaccount.yaml
kubectl apply -f k8s/secret.yaml
kubectl apply -f k8s/configmap.yaml
kubectl apply -f k8s/redis.yaml
kubectl apply -f k8s/deployment.yaml
kubectl apply -f k8s/service.yaml
kubectl apply -f k8s/ingress.yaml
kubectl apply -f k8s/hpa.yaml
kubectl apply -f k8s/pdb.yaml
kubectl apply -f k8s/networkpolicy.yaml
```
**With Prometheus Operator:**
```bash
kubectl apply -f k8s/servicemonitor.yaml
kubectl apply -f k8s/prometheusrule.yaml
```
### 5. Verify Deployment
```bash
# Check pods
kubectl get pods -n llm-gateway
# Check services
kubectl get svc -n llm-gateway
# Check ingress
kubectl get ingress -n llm-gateway
# View logs
kubectl logs -n llm-gateway -l app=llm-gateway --tail=100 -f
# Check health
kubectl port-forward -n llm-gateway svc/llm-gateway 8080:80
curl http://localhost:8080/health
```
## Architecture Overview
```
┌─────────────────────────────────────────────────────────┐
│ Internet/Clients │
└───────────────────────┬─────────────────────────────────┘
┌─────────────────────────────────────────────────────────┐
│ Ingress Controller │
│ (nginx/ALB/GCE with TLS) │
└───────────────────────┬─────────────────────────────────┘
┌─────────────────────────────────────────────────────────┐
│ LLM Gateway Service │
│ (LoadBalancer) │
└───────────────────────┬─────────────────────────────────┘
┌───────────────┼───────────────┐
▼ ▼ ▼
┌──────────────┐ ┌──────────────┐ ┌──────────────┐
│ Gateway │ │ Gateway │ │ Gateway │
│ Pod 1 │ │ Pod 2 │ │ Pod 3 │
└──────┬───────┘ └──────┬───────┘ └──────┬───────┘
│ │ │
└────────────────┼────────────────┘
┌───────────────┼───────────────┐
▼ ▼ ▼
┌──────────────┐ ┌──────────────┐ ┌──────────────┐
│ Redis │ │ Prometheus │ │ Tempo │
│ (Persistent) │ │ (Metrics) │ │ (Traces) │
└──────────────┘ └──────────────┘ └──────────────┘
```
## Resource Specifications
### Default Resources
- **Requests**: 100m CPU, 128Mi memory
- **Limits**: 1000m CPU, 512Mi memory
- **Replicas**: 3 (min), 20 (max with HPA)
### Scaling
- HPA scales based on CPU (70%) and memory (80%)
- PodDisruptionBudget ensures minimum 2 replicas during disruptions
## Configuration Options
### Environment Variables (from Secret)
- `GOOGLE_API_KEY`: Google AI API key
- `ANTHROPIC_API_KEY`: Anthropic API key
- `OPENAI_API_KEY`: OpenAI API key
- `OIDC_AUDIENCE`: OIDC client ID for authentication
### ConfigMap Settings
See `configmap.yaml` for full configuration options:
- Server address
- Logging format and level
- Rate limiting
- Observability (metrics/tracing)
- Provider endpoints
- Conversation storage
- Authentication
## Security
### Security Features
- Non-root container execution (UID 1000)
- Read-only root filesystem
- No privilege escalation
- All capabilities dropped
- Network policies for ingress/egress control
- SeccompProfile: RuntimeDefault
### TLS/HTTPS
- Ingress configured with TLS
- Uses cert-manager for automatic certificate provisioning
- Force SSL redirect enabled
### Secrets Management
**Never commit secrets to git!**
Production options:
1. **External Secrets Operator** (Recommended)
- AWS Secrets Manager
- HashiCorp Vault
- Google Secret Manager
2. **Sealed Secrets**
- Encrypted secrets in git
3. **Manual kubectl secrets**
- Created outside of git
## Monitoring
### Metrics
- Exposed on `/metrics` endpoint
- Scraped by Prometheus via ServiceMonitor
- Key metrics:
- HTTP request rate, latency, errors
- Provider request rate, latency, token usage
- Conversation store operations
- Rate limiting hits
### Alerts
See `prometheusrule.yaml` for configured alerts:
- High error rate
- High latency
- Provider failures
- Pod down
- High memory usage
- Rate limit threshold exceeded
- Conversation store errors
### Logs
Structured JSON logs with:
- Request IDs
- Trace context (trace_id, span_id)
- Log levels (debug/info/warn/error)
View logs:
```bash
kubectl logs -n llm-gateway -l app=llm-gateway --tail=100 -f
```
## Maintenance
### Rolling Updates
```bash
# Update image
kubectl set image deployment/llm-gateway gateway=your-registry/llm-gateway:v1.0.1 -n llm-gateway
# Check rollout status
kubectl rollout status deployment/llm-gateway -n llm-gateway
# Rollback if needed
kubectl rollout undo deployment/llm-gateway -n llm-gateway
```
### Scaling
```bash
# Manual scale
kubectl scale deployment/llm-gateway --replicas=5 -n llm-gateway
# HPA will auto-scale within min/max bounds (3-20)
```
### Configuration Updates
```bash
# Edit ConfigMap
kubectl edit configmap llm-gateway-config -n llm-gateway
# Restart pods to pick up changes
kubectl rollout restart deployment/llm-gateway -n llm-gateway
```
### Debugging
```bash
# Exec into pod
kubectl exec -it -n llm-gateway deployment/llm-gateway -- /bin/sh
# Port forward for local access
kubectl port-forward -n llm-gateway svc/llm-gateway 8080:80
# Check events
kubectl get events -n llm-gateway --sort-by='.lastTimestamp'
```
## Production Considerations
### High Availability
- Minimum 3 replicas across availability zones
- Pod anti-affinity rules spread pods across nodes
- PodDisruptionBudget ensures service availability during disruptions
### Performance
- Adjust resource limits based on load testing
- Configure HPA thresholds based on traffic patterns
- Use node affinity for GPU nodes if needed
### Cost Optimization
- Use spot/preemptible instances for non-critical workloads
- Set appropriate resource requests/limits
- Monitor token usage and implement quotas
### Disaster Recovery
- Redis persistence (if using StatefulSet)
- Regular backups of conversation data
- Multi-region deployment for geo-redundancy
- Document runbooks for incident response
## Cloud-Specific Notes
### AWS EKS
- Use AWS Load Balancer Controller for ALB
- Configure IRSA for service account
- Use ElastiCache for Redis
- Store secrets in AWS Secrets Manager
### GCP GKE
- Use GKE Ingress for GCLB
- Configure Workload Identity
- Use Memorystore for Redis
- Store secrets in Google Secret Manager
### Azure AKS
- Use Azure Application Gateway Ingress Controller
- Configure Azure AD Workload Identity
- Use Azure Cache for Redis
- Store secrets in Azure Key Vault
## Troubleshooting
### Common Issues
**Pods not starting:**
```bash
kubectl describe pod -n llm-gateway -l app=llm-gateway
kubectl logs -n llm-gateway -l app=llm-gateway --previous
```
**Health check failures:**
```bash
kubectl port-forward -n llm-gateway deployment/llm-gateway 8080:8080
curl http://localhost:8080/health
curl http://localhost:8080/ready
```
**Provider connection issues:**
- Verify API keys in secrets
- Check network policies allow egress
- Verify provider endpoints are accessible
**Redis connection issues:**
```bash
kubectl exec -it -n llm-gateway redis-0 -- redis-cli ping
```
## Additional Resources
- [Kubernetes Documentation](https://kubernetes.io/docs/)
- [Prometheus Operator](https://github.com/prometheus-operator/prometheus-operator)
- [cert-manager](https://cert-manager.io/)
- [External Secrets Operator](https://external-secrets.io/)

76
k8s/configmap.yaml Normal file
View File

@@ -0,0 +1,76 @@
# Application configuration for the gateway, mounted at /app/config by the
# Deployment. Secret values are referenced as ${VAR} placeholders —
# presumably expanded from the environment by the gateway at load time
# (TODO confirm against the config loader).
apiVersion: v1
kind: ConfigMap
metadata:
  name: llm-gateway-config
  namespace: llm-gateway
  labels:
    app: llm-gateway
data:
  config.yaml: |
    server:
      address: ":8080"
    logging:
      format: "json"
      level: "info"
    rate_limit:
      enabled: true
      requests_per_second: 10
      burst: 20
    observability:
      enabled: true
      metrics:
        enabled: true
        path: "/metrics"
      tracing:
        enabled: true
        service_name: "llm-gateway"
        sampler:
          type: "probability"
          rate: 0.1
        exporter:
          type: "otlp"
          endpoint: "tempo.observability.svc.cluster.local:4317"
          insecure: true
    providers:
      google:
        type: "google"
        api_key: "${GOOGLE_API_KEY}"
        endpoint: "https://generativelanguage.googleapis.com"
      anthropic:
        type: "anthropic"
        api_key: "${ANTHROPIC_API_KEY}"
        endpoint: "https://api.anthropic.com"
      openai:
        type: "openai"
        api_key: "${OPENAI_API_KEY}"
        endpoint: "https://api.openai.com"
    conversations:
      store: "redis"
      ttl: "1h"
      dsn: "redis://redis.llm-gateway.svc.cluster.local:6379/0"
    auth:
      enabled: true
      issuer: "https://accounts.google.com"
      audience: "${OIDC_AUDIENCE}"
    models:
      - name: "gemini-1.5-flash"
        provider: "google"
      - name: "gemini-1.5-pro"
        provider: "google"
      - name: "claude-3-5-sonnet-20241022"
        provider: "anthropic"
      - name: "claude-3-5-haiku-20241022"
        provider: "anthropic"
      - name: "gpt-4o"
        provider: "openai"
      - name: "gpt-4o-mini"
        provider: "openai"

168
k8s/deployment.yaml Normal file
View File

@@ -0,0 +1,168 @@
# Gateway Deployment: 3 replicas, zero-downtime rolling updates, hardened
# pod/container security context, config mounted from the ConfigMap and
# secrets injected as environment variables.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: llm-gateway
  namespace: llm-gateway
  labels:
    app: llm-gateway
    version: v1
spec:
  replicas: 3
  strategy:
    type: RollingUpdate
    rollingUpdate:
      maxSurge: 1
      maxUnavailable: 0  # never drop below the desired replica count
  selector:
    matchLabels:
      app: llm-gateway
  template:
    metadata:
      labels:
        app: llm-gateway
        version: v1
      annotations:
        # Annotation-based scrape config (ServiceMonitor is the alternative)
        prometheus.io/scrape: "true"
        prometheus.io/port: "8080"
        prometheus.io/path: "/metrics"
    spec:
      serviceAccountName: llm-gateway
      securityContext:
        runAsNonRoot: true
        runAsUser: 1000
        runAsGroup: 1000
        fsGroup: 1000
        seccompProfile:
          type: RuntimeDefault
      containers:
        - name: gateway
          image: llm-gateway:latest  # Replace with your registry/image:tag
          imagePullPolicy: IfNotPresent
          ports:
            - name: http
              containerPort: 8080
              protocol: TCP
          env:
            # Provider API Keys from Secret
            - name: GOOGLE_API_KEY
              valueFrom:
                secretKeyRef:
                  name: llm-gateway-secrets
                  key: GOOGLE_API_KEY
            - name: ANTHROPIC_API_KEY
              valueFrom:
                secretKeyRef:
                  name: llm-gateway-secrets
                  key: ANTHROPIC_API_KEY
            - name: OPENAI_API_KEY
              valueFrom:
                secretKeyRef:
                  name: llm-gateway-secrets
                  key: OPENAI_API_KEY
            - name: OIDC_AUDIENCE
              valueFrom:
                secretKeyRef:
                  name: llm-gateway-secrets
                  key: OIDC_AUDIENCE
            # Optional: Pod metadata via the downward API
            - name: POD_NAME
              valueFrom:
                fieldRef:
                  fieldPath: metadata.name
            - name: POD_NAMESPACE
              valueFrom:
                fieldRef:
                  fieldPath: metadata.namespace
            - name: POD_IP
              valueFrom:
                fieldRef:
                  fieldPath: status.podIP
          resources:
            requests:
              cpu: 100m
              memory: 128Mi
            limits:
              cpu: 1000m
              memory: 512Mi
          livenessProbe:
            httpGet:
              path: /health
              port: http
              scheme: HTTP
            initialDelaySeconds: 10
            periodSeconds: 30
            timeoutSeconds: 5
            successThreshold: 1
            failureThreshold: 3
          readinessProbe:
            httpGet:
              path: /ready
              port: http
              scheme: HTTP
            initialDelaySeconds: 5
            periodSeconds: 10
            timeoutSeconds: 5
            successThreshold: 1
            failureThreshold: 3
          # Allow up to 30 * 5s = 150s for slow cold starts before the
          # liveness probe takes over.
          startupProbe:
            httpGet:
              path: /health
              port: http
              scheme: HTTP
            initialDelaySeconds: 0
            periodSeconds: 5
            timeoutSeconds: 3
            successThreshold: 1
            failureThreshold: 30
          volumeMounts:
            - name: config
              mountPath: /app/config
              readOnly: true
            # Writable scratch space — required because the root FS is read-only
            - name: tmp
              mountPath: /tmp
          securityContext:
            allowPrivilegeEscalation: false
            readOnlyRootFilesystem: true
            runAsNonRoot: true
            runAsUser: 1000
            capabilities:
              drop:
                - ALL
      volumes:
        - name: config
          configMap:
            name: llm-gateway-config
        - name: tmp
          emptyDir: {}
      # Prefer spreading replicas across nodes (soft anti-affinity)
      affinity:
        podAntiAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 100
              podAffinityTerm:
                labelSelector:
                  matchExpressions:
                    - key: app
                      operator: In
                      values:
                        - llm-gateway
                topologyKey: kubernetes.io/hostname
      # Tolerations (if needed for specific node pools)
      # tolerations:
      #   - key: "workload-type"
      #     operator: "Equal"
      #     value: "llm"
      #     effect: "NoSchedule"

63
k8s/hpa.yaml Normal file
View File

@@ -0,0 +1,63 @@
# Autoscaling: 3-20 replicas, scale up aggressively, scale down slowly
# (5-minute stabilization) to avoid flapping under bursty LLM traffic.
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: llm-gateway
  namespace: llm-gateway
  labels:
    app: llm-gateway
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: llm-gateway
  minReplicas: 3
  maxReplicas: 20
  behavior:
    scaleDown:
      stabilizationWindowSeconds: 300
      policies:
        - type: Percent
          value: 50
          periodSeconds: 60
        - type: Pods
          value: 2
          periodSeconds: 60
      selectPolicy: Min  # remove pods at the gentler of the two rates
    scaleUp:
      stabilizationWindowSeconds: 0
      policies:
        - type: Percent
          value: 100
          periodSeconds: 30
        - type: Pods
          value: 4
          periodSeconds: 30
      selectPolicy: Max  # add pods at the faster of the two rates
  metrics:
    # CPU-based scaling
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: 70
    # Memory-based scaling
    - type: Resource
      resource:
        name: memory
        target:
          type: Utilization
          averageUtilization: 80
    # Custom metrics (requires metrics-server and custom metrics API)
    # - type: Pods
    #   pods:
    #     metric:
    #       name: http_requests_per_second
    #     target:
    #       type: AverageValue
    #       averageValue: "1000"

66
k8s/ingress.yaml Normal file
View File

@@ -0,0 +1,66 @@
# Public entry point for the gateway with TLS, rate limiting and timeouts.
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: llm-gateway
  namespace: llm-gateway
  labels:
    app: llm-gateway
  annotations:
    # TLS configuration
    cert-manager.io/cluster-issuer: "letsencrypt-prod"
    # Security headers
    nginx.ingress.kubernetes.io/force-ssl-redirect: "true"
    nginx.ingress.kubernetes.io/ssl-protocols: "TLSv1.2 TLSv1.3"
    # Rate limiting (supplement application-level rate limiting)
    nginx.ingress.kubernetes.io/limit-rps: "100"
    nginx.ingress.kubernetes.io/limit-connections: "50"
    # Request size limit (10MB)
    nginx.ingress.kubernetes.io/proxy-body-size: "10m"
    # Timeouts (generous read/send timeouts for streaming LLM responses)
    nginx.ingress.kubernetes.io/proxy-connect-timeout: "60"
    nginx.ingress.kubernetes.io/proxy-send-timeout: "120"
    nginx.ingress.kubernetes.io/proxy-read-timeout: "120"
    # CORS (if needed)
    # nginx.ingress.kubernetes.io/enable-cors: "true"
    # nginx.ingress.kubernetes.io/cors-allow-origin: "https://yourdomain.com"
    # nginx.ingress.kubernetes.io/cors-allow-methods: "GET, POST, OPTIONS"
    # nginx.ingress.kubernetes.io/cors-allow-credentials: "true"
    # For AWS ALB Ingress Controller (alternative to nginx; also set
    # spec.ingressClassName: alb)
    # alb.ingress.kubernetes.io/scheme: "internet-facing"
    # alb.ingress.kubernetes.io/target-type: "ip"
    # alb.ingress.kubernetes.io/listen-ports: '[{"HTTP": 80}, {"HTTPS": 443}]'
    # alb.ingress.kubernetes.io/ssl-redirect: '443'
    # alb.ingress.kubernetes.io/certificate-arn: "arn:aws:acm:region:account:certificate/xxx"
    # For GKE Ingress (alternative; set spec.ingressClassName: gce)
    # kubernetes.io/ingress.global-static-ip-name: "llm-gateway-ip"
    # ingress.gcp.kubernetes.io/pre-shared-cert: "llm-gateway-cert"
spec:
  # FIX: the `kubernetes.io/ingress.class` annotation is deprecated since
  # Kubernetes 1.18; use the spec field instead.
  ingressClassName: nginx
  tls:
    - hosts:
        - llm-gateway.example.com  # Replace with your domain
      secretName: llm-gateway-tls
  rules:
    - host: llm-gateway.example.com  # Replace with your domain
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: llm-gateway
                port:
                  number: 80

46
k8s/kustomization.yaml Normal file
View File

@@ -0,0 +1,46 @@
# Kustomize configuration for easy deployment
# Usage: kubectl apply -k k8s/
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization

namespace: llm-gateway

resources:
  - namespace.yaml
  - serviceaccount.yaml
  - configmap.yaml
  - secret.yaml
  - deployment.yaml
  - service.yaml
  - ingress.yaml
  - hpa.yaml
  - pdb.yaml
  - networkpolicy.yaml
  - redis.yaml
  - servicemonitor.yaml
  - prometheusrule.yaml

# Common labels applied to all resources.
# NOTE(review): `commonLabels` is deprecated in newer kustomize releases in
# favor of `labels:` — keep an eye on the kustomize version in use.
commonLabels:
  app.kubernetes.io/name: llm-gateway
  app.kubernetes.io/component: api-gateway
  app.kubernetes.io/part-of: llm-platform

# Images to be used (customize for your registry)
images:
  - name: llm-gateway
    newName: your-registry/llm-gateway
    newTag: latest

# ConfigMap generator (alternative to configmap.yaml)
# configMapGenerator:
#   - name: llm-gateway-config
#     files:
#       - config.yaml

# Secret generator (for local development only)
# secretGenerator:
#   - name: llm-gateway-secrets
#     envs:
#       - secrets.env

7
k8s/namespace.yaml Normal file
View File

@@ -0,0 +1,7 @@
# Dedicated namespace for the LLM gateway and its supporting services
# (Redis, monitoring objects). Every other manifest in k8s/ targets it.
apiVersion: v1
kind: Namespace
metadata:
  name: llm-gateway
  labels:
    app: llm-gateway
    environment: production
    # NOTE(review): consider adding Pod Security Admission labels here
    # (e.g. pod-security.kubernetes.io/enforce: restricted) — the workloads
    # in this repo look restricted-compliant, but verify before enforcing.

83
k8s/networkpolicy.yaml Normal file
View File

@@ -0,0 +1,83 @@
# Default-deny-style policy for the gateway pods: only the ingress and
# egress flows listed below are permitted.
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
  name: llm-gateway
  namespace: llm-gateway
  labels:
    app: llm-gateway
spec:
  podSelector:
    matchLabels:
      app: llm-gateway
  policyTypes:
    - Ingress
    - Egress
  ingress:
    # Allow traffic from ingress controller
    - from:
        - namespaceSelector:
            matchLabels:
              name: ingress-nginx
      ports:
        - protocol: TCP
          port: 8080
    # Allow traffic from within the namespace (for debugging/testing)
    - from:
        - podSelector: {}
      ports:
        - protocol: TCP
          port: 8080
    # Allow Prometheus scraping
    - from:
        - namespaceSelector:
            matchLabels:
              name: observability
          podSelector:
            matchLabels:
              app: prometheus
      ports:
        - protocol: TCP
          port: 8080
  egress:
    # Allow DNS. TCP 53 is allowed as well as UDP: resolvers fall back to
    # TCP for truncated responses, and some cluster DNS setups require it.
    - to:
        - namespaceSelector: {}
          podSelector:
            matchLabels:
              k8s-app: kube-dns
      ports:
        - protocol: UDP
          port: 53
        - protocol: TCP
          port: 53
    # Allow Redis access
    - to:
        - podSelector:
            matchLabels:
              app: redis
      ports:
        - protocol: TCP
          port: 6379
    # Allow external provider API access (OpenAI, Anthropic, Google).
    # FIX: a namespaceSelector only ever matches in-cluster pods, so the
    # previous rule (namespaceSelector: {}) did NOT permit egress to the
    # public internet and provider API calls would have been blocked.
    # External destinations require an ipBlock. The in-cluster 443
    # allowance is kept for backward compatibility.
    - to:
        - ipBlock:
            cidr: 0.0.0.0/0
            except:
              # Keep application pods away from the cloud metadata endpoint.
              - 169.254.169.254/32
        - namespaceSelector: {}
      ports:
        - protocol: TCP
          port: 443
    # Allow OTLP tracing export
    - to:
        - namespaceSelector:
            matchLabels:
              name: observability
          podSelector:
            matchLabels:
              app: tempo
      ports:
        - protocol: TCP
          port: 4317

13
k8s/pdb.yaml Normal file
View File

@@ -0,0 +1,13 @@
# Keeps at least 2 gateway pods available during voluntary disruptions
# (node drains, cluster upgrades). For drains to make progress the
# Deployment must normally run 3+ replicas — TODO confirm against
# deployment.yaml / hpa.yaml minReplicas.
apiVersion: policy/v1
kind: PodDisruptionBudget
metadata:
  name: llm-gateway
  namespace: llm-gateway
  labels:
    app: llm-gateway
spec:
  # Voluntary evictions are refused whenever they would leave fewer than
  # 2 ready pods matching the selector below.
  minAvailable: 2
  selector:
    matchLabels:
      app: llm-gateway
  # Pods that are Running but never became Ready may always be evicted,
  # so crash-looping pods cannot block node drains (field requires
  # Kubernetes v1.26+).
  unhealthyPodEvictionPolicy: AlwaysAllow

122
k8s/prometheusrule.yaml Normal file
View File

@@ -0,0 +1,122 @@
# PrometheusRule for alerting
# Requires Prometheus Operator to be installed
# NOTE(review): the ratio-style expressions below yield no samples when the
# denominator is zero (no traffic), so those alerts can only fire while the
# gateway is actually receiving requests. Metric names must match the
# gateway's instrumentation — confirm against the application code.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: llm-gateway
  namespace: llm-gateway
  labels:
    app: llm-gateway
    # Must match the ruleSelector of the Prometheus instance that should
    # load these rules.
    prometheus: kube-prometheus
spec:
  groups:
    - name: llm-gateway.rules
      interval: 30s
      rules:
        # High error rate: 5xx responses exceed 5% of all requests for 5m.
        - alert: LLMGatewayHighErrorRate
          expr: |
            (
              sum(rate(http_requests_total{namespace="llm-gateway",status_code=~"5.."}[5m]))
              /
              sum(rate(http_requests_total{namespace="llm-gateway"}[5m]))
            ) > 0.05
          for: 5m
          labels:
            severity: warning
            component: llm-gateway
          annotations:
            summary: "High error rate in LLM Gateway"
            description: "Error rate is {{ $value | humanizePercentage }} (threshold: 5%)"
        # High latency: p95 request duration above 10s (generous threshold,
        # presumably because upstream LLM calls are slow — confirm SLO).
        - alert: LLMGatewayHighLatency
          expr: |
            histogram_quantile(0.95,
              sum(rate(http_request_duration_seconds_bucket{namespace="llm-gateway"}[5m])) by (le)
            ) > 10
          for: 5m
          labels:
            severity: warning
            component: llm-gateway
          annotations:
            summary: "High latency in LLM Gateway"
            description: "P95 latency is {{ $value }}s (threshold: 10s)"
        # Provider errors: per-provider upstream error ratio above 10%.
        - alert: LLMProviderHighErrorRate
          expr: |
            (
              sum(rate(provider_requests_total{namespace="llm-gateway",status="error"}[5m])) by (provider)
              /
              sum(rate(provider_requests_total{namespace="llm-gateway"}[5m])) by (provider)
            ) > 0.10
          for: 5m
          labels:
            severity: warning
            component: llm-gateway
          annotations:
            summary: "High error rate for provider {{ $labels.provider }}"
            description: "Error rate is {{ $value | humanizePercentage }} (threshold: 10%)"
        # Pod down: scrape target unreachable for 2m (relies on the
        # ServiceMonitor job name being "llm-gateway").
        - alert: LLMGatewayPodDown
          expr: |
            up{job="llm-gateway",namespace="llm-gateway"} == 0
          for: 2m
          labels:
            severity: critical
            component: llm-gateway
          annotations:
            summary: "LLM Gateway pod is down"
            description: "Pod {{ $labels.pod }} has been down for more than 2 minutes"
        # High memory usage: working set above 85% of the container limit
        # (requires cAdvisor metrics and a memory limit to be set).
        - alert: LLMGatewayHighMemoryUsage
          expr: |
            (
              container_memory_working_set_bytes{namespace="llm-gateway",container="gateway"}
              /
              container_spec_memory_limit_bytes{namespace="llm-gateway",container="gateway"}
            ) > 0.85
          for: 5m
          labels:
            severity: warning
            component: llm-gateway
          annotations:
            summary: "High memory usage in LLM Gateway"
            description: "Memory usage is {{ $value | humanizePercentage }} (threshold: 85%)"
        # Rate limit threshold: more than 20% of requests answered 429 for
        # 10m — informational; may indicate limits are set too low.
        - alert: LLMGatewayHighRateLimitHitRate
          expr: |
            (
              sum(rate(http_requests_total{namespace="llm-gateway",status_code="429"}[5m]))
              /
              sum(rate(http_requests_total{namespace="llm-gateway"}[5m]))
            ) > 0.20
          for: 10m
          labels:
            severity: info
            component: llm-gateway
          annotations:
            summary: "High rate limit hit rate"
            description: "{{ $value | humanizePercentage }} of requests are being rate limited"
        # Conversation store errors: Redis-backed store error ratio above 5%.
        - alert: LLMGatewayConversationStoreErrors
          expr: |
            (
              sum(rate(conversation_store_operations_total{namespace="llm-gateway",status="error"}[5m]))
              /
              sum(rate(conversation_store_operations_total{namespace="llm-gateway"}[5m]))
            ) > 0.05
          for: 5m
          labels:
            severity: warning
            component: llm-gateway
          annotations:
            summary: "High error rate in conversation store"
            description: "Error rate is {{ $value | humanizePercentage }} (threshold: 5%)"

131
k8s/redis.yaml Normal file
View File

@@ -0,0 +1,131 @@
# Simple Redis deployment for conversation storage
# For production, consider using:
# - Redis Operator (e.g., Redis Enterprise Operator)
# - Managed Redis (AWS ElastiCache, GCP Memorystore, Azure Cache for Redis)
# - Redis Cluster for high availability
apiVersion: v1
kind: ConfigMap
metadata:
  name: redis-config
  namespace: llm-gateway
  labels:
    app: redis
data:
  redis.conf: |
    maxmemory 256mb
    maxmemory-policy allkeys-lru
    # FIX: persistence was fully disabled (save "" + appendonly no), which
    # made the StatefulSet's 10Gi volumeClaimTemplate pointless and lost
    # every conversation on pod restart. Enable AOF on the PVC mounted at
    # /data; RDB snapshots stay off (save "").
    dir /data
    save ""
    appendonly yes
    appendfsync everysec
---
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: redis
  namespace: llm-gateway
  labels:
    app: redis
spec:
  # Must match the headless Service below; gives the pod a stable DNS name
  # (redis-0.redis.llm-gateway.svc).
  serviceName: redis
  # Single instance — no HA. See the header comment in this file for
  # production alternatives.
  replicas: 1
  selector:
    matchLabels:
      app: redis
  template:
    metadata:
      labels:
        app: redis
    spec:
      # Run the whole pod as the unprivileged redis user (uid 999 in the
      # official image); fsGroup 999 makes the PVC writable by that user.
      securityContext:
        runAsNonRoot: true
        runAsUser: 999
        fsGroup: 999
        seccompProfile:
          type: RuntimeDefault
      containers:
        - name: redis
          image: redis:7.2-alpine
          imagePullPolicy: IfNotPresent
          # Load the ConfigMap-provided config instead of image defaults.
          command:
            - redis-server
            - /etc/redis/redis.conf
          ports:
            - name: redis
              containerPort: 6379
              protocol: TCP
          resources:
            requests:
              cpu: 100m
              memory: 128Mi
            # NOTE(review): redis.conf caps the dataset at maxmemory 256mb;
            # the 512Mi limit leaves headroom for Redis overhead.
            limits:
              cpu: 500m
              memory: 512Mi
          # Liveness: only check that the port accepts TCP connections.
          livenessProbe:
            tcpSocket:
              port: redis
            initialDelaySeconds: 10
            periodSeconds: 10
            timeoutSeconds: 5
            failureThreshold: 3
          # Readiness: require an actual PING/PONG round trip via redis-cli.
          readinessProbe:
            exec:
              command:
                - redis-cli
                - ping
            initialDelaySeconds: 5
            periodSeconds: 5
            timeoutSeconds: 3
            failureThreshold: 3
          volumeMounts:
            - name: config
              mountPath: /etc/redis
            - name: data
              mountPath: /data
          # Hardened container: no privilege escalation, read-only root
          # filesystem (only /data is writable), all capabilities dropped.
          securityContext:
            allowPrivilegeEscalation: false
            readOnlyRootFilesystem: true
            runAsNonRoot: true
            runAsUser: 999
            capabilities:
              drop:
                - ALL
      volumes:
        - name: config
          configMap:
            name: redis-config
  # One PVC per replica, retained across pod restarts and rescheduling.
  # NOTE(review): verify the persistence directives in redis-config
  # (save/appendonly/dir) actually write to this volume — otherwise the
  # 10Gi claim is unused.
  volumeClaimTemplates:
    - metadata:
        name: data
      spec:
        accessModes: ["ReadWriteOnce"]
        resources:
          requests:
            storage: 10Gi
---
# Headless Service for the Redis StatefulSet: clusterIP None gives each
# replica a stable per-pod DNS record (redis-0.redis.llm-gateway.svc) and
# matches the StatefulSet's serviceName.
apiVersion: v1
kind: Service
metadata:
  name: redis
  namespace: llm-gateway
  labels:
    app: redis
spec:
  type: ClusterIP
  clusterIP: None
  selector:
    app: redis
  ports:
    - name: redis
      port: 6379
      # Resolves against the container port named "redis" in the StatefulSet.
      targetPort: redis
      protocol: TCP

46
k8s/secret.yaml Normal file
View File

@@ -0,0 +1,46 @@
apiVersion: v1
kind: Secret
metadata:
  name: llm-gateway-secrets
  namespace: llm-gateway
  labels:
    app: llm-gateway
type: Opaque
# stringData accepts plain text; the API server base64-encodes it into
# `data` on write. The values below are placeholders only — never commit
# real keys to version control.
stringData:
  # IMPORTANT: Replace these with actual values or use external secret management
  # For production, use:
  # - kubectl create secret generic llm-gateway-secrets --from-literal=...
  # - External Secrets Operator with AWS Secrets Manager/HashiCorp Vault
  # - Sealed Secrets
  GOOGLE_API_KEY: "your-google-api-key-here"
  ANTHROPIC_API_KEY: "your-anthropic-api-key-here"
  OPENAI_API_KEY: "your-openai-api-key-here"
  # Expected OIDC `aud` claim for incoming tokens — presumably a Google
  # OAuth client ID; confirm against the gateway's auth configuration.
  OIDC_AUDIENCE: "your-client-id.apps.googleusercontent.com"
---
# Example using External Secrets Operator (commented out)
# apiVersion: external-secrets.io/v1beta1
# kind: ExternalSecret
# metadata:
#   name: llm-gateway-secrets
#   namespace: llm-gateway
# spec:
#   refreshInterval: 1h
#   secretStoreRef:
#     name: aws-secrets-manager
#     kind: SecretStore
#   target:
#     name: llm-gateway-secrets
#     creationPolicy: Owner
#   data:
#     - secretKey: GOOGLE_API_KEY
#       remoteRef:
#         key: prod/llm-gateway/google-api-key
#     - secretKey: ANTHROPIC_API_KEY
#       remoteRef:
#         key: prod/llm-gateway/anthropic-api-key
#     - secretKey: OPENAI_API_KEY
#       remoteRef:
#         key: prod/llm-gateway/openai-api-key
#     - secretKey: OIDC_AUDIENCE
#       remoteRef:
#         key: prod/llm-gateway/oidc-audience

40
k8s/service.yaml Normal file
View File

@@ -0,0 +1,40 @@
# ClusterIP Service fronting the gateway pods; the Ingress routes to this
# Service on port 80, which forwards to the pods' named "http" port.
apiVersion: v1
kind: Service
metadata:
  name: llm-gateway
  namespace: llm-gateway
  labels:
    app: llm-gateway
  annotations:
    # For cloud load balancers (uncomment as needed)
    # service.beta.kubernetes.io/aws-load-balancer-type: "nlb"
    # cloud.google.com/neg: '{"ingress": true}'
spec:
  type: ClusterIP
  selector:
    app: llm-gateway
  ports:
    - name: http
      port: 80
      # Resolves against the container port named "http" in the Deployment.
      targetPort: http
      protocol: TCP
  sessionAffinity: None
---
# Headless service for pod-to-pod communication (if needed)
# clusterIP: None yields per-pod DNS records instead of a virtual IP.
apiVersion: v1
kind: Service
metadata:
  name: llm-gateway-headless
  namespace: llm-gateway
  labels:
    app: llm-gateway
spec:
  type: ClusterIP
  clusterIP: None
  selector:
    app: llm-gateway
  ports:
    - name: http
      port: 8080
      targetPort: http
      protocol: TCP

14
k8s/serviceaccount.yaml Normal file
View File

@@ -0,0 +1,14 @@
# Identity for the gateway pods; referenced by the Deployment and usable
# for cloud IAM federation via the annotations below.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: llm-gateway
  namespace: llm-gateway
  labels:
    app: llm-gateway
  annotations:
    # For GKE Workload Identity
    # iam.gke.io/gcp-service-account: llm-gateway@PROJECT_ID.iam.gserviceaccount.com
    # For EKS IRSA (IAM Roles for Service Accounts)
    # eks.amazonaws.com/role-arn: arn:aws:iam::ACCOUNT_ID:role/llm-gateway-role
# NOTE(review): consider setting this to false — nothing in these manifests
# shows the gateway calling the Kubernetes API, and not mounting a token
# shrinks the attack surface. Confirm against the application code first.
automountServiceAccountToken: true

35
k8s/servicemonitor.yaml Normal file
View File

@@ -0,0 +1,35 @@
# ServiceMonitor for Prometheus Operator
# Requires Prometheus Operator to be installed
# https://github.com/prometheus-operator/prometheus-operator
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
name: llm-gateway
namespace: llm-gateway
labels:
app: llm-gateway
prometheus: kube-prometheus
spec:
selector:
matchLabels:
app: llm-gateway
endpoints:
- port: http
path: /metrics
interval: 30s
scrapeTimeout: 10s
relabelings:
# Add namespace label
- sourceLabels: [__meta_kubernetes_namespace]
targetLabel: namespace
# Add pod label
- sourceLabels: [__meta_kubernetes_pod_name]
targetLabel: pod
# Add service label
- sourceLabels: [__meta_kubernetes_service_name]
targetLabel: service