Add CI and production grade improvements #3
65
.dockerignore
Normal file
65
.dockerignore
Normal file
@@ -0,0 +1,65 @@
|
|||||||
|
# Files excluded from the Docker build context.
#
# Unlike .gitignore, a .dockerignore pattern is matched relative to the context
# root only. Patterns that must apply at any depth (secrets, editor/OS junk,
# compiled artifacts) therefore carry an explicit `**/` prefix.

# Git
.git
.gitignore
.github

# Documentation
*.md
docs/

# IDE
.vscode/
.idea/
**/*.swp
**/*.swo
**/*~

# Build artifacts
/bin/
/dist/
/build/
/gateway
/cmd/gateway/gateway
**/*.exe
**/*.dll
**/*.so
**/*.dylib
**/*.test
**/*.out

# Configuration files with secrets
config.yaml
config.json
**/*-local.yaml
**/*-local.json
**/.env
**/.env.*
**/*.key
**/*.pem

# Test and coverage
coverage.out
coverage.html
**/*.log
logs/

# OS
**/.DS_Store
**/Thumbs.db

# Dependencies (will be downloaded during build)
vendor/

# Python
**/__pycache__/
**/*.py[cod]

# Node
tests/node_modules/

# Jujutsu
.jj/

# Claude
.claude/

# Data directories
data/
**/*.db
|
||||||
181
.github/workflows/ci.yaml
vendored
Normal file
181
.github/workflows/ci.yaml
vendored
Normal file
@@ -0,0 +1,181 @@
|
|||||||
|
# Continuous integration workflow: triggered by pushes to, and pull requests
# against, the main and develop branches.
name: CI

on:
  push:
    branches:
      - main
      - develop
  pull_request:
    branches:
      - main
      - develop

# Workflow-level values referenced by the jobs through the `env` context.
env:
  GO_VERSION: "1.23"
  REGISTRY: ghcr.io
  IMAGE_NAME: ${{ github.repository }}
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
# Runs the Go test suite with the race detector and publishes coverage.
test:
  name: Test
  runs-on: ubuntu-latest

  steps:
    - name: Checkout code
      uses: actions/checkout@v4

    - name: Set up Go
      uses: actions/setup-go@v5
      with:
        go-version: ${{ env.GO_VERSION }}
        cache: true

    - name: Download dependencies
      run: go mod download

    - name: Verify dependencies
      run: go mod verify

    # coverage.out written here feeds both the Codecov upload and the HTML
    # report generated further down.
    - name: Run tests
      run: go test -v -race -coverprofile=coverage.out ./...

    - name: Upload coverage to Codecov
      uses: codecov/codecov-action@v4
      with:
        # codecov-action v4 dropped tokenless uploads (except for fork PRs to
        # public repositories), so the repository upload token is passed
        # explicitly. An unset secret expands to an empty string, which leaves
        # the previous behaviour unchanged.
        token: ${{ secrets.CODECOV_TOKEN }}
        files: ./coverage.out
        flags: unittests
        name: codecov-umbrella

    - name: Generate coverage report
      run: go tool cover -html=coverage.out -o coverage.html

    - name: Upload coverage report
      uses: actions/upload-artifact@v4
      with:
        name: coverage-report
        path: coverage.html
|
||||||
|
|
||||||
|
# Static analysis with golangci-lint.
lint:
  name: Lint
  runs-on: ubuntu-latest

  steps:
    - name: Checkout code
      uses: actions/checkout@v4

    - name: Set up Go
      uses: actions/setup-go@v5
      with:
        go-version: ${{ env.GO_VERSION }}
        cache: true

    - name: Run golangci-lint
      uses: golangci/golangci-lint-action@v4
      with:
        version: latest
        # Passed through to `golangci-lint run`.
        args: "--timeout=5m"
|
||||||
|
|
||||||
|
# Scans the Go sources with gosec and publishes the findings to GitHub code
# scanning as SARIF.
security:
  name: Security Scan
  runs-on: ubuntu-latest

  # Declaring permissions on a job sets every scope not listed here to `none`,
  # so everything the steps need is spelled out.
  permissions:
    # Needed by actions/checkout.
    contents: read
    # Needed by upload-sarif to publish code-scanning results.
    security-events: write
    # Needed by upload-sarif in private repositories to read workflow run info.
    actions: read

  steps:
    - name: Checkout code
      uses: actions/checkout@v4

    - name: Set up Go
      uses: actions/setup-go@v5
      with:
        go-version: ${{ env.GO_VERSION }}
        cache: true

    # NOTE(review): `@master` is a mutable ref; consider pinning this action to
    # a release tag or commit SHA.
    - name: Run Gosec Security Scanner
      uses: securego/gosec@master
      with:
        # -no-fail keeps the job green; findings are reported via the SARIF
        # upload below rather than by failing the build.
        args: '-no-fail -fmt sarif -out results.sarif ./...'

    - name: Upload SARIF file
      uses: github/codeql-action/upload-sarif@v3
      with:
        sarif_file: results.sarif
|
||||||
|
|
||||||
|
# Compiles the gateway binary once tests and lint have passed and stores it as
# a workflow artifact.
build:
  name: Build
  runs-on: ubuntu-latest
  needs:
    - test
    - lint

  steps:
    - name: Checkout code
      uses: actions/checkout@v4

    - name: Set up Go
      uses: actions/setup-go@v5
      with:
        go-version: ${{ env.GO_VERSION }}
        cache: true

    - name: Build binary
      run: CGO_ENABLED=1 go build -v -o bin/gateway ./cmd/gateway

    - name: Upload binary
      uses: actions/upload-artifact@v4
      with:
        name: gateway-binary
        path: bin/gateway
|
||||||
|
|
||||||
|
# Builds the multi-platform container image, pushes it to the registry and
# scans the pushed image with Trivy.
docker:
  name: Build and Push Docker Image
  runs-on: ubuntu-latest
  needs: [test, lint, security]
  # Images are only published for pushes to main and develop.
  if: github.event_name == 'push' && (github.ref == 'refs/heads/main' || github.ref == 'refs/heads/develop')

  permissions:
    contents: read
    packages: write
    # Required by upload-sarif to publish the Trivy report; without it the
    # explicit permissions block above leaves this scope at `none`.
    security-events: write

  steps:
    - name: Checkout code
      uses: actions/checkout@v4

    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v3

    - name: Log in to Container Registry
      uses: docker/login-action@v3
      with:
        registry: ${{ env.REGISTRY }}
        username: ${{ github.actor }}
        password: ${{ secrets.GITHUB_TOKEN }}

    - name: Extract metadata
      id: meta
      uses: docker/metadata-action@v5
      with:
        images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
        tags: |
          type=ref,event=branch
          type=ref,event=pr
          type=semver,pattern={{version}}
          type=semver,pattern={{major}}.{{minor}}
          type=sha,prefix={{branch}}-
          type=raw,value=latest,enable=${{ github.ref == 'refs/heads/main' }}

    - name: Build and push Docker image
      uses: docker/build-push-action@v5
      with:
        context: .
        push: true
        tags: ${{ steps.meta.outputs.tags }}
        labels: ${{ steps.meta.outputs.labels }}
        cache-from: type=gha
        cache-to: type=gha,mode=max
        platforms: linux/amd64,linux/arm64

    # NOTE(review): `@master` is a mutable ref; consider pinning this action to
    # a release tag or commit SHA.
    - name: Run Trivy vulnerability scanner
      uses: aquasecurity/trivy-action@master
      with:
        # Scan a reference that was actually pushed. None of the tag rules
        # above produce a bare `:<full commit sha>` tag, so the first tag
        # generated by the metadata step is used instead.
        image-ref: ${{ fromJSON(steps.meta.outputs.json).tags[0] }}
        format: 'sarif'
        output: 'trivy-results.sarif'

    - name: Upload Trivy results to GitHub Security
      uses: github/codeql-action/upload-sarif@v3
      with:
        sarif_file: 'trivy-results.sarif'
|
||||||
129
.github/workflows/release.yaml
vendored
Normal file
129
.github/workflows/release.yaml
vendored
Normal file
@@ -0,0 +1,129 @@
|
|||||||
|
# Release workflow: triggered by pushing a tag whose name starts with "v".
name: Release

on:
  push:
    tags: ["v*"]

# Workflow-level values referenced by the jobs through the `env` context.
env:
  GO_VERSION: "1.23"
  REGISTRY: ghcr.io
  IMAGE_NAME: ${{ github.repository }}
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
# Builds the Linux binaries, collects the macOS binaries produced by the
# build-darwin job, pushes the container image and publishes the GitHub
# release.
release:
  name: Create Release
  runs-on: ubuntu-latest
  needs: build-darwin

  permissions:
    contents: write
    packages: write

  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # Full history and tags are required by the changelog step.
        fetch-depth: 0

    - name: Set up Go
      uses: actions/setup-go@v5
      with:
        go-version: ${{ env.GO_VERSION }}

    - name: Run tests
      run: go test -v ./...

    # cgo builds need a C compiler for the target architecture; the runner
    # only ships the native amd64 toolchain.
    - name: Install arm64 cross compiler
      run: |
        sudo apt-get update
        sudo apt-get install -y gcc-aarch64-linux-gnu

    - name: Build binaries
      run: |
        # Linux amd64
        GOOS=linux GOARCH=amd64 CGO_ENABLED=1 go build -o bin/gateway-linux-amd64 ./cmd/gateway

        # Linux arm64
        GOOS=linux GOARCH=arm64 CGO_ENABLED=1 CC=aarch64-linux-gnu-gcc go build -o bin/gateway-linux-arm64 ./cmd/gateway

    - name: Download macOS binaries
      uses: actions/download-artifact@v4
      with:
        name: gateway-darwin
        path: bin

    - name: Create checksums
      run: |
        cd bin
        sha256sum gateway-* > checksums.txt

    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v3

    - name: Log in to Container Registry
      uses: docker/login-action@v3
      with:
        registry: ${{ env.REGISTRY }}
        username: ${{ github.actor }}
        password: ${{ secrets.GITHUB_TOKEN }}

    - name: Extract metadata
      id: meta
      uses: docker/metadata-action@v5
      with:
        images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
        tags: |
          type=semver,pattern={{version}}
          type=semver,pattern={{major}}.{{minor}}
          type=semver,pattern={{major}}
          type=raw,value=latest

    - name: Build and push Docker image
      uses: docker/build-push-action@v5
      with:
        context: .
        push: true
        tags: ${{ steps.meta.outputs.tags }}
        labels: ${{ steps.meta.outputs.labels }}
        platforms: linux/amd64,linux/arm64
        cache-from: type=gha
        cache-to: type=gha,mode=max

    - name: Generate changelog
      id: changelog
      run: |
        # On the very first release there is no earlier tag to diff against;
        # fall back to the full history instead of failing the step.
        previous_tag="$(git describe --tags --abbrev=0 HEAD^ 2>/dev/null || true)"
        if [ -n "$previous_tag" ]; then
          log_range="${previous_tag}..HEAD"
        else
          log_range="HEAD"
        fi

        # tformat (unlike format) terminates the last entry with a newline, so
        # the closing delimiter below lands on a line of its own.
        git log "$log_range" --pretty=tformat:"* %s (%h)" > CHANGELOG.txt

        {
          echo "changelog<<CHANGELOG_EOF"
          cat CHANGELOG.txt
          echo "CHANGELOG_EOF"
        } >> "$GITHUB_OUTPUT"

    - name: Create Release
      uses: softprops/action-gh-release@v2
      with:
        body: |
          ## Changes
          ${{ steps.changelog.outputs.changelog }}

          ## Docker Images
          ```
          docker pull ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ github.ref_name }}
          docker pull ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:latest
          ```

          ## Installation

          ### Kubernetes
          ```bash
          kubectl apply -k k8s/
          ```

          ### Docker
          ```bash
          docker run -p 8080:8080 \
            -e GOOGLE_API_KEY=your-key \
            -e ANTHROPIC_API_KEY=your-key \
            -e OPENAI_API_KEY=your-key \
            ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ github.ref_name }}
          ```
        files: |
          bin/gateway-*
          bin/checksums.txt
        draft: false
        prerelease: ${{ contains(github.ref, 'alpha') || contains(github.ref, 'beta') || contains(github.ref, 'rc') }}
      env:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

# Helper job for `release`: the darwin binaries are built with cgo, which needs
# Apple's toolchain, so they are compiled on a macOS runner and handed over as
# a workflow artifact.
build-darwin:
  name: Build macOS binaries
  runs-on: macos-latest

  steps:
    - name: Checkout code
      uses: actions/checkout@v4

    - name: Set up Go
      uses: actions/setup-go@v5
      with:
        go-version: ${{ env.GO_VERSION }}

    - name: Build binaries
      run: |
        # macOS amd64
        GOOS=darwin GOARCH=amd64 CGO_ENABLED=1 go build -o bin/gateway-darwin-amd64 ./cmd/gateway

        # macOS arm64
        GOOS=darwin GOARCH=arm64 CGO_ENABLED=1 go build -o bin/gateway-darwin-arm64 ./cmd/gateway

    - name: Upload macOS binaries
      uses: actions/upload-artifact@v4
      with:
        name: gateway-darwin
        path: bin/gateway-darwin-*
        if-no-files-found: error
|
||||||
62
Dockerfile
Normal file
62
Dockerfile
Normal file
@@ -0,0 +1,62 @@
|
|||||||
|
# Multi-stage build for Go LLM Gateway
# Stage 1: Build the Go binary
# The Go version is pinned to the one used by the CI workflows.
FROM golang:1.23-alpine AS builder

# Install build dependencies. gcc and musl-dev are required because the binary
# is built with cgo; installing them before the sources are copied keeps this
# layer cached across code changes.
RUN apk add --no-cache git ca-certificates tzdata gcc musl-dev

WORKDIR /build

# Copy go mod files first for better caching
COPY go.mod go.sum ./
RUN go mod download

# Copy source code
COPY . .

# Build the binary with optimizations
# CGO is required for SQLite support
# GOARCH is deliberately not set: the image is built for several platforms
# (linux/amd64 and linux/arm64 in CI), and each platform's build stage must
# produce a binary for its own architecture.
RUN CGO_ENABLED=1 GOOS=linux go build \
    -ldflags='-w -s -extldflags "-static"' \
    -a -installsuffix cgo \
    -o gateway \
    ./cmd/gateway

# Stage 2: Create minimal runtime image
FROM alpine:3.19

# Install runtime dependencies
RUN apk add --no-cache ca-certificates tzdata

# Create non-root user
RUN addgroup -g 1000 gateway && \
    adduser -D -u 1000 -G gateway gateway

# Create necessary directories
RUN mkdir -p /app /app/data && \
    chown -R gateway:gateway /app

WORKDIR /app

# Copy binary from builder
COPY --from=builder /build/gateway /app/gateway

# Copy example config (optional, mainly for documentation)
COPY config.example.yaml /app/config.example.yaml

# Switch to non-root user
USER gateway

# Expose port
EXPOSE 8080

# Health check
HEALTHCHECK --interval=30s --timeout=5s --start-period=5s --retries=3 \
    CMD wget --no-verbose --tries=1 --spider http://localhost:8080/health || exit 1

# Set entrypoint
ENTRYPOINT ["/app/gateway"]

# Default command (can be overridden)
# The config file is not baked into the image; it is expected to be mounted at
# this path (see the docker-run Makefile target and docker-compose.yaml).
CMD ["--config", "/app/config/config.yaml"]
|
||||||
151
Makefile
Normal file
151
Makefile
Normal file
@@ -0,0 +1,151 @@
|
|||||||
|
# Makefile for LLM Gateway

# Every target in this file is a command, not a file to be produced, so all of
# them are declared phony; otherwise a file or directory with the same name
# would stop the target from running.
.PHONY: help build build-static test test-coverage fmt lint tidy clean \
	docker-build docker-push docker-run \
	docker-compose-up docker-compose-down docker-compose-logs \
	k8s-namespace k8s-secrets k8s-deploy k8s-delete k8s-status k8s-logs \
	k8s-describe k8s-port-forward \
	ci security-scan run version

# Variables
APP_NAME := llm-gateway
# Falls back to "dev" when git metadata is unavailable.
VERSION := $(shell git describe --tags --always --dirty 2>/dev/null || echo "dev")
# Override on the command line or via the environment, e.g. REGISTRY=ghcr.io/org
REGISTRY ?= your-registry
IMAGE := $(REGISTRY)/$(APP_NAME)
DOCKER_TAG := $(IMAGE):$(VERSION)
LATEST_TAG := $(IMAGE):latest

# Go variables
GOCMD := go
GOBUILD := $(GOCMD) build
GOTEST := $(GOCMD) test
GOMOD := $(GOCMD) mod
GOFMT := $(GOCMD) fmt

# Build directory
BUILD_DIR := bin

# Help target
# Lists every target that carries a trailing "## description" comment. The
# target-name pattern includes digits so that the k8s-* targets are listed too.
help: ## Show this help message
	@echo "Usage: make [target]"
	@echo ""
	@echo "Targets:"
	@awk 'BEGIN {FS = ":.*##"; printf "\n"} /^[a-zA-Z0-9_-]+:.*?##/ { printf " %-20s %s\n", $$1, $$2 }' $(MAKEFILE_LIST)
|
||||||
|
|
||||||
|
# Development targets
build: ## Build the binary
	@echo "Building $(APP_NAME)..."
	CGO_ENABLED=1 $(GOBUILD) -o $(BUILD_DIR)/$(APP_NAME) ./cmd/gateway

build-static: ## Build static binary
	@echo "Building static binary..."
	CGO_ENABLED=1 $(GOBUILD) -ldflags='-w -s -extldflags "-static"' -a -installsuffix cgo -o $(BUILD_DIR)/$(APP_NAME) ./cmd/gateway

test: ## Run tests
	@echo "Running tests..."
	$(GOTEST) -v -race -coverprofile=coverage.out ./...

# Depends on `test`, which writes the coverage.out consumed here.
test-coverage: test ## Run tests with coverage report
	@echo "Generating coverage report..."
	$(GOCMD) tool cover -html=coverage.out -o coverage.html
	@echo "Coverage report saved to coverage.html"

fmt: ## Format Go code
	@echo "Formatting code..."
	$(GOFMT) ./...

# `command -v` is the POSIX way to probe for an executable; `which` is not
# guaranteed to exist on minimal systems.
lint: ## Run linter
	@echo "Running linter..."
	@command -v golangci-lint > /dev/null || (echo "golangci-lint not installed. Run: go install github.com/golangci/golangci-lint/cmd/golangci-lint@latest" && exit 1)
	golangci-lint run ./...

tidy: ## Tidy go modules
	@echo "Tidying go modules..."
	$(GOMOD) tidy

clean: ## Clean build artifacts
	@echo "Cleaning..."
	rm -rf $(BUILD_DIR)
	rm -f coverage.out coverage.html
|
||||||
|
|
||||||
|
# Docker targets
docker-build: ## Build Docker image
	@echo "Building Docker image $(DOCKER_TAG)..."
	docker build -t $(DOCKER_TAG) -t $(LATEST_TAG) .

docker-push: docker-build ## Push Docker image to registry
	@echo "Pushing Docker image..."
	docker push $(DOCKER_TAG)
	docker push $(LATEST_TAG)

# The config file is mounted read-only at the path the image's default command
# expects. $(CURDIR) is set by make itself and stays correct under `make -C`,
# unlike the inherited PWD environment variable.
docker-run: ## Run Docker container locally
	@echo "Running Docker container..."
	docker run --rm -p 8080:8080 \
		-e GOOGLE_API_KEY="$(GOOGLE_API_KEY)" \
		-e ANTHROPIC_API_KEY="$(ANTHROPIC_API_KEY)" \
		-e OPENAI_API_KEY="$(OPENAI_API_KEY)" \
		-v $(CURDIR)/config.yaml:/app/config/config.yaml:ro \
		$(DOCKER_TAG)

docker-compose-up: ## Start services with docker-compose
	@echo "Starting services with docker-compose..."
	docker-compose up -d

docker-compose-down: ## Stop services with docker-compose
	@echo "Stopping services with docker-compose..."
	docker-compose down

docker-compose-logs: ## View docker-compose logs
	docker-compose logs -f
|
||||||
|
|
||||||
|
# Kubernetes targets
# The create commands render manifests client-side and pipe them to
# `kubectl apply`, so re-running them against existing objects succeeds.
k8s-namespace: ## Create Kubernetes namespace
	kubectl create namespace llm-gateway --dry-run=client -o yaml | kubectl apply -f -

# Aborts before touching the cluster when any of the three provider keys is
# empty. OIDC_AUDIENCE is passed through without being checked.
k8s-secrets: ## Create Kubernetes secrets (requires env vars)
	@echo "Creating secrets..."
	@[ -n "$(GOOGLE_API_KEY)" ] && [ -n "$(ANTHROPIC_API_KEY)" ] && [ -n "$(OPENAI_API_KEY)" ] || { \
		echo "Error: Please set GOOGLE_API_KEY, ANTHROPIC_API_KEY, and OPENAI_API_KEY environment variables"; \
		exit 1; \
	}
	kubectl create secret generic llm-gateway-secrets \
		--from-literal=GOOGLE_API_KEY="$(GOOGLE_API_KEY)" \
		--from-literal=ANTHROPIC_API_KEY="$(ANTHROPIC_API_KEY)" \
		--from-literal=OPENAI_API_KEY="$(OPENAI_API_KEY)" \
		--from-literal=OIDC_AUDIENCE="$(OIDC_AUDIENCE)" \
		-n llm-gateway \
		--dry-run=client -o yaml | kubectl apply -f -

# Namespace and secrets are created first, then the kustomization is applied.
k8s-deploy: k8s-namespace k8s-secrets ## Deploy to Kubernetes
	@echo "Deploying to Kubernetes..."
	kubectl apply -k k8s/

k8s-delete: ## Delete from Kubernetes
	@echo "Deleting from Kubernetes..."
	kubectl delete -k k8s/

k8s-status: ## Check Kubernetes deployment status
	@echo "Checking deployment status..."
	kubectl get all -n llm-gateway

k8s-logs: ## View Kubernetes logs
	kubectl logs -n llm-gateway -l app=llm-gateway --tail=100 -f

k8s-describe: ## Describe Kubernetes deployment
	kubectl describe deployment llm-gateway -n llm-gateway

# Forwards local port 8080 to port 80 of the llm-gateway service.
k8s-port-forward: ## Port forward to local machine
	kubectl port-forward -n llm-gateway svc/llm-gateway 8080:80
|
||||||
|
|
||||||
|
# CI/CD targets
# Aggregate target: has no recipe of its own, it only runs lint and test.
ci: lint test ## Run CI checks

# `command -v` is the POSIX way to probe for an executable; `which` is not
# guaranteed to exist on minimal systems.
security-scan: ## Run security scan
	@echo "Running security scan..."
	@command -v gosec > /dev/null || (echo "gosec not installed. Run: go install github.com/securego/gosec/v2/cmd/gosec@latest" && exit 1)
	gosec ./...

# Run target
run: ## Run locally
	@echo "Running $(APP_NAME) locally..."
	$(GOCMD) run ./cmd/gateway --config config.yaml

# Version info
version: ## Show version
	@echo "Version: $(VERSION)"
	@echo "Image: $(DOCKER_TAG)"
|
||||||
102
docker-compose.yaml
Normal file
102
docker-compose.yaml
Normal file
@@ -0,0 +1,102 @@
|
|||||||
|
# Docker Compose stack for local development and testing.
# Not recommended for production - use Kubernetes instead.

version: "3.9"

services:
|
||||||
|
# The gateway itself, built from the Dockerfile in this directory.
gateway:
  build:
    context: .
    dockerfile: Dockerfile
  image: llm-gateway:latest
  container_name: llm-gateway
  ports:
    - "8080:8080"
  environment:
    # Provider API keys
    GOOGLE_API_KEY: ${GOOGLE_API_KEY}
    ANTHROPIC_API_KEY: ${ANTHROPIC_API_KEY}
    OPENAI_API_KEY: ${OPENAI_API_KEY}
    # Defaults to an empty string when unset.
    OIDC_AUDIENCE: ${OIDC_AUDIENCE:-}
  volumes:
    # Local config, mounted read-only.
    - ./config.yaml:/app/config/config.yaml:ro
  depends_on:
    # Wait until the redis healthcheck passes before starting.
    redis:
      condition: service_healthy
  networks:
    - llm-network
  restart: unless-stopped
  healthcheck:
    test:
      - "CMD"
      - "wget"
      - "--no-verbose"
      - "--tries=1"
      - "--spider"
      - "http://localhost:8080/health"
    interval: 30s
    timeout: 5s
    retries: 3
    start_period: 10s
|
||||||
|
|
||||||
|
# Redis instance for the gateway, capped at 256 MB with LRU eviction.
redis:
  image: redis:7.2-alpine
  container_name: llm-gateway-redis
  ports:
    # Published on the loopback interface only: this Redis has no password,
    # and other services on llm-network reach it without a host port.
    - "127.0.0.1:6379:6379"
  command: redis-server --maxmemory 256mb --maxmemory-policy allkeys-lru
  volumes:
    - redis-data:/data
  networks:
    - llm-network
  restart: unless-stopped
  healthcheck:
    test: ["CMD", "redis-cli", "ping"]
    interval: 10s
    timeout: 3s
    retries: 3
|
||||||
|
|
||||||
|
# Optional: Prometheus for metrics.
# Only started when the "monitoring" profile is enabled.
prometheus:
  image: prom/prometheus:latest
  container_name: llm-gateway-prometheus
  ports:
    - "9090:9090"
  command:
    - "--config.file=/etc/prometheus/prometheus.yml"
    - "--storage.tsdb.path=/prometheus"
    - "--web.console.libraries=/usr/share/prometheus/console_libraries"
    - "--web.console.templates=/usr/share/prometheus/consoles"
  volumes:
    # Scrape configuration, mounted read-only.
    - ./monitoring/prometheus.yml:/etc/prometheus/prometheus.yml:ro
    # Named volume backing the TSDB path given above.
    - prometheus-data:/prometheus
  networks:
    - llm-network
  restart: unless-stopped
  profiles:
    - monitoring
|
||||||
|
|
||||||
|
# Optional: Grafana for visualization.
# Only started when the "monitoring" profile is enabled.
grafana:
  image: grafana/grafana:latest
  container_name: llm-gateway-grafana
  ports:
    - "3000:3000"
  environment:
    # Overridable via GRAFANA_ADMIN_PASSWORD; the fallback keeps the previous
    # default so existing local setups continue to work.
    - GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_ADMIN_PASSWORD:-admin}
    - GF_USERS_ALLOW_SIGN_UP=false
  volumes:
    - ./monitoring/grafana-datasources.yml:/etc/grafana/provisioning/datasources/datasources.yml:ro
    - ./monitoring/grafana-dashboards.yml:/etc/grafana/provisioning/dashboards/dashboards.yml:ro
    - ./monitoring/dashboards:/var/lib/grafana/dashboards:ro
    - grafana-data:/var/lib/grafana
  depends_on:
    - prometheus
  networks:
    - llm-network
  restart: unless-stopped
  profiles:
    - monitoring
|
||||||
|
|
||||||
|
# Bridge network joined by every service in this file.
networks:
  llm-network:
    driver: bridge

# Named volumes with default settings, one per stateful service.
volumes:
  redis-data: {}
  prometheus-data: {}
  grafana-data: {}
|
||||||
352
k8s/README.md
Normal file
352
k8s/README.md
Normal file
@@ -0,0 +1,352 @@
|
|||||||
|
# Kubernetes Deployment Guide
|
||||||
|
|
||||||
|
This directory contains Kubernetes manifests for deploying the LLM Gateway to production.
|
||||||
|
|
||||||
|
## Prerequisites
|
||||||
|
|
||||||
|
- Kubernetes cluster (v1.24+)
|
||||||
|
- `kubectl` configured
|
||||||
|
- Container registry access
|
||||||
|
- (Optional) Prometheus Operator for monitoring
|
||||||
|
- (Optional) cert-manager for TLS certificates
|
||||||
|
- (Optional) nginx-ingress-controller or cloud load balancer
|
||||||
|
|
||||||
|
## Quick Start
|
||||||
|
|
||||||
|
### 1. Build and Push Docker Image
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Build the image
|
||||||
|
docker build -t your-registry/llm-gateway:v1.0.0 .
|
||||||
|
|
||||||
|
# Push to registry
|
||||||
|
docker push your-registry/llm-gateway:v1.0.0
|
||||||
|
```
|
||||||
|
|
||||||
|
### 2. Configure Secrets
|
||||||
|
|
||||||
|
**Option A: Using kubectl**
|
||||||
|
```bash
|
||||||
|
kubectl create namespace llm-gateway
|
||||||
|
|
||||||
|
kubectl create secret generic llm-gateway-secrets \
|
||||||
|
--from-literal=GOOGLE_API_KEY="your-key" \
|
||||||
|
--from-literal=ANTHROPIC_API_KEY="your-key" \
|
||||||
|
--from-literal=OPENAI_API_KEY="your-key" \
|
||||||
|
--from-literal=OIDC_AUDIENCE="your-client-id" \
|
||||||
|
-n llm-gateway
|
||||||
|
```
|
||||||
|
|
||||||
|
**Option B: Using External Secrets Operator (Recommended)**
|
||||||
|
- Uncomment the ExternalSecret in `secret.yaml`
|
||||||
|
- Configure your SecretStore (AWS Secrets Manager, Vault, etc.)
|
||||||
|
|
||||||
|
### 3. Update Configuration
|
||||||
|
|
||||||
|
Edit `configmap.yaml`:
|
||||||
|
- Update Redis connection string if using external Redis
|
||||||
|
- Configure observability endpoints (Tempo, Prometheus)
|
||||||
|
- Adjust rate limits as needed
|
||||||
|
- Set OIDC issuer and audience
|
||||||
|
|
||||||
|
Edit `ingress.yaml`:
|
||||||
|
- Replace `llm-gateway.example.com` with your domain
|
||||||
|
- Configure TLS certificate annotations
|
||||||
|
|
||||||
|
Edit `kustomization.yaml`:
|
||||||
|
- Update image registry and tag
|
||||||
|
|
||||||
|
### 4. Deploy
|
||||||
|
|
||||||
|
**Using Kustomize (Recommended):**
|
||||||
|
```bash
|
||||||
|
kubectl apply -k k8s/
|
||||||
|
```
|
||||||
|
|
||||||
|
**Using kubectl directly:**
|
||||||
|
```bash
|
||||||
|
kubectl apply -f k8s/namespace.yaml
|
||||||
|
kubectl apply -f k8s/serviceaccount.yaml
|
||||||
|
kubectl apply -f k8s/secret.yaml
|
||||||
|
kubectl apply -f k8s/configmap.yaml
|
||||||
|
kubectl apply -f k8s/redis.yaml
|
||||||
|
kubectl apply -f k8s/deployment.yaml
|
||||||
|
kubectl apply -f k8s/service.yaml
|
||||||
|
kubectl apply -f k8s/ingress.yaml
|
||||||
|
kubectl apply -f k8s/hpa.yaml
|
||||||
|
kubectl apply -f k8s/pdb.yaml
|
||||||
|
kubectl apply -f k8s/networkpolicy.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
**With Prometheus Operator:**
|
||||||
|
```bash
|
||||||
|
kubectl apply -f k8s/servicemonitor.yaml
|
||||||
|
kubectl apply -f k8s/prometheusrule.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
### 5. Verify Deployment
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Check pods
|
||||||
|
kubectl get pods -n llm-gateway
|
||||||
|
|
||||||
|
# Check services
|
||||||
|
kubectl get svc -n llm-gateway
|
||||||
|
|
||||||
|
# Check ingress
|
||||||
|
kubectl get ingress -n llm-gateway
|
||||||
|
|
||||||
|
# View logs
|
||||||
|
kubectl logs -n llm-gateway -l app=llm-gateway --tail=100 -f
|
||||||
|
|
||||||
|
# Check health (port-forward keeps running in the foreground, so run the curl from a second terminal)
|
||||||
|
kubectl port-forward -n llm-gateway svc/llm-gateway 8080:80
|
||||||
|
curl http://localhost:8080/health
|
||||||
|
```
|
||||||
|
|
||||||
|
## Architecture Overview
|
||||||
|
|
||||||
|
```
|
||||||
|
┌─────────────────────────────────────────────────────────┐
|
||||||
|
│ Internet/Clients │
|
||||||
|
└───────────────────────┬─────────────────────────────────┘
|
||||||
|
│
|
||||||
|
▼
|
||||||
|
┌─────────────────────────────────────────────────────────┐
|
||||||
|
│ Ingress Controller │
|
||||||
|
│ (nginx/ALB/GCE with TLS) │
|
||||||
|
└───────────────────────┬─────────────────────────────────┘
|
||||||
|
│
|
||||||
|
▼
|
||||||
|
┌─────────────────────────────────────────────────────────┐
|
||||||
|
│ LLM Gateway Service │
|
||||||
|
│ (LoadBalancer) │
|
||||||
|
└───────────────────────┬─────────────────────────────────┘
|
||||||
|
│
|
||||||
|
┌───────────────┼───────────────┐
|
||||||
|
▼ ▼ ▼
|
||||||
|
┌──────────────┐ ┌──────────────┐ ┌──────────────┐
|
||||||
|
│ Gateway │ │ Gateway │ │ Gateway │
|
||||||
|
│ Pod 1 │ │ Pod 2 │ │ Pod 3 │
|
||||||
|
└──────┬───────┘ └──────┬───────┘ └──────┬───────┘
|
||||||
|
│ │ │
|
||||||
|
└────────────────┼────────────────┘
|
||||||
|
│
|
||||||
|
┌───────────────┼───────────────┐
|
||||||
|
▼ ▼ ▼
|
||||||
|
┌──────────────┐ ┌──────────────┐ ┌──────────────┐
|
||||||
|
│ Redis │ │ Prometheus │ │ Tempo │
|
||||||
|
│ (Persistent) │ │ (Metrics) │ │ (Traces) │
|
||||||
|
└──────────────┘ └──────────────┘ └──────────────┘
|
||||||
|
```
|
||||||
|
|
||||||
|
## Resource Specifications
|
||||||
|
|
||||||
|
### Default Resources
|
||||||
|
- **Requests**: 100m CPU, 128Mi memory
|
||||||
|
- **Limits**: 1000m CPU, 512Mi memory
|
||||||
|
- **Replicas**: 3 (min), 20 (max with HPA)
|
||||||
|
|
||||||
|
### Scaling
|
||||||
|
- HPA scales based on CPU (70%) and memory (80%)
|
||||||
|
- PodDisruptionBudget keeps a minimum of 2 replicas available during voluntary disruptions (e.g. node drains and upgrades)
|
||||||
|
|
||||||
|
## Configuration Options
|
||||||
|
|
||||||
|
### Environment Variables (from Secret)
|
||||||
|
- `GOOGLE_API_KEY`: Google AI API key
|
||||||
|
- `ANTHROPIC_API_KEY`: Anthropic API key
|
||||||
|
- `OPENAI_API_KEY`: OpenAI API key
|
||||||
|
- `OIDC_AUDIENCE`: OIDC client ID for authentication
|
||||||
|
|
||||||
|
### ConfigMap Settings
|
||||||
|
See `configmap.yaml` for full configuration options:
|
||||||
|
- Server address
|
||||||
|
- Logging format and level
|
||||||
|
- Rate limiting
|
||||||
|
- Observability (metrics/tracing)
|
||||||
|
- Provider endpoints
|
||||||
|
- Conversation storage
|
||||||
|
- Authentication
|
||||||
|
|
||||||
|
## Security
|
||||||
|
|
||||||
|
### Security Features
|
||||||
|
- Non-root container execution (UID 1000)
|
||||||
|
- Read-only root filesystem
|
||||||
|
- No privilege escalation
|
||||||
|
- All capabilities dropped
|
||||||
|
- Network policies for ingress/egress control
|
||||||
|
- SeccompProfile: RuntimeDefault
|
||||||
|
|
||||||
|
### TLS/HTTPS
|
||||||
|
- Ingress configured with TLS
|
||||||
|
- Uses cert-manager for automatic certificate provisioning
|
||||||
|
- Force SSL redirect enabled
|
||||||
|
|
||||||
|
### Secrets Management
|
||||||
|
**Never commit secrets to git!**
|
||||||
|
|
||||||
|
Production options:
|
||||||
|
1. **External Secrets Operator** (Recommended)
|
||||||
|
- AWS Secrets Manager
|
||||||
|
- HashiCorp Vault
|
||||||
|
- Google Secret Manager
|
||||||
|
|
||||||
|
2. **Sealed Secrets**
|
||||||
|
- Encrypted secrets in git
|
||||||
|
|
||||||
|
3. **Manual kubectl secrets**
|
||||||
|
- Created outside of git
|
||||||
|
|
||||||
|
## Monitoring
|
||||||
|
|
||||||
|
### Metrics
|
||||||
|
- Exposed on `/metrics` endpoint
|
||||||
|
- Scraped by Prometheus via ServiceMonitor
|
||||||
|
- Key metrics:
|
||||||
|
- HTTP request rate, latency, errors
|
||||||
|
- Provider request rate, latency, token usage
|
||||||
|
- Conversation store operations
|
||||||
|
- Rate limiting hits
|
||||||
|
|
||||||
|
### Alerts
|
||||||
|
See `prometheusrule.yaml` for configured alerts:
|
||||||
|
- High error rate
|
||||||
|
- High latency
|
||||||
|
- Provider failures
|
||||||
|
- Pod down
|
||||||
|
- High memory usage
|
||||||
|
- Rate limit threshold exceeded
|
||||||
|
- Conversation store errors
|
||||||
|
|
||||||
|
### Logs
|
||||||
|
Structured JSON logs with:
|
||||||
|
- Request IDs
|
||||||
|
- Trace context (trace_id, span_id)
|
||||||
|
- Log levels (debug/info/warn/error)
|
||||||
|
|
||||||
|
View logs:
|
||||||
|
```bash
|
||||||
|
kubectl logs -n llm-gateway -l app=llm-gateway --tail=100 -f
|
||||||
|
```
|
||||||
|
|
||||||
|
## Maintenance
|
||||||
|
|
||||||
|
### Rolling Updates
|
||||||
|
```bash
|
||||||
|
# Update image
|
||||||
|
kubectl set image deployment/llm-gateway gateway=your-registry/llm-gateway:v1.0.1 -n llm-gateway
|
||||||
|
|
||||||
|
# Check rollout status
|
||||||
|
kubectl rollout status deployment/llm-gateway -n llm-gateway
|
||||||
|
|
||||||
|
# Rollback if needed
|
||||||
|
kubectl rollout undo deployment/llm-gateway -n llm-gateway
|
||||||
|
```
|
||||||
|
|
||||||
|
### Scaling
|
||||||
|
```bash
|
||||||
|
# Manual scale
|
||||||
|
kubectl scale deployment/llm-gateway --replicas=5 -n llm-gateway
|
||||||
|
|
||||||
|
# Note: while the HPA is active it overrides a manual replica count and keeps it within its min/max bounds (3-20)
|
||||||
|
```
|
||||||
|
|
||||||
|
### Configuration Updates
|
||||||
|
```bash
|
||||||
|
# Edit ConfigMap
|
||||||
|
kubectl edit configmap llm-gateway-config -n llm-gateway
|
||||||
|
|
||||||
|
# Restart pods to pick up changes
|
||||||
|
kubectl rollout restart deployment/llm-gateway -n llm-gateway
|
||||||
|
```
|
||||||
|
|
||||||
|
### Debugging
|
||||||
|
```bash
|
||||||
|
# Exec into pod
|
||||||
|
kubectl exec -it -n llm-gateway deployment/llm-gateway -- /bin/sh
|
||||||
|
|
||||||
|
# Port forward for local access
|
||||||
|
kubectl port-forward -n llm-gateway svc/llm-gateway 8080:80
|
||||||
|
|
||||||
|
# Check events
|
||||||
|
kubectl get events -n llm-gateway --sort-by='.lastTimestamp'
|
||||||
|
```
|
||||||
|
|
||||||
|
## Production Considerations
|
||||||
|
|
||||||
|
### High Availability
|
||||||
|
- Minimum 3 replicas across availability zones
|
||||||
|
- Pod anti-affinity rules spread pods across nodes
|
||||||
|
- PodDisruptionBudget preserves service availability during voluntary disruptions (e.g. node drains and upgrades)
|
||||||
|
|
||||||
|
### Performance
|
||||||
|
- Adjust resource limits based on load testing
|
||||||
|
- Configure HPA thresholds based on traffic patterns
|
||||||
|
- Use node affinity for GPU nodes if needed
|
||||||
|
|
||||||
|
### Cost Optimization
|
||||||
|
- Use spot/preemptible instances for non-critical workloads
|
||||||
|
- Set appropriate resource requests/limits
|
||||||
|
- Monitor token usage and implement quotas
|
||||||
|
|
||||||
|
### Disaster Recovery
|
||||||
|
- Redis persistence (if using StatefulSet)
|
||||||
|
- Regular backups of conversation data
|
||||||
|
- Multi-region deployment for geo-redundancy
|
||||||
|
- Document runbooks for incident response
|
||||||
|
|
||||||
|
## Cloud-Specific Notes
|
||||||
|
|
||||||
|
### AWS EKS
|
||||||
|
- Use AWS Load Balancer Controller for ALB
|
||||||
|
- Configure IRSA for service account
|
||||||
|
- Use ElastiCache for Redis
|
||||||
|
- Store secrets in AWS Secrets Manager
|
||||||
|
|
||||||
|
### GCP GKE
|
||||||
|
- Use GKE Ingress for GCLB
|
||||||
|
- Configure Workload Identity
|
||||||
|
- Use Memorystore for Redis
|
||||||
|
- Store secrets in Google Secret Manager
|
||||||
|
|
||||||
|
### Azure AKS
|
||||||
|
- Use Azure Application Gateway Ingress Controller
|
||||||
|
- Configure Azure AD Workload Identity
|
||||||
|
- Use Azure Cache for Redis
|
||||||
|
- Store secrets in Azure Key Vault
|
||||||
|
|
||||||
|
## Troubleshooting
|
||||||
|
|
||||||
|
### Common Issues
|
||||||
|
|
||||||
|
**Pods not starting:**
|
||||||
|
```bash
|
||||||
|
kubectl describe pod -n llm-gateway -l app=llm-gateway
|
||||||
|
kubectl logs -n llm-gateway -l app=llm-gateway --previous
|
||||||
|
```
|
||||||
|
|
||||||
|
**Health check failures:**
|
||||||
|
```bash
|
||||||
|
kubectl port-forward -n llm-gateway deployment/llm-gateway 8080:8080
|
||||||
|
curl http://localhost:8080/health
|
||||||
|
curl http://localhost:8080/ready
|
||||||
|
```
|
||||||
|
|
||||||
|
**Provider connection issues:**
|
||||||
|
- Verify API keys in secrets
|
||||||
|
- Check network policies allow egress
|
||||||
|
- Verify provider endpoints are accessible
|
||||||
|
|
||||||
|
**Redis connection issues:**
|
||||||
|
```bash
|
||||||
|
kubectl exec -it -n llm-gateway redis-0 -- redis-cli ping
|
||||||
|
```
|
||||||
|
|
||||||
|
## Additional Resources
|
||||||
|
|
||||||
|
- [Kubernetes Documentation](https://kubernetes.io/docs/)
|
||||||
|
- [Prometheus Operator](https://github.com/prometheus-operator/prometheus-operator)
|
||||||
|
- [cert-manager](https://cert-manager.io/)
|
||||||
|
- [External Secrets Operator](https://external-secrets.io/)
|
||||||
76
k8s/configmap.yaml
Normal file
76
k8s/configmap.yaml
Normal file
@@ -0,0 +1,76 @@
|
|||||||
|
---
# Application configuration for the LLM Gateway, delivered as a single
# `config.yaml` key. The `${...}` placeholders carry the same names as the
# environment variables that k8s/deployment.yaml injects from the
# `llm-gateway-secrets` Secret.
# NOTE(review): the nesting of `metrics` and `tracing` under `observability`
# is reconstructed from the section grouping — confirm against the
# application's config schema.
apiVersion: v1
kind: ConfigMap
metadata:
  name: llm-gateway-config
  namespace: llm-gateway
  labels:
    app: llm-gateway
data:
  config.yaml: |
    server:
      address: ":8080"

    logging:
      format: "json"
      level: "info"

    rate_limit:
      enabled: true
      requests_per_second: 10
      burst: 20

    observability:
      enabled: true

      metrics:
        enabled: true
        path: "/metrics"

      tracing:
        enabled: true
        service_name: "llm-gateway"
        sampler:
          type: "probability"
          rate: 0.1
        exporter:
          type: "otlp"
          endpoint: "tempo.observability.svc.cluster.local:4317"
          insecure: true

    providers:
      google:
        type: "google"
        api_key: "${GOOGLE_API_KEY}"
        endpoint: "https://generativelanguage.googleapis.com"
      anthropic:
        type: "anthropic"
        api_key: "${ANTHROPIC_API_KEY}"
        endpoint: "https://api.anthropic.com"
      openai:
        type: "openai"
        api_key: "${OPENAI_API_KEY}"
        endpoint: "https://api.openai.com"

    conversations:
      store: "redis"
      ttl: "1h"
      dsn: "redis://redis.llm-gateway.svc.cluster.local:6379/0"

    auth:
      enabled: true
      issuer: "https://accounts.google.com"
      audience: "${OIDC_AUDIENCE}"

    models:
      - name: "gemini-1.5-flash"
        provider: "google"
      - name: "gemini-1.5-pro"
        provider: "google"
      - name: "claude-3-5-sonnet-20241022"
        provider: "anthropic"
      - name: "claude-3-5-haiku-20241022"
        provider: "anthropic"
      - name: "gpt-4o"
        provider: "openai"
      - name: "gpt-4o-mini"
        provider: "openai"
|
||||||
168
k8s/deployment.yaml
Normal file
168
k8s/deployment.yaml
Normal file
@@ -0,0 +1,168 @@
|
|||||||
|
---
# Deployment for the LLM Gateway API pods.
#
# `spec.replicas` is intentionally NOT set: the HorizontalPodAutoscaler in
# k8s/hpa.yaml (minReplicas 3, maxReplicas 20) targets this Deployment and
# owns the replica count. A hard-coded value here would reset the autoscaled
# count back to that number on every `kubectl apply`.
# If this manifest is applied without the HPA, the Deployment falls back to
# the Kubernetes default of a single replica.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: llm-gateway
  namespace: llm-gateway
  labels:
    app: llm-gateway
    version: v1
spec:
  # Zero-downtime rollout: add one new pod at a time, never drop below the
  # current count.
  strategy:
    type: RollingUpdate
    rollingUpdate:
      maxSurge: 1
      maxUnavailable: 0
  selector:
    matchLabels:
      app: llm-gateway
  template:
    metadata:
      labels:
        app: llm-gateway
        version: v1
      annotations:
        prometheus.io/scrape: "true"
        prometheus.io/port: "8080"
        prometheus.io/path: "/metrics"
    spec:
      serviceAccountName: llm-gateway
      securityContext:
        runAsNonRoot: true
        runAsUser: 1000
        runAsGroup: 1000
        fsGroup: 1000
        seccompProfile:
          type: RuntimeDefault

      containers:
        - name: gateway
          # The image name must stay `llm-gateway`: the `images` entry in
          # k8s/kustomization.yaml matches on it to substitute the registry.
          # NOTE(review): a mutable `latest` tag combined with IfNotPresent
          # can leave nodes running different builds — pin an immutable tag
          # or digest for production.
          image: llm-gateway:latest # Replace with your registry/image:tag
          imagePullPolicy: IfNotPresent

          ports:
            - name: http
              containerPort: 8080
              protocol: TCP

          env:
            # Provider API Keys from Secret
            - name: GOOGLE_API_KEY
              valueFrom:
                secretKeyRef:
                  name: llm-gateway-secrets
                  key: GOOGLE_API_KEY
            - name: ANTHROPIC_API_KEY
              valueFrom:
                secretKeyRef:
                  name: llm-gateway-secrets
                  key: ANTHROPIC_API_KEY
            - name: OPENAI_API_KEY
              valueFrom:
                secretKeyRef:
                  name: llm-gateway-secrets
                  key: OPENAI_API_KEY
            - name: OIDC_AUDIENCE
              valueFrom:
                secretKeyRef:
                  name: llm-gateway-secrets
                  key: OIDC_AUDIENCE

            # Optional: Pod metadata
            - name: POD_NAME
              valueFrom:
                fieldRef:
                  fieldPath: metadata.name
            - name: POD_NAMESPACE
              valueFrom:
                fieldRef:
                  fieldPath: metadata.namespace
            - name: POD_IP
              valueFrom:
                fieldRef:
                  fieldPath: status.podIP

          resources:
            requests:
              cpu: 100m
              memory: 128Mi
            limits:
              cpu: 1000m
              memory: 512Mi

          livenessProbe:
            httpGet:
              path: /health
              port: http
              scheme: HTTP
            initialDelaySeconds: 10
            periodSeconds: 30
            timeoutSeconds: 5
            successThreshold: 1
            failureThreshold: 3

          readinessProbe:
            httpGet:
              path: /ready
              port: http
              scheme: HTTP
            initialDelaySeconds: 5
            periodSeconds: 10
            timeoutSeconds: 5
            successThreshold: 1
            failureThreshold: 3

          # Allows up to 30 x 5s = 150s for startup before the liveness
          # probe takes over.
          startupProbe:
            httpGet:
              path: /health
              port: http
              scheme: HTTP
            initialDelaySeconds: 0
            periodSeconds: 5
            timeoutSeconds: 3
            successThreshold: 1
            failureThreshold: 30

          volumeMounts:
            - name: config
              mountPath: /app/config
              readOnly: true
            # Writable scratch space; the root filesystem is read-only.
            - name: tmp
              mountPath: /tmp

          securityContext:
            allowPrivilegeEscalation: false
            readOnlyRootFilesystem: true
            runAsNonRoot: true
            runAsUser: 1000
            capabilities:
              drop:
                - ALL

      volumes:
        - name: config
          configMap:
            name: llm-gateway-config
        - name: tmp
          emptyDir: {}

      # Affinity rules for better distribution
      affinity:
        podAntiAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 100
              podAffinityTerm:
                labelSelector:
                  matchExpressions:
                    - key: app
                      operator: In
                      values:
                        - llm-gateway
                topologyKey: kubernetes.io/hostname

      # Tolerations (if needed for specific node pools)
      # tolerations:
      #   - key: "workload-type"
      #     operator: "Equal"
      #     value: "llm"
      #     effect: "NoSchedule"
|
||||||
63
k8s/hpa.yaml
Normal file
63
k8s/hpa.yaml
Normal file
@@ -0,0 +1,63 @@
|
|||||||
|
---
# HorizontalPodAutoscaler for the llm-gateway Deployment.
# Scales between 3 and 20 replicas on average CPU (70%) and memory (80%)
# utilization.
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: llm-gateway
  namespace: llm-gateway
  labels:
    app: llm-gateway
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: llm-gateway

  minReplicas: 3
  maxReplicas: 20

  behavior:
    # Scale down conservatively: wait 5 minutes, then remove whichever is
    # smaller of 50% of the pods or 2 pods per minute.
    scaleDown:
      stabilizationWindowSeconds: 300
      policies:
        - type: Percent
          value: 50
          periodSeconds: 60
        - type: Pods
          value: 2
          periodSeconds: 60
      selectPolicy: Min
    # Scale up immediately: add whichever is larger of 100% of the pods or
    # 4 pods every 30 seconds.
    scaleUp:
      stabilizationWindowSeconds: 0
      policies:
        - type: Percent
          value: 100
          periodSeconds: 30
        - type: Pods
          value: 4
          periodSeconds: 30
      selectPolicy: Max

  metrics:
    # CPU-based scaling
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: 70

    # Memory-based scaling
    - type: Resource
      resource:
        name: memory
        target:
          type: Utilization
          averageUtilization: 80

    # Custom metrics (requires metrics-server and custom metrics API)
    # - type: Pods
    #   pods:
    #     metric:
    #       name: http_requests_per_second
    #     target:
    #       type: AverageValue
    #       averageValue: "1000"
|
||||||
66
k8s/ingress.yaml
Normal file
66
k8s/ingress.yaml
Normal file
@@ -0,0 +1,66 @@
|
|||||||
|
---
# Public HTTPS entry point for the LLM Gateway, routed through the NGINX
# ingress controller with a cert-manager issued certificate.
#
# The ingress class is selected with `spec.ingressClassName` instead of the
# deprecated `kubernetes.io/ingress.class` annotation.
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: llm-gateway
  namespace: llm-gateway
  labels:
    app: llm-gateway
  annotations:
    # TLS configuration
    cert-manager.io/cluster-issuer: "letsencrypt-prod"

    # Security headers
    nginx.ingress.kubernetes.io/force-ssl-redirect: "true"
    # NOTE(review): `ssl-protocols` looks like an ingress-nginx ConfigMap
    # option rather than a per-Ingress annotation — confirm it has an effect.
    nginx.ingress.kubernetes.io/ssl-protocols: "TLSv1.2 TLSv1.3"

    # Rate limiting (supplement application-level rate limiting)
    nginx.ingress.kubernetes.io/limit-rps: "100"
    nginx.ingress.kubernetes.io/limit-connections: "50"

    # Request size limit (10MB)
    nginx.ingress.kubernetes.io/proxy-body-size: "10m"

    # Timeouts
    nginx.ingress.kubernetes.io/proxy-connect-timeout: "60"
    nginx.ingress.kubernetes.io/proxy-send-timeout: "120"
    nginx.ingress.kubernetes.io/proxy-read-timeout: "120"

    # CORS (if needed)
    # nginx.ingress.kubernetes.io/enable-cors: "true"
    # nginx.ingress.kubernetes.io/cors-allow-origin: "https://yourdomain.com"
    # nginx.ingress.kubernetes.io/cors-allow-methods: "GET, POST, OPTIONS"
    # nginx.ingress.kubernetes.io/cors-allow-credentials: "true"

    # For AWS ALB Ingress Controller (alternative to nginx)
    # (also change spec.ingressClassName below)
    # kubernetes.io/ingress.class: "alb"
    # alb.ingress.kubernetes.io/scheme: "internet-facing"
    # alb.ingress.kubernetes.io/target-type: "ip"
    # alb.ingress.kubernetes.io/listen-ports: '[{"HTTP": 80}, {"HTTPS": 443}]'
    # alb.ingress.kubernetes.io/ssl-redirect: '443'
    # alb.ingress.kubernetes.io/certificate-arn: "arn:aws:acm:region:account:certificate/xxx"

    # For GKE Ingress (alternative to nginx)
    # (also remove spec.ingressClassName below)
    # kubernetes.io/ingress.class: "gce"
    # kubernetes.io/ingress.global-static-ip-name: "llm-gateway-ip"
    # ingress.gcp.kubernetes.io/pre-shared-cert: "llm-gateway-cert"

spec:
  ingressClassName: nginx

  tls:
    - hosts:
        - llm-gateway.example.com # Replace with your domain
      secretName: llm-gateway-tls

  rules:
    - host: llm-gateway.example.com # Replace with your domain
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: llm-gateway
                port:
                  number: 80
|
||||||
46
k8s/kustomization.yaml
Normal file
46
k8s/kustomization.yaml
Normal file
@@ -0,0 +1,46 @@
|
|||||||
|
---
# Kustomize configuration for easy deployment
# Usage: kubectl apply -k k8s/

apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization

namespace: llm-gateway

# NOTE(review): secret.yaml contains placeholder values; applying this
# kustomization as-is writes those placeholders into the Secret.
resources:
  - namespace.yaml
  - serviceaccount.yaml
  - configmap.yaml
  - secret.yaml
  - deployment.yaml
  - service.yaml
  - ingress.yaml
  - hpa.yaml
  - pdb.yaml
  - networkpolicy.yaml
  - redis.yaml
  - servicemonitor.yaml
  - prometheusrule.yaml

# Labels applied to the metadata of all resources.
# `labels` with `includeSelectors: false` is used instead of `commonLabels`
# because `commonLabels` also rewrites selectors. That would add these labels
# to the NetworkPolicy peer selectors (kube-dns, prometheus, tempo), which
# would then no longer match those pods, and to the immutable
# Deployment/StatefulSet selectors.
labels:
  - pairs:
      app.kubernetes.io/name: llm-gateway
      app.kubernetes.io/component: api-gateway
      app.kubernetes.io/part-of: llm-platform
    includeSelectors: false

# Images to be used (customize for your registry)
# NOTE(review): pin `newTag` to an immutable version or digest for production.
images:
  - name: llm-gateway
    newName: your-registry/llm-gateway
    newTag: latest

# ConfigMap generator (alternative to configmap.yaml)
# configMapGenerator:
#   - name: llm-gateway-config
#     files:
#       - config.yaml

# Secret generator (for local development only)
# secretGenerator:
#   - name: llm-gateway-secrets
#     envs:
#       - secrets.env
|
||||||
7
k8s/namespace.yaml
Normal file
7
k8s/namespace.yaml
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
---
# Namespace that holds every LLM Gateway resource (gateway, Redis, monitoring
# objects).
apiVersion: v1
kind: Namespace
metadata:
  name: llm-gateway
  labels:
    app: llm-gateway
    environment: production
|
||||||
83
k8s/networkpolicy.yaml
Normal file
83
k8s/networkpolicy.yaml
Normal file
@@ -0,0 +1,83 @@
|
|||||||
|
---
# Network isolation for the gateway pods (app: llm-gateway).
#
# Ingress: ingress controller, same-namespace pods and Prometheus, all on
# TCP 8080.
# Egress: DNS, Redis, HTTPS to provider APIs, and OTLP to Tempo.
#
# Namespace peers are matched on `kubernetes.io/metadata.name`, which the API
# server sets on every namespace, rather than on a `name` label that exists
# only if someone added it by hand.
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
  name: llm-gateway
  namespace: llm-gateway
  labels:
    app: llm-gateway
spec:
  podSelector:
    matchLabels:
      app: llm-gateway

  policyTypes:
    - Ingress
    - Egress

  ingress:
    # Allow traffic from ingress controller
    - from:
        - namespaceSelector:
            matchLabels:
              kubernetes.io/metadata.name: ingress-nginx
      ports:
        - protocol: TCP
          port: 8080

    # Allow traffic from within the namespace (for debugging/testing)
    - from:
        - podSelector: {}
      ports:
        - protocol: TCP
          port: 8080

    # Allow Prometheus scraping
    # NOTE(review): confirm the Prometheus pods actually carry
    # `app: prometheus`; operator-managed pods may use different labels.
    - from:
        - namespaceSelector:
            matchLabels:
              kubernetes.io/metadata.name: observability
          podSelector:
            matchLabels:
              app: prometheus
      ports:
        - protocol: TCP
          port: 8080

  egress:
    # Allow DNS. TCP is needed as well as UDP: resolvers fall back to TCP
    # for truncated or large responses.
    - to:
        - namespaceSelector: {}
          podSelector:
            matchLabels:
              k8s-app: kube-dns
      ports:
        - protocol: UDP
          port: 53
        - protocol: TCP
          port: 53

    # Allow Redis access
    - to:
        - podSelector:
            matchLabels:
              app: redis
      ports:
        - protocol: TCP
          port: 6379

    # Allow external provider API access (OpenAI, Anthropic, Google).
    # `namespaceSelector: {}` only matches pods inside the cluster, so an
    # `ipBlock` peer is required to reach addresses outside it. The
    # namespaceSelector peer is kept so in-cluster HTTPS stays allowed.
    - to:
        - namespaceSelector: {}
        - ipBlock:
            cidr: 0.0.0.0/0
      ports:
        - protocol: TCP
          port: 443

    # Allow OTLP tracing export
    - to:
        - namespaceSelector:
            matchLabels:
              kubernetes.io/metadata.name: observability
          podSelector:
            matchLabels:
              app: tempo
      ports:
        - protocol: TCP
          port: 4317
|
||||||
13
k8s/pdb.yaml
Normal file
13
k8s/pdb.yaml
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
---
# PodDisruptionBudget: keep at least two gateway pods available during
# voluntary disruptions such as node drains.
apiVersion: policy/v1
kind: PodDisruptionBudget
metadata:
  name: llm-gateway
  namespace: llm-gateway
  labels:
    app: llm-gateway
spec:
  minAvailable: 2
  selector:
    matchLabels:
      app: llm-gateway
  # Pods that are running but not Ready may always be evicted, so an
  # unhealthy pod cannot block a drain.
  unhealthyPodEvictionPolicy: AlwaysAllow
|
||||||
122
k8s/prometheusrule.yaml
Normal file
122
k8s/prometheusrule.yaml
Normal file
@@ -0,0 +1,122 @@
|
|||||||
|
---
# PrometheusRule for alerting
# Requires Prometheus Operator to be installed
#
# Every rule filters on namespace="llm-gateway" and carries the
# `component: llm-gateway` label.

apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: llm-gateway
  namespace: llm-gateway
  labels:
    app: llm-gateway
    prometheus: kube-prometheus
spec:
  groups:
    - name: llm-gateway.rules
      interval: 30s
      rules:

        # High error rate: more than 5% of HTTP responses are 5xx
        - alert: LLMGatewayHighErrorRate
          expr: |
            (
              sum(rate(http_requests_total{namespace="llm-gateway",status_code=~"5.."}[5m]))
              /
              sum(rate(http_requests_total{namespace="llm-gateway"}[5m]))
            ) > 0.05
          for: 5m
          labels:
            severity: warning
            component: llm-gateway
          annotations:
            summary: "High error rate in LLM Gateway"
            description: "Error rate is {{ $value | humanizePercentage }} (threshold: 5%)"

        # High latency: P95 request duration above 10 seconds
        - alert: LLMGatewayHighLatency
          expr: |
            histogram_quantile(0.95,
              sum(rate(http_request_duration_seconds_bucket{namespace="llm-gateway"}[5m])) by (le)
            ) > 10
          for: 5m
          labels:
            severity: warning
            component: llm-gateway
          annotations:
            summary: "High latency in LLM Gateway"
            description: "P95 latency is {{ $value }}s (threshold: 10s)"

        # Provider errors: more than 10% failed requests, per provider
        - alert: LLMProviderHighErrorRate
          expr: |
            (
              sum(rate(provider_requests_total{namespace="llm-gateway",status="error"}[5m])) by (provider)
              /
              sum(rate(provider_requests_total{namespace="llm-gateway"}[5m])) by (provider)
            ) > 0.10
          for: 5m
          labels:
            severity: warning
            component: llm-gateway
          annotations:
            summary: "High error rate for provider {{ $labels.provider }}"
            description: "Error rate is {{ $value | humanizePercentage }} (threshold: 10%)"

        # Pod down: scrape target has been failing for 2 minutes
        - alert: LLMGatewayPodDown
          expr: |
            up{job="llm-gateway",namespace="llm-gateway"} == 0
          for: 2m
          labels:
            severity: critical
            component: llm-gateway
          annotations:
            summary: "LLM Gateway pod is down"
            description: "Pod {{ $labels.pod }} has been down for more than 2 minutes"

        # High memory usage: working set above 85% of the container limit
        - alert: LLMGatewayHighMemoryUsage
          expr: |
            (
              container_memory_working_set_bytes{namespace="llm-gateway",container="gateway"}
              /
              container_spec_memory_limit_bytes{namespace="llm-gateway",container="gateway"}
            ) > 0.85
          for: 5m
          labels:
            severity: warning
            component: llm-gateway
          annotations:
            summary: "High memory usage in LLM Gateway"
            description: "Memory usage is {{ $value | humanizePercentage }} (threshold: 85%)"

        # Rate limit threshold: more than 20% of responses are HTTP 429
        - alert: LLMGatewayHighRateLimitHitRate
          expr: |
            (
              sum(rate(http_requests_total{namespace="llm-gateway",status_code="429"}[5m]))
              /
              sum(rate(http_requests_total{namespace="llm-gateway"}[5m]))
            ) > 0.20
          for: 10m
          labels:
            severity: info
            component: llm-gateway
          annotations:
            summary: "High rate limit hit rate"
            description: "{{ $value | humanizePercentage }} of requests are being rate limited"

        # Conversation store errors: more than 5% of store operations fail
        - alert: LLMGatewayConversationStoreErrors
          expr: |
            (
              sum(rate(conversation_store_operations_total{namespace="llm-gateway",status="error"}[5m]))
              /
              sum(rate(conversation_store_operations_total{namespace="llm-gateway"}[5m]))
            ) > 0.05
          for: 5m
          labels:
            severity: warning
            component: llm-gateway
          annotations:
            summary: "High error rate in conversation store"
            description: "Error rate is {{ $value | humanizePercentage }} (threshold: 5%)"
|
||||||
131
k8s/redis.yaml
Normal file
131
k8s/redis.yaml
Normal file
@@ -0,0 +1,131 @@
|
|||||||
|
---
# Simple Redis deployment for conversation storage
# For production, consider using:
# - Redis Operator (e.g., Redis Enterprise Operator)
# - Managed Redis (AWS ElastiCache, GCP Memorystore, Azure Cache for Redis)
# - Redis Cluster for high availability

# redis.conf below caps memory at 256mb with LRU eviction across all keys and
# turns off both RDB snapshots (`save ""`) and the append-only file.
apiVersion: v1
kind: ConfigMap
metadata:
  name: redis-config
  namespace: llm-gateway
  labels:
    app: redis
data:
  redis.conf: |
    maxmemory 256mb
    maxmemory-policy allkeys-lru
    save ""
    appendonly no
|
||||||
|
---
|
||||||
|
# Single-replica Redis StatefulSet, started with the config file mounted from
# the `redis-config` ConfigMap.
# NOTE(review): that config disables persistence, so the 10Gi `data` volume
# claim may be unnecessary — confirm before provisioning storage.
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: redis
  namespace: llm-gateway
  labels:
    app: redis
spec:
  serviceName: redis
  replicas: 1
  selector:
    matchLabels:
      app: redis
  template:
    metadata:
      labels:
        app: redis
    spec:
      securityContext:
        runAsNonRoot: true
        runAsUser: 999
        fsGroup: 999
        seccompProfile:
          type: RuntimeDefault

      containers:
        - name: redis
          image: redis:7.2-alpine
          imagePullPolicy: IfNotPresent

          command:
            - redis-server
            - /etc/redis/redis.conf

          ports:
            - name: redis
              containerPort: 6379
              protocol: TCP

          resources:
            requests:
              cpu: 100m
              memory: 128Mi
            limits:
              cpu: 500m
              memory: 512Mi

          # Liveness only checks that the port accepts connections.
          livenessProbe:
            tcpSocket:
              port: redis
            initialDelaySeconds: 10
            periodSeconds: 10
            timeoutSeconds: 5
            failureThreshold: 3

          # Readiness runs `redis-cli ping` inside the container.
          readinessProbe:
            exec:
              command:
                - redis-cli
                - ping
            initialDelaySeconds: 5
            periodSeconds: 5
            timeoutSeconds: 3
            failureThreshold: 3

          volumeMounts:
            - name: config
              mountPath: /etc/redis
            - name: data
              mountPath: /data

          securityContext:
            allowPrivilegeEscalation: false
            readOnlyRootFilesystem: true
            runAsNonRoot: true
            runAsUser: 999
            capabilities:
              drop:
                - ALL

      volumes:
        - name: config
          configMap:
            name: redis-config

  volumeClaimTemplates:
    - metadata:
        name: data
      spec:
        accessModes:
          - "ReadWriteOnce"
        resources:
          requests:
            storage: 10Gi
|
||||||
|
---
|
||||||
|
# Headless Service (clusterIP: None) for the Redis StatefulSet; it is the
# `serviceName` the StatefulSet refers to.
apiVersion: v1
kind: Service
metadata:
  name: redis
  namespace: llm-gateway
  labels:
    app: redis
spec:
  type: ClusterIP
  clusterIP: None
  selector:
    app: redis
  ports:
    - name: redis
      port: 6379
      targetPort: redis
      protocol: TCP
|
||||||
46
k8s/secret.yaml
Normal file
46
k8s/secret.yaml
Normal file
@@ -0,0 +1,46 @@
|
|||||||
|
---
# Secret consumed by the gateway Deployment through `secretKeyRef` env vars.
# The values below are PLACEHOLDERS, not real credentials.
apiVersion: v1
kind: Secret
metadata:
  name: llm-gateway-secrets
  namespace: llm-gateway
  labels:
    app: llm-gateway
type: Opaque
stringData:
  # IMPORTANT: Replace these with actual values or use external secret management
  # For production, use:
  # - kubectl create secret generic llm-gateway-secrets --from-literal=...
  # - External Secrets Operator with AWS Secrets Manager/HashiCorp Vault
  # - Sealed Secrets
  GOOGLE_API_KEY: "your-google-api-key-here"
  ANTHROPIC_API_KEY: "your-anthropic-api-key-here"
  OPENAI_API_KEY: "your-openai-api-key-here"
  OIDC_AUDIENCE: "your-client-id.apps.googleusercontent.com"
|
||||||
|
---
|
||||||
|
# Example using External Secrets Operator (commented out)
|
||||||
|
# apiVersion: external-secrets.io/v1beta1
|
||||||
|
# kind: ExternalSecret
|
||||||
|
# metadata:
|
||||||
|
# name: llm-gateway-secrets
|
||||||
|
# namespace: llm-gateway
|
||||||
|
# spec:
|
||||||
|
# refreshInterval: 1h
|
||||||
|
# secretStoreRef:
|
||||||
|
# name: aws-secrets-manager
|
||||||
|
# kind: SecretStore
|
||||||
|
# target:
|
||||||
|
# name: llm-gateway-secrets
|
||||||
|
# creationPolicy: Owner
|
||||||
|
# data:
|
||||||
|
# - secretKey: GOOGLE_API_KEY
|
||||||
|
# remoteRef:
|
||||||
|
# key: prod/llm-gateway/google-api-key
|
||||||
|
# - secretKey: ANTHROPIC_API_KEY
|
||||||
|
# remoteRef:
|
||||||
|
# key: prod/llm-gateway/anthropic-api-key
|
||||||
|
# - secretKey: OPENAI_API_KEY
|
||||||
|
# remoteRef:
|
||||||
|
# key: prod/llm-gateway/openai-api-key
|
||||||
|
# - secretKey: OIDC_AUDIENCE
|
||||||
|
# remoteRef:
|
||||||
|
# key: prod/llm-gateway/oidc-audience
|
||||||
40
k8s/service.yaml
Normal file
40
k8s/service.yaml
Normal file
@@ -0,0 +1,40 @@
|
|||||||
|
---
# ClusterIP Service in front of the gateway pods: exposes port 80 and
# forwards to the container port named `http`.
apiVersion: v1
kind: Service
metadata:
  name: llm-gateway
  namespace: llm-gateway
  labels:
    app: llm-gateway
  annotations:
    # For cloud load balancers (uncomment as needed)
    # service.beta.kubernetes.io/aws-load-balancer-type: "nlb"
    # cloud.google.com/neg: '{"ingress": true}'
spec:
  type: ClusterIP
  selector:
    app: llm-gateway
  ports:
    - name: http
      port: 80
      targetPort: http
      protocol: TCP
  sessionAffinity: None
|
||||||
|
---
|
||||||
|
# Headless service for pod-to-pod communication (if needed)
#
# Its metadata label is deliberately NOT `app: llm-gateway`. The
# ServiceMonitor in k8s/servicemonitor.yaml selects Services by that label
# and scrapes the port named `http`; if this Service carried the label too,
# every pod would be scraped twice (once per Service) and request metrics
# would be double-counted. The pod selector below is unchanged.
apiVersion: v1
kind: Service
metadata:
  name: llm-gateway-headless
  namespace: llm-gateway
  labels:
    app: llm-gateway-headless
spec:
  type: ClusterIP
  clusterIP: None
  selector:
    app: llm-gateway
  ports:
    - name: http
      port: 8080
      targetPort: http
      protocol: TCP
|
||||||
14
k8s/serviceaccount.yaml
Normal file
14
k8s/serviceaccount.yaml
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
---
# ServiceAccount used by the gateway pods (`serviceAccountName: llm-gateway`
# in k8s/deployment.yaml). Uncomment one annotation below to bind it to a
# cloud IAM identity.
# NOTE(review): if the gateway never calls the Kubernetes API, consider
# setting automountServiceAccountToken to false — confirm first.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: llm-gateway
  namespace: llm-gateway
  labels:
    app: llm-gateway
  annotations:
    # For GKE Workload Identity
    # iam.gke.io/gcp-service-account: llm-gateway@PROJECT_ID.iam.gserviceaccount.com

    # For EKS IRSA (IAM Roles for Service Accounts)
    # eks.amazonaws.com/role-arn: arn:aws:iam::ACCOUNT_ID:role/llm-gateway-role
automountServiceAccountToken: true
|
||||||
35
k8s/servicemonitor.yaml
Normal file
35
k8s/servicemonitor.yaml
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
---
# ServiceMonitor for Prometheus Operator
# Requires Prometheus Operator to be installed
# https://github.com/prometheus-operator/prometheus-operator
#
# Scrapes /metrics every 30s on the `http` port of every Service labelled
# `app: llm-gateway` in this namespace.

apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: llm-gateway
  namespace: llm-gateway
  labels:
    app: llm-gateway
    prometheus: kube-prometheus
spec:
  selector:
    matchLabels:
      app: llm-gateway

  endpoints:
    - port: http
      path: /metrics
      interval: 30s
      scrapeTimeout: 10s

      relabelings:
        # Add namespace label
        - sourceLabels:
            - __meta_kubernetes_namespace
          targetLabel: namespace

        # Add pod label
        - sourceLabels:
            - __meta_kubernetes_pod_name
          targetLabel: pod

        # Add service label
        - sourceLabels:
            - __meta_kubernetes_service_name
          targetLabel: service
|
||||||
Reference in New Issue
Block a user