Add Dockerfile and Manifests
This commit is contained in:
65
.dockerignore
Normal file
65
.dockerignore
Normal file
@@ -0,0 +1,65 @@
|
||||
# Files excluded from the Docker build context.
#
# NOTE: .dockerignore patterns are matched against the context root and have
# no implicit recursion. A bare `*.pem` only matches files directly in the
# root, so patterns that must apply at any depth use the `**/` prefix.

# Git
.git
.gitignore
.github

# Documentation
*.md
docs/

# IDE / editor files
.vscode/
.idea/
**/*.swp
**/*.swo
**/*~

# Build artifacts
/bin/
/dist/
/build/
/gateway
/cmd/gateway/gateway
**/*.exe
**/*.dll
**/*.so
**/*.dylib
**/*.test
**/*.out

# Configuration files with secrets
# (config.yaml / config.json are intentionally root-only so that example or
# test fixture configs in subdirectories remain available to the build)
config.yaml
config.json
**/*-local.yaml
**/*-local.json
**/.env
**/.env.local
**/*.key
**/*.pem

# Test and coverage
coverage.out
**/*.log
logs/

# OS
**/.DS_Store
**/Thumbs.db

# Dependencies (will be downloaded during build)
vendor/

# Python
**/__pycache__/
**/*.py[cod]

# Node
tests/node_modules/

# Jujutsu
.jj/

# Claude
.claude/

# Data directories
data/
**/*.db
|
||||
181
.github/workflows/ci.yaml
vendored
Normal file
181
.github/workflows/ci.yaml
vendored
Normal file
@@ -0,0 +1,181 @@
|
||||
# Continuous integration: test, lint, security scan, build, and (on pushes to
# main/develop) build + push + scan the container image.
name: CI

on:
  push:
    branches: [main, develop]
  pull_request:
    branches: [main, develop]

env:
  GO_VERSION: '1.23'
  REGISTRY: ghcr.io
  IMAGE_NAME: ${{ github.repository }}

jobs:
  test:
    name: Test
    runs-on: ubuntu-latest

    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Go
        uses: actions/setup-go@v5
        with:
          go-version: ${{ env.GO_VERSION }}
          cache: true

      - name: Download dependencies
        run: go mod download

      - name: Verify dependencies
        run: go mod verify

      - name: Run tests
        run: go test -v -race -coverprofile=coverage.out ./...

      - name: Upload coverage to Codecov
        uses: codecov/codecov-action@v4
        with:
          file: ./coverage.out
          flags: unittests
          name: codecov-umbrella

      - name: Generate coverage report
        run: go tool cover -html=coverage.out -o coverage.html

      - name: Upload coverage report
        uses: actions/upload-artifact@v4
        with:
          name: coverage-report
          path: coverage.html

  lint:
    name: Lint
    runs-on: ubuntu-latest

    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Go
        uses: actions/setup-go@v5
        with:
          go-version: ${{ env.GO_VERSION }}
          cache: true

      - name: Run golangci-lint
        uses: golangci/golangci-lint-action@v4
        with:
          version: latest
          args: --timeout=5m

  security:
    name: Security Scan
    runs-on: ubuntu-latest

    # upload-sarif writes code-scanning results, which requires
    # security-events: write; contents: read is needed for checkout.
    permissions:
      contents: read
      security-events: write

    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Go
        uses: actions/setup-go@v5
        with:
          go-version: ${{ env.GO_VERSION }}
          cache: true

      # Pinned to a release tag instead of the moving `master` branch so a
      # breaking or compromised upstream commit cannot change CI behaviour.
      - name: Run Gosec Security Scanner
        uses: securego/gosec@v2.21.4
        with:
          # -no-fail: findings are reported via SARIF, not by failing the job.
          args: '-no-fail -fmt sarif -out results.sarif ./...'

      - name: Upload SARIF file
        uses: github/codeql-action/upload-sarif@v3
        with:
          sarif_file: results.sarif

  build:
    name: Build
    runs-on: ubuntu-latest
    needs: [test, lint]

    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Go
        uses: actions/setup-go@v5
        with:
          go-version: ${{ env.GO_VERSION }}
          cache: true

      - name: Build binary
        run: |
          CGO_ENABLED=1 go build -v -o bin/gateway ./cmd/gateway

      - name: Upload binary
        uses: actions/upload-artifact@v4
        with:
          name: gateway-binary
          path: bin/gateway

  docker:
    name: Build and Push Docker Image
    runs-on: ubuntu-latest
    needs: [test, lint, security]
    if: github.event_name == 'push' && (github.ref == 'refs/heads/main' || github.ref == 'refs/heads/develop')

    # Explicit permissions replace the defaults entirely, so everything the
    # job needs must be listed: packages (push to GHCR) and security-events
    # (upload the Trivy SARIF report).
    permissions:
      contents: read
      packages: write
      security-events: write

    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Log in to Container Registry
        uses: docker/login-action@v3
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Extract metadata
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
          tags: |
            type=ref,event=branch
            type=ref,event=pr
            type=semver,pattern={{version}}
            type=semver,pattern={{major}}.{{minor}}
            type=sha,prefix={{branch}}-
            type=raw,value=latest,enable=${{ github.ref == 'refs/heads/main' }}

      - name: Build and push Docker image
        uses: docker/build-push-action@v5
        with:
          context: .
          push: true
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          cache-from: type=gha
          cache-to: type=gha,mode=max
          platforms: linux/amd64,linux/arm64

      # Pinned to a release tag instead of `master` (see gosec note above).
      - name: Run Trivy vulnerability scanner
        uses: aquasecurity/trivy-action@0.28.0
        with:
          # Scan a reference that was actually pushed. The metadata step never
          # emits a bare `<full sha>` tag (the sha tag is `<branch>-<short sha>`),
          # so take the first generated tag, which is fully qualified and
          # already lower-cased by metadata-action.
          image-ref: ${{ fromJSON(steps.meta.outputs.json).tags[0] }}
          format: 'sarif'
          output: 'trivy-results.sarif'

      - name: Upload Trivy results to GitHub Security
        uses: github/codeql-action/upload-sarif@v3
        with:
          sarif_file: 'trivy-results.sarif'
|
||||
129
.github/workflows/release.yaml
vendored
Normal file
129
.github/workflows/release.yaml
vendored
Normal file
@@ -0,0 +1,129 @@
|
||||
# Release pipeline, triggered by pushing a `v*` tag:
#   test -> build (one job per OS/arch) -> release (image push + GitHub Release)
name: Release

on:
  push:
    tags:
      - 'v*'

env:
  GO_VERSION: '1.23'
  REGISTRY: ghcr.io
  IMAGE_NAME: ${{ github.repository }}

jobs:
  test:
    name: Test
    runs-on: ubuntu-latest

    permissions:
      contents: read

    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Go
        uses: actions/setup-go@v5
        with:
          go-version: ${{ env.GO_VERSION }}

      - name: Run tests
        run: go test -v ./...

  # The binaries are built with CGO_ENABLED=1, so every target needs a C
  # toolchain for that target. A plain Ubuntu runner only has one for
  # linux/amd64: linux/arm64 needs the aarch64 cross-compiler, and darwin
  # targets are built on a macOS runner (Apple clang handles both archs).
  build:
    name: Build ${{ matrix.goos }}/${{ matrix.goarch }}
    needs: [test]
    runs-on: ${{ matrix.runner }}

    permissions:
      contents: read

    strategy:
      matrix:
        include:
          - goos: linux
            goarch: amd64
            runner: ubuntu-latest
          - goos: linux
            goarch: arm64
            runner: ubuntu-latest
          - goos: darwin
            goarch: amd64
            runner: macos-latest
          - goos: darwin
            goarch: arm64
            runner: macos-latest

    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Go
        uses: actions/setup-go@v5
        with:
          go-version: ${{ env.GO_VERSION }}

      - name: Install arm64 cross-compiler
        if: matrix.goos == 'linux' && matrix.goarch == 'arm64'
        run: |
          sudo apt-get update
          sudo apt-get install -y gcc-aarch64-linux-gnu

      - name: Build binary
        env:
          GOOS: ${{ matrix.goos }}
          GOARCH: ${{ matrix.goarch }}
          CGO_ENABLED: '1'
        run: |
          # cgo must use the cross C compiler when targeting linux/arm64
          if [ "$GOOS/$GOARCH" = "linux/arm64" ]; then
            export CC=aarch64-linux-gnu-gcc
          fi
          go build -o "bin/gateway-$GOOS-$GOARCH" ./cmd/gateway

      - name: Upload binary
        uses: actions/upload-artifact@v4
        with:
          name: gateway-${{ matrix.goos }}-${{ matrix.goarch }}
          path: bin/gateway-${{ matrix.goos }}-${{ matrix.goarch }}
          if-no-files-found: error

  release:
    name: Create Release
    runs-on: ubuntu-latest
    needs: [build]

    permissions:
      contents: write
      packages: write

    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # Full history and tags are required to compute the changelog range.
          fetch-depth: 0

      - name: Download binaries
        uses: actions/download-artifact@v4
        with:
          pattern: gateway-*
          path: bin
          merge-multiple: true

      - name: Create checksums
        run: |
          cd bin
          sha256sum gateway-* > checksums.txt

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Log in to Container Registry
        uses: docker/login-action@v3
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Extract metadata
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
          # `latest` is only moved for stable releases; the condition mirrors
          # the `prerelease` expression of the Create Release step.
          tags: |
            type=semver,pattern={{version}}
            type=semver,pattern={{major}}.{{minor}}
            type=semver,pattern={{major}}
            type=raw,value=latest,enable=${{ !(contains(github.ref, 'alpha') || contains(github.ref, 'beta') || contains(github.ref, 'rc')) }}

      - name: Build and push Docker image
        uses: docker/build-push-action@v5
        with:
          context: .
          push: true
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          platforms: linux/amd64,linux/arm64
          cache-from: type=gha
          cache-to: type=gha,mode=max

      - name: Generate changelog
        id: changelog
        run: |
          # Commits since the previous tag; the very first release has no
          # previous tag, so fall back to the whole history.
          if previous_tag=$(git describe --tags --abbrev=0 HEAD^ 2>/dev/null); then
            range="${previous_tag}..HEAD"
          else
            range="HEAD"
          fi

          # tformat (not format) terminates the last entry with a newline, so
          # the closing EOF delimiter below lands on a line of its own.
          git log "$range" --pretty=tformat:"* %s (%h)" > CHANGELOG.txt

          {
            echo "changelog<<EOF"
            cat CHANGELOG.txt
            echo "EOF"
          } >> "$GITHUB_OUTPUT"

      - name: Create Release
        uses: softprops/action-gh-release@v2
        with:
          # Image tags come from the semver pattern and carry no leading "v"
          # (tag v1.2.3 -> image 1.2.3), hence steps.meta.outputs.version
          # rather than github.ref_name in the pull/run commands.
          body: |
            ## Changes
            ${{ steps.changelog.outputs.changelog }}

            ## Docker Images
            ```
            docker pull ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ steps.meta.outputs.version }}
            docker pull ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:latest
            ```

            ## Installation

            ### Kubernetes
            ```bash
            kubectl apply -k k8s/
            ```

            ### Docker
            ```bash
            docker run -p 8080:8080 \
              -e GOOGLE_API_KEY=your-key \
              -e ANTHROPIC_API_KEY=your-key \
              -e OPENAI_API_KEY=your-key \
              ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ steps.meta.outputs.version }}
            ```
          files: |
            bin/gateway-*
            bin/checksums.txt
          draft: false
          prerelease: ${{ contains(github.ref, 'alpha') || contains(github.ref, 'beta') || contains(github.ref, 'rc') }}
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
62
Dockerfile
Normal file
62
Dockerfile
Normal file
@@ -0,0 +1,62 @@
|
||||
# Multi-stage build for Go LLM Gateway

# Go toolchain version for the builder stage. Defaults to the release line
# used by CI; override with `--build-arg GO_VERSION=...`.
ARG GO_VERSION=1.23

# Stage 1: Build the Go binary
FROM golang:${GO_VERSION}-alpine AS builder

# Install build dependencies.
# gcc and musl-dev are needed because CGO is required for SQLite support;
# they are installed here (before any source is copied) so this layer stays
# cached across source changes.
RUN apk add --no-cache git ca-certificates tzdata gcc musl-dev

WORKDIR /build

# Copy go mod files first for better caching
COPY go.mod go.sum ./
RUN go mod download

# Copy source code
COPY . .

# Build a statically linked binary with optimizations.
# GOOS/GOARCH are deliberately not set: in a multi-platform build
# (e.g. linux/amd64,linux/arm64) this stage runs once per target platform,
# so the native toolchain already produces the right architecture.
# Hard-coding GOARCH=amd64 would put an amd64 binary in the arm64 image.
RUN CGO_ENABLED=1 go build \
    -ldflags='-w -s -extldflags "-static"' \
    -a -installsuffix cgo \
    -o gateway \
    ./cmd/gateway

# Stage 2: Create minimal runtime image
FROM alpine:3.19

# Install runtime dependencies
RUN apk add --no-cache ca-certificates tzdata

# Create non-root user
RUN addgroup -g 1000 gateway && \
    adduser -D -u 1000 -G gateway gateway

# Create necessary directories.
# /app/config is where the default CMD expects config.yaml to be mounted.
RUN mkdir -p /app /app/data /app/config && \
    chown -R gateway:gateway /app

WORKDIR /app

# Copy binary from builder
COPY --from=builder /build/gateway /app/gateway

# Copy example config (optional, mainly for documentation)
COPY config.example.yaml /app/config.example.yaml

# Switch to non-root user
USER gateway

# Expose port
EXPOSE 8080

# Health check
HEALTHCHECK --interval=30s --timeout=5s --start-period=5s --retries=3 \
    CMD wget --no-verbose --tries=1 --spider http://localhost:8080/health || exit 1

# Set entrypoint
ENTRYPOINT ["/app/gateway"]

# Default command (can be overridden).
# No config.yaml is baked into the image; mount one at this path.
CMD ["--config", "/app/config/config.yaml"]
|
||||
151
Makefile
Normal file
151
Makefile
Normal file
@@ -0,0 +1,151 @@
|
||||
# Makefile for LLM Gateway
#
# Run `make help` for the list of targets. A target appears in the help
# output when its rule line carries a trailing `## description` comment.

# Every target here is a command, not a file. Declaring all of them phony
# keeps a stray file or directory named e.g. `test`, `run` or `lint` from
# making the target look up to date.
.PHONY: help build build-static test test-coverage fmt lint tidy clean \
	docker-build docker-push docker-run \
	docker-compose-up docker-compose-down docker-compose-logs \
	k8s-namespace k8s-secrets k8s-deploy k8s-delete k8s-status k8s-logs \
	k8s-describe k8s-port-forward \
	ci security-scan run version

# Variables
APP_NAME := llm-gateway
VERSION := $(shell git describe --tags --always --dirty 2>/dev/null || echo "dev")
REGISTRY ?= your-registry
IMAGE := $(REGISTRY)/$(APP_NAME)
DOCKER_TAG := $(IMAGE):$(VERSION)
LATEST_TAG := $(IMAGE):latest

# Go variables
GOCMD := go
GOBUILD := $(GOCMD) build
GOTEST := $(GOCMD) test
GOMOD := $(GOCMD) mod
GOFMT := $(GOCMD) fmt

# Build directory
BUILD_DIR := bin

# Help target
# The target-name pattern includes digits so that k8s-* targets are listed.
help: ## Show this help message
	@echo "Usage: make [target]"
	@echo ""
	@echo "Targets:"
	@awk 'BEGIN {FS = ":.*##"; printf "\n"} /^[a-zA-Z0-9_-]+:.*?##/ { printf " %-20s %s\n", $$1, $$2 }' $(MAKEFILE_LIST)

# Development targets
build: ## Build the binary
	@echo "Building $(APP_NAME)..."
	CGO_ENABLED=1 $(GOBUILD) -o $(BUILD_DIR)/$(APP_NAME) ./cmd/gateway

build-static: ## Build static binary
	@echo "Building static binary..."
	CGO_ENABLED=1 $(GOBUILD) -ldflags='-w -s -extldflags "-static"' -a -installsuffix cgo -o $(BUILD_DIR)/$(APP_NAME) ./cmd/gateway

test: ## Run tests
	@echo "Running tests..."
	$(GOTEST) -v -race -coverprofile=coverage.out ./...

test-coverage: test ## Run tests with coverage report
	@echo "Generating coverage report..."
	$(GOCMD) tool cover -html=coverage.out -o coverage.html
	@echo "Coverage report saved to coverage.html"

fmt: ## Format Go code
	@echo "Formatting code..."
	$(GOFMT) ./...

lint: ## Run linter
	@echo "Running linter..."
	@which golangci-lint > /dev/null || (echo "golangci-lint not installed. Run: go install github.com/golangci/golangci-lint/cmd/golangci-lint@latest" && exit 1)
	golangci-lint run ./...

tidy: ## Tidy go modules
	@echo "Tidying go modules..."
	$(GOMOD) tidy

clean: ## Clean build artifacts
	@echo "Cleaning..."
	rm -rf $(BUILD_DIR)
	rm -f coverage.out coverage.html

# Docker targets
docker-build: ## Build Docker image
	@echo "Building Docker image $(DOCKER_TAG)..."
	docker build -t $(DOCKER_TAG) -t $(LATEST_TAG) .

docker-push: docker-build ## Push Docker image to registry
	@echo "Pushing Docker image..."
	docker push $(DOCKER_TAG)
	docker push $(LATEST_TAG)

# $(CURDIR) is set by make itself; $(PWD) is inherited from the environment
# and can be stale or unset (e.g. under `make -C <dir>`).
docker-run: ## Run Docker container locally
	@echo "Running Docker container..."
	docker run --rm -p 8080:8080 \
		-e GOOGLE_API_KEY="$(GOOGLE_API_KEY)" \
		-e ANTHROPIC_API_KEY="$(ANTHROPIC_API_KEY)" \
		-e OPENAI_API_KEY="$(OPENAI_API_KEY)" \
		-v $(CURDIR)/config.yaml:/app/config/config.yaml:ro \
		$(DOCKER_TAG)

docker-compose-up: ## Start services with docker-compose
	@echo "Starting services with docker-compose..."
	docker-compose up -d

docker-compose-down: ## Stop services with docker-compose
	@echo "Stopping services with docker-compose..."
	docker-compose down

docker-compose-logs: ## View docker-compose logs
	docker-compose logs -f

# Kubernetes targets
# `create --dry-run=client -o yaml | kubectl apply -f -` makes the create
# commands below safe to re-run when the object already exists.
k8s-namespace: ## Create Kubernetes namespace
	kubectl create namespace llm-gateway --dry-run=client -o yaml | kubectl apply -f -

k8s-secrets: ## Create Kubernetes secrets (requires env vars)
	@echo "Creating secrets..."
	@if [ -z "$(GOOGLE_API_KEY)" ] || [ -z "$(ANTHROPIC_API_KEY)" ] || [ -z "$(OPENAI_API_KEY)" ]; then \
		echo "Error: Please set GOOGLE_API_KEY, ANTHROPIC_API_KEY, and OPENAI_API_KEY environment variables"; \
		exit 1; \
	fi
	kubectl create secret generic llm-gateway-secrets \
		--from-literal=GOOGLE_API_KEY="$(GOOGLE_API_KEY)" \
		--from-literal=ANTHROPIC_API_KEY="$(ANTHROPIC_API_KEY)" \
		--from-literal=OPENAI_API_KEY="$(OPENAI_API_KEY)" \
		--from-literal=OIDC_AUDIENCE="$(OIDC_AUDIENCE)" \
		-n llm-gateway \
		--dry-run=client -o yaml | kubectl apply -f -

k8s-deploy: k8s-namespace k8s-secrets ## Deploy to Kubernetes
	@echo "Deploying to Kubernetes..."
	kubectl apply -k k8s/

k8s-delete: ## Delete from Kubernetes
	@echo "Deleting from Kubernetes..."
	kubectl delete -k k8s/

k8s-status: ## Check Kubernetes deployment status
	@echo "Checking deployment status..."
	kubectl get all -n llm-gateway

k8s-logs: ## View Kubernetes logs
	kubectl logs -n llm-gateway -l app=llm-gateway --tail=100 -f

k8s-describe: ## Describe Kubernetes deployment
	kubectl describe deployment llm-gateway -n llm-gateway

k8s-port-forward: ## Port forward to local machine
	kubectl port-forward -n llm-gateway svc/llm-gateway 8080:80

# CI/CD targets
ci: lint test ## Run CI checks

security-scan: ## Run security scan
	@echo "Running security scan..."
	@which gosec > /dev/null || (echo "gosec not installed. Run: go install github.com/securego/gosec/v2/cmd/gosec@latest" && exit 1)
	gosec ./...

# Run target
run: ## Run locally
	@echo "Running $(APP_NAME) locally..."
	$(GOCMD) run ./cmd/gateway --config config.yaml

# Version info
version: ## Show version
	@echo "Version: $(VERSION)"
	@echo "Image: $(DOCKER_TAG)"
|
||||
102
docker-compose.yaml
Normal file
102
docker-compose.yaml
Normal file
@@ -0,0 +1,102 @@
|
||||
# Docker Compose for local development and testing
# Not recommended for production - use Kubernetes instead
#
# The prometheus and grafana services belong to the `monitoring` profile and
# only start when that profile is enabled (e.g. `--profile monitoring`).

version: '3.9'

services:
  gateway:
    build:
      context: .
      dockerfile: Dockerfile
    image: llm-gateway:latest
    container_name: llm-gateway
    ports:
      - "8080:8080"
    environment:
      # Provider API keys (taken from the shell or an .env file)
      GOOGLE_API_KEY: ${GOOGLE_API_KEY}
      ANTHROPIC_API_KEY: ${ANTHROPIC_API_KEY}
      OPENAI_API_KEY: ${OPENAI_API_KEY}
      OIDC_AUDIENCE: ${OIDC_AUDIENCE:-}
    volumes:
      - ./config.yaml:/app/config/config.yaml:ro
    depends_on:
      redis:
        condition: service_healthy
    networks:
      - llm-network
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:8080/health"]
      interval: 30s
      timeout: 5s
      retries: 3
      start_period: 10s

  redis:
    image: redis:7.2-alpine
    container_name: llm-gateway-redis
    ports:
      # This Redis has no authentication, so it is published on the loopback
      # interface only. The gateway reaches it over llm-network regardless.
      - "127.0.0.1:6379:6379"
    command: redis-server --maxmemory 256mb --maxmemory-policy allkeys-lru
    volumes:
      - redis-data:/data
    networks:
      - llm-network
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "redis-cli", "ping"]
      interval: 10s
      timeout: 3s
      retries: 3

  # Optional: Prometheus for metrics
  prometheus:
    image: prom/prometheus:latest
    container_name: llm-gateway-prometheus
    ports:
      - "9090:9090"
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'
      - '--web.console.libraries=/usr/share/prometheus/console_libraries'
      - '--web.console.templates=/usr/share/prometheus/consoles'
    volumes:
      - ./monitoring/prometheus.yml:/etc/prometheus/prometheus.yml:ro
      - prometheus-data:/prometheus
    networks:
      - llm-network
    restart: unless-stopped
    profiles:
      - monitoring

  # Optional: Grafana for visualization
  grafana:
    image: grafana/grafana:latest
    container_name: llm-gateway-grafana
    ports:
      - "3000:3000"
    environment:
      # Defaults to "admin" for local use; set GRAFANA_ADMIN_PASSWORD to
      # override instead of editing this file.
      GF_SECURITY_ADMIN_PASSWORD: ${GRAFANA_ADMIN_PASSWORD:-admin}
      # Quoted: environment values are strings, not YAML booleans.
      GF_USERS_ALLOW_SIGN_UP: 'false'
    volumes:
      - ./monitoring/grafana-datasources.yml:/etc/grafana/provisioning/datasources/datasources.yml:ro
      - ./monitoring/grafana-dashboards.yml:/etc/grafana/provisioning/dashboards/dashboards.yml:ro
      - ./monitoring/dashboards:/var/lib/grafana/dashboards:ro
      - grafana-data:/var/lib/grafana
    depends_on:
      - prometheus
    networks:
      - llm-network
    restart: unless-stopped
    profiles:
      - monitoring

networks:
  llm-network:
    driver: bridge

volumes:
  redis-data:
  prometheus-data:
  grafana-data:
|
||||
352
k8s/README.md
Normal file
352
k8s/README.md
Normal file
@@ -0,0 +1,352 @@
|
||||
# Kubernetes Deployment Guide
|
||||
|
||||
This directory contains Kubernetes manifests for deploying the LLM Gateway to production.
|
||||
|
||||
## Prerequisites
|
||||
|
||||
- Kubernetes cluster (v1.24+)
|
||||
- `kubectl` configured
|
||||
- Container registry access
|
||||
- (Optional) Prometheus Operator for monitoring
|
||||
- (Optional) cert-manager for TLS certificates
|
||||
- (Optional) nginx-ingress-controller or cloud load balancer
|
||||
|
||||
## Quick Start
|
||||
|
||||
### 1. Build and Push Docker Image
|
||||
|
||||
```bash
|
||||
# Build the image
|
||||
docker build -t your-registry/llm-gateway:v1.0.0 .
|
||||
|
||||
# Push to registry
|
||||
docker push your-registry/llm-gateway:v1.0.0
|
||||
```
|
||||
|
||||
### 2. Configure Secrets
|
||||
|
||||
**Option A: Using kubectl**
|
||||
```bash
|
||||
kubectl create namespace llm-gateway
|
||||
|
||||
kubectl create secret generic llm-gateway-secrets \
|
||||
--from-literal=GOOGLE_API_KEY="your-key" \
|
||||
--from-literal=ANTHROPIC_API_KEY="your-key" \
|
||||
--from-literal=OPENAI_API_KEY="your-key" \
|
||||
--from-literal=OIDC_AUDIENCE="your-client-id" \
|
||||
-n llm-gateway
|
||||
```
|
||||
|
||||
**Option B: Using External Secrets Operator (Recommended)**
|
||||
- Uncomment the ExternalSecret in `secret.yaml`
|
||||
- Configure your SecretStore (AWS Secrets Manager, Vault, etc.)
|
||||
|
||||
### 3. Update Configuration
|
||||
|
||||
Edit `configmap.yaml`:
|
||||
- Update Redis connection string if using external Redis
|
||||
- Configure observability endpoints (Tempo, Prometheus)
|
||||
- Adjust rate limits as needed
|
||||
- Set OIDC issuer and audience
|
||||
|
||||
Edit `ingress.yaml`:
|
||||
- Replace `llm-gateway.example.com` with your domain
|
||||
- Configure TLS certificate annotations
|
||||
|
||||
Edit `kustomization.yaml`:
|
||||
- Update image registry and tag
|
||||
|
||||
### 4. Deploy
|
||||
|
||||
**Using Kustomize (Recommended):**
|
||||
```bash
|
||||
kubectl apply -k k8s/
|
||||
```
|
||||
|
||||
**Using kubectl directly:**
|
||||
```bash
|
||||
kubectl apply -f k8s/namespace.yaml
|
||||
kubectl apply -f k8s/serviceaccount.yaml
|
||||
kubectl apply -f k8s/secret.yaml
|
||||
kubectl apply -f k8s/configmap.yaml
|
||||
kubectl apply -f k8s/redis.yaml
|
||||
kubectl apply -f k8s/deployment.yaml
|
||||
kubectl apply -f k8s/service.yaml
|
||||
kubectl apply -f k8s/ingress.yaml
|
||||
kubectl apply -f k8s/hpa.yaml
|
||||
kubectl apply -f k8s/pdb.yaml
|
||||
kubectl apply -f k8s/networkpolicy.yaml
|
||||
```
|
||||
|
||||
**With Prometheus Operator:**
|
||||
```bash
|
||||
kubectl apply -f k8s/servicemonitor.yaml
|
||||
kubectl apply -f k8s/prometheusrule.yaml
|
||||
```
|
||||
|
||||
### 5. Verify Deployment
|
||||
|
||||
```bash
|
||||
# Check pods
|
||||
kubectl get pods -n llm-gateway
|
||||
|
||||
# Check services
|
||||
kubectl get svc -n llm-gateway
|
||||
|
||||
# Check ingress
|
||||
kubectl get ingress -n llm-gateway
|
||||
|
||||
# View logs
|
||||
kubectl logs -n llm-gateway -l app=llm-gateway --tail=100 -f
|
||||
|
||||
# Check health
|
||||
kubectl port-forward -n llm-gateway svc/llm-gateway 8080:80
|
||||
curl http://localhost:8080/health
|
||||
```
|
||||
|
||||
## Architecture Overview
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────────────┐
│                    Internet/Clients                     │
└───────────────────────┬─────────────────────────────────┘
                        │
                        ▼
┌─────────────────────────────────────────────────────────┐
│                   Ingress Controller                    │
│                (nginx/ALB/GCE with TLS)                 │
└───────────────────────┬─────────────────────────────────┘
                        │
                        ▼
┌─────────────────────────────────────────────────────────┐
│                   LLM Gateway Service                   │
│                     (LoadBalancer)                      │
└───────────────────────┬─────────────────────────────────┘
                        │
        ┌───────────────┼───────────────┐
        ▼               ▼               ▼
┌──────────────┐ ┌──────────────┐ ┌──────────────┐
│   Gateway    │ │   Gateway    │ │   Gateway    │
│    Pod 1     │ │    Pod 2     │ │    Pod 3     │
└──────┬───────┘ └──────┬───────┘ └──────┬───────┘
       │                │                │
       └────────────────┼────────────────┘
                        │
        ┌───────────────┼───────────────┐
        ▼               ▼               ▼
┌──────────────┐ ┌──────────────┐ ┌──────────────┐
│    Redis     │ │  Prometheus  │ │    Tempo     │
│ (Persistent) │ │  (Metrics)   │ │   (Traces)   │
└──────────────┘ └──────────────┘ └──────────────┘
|
||||
```
|
||||
|
||||
## Resource Specifications
|
||||
|
||||
### Default Resources
|
||||
- **Requests**: 100m CPU, 128Mi memory
|
||||
- **Limits**: 1000m CPU, 512Mi memory
|
||||
- **Replicas**: 3 (min), 20 (max with HPA)
|
||||
|
||||
### Scaling
|
||||
- HPA scales based on CPU (70%) and memory (80%)
|
||||
- PodDisruptionBudget ensures minimum 2 replicas during disruptions
|
||||
|
||||
## Configuration Options
|
||||
|
||||
### Environment Variables (from Secret)
|
||||
- `GOOGLE_API_KEY`: Google AI API key
|
||||
- `ANTHROPIC_API_KEY`: Anthropic API key
|
||||
- `OPENAI_API_KEY`: OpenAI API key
|
||||
- `OIDC_AUDIENCE`: OIDC client ID for authentication
|
||||
|
||||
### ConfigMap Settings
|
||||
See `configmap.yaml` for full configuration options:
|
||||
- Server address
|
||||
- Logging format and level
|
||||
- Rate limiting
|
||||
- Observability (metrics/tracing)
|
||||
- Provider endpoints
|
||||
- Conversation storage
|
||||
- Authentication
|
||||
|
||||
## Security
|
||||
|
||||
### Security Features
|
||||
- Non-root container execution (UID 1000)
|
||||
- Read-only root filesystem
|
||||
- No privilege escalation
|
||||
- All capabilities dropped
|
||||
- Network policies for ingress/egress control
|
||||
- SeccompProfile: RuntimeDefault
|
||||
|
||||
### TLS/HTTPS
|
||||
- Ingress configured with TLS
|
||||
- Uses cert-manager for automatic certificate provisioning
|
||||
- Force SSL redirect enabled
|
||||
|
||||
### Secrets Management
|
||||
**Never commit secrets to git!**
|
||||
|
||||
Production options:
|
||||
1. **External Secrets Operator** (Recommended)
|
||||
- AWS Secrets Manager
|
||||
- HashiCorp Vault
|
||||
- Google Secret Manager
|
||||
|
||||
2. **Sealed Secrets**
|
||||
- Encrypted secrets in git
|
||||
|
||||
3. **Manual kubectl secrets**
|
||||
- Created outside of git
|
||||
|
||||
## Monitoring
|
||||
|
||||
### Metrics
|
||||
- Exposed on `/metrics` endpoint
|
||||
- Scraped by Prometheus via ServiceMonitor
|
||||
- Key metrics:
|
||||
- HTTP request rate, latency, errors
|
||||
- Provider request rate, latency, token usage
|
||||
- Conversation store operations
|
||||
- Rate limiting hits
|
||||
|
||||
### Alerts
|
||||
See `prometheusrule.yaml` for configured alerts:
|
||||
- High error rate
|
||||
- High latency
|
||||
- Provider failures
|
||||
- Pod down
|
||||
- High memory usage
|
||||
- Rate limit threshold exceeded
|
||||
- Conversation store errors
|
||||
|
||||
### Logs
|
||||
Structured JSON logs with:
|
||||
- Request IDs
|
||||
- Trace context (trace_id, span_id)
|
||||
- Log levels (debug/info/warn/error)
|
||||
|
||||
View logs:
|
||||
```bash
|
||||
kubectl logs -n llm-gateway -l app=llm-gateway --tail=100 -f
|
||||
```
|
||||
|
||||
## Maintenance
|
||||
|
||||
### Rolling Updates
|
||||
```bash
|
||||
# Update image
|
||||
kubectl set image deployment/llm-gateway gateway=your-registry/llm-gateway:v1.0.1 -n llm-gateway
|
||||
|
||||
# Check rollout status
|
||||
kubectl rollout status deployment/llm-gateway -n llm-gateway
|
||||
|
||||
# Rollback if needed
|
||||
kubectl rollout undo deployment/llm-gateway -n llm-gateway
|
||||
```
|
||||
|
||||
### Scaling
|
||||
```bash
|
||||
# Manual scale
|
||||
kubectl scale deployment/llm-gateway --replicas=5 -n llm-gateway
|
||||
|
||||
# HPA will auto-scale within min/max bounds (3-20)
|
||||
```
|
||||
|
||||
### Configuration Updates
|
||||
```bash
|
||||
# Edit ConfigMap
|
||||
kubectl edit configmap llm-gateway-config -n llm-gateway
|
||||
|
||||
# Restart pods to pick up changes
|
||||
kubectl rollout restart deployment/llm-gateway -n llm-gateway
|
||||
```
|
||||
|
||||
### Debugging
|
||||
```bash
|
||||
# Exec into pod
|
||||
kubectl exec -it -n llm-gateway deployment/llm-gateway -- /bin/sh
|
||||
|
||||
# Port forward for local access
|
||||
kubectl port-forward -n llm-gateway svc/llm-gateway 8080:80
|
||||
|
||||
# Check events
|
||||
kubectl get events -n llm-gateway --sort-by='.lastTimestamp'
|
||||
```
|
||||
|
||||
## Production Considerations
|
||||
|
||||
### High Availability
|
||||
- Minimum 3 replicas across availability zones
|
||||
- Pod anti-affinity rules spread pods across nodes
|
||||
- PodDisruptionBudget ensures service availability during disruptions
|
||||
|
||||
### Performance
|
||||
- Adjust resource limits based on load testing
|
||||
- Configure HPA thresholds based on traffic patterns
|
||||
- Use node affinity for GPU nodes if needed
|
||||
|
||||
### Cost Optimization
|
||||
- Use spot/preemptible instances for non-critical workloads
|
||||
- Set appropriate resource requests/limits
|
||||
- Monitor token usage and implement quotas
|
||||
|
||||
### Disaster Recovery
|
||||
- Redis persistence (if using StatefulSet)
|
||||
- Regular backups of conversation data
|
||||
- Multi-region deployment for geo-redundancy
|
||||
- Document runbooks for incident response
|
||||
|
||||
## Cloud-Specific Notes
|
||||
|
||||
### AWS EKS
|
||||
- Use AWS Load Balancer Controller for ALB
|
||||
- Configure IRSA for service account
|
||||
- Use ElastiCache for Redis
|
||||
- Store secrets in AWS Secrets Manager
|
||||
|
||||
### GCP GKE
|
||||
- Use GKE Ingress for GCLB
|
||||
- Configure Workload Identity
|
||||
- Use Memorystore for Redis
|
||||
- Store secrets in Google Secret Manager
|
||||
|
||||
### Azure AKS
|
||||
- Use Azure Application Gateway Ingress Controller
|
||||
- Configure Azure AD Workload Identity
|
||||
- Use Azure Cache for Redis
|
||||
- Store secrets in Azure Key Vault
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Common Issues
|
||||
|
||||
**Pods not starting:**
|
||||
```bash
|
||||
kubectl describe pod -n llm-gateway -l app=llm-gateway
|
||||
kubectl logs -n llm-gateway -l app=llm-gateway --previous
|
||||
```
|
||||
|
||||
**Health check failures:**
|
||||
```bash
|
||||
kubectl port-forward -n llm-gateway deployment/llm-gateway 8080:8080
|
||||
curl http://localhost:8080/health
|
||||
curl http://localhost:8080/ready
|
||||
```
|
||||
|
||||
**Provider connection issues:**
|
||||
- Verify API keys in secrets
|
||||
- Check network policies allow egress
|
||||
- Verify provider endpoints are accessible
|
||||
|
||||
**Redis connection issues:**
|
||||
```bash
|
||||
kubectl exec -it -n llm-gateway redis-0 -- redis-cli ping
|
||||
```
|
||||
|
||||
## Additional Resources
|
||||
|
||||
- [Kubernetes Documentation](https://kubernetes.io/docs/)
|
||||
- [Prometheus Operator](https://github.com/prometheus-operator/prometheus-operator)
|
||||
- [cert-manager](https://cert-manager.io/)
|
||||
- [External Secrets Operator](https://external-secrets.io/)
|
||||
76
k8s/configmap.yaml
Normal file
76
k8s/configmap.yaml
Normal file
@@ -0,0 +1,76 @@
|
||||
# Gateway configuration file, exposed to the pod as the `config.yaml` key.
# NOTE(review): the ${...} placeholders are stored literally; presumably the
# gateway expands them from its environment at startup - confirm.
kind: ConfigMap
apiVersion: v1
metadata:
  name: llm-gateway-config
  namespace: llm-gateway
  labels:
    app: llm-gateway
data:
  config.yaml: |
    server:
      address: ":8080"

    logging:
      format: "json"
      level: "info"

    rate_limit:
      enabled: true
      requests_per_second: 10
      burst: 20

    observability:
      enabled: true

      metrics:
        enabled: true
        path: "/metrics"

      tracing:
        enabled: true
        service_name: "llm-gateway"
        sampler:
          type: "probability"
          rate: 0.1
        exporter:
          type: "otlp"
          endpoint: "tempo.observability.svc.cluster.local:4317"
          insecure: true

    providers:
      google:
        type: "google"
        api_key: "${GOOGLE_API_KEY}"
        endpoint: "https://generativelanguage.googleapis.com"
      anthropic:
        type: "anthropic"
        api_key: "${ANTHROPIC_API_KEY}"
        endpoint: "https://api.anthropic.com"
      openai:
        type: "openai"
        api_key: "${OPENAI_API_KEY}"
        endpoint: "https://api.openai.com"

    conversations:
      store: "redis"
      ttl: "1h"
      dsn: "redis://redis.llm-gateway.svc.cluster.local:6379/0"

    auth:
      enabled: true
      issuer: "https://accounts.google.com"
      audience: "${OIDC_AUDIENCE}"

    models:
      - name: "gemini-1.5-flash"
        provider: "google"
      - name: "gemini-1.5-pro"
        provider: "google"
      - name: "claude-3-5-sonnet-20241022"
        provider: "anthropic"
      - name: "claude-3-5-haiku-20241022"
        provider: "anthropic"
      - name: "gpt-4o"
        provider: "openai"
      - name: "gpt-4o-mini"
        provider: "openai"
|
||||
168
k8s/deployment.yaml
Normal file
168
k8s/deployment.yaml
Normal file
@@ -0,0 +1,168 @@
|
||||
# Gateway Deployment: one `gateway` container per pod, configured from the
# llm-gateway-config ConfigMap (mounted read-only at /app/config) and the
# llm-gateway-secrets Secret (injected as environment variables).
apiVersion: apps/v1
kind: Deployment
metadata:
  name: llm-gateway
  namespace: llm-gateway
  labels:
    app: llm-gateway
    version: v1
spec:
  # `replicas` is intentionally not set: the HorizontalPodAutoscaler in
  # hpa.yaml (minReplicas: 3) owns the replica count. Hard-coding it here would
  # reset the autoscaled count back to that value on every `kubectl apply`.
  strategy:
    type: RollingUpdate
    rollingUpdate:
      # Bring one new pod up before taking any old pod down.
      maxSurge: 1
      maxUnavailable: 0
  selector:
    matchLabels:
      app: llm-gateway
  template:
    metadata:
      labels:
        app: llm-gateway
        version: v1
      annotations:
        prometheus.io/scrape: "true"
        prometheus.io/port: "8080"
        prometheus.io/path: "/metrics"
    spec:
      serviceAccountName: llm-gateway
      securityContext:
        runAsNonRoot: true
        runAsUser: 1000
        runAsGroup: 1000
        fsGroup: 1000
        seccompProfile:
          type: RuntimeDefault

      containers:
        - name: gateway
          # NOTE(review): a mutable `latest` tag combined with IfNotPresent
          # means nodes keep whatever image they cached first; pin a tag.
          image: llm-gateway:latest  # Replace with your registry/image:tag
          imagePullPolicy: IfNotPresent

          ports:
            - name: http
              containerPort: 8080
              protocol: TCP

          env:
            # Provider API Keys from Secret
            - name: GOOGLE_API_KEY
              valueFrom:
                secretKeyRef:
                  name: llm-gateway-secrets
                  key: GOOGLE_API_KEY
            - name: ANTHROPIC_API_KEY
              valueFrom:
                secretKeyRef:
                  name: llm-gateway-secrets
                  key: ANTHROPIC_API_KEY
            - name: OPENAI_API_KEY
              valueFrom:
                secretKeyRef:
                  name: llm-gateway-secrets
                  key: OPENAI_API_KEY
            - name: OIDC_AUDIENCE
              valueFrom:
                secretKeyRef:
                  name: llm-gateway-secrets
                  key: OIDC_AUDIENCE

            # Optional: Pod metadata
            - name: POD_NAME
              valueFrom:
                fieldRef:
                  fieldPath: metadata.name
            - name: POD_NAMESPACE
              valueFrom:
                fieldRef:
                  fieldPath: metadata.namespace
            - name: POD_IP
              valueFrom:
                fieldRef:
                  fieldPath: status.podIP

          resources:
            requests:
              cpu: 100m
              memory: 128Mi
            limits:
              cpu: 1000m
              memory: 512Mi

          livenessProbe:
            httpGet:
              path: /health
              port: http
              scheme: HTTP
            initialDelaySeconds: 10
            periodSeconds: 30
            timeoutSeconds: 5
            successThreshold: 1
            failureThreshold: 3

          readinessProbe:
            httpGet:
              path: /ready
              port: http
              scheme: HTTP
            initialDelaySeconds: 5
            periodSeconds: 10
            timeoutSeconds: 5
            successThreshold: 1
            failureThreshold: 3

          # Allows up to 30 x 5s = 150s for startup before the liveness
          # probe takes over.
          startupProbe:
            httpGet:
              path: /health
              port: http
              scheme: HTTP
            initialDelaySeconds: 0
            periodSeconds: 5
            timeoutSeconds: 3
            successThreshold: 1
            failureThreshold: 30

          volumeMounts:
            - name: config
              mountPath: /app/config
              readOnly: true
            # Writable scratch space; the root filesystem is read-only.
            - name: tmp
              mountPath: /tmp

          securityContext:
            allowPrivilegeEscalation: false
            readOnlyRootFilesystem: true
            runAsNonRoot: true
            runAsUser: 1000
            capabilities:
              drop:
                - ALL

      volumes:
        - name: config
          configMap:
            name: llm-gateway-config
        - name: tmp
          emptyDir: {}

      # Affinity rules for better distribution
      affinity:
        podAntiAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 100
              podAffinityTerm:
                labelSelector:
                  matchExpressions:
                    - key: app
                      operator: In
                      values:
                        - llm-gateway
                topologyKey: kubernetes.io/hostname

      # Tolerations (if needed for specific node pools)
      # tolerations:
      #   - key: "workload-type"
      #     operator: "Equal"
      #     value: "llm"
      #     effect: "NoSchedule"
|
||||
63
k8s/hpa.yaml
Normal file
63
k8s/hpa.yaml
Normal file
@@ -0,0 +1,63 @@
|
||||
# Autoscaler for the llm-gateway Deployment: 3 to 20 replicas, driven by
# average CPU (70%) and memory (80%) utilization.
kind: HorizontalPodAutoscaler
apiVersion: autoscaling/v2
metadata:
  name: llm-gateway
  namespace: llm-gateway
  labels:
    app: llm-gateway
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: llm-gateway

  minReplicas: 3
  maxReplicas: 20

  metrics:
    # CPU-based scaling
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: 70

    # Memory-based scaling
    - type: Resource
      resource:
        name: memory
        target:
          type: Utilization
          averageUtilization: 80

    # Custom metrics (requires a custom metrics API adapter, e.g. prometheus-adapter)
    # - type: Pods
    #   pods:
    #     metric:
    #       name: http_requests_per_second
    #     target:
    #       type: AverageValue
    #       averageValue: "1000"

  behavior:
    # Scale up immediately, by whichever policy allows the larger step.
    scaleUp:
      stabilizationWindowSeconds: 0
      selectPolicy: Max
      policies:
        - type: Percent
          value: 100
          periodSeconds: 30
        - type: Pods
          value: 4
          periodSeconds: 30
    # Scale down only after a 5 minute window, by the smaller step.
    scaleDown:
      stabilizationWindowSeconds: 300
      selectPolicy: Min
      policies:
        - type: Percent
          value: 50
          periodSeconds: 60
        - type: Pods
          value: 2
          periodSeconds: 60
|
||||
66
k8s/ingress.yaml
Normal file
66
k8s/ingress.yaml
Normal file
@@ -0,0 +1,66 @@
|
||||
# Public HTTPS entry point for the gateway, routed to the llm-gateway Service
# on port 80. The certificate is requested through cert-manager.
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: llm-gateway
  namespace: llm-gateway
  labels:
    app: llm-gateway
  annotations:
    # The ingress class is selected with spec.ingressClassName below; the
    # kubernetes.io/ingress.class annotation is deprecated for
    # networking.k8s.io/v1 and must not be combined with that field.

    # TLS configuration
    cert-manager.io/cluster-issuer: "letsencrypt-prod"

    # HTTPS enforcement
    nginx.ingress.kubernetes.io/force-ssl-redirect: "true"
    # NOTE(review): ingress-nginx documents ssl-protocols as a controller
    # ConfigMap option; confirm this per-Ingress annotation is honoured.
    nginx.ingress.kubernetes.io/ssl-protocols: "TLSv1.2 TLSv1.3"

    # Rate limiting (supplement application-level rate limiting)
    nginx.ingress.kubernetes.io/limit-rps: "100"
    nginx.ingress.kubernetes.io/limit-connections: "50"

    # Request size limit (10MB)
    nginx.ingress.kubernetes.io/proxy-body-size: "10m"

    # Timeouts (seconds)
    nginx.ingress.kubernetes.io/proxy-connect-timeout: "60"
    nginx.ingress.kubernetes.io/proxy-send-timeout: "120"
    nginx.ingress.kubernetes.io/proxy-read-timeout: "120"

    # CORS (if needed)
    # nginx.ingress.kubernetes.io/enable-cors: "true"
    # nginx.ingress.kubernetes.io/cors-allow-origin: "https://yourdomain.com"
    # nginx.ingress.kubernetes.io/cors-allow-methods: "GET, POST, OPTIONS"
    # nginx.ingress.kubernetes.io/cors-allow-credentials: "true"

    # For AWS ALB Ingress Controller (alternative to nginx)
    # Remove spec.ingressClassName below if you switch to the annotation.
    # kubernetes.io/ingress.class: "alb"
    # alb.ingress.kubernetes.io/scheme: "internet-facing"
    # alb.ingress.kubernetes.io/target-type: "ip"
    # alb.ingress.kubernetes.io/listen-ports: '[{"HTTP": 80}, {"HTTPS": 443}]'
    # alb.ingress.kubernetes.io/ssl-redirect: '443'
    # alb.ingress.kubernetes.io/certificate-arn: "arn:aws:acm:region:account:certificate/xxx"

    # For GKE Ingress (alternative to nginx)
    # Remove spec.ingressClassName below if you switch to the annotation.
    # kubernetes.io/ingress.class: "gce"
    # kubernetes.io/ingress.global-static-ip-name: "llm-gateway-ip"
    # ingress.gcp.kubernetes.io/pre-shared-cert: "llm-gateway-cert"

spec:
  # Must match the name of an IngressClass installed in the cluster.
  ingressClassName: nginx

  tls:
    - hosts:
        - llm-gateway.example.com  # Replace with your domain
      secretName: llm-gateway-tls

  rules:
    - host: llm-gateway.example.com  # Replace with your domain
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: llm-gateway
                port:
                  number: 80
|
||||
46
k8s/kustomization.yaml
Normal file
46
k8s/kustomization.yaml
Normal file
@@ -0,0 +1,46 @@
|
||||
# Kustomize configuration for easy deployment
# Usage: kubectl apply -k k8s/

apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization

namespace: llm-gateway

resources:
  - namespace.yaml
  - serviceaccount.yaml
  - configmap.yaml
  # secret.yaml ships placeholder values; replace them (or drop this entry and
  # manage the Secret externally) before applying.
  - secret.yaml
  - deployment.yaml
  - service.yaml
  - ingress.yaml
  - hpa.yaml
  - pdb.yaml
  - networkpolicy.yaml
  - redis.yaml
  - servicemonitor.yaml
  - prometheusrule.yaml

# Common labels applied to the metadata of all resources.
# `labels` with includeSelectors: false is used instead of `commonLabels`:
# commonLabels is also injected into selectors, including the NetworkPolicy
# peer podSelectors for kube-dns, prometheus and tempo, which would then
# require those foreign pods to carry these labels and stop matching.
labels:
  - pairs:
      app.kubernetes.io/name: llm-gateway
      app.kubernetes.io/component: api-gateway
      app.kubernetes.io/part-of: llm-platform
    includeSelectors: false

# Images to be used (customize for your registry)
images:
  - name: llm-gateway
    newName: your-registry/llm-gateway
    newTag: latest

# ConfigMap generator (alternative to configmap.yaml)
# configMapGenerator:
#   - name: llm-gateway-config
#     files:
#       - config.yaml

# Secret generator (for local development only)
# secretGenerator:
#   - name: llm-gateway-secrets
#     envs:
#       - secrets.env
|
||||
7
k8s/namespace.yaml
Normal file
7
k8s/namespace.yaml
Normal file
@@ -0,0 +1,7 @@
|
||||
# Namespace that holds every llm-gateway resource.
kind: Namespace
apiVersion: v1
metadata:
  name: llm-gateway
  labels:
    environment: production
    app: llm-gateway
|
||||
83
k8s/networkpolicy.yaml
Normal file
83
k8s/networkpolicy.yaml
Normal file
@@ -0,0 +1,83 @@
|
||||
# Restricts traffic for the gateway pods (app: llm-gateway).
# Namespace peers are matched on kubernetes.io/metadata.name, the label the
# control plane sets on every namespace, rather than on a hand-applied `name`
# label that may not exist.
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
  name: llm-gateway
  namespace: llm-gateway
  labels:
    app: llm-gateway
spec:
  podSelector:
    matchLabels:
      app: llm-gateway

  policyTypes:
    - Ingress
    - Egress

  ingress:
    # Allow traffic from ingress controller
    - from:
        - namespaceSelector:
            matchLabels:
              kubernetes.io/metadata.name: ingress-nginx
      ports:
        - protocol: TCP
          port: 8080

    # Allow traffic from within the namespace (for debugging/testing)
    - from:
        - podSelector: {}
      ports:
        - protocol: TCP
          port: 8080

    # Allow Prometheus scraping
    - from:
        - namespaceSelector:
            matchLabels:
              kubernetes.io/metadata.name: observability
          podSelector:
            matchLabels:
              app: prometheus
      ports:
        - protocol: TCP
          port: 8080

  egress:
    # Allow DNS (UDP, plus TCP for responses too large for a UDP datagram)
    - to:
        - namespaceSelector: {}
          podSelector:
            matchLabels:
              k8s-app: kube-dns
      ports:
        - protocol: UDP
          port: 53
        - protocol: TCP
          port: 53

    # Allow Redis access
    - to:
        - podSelector:
            matchLabels:
              app: redis
      ports:
        - protocol: TCP
          port: 6379

    # Allow external provider API access (OpenAI, Anthropic, Google)
    - to:
        # A namespaceSelector only matches pods inside the cluster; reaching
        # external HTTPS endpoints requires an ipBlock peer.
        - ipBlock:
            cidr: 0.0.0.0/0
        # Kept so in-cluster HTTPS destinations stay reachable as before.
        - namespaceSelector: {}
      ports:
        - protocol: TCP
          port: 443

    # Allow OTLP tracing export
    - to:
        - namespaceSelector:
            matchLabels:
              kubernetes.io/metadata.name: observability
          podSelector:
            matchLabels:
              app: tempo
      ports:
        - protocol: TCP
          port: 4317
|
||||
13
k8s/pdb.yaml
Normal file
13
k8s/pdb.yaml
Normal file
@@ -0,0 +1,13 @@
|
||||
# Keeps at least two gateway pods available during voluntary disruptions
# such as node drains.
kind: PodDisruptionBudget
apiVersion: policy/v1
metadata:
  name: llm-gateway
  namespace: llm-gateway
  labels:
    app: llm-gateway
spec:
  selector:
    matchLabels:
      app: llm-gateway
  minAvailable: 2
  # Pods that are running but not Ready can always be evicted.
  unhealthyPodEvictionPolicy: AlwaysAllow
|
||||
122
k8s/prometheusrule.yaml
Normal file
122
k8s/prometheusrule.yaml
Normal file
@@ -0,0 +1,122 @@
|
||||
# PrometheusRule for alerting
|
||||
# Requires Prometheus Operator to be installed
|
||||
|
||||
# Alerting rules for the gateway. Every ratio alert divides an error/limited
# request rate by the total rate over a 5 minute window.
kind: PrometheusRule
apiVersion: monitoring.coreos.com/v1
metadata:
  name: llm-gateway
  namespace: llm-gateway
  labels:
    app: llm-gateway
    prometheus: kube-prometheus
spec:
  groups:
    - name: llm-gateway.rules
      interval: 30s
      rules:

        # High error rate: more than 5% of requests return 5xx
        - alert: LLMGatewayHighErrorRate
          expr: |
            (
              sum(rate(http_requests_total{namespace="llm-gateway",status_code=~"5.."}[5m]))
              /
              sum(rate(http_requests_total{namespace="llm-gateway"}[5m]))
            ) > 0.05
          for: 5m
          labels:
            severity: warning
            component: llm-gateway
          annotations:
            summary: "High error rate in LLM Gateway"
            description: "Error rate is {{ $value | humanizePercentage }} (threshold: 5%)"

        # High latency: P95 request duration above 10 seconds
        - alert: LLMGatewayHighLatency
          expr: |
            histogram_quantile(0.95,
              sum(rate(http_request_duration_seconds_bucket{namespace="llm-gateway"}[5m])) by (le)
            ) > 10
          for: 5m
          labels:
            severity: warning
            component: llm-gateway
          annotations:
            summary: "High latency in LLM Gateway"
            description: "P95 latency is {{ $value }}s (threshold: 10s)"

        # Provider errors: more than 10% failing, evaluated per provider
        - alert: LLMProviderHighErrorRate
          expr: |
            (
              sum(rate(provider_requests_total{namespace="llm-gateway",status="error"}[5m])) by (provider)
              /
              sum(rate(provider_requests_total{namespace="llm-gateway"}[5m])) by (provider)
            ) > 0.10
          for: 5m
          labels:
            severity: warning
            component: llm-gateway
          annotations:
            summary: "High error rate for provider {{ $labels.provider }}"
            description: "Error rate is {{ $value | humanizePercentage }} (threshold: 10%)"

        # Pod down: scrape target unreachable for 2 minutes
        - alert: LLMGatewayPodDown
          expr: |
            up{job="llm-gateway",namespace="llm-gateway"} == 0
          for: 2m
          labels:
            severity: critical
            component: llm-gateway
          annotations:
            summary: "LLM Gateway pod is down"
            description: "Pod {{ $labels.pod }} has been down for more than 2 minutes"

        # High memory usage: working set above 85% of the container limit
        - alert: LLMGatewayHighMemoryUsage
          expr: |
            (
              container_memory_working_set_bytes{namespace="llm-gateway",container="gateway"}
              /
              container_spec_memory_limit_bytes{namespace="llm-gateway",container="gateway"}
            ) > 0.85
          for: 5m
          labels:
            severity: warning
            component: llm-gateway
          annotations:
            summary: "High memory usage in LLM Gateway"
            description: "Memory usage is {{ $value | humanizePercentage }} (threshold: 85%)"

        # Rate limit threshold: more than 20% of requests answered with 429
        - alert: LLMGatewayHighRateLimitHitRate
          expr: |
            (
              sum(rate(http_requests_total{namespace="llm-gateway",status_code="429"}[5m]))
              /
              sum(rate(http_requests_total{namespace="llm-gateway"}[5m]))
            ) > 0.20
          for: 10m
          labels:
            severity: info
            component: llm-gateway
          annotations:
            summary: "High rate limit hit rate"
            description: "{{ $value | humanizePercentage }} of requests are being rate limited"

        # Conversation store errors: more than 5% of store operations failing
        - alert: LLMGatewayConversationStoreErrors
          expr: |
            (
              sum(rate(conversation_store_operations_total{namespace="llm-gateway",status="error"}[5m]))
              /
              sum(rate(conversation_store_operations_total{namespace="llm-gateway"}[5m]))
            ) > 0.05
          for: 5m
          labels:
            severity: warning
            component: llm-gateway
          annotations:
            summary: "High error rate in conversation store"
            description: "Error rate is {{ $value | humanizePercentage }} (threshold: 5%)"
|
||||
131
k8s/redis.yaml
Normal file
131
k8s/redis.yaml
Normal file
@@ -0,0 +1,131 @@
|
||||
# Simple Redis deployment for conversation storage
|
||||
# For production, consider using:
|
||||
# - Redis Operator (e.g., Redis Enterprise Operator)
|
||||
# - Managed Redis (AWS ElastiCache, GCP Memorystore, Azure Cache for Redis)
|
||||
# - Redis Cluster for high availability
|
||||
|
||||
# redis.conf for the conversation store: a 256mb LRU cache with both RDB
# snapshots (`save ""`) and the append-only file switched off.
kind: ConfigMap
apiVersion: v1
metadata:
  name: redis-config
  namespace: llm-gateway
  labels:
    app: redis
data:
  redis.conf: |
    maxmemory 256mb
    maxmemory-policy allkeys-lru
    save ""
    appendonly no
|
||||
---
|
||||
# Single-replica Redis started with the redis.conf from the redis-config
# ConfigMap; /data is backed by a 10Gi volume claim.
kind: StatefulSet
apiVersion: apps/v1
metadata:
  name: redis
  namespace: llm-gateway
  labels:
    app: redis
spec:
  serviceName: redis
  replicas: 1
  selector:
    matchLabels:
      app: redis
  template:
    metadata:
      labels:
        app: redis
    spec:
      securityContext:
        runAsNonRoot: true
        runAsUser: 999
        fsGroup: 999
        seccompProfile:
          type: RuntimeDefault

      volumes:
        - name: config
          configMap:
            name: redis-config

      containers:
        - name: redis
          image: redis:7.2-alpine
          imagePullPolicy: IfNotPresent

          command:
            - redis-server
            - /etc/redis/redis.conf

          ports:
            - name: redis
              containerPort: 6379
              protocol: TCP

          resources:
            requests:
              cpu: 100m
              memory: 128Mi
            limits:
              cpu: 500m
              memory: 512Mi

          # Liveness only checks that the port accepts connections.
          livenessProbe:
            tcpSocket:
              port: redis
            initialDelaySeconds: 10
            periodSeconds: 10
            timeoutSeconds: 5
            failureThreshold: 3

          # Readiness runs `redis-cli ping` inside the container.
          readinessProbe:
            exec:
              command:
                - redis-cli
                - ping
            initialDelaySeconds: 5
            periodSeconds: 5
            timeoutSeconds: 3
            failureThreshold: 3

          volumeMounts:
            - name: config
              mountPath: /etc/redis
            - name: data
              mountPath: /data

          securityContext:
            allowPrivilegeEscalation: false
            readOnlyRootFilesystem: true
            runAsNonRoot: true
            runAsUser: 999
            capabilities:
              drop:
                - ALL

  volumeClaimTemplates:
    - metadata:
        name: data
      spec:
        accessModes: ["ReadWriteOnce"]
        resources:
          requests:
            storage: 10Gi
|
||||
---
|
||||
# Headless Service (clusterIP: None) for the Redis StatefulSet.
kind: Service
apiVersion: v1
metadata:
  name: redis
  namespace: llm-gateway
  labels:
    app: redis
spec:
  type: ClusterIP
  clusterIP: None
  selector:
    app: redis
  ports:
    - name: redis
      protocol: TCP
      port: 6379
      targetPort: redis
|
||||
46
k8s/secret.yaml
Normal file
46
k8s/secret.yaml
Normal file
@@ -0,0 +1,46 @@
|
||||
# Provider API keys and the OIDC audience consumed by the gateway Deployment.
# The values below are placeholders only.
kind: Secret
apiVersion: v1
type: Opaque
metadata:
  name: llm-gateway-secrets
  namespace: llm-gateway
  labels:
    app: llm-gateway
stringData:
  # IMPORTANT: Replace these with actual values or use external secret management
  # For production, use:
  # - kubectl create secret generic llm-gateway-secrets --from-literal=...
  # - External Secrets Operator with AWS Secrets Manager/HashiCorp Vault
  # - Sealed Secrets
  GOOGLE_API_KEY: "your-google-api-key-here"
  ANTHROPIC_API_KEY: "your-anthropic-api-key-here"
  OPENAI_API_KEY: "your-openai-api-key-here"
  OIDC_AUDIENCE: "your-client-id.apps.googleusercontent.com"
|
||||
---
|
||||
# Example using External Secrets Operator (commented out)
|
||||
# apiVersion: external-secrets.io/v1beta1
|
||||
# kind: ExternalSecret
|
||||
# metadata:
|
||||
# name: llm-gateway-secrets
|
||||
# namespace: llm-gateway
|
||||
# spec:
|
||||
# refreshInterval: 1h
|
||||
# secretStoreRef:
|
||||
# name: aws-secrets-manager
|
||||
# kind: SecretStore
|
||||
# target:
|
||||
# name: llm-gateway-secrets
|
||||
# creationPolicy: Owner
|
||||
# data:
|
||||
# - secretKey: GOOGLE_API_KEY
|
||||
# remoteRef:
|
||||
# key: prod/llm-gateway/google-api-key
|
||||
# - secretKey: ANTHROPIC_API_KEY
|
||||
# remoteRef:
|
||||
# key: prod/llm-gateway/anthropic-api-key
|
||||
# - secretKey: OPENAI_API_KEY
|
||||
# remoteRef:
|
||||
# key: prod/llm-gateway/openai-api-key
|
||||
# - secretKey: OIDC_AUDIENCE
|
||||
# remoteRef:
|
||||
# key: prod/llm-gateway/oidc-audience
|
||||
40
k8s/service.yaml
Normal file
40
k8s/service.yaml
Normal file
@@ -0,0 +1,40 @@
|
||||
# Cluster-internal Service: port 80 forwards to the pods' named `http` port.
kind: Service
apiVersion: v1
metadata:
  name: llm-gateway
  namespace: llm-gateway
  labels:
    app: llm-gateway
  annotations:
    # For cloud load balancers (uncomment as needed)
    # service.beta.kubernetes.io/aws-load-balancer-type: "nlb"
    # cloud.google.com/neg: '{"ingress": true}'
spec:
  type: ClusterIP
  sessionAffinity: None
  selector:
    app: llm-gateway
  ports:
    - name: http
      protocol: TCP
      port: 80
      targetPort: http
|
||||
---
|
||||
# Headless service for pod-to-pod communication (if needed)
|
||||
# Headless variant (clusterIP: None) selecting the same gateway pods,
# exposed on port 8080.
kind: Service
apiVersion: v1
metadata:
  name: llm-gateway-headless
  namespace: llm-gateway
  labels:
    app: llm-gateway
spec:
  type: ClusterIP
  clusterIP: None
  selector:
    app: llm-gateway
  ports:
    - name: http
      protocol: TCP
      port: 8080
      targetPort: http
|
||||
14
k8s/serviceaccount.yaml
Normal file
14
k8s/serviceaccount.yaml
Normal file
@@ -0,0 +1,14 @@
|
||||
# Identity the gateway pods run as. Uncomment one annotation below to bind it
# to a cloud IAM identity.
kind: ServiceAccount
apiVersion: v1
metadata:
  name: llm-gateway
  namespace: llm-gateway
  labels:
    app: llm-gateway
  annotations:
    # For GKE Workload Identity
    # iam.gke.io/gcp-service-account: llm-gateway@PROJECT_ID.iam.gserviceaccount.com

    # For EKS IRSA (IAM Roles for Service Accounts)
    # eks.amazonaws.com/role-arn: arn:aws:iam::ACCOUNT_ID:role/llm-gateway-role
automountServiceAccountToken: true
|
||||
35
k8s/servicemonitor.yaml
Normal file
35
k8s/servicemonitor.yaml
Normal file
@@ -0,0 +1,35 @@
|
||||
# ServiceMonitor for Prometheus Operator
|
||||
# Requires Prometheus Operator to be installed
|
||||
# https://github.com/prometheus-operator/prometheus-operator
|
||||
|
||||
# Scrapes /metrics from the gateway pods every 30s through the Services
# labelled app: llm-gateway.
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: llm-gateway
  namespace: llm-gateway
  labels:
    app: llm-gateway
    prometheus: kube-prometheus
spec:
  selector:
    matchLabels:
      app: llm-gateway

  endpoints:
    - port: http
      path: /metrics
      interval: 30s
      scrapeTimeout: 10s

      relabelings:
        # The selector above also matches the llm-gateway-headless Service,
        # which fronts the same pods on the same named port. Drop its targets
        # so each pod is scraped once instead of twice.
        - sourceLabels: [__meta_kubernetes_service_name]
          regex: llm-gateway-headless
          action: drop

        # Add namespace label
        - sourceLabels: [__meta_kubernetes_namespace]
          targetLabel: namespace

        # Add pod label
        - sourceLabels: [__meta_kubernetes_pod_name]
          targetLabel: pod

        # Add service label
        - sourceLabels: [__meta_kubernetes_service_name]
          targetLabel: service
|
||||
Reference in New Issue
Block a user