Compare commits

..

9 Commits

SHA1 Message Date
d782204c68 Add circuit breaker 2026-03-05 07:21:13 +00:00
ae2e1b7a80 Fix context background and silent JWT 2026-03-05 06:55:58 +00:00
214e63b0c5 Add panic recovery and request size limit 2026-03-05 06:32:42 +00:00
df6b677a15 Add Dockerfile and Manifests 2026-03-05 06:14:03 +00:00
b56c78fa07 Add observability and monitoring 2026-03-03 06:40:08 +00:00
2edb290563 Add graceful shutdown 2026-03-03 06:01:01 +00:00
119862d7ed Add rate limiting 2026-03-03 05:52:54 +00:00
27dfe7298d Add better logging 2026-03-03 05:33:02 +00:00
c2b6945cab Add tests 2026-03-03 05:18:00 +00:00
63 changed files with 10993 additions and 113 deletions

.dockerignore

@@ -0,0 +1,65 @@
# Git
.git
.gitignore
.github
# Documentation
*.md
docs/
# IDE
.vscode/
.idea/
*.swp
*.swo
*~
# Build artifacts
/bin/
/dist/
/build/
/gateway
/cmd/gateway/gateway
*.exe
*.dll
*.so
*.dylib
*.test
*.out
# Configuration files with secrets
config.yaml
config.json
*-local.yaml
*-local.json
.env
.env.local
*.key
*.pem
# Test and coverage
coverage.out
*.log
logs/
# OS
.DS_Store
Thumbs.db
# Dependencies (will be downloaded during build)
vendor/
# Python
__pycache__/
*.py[cod]
tests/node_modules/
# Jujutsu
.jj/
# Claude
.claude/
# Data directories
data/
*.db

.github/workflows/ci.yaml

@@ -0,0 +1,181 @@
name: CI

on:
  push:
    branches: [ main, develop ]
  pull_request:
    branches: [ main, develop ]

env:
  GO_VERSION: '1.23'
  REGISTRY: ghcr.io
  IMAGE_NAME: ${{ github.repository }}

jobs:
  test:
    name: Test
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
      - name: Set up Go
        uses: actions/setup-go@v5
        with:
          go-version: ${{ env.GO_VERSION }}
          cache: true
      - name: Download dependencies
        run: go mod download
      - name: Verify dependencies
        run: go mod verify
      - name: Run tests
        run: go test -v -race -coverprofile=coverage.out ./...
      - name: Upload coverage to Codecov
        uses: codecov/codecov-action@v4
        with:
          file: ./coverage.out
          flags: unittests
          name: codecov-umbrella
      - name: Generate coverage report
        run: go tool cover -html=coverage.out -o coverage.html
      - name: Upload coverage report
        uses: actions/upload-artifact@v4
        with:
          name: coverage-report
          path: coverage.html

  lint:
    name: Lint
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
      - name: Set up Go
        uses: actions/setup-go@v5
        with:
          go-version: ${{ env.GO_VERSION }}
          cache: true
      - name: Run golangci-lint
        uses: golangci/golangci-lint-action@v4
        with:
          version: latest
          args: --timeout=5m

  security:
    name: Security Scan
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
      - name: Set up Go
        uses: actions/setup-go@v5
        with:
          go-version: ${{ env.GO_VERSION }}
          cache: true
      - name: Run Gosec Security Scanner
        uses: securego/gosec@master
        with:
          args: '-no-fail -fmt sarif -out results.sarif ./...'
      - name: Upload SARIF file
        uses: github/codeql-action/upload-sarif@v3
        with:
          sarif_file: results.sarif

  build:
    name: Build
    runs-on: ubuntu-latest
    needs: [test, lint]
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
      - name: Set up Go
        uses: actions/setup-go@v5
        with:
          go-version: ${{ env.GO_VERSION }}
          cache: true
      - name: Build binary
        run: |
          CGO_ENABLED=1 go build -v -o bin/gateway ./cmd/gateway
      - name: Upload binary
        uses: actions/upload-artifact@v4
        with:
          name: gateway-binary
          path: bin/gateway

  docker:
    name: Build and Push Docker Image
    runs-on: ubuntu-latest
    needs: [test, lint, security]
    if: github.event_name == 'push' && (github.ref == 'refs/heads/main' || github.ref == 'refs/heads/develop')
    permissions:
      contents: read
      packages: write
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      - name: Log in to Container Registry
        uses: docker/login-action@v3
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}
      - name: Extract metadata
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
          tags: |
            type=ref,event=branch
            type=ref,event=pr
            type=semver,pattern={{version}}
            type=semver,pattern={{major}}.{{minor}}
            type=sha,prefix={{branch}}-
            type=raw,value=latest,enable=${{ github.ref == 'refs/heads/main' }}
      - name: Build and push Docker image
        uses: docker/build-push-action@v5
        with:
          context: .
          push: true
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          cache-from: type=gha
          cache-to: type=gha,mode=max
          platforms: linux/amd64,linux/arm64
      - name: Run Trivy vulnerability scanner
        uses: aquasecurity/trivy-action@master
        with:
          image-ref: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ github.sha }}
          format: 'sarif'
          output: 'trivy-results.sarif'
      - name: Upload Trivy results to GitHub Security
        uses: github/codeql-action/upload-sarif@v3
        with:
          sarif_file: 'trivy-results.sarif'

.github/workflows/release.yaml

@@ -0,0 +1,129 @@
name: Release

on:
  push:
    tags:
      - 'v*'

env:
  GO_VERSION: '1.23'
  REGISTRY: ghcr.io
  IMAGE_NAME: ${{ github.repository }}

jobs:
  release:
    name: Create Release
    runs-on: ubuntu-latest
    permissions:
      contents: write
      packages: write
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - name: Set up Go
        uses: actions/setup-go@v5
        with:
          go-version: ${{ env.GO_VERSION }}
      - name: Run tests
        run: go test -v ./...
      - name: Build binaries
        run: |
          # Linux amd64
          GOOS=linux GOARCH=amd64 CGO_ENABLED=1 go build -o bin/gateway-linux-amd64 ./cmd/gateway
          # Linux arm64
          GOOS=linux GOARCH=arm64 CGO_ENABLED=1 go build -o bin/gateway-linux-arm64 ./cmd/gateway
          # macOS amd64
          GOOS=darwin GOARCH=amd64 CGO_ENABLED=1 go build -o bin/gateway-darwin-amd64 ./cmd/gateway
          # macOS arm64
          GOOS=darwin GOARCH=arm64 CGO_ENABLED=1 go build -o bin/gateway-darwin-arm64 ./cmd/gateway
      - name: Create checksums
        run: |
          cd bin
          sha256sum gateway-* > checksums.txt
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      - name: Log in to Container Registry
        uses: docker/login-action@v3
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}
      - name: Extract metadata
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
          tags: |
            type=semver,pattern={{version}}
            type=semver,pattern={{major}}.{{minor}}
            type=semver,pattern={{major}}
            type=raw,value=latest
      - name: Build and push Docker image
        uses: docker/build-push-action@v5
        with:
          context: .
          push: true
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          platforms: linux/amd64,linux/arm64
          cache-from: type=gha
          cache-to: type=gha,mode=max
      - name: Generate changelog
        id: changelog
        run: |
          git log $(git describe --tags --abbrev=0 HEAD^)..HEAD --pretty=format:"* %s (%h)" > CHANGELOG.txt
          echo "changelog<<EOF" >> $GITHUB_OUTPUT
          cat CHANGELOG.txt >> $GITHUB_OUTPUT
          echo "EOF" >> $GITHUB_OUTPUT
      - name: Create Release
        uses: softprops/action-gh-release@v1
        with:
          body: |
            ## Changes
            ${{ steps.changelog.outputs.changelog }}

            ## Docker Images
            ```
            docker pull ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ github.ref_name }}
            docker pull ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:latest
            ```

            ## Installation
            ### Kubernetes
            ```bash
            kubectl apply -k k8s/
            ```
            ### Docker
            ```bash
            docker run -p 8080:8080 \
              -e GOOGLE_API_KEY=your-key \
              -e ANTHROPIC_API_KEY=your-key \
              -e OPENAI_API_KEY=your-key \
              ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ github.ref_name }}
            ```
          files: |
            bin/gateway-*
            bin/checksums.txt
          draft: false
          prerelease: ${{ contains(github.ref, 'alpha') || contains(github.ref, 'beta') || contains(github.ref, 'rc') }}
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

Dockerfile

@@ -0,0 +1,62 @@
# Multi-stage build for Go LLM Gateway

# Stage 1: Build the Go binary
FROM golang:alpine AS builder

# Install build dependencies
RUN apk add --no-cache git ca-certificates tzdata

WORKDIR /build

# Copy go mod files first for better caching
COPY go.mod go.sum ./
RUN go mod download

# Copy source code
COPY . .

# Build the binary with optimizations
# CGO is required for SQLite support
RUN apk add --no-cache gcc musl-dev && \
    CGO_ENABLED=1 GOOS=linux GOARCH=amd64 go build \
    -ldflags='-w -s -extldflags "-static"' \
    -a -installsuffix cgo \
    -o gateway \
    ./cmd/gateway

# Stage 2: Create minimal runtime image
FROM alpine:3.19

# Install runtime dependencies
RUN apk add --no-cache ca-certificates tzdata

# Create non-root user
RUN addgroup -g 1000 gateway && \
    adduser -D -u 1000 -G gateway gateway

# Create necessary directories
RUN mkdir -p /app /app/data && \
    chown -R gateway:gateway /app

WORKDIR /app

# Copy binary from builder
COPY --from=builder /build/gateway /app/gateway

# Copy example config (optional, mainly for documentation)
COPY config.example.yaml /app/config.example.yaml

# Switch to non-root user
USER gateway

# Expose port
EXPOSE 8080

# Health check
HEALTHCHECK --interval=30s --timeout=5s --start-period=5s --retries=3 \
    CMD wget --no-verbose --tries=1 --spider http://localhost:8080/health || exit 1

# Set entrypoint
ENTRYPOINT ["/app/gateway"]

# Default command (can be overridden)
CMD ["--config", "/app/config/config.yaml"]

Makefile

@@ -0,0 +1,151 @@
# Makefile for LLM Gateway

.PHONY: help build test docker-build docker-push k8s-deploy k8s-delete clean

# Variables
APP_NAME := llm-gateway
VERSION := $(shell git describe --tags --always --dirty 2>/dev/null || echo "dev")
REGISTRY ?= your-registry
IMAGE := $(REGISTRY)/$(APP_NAME)
DOCKER_TAG := $(IMAGE):$(VERSION)
LATEST_TAG := $(IMAGE):latest

# Go variables
GOCMD := go
GOBUILD := $(GOCMD) build
GOTEST := $(GOCMD) test
GOMOD := $(GOCMD) mod
GOFMT := $(GOCMD) fmt

# Build directory
BUILD_DIR := bin

# Help target
help: ## Show this help message
	@echo "Usage: make [target]"
	@echo ""
	@echo "Targets:"
	@awk 'BEGIN {FS = ":.*##"; printf "\n"} /^[a-zA-Z_-]+:.*?##/ { printf "  %-20s %s\n", $$1, $$2 }' $(MAKEFILE_LIST)

# Development targets
build: ## Build the binary
	@echo "Building $(APP_NAME)..."
	CGO_ENABLED=1 $(GOBUILD) -o $(BUILD_DIR)/$(APP_NAME) ./cmd/gateway

build-static: ## Build static binary
	@echo "Building static binary..."
	CGO_ENABLED=1 $(GOBUILD) -ldflags='-w -s -extldflags "-static"' -a -installsuffix cgo -o $(BUILD_DIR)/$(APP_NAME) ./cmd/gateway

test: ## Run tests
	@echo "Running tests..."
	$(GOTEST) -v -race -coverprofile=coverage.out ./...

test-coverage: test ## Run tests with coverage report
	@echo "Generating coverage report..."
	$(GOCMD) tool cover -html=coverage.out -o coverage.html
	@echo "Coverage report saved to coverage.html"

fmt: ## Format Go code
	@echo "Formatting code..."
	$(GOFMT) ./...

lint: ## Run linter
	@echo "Running linter..."
	@which golangci-lint > /dev/null || (echo "golangci-lint not installed. Run: go install github.com/golangci/golangci-lint/cmd/golangci-lint@latest" && exit 1)
	golangci-lint run ./...

tidy: ## Tidy go modules
	@echo "Tidying go modules..."
	$(GOMOD) tidy

clean: ## Clean build artifacts
	@echo "Cleaning..."
	rm -rf $(BUILD_DIR)
	rm -f coverage.out coverage.html

# Docker targets
docker-build: ## Build Docker image
	@echo "Building Docker image $(DOCKER_TAG)..."
	docker build -t $(DOCKER_TAG) -t $(LATEST_TAG) .

docker-push: docker-build ## Push Docker image to registry
	@echo "Pushing Docker image..."
	docker push $(DOCKER_TAG)
	docker push $(LATEST_TAG)

docker-run: ## Run Docker container locally
	@echo "Running Docker container..."
	docker run --rm -p 8080:8080 \
		-e GOOGLE_API_KEY="$(GOOGLE_API_KEY)" \
		-e ANTHROPIC_API_KEY="$(ANTHROPIC_API_KEY)" \
		-e OPENAI_API_KEY="$(OPENAI_API_KEY)" \
		-v $(PWD)/config.yaml:/app/config/config.yaml:ro \
		$(DOCKER_TAG)

docker-compose-up: ## Start services with docker-compose
	@echo "Starting services with docker-compose..."
	docker-compose up -d

docker-compose-down: ## Stop services with docker-compose
	@echo "Stopping services with docker-compose..."
	docker-compose down

docker-compose-logs: ## View docker-compose logs
	docker-compose logs -f

# Kubernetes targets
k8s-namespace: ## Create Kubernetes namespace
	kubectl create namespace llm-gateway --dry-run=client -o yaml | kubectl apply -f -

k8s-secrets: ## Create Kubernetes secrets (requires env vars)
	@echo "Creating secrets..."
	@if [ -z "$(GOOGLE_API_KEY)" ] || [ -z "$(ANTHROPIC_API_KEY)" ] || [ -z "$(OPENAI_API_KEY)" ]; then \
		echo "Error: Please set GOOGLE_API_KEY, ANTHROPIC_API_KEY, and OPENAI_API_KEY environment variables"; \
		exit 1; \
	fi
	kubectl create secret generic llm-gateway-secrets \
		--from-literal=GOOGLE_API_KEY="$(GOOGLE_API_KEY)" \
		--from-literal=ANTHROPIC_API_KEY="$(ANTHROPIC_API_KEY)" \
		--from-literal=OPENAI_API_KEY="$(OPENAI_API_KEY)" \
		--from-literal=OIDC_AUDIENCE="$(OIDC_AUDIENCE)" \
		-n llm-gateway \
		--dry-run=client -o yaml | kubectl apply -f -

k8s-deploy: k8s-namespace k8s-secrets ## Deploy to Kubernetes
	@echo "Deploying to Kubernetes..."
	kubectl apply -k k8s/

k8s-delete: ## Delete from Kubernetes
	@echo "Deleting from Kubernetes..."
	kubectl delete -k k8s/

k8s-status: ## Check Kubernetes deployment status
	@echo "Checking deployment status..."
	kubectl get all -n llm-gateway

k8s-logs: ## View Kubernetes logs
	kubectl logs -n llm-gateway -l app=llm-gateway --tail=100 -f

k8s-describe: ## Describe Kubernetes deployment
	kubectl describe deployment llm-gateway -n llm-gateway

k8s-port-forward: ## Port forward to local machine
	kubectl port-forward -n llm-gateway svc/llm-gateway 8080:80

# CI/CD targets
ci: lint test ## Run CI checks

security-scan: ## Run security scan
	@echo "Running security scan..."
	@which gosec > /dev/null || (echo "gosec not installed. Run: go install github.com/securego/gosec/v2/cmd/gosec@latest" && exit 1)
	gosec ./...

# Run target
run: ## Run locally
	@echo "Running $(APP_NAME) locally..."
	$(GOCMD) run ./cmd/gateway --config config.yaml

# Version info
version: ## Show version
	@echo "Version: $(VERSION)"
	@echo "Image: $(DOCKER_TAG)"

OBSERVABILITY.md

@@ -0,0 +1,327 @@
# Observability Implementation
This document describes the observability features implemented in the LLM Gateway.
## Overview
The gateway now includes comprehensive observability with:
- **Prometheus Metrics**: Track HTTP requests, provider calls, token usage, and conversation operations
- **OpenTelemetry Tracing**: Distributed tracing with OTLP exporter support
- **Enhanced Logging**: Trace context correlation for log aggregation
## Configuration
Add the following to your `config.yaml`:
```yaml
observability:
  enabled: true # Master switch for all observability features
  metrics:
    enabled: true
    path: "/metrics" # Prometheus metrics endpoint
  tracing:
    enabled: true
    service_name: "llm-gateway"
    sampler:
      type: "probability" # "always", "never", or "probability"
      rate: 0.1 # 10% sampling rate
    exporter:
      type: "otlp" # "otlp" for production, "stdout" for development
      endpoint: "localhost:4317" # OTLP collector endpoint
      insecure: true # Use insecure connection (for development)
      # headers: # Optional authentication headers
      #   authorization: "Bearer your-token"
```
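For orientation, the three sampler types map naturally onto the OpenTelemetry SDK. A minimal sketch of that mapping (the gateway's actual `observability.InitTracer` is not part of this diff, and the `SamplerConfig` field names are assumptions):

```go
package observability

import (
	sdktrace "go.opentelemetry.io/otel/sdk/trace"
)

// SamplerConfig mirrors the sampler section of the YAML above.
type SamplerConfig struct {
	Type string  // "always", "never", or "probability"
	Rate float64 // used only for "probability"
}

// newSampler picks an SDK sampler for the configured type.
func newSampler(cfg SamplerConfig) sdktrace.Sampler {
	switch cfg.Type {
	case "always":
		return sdktrace.AlwaysSample()
	case "never":
		return sdktrace.NeverSample()
	default: // "probability"
		// ParentBased keeps the decision consistent across a whole trace.
		return sdktrace.ParentBased(sdktrace.TraceIDRatioBased(cfg.Rate))
	}
}
```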
## Metrics
### HTTP Metrics
- `http_requests_total` - Total HTTP requests (labels: method, path, status)
- `http_request_duration_seconds` - Request latency histogram
- `http_request_size_bytes` - Request body size histogram
- `http_response_size_bytes` - Response body size histogram
### Provider Metrics
- `provider_requests_total` - Provider API calls (labels: provider, model, operation, status)
- `provider_request_duration_seconds` - Provider latency histogram
- `provider_tokens_total` - Token usage (labels: provider, model, type=input/output)
- `provider_stream_ttfb_seconds` - Time to first byte for streaming
- `provider_stream_chunks_total` - Stream chunk count
- `provider_stream_duration_seconds` - Total stream duration
### Conversation Store Metrics
- `conversation_operations_total` - Store operations (labels: operation, backend, status)
- `conversation_operation_duration_seconds` - Store operation latency
- `conversation_active_count` - Current number of conversations (gauge)
### Example Queries
```promql
# Request rate
rate(http_requests_total[5m])

# P95 latency
histogram_quantile(0.95, sum by (le) (rate(http_request_duration_seconds_bucket[5m])))

# Error rate
rate(http_requests_total{status=~"5.."}[5m])

# Tokens per minute by model
rate(provider_tokens_total[1m]) * 60

# Provider latency by model
histogram_quantile(0.95, sum by (le, provider, model) (rate(provider_request_duration_seconds_bucket[5m])))
```
## Tracing
### Trace Structure
Each request creates a trace with the following span hierarchy:
```
HTTP GET /v1/responses
├── provider.generate or provider.generate_stream
├── conversation.get (if using previous_response_id)
└── conversation.create (to store result)
```
### Span Attributes
HTTP spans include:
- `http.method`, `http.route`, `http.status_code`
- `http.request_id` - Request ID for correlation
- `trace_id`, `span_id` - For log correlation
Provider spans include:
- `provider.name`, `provider.model`
- `provider.input_tokens`, `provider.output_tokens`
- `provider.chunk_count`, `provider.ttfb_seconds` (for streaming)
Conversation spans include:
- `conversation.id`, `conversation.backend`
- `conversation.message_count`, `conversation.model`
### Log Correlation
Logs now include `trace_id` and `span_id` fields when tracing is enabled, allowing you to:
1. Find all logs for a specific trace
2. Jump from a log entry to the corresponding trace in Jaeger/Tempo
Example log entry:
```json
{
  "time": "2026-03-03T06:36:44Z",
  "level": "INFO",
  "msg": "response generated",
  "request_id": "74722802-6be1-4e14-8e73-d86823fed3e3",
  "trace_id": "5d8a7c3f2e1b9a8c7d6e5f4a3b2c1d0e",
  "span_id": "1a2b3c4d5e6f7a8b",
  "provider": "openai",
  "model": "gpt-4o-mini",
  "input_tokens": 23,
  "output_tokens": 156
}
```
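One straightforward way to produce these fields is a thin `slog.Handler` wrapper that reads the span context out of the request context. A sketch (the gateway's actual logger package is not shown in this diff):

```go
package logger

import (
	"context"
	"log/slog"

	"go.opentelemetry.io/otel/trace"
)

// traceHandler decorates an slog.Handler, adding trace_id and span_id
// attributes whenever the context carries a valid span context.
type traceHandler struct {
	slog.Handler
}

func (h traceHandler) Handle(ctx context.Context, r slog.Record) error {
	if sc := trace.SpanContextFromContext(ctx); sc.IsValid() {
		r.AddAttrs(
			slog.String("trace_id", sc.TraceID().String()),
			slog.String("span_id", sc.SpanID().String()),
		)
	}
	return h.Handler.Handle(ctx, r)
}
```

A handler wrapped this way only sees the span context when logging goes through the context-aware methods, which is why the middleware in this changeset logs with `logger.InfoContext(ctx, ...)`.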
## Testing Observability
### 1. Test Metrics Endpoint
```bash
# Start the gateway with observability enabled
./bin/gateway -config config.yaml
# Query metrics endpoint
curl http://localhost:8080/metrics
```
Expected output includes:
```
# HELP http_requests_total Total number of HTTP requests
# TYPE http_requests_total counter
http_requests_total{method="GET",path="/metrics",status="200"} 1

# HELP conversation_active_count Number of active conversations
# TYPE conversation_active_count gauge
conversation_active_count{backend="memory"} 0
```
### 2. Test Tracing with Stdout Exporter
Set up config with stdout exporter for quick testing:
```yaml
observability:
  enabled: true
  tracing:
    enabled: true
    sampler:
      type: "always"
    exporter:
      type: "stdout"
```
Make a request and check the logs for JSON-formatted spans.
### 3. Test Tracing with Jaeger
Run Jaeger with OTLP support:
```bash
docker run -d --name jaeger \
  -e COLLECTOR_OTLP_ENABLED=true \
  -p 4317:4317 \
  -p 16686:16686 \
  jaegertracing/all-in-one:latest
```
Update config:
```yaml
observability:
  enabled: true
  tracing:
    enabled: true
    sampler:
      type: "probability"
      rate: 1.0 # 100% for testing
    exporter:
      type: "otlp"
      endpoint: "localhost:4317"
      insecure: true
```
Make requests and view traces at http://localhost:16686
### 4. End-to-End Test
```bash
# Make a test request
curl -X POST http://localhost:8080/v1/responses \
-H "Content-Type: application/json" \
-d '{
"model": "gpt-4o-mini",
"input": "Hello, world!"
}'
# Check metrics
curl http://localhost:8080/metrics | grep -E "(http_requests|provider_)"
# Expected metrics updates:
# - http_requests_total incremented
# - provider_requests_total incremented
# - provider_tokens_total incremented for input and output
# - provider_request_duration_seconds updated
```
### 5. Load Test
```bash
# Install hey if needed
go install github.com/rakyll/hey@latest
# Run load test
hey -n 1000 -c 10 -m POST \
  -H "Content-Type: application/json" \
  -d '{"model":"gpt-4o-mini","input":"test"}' \
  http://localhost:8080/v1/responses
# Check metrics for aggregated data
curl http://localhost:8080/metrics | grep http_request_duration_seconds
```
## Integration with Monitoring Stack
### Prometheus
Add to `prometheus.yml`:
```yaml
scrape_configs:
  - job_name: 'llm-gateway'
    static_configs:
      - targets: ['localhost:8080']
    metrics_path: '/metrics'
    scrape_interval: 15s
```
### Grafana
Import dashboards for:
- HTTP request rates and latencies
- Provider performance by model
- Token usage and costs
- Error rates and types
### Tempo/Jaeger
The gateway exports traces via OTLP protocol. Configure your trace backend to accept OTLP on port 4317 (gRPC).
## Architecture
### Middleware Chain
```
Client Request
    ↓
loggingMiddleware (request ID, logging)
    ↓
tracingMiddleware (W3C Trace Context, spans)
    ↓
metricsMiddleware (Prometheus metrics)
    ↓
rateLimitMiddleware (rate limiting)
    ↓
authMiddleware (authentication)
    ↓
Application Routes
```
### Instrumentation Pattern
- **Providers**: Wrapped with `InstrumentedProvider` that tracks calls, latency, and token usage (see the sketch after this list)
- **Conversation Store**: Wrapped with `InstrumentedStore` that tracks operations and size
- **HTTP Layer**: Middleware captures request/response metrics and creates trace spans
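To make the wrapper pattern concrete, here is a minimal sketch of an instrumented provider. The interface and metric fields are assumptions based on the metric names in this document, not the gateway's exact types:

```go
package observability

import (
	"context"
	"time"

	"github.com/prometheus/client_golang/prometheus"
)

// Provider is an assumed minimal shape of the gateway's provider interface.
type Provider interface {
	Generate(ctx context.Context, model, input string) (string, error)
}

// InstrumentedProvider records call counts, latency, and status around
// the wrapped provider, matching provider_requests_total and
// provider_request_duration_seconds above.
type InstrumentedProvider struct {
	name     string
	inner    Provider
	requests *prometheus.CounterVec   // labels: provider, model, operation, status
	duration *prometheus.HistogramVec // labels: provider, model
}

func (p *InstrumentedProvider) Generate(ctx context.Context, model, input string) (string, error) {
	start := time.Now()
	out, err := p.inner.Generate(ctx, model, input)
	status := "ok"
	if err != nil {
		status = "error"
	}
	p.requests.WithLabelValues(p.name, model, "generate", status).Inc()
	p.duration.WithLabelValues(p.name, model).Observe(time.Since(start).Seconds())
	return out, err
}
```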
### W3C Trace Context
The gateway supports W3C Trace Context propagation:
- Extracts `traceparent` header from incoming requests
- Creates child spans for downstream operations
- Propagates context through the entire request lifecycle
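A minimal sketch of the extraction step, assuming the standard `otel`/`propagation` packages (the global propagator must have been set to `propagation.TraceContext{}` at startup):

```go
package observability

import (
	"net/http"

	"go.opentelemetry.io/otel"
	"go.opentelemetry.io/otel/propagation"
)

// extractTraceContext pulls a W3C traceparent header (if present) into the
// request context so downstream spans become children of the caller's span.
func extractTraceContext(r *http.Request) *http.Request {
	ctx := otel.GetTextMapPropagator().Extract(
		r.Context(), propagation.HeaderCarrier(r.Header))
	return r.WithContext(ctx)
}
```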
## Performance Impact
Observability features have minimal overhead:
- Metrics: < 1% latency increase
- Tracing (10% sampling): < 2% latency increase
- Tracing (100% sampling): < 5% latency increase
Recommended configuration for production:
- Metrics: Enabled
- Tracing: Enabled with 10-20% sampling rate
- Exporter: OTLP to dedicated collector
## Troubleshooting
### Metrics endpoint returns 404
- Check `observability.metrics.enabled` is `true`
- Verify `observability.enabled` is `true`
- Check `observability.metrics.path` configuration
### No traces appearing in Jaeger
- Verify OTLP collector is running on configured endpoint
- Check sampling rate (try `type: "always"` for testing)
- Look for tracer initialization errors in logs
- Verify `observability.tracing.enabled` is `true`
### High memory usage
- Reduce trace sampling rate
- Check for metric cardinality explosion (too many label combinations)
- Consider using recording rules in Prometheus
### Missing trace IDs in logs
- Ensure tracing is enabled
- Check that requests are being sampled (sampling rate > 0)
- Verify OpenTelemetry dependencies are correctly installed

README.md

@@ -61,6 +61,8 @@ latticelm (unified API)
**OAuth2/OIDC authentication** (Google, Auth0, any OIDC provider)
**Terminal chat client** (Python with Rich UI, PEP 723)
**Conversation tracking** (previous_response_id for efficient context)
**Rate limiting** (Per-IP token bucket with configurable limits)
**Health & readiness endpoints** (Kubernetes-compatible health checks)

## Quick Start

@@ -258,6 +260,54 @@ curl -X POST http://localhost:8080/v1/responses \
  -d '{"model": "gemini-2.0-flash-exp", ...}'
```
## Production Features
### Rate Limiting
Per-IP rate limiting using token bucket algorithm to prevent abuse and manage load:
```yaml
rate_limit:
  enabled: true
  requests_per_second: 10 # Max requests per second per IP
  burst: 20 # Maximum burst size
```
Features:
- **Token bucket algorithm** for smooth rate limiting (see the sketch after this list)
- **Per-IP limiting** with support for X-Forwarded-For headers
- **Configurable limits** for requests per second and burst size
- **Automatic cleanup** of stale rate limiters to prevent memory leaks
- **429 responses** with Retry-After header when limits exceeded
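A minimal sketch of the per-IP limiter pattern, assuming `golang.org/x/time/rate` (which go.mod pins); identifiers here are illustrative, and the X-Forwarded-For handling and stale-entry cleanup are omitted:

```go
package ratelimit

import (
	"net/http"
	"sync"

	"golang.org/x/time/rate"
)

// Limiter keeps one token bucket per client IP.
type Limiter struct {
	mu      sync.Mutex
	clients map[string]*rate.Limiter
	rps     rate.Limit
	burst   int
}

func (l *Limiter) limiterFor(ip string) *rate.Limiter {
	l.mu.Lock()
	defer l.mu.Unlock()
	lim, ok := l.clients[ip]
	if !ok {
		lim = rate.NewLimiter(l.rps, l.burst)
		l.clients[ip] = lim
	}
	return lim
}

// Handler rejects requests whose bucket is empty with 429 + Retry-After.
func (l *Limiter) Handler(next http.Handler) http.Handler {
	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		if !l.limiterFor(r.RemoteAddr).Allow() {
			w.Header().Set("Retry-After", "1")
			http.Error(w, "rate limit exceeded", http.StatusTooManyRequests)
			return
		}
		next.ServeHTTP(w, r)
	})
}
```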
### Health & Readiness Endpoints
Kubernetes-compatible health check endpoints for orchestration and load balancers:
**Liveness endpoint** (`/health`):
```bash
curl http://localhost:8080/health
# {"status":"healthy","timestamp":1709438400}
```
**Readiness endpoint** (`/ready`):
```bash
curl http://localhost:8080/ready
# {
#   "status":"ready",
#   "timestamp":1709438400,
#   "checks":{
#     "conversation_store":"healthy",
#     "providers":"healthy"
#   }
# }
```
The readiness endpoint verifies (a handler sketch follows):
- Conversation store connectivity
- That at least one provider is configured

It returns 503 if any check fails.
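A sketch of how such a readiness handler can aggregate named checks into the JSON shape above (the handler and check wiring are illustrative, not the gateway's exact code):

```go
package server

import (
	"encoding/json"
	"net/http"
	"time"
)

// readyHandler runs each named check and reports 200 or 503 accordingly.
func readyHandler(checks map[string]func() error) http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		status, code := "ready", http.StatusOK
		results := make(map[string]string, len(checks))
		for name, check := range checks {
			if err := check(); err != nil {
				results[name] = "unhealthy: " + err.Error()
				status, code = "not ready", http.StatusServiceUnavailable
			} else {
				results[name] = "healthy"
			}
		}
		w.Header().Set("Content-Type", "application/json")
		w.WriteHeader(code)
		// Encode error ignored here for brevity; the gateway logs it.
		_ = json.NewEncoder(w).Encode(map[string]any{
			"status":    status,
			"timestamp": time.Now().Unix(),
			"checks":    results,
		})
	}
}
```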
## Next Steps

- ~~Implement streaming responses~~

SECURITY_IMPROVEMENTS.md

@@ -0,0 +1,169 @@
# Security Improvements - March 2026
This document summarizes the security and reliability improvements made to the go-llm-gateway project.
## Issues Fixed
### 1. Request Size Limits (Issue #2) ✅
**Problem**: The server had no limits on request body size, making it vulnerable to DoS attacks via oversized payloads.
**Solution**: Implemented `RequestSizeLimitMiddleware` that enforces a maximum request body size.
**Implementation Details**:
- Created `internal/server/middleware.go` with `RequestSizeLimitMiddleware`
- Uses `http.MaxBytesReader` to enforce limits at the HTTP layer
- Default limit: 10MB (10,485,760 bytes)
- Configurable via `server.max_request_body_size` in config.yaml
- Returns HTTP 413 (Request Entity Too Large) for oversized requests
- Only applies to POST, PUT, and PATCH requests (not GET/DELETE)
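The core of such a middleware is small. A sketch matching the names used in this document (details beyond what is described above are assumptions):

```go
package server

import "net/http"

// MaxRequestBodyBytes is the default limit: 10MB.
const MaxRequestBodyBytes = 10 << 20

// RequestSizeLimitMiddleware caps the request body for methods that carry one.
func RequestSizeLimitMiddleware(next http.Handler, maxBytes int64) http.Handler {
	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		switch r.Method {
		case http.MethodPost, http.MethodPut, http.MethodPatch:
			// MaxBytesReader makes body reads fail once the limit is hit;
			// the server's error handling maps that failure to HTTP 413.
			r.Body = http.MaxBytesReader(w, r.Body, maxBytes)
		}
		next.ServeHTTP(w, r)
	})
}
```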
**Files Modified**:
- `internal/server/middleware.go` (new file)
- `internal/server/server.go` (added 413 error handling)
- `cmd/gateway/main.go` (integrated middleware)
- `internal/config/config.go` (added config field)
- `config.example.yaml` (documented configuration)
**Testing**:
- Comprehensive test suite in `internal/server/middleware_test.go`
- Tests cover: small payloads, exact size, oversized payloads, different HTTP methods
- Integration test verifies middleware chain behavior
### 2. Panic Recovery Middleware (Issue #4) ✅
**Problem**: Any panic in HTTP handlers would crash the entire server, causing downtime.
**Solution**: Implemented `PanicRecoveryMiddleware` that catches panics and returns proper error responses.
**Implementation Details**:
- Created `PanicRecoveryMiddleware` in `internal/server/middleware.go`
- Uses `defer recover()` pattern to catch all panics
- Logs full stack trace with request context for debugging
- Returns HTTP 500 (Internal Server Error) to clients
- Positioned as the outermost middleware to catch panics from all layers
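A sketch of the recovery pattern described above (log fields are illustrative):

```go
package server

import (
	"log/slog"
	"net/http"
	"runtime/debug"
)

// PanicRecoveryMiddleware converts panics into 500 responses instead of
// letting them crash the process.
func PanicRecoveryMiddleware(next http.Handler, logger *slog.Logger) http.Handler {
	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		defer func() {
			if rec := recover(); rec != nil {
				// Log the panic value and full stack trace with request context.
				logger.Error("panic recovered",
					slog.Any("panic", rec),
					slog.String("method", r.Method),
					slog.String("path", r.URL.Path),
					slog.String("stack", string(debug.Stack())),
				)
				http.Error(w, "internal server error", http.StatusInternalServerError)
			}
		}()
		next.ServeHTTP(w, r)
	})
}
```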
**Files Modified**:
- `internal/server/middleware.go` (new file)
- `cmd/gateway/main.go` (integrated as outermost middleware)
**Testing**:
- Tests verify recovery from string panics, error panics, and struct panics
- Integration test confirms panic recovery works through middleware chain
- Logs are captured and verified to include stack traces
### 3. Error Handling Improvements (Bonus) ✅
**Problem**: Multiple instances of ignored JSON encoding errors could lead to incomplete responses.
**Solution**: Fixed all ignored `json.Encoder.Encode()` errors throughout the codebase.
**Files Modified**:
- `internal/server/health.go` (lines 32, 86)
- `internal/server/server.go` (lines 72, 217)
All JSON encoding errors are now logged with proper context including request IDs.
## Architecture
### Middleware Chain Order
The middleware chain is now (from outermost to innermost):
1. **PanicRecoveryMiddleware** - Catches all panics
2. **RequestSizeLimitMiddleware** - Enforces body size limits
3. **loggingMiddleware** - Request/response logging
4. **TracingMiddleware** - OpenTelemetry tracing
5. **MetricsMiddleware** - Prometheus metrics
6. **rateLimitMiddleware** - Rate limiting
7. **authMiddleware** - OIDC authentication
8. **routes** - Application handlers
This order ensures:
- Panics are caught from all middleware layers
- Size limits are enforced before expensive operations
- All requests are logged, traced, and metered
- Security checks happen closest to the application
## Configuration
Add to your `config.yaml`:
```yaml
server:
  address: ":8080"
  max_request_body_size: 10485760 # 10MB in bytes (default)
```
To customize the size limit:
- **1MB**: `1048576`
- **5MB**: `5242880`
- **10MB**: `10485760` (default)
- **50MB**: `52428800`
If not specified, defaults to 10MB.
## Testing
All new functionality includes comprehensive tests:
```bash
# Run all tests
go test ./...
# Run only middleware tests
go test ./internal/server -v -run "TestPanicRecoveryMiddleware|TestRequestSizeLimitMiddleware"
# Run with coverage
go test ./internal/server -cover
```
**Test Coverage**:
- `internal/server/middleware.go`: 100% coverage
- All edge cases covered (panics, size limits, different HTTP methods)
- Integration tests verify middleware chain interactions
## Production Readiness
These changes significantly improve production readiness:
1. **DoS Protection**: Request size limits prevent memory exhaustion attacks
2. **Fault Tolerance**: Panic recovery prevents cascading failures
3. **Observability**: All errors are logged with proper context
4. **Configurability**: Limits can be tuned per deployment environment
## Remaining Production Concerns
While these issues are fixed, the following should still be addressed:
- **HIGH**: Exposed credentials in `.env` file (must rotate and remove from git)
- **MEDIUM**: Observability code has 0% test coverage
- **MEDIUM**: Conversation store has only 27% test coverage
- **LOW**: Missing circuit breaker pattern for provider failures
- **LOW**: No retry logic for failed provider requests
See the original assessment for complete details.
## Verification
Build and verify the changes:
```bash
# Build the application
go build ./cmd/gateway
# Run the gateway
./gateway -config config.yaml
# Test with oversized payload (should return 413)
curl -X POST http://localhost:8080/v1/responses \
-H "Content-Type: application/json" \
-d "$(python3 -c 'print("{\"data\":\"" + "x"*11000000 + "\"}")')"
```
Expected response: `HTTP 413 Request Entity Too Large`
## References
- [OWASP: Testing for Client-side Resource Manipulation](https://owasp.org/www-project-web-security-testing-guide/latest/4-Web_Application_Security_Testing/11-Client-side_Testing/04-Testing_for_Client-side_Resource_Manipulation)
- [CWE-400: Uncontrolled Resource Consumption](https://cwe.mitre.org/data/definitions/400.html)
- [Go HTTP Server Best Practices](https://blog.cloudflare.com/the-complete-guide-to-golang-net-http-timeouts/)

cmd/gateway/main.go

@@ -6,11 +6,15 @@ import (
	"flag"
	"fmt"
	"log"
	"log/slog"
	"net/http"
	"os"
	"os/signal"
	"syscall"
	"time"

	_ "github.com/go-sql-driver/mysql"
	"github.com/google/uuid"
	_ "github.com/jackc/pgx/v5/stdlib"
	_ "github.com/mattn/go-sqlite3"
	"github.com/redis/go-redis/v9"
@@ -18,8 +22,15 @@ import (
	"github.com/ajac-zero/latticelm/internal/auth"
	"github.com/ajac-zero/latticelm/internal/config"
	"github.com/ajac-zero/latticelm/internal/conversation"
	slogger "github.com/ajac-zero/latticelm/internal/logger"
	"github.com/ajac-zero/latticelm/internal/observability"
	"github.com/ajac-zero/latticelm/internal/providers"
	"github.com/ajac-zero/latticelm/internal/ratelimit"
	"github.com/ajac-zero/latticelm/internal/server"
	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promhttp"
	"go.opentelemetry.io/otel"
	sdktrace "go.opentelemetry.io/otel/sdk/trace"
)

func main() {
@@ -32,12 +43,78 @@ func main() {
		log.Fatalf("load config: %v", err)
	}

	// Initialize logger from config
	logFormat := cfg.Logging.Format
	if logFormat == "" {
		logFormat = "json"
	}
	logLevel := cfg.Logging.Level
	if logLevel == "" {
		logLevel = "info"
	}
	logger := slogger.New(logFormat, logLevel)

	// Initialize tracing
	var tracerProvider *sdktrace.TracerProvider
	if cfg.Observability.Enabled && cfg.Observability.Tracing.Enabled {
		// Set defaults
		tracingCfg := cfg.Observability.Tracing
		if tracingCfg.ServiceName == "" {
			tracingCfg.ServiceName = "llm-gateway"
		}
		if tracingCfg.Sampler.Type == "" {
			tracingCfg.Sampler.Type = "probability"
			tracingCfg.Sampler.Rate = 0.1
		}
		tp, err := observability.InitTracer(tracingCfg)
		if err != nil {
			logger.Error("failed to initialize tracing", slog.String("error", err.Error()))
		} else {
			tracerProvider = tp
			otel.SetTracerProvider(tracerProvider)
			logger.Info("tracing initialized",
				slog.String("exporter", tracingCfg.Exporter.Type),
				slog.String("sampler", tracingCfg.Sampler.Type),
			)
		}
	}

	// Initialize metrics
	var metricsRegistry *prometheus.Registry
	if cfg.Observability.Enabled && cfg.Observability.Metrics.Enabled {
		metricsRegistry = observability.InitMetrics()
		metricsPath := cfg.Observability.Metrics.Path
		if metricsPath == "" {
			metricsPath = "/metrics"
		}
		logger.Info("metrics initialized", slog.String("path", metricsPath))
	}

	// Create provider registry with circuit breaker support
	var baseRegistry *providers.Registry
	if cfg.Observability.Enabled && cfg.Observability.Metrics.Enabled {
		// Pass observability callback for circuit breaker state changes
		baseRegistry, err = providers.NewRegistryWithCircuitBreaker(
			cfg.Providers,
			cfg.Models,
			observability.RecordCircuitBreakerStateChange,
		)
	} else {
		// No observability, use default registry
		baseRegistry, err = providers.NewRegistry(cfg.Providers, cfg.Models)
	}
	if err != nil {
		logger.Error("failed to initialize providers", slog.String("error", err.Error()))
		os.Exit(1)
	}

	// Wrap providers with observability
	var registry server.ProviderRegistry = baseRegistry
	if cfg.Observability.Enabled {
		registry = observability.WrapProviderRegistry(registry, metricsRegistry, tracerProvider)
		logger.Info("providers instrumented")
	}

	// Initialize authentication middleware
	authConfig := auth.Config{
@@ -45,34 +122,100 @@ func main() {
		Issuer:   cfg.Auth.Issuer,
		Audience: cfg.Auth.Audience,
	}
	authMiddleware, err := auth.New(authConfig, logger)
	if err != nil {
		logger.Error("failed to initialize auth", slog.String("error", err.Error()))
		os.Exit(1)
	}
	if cfg.Auth.Enabled {
		logger.Info("authentication enabled", slog.String("issuer", cfg.Auth.Issuer))
	} else {
		logger.Warn("authentication disabled - API is publicly accessible")
	}

	// Initialize conversation store
	convStore, storeBackend, err := initConversationStore(cfg.Conversations, logger)
	if err != nil {
		logger.Error("failed to initialize conversation store", slog.String("error", err.Error()))
		os.Exit(1)
	}

	// Wrap conversation store with observability
	if cfg.Observability.Enabled && convStore != nil {
		convStore = observability.WrapConversationStore(convStore, storeBackend, metricsRegistry, tracerProvider)
		logger.Info("conversation store instrumented")
	}

	gatewayServer := server.New(registry, convStore, logger)
	mux := http.NewServeMux()
	gatewayServer.RegisterRoutes(mux)

	// Register metrics endpoint if enabled
	if cfg.Observability.Enabled && cfg.Observability.Metrics.Enabled {
		metricsPath := cfg.Observability.Metrics.Path
		if metricsPath == "" {
			metricsPath = "/metrics"
		}
		mux.Handle(metricsPath, promhttp.HandlerFor(metricsRegistry, promhttp.HandlerOpts{}))
		logger.Info("metrics endpoint registered", slog.String("path", metricsPath))
	}

	addr := cfg.Server.Address
	if addr == "" {
		addr = ":8080"
	}

	// Initialize rate limiting
	rateLimitConfig := ratelimit.Config{
		Enabled:           cfg.RateLimit.Enabled,
		RequestsPerSecond: cfg.RateLimit.RequestsPerSecond,
		Burst:             cfg.RateLimit.Burst,
	}
	// Set defaults if not configured
	if rateLimitConfig.Enabled && rateLimitConfig.RequestsPerSecond == 0 {
		rateLimitConfig.RequestsPerSecond = 10 // default 10 req/s
	}
	if rateLimitConfig.Enabled && rateLimitConfig.Burst == 0 {
		rateLimitConfig.Burst = 20 // default burst of 20
	}
	rateLimitMiddleware := ratelimit.New(rateLimitConfig, logger)
	if cfg.RateLimit.Enabled {
		logger.Info("rate limiting enabled",
			slog.Float64("requests_per_second", rateLimitConfig.RequestsPerSecond),
			slog.Int("burst", rateLimitConfig.Burst),
		)
	}

	// Determine max request body size
	maxRequestBodySize := cfg.Server.MaxRequestBodySize
	if maxRequestBodySize == 0 {
		maxRequestBodySize = server.MaxRequestBodyBytes // default: 10MB
	}
	logger.Info("server configuration",
		slog.Int64("max_request_body_bytes", maxRequestBodySize),
	)

	// Build handler chain: panic recovery -> request size limit -> logging -> tracing -> metrics -> rate limiting -> auth -> routes
	handler := server.PanicRecoveryMiddleware(
		server.RequestSizeLimitMiddleware(
			loggingMiddleware(
				observability.TracingMiddleware(
					observability.MetricsMiddleware(
						rateLimitMiddleware.Handler(authMiddleware.Handler(mux)),
						metricsRegistry,
						tracerProvider,
					),
					tracerProvider,
				),
				logger,
			),
			maxRequestBodySize,
		),
		logger,
	)

	srv := &http.Server{
		Addr: addr,
@@ -82,18 +225,63 @@ func main() {
		IdleTimeout: 120 * time.Second,
	}

	// Set up signal handling for graceful shutdown
	sigChan := make(chan os.Signal, 1)
	signal.Notify(sigChan, os.Interrupt, syscall.SIGTERM)

	// Run server in a goroutine
	serverErrors := make(chan error, 1)
	go func() {
		logger.Info("open responses gateway listening", slog.String("address", addr))
		serverErrors <- srv.ListenAndServe()
	}()

	// Wait for shutdown signal or server error
	select {
	case err := <-serverErrors:
		if err != nil && err != http.ErrServerClosed {
			logger.Error("server error", slog.String("error", err.Error()))
			os.Exit(1)
		}
	case sig := <-sigChan:
		logger.Info("received shutdown signal", slog.String("signal", sig.String()))

		// Create shutdown context with timeout
		shutdownCtx, shutdownCancel := context.WithTimeout(context.Background(), 30*time.Second)
		defer shutdownCancel()

		// Shutdown the HTTP server gracefully
		logger.Info("shutting down server gracefully")
		if err := srv.Shutdown(shutdownCtx); err != nil {
			logger.Error("server shutdown error", slog.String("error", err.Error()))
		}

		// Shutdown tracer provider
		if tracerProvider != nil {
			logger.Info("shutting down tracer")
			shutdownTracerCtx, shutdownTracerCancel := context.WithTimeout(context.Background(), 5*time.Second)
			defer shutdownTracerCancel()
			if err := observability.Shutdown(shutdownTracerCtx, tracerProvider); err != nil {
				logger.Error("error shutting down tracer", slog.String("error", err.Error()))
			}
		}

		// Close conversation store
		logger.Info("closing conversation store")
		if err := convStore.Close(); err != nil {
			logger.Error("error closing conversation store", slog.String("error", err.Error()))
		}

		logger.Info("shutdown complete")
	}
}

func initConversationStore(cfg config.ConversationConfig, logger *slog.Logger) (conversation.Store, string, error) {
	var ttl time.Duration
	if cfg.TTL != "" {
		parsed, err := time.ParseDuration(cfg.TTL)
		if err != nil {
			return nil, "", fmt.Errorf("invalid conversation ttl %q: %w", cfg.TTL, err)
		}
		ttl = parsed
	}
@@ -106,18 +294,22 @@ func initConversationStore(cfg config.ConversationConfig, logger *log.Logger) (c
		}
		db, err := sql.Open(driver, cfg.DSN)
		if err != nil {
			return nil, "", fmt.Errorf("open database: %w", err)
		}
		store, err := conversation.NewSQLStore(db, driver, ttl)
		if err != nil {
			return nil, "", fmt.Errorf("init sql store: %w", err)
		}
		logger.Info("conversation store initialized",
			slog.String("backend", "sql"),
			slog.String("driver", driver),
			slog.Duration("ttl", ttl),
		)
		return store, "sql", nil
	case "redis":
		opts, err := redis.ParseURL(cfg.DSN)
		if err != nil {
			return nil, "", fmt.Errorf("parse redis dsn: %w", err)
		}
		client := redis.NewClient(opts)
@@ -125,20 +317,86 @@ func initConversationStore(cfg config.ConversationConfig, logger *log.Logger) (c
		defer cancel()
		if err := client.Ping(ctx).Err(); err != nil {
			return nil, "", fmt.Errorf("connect to redis: %w", err)
		}
		logger.Info("conversation store initialized",
			slog.String("backend", "redis"),
			slog.Duration("ttl", ttl),
		)
		return conversation.NewRedisStore(client, ttl), "redis", nil
	default:
		logger.Info("conversation store initialized",
			slog.String("backend", "memory"),
			slog.Duration("ttl", ttl),
		)
		return conversation.NewMemoryStore(ttl), "memory", nil
	}
}

type responseWriter struct {
	http.ResponseWriter
	statusCode   int
	bytesWritten int
}

func (rw *responseWriter) WriteHeader(code int) {
	rw.statusCode = code
	rw.ResponseWriter.WriteHeader(code)
}

func (rw *responseWriter) Write(b []byte) (int, error) {
	n, err := rw.ResponseWriter.Write(b)
	rw.bytesWritten += n
	return n, err
}

func loggingMiddleware(next http.Handler, logger *slog.Logger) http.Handler {
	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		start := time.Now()

		// Generate request ID
		requestID := uuid.NewString()
		ctx := slogger.WithRequestID(r.Context(), requestID)
		r = r.WithContext(ctx)

		// Wrap response writer to capture status code
		rw := &responseWriter{
			ResponseWriter: w,
			statusCode:     http.StatusOK,
		}

		// Add request ID header
		w.Header().Set("X-Request-ID", requestID)

		// Log request start
		logger.InfoContext(ctx, "request started",
			slog.String("request_id", requestID),
			slog.String("method", r.Method),
			slog.String("path", r.URL.Path),
			slog.String("remote_addr", r.RemoteAddr),
			slog.String("user_agent", r.UserAgent()),
		)

		next.ServeHTTP(rw, r)
		duration := time.Since(start)

		// Log request completion with appropriate level
		logLevel := slog.LevelInfo
		if rw.statusCode >= 500 {
			logLevel = slog.LevelError
		} else if rw.statusCode >= 400 {
			logLevel = slog.LevelWarn
		}
		logger.Log(ctx, logLevel, "request completed",
			slog.String("request_id", requestID),
			slog.String("method", r.Method),
			slog.String("path", r.URL.Path),
			slog.Int("status_code", rw.statusCode),
			slog.Int("response_bytes", rw.bytesWritten),
			slog.Duration("duration", duration),
			slog.Float64("duration_ms", float64(duration.Milliseconds())),
		)
	})
}
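The circuit-breaker registry wired up above (`providers.NewRegistryWithCircuitBreaker` with a state-change callback) is not part of this compare view. Since go.mod adds `sony/gobreaker` v1.0.0, the provider wrapping plausibly looks something like this sketch; everything except the gobreaker API itself is an assumption:

```go
package providers

import (
	"context"

	"github.com/sony/gobreaker"
)

// StateChangeFunc matches the shape of an observability callback for
// breaker state transitions (an assumed signature).
type StateChangeFunc func(name string, from, to gobreaker.State)

// newBreaker builds a breaker that reports state changes to the callback.
func newBreaker(name string, onChange StateChangeFunc) *gobreaker.CircuitBreaker {
	return gobreaker.NewCircuitBreaker(gobreaker.Settings{
		Name: name,
		OnStateChange: func(n string, from, to gobreaker.State) {
			if onChange != nil {
				onChange(n, from, to)
			}
		},
	})
}

// generateWithBreaker runs a provider call through the breaker; when the
// breaker is open, Execute fails fast with gobreaker.ErrOpenState.
func generateWithBreaker(ctx context.Context, cb *gobreaker.CircuitBreaker,
	call func(context.Context) (string, error)) (string, error) {
	out, err := cb.Execute(func() (any, error) { return call(ctx) })
	if err != nil {
		return "", err
	}
	return out.(string), nil
}
```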

config.example.yaml

@@ -1,5 +1,35 @@
server:
  address: ":8080"
  max_request_body_size: 10485760 # Maximum request body size in bytes (default: 10MB = 10485760 bytes)

logging:
  format: "json" # "json" for production, "text" for development
  level: "info" # "debug", "info", "warn", or "error"

rate_limit:
  enabled: false # Enable rate limiting (recommended for production)
  requests_per_second: 10 # Max requests per second per IP (default: 10)
  burst: 20 # Maximum burst size (default: 20)

observability:
  enabled: false # Enable observability features (metrics and tracing)
  metrics:
    enabled: false # Enable Prometheus metrics
    path: "/metrics" # Metrics endpoint path (default: /metrics)
  tracing:
    enabled: false # Enable OpenTelemetry tracing
    service_name: "llm-gateway" # Service name for traces (default: llm-gateway)
    sampler:
      type: "probability" # Sampling type: "always", "never", "probability"
      rate: 0.1 # Sample rate for probability sampler (0.0 to 1.0, default: 0.1 = 10%)
    exporter:
      type: "otlp" # Exporter type: "otlp" (production), "stdout" (development)
      endpoint: "localhost:4317" # OTLP collector endpoint (gRPC)
      insecure: true # Use insecure connection (for development)
      # headers: # Optional: custom headers for authentication
      #   authorization: "Bearer your-token-here"

providers:
  google:

config.test.yaml

@@ -0,0 +1,21 @@
server:
  address: ":8080"

logging:
  format: "text" # text format for easy reading in development
  level: "debug" # debug level to see all logs

rate_limit:
  enabled: false # disabled for testing
  requests_per_second: 100
  burst: 200

providers:
  mock:
    type: "openai"
    api_key: "test-key"
    endpoint: "https://api.openai.com"

models:
  - name: "gpt-4o-mini"
    provider: "mock"

docker-compose.yaml

@@ -0,0 +1,102 @@
# Docker Compose for local development and testing
# Not recommended for production - use Kubernetes instead
version: '3.9'

services:
  gateway:
    build:
      context: .
      dockerfile: Dockerfile
    image: llm-gateway:latest
    container_name: llm-gateway
    ports:
      - "8080:8080"
    environment:
      # Provider API keys
      GOOGLE_API_KEY: ${GOOGLE_API_KEY}
      ANTHROPIC_API_KEY: ${ANTHROPIC_API_KEY}
      OPENAI_API_KEY: ${OPENAI_API_KEY}
      OIDC_AUDIENCE: ${OIDC_AUDIENCE:-}
    volumes:
      - ./config.yaml:/app/config/config.yaml:ro
    depends_on:
      redis:
        condition: service_healthy
    networks:
      - llm-network
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:8080/health"]
      interval: 30s
      timeout: 5s
      retries: 3
      start_period: 10s

  redis:
    image: redis:7.2-alpine
    container_name: llm-gateway-redis
    ports:
      - "6379:6379"
    command: redis-server --maxmemory 256mb --maxmemory-policy allkeys-lru
    volumes:
      - redis-data:/data
    networks:
      - llm-network
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "redis-cli", "ping"]
      interval: 10s
      timeout: 3s
      retries: 3

  # Optional: Prometheus for metrics
  prometheus:
    image: prom/prometheus:latest
    container_name: llm-gateway-prometheus
    ports:
      - "9090:9090"
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'
      - '--web.console.libraries=/usr/share/prometheus/console_libraries'
      - '--web.console.templates=/usr/share/prometheus/consoles'
    volumes:
      - ./monitoring/prometheus.yml:/etc/prometheus/prometheus.yml:ro
      - prometheus-data:/prometheus
    networks:
      - llm-network
    restart: unless-stopped
    profiles:
      - monitoring

  # Optional: Grafana for visualization
  grafana:
    image: grafana/grafana:latest
    container_name: llm-gateway-grafana
    ports:
      - "3000:3000"
    environment:
      - GF_SECURITY_ADMIN_PASSWORD=admin
      - GF_USERS_ALLOW_SIGN_UP=false
    volumes:
      - ./monitoring/grafana-datasources.yml:/etc/grafana/provisioning/datasources/datasources.yml:ro
      - ./monitoring/grafana-dashboards.yml:/etc/grafana/provisioning/dashboards/dashboards.yml:ro
      - ./monitoring/dashboards:/var/lib/grafana/dashboards:ro
      - grafana-data:/var/lib/grafana
    depends_on:
      - prometheus
    networks:
      - llm-network
    restart: unless-stopped
    profiles:
      - monitoring

networks:
  llm-network:
    driver: bridge

volumes:
  redis-data:
  prometheus-data:
  grafana-data:

go.mod

@@ -9,10 +9,19 @@ require (
	github.com/google/uuid v1.6.0
	github.com/jackc/pgx/v5 v5.8.0
	github.com/mattn/go-sqlite3 v1.14.34
	github.com/openai/openai-go v1.12.0
	github.com/openai/openai-go/v3 v3.2.0
	github.com/prometheus/client_golang v1.19.0
	github.com/redis/go-redis/v9 v9.18.0
	github.com/sony/gobreaker v1.0.0
	github.com/stretchr/testify v1.11.1
	go.opentelemetry.io/otel v1.29.0
	go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.29.0
	go.opentelemetry.io/otel/exporters/stdout/stdouttrace v1.29.0
	go.opentelemetry.io/otel/sdk v1.29.0
	go.opentelemetry.io/otel/trace v1.29.0
	golang.org/x/time v0.14.0
	google.golang.org/genai v1.48.0
	google.golang.org/grpc v1.66.2
	gopkg.in/yaml.v3 v3.0.1
)
@@ -23,28 +32,41 @@ require (
	filippo.io/edwards25519 v1.1.0 // indirect
	github.com/Azure/azure-sdk-for-go/sdk/azcore v1.21.0 // indirect
	github.com/Azure/azure-sdk-for-go/sdk/internal v1.11.2 // indirect
	github.com/beorn7/perks v1.0.1 // indirect
	github.com/cenkalti/backoff/v4 v4.3.0 // indirect
	github.com/cespare/xxhash/v2 v2.3.0 // indirect
	github.com/davecgh/go-spew v1.1.1 // indirect
	github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect
	github.com/go-logr/logr v1.4.2 // indirect
	github.com/go-logr/stdr v1.2.2 // indirect
	github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect
	github.com/google/go-cmp v0.6.0 // indirect
	github.com/google/s2a-go v0.1.8 // indirect
	github.com/googleapis/enterprise-certificate-proxy v0.3.4 // indirect
	github.com/gorilla/websocket v1.5.3 // indirect
	github.com/grpc-ecosystem/grpc-gateway/v2 v2.22.0 // indirect
	github.com/jackc/pgpassfile v1.0.0 // indirect
	github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 // indirect
	github.com/jackc/puddle/v2 v2.2.2 // indirect
	github.com/pmezard/go-difflib v1.0.0 // indirect
	github.com/prometheus/client_model v0.5.0 // indirect
	github.com/prometheus/common v0.48.0 // indirect
	github.com/prometheus/procfs v0.12.0 // indirect
	github.com/tidwall/gjson v1.18.0 // indirect
	github.com/tidwall/match v1.1.1 // indirect
	github.com/tidwall/pretty v1.2.1 // indirect
	github.com/tidwall/sjson v1.2.5 // indirect
	go.opencensus.io v0.24.0 // indirect
	go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.29.0 // indirect
	go.opentelemetry.io/otel/metric v1.29.0 // indirect
	go.opentelemetry.io/proto/otlp v1.3.1 // indirect
	go.uber.org/atomic v1.11.0 // indirect
	golang.org/x/crypto v0.47.0 // indirect
	golang.org/x/net v0.49.0 // indirect
	golang.org/x/sync v0.19.0 // indirect
	golang.org/x/sys v0.40.0 // indirect
	golang.org/x/text v0.33.0 // indirect
	google.golang.org/genproto/googleapis/api v0.0.0-20240903143218-8af14fe29dc1 // indirect
	google.golang.org/genproto/googleapis/rpc v0.0.0-20240903143218-8af14fe29dc1 // indirect
	google.golang.org/protobuf v1.34.2 // indirect
)

go.sum

@@ -18,10 +18,14 @@ github.com/AzureAD/microsoft-authentication-library-for-go v1.4.2/go.mod h1:wP83
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/anthropics/anthropic-sdk-go v1.26.0 h1:oUTzFaUpAevfuELAP1sjL6CQJ9HHAfT7CoSYSac11PY=
github.com/anthropics/anthropic-sdk-go v1.26.0/go.mod h1:qUKmaW+uuPB64iy1l+4kOSvaLqPXnHTTBKH6RVZ7q5Q=
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
github.com/bsm/ginkgo/v2 v2.12.0 h1:Ny8MWAHyOepLGlLKYmXG4IEkioBysk6GpaRTLC8zwWs=
github.com/bsm/ginkgo/v2 v2.12.0/go.mod h1:SwYbGRRDovPVboqFv0tPTcG1sN61LM1Z4ARdbAV9g4c=
github.com/bsm/gomega v1.27.10 h1:yeMWxP2pV2fG3FgAODIY8EiRE3dy0aeFYt4l7wh6yKA=
github.com/bsm/gomega v1.27.10/go.mod h1:JyEr/xRbxbtgWNi8tIEVPUYZ5Dzef52k01W3YH0H+O0=
github.com/cenkalti/backoff/v4 v4.3.0 h1:MyRJ/UdXutAwSAT+s3wNd7MfTIcy71VQueUuFK343L8=
github.com/cenkalti/backoff/v4 v4.3.0/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE=
github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
@@ -38,6 +42,11 @@ github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymF
github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98=
github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c=
github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY=
github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag=
github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE=
github.com/go-sql-driver/mysql v1.9.3 h1:U/N249h2WzJ3Ukj8SowVFjdtZKfu9vlLZxjPXV1aweo=
github.com/go-sql-driver/mysql v1.9.3/go.mod h1:qn46aNg1333BRMNU69Lq93t8du/dwxI64Gl8i5p1WMU=
github.com/golang-jwt/jwt/v5 v5.3.1 h1:kYf81DTWFe7t+1VvL7eS+jKFVWaUnK9cB1qbwn63YCY=
@@ -73,6 +82,8 @@ github.com/googleapis/enterprise-certificate-proxy v0.3.4 h1:XYIDZApgAnrN1c855gT
github.com/googleapis/enterprise-certificate-proxy v0.3.4/go.mod h1:YKe7cfqYXjKGpGvmSg28/fFvhNzinZQm8DGnaburhGA=
github.com/gorilla/websocket v1.5.3 h1:saDtZ6Pbx/0u+bgYQ3q96pZgCzfhKXGPqt7kZ72aNNg=
github.com/gorilla/websocket v1.5.3/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE=
github.com/grpc-ecosystem/grpc-gateway/v2 v2.22.0 h1:asbCHRVmodnJTuQ3qamDwqVOIjwqUPTYmYuemVOx+Ys=
github.com/grpc-ecosystem/grpc-gateway/v2 v2.22.0/go.mod h1:ggCgvZ2r7uOoQjOyu2Y1NhHmEPPzzuhWgcza5M1Ji1I=
github.com/jackc/pgpassfile v1.0.0 h1:/6Hmqy13Ss2zCq62VdNG8tM1wchn8zjSGOBJ6icpsIM=
github.com/jackc/pgpassfile v1.0.0/go.mod h1:CEx0iS5ambNFdcRtxPj5JhEz+xB6uRky5eyVu/W2HEg=
github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 h1:iCEnooe7UlwOQYpKFhBabPMi4aNAfoODPEFNiAnClxo=
@@ -91,19 +102,27 @@ github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0
github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw=
github.com/mattn/go-sqlite3 v1.14.34 h1:3NtcvcUnFBPsuRcno8pUtupspG/GM+9nZ88zgJcp6Zk=
github.com/mattn/go-sqlite3 v1.14.34/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y=
github.com/openai/openai-go v1.12.0 h1:NBQCnXzqOTv5wsgNC36PrFEiskGfO5wccfCWDo9S1U0=
github.com/openai/openai-go v1.12.0/go.mod h1:g461MYGXEXBVdV5SaR/5tNzNbSfwTBBefwc+LlDCK0Y=
github.com/openai/openai-go/v3 v3.2.0 h1:2AbqFUCsoW2pm/2pUtPRuwK89dnoGHaQokzWsfoQO/U=
github.com/openai/openai-go/v3 v3.2.0/go.mod h1:UOpNxkqC9OdNXNUfpNByKOtB4jAL0EssQXq5p8gO0Xs=
github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c h1:+mdjkGKdHQG3305AYmdv1U2eRNDiU2ErMBj1gwrq8eQ=
github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c/go.mod h1:7rwL4CYBLnjLxUqIJNnCWiEdr3bn6IUYi15bNlnbCCU=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/prometheus/client_golang v1.19.0 h1:ygXvpU1AoN1MhdzckN+PyD9QJOSD4x7kmXYlnfbA6JU=
github.com/prometheus/client_golang v1.19.0/go.mod h1:ZRM9uEAypZakd+q/x7+gmsvXdURP+DABIEIjnmDdp+k=
github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
github.com/prometheus/client_model v0.5.0 h1:VQw1hfvPvk3Uv6Qf29VrPF32JB6rtbgI6cYPYQjL0Qw=
github.com/prometheus/client_model v0.5.0/go.mod h1:dTiFglRmd66nLR9Pv9f0mZi7B7fk5Pm3gvsjB5tr+kI=
github.com/prometheus/common v0.48.0 h1:QO8U2CdOzSn1BBsmXJXduaaW+dY/5QLjfB8svtSzKKE=
github.com/prometheus/common v0.48.0/go.mod h1:0/KsvlIEfPQCQ5I2iNSAWKPZziNCvRs5EC6ILDTlAPc=
github.com/prometheus/procfs v0.12.0 h1:jluTpSng7V9hY0O2R9DzzJHYb2xULk9VTR1V1R/k6Bo=
github.com/prometheus/procfs v0.12.0/go.mod h1:pcuDEFsWDnvcgNzo4EEweacyhjeA9Zk3cnaOZAZEfOo=
github.com/redis/go-redis/v9 v9.18.0 h1:pMkxYPkEbMPwRdenAzUNyFNrDgHx9U+DrBabWNfSRQs= github.com/redis/go-redis/v9 v9.18.0 h1:pMkxYPkEbMPwRdenAzUNyFNrDgHx9U+DrBabWNfSRQs=
github.com/redis/go-redis/v9 v9.18.0/go.mod h1:k3ufPphLU5YXwNTUcCRXGxUoF1fqxnhFQmscfkCoDA0= github.com/redis/go-redis/v9 v9.18.0/go.mod h1:k3ufPphLU5YXwNTUcCRXGxUoF1fqxnhFQmscfkCoDA0=
github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8= github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8=
github.com/rogpeppe/go-internal v1.12.0/go.mod h1:E+RYuTGaKKdloAfM02xzb0FW3Paa99yedzYV+kq4uf4= github.com/rogpeppe/go-internal v1.12.0/go.mod h1:E+RYuTGaKKdloAfM02xzb0FW3Paa99yedzYV+kq4uf4=
github.com/sony/gobreaker v1.0.0 h1:feX5fGGXSl3dYd4aHZItw+FpHLvvoaqkawKjVNiFMNQ=
github.com/sony/gobreaker v1.0.0/go.mod h1:ZKptC7FHNvhBz7dN2LGjPVBz2sZJmc0/PkyDJOjmxWY=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
@@ -128,8 +147,26 @@ github.com/zeebo/xxh3 v1.0.2 h1:xZmwmqxHZA8AI603jOQ0tMqmBr9lPeFwGg6d+xy9DC0=
github.com/zeebo/xxh3 v1.0.2/go.mod h1:5NWz9Sef7zIDm2JHfFlcQvNekmcEl9ekUZQQKCYaDcA= github.com/zeebo/xxh3 v1.0.2/go.mod h1:5NWz9Sef7zIDm2JHfFlcQvNekmcEl9ekUZQQKCYaDcA=
go.opencensus.io v0.24.0 h1:y73uSU6J157QMP2kn2r30vwW1A2W2WFwSCGnAVxeaD0= go.opencensus.io v0.24.0 h1:y73uSU6J157QMP2kn2r30vwW1A2W2WFwSCGnAVxeaD0=
go.opencensus.io v0.24.0/go.mod h1:vNK8G9p7aAivkbmorf4v+7Hgx+Zs0yY+0fOtgBfjQKo= go.opencensus.io v0.24.0/go.mod h1:vNK8G9p7aAivkbmorf4v+7Hgx+Zs0yY+0fOtgBfjQKo=
go.opentelemetry.io/otel v1.29.0 h1:PdomN/Al4q/lN6iBJEN3AwPvUiHPMlt93c8bqTG5Llw=
go.opentelemetry.io/otel v1.29.0/go.mod h1:N/WtXPs1CNCUEx+Agz5uouwCba+i+bJGFicT8SR4NP8=
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.29.0 h1:dIIDULZJpgdiHz5tXrTgKIMLkus6jEFa7x5SOKcyR7E=
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.29.0/go.mod h1:jlRVBe7+Z1wyxFSUs48L6OBQZ5JwH2Hg/Vbl+t9rAgI=
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.29.0 h1:nSiV3s7wiCam610XcLbYOmMfJxB9gO4uK3Xgv5gmTgg=
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.29.0/go.mod h1:hKn/e/Nmd19/x1gvIHwtOwVWM+VhuITSWip3JUDghj0=
go.opentelemetry.io/otel/exporters/stdout/stdouttrace v1.29.0 h1:X3ZjNp36/WlkSYx0ul2jw4PtbNEDDeLskw3VPsrpYM0=
go.opentelemetry.io/otel/exporters/stdout/stdouttrace v1.29.0/go.mod h1:2uL/xnOXh0CHOBFCWXz5u1A4GXLiW+0IQIzVbeOEQ0U=
go.opentelemetry.io/otel/metric v1.29.0 h1:vPf/HFWTNkPu1aYeIsc98l4ktOQaL6LeSoeV2g+8YLc=
go.opentelemetry.io/otel/metric v1.29.0/go.mod h1:auu/QWieFVWx+DmQOUMgj0F8LHWdgalxXqvp7BII/W8=
go.opentelemetry.io/otel/sdk v1.29.0 h1:vkqKjk7gwhS8VaWb0POZKmIEDimRCMsopNYnriHyryo=
go.opentelemetry.io/otel/sdk v1.29.0/go.mod h1:pM8Dx5WKnvxLCb+8lG1PRNIDxu9g9b9g59Qr7hfAAok=
go.opentelemetry.io/otel/trace v1.29.0 h1:J/8ZNK4XgR7a21DZUAsbF8pZ5Jcw1VhACmnYt39JTi4=
go.opentelemetry.io/otel/trace v1.29.0/go.mod h1:eHl3w0sp3paPkYstJOmAimxhiFXPg+MMTlEh3nsQgWQ=
go.opentelemetry.io/proto/otlp v1.3.1 h1:TrMUixzpM0yuc/znrFTP9MMRh8trP93mkCiDVeXrui0=
go.opentelemetry.io/proto/otlp v1.3.1/go.mod h1:0X1WI4de4ZsLrrJNLAQbFeLCm3T7yBkR0XqQ7niQU+8=
go.uber.org/atomic v1.11.0 h1:ZvwS0R+56ePWxUNi+Atn9dWONBPp/AUETXlHW0DxSjE= go.uber.org/atomic v1.11.0 h1:ZvwS0R+56ePWxUNi+Atn9dWONBPp/AUETXlHW0DxSjE=
go.uber.org/atomic v1.11.0/go.mod h1:LUxbIzbOniOlMKjJjyPfpl4v+PKK2cNJn91OQbhoJI0= go.uber.org/atomic v1.11.0/go.mod h1:LUxbIzbOniOlMKjJjyPfpl4v+PKK2cNJn91OQbhoJI0=
go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto=
go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
golang.org/x/crypto v0.47.0 h1:V6e3FRj+n4dbpw86FJ8Fv7XVOql7TEwpHapKoMJ/GO8= golang.org/x/crypto v0.47.0 h1:V6e3FRj+n4dbpw86FJ8Fv7XVOql7TEwpHapKoMJ/GO8=
@@ -162,6 +199,8 @@ golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.33.0 h1:B3njUFyqtHDUI5jMn1YIr5B0IE2U0qck04r6d4KPAxE= golang.org/x/text v0.33.0 h1:B3njUFyqtHDUI5jMn1YIr5B0IE2U0qck04r6d4KPAxE=
golang.org/x/text v0.33.0/go.mod h1:LuMebE6+rBincTi9+xWTY8TztLzKHc/9C1uBCG27+q8= golang.org/x/text v0.33.0/go.mod h1:LuMebE6+rBincTi9+xWTY8TztLzKHc/9C1uBCG27+q8=
golang.org/x/time v0.14.0 h1:MRx4UaLrDotUKUdCIqzPC48t1Y9hANFKIRpNx+Te8PI=
golang.org/x/time v0.14.0/go.mod h1:eL/Oa2bBBK0TkX57Fyni+NgnyQQN4LitPmob2Hjnqw4=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY=
@@ -175,6 +214,8 @@ google.golang.org/genai v1.48.0/go.mod h1:A3kkl0nyBjyFlNjgxIwKq70julKbIxpSxqKO5g
google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc=
google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc=
google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo= google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo=
google.golang.org/genproto/googleapis/api v0.0.0-20240903143218-8af14fe29dc1 h1:hjSy6tcFQZ171igDaN5QHOw2n6vx40juYbC/x67CEhc=
google.golang.org/genproto/googleapis/api v0.0.0-20240903143218-8af14fe29dc1/go.mod h1:qpvKtACPCQhAdu3PyQgV4l3LMXZEtft7y8QcarRsp9I=
google.golang.org/genproto/googleapis/rpc v0.0.0-20240903143218-8af14fe29dc1 h1:pPJltXNxVzT4pK9yD8vR9X75DaWYYmLGMsEvBfFQZzQ= google.golang.org/genproto/googleapis/rpc v0.0.0-20240903143218-8af14fe29dc1 h1:pPJltXNxVzT4pK9yD8vR9X75DaWYYmLGMsEvBfFQZzQ=
google.golang.org/genproto/googleapis/rpc v0.0.0-20240903143218-8af14fe29dc1/go.mod h1:UqMtugtsSgubUsoxbuAoiCXvqvErP7Gf0so0mK9tHxU= google.golang.org/genproto/googleapis/rpc v0.0.0-20240903143218-8af14fe29dc1/go.mod h1:UqMtugtsSgubUsoxbuAoiCXvqvErP7Gf0so0mK9tHxU=
google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c=
@@ -198,8 +239,8 @@ google.golang.org/protobuf v1.34.2/go.mod h1:qYOHts0dSfpeUzUFpOMr/WGzszTmLH+DiWn
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
gopkg.in/yaml.v2 v2.2.8 h1:obN1ZagJSUGI0Ek/LBmuj4SNLPfIny3KsKFopxRdj10= gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
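
Among the dependencies added above, github.com/sony/gobreaker backs the circuit-breaker feature. A minimal sketch of wrapping an upstream provider call with it (the names and thresholds here are illustrative, not taken from this repository):

package main

import (
	"errors"
	"fmt"
	"time"

	"github.com/sony/gobreaker"
)

func main() {
	cb := gobreaker.NewCircuitBreaker(gobreaker.Settings{
		Name:    "upstream-llm",    // illustrative name
		Timeout: 30 * time.Second,  // how long the breaker stays open before a trial request
		ReadyToTrip: func(c gobreaker.Counts) bool {
			// Trip after five consecutive failures; the real threshold is a policy choice.
			return c.ConsecutiveFailures >= 5
		},
	})

	// Execute runs the call through the breaker; once open, it fails
	// fast with gobreaker.ErrOpenState instead of hitting the upstream.
	resp, err := cb.Execute(func() (interface{}, error) {
		return nil, errors.New("upstream unavailable") // stand-in for a real provider call
	})
	fmt.Println(resp, err)
}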

918
internal/api/types_test.go Normal file
View File

@@ -0,0 +1,918 @@
package api
import (
"encoding/json"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func TestInputUnion_UnmarshalJSON(t *testing.T) {
tests := []struct {
name string
input string
expectError bool
validate func(t *testing.T, u InputUnion)
}{
{
name: "string input",
input: `"hello world"`,
validate: func(t *testing.T, u InputUnion) {
require.NotNil(t, u.String)
assert.Equal(t, "hello world", *u.String)
assert.Nil(t, u.Items)
},
},
{
name: "empty string input",
input: `""`,
validate: func(t *testing.T, u InputUnion) {
require.NotNil(t, u.String)
assert.Equal(t, "", *u.String)
assert.Nil(t, u.Items)
},
},
{
name: "null input",
input: `null`,
validate: func(t *testing.T, u InputUnion) {
assert.Nil(t, u.String)
assert.Nil(t, u.Items)
},
},
{
name: "array input with single message",
input: `[{
"type": "message",
"role": "user",
"content": "hello"
}]`,
validate: func(t *testing.T, u InputUnion) {
assert.Nil(t, u.String)
require.Len(t, u.Items, 1)
assert.Equal(t, "message", u.Items[0].Type)
assert.Equal(t, "user", u.Items[0].Role)
},
},
{
name: "array input with multiple messages",
input: `[{
"type": "message",
"role": "user",
"content": "hello"
}, {
"type": "message",
"role": "assistant",
"content": "hi there"
}]`,
validate: func(t *testing.T, u InputUnion) {
assert.Nil(t, u.String)
require.Len(t, u.Items, 2)
assert.Equal(t, "user", u.Items[0].Role)
assert.Equal(t, "assistant", u.Items[1].Role)
},
},
{
name: "empty array",
input: `[]`,
validate: func(t *testing.T, u InputUnion) {
assert.Nil(t, u.String)
require.NotNil(t, u.Items)
assert.Len(t, u.Items, 0)
},
},
{
name: "array with function_call_output",
input: `[{
"type": "function_call_output",
"call_id": "call_123",
"name": "get_weather",
"output": "{\"temperature\": 72}"
}]`,
validate: func(t *testing.T, u InputUnion) {
assert.Nil(t, u.String)
require.Len(t, u.Items, 1)
assert.Equal(t, "function_call_output", u.Items[0].Type)
assert.Equal(t, "call_123", u.Items[0].CallID)
assert.Equal(t, "get_weather", u.Items[0].Name)
assert.Equal(t, `{"temperature": 72}`, u.Items[0].Output)
},
},
{
name: "invalid JSON",
input: `{invalid json}`,
expectError: true,
},
{
name: "invalid type - number",
input: `123`,
expectError: true,
},
{
name: "invalid type - object",
input: `{"key": "value"}`,
expectError: true,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
var u InputUnion
err := json.Unmarshal([]byte(tt.input), &u)
if tt.expectError {
assert.Error(t, err)
return
}
require.NoError(t, err)
if tt.validate != nil {
tt.validate(t, u)
}
})
}
}
func TestInputUnion_MarshalJSON(t *testing.T) {
tests := []struct {
name string
input InputUnion
expected string
}{
{
name: "string value",
input: InputUnion{
String: stringPtr("hello world"),
},
expected: `"hello world"`,
},
{
name: "empty string",
input: InputUnion{
String: stringPtr(""),
},
expected: `""`,
},
{
name: "array value",
input: InputUnion{
Items: []InputItem{
{Type: "message", Role: "user"},
},
},
expected: `[{"type":"message","role":"user"}]`,
},
{
name: "empty array",
input: InputUnion{
Items: []InputItem{},
},
expected: `[]`,
},
{
name: "nil values",
input: InputUnion{},
expected: `null`,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
data, err := json.Marshal(tt.input)
require.NoError(t, err)
assert.JSONEq(t, tt.expected, string(data))
})
}
}
func TestInputUnion_RoundTrip(t *testing.T) {
tests := []struct {
name string
input InputUnion
}{
{
name: "string",
input: InputUnion{
String: stringPtr("test message"),
},
},
{
name: "array with messages",
input: InputUnion{
Items: []InputItem{
{Type: "message", Role: "user", Content: json.RawMessage(`"hello"`)},
{Type: "message", Role: "assistant", Content: json.RawMessage(`"hi"`)},
},
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
// Marshal
data, err := json.Marshal(tt.input)
require.NoError(t, err)
// Unmarshal
var result InputUnion
err = json.Unmarshal(data, &result)
require.NoError(t, err)
// Verify equivalence
if tt.input.String != nil {
require.NotNil(t, result.String)
assert.Equal(t, *tt.input.String, *result.String)
}
if tt.input.Items != nil {
require.NotNil(t, result.Items)
assert.Len(t, result.Items, len(tt.input.Items))
}
})
}
}
func TestResponseRequest_NormalizeInput(t *testing.T) {
tests := []struct {
name string
request ResponseRequest
validate func(t *testing.T, msgs []Message)
}{
{
name: "string input creates user message",
request: ResponseRequest{
Input: InputUnion{
String: stringPtr("hello world"),
},
},
validate: func(t *testing.T, msgs []Message) {
require.Len(t, msgs, 1)
assert.Equal(t, "user", msgs[0].Role)
require.Len(t, msgs[0].Content, 1)
assert.Equal(t, "input_text", msgs[0].Content[0].Type)
assert.Equal(t, "hello world", msgs[0].Content[0].Text)
},
},
{
name: "message with string content",
request: ResponseRequest{
Input: InputUnion{
Items: []InputItem{
{
Type: "message",
Role: "user",
Content: json.RawMessage(`"what is the weather?"`),
},
},
},
},
validate: func(t *testing.T, msgs []Message) {
require.Len(t, msgs, 1)
assert.Equal(t, "user", msgs[0].Role)
require.Len(t, msgs[0].Content, 1)
assert.Equal(t, "input_text", msgs[0].Content[0].Type)
assert.Equal(t, "what is the weather?", msgs[0].Content[0].Text)
},
},
{
name: "assistant message with string content uses output_text",
request: ResponseRequest{
Input: InputUnion{
Items: []InputItem{
{
Type: "message",
Role: "assistant",
Content: json.RawMessage(`"The weather is sunny"`),
},
},
},
},
validate: func(t *testing.T, msgs []Message) {
require.Len(t, msgs, 1)
assert.Equal(t, "assistant", msgs[0].Role)
require.Len(t, msgs[0].Content, 1)
assert.Equal(t, "output_text", msgs[0].Content[0].Type)
assert.Equal(t, "The weather is sunny", msgs[0].Content[0].Text)
},
},
{
name: "message with content blocks array",
request: ResponseRequest{
Input: InputUnion{
Items: []InputItem{
{
Type: "message",
Role: "user",
Content: json.RawMessage(`[
{"type": "input_text", "text": "hello"},
{"type": "input_text", "text": "world"}
]`),
},
},
},
},
validate: func(t *testing.T, msgs []Message) {
require.Len(t, msgs, 1)
assert.Equal(t, "user", msgs[0].Role)
require.Len(t, msgs[0].Content, 2)
assert.Equal(t, "input_text", msgs[0].Content[0].Type)
assert.Equal(t, "hello", msgs[0].Content[0].Text)
assert.Equal(t, "input_text", msgs[0].Content[1].Type)
assert.Equal(t, "world", msgs[0].Content[1].Text)
},
},
{
name: "message with tool_use blocks",
request: ResponseRequest{
Input: InputUnion{
Items: []InputItem{
{
Type: "message",
Role: "assistant",
Content: json.RawMessage(`[
{
"type": "tool_use",
"id": "call_123",
"name": "get_weather",
"input": {"location": "San Francisco"}
}
]`),
},
},
},
},
validate: func(t *testing.T, msgs []Message) {
require.Len(t, msgs, 1)
assert.Equal(t, "assistant", msgs[0].Role)
assert.Len(t, msgs[0].Content, 0)
require.Len(t, msgs[0].ToolCalls, 1)
assert.Equal(t, "call_123", msgs[0].ToolCalls[0].ID)
assert.Equal(t, "get_weather", msgs[0].ToolCalls[0].Name)
assert.JSONEq(t, `{"location":"San Francisco"}`, msgs[0].ToolCalls[0].Arguments)
},
},
{
name: "message with mixed text and tool_use",
request: ResponseRequest{
Input: InputUnion{
Items: []InputItem{
{
Type: "message",
Role: "assistant",
Content: json.RawMessage(`[
{
"type": "output_text",
"text": "Let me check the weather"
},
{
"type": "tool_use",
"id": "call_456",
"name": "get_weather",
"input": {"location": "Boston"}
}
]`),
},
},
},
},
validate: func(t *testing.T, msgs []Message) {
require.Len(t, msgs, 1)
assert.Equal(t, "assistant", msgs[0].Role)
require.Len(t, msgs[0].Content, 1)
assert.Equal(t, "output_text", msgs[0].Content[0].Type)
assert.Equal(t, "Let me check the weather", msgs[0].Content[0].Text)
require.Len(t, msgs[0].ToolCalls, 1)
assert.Equal(t, "call_456", msgs[0].ToolCalls[0].ID)
},
},
{
name: "multiple tool_use blocks",
request: ResponseRequest{
Input: InputUnion{
Items: []InputItem{
{
Type: "message",
Role: "assistant",
Content: json.RawMessage(`[
{
"type": "tool_use",
"id": "call_1",
"name": "get_weather",
"input": {"location": "NYC"}
},
{
"type": "tool_use",
"id": "call_2",
"name": "get_time",
"input": {"timezone": "EST"}
}
]`),
},
},
},
},
validate: func(t *testing.T, msgs []Message) {
require.Len(t, msgs, 1)
require.Len(t, msgs[0].ToolCalls, 2)
assert.Equal(t, "call_1", msgs[0].ToolCalls[0].ID)
assert.Equal(t, "get_weather", msgs[0].ToolCalls[0].Name)
assert.Equal(t, "call_2", msgs[0].ToolCalls[1].ID)
assert.Equal(t, "get_time", msgs[0].ToolCalls[1].Name)
},
},
{
name: "function_call_output item",
request: ResponseRequest{
Input: InputUnion{
Items: []InputItem{
{
Type: "function_call_output",
CallID: "call_123",
Name: "get_weather",
Output: `{"temperature": 72, "condition": "sunny"}`,
},
},
},
},
validate: func(t *testing.T, msgs []Message) {
require.Len(t, msgs, 1)
assert.Equal(t, "tool", msgs[0].Role)
assert.Equal(t, "call_123", msgs[0].CallID)
assert.Equal(t, "get_weather", msgs[0].Name)
require.Len(t, msgs[0].Content, 1)
assert.Equal(t, "input_text", msgs[0].Content[0].Type)
assert.Equal(t, `{"temperature": 72, "condition": "sunny"}`, msgs[0].Content[0].Text)
},
},
{
name: "multiple messages in conversation",
request: ResponseRequest{
Input: InputUnion{
Items: []InputItem{
{
Type: "message",
Role: "user",
Content: json.RawMessage(`"what is 2+2?"`),
},
{
Type: "message",
Role: "assistant",
Content: json.RawMessage(`"The answer is 4"`),
},
{
Type: "message",
Role: "user",
Content: json.RawMessage(`"thanks!"`),
},
},
},
},
validate: func(t *testing.T, msgs []Message) {
require.Len(t, msgs, 3)
assert.Equal(t, "user", msgs[0].Role)
assert.Equal(t, "assistant", msgs[1].Role)
assert.Equal(t, "user", msgs[2].Role)
},
},
{
name: "complete tool calling flow",
request: ResponseRequest{
Input: InputUnion{
Items: []InputItem{
{
Type: "message",
Role: "user",
Content: json.RawMessage(`"what is the weather?"`),
},
{
Type: "message",
Role: "assistant",
Content: json.RawMessage(`[
{
"type": "tool_use",
"id": "call_abc",
"name": "get_weather",
"input": {"location": "Seattle"}
}
]`),
},
{
Type: "function_call_output",
CallID: "call_abc",
Name: "get_weather",
Output: `{"temp": 55, "condition": "rainy"}`,
},
},
},
},
validate: func(t *testing.T, msgs []Message) {
require.Len(t, msgs, 3)
assert.Equal(t, "user", msgs[0].Role)
assert.Equal(t, "assistant", msgs[1].Role)
require.Len(t, msgs[1].ToolCalls, 1)
assert.Equal(t, "tool", msgs[2].Role)
assert.Equal(t, "call_abc", msgs[2].CallID)
},
},
{
name: "message without type defaults to message",
request: ResponseRequest{
Input: InputUnion{
Items: []InputItem{
{
Role: "user",
Content: json.RawMessage(`"hello"`),
},
},
},
},
validate: func(t *testing.T, msgs []Message) {
require.Len(t, msgs, 1)
assert.Equal(t, "user", msgs[0].Role)
},
},
{
name: "message with nil content",
request: ResponseRequest{
Input: InputUnion{
Items: []InputItem{
{
Type: "message",
Role: "user",
Content: nil,
},
},
},
},
validate: func(t *testing.T, msgs []Message) {
require.Len(t, msgs, 1)
assert.Equal(t, "user", msgs[0].Role)
assert.Len(t, msgs[0].Content, 0)
},
},
{
name: "tool_use with empty input",
request: ResponseRequest{
Input: InputUnion{
Items: []InputItem{
{
Type: "message",
Role: "assistant",
Content: json.RawMessage(`[
{
"type": "tool_use",
"id": "call_xyz",
"name": "no_args_function",
"input": {}
}
]`),
},
},
},
},
validate: func(t *testing.T, msgs []Message) {
require.Len(t, msgs, 1)
require.Len(t, msgs[0].ToolCalls, 1)
assert.Equal(t, "call_xyz", msgs[0].ToolCalls[0].ID)
assert.JSONEq(t, `{}`, msgs[0].ToolCalls[0].Arguments)
},
},
{
name: "content blocks with unknown types ignored",
request: ResponseRequest{
Input: InputUnion{
Items: []InputItem{
{
Type: "message",
Role: "user",
Content: json.RawMessage(`[
{"type": "input_text", "text": "visible"},
{"type": "unknown_type", "data": "ignored"},
{"type": "input_text", "text": "also visible"}
]`),
},
},
},
},
validate: func(t *testing.T, msgs []Message) {
require.Len(t, msgs, 1)
require.Len(t, msgs[0].Content, 2)
assert.Equal(t, "visible", msgs[0].Content[0].Text)
assert.Equal(t, "also visible", msgs[0].Content[1].Text)
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
msgs := tt.request.NormalizeInput()
if tt.validate != nil {
tt.validate(t, msgs)
}
})
}
}
func TestResponseRequest_Validate(t *testing.T) {
tests := []struct {
name string
request *ResponseRequest
expectError bool
errorMsg string
}{
{
name: "valid request with string input",
request: &ResponseRequest{
Model: "gpt-4",
Input: InputUnion{
String: stringPtr("hello"),
},
},
expectError: false,
},
{
name: "valid request with array input",
request: &ResponseRequest{
Model: "gpt-4",
Input: InputUnion{
Items: []InputItem{
{Type: "message", Role: "user", Content: json.RawMessage(`"hello"`)},
},
},
},
expectError: false,
},
{
name: "nil request",
request: nil,
expectError: true,
errorMsg: "request is nil",
},
{
name: "missing model",
request: &ResponseRequest{
Model: "",
Input: InputUnion{
String: stringPtr("hello"),
},
},
expectError: true,
errorMsg: "model is required",
},
{
name: "missing input",
request: &ResponseRequest{
Model: "gpt-4",
Input: InputUnion{},
},
expectError: true,
errorMsg: "input is required",
},
{
name: "empty string input is invalid",
request: &ResponseRequest{
Model: "gpt-4",
Input: InputUnion{
String: stringPtr(""),
},
},
expectError: false, // Empty string is technically valid
},
{
name: "empty array input is invalid",
request: &ResponseRequest{
Model: "gpt-4",
Input: InputUnion{
Items: []InputItem{},
},
},
expectError: true,
errorMsg: "input is required",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
err := tt.request.Validate()
if tt.expectError {
assert.Error(t, err)
if tt.errorMsg != "" {
assert.Contains(t, err.Error(), tt.errorMsg)
}
return
}
assert.NoError(t, err)
})
}
}
func TestGetStringField(t *testing.T) {
tests := []struct {
name string
input map[string]interface{}
key string
expected string
}{
{
name: "existing string field",
input: map[string]interface{}{
"name": "value",
},
key: "name",
expected: "value",
},
{
name: "missing field",
input: map[string]interface{}{
"other": "value",
},
key: "name",
expected: "",
},
{
name: "wrong type - int",
input: map[string]interface{}{
"name": 123,
},
key: "name",
expected: "",
},
{
name: "wrong type - bool",
input: map[string]interface{}{
"name": true,
},
key: "name",
expected: "",
},
{
name: "wrong type - object",
input: map[string]interface{}{
"name": map[string]string{"nested": "value"},
},
key: "name",
expected: "",
},
{
name: "empty string value",
input: map[string]interface{}{
"name": "",
},
key: "name",
expected: "",
},
{
name: "nil map",
input: nil,
key: "name",
expected: "",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result := getStringField(tt.input, tt.key)
assert.Equal(t, tt.expected, result)
})
}
}
func TestInputItem_ComplexContent(t *testing.T) {
tests := []struct {
name string
itemJSON string
validate func(t *testing.T, item InputItem)
}{
{
name: "content with nested objects",
itemJSON: `{
"type": "message",
"role": "assistant",
"content": [{
"type": "tool_use",
"id": "call_complex",
"name": "search",
"input": {
"query": "test",
"filters": {
"category": "docs",
"date": "2024-01-01"
},
"limit": 10
}
}]
}`,
validate: func(t *testing.T, item InputItem) {
assert.Equal(t, "message", item.Type)
assert.Equal(t, "assistant", item.Role)
assert.NotNil(t, item.Content)
},
},
{
name: "content with array in input",
itemJSON: `{
"type": "message",
"role": "assistant",
"content": [{
"type": "tool_use",
"id": "call_arr",
"name": "batch_process",
"input": {
"items": ["a", "b", "c"]
}
}]
}`,
validate: func(t *testing.T, item InputItem) {
assert.Equal(t, "message", item.Type)
assert.NotNil(t, item.Content)
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
var item InputItem
err := json.Unmarshal([]byte(tt.itemJSON), &item)
require.NoError(t, err)
if tt.validate != nil {
tt.validate(t, item)
}
})
}
}
func TestResponseRequest_CompleteWorkflow(t *testing.T) {
requestJSON := `{
"model": "gpt-4",
"input": [{
"type": "message",
"role": "user",
"content": "What's the weather in NYC and LA?"
}, {
"type": "message",
"role": "assistant",
"content": [{
"type": "output_text",
"text": "Let me check both locations for you."
}, {
"type": "tool_use",
"id": "call_1",
"name": "get_weather",
"input": {"location": "New York City"}
}, {
"type": "tool_use",
"id": "call_2",
"name": "get_weather",
"input": {"location": "Los Angeles"}
}]
}, {
"type": "function_call_output",
"call_id": "call_1",
"name": "get_weather",
"output": "{\"temp\": 45, \"condition\": \"cloudy\"}"
}, {
"type": "function_call_output",
"call_id": "call_2",
"name": "get_weather",
"output": "{\"temp\": 72, \"condition\": \"sunny\"}"
}],
"stream": true,
"temperature": 0.7
}`
var req ResponseRequest
err := json.Unmarshal([]byte(requestJSON), &req)
require.NoError(t, err)
// Validate
err = req.Validate()
require.NoError(t, err)
// Normalize
msgs := req.NormalizeInput()
require.Len(t, msgs, 4)
// Check user message
assert.Equal(t, "user", msgs[0].Role)
assert.Len(t, msgs[0].Content, 1)
// Check assistant message with tool calls
assert.Equal(t, "assistant", msgs[1].Role)
assert.Len(t, msgs[1].Content, 1)
assert.Len(t, msgs[1].ToolCalls, 2)
assert.Equal(t, "call_1", msgs[1].ToolCalls[0].ID)
assert.Equal(t, "call_2", msgs[1].ToolCalls[1].ID)
// Check tool responses
assert.Equal(t, "tool", msgs[2].Role)
assert.Equal(t, "call_1", msgs[2].CallID)
assert.Equal(t, "tool", msgs[3].Role)
assert.Equal(t, "call_2", msgs[3].CallID)
}
// Helper functions
func stringPtr(s string) *string {
return &s
}
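
The InputUnion implementation itself is not part of this diff. For orientation, a minimal sketch of the string-or-array union pattern that these tests pin down could look like the following; the field and type names are taken from the tests, everything else is an assumption:

package api

import (
	"encoding/json"
	"fmt"
)

// InputItem mirrors the fields the tests exercise; the real struct in
// internal/api may carry more.
type InputItem struct {
	Type    string          `json:"type,omitempty"`
	Role    string          `json:"role,omitempty"`
	Content json.RawMessage `json:"content,omitempty"`
	CallID  string          `json:"call_id,omitempty"`
	Name    string          `json:"name,omitempty"`
	Output  string          `json:"output,omitempty"`
}

// InputUnion accepts either a bare JSON string or an array of items.
type InputUnion struct {
	String *string
	Items  []InputItem
}

func (u *InputUnion) UnmarshalJSON(data []byte) error {
	if string(data) == "null" {
		return nil // both branches stay nil, matching the "null input" case
	}
	var s string
	if err := json.Unmarshal(data, &s); err == nil {
		u.String = &s
		return nil
	}
	var items []InputItem
	if err := json.Unmarshal(data, &items); err == nil {
		u.Items = items
		return nil
	}
	return fmt.Errorf("input must be a string or an array of input items")
}

func (u InputUnion) MarshalJSON() ([]byte, error) {
	switch {
	case u.String != nil:
		return json.Marshal(*u.String)
	case u.Items != nil:
		return json.Marshal(u.Items)
	default:
		return []byte("null"), nil
	}
}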

View File

@@ -6,6 +6,7 @@ import (
 	"encoding/base64"
 	"encoding/json"
 	"fmt"
+	"log/slog"
 	"math/big"
 	"net/http"
 	"strings"
@@ -28,12 +29,13 @@ type Middleware struct {
 	keys   map[string]*rsa.PublicKey
 	mu     sync.RWMutex
 	client *http.Client
+	logger *slog.Logger
 }

 // New creates an authentication middleware.
-func New(cfg Config) (*Middleware, error) {
+func New(cfg Config, logger *slog.Logger) (*Middleware, error) {
 	if !cfg.Enabled {
-		return &Middleware{cfg: cfg}, nil
+		return &Middleware{cfg: cfg, logger: logger}, nil
 	}

 	if cfg.Issuer == "" {
@@ -44,6 +46,7 @@ func New(cfg Config) (*Middleware, error) {
 		cfg:    cfg,
 		keys:   make(map[string]*rsa.PublicKey),
 		client: &http.Client{Timeout: 10 * time.Second},
+		logger: logger,
 	}

 	// Fetch JWKS on startup
@@ -255,6 +258,15 @@ func (m *Middleware) periodicRefresh() {
 	defer ticker.Stop()

 	for range ticker.C {
-		_ = m.refreshJWKS()
+		if err := m.refreshJWKS(); err != nil {
+			m.logger.Error("failed to refresh JWKS",
+				slog.String("issuer", m.cfg.Issuer),
+				slog.String("error", err.Error()),
+			)
+		} else {
+			m.logger.Debug("successfully refreshed JWKS",
+				slog.String("issuer", m.cfg.Issuer),
+			)
+		}
 	}
 }
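
With the logger injected, refresh failures now surface instead of being discarded. Callers would wire it up roughly like this (a sketch; the actual cmd/gateway wiring, and any Config fields beyond Enabled and Issuer, are assumptions):

package main

import (
	"log/slog"
	"os"

	"github.com/ajac-zero/latticelm/internal/auth"
)

func main() {
	logger := slog.New(slog.NewJSONHandler(os.Stdout, &slog.HandlerOptions{
		Level: slog.LevelInfo,
	}))

	mw, err := auth.New(auth.Config{
		Enabled: true,
		Issuer:  "https://accounts.google.com", // illustrative issuer
	}, logger)
	if err != nil {
		logger.Error("auth middleware init failed", slog.String("error", err.Error()))
		os.Exit(1)
	}
	_ = mw // attach to the HTTP handler chain
}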

1008
internal/auth/auth_test.go Normal file

File diff suppressed because it is too large

View File

@@ -14,6 +14,9 @@ type Config struct {
 	Models        []ModelEntry        `yaml:"models"`
 	Auth          AuthConfig          `yaml:"auth"`
 	Conversations ConversationConfig  `yaml:"conversations"`
+	Logging       LoggingConfig       `yaml:"logging"`
+	RateLimit     RateLimitConfig     `yaml:"rate_limit"`
+	Observability ObservabilityConfig `yaml:"observability"`
 }

 // ConversationConfig controls conversation storage.
@@ -30,6 +33,59 @@ type ConversationConfig struct {
 	Driver string `yaml:"driver"`
 }

+// LoggingConfig controls logging format and level.
+type LoggingConfig struct {
+	// Format is the log output format: "json" (default) or "text".
+	Format string `yaml:"format"`
+	// Level is the minimum log level: "debug", "info" (default), "warn", or "error".
+	Level string `yaml:"level"`
+}
+
+// RateLimitConfig controls rate limiting behavior.
+type RateLimitConfig struct {
+	// Enabled controls whether rate limiting is active.
+	Enabled bool `yaml:"enabled"`
+	// RequestsPerSecond is the number of requests allowed per second per IP.
+	RequestsPerSecond float64 `yaml:"requests_per_second"`
+	// Burst is the maximum burst size allowed.
+	Burst int `yaml:"burst"`
+}
+
+// ObservabilityConfig controls observability features.
+type ObservabilityConfig struct {
+	Enabled bool          `yaml:"enabled"`
+	Metrics MetricsConfig `yaml:"metrics"`
+	Tracing TracingConfig `yaml:"tracing"`
+}
+
+// MetricsConfig controls Prometheus metrics.
+type MetricsConfig struct {
+	Enabled bool   `yaml:"enabled"`
+	Path    string `yaml:"path"` // default: "/metrics"
+}
+
+// TracingConfig controls OpenTelemetry tracing.
+type TracingConfig struct {
+	Enabled     bool           `yaml:"enabled"`
+	ServiceName string         `yaml:"service_name"` // default: "llm-gateway"
+	Sampler     SamplerConfig  `yaml:"sampler"`
+	Exporter    ExporterConfig `yaml:"exporter"`
+}
+
+// SamplerConfig controls trace sampling.
+type SamplerConfig struct {
+	Type string  `yaml:"type"` // "always", "never", "probability"
+	Rate float64 `yaml:"rate"` // 0.0 to 1.0
+}
+
+// ExporterConfig controls trace exporters.
+type ExporterConfig struct {
+	Type     string            `yaml:"type"` // "otlp", "stdout"
+	Endpoint string            `yaml:"endpoint"`
+	Insecure bool              `yaml:"insecure"`
+	Headers  map[string]string `yaml:"headers"`
+}
+
 // AuthConfig holds OIDC authentication settings.
 type AuthConfig struct {
 	Enabled bool `yaml:"enabled"`
@@ -39,7 +95,8 @@ type AuthConfig struct {
 // ServerConfig controls HTTP server values.
 type ServerConfig struct {
 	Address string `yaml:"address"`
+	MaxRequestBodySize int64 `yaml:"max_request_body_size"` // Maximum request body size in bytes (default: 10MB)
 }

 // ProviderEntry defines a named provider instance in the config file.
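
Read together with the yaml tags, the new sections would appear in a config file roughly as follows (values are illustrative; defaults follow the struct comments above):

logging:
  format: json              # or "text"
  level: info               # debug | info | warn | error

rate_limit:
  enabled: true
  requests_per_second: 10
  burst: 20

observability:
  enabled: true
  metrics:
    enabled: true
    path: /metrics
  tracing:
    enabled: true
    service_name: llm-gateway
    sampler:
      type: probability     # always | never | probability
      rate: 0.1
    exporter:
      type: otlp            # otlp | stdout
      endpoint: otel-collector:4317   # illustrative endpoint
      insecure: true

server:
  address: ":8080"
  max_request_body_size: 10485760   # bytes (10MB default)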

View File

@@ -0,0 +1,377 @@
package config
import (
"os"
"path/filepath"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func TestLoad(t *testing.T) {
tests := []struct {
name string
configYAML string
envVars map[string]string
expectError bool
validate func(t *testing.T, cfg *Config)
}{
{
name: "basic config with all fields",
configYAML: `
server:
address: ":8080"
providers:
openai:
type: openai
api_key: sk-test-key
anthropic:
type: anthropic
api_key: sk-ant-key
models:
- name: gpt-4
provider: openai
provider_model_id: gpt-4-turbo
- name: claude-3
provider: anthropic
provider_model_id: claude-3-sonnet-20240229
auth:
enabled: true
issuer: https://accounts.google.com
audience: my-client-id
conversations:
store: memory
ttl: 1h
`,
validate: func(t *testing.T, cfg *Config) {
assert.Equal(t, ":8080", cfg.Server.Address)
assert.Len(t, cfg.Providers, 2)
assert.Equal(t, "openai", cfg.Providers["openai"].Type)
assert.Equal(t, "sk-test-key", cfg.Providers["openai"].APIKey)
assert.Len(t, cfg.Models, 2)
assert.Equal(t, "gpt-4", cfg.Models[0].Name)
assert.True(t, cfg.Auth.Enabled)
assert.Equal(t, "memory", cfg.Conversations.Store)
},
},
{
name: "config with environment variables",
configYAML: `
server:
address: ":8080"
providers:
openai:
type: openai
api_key: ${OPENAI_API_KEY}
models:
- name: gpt-4
provider: openai
provider_model_id: gpt-4
`,
envVars: map[string]string{
"OPENAI_API_KEY": "sk-from-env",
},
validate: func(t *testing.T, cfg *Config) {
assert.Equal(t, "sk-from-env", cfg.Providers["openai"].APIKey)
},
},
{
name: "minimal config",
configYAML: `
server:
address: ":8080"
providers:
openai:
type: openai
api_key: test-key
models:
- name: gpt-4
provider: openai
`,
validate: func(t *testing.T, cfg *Config) {
assert.Equal(t, ":8080", cfg.Server.Address)
assert.Len(t, cfg.Providers, 1)
assert.Len(t, cfg.Models, 1)
assert.False(t, cfg.Auth.Enabled)
},
},
{
name: "azure openai provider",
configYAML: `
server:
address: ":8080"
providers:
azure:
type: azure_openai
api_key: azure-key
endpoint: https://my-resource.openai.azure.com
api_version: "2024-02-15-preview"
models:
- name: gpt-4-azure
provider: azure
provider_model_id: gpt-4-deployment
`,
validate: func(t *testing.T, cfg *Config) {
assert.Equal(t, "azure_openai", cfg.Providers["azure"].Type)
assert.Equal(t, "azure-key", cfg.Providers["azure"].APIKey)
assert.Equal(t, "https://my-resource.openai.azure.com", cfg.Providers["azure"].Endpoint)
assert.Equal(t, "2024-02-15-preview", cfg.Providers["azure"].APIVersion)
},
},
{
name: "vertex ai provider",
configYAML: `
server:
address: ":8080"
providers:
vertex:
type: vertex_ai
project: my-gcp-project
location: us-central1
models:
- name: gemini-pro
provider: vertex
provider_model_id: gemini-1.5-pro
`,
validate: func(t *testing.T, cfg *Config) {
assert.Equal(t, "vertex_ai", cfg.Providers["vertex"].Type)
assert.Equal(t, "my-gcp-project", cfg.Providers["vertex"].Project)
assert.Equal(t, "us-central1", cfg.Providers["vertex"].Location)
},
},
{
name: "sql conversation store",
configYAML: `
server:
address: ":8080"
providers:
openai:
type: openai
api_key: test-key
models:
- name: gpt-4
provider: openai
conversations:
store: sql
driver: sqlite3
dsn: conversations.db
ttl: 2h
`,
validate: func(t *testing.T, cfg *Config) {
assert.Equal(t, "sql", cfg.Conversations.Store)
assert.Equal(t, "sqlite3", cfg.Conversations.Driver)
assert.Equal(t, "conversations.db", cfg.Conversations.DSN)
assert.Equal(t, "2h", cfg.Conversations.TTL)
},
},
{
name: "redis conversation store",
configYAML: `
server:
address: ":8080"
providers:
openai:
type: openai
api_key: test-key
models:
- name: gpt-4
provider: openai
conversations:
store: redis
dsn: redis://localhost:6379/0
ttl: 30m
`,
validate: func(t *testing.T, cfg *Config) {
assert.Equal(t, "redis", cfg.Conversations.Store)
assert.Equal(t, "redis://localhost:6379/0", cfg.Conversations.DSN)
assert.Equal(t, "30m", cfg.Conversations.TTL)
},
},
{
name: "invalid model references unknown provider",
configYAML: `
server:
address: ":8080"
providers:
openai:
type: openai
api_key: test-key
models:
- name: gpt-4
provider: unknown_provider
`,
expectError: true,
},
{
name: "invalid YAML",
configYAML: `invalid: yaml: content: [unclosed`,
expectError: true,
},
{
name: "multiple models same provider",
configYAML: `
server:
address: ":8080"
providers:
openai:
type: openai
api_key: test-key
models:
- name: gpt-4
provider: openai
provider_model_id: gpt-4-turbo
- name: gpt-3.5
provider: openai
provider_model_id: gpt-3.5-turbo
- name: gpt-4-mini
provider: openai
provider_model_id: gpt-4o-mini
`,
validate: func(t *testing.T, cfg *Config) {
assert.Len(t, cfg.Models, 3)
for _, model := range cfg.Models {
assert.Equal(t, "openai", model.Provider)
}
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
// Create temporary config file
tmpDir := t.TempDir()
configPath := filepath.Join(tmpDir, "config.yaml")
err := os.WriteFile(configPath, []byte(tt.configYAML), 0644)
require.NoError(t, err, "failed to write test config file")
// Set environment variables
for key, value := range tt.envVars {
t.Setenv(key, value)
}
// Load config
cfg, err := Load(configPath)
if tt.expectError {
assert.Error(t, err, "expected an error")
return
}
require.NoError(t, err, "unexpected error loading config")
require.NotNil(t, cfg, "config should not be nil")
if tt.validate != nil {
tt.validate(t, cfg)
}
})
}
}
func TestLoadNonExistentFile(t *testing.T) {
_, err := Load("/nonexistent/config.yaml")
assert.Error(t, err, "should error on nonexistent file")
}
func TestConfigValidate(t *testing.T) {
tests := []struct {
name string
config Config
expectError bool
}{
{
name: "valid config",
config: Config{
Providers: map[string]ProviderEntry{
"openai": {Type: "openai"},
},
Models: []ModelEntry{
{Name: "gpt-4", Provider: "openai"},
},
},
expectError: false,
},
{
name: "model references unknown provider",
config: Config{
Providers: map[string]ProviderEntry{
"openai": {Type: "openai"},
},
Models: []ModelEntry{
{Name: "gpt-4", Provider: "unknown"},
},
},
expectError: true,
},
{
name: "no models",
config: Config{
Providers: map[string]ProviderEntry{
"openai": {Type: "openai"},
},
Models: []ModelEntry{},
},
expectError: false,
},
{
name: "multiple models multiple providers",
config: Config{
Providers: map[string]ProviderEntry{
"openai": {Type: "openai"},
"anthropic": {Type: "anthropic"},
},
Models: []ModelEntry{
{Name: "gpt-4", Provider: "openai"},
{Name: "claude-3", Provider: "anthropic"},
},
},
expectError: false,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
err := tt.config.validate()
if tt.expectError {
assert.Error(t, err, "expected validation error")
} else {
assert.NoError(t, err, "unexpected validation error")
}
})
}
}
func TestEnvironmentVariableExpansion(t *testing.T) {
configYAML := `
server:
address: "${SERVER_ADDRESS}"
providers:
openai:
type: openai
api_key: ${OPENAI_KEY}
anthropic:
type: anthropic
api_key: ${ANTHROPIC_KEY:-default-key}
models:
- name: gpt-4
provider: openai
`
tmpDir := t.TempDir()
configPath := filepath.Join(tmpDir, "config.yaml")
err := os.WriteFile(configPath, []byte(configYAML), 0644)
require.NoError(t, err)
// Set only some env vars to test defaults
t.Setenv("SERVER_ADDRESS", ":9090")
t.Setenv("OPENAI_KEY", "sk-from-env")
// Don't set ANTHROPIC_KEY to test default value
cfg, err := Load(configPath)
require.NoError(t, err)
assert.Equal(t, ":9090", cfg.Server.Address)
assert.Equal(t, "sk-from-env", cfg.Providers["openai"].APIKey)
// Note: Go's os.Expand doesn't support default values like ${VAR:-default}
// This is just documenting current behavior
}
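
On the ${VAR:-default} caveat noted in the test above: os.Expand and os.ExpandEnv treat everything between ${ and } as the variable name, so the shell-style default syntax is looked up verbatim and an unset variable expands to the empty string. Supporting defaults requires a custom mapping function, along these lines (a sketch; whether Load uses os.Expand internally is an assumption):

package main

import (
	"fmt"
	"os"
	"strings"
)

func main() {
	src := "api_key: ${ANTHROPIC_KEY:-default-key}"

	// Standard expansion: the whole "ANTHROPIC_KEY:-default-key" is
	// looked up as one variable name, finds nothing, and expands to "".
	fmt.Println(os.ExpandEnv(src)) // "api_key: "

	// A mapping that understands the shell-style default:
	expanded := os.Expand(src, func(name string) string {
		if i := strings.Index(name, ":-"); i >= 0 {
			if v := os.Getenv(name[:i]); v != "" {
				return v
			}
			return name[i+2:]
		}
		return os.Getenv(name)
	})
	fmt.Println(expanded) // "api_key: default-key"
}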

View File

@@ -1,6 +1,7 @@
 package conversation

 import (
+	"context"
 	"sync"
 	"time"
@@ -9,11 +10,12 @@ import (
 // Store defines the interface for conversation storage backends.
 type Store interface {
-	Get(id string) (*Conversation, error)
-	Create(id string, model string, messages []api.Message) (*Conversation, error)
-	Append(id string, messages ...api.Message) (*Conversation, error)
-	Delete(id string) error
+	Get(ctx context.Context, id string) (*Conversation, error)
+	Create(ctx context.Context, id string, model string, messages []api.Message) (*Conversation, error)
+	Append(ctx context.Context, id string, messages ...api.Message) (*Conversation, error)
+	Delete(ctx context.Context, id string) error
 	Size() int
+	Close() error
 }

 // MemoryStore manages conversation history in-memory with automatic expiration.
@@ -21,6 +23,7 @@ type MemoryStore struct {
 	conversations map[string]*Conversation
 	mu            sync.RWMutex
 	ttl           time.Duration
+	done          chan struct{}
 }

 // Conversation holds the message history for a single conversation thread.
@@ -37,18 +40,19 @@ func NewMemoryStore(ttl time.Duration) *MemoryStore {
 	s := &MemoryStore{
 		conversations: make(map[string]*Conversation),
 		ttl:           ttl,
+		done:          make(chan struct{}),
 	}

 	// Start cleanup goroutine if TTL is set
 	if ttl > 0 {
 		go s.cleanup()
 	}

 	return s
 }

 // Get retrieves a conversation by ID. Returns a deep copy to prevent data races.
-func (s *MemoryStore) Get(id string) (*Conversation, error) {
+func (s *MemoryStore) Get(ctx context.Context, id string) (*Conversation, error) {
 	s.mu.RLock()
 	defer s.mu.RUnlock()
@@ -71,7 +75,7 @@ func (s *MemoryStore) Get(id string) (*Conversation, error) {
 }

 // Create creates a new conversation with the given messages.
-func (s *MemoryStore) Create(id string, model string, messages []api.Message) (*Conversation, error) {
+func (s *MemoryStore) Create(ctx context.Context, id string, model string, messages []api.Message) (*Conversation, error) {
 	s.mu.Lock()
 	defer s.mu.Unlock()
@@ -102,7 +106,7 @@ func (s *MemoryStore) Create(id string, model string, messages []api.Message) (*
 }

 // Append adds new messages to an existing conversation.
-func (s *MemoryStore) Append(id string, messages ...api.Message) (*Conversation, error) {
+func (s *MemoryStore) Append(ctx context.Context, id string, messages ...api.Message) (*Conversation, error) {
 	s.mu.Lock()
 	defer s.mu.Unlock()
@@ -128,7 +132,7 @@ func (s *MemoryStore) Append(id string, messages ...api.Message) (*Conversation,
 }

 // Delete removes a conversation from the store.
-func (s *MemoryStore) Delete(id string) error {
+func (s *MemoryStore) Delete(ctx context.Context, id string) error {
 	s.mu.Lock()
 	defer s.mu.Unlock()
@@ -140,16 +144,21 @@
 func (s *MemoryStore) cleanup() {
 	ticker := time.NewTicker(1 * time.Minute)
 	defer ticker.Stop()

-	for range ticker.C {
-		s.mu.Lock()
-		now := time.Now()
-		for id, conv := range s.conversations {
-			if now.Sub(conv.UpdatedAt) > s.ttl {
-				delete(s.conversations, id)
-			}
-		}
-		s.mu.Unlock()
+	for {
+		select {
+		case <-ticker.C:
+			s.mu.Lock()
+			now := time.Now()
+			for id, conv := range s.conversations {
+				if now.Sub(conv.UpdatedAt) > s.ttl {
+					delete(s.conversations, id)
+				}
+			}
+			s.mu.Unlock()
+		case <-s.done:
+			return
+		}
 	}
 }
@@ -159,3 +168,9 @@ func (s *MemoryStore) Size() int {
 	defer s.mu.RUnlock()
 	return len(s.conversations)
 }
+
+// Close stops the cleanup goroutine and releases resources.
+func (s *MemoryStore) Close() error {
+	close(s.done)
+	return nil
+}
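
The done channel gives the cleanup goroutine a shutdown path, so a MemoryStore no longer leaks a ticker goroutine for the life of the process. Callers pair construction with Close, for example (a sketch; the surrounding shutdown wiring is not shown in this diff):

package main

import (
	"time"

	"github.com/ajac-zero/latticelm/internal/conversation"
)

func main() {
	store := conversation.NewMemoryStore(1 * time.Hour)
	// Stops the background sweep; a leak detector such as
	// go.uber.org/goleak (now in go.sum) would flag the goroutine
	// if this were skipped in tests.
	defer store.Close()

	// ... serve requests using store ...
}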

View File

@@ -0,0 +1,332 @@
package conversation
import (
"context"
"testing"
"time"
"github.com/ajac-zero/latticelm/internal/api"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func TestMemoryStore_CreateAndGet(t *testing.T) {
store := NewMemoryStore(1 * time.Hour)
messages := []api.Message{
{
Role: "user",
Content: []api.ContentBlock{
{Type: "input_text", Text: "Hello"},
},
},
}
conv, err := store.Create(context.Background(), "test-id", "gpt-4", messages)
require.NoError(t, err)
require.NotNil(t, conv)
assert.Equal(t, "test-id", conv.ID)
assert.Equal(t, "gpt-4", conv.Model)
assert.Len(t, conv.Messages, 1)
assert.Equal(t, "Hello", conv.Messages[0].Content[0].Text)
retrieved, err := store.Get(context.Background(), "test-id")
require.NoError(t, err)
require.NotNil(t, retrieved)
assert.Equal(t, conv.ID, retrieved.ID)
assert.Equal(t, conv.Model, retrieved.Model)
assert.Len(t, retrieved.Messages, 1)
}
func TestMemoryStore_GetNonExistent(t *testing.T) {
store := NewMemoryStore(1 * time.Hour)
conv, err := store.Get(context.Background(), "nonexistent")
require.NoError(t, err)
assert.Nil(t, conv, "should return nil for nonexistent conversation")
}
func TestMemoryStore_Append(t *testing.T) {
store := NewMemoryStore(1 * time.Hour)
initialMessages := []api.Message{
{
Role: "user",
Content: []api.ContentBlock{
{Type: "input_text", Text: "First message"},
},
},
}
_, err := store.Create(context.Background(), "test-id", "gpt-4", initialMessages)
require.NoError(t, err)
newMessages := []api.Message{
{
Role: "assistant",
Content: []api.ContentBlock{
{Type: "output_text", Text: "Response"},
},
},
{
Role: "user",
Content: []api.ContentBlock{
{Type: "input_text", Text: "Follow-up"},
},
},
}
conv, err := store.Append(context.Background(), "test-id", newMessages...)
require.NoError(t, err)
require.NotNil(t, conv)
assert.Len(t, conv.Messages, 3, "should have all messages")
assert.Equal(t, "First message", conv.Messages[0].Content[0].Text)
assert.Equal(t, "Response", conv.Messages[1].Content[0].Text)
assert.Equal(t, "Follow-up", conv.Messages[2].Content[0].Text)
}
func TestMemoryStore_AppendNonExistent(t *testing.T) {
store := NewMemoryStore(1 * time.Hour)
newMessage := api.Message{
Role: "user",
Content: []api.ContentBlock{
{Type: "input_text", Text: "Hello"},
},
}
conv, err := store.Append(context.Background(), "nonexistent", newMessage)
require.NoError(t, err)
assert.Nil(t, conv, "should return nil when appending to nonexistent conversation")
}
func TestMemoryStore_Delete(t *testing.T) {
store := NewMemoryStore(1 * time.Hour)
messages := []api.Message{
{
Role: "user",
Content: []api.ContentBlock{
{Type: "input_text", Text: "Hello"},
},
},
}
_, err := store.Create(context.Background(), "test-id", "gpt-4", messages)
require.NoError(t, err)
// Verify it exists
conv, err := store.Get(context.Background(), "test-id")
require.NoError(t, err)
assert.NotNil(t, conv)
// Delete it
err = store.Delete(context.Background(), "test-id")
require.NoError(t, err)
// Verify it's gone
conv, err = store.Get(context.Background(), "test-id")
require.NoError(t, err)
assert.Nil(t, conv, "conversation should be deleted")
}
func TestMemoryStore_Size(t *testing.T) {
store := NewMemoryStore(1 * time.Hour)
assert.Equal(t, 0, store.Size(), "should start empty")
messages := []api.Message{
{Role: "user", Content: []api.ContentBlock{{Type: "input_text", Text: "Hello"}}},
}
_, err := store.Create(context.Background(), "conv-1", "gpt-4", messages)
require.NoError(t, err)
assert.Equal(t, 1, store.Size())
_, err = store.Create(context.Background(), "conv-2", "gpt-4", messages)
require.NoError(t, err)
assert.Equal(t, 2, store.Size())
err = store.Delete(context.Background(), "conv-1")
require.NoError(t, err)
assert.Equal(t, 1, store.Size())
}
func TestMemoryStore_ConcurrentAccess(t *testing.T) {
store := NewMemoryStore(1 * time.Hour)
messages := []api.Message{
{Role: "user", Content: []api.ContentBlock{{Type: "input_text", Text: "Hello"}}},
}
// Create initial conversation
_, err := store.Create(context.Background(), "test-id", "gpt-4", messages)
require.NoError(t, err)
// Simulate concurrent reads and writes
done := make(chan bool, 10)
for i := 0; i < 5; i++ {
go func() {
_, _ = store.Get(context.Background(), "test-id")
done <- true
}()
}
for i := 0; i < 5; i++ {
go func() {
newMsg := api.Message{
Role: "assistant",
Content: []api.ContentBlock{{Type: "output_text", Text: "Response"}},
}
_, _ = store.Append(context.Background(), "test-id", newMsg)
done <- true
}()
}
// Wait for all goroutines
for i := 0; i < 10; i++ {
<-done
}
// Verify final state
conv, err := store.Get(context.Background(), "test-id")
require.NoError(t, err)
assert.NotNil(t, conv)
assert.GreaterOrEqual(t, len(conv.Messages), 1)
}
func TestMemoryStore_DeepCopy(t *testing.T) {
store := NewMemoryStore(1 * time.Hour)
messages := []api.Message{
{
Role: "user",
Content: []api.ContentBlock{
{Type: "input_text", Text: "Original"},
},
},
}
_, err := store.Create(context.Background(), "test-id", "gpt-4", messages)
require.NoError(t, err)
// Get conversation
conv1, err := store.Get(context.Background(), "test-id")
require.NoError(t, err)
// Note: Current implementation copies the Messages slice but not the Content blocks
// So modifying the slice structure is safe, but modifying content blocks affects the original
// This documents actual behavior - future improvement could add deep copying of content blocks
// Safe: appending to Messages slice
originalLen := len(conv1.Messages)
conv1.Messages = append(conv1.Messages, api.Message{
Role: "assistant",
Content: []api.ContentBlock{{Type: "output_text", Text: "New message"}},
})
assert.Equal(t, originalLen+1, len(conv1.Messages), "can modify returned message slice")
// Verify original is unchanged
conv2, err := store.Get(context.Background(), "test-id")
require.NoError(t, err)
assert.Equal(t, originalLen, len(conv2.Messages), "original conversation unaffected by slice modification")
}
func TestMemoryStore_TTLCleanup(t *testing.T) {
// Use very short TTL for testing
store := NewMemoryStore(100 * time.Millisecond)
messages := []api.Message{
{Role: "user", Content: []api.ContentBlock{{Type: "input_text", Text: "Hello"}}},
}
_, err := store.Create(context.Background(), "test-id", "gpt-4", messages)
require.NoError(t, err)
// Verify it exists
conv, err := store.Get(context.Background(), "test-id")
require.NoError(t, err)
assert.NotNil(t, conv)
assert.Equal(t, 1, store.Size())
// Wait for the TTL to expire. The cleanup goroutine only sweeps once
// per minute, so this test cannot observe the deletion itself; it
// documents the expected behavior. Asserting actual expiry would mean
// waiting a full sweep interval or making the interval injectable
// (see the sketch following this file).
time.Sleep(150 * time.Millisecond)
}
func TestMemoryStore_NoTTL(t *testing.T) {
// Store with no TTL (0 duration) should not start cleanup
store := NewMemoryStore(0)
messages := []api.Message{
{Role: "user", Content: []api.ContentBlock{{Type: "input_text", Text: "Hello"}}},
}
_, err := store.Create(context.Background(), "test-id", "gpt-4", messages)
require.NoError(t, err)
assert.Equal(t, 1, store.Size())
// Without TTL, conversation should persist indefinitely
conv, err := store.Get(context.Background(), "test-id")
require.NoError(t, err)
assert.NotNil(t, conv)
}
func TestMemoryStore_UpdatedAtTracking(t *testing.T) {
store := NewMemoryStore(1 * time.Hour)
messages := []api.Message{
{Role: "user", Content: []api.ContentBlock{{Type: "input_text", Text: "Hello"}}},
}
conv, err := store.Create(context.Background(), "test-id", "gpt-4", messages)
require.NoError(t, err)
createdAt := conv.CreatedAt
updatedAt := conv.UpdatedAt
assert.Equal(t, createdAt, updatedAt, "initially created and updated should match")
// Wait a bit and append
time.Sleep(10 * time.Millisecond)
newMsg := api.Message{
Role: "assistant",
Content: []api.ContentBlock{{Type: "output_text", Text: "Response"}},
}
conv, err = store.Append(context.Background(), "test-id", newMsg)
require.NoError(t, err)
assert.Equal(t, createdAt, conv.CreatedAt, "created time should not change")
assert.True(t, conv.UpdatedAt.After(updatedAt), "updated time should be newer")
}
func TestMemoryStore_MultipleConversations(t *testing.T) {
store := NewMemoryStore(1 * time.Hour)
// Create multiple conversations
for i := 0; i < 10; i++ {
id := "conv-" + string(rune('0'+i))
model := "gpt-4"
messages := []api.Message{
{Role: "user", Content: []api.ContentBlock{{Type: "input_text", Text: "Hello " + id}}},
}
_, err := store.Create(context.Background(), id, model, messages)
require.NoError(t, err)
}
assert.Equal(t, 10, store.Size())
// Verify each conversation is independent
for i := 0; i < 10; i++ {
id := "conv-" + string(rune('0'+i))
		conv, err := store.Get(context.Background(), id)
require.NoError(t, err)
require.NotNil(t, conv)
assert.Equal(t, id, conv.ID)
assert.Contains(t, conv.Messages[0].Content[0].Text, id)
}
}

View File

@@ -13,7 +13,6 @@ import (
 type RedisStore struct {
 	client *redis.Client
 	ttl    time.Duration
-	ctx    context.Context
 }

 // NewRedisStore creates a Redis-backed conversation store.
@@ -21,7 +20,6 @@ func NewRedisStore(client *redis.Client, ttl time.Duration) *RedisStore {
 	return &RedisStore{
 		client: client,
 		ttl:    ttl,
-		ctx:    context.Background(),
 	}
 }

@@ -31,8 +29,8 @@ func (s *RedisStore) key(id string) string {
 }

 // Get retrieves a conversation by ID from Redis.
-func (s *RedisStore) Get(id string) (*Conversation, error) {
-	data, err := s.client.Get(s.ctx, s.key(id)).Bytes()
+func (s *RedisStore) Get(ctx context.Context, id string) (*Conversation, error) {
+	data, err := s.client.Get(ctx, s.key(id)).Bytes()
 	if err == redis.Nil {
 		return nil, nil
 	}
@@ -49,7 +47,7 @@ func (s *RedisStore) Get(id string) (*Conversation, error) {
 }

 // Create creates a new conversation with the given messages.
-func (s *RedisStore) Create(id string, model string, messages []api.Message) (*Conversation, error) {
+func (s *RedisStore) Create(ctx context.Context, id string, model string, messages []api.Message) (*Conversation, error) {
 	now := time.Now()
 	conv := &Conversation{
 		ID: id,
@@ -64,7 +62,7 @@ func (s *RedisStore) Create(id string, model string, messages []api.Message) (*C
 		return nil, err
 	}

-	if err := s.client.Set(s.ctx, s.key(id), data, s.ttl).Err(); err != nil {
+	if err := s.client.Set(ctx, s.key(id), data, s.ttl).Err(); err != nil {
 		return nil, err
 	}
@@ -72,8 +70,8 @@ func (s *RedisStore) Create(id string, model string, messages []api.Message) (*C
 }

 // Append adds new messages to an existing conversation.
-func (s *RedisStore) Append(id string, messages ...api.Message) (*Conversation, error) {
-	conv, err := s.Get(id)
+func (s *RedisStore) Append(ctx context.Context, id string, messages ...api.Message) (*Conversation, error) {
+	conv, err := s.Get(ctx, id)
 	if err != nil {
 		return nil, err
 	}
@@ -89,7 +87,7 @@ func (s *RedisStore) Append(id string, messages ...api.Message) (*Conversation,
 		return nil, err
 	}

-	if err := s.client.Set(s.ctx, s.key(id), data, s.ttl).Err(); err != nil {
+	if err := s.client.Set(ctx, s.key(id), data, s.ttl).Err(); err != nil {
 		return nil, err
 	}
@@ -97,17 +95,18 @@ func (s *RedisStore) Append(id string, messages ...api.Message) (*Conversation,
 }

 // Delete removes a conversation from Redis.
-func (s *RedisStore) Delete(id string) error {
-	return s.client.Del(s.ctx, s.key(id)).Err()
+func (s *RedisStore) Delete(ctx context.Context, id string) error {
+	return s.client.Del(ctx, s.key(id)).Err()
 }

 // Size returns the number of active conversations in Redis.
 func (s *RedisStore) Size() int {
 	var count int
 	var cursor uint64
+	ctx := context.Background()

 	for {
-		keys, nextCursor, err := s.client.Scan(s.ctx, cursor, "conv:*", 100).Result()
+		keys, nextCursor, err := s.client.Scan(ctx, cursor, "conv:*", 100).Result()
 		if err != nil {
 			return 0
 		}
@@ -122,3 +121,8 @@ func (s *RedisStore) Size() int {
 	return count
 }
+
+// Close closes the Redis client connection.
+func (s *RedisStore) Close() error {
+	return s.client.Close()
+}
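// A minimal usage sketch (not part of the diff above) showing what the
// context refactor enables: each Redis operation can now be bounded by a
// request-scoped deadline instead of the store-wide context.Background()
// that was removed. The handler shape and 2s budget are illustrative;
// assumes imports of context, encoding/json, net/http, and time.
func handleGetConversation(store *conversation.RedisStore, w http.ResponseWriter, r *http.Request) {
	ctx, cancel := context.WithTimeout(r.Context(), 2*time.Second)
	defer cancel()

	conv, err := store.Get(ctx, r.URL.Query().Get("id"))
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}
	_ = json.NewEncoder(w).Encode(conv)
}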

View File

@@ -1,6 +1,7 @@
 package conversation

 import (
+	"context"
 	"database/sql"
 	"encoding/json"
 	"time"
@@ -41,6 +42,7 @@ type SQLStore struct {
 	db      *sql.DB
 	ttl     time.Duration
 	dialect sqlDialect
+	done    chan struct{}
 }

 // NewSQLStore creates a SQL-backed conversation store. It creates the
@@ -58,15 +60,20 @@ func NewSQLStore(db *sql.DB, driver string, ttl time.Duration) (*SQLStore, error
 		return nil, err
 	}

-	s := &SQLStore{db: db, ttl: ttl, dialect: newDialect(driver)}
+	s := &SQLStore{
+		db:      db,
+		ttl:     ttl,
+		dialect: newDialect(driver),
+		done:    make(chan struct{}),
+	}
 	if ttl > 0 {
 		go s.cleanup()
 	}
 	return s, nil
 }

-func (s *SQLStore) Get(id string) (*Conversation, error) {
-	row := s.db.QueryRow(s.dialect.getByID, id)
+func (s *SQLStore) Get(ctx context.Context, id string) (*Conversation, error) {
+	row := s.db.QueryRowContext(ctx, s.dialect.getByID, id)

 	var conv Conversation
 	var msgJSON string
@@ -85,14 +92,14 @@ func (s *SQLStore) Get(id string) (*Conversation, error) {
 	return &conv, nil
 }

-func (s *SQLStore) Create(id string, model string, messages []api.Message) (*Conversation, error) {
+func (s *SQLStore) Create(ctx context.Context, id string, model string, messages []api.Message) (*Conversation, error) {
 	now := time.Now()

 	msgJSON, err := json.Marshal(messages)
 	if err != nil {
 		return nil, err
 	}

-	if _, err := s.db.Exec(s.dialect.upsert, id, model, string(msgJSON), now, now); err != nil {
+	if _, err := s.db.ExecContext(ctx, s.dialect.upsert, id, model, string(msgJSON), now, now); err != nil {
 		return nil, err
 	}
@@ -105,8 +112,8 @@ func (s *SQLStore) Create(id string, model string, messages []api.Message) (*Con
 	}, nil
 }

-func (s *SQLStore) Append(id string, messages ...api.Message) (*Conversation, error) {
-	conv, err := s.Get(id)
+func (s *SQLStore) Append(ctx context.Context, id string, messages ...api.Message) (*Conversation, error) {
+	conv, err := s.Get(ctx, id)
 	if err != nil {
 		return nil, err
 	}
@@ -122,15 +129,15 @@ func (s *SQLStore) Append(id string, messages ...api.Message) (*Conversation, er
 		return nil, err
 	}

-	if _, err := s.db.Exec(s.dialect.update, string(msgJSON), conv.UpdatedAt, id); err != nil {
+	if _, err := s.db.ExecContext(ctx, s.dialect.update, string(msgJSON), conv.UpdatedAt, id); err != nil {
 		return nil, err
 	}

 	return conv, nil
 }

-func (s *SQLStore) Delete(id string) error {
-	_, err := s.db.Exec(s.dialect.deleteByID, id)
+func (s *SQLStore) Delete(ctx context.Context, id string) error {
+	_, err := s.db.ExecContext(ctx, s.dialect.deleteByID, id)
 	return err
 }
@@ -144,8 +151,19 @@ func (s *SQLStore) cleanup() {
 	ticker := time.NewTicker(1 * time.Minute)
 	defer ticker.Stop()

-	for range ticker.C {
-		cutoff := time.Now().Add(-s.ttl)
-		_, _ = s.db.Exec(s.dialect.cleanup, cutoff)
+	for {
+		select {
+		case <-ticker.C:
+			cutoff := time.Now().Add(-s.ttl)
+			_, _ = s.db.Exec(s.dialect.cleanup, cutoff)
+		case <-s.done:
+			return
+		}
 	}
 }
+
+// Close stops the cleanup goroutine and closes the database connection.
+func (s *SQLStore) Close() error {
+	close(s.done)
+	return s.db.Close()
+}
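// A minimal shutdown sketch (not part of the diff above) wiring the new Close
// method in; the driver name and DSN are illustrative, and error handling is
// elided to log.Fatal. Assumes imports of database/sql, log, and time.
db, err := sql.Open("sqlite3", "gateway.db")
if err != nil {
	log.Fatal(err)
}
store, err := conversation.NewSQLStore(db, "sqlite3", time.Hour)
if err != nil {
	log.Fatal(err)
}
defer store.Close() // stops the cleanup goroutine and closes the DB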

73
internal/logger/logger.go Normal file
View File

@@ -0,0 +1,73 @@
package logger
import (
"context"
"log/slog"
"os"
"go.opentelemetry.io/otel/trace"
)
type contextKey string
const requestIDKey contextKey = "request_id"
// New creates a logger with the specified format (json or text) and level.
func New(format string, level string) *slog.Logger {
var handler slog.Handler
logLevel := parseLevel(level)
opts := &slog.HandlerOptions{
Level: logLevel,
AddSource: true, // Add file:line info for debugging
}
if format == "json" {
handler = slog.NewJSONHandler(os.Stdout, opts)
} else {
handler = slog.NewTextHandler(os.Stdout, opts)
}
return slog.New(handler)
}
// parseLevel converts a string level to slog.Level.
func parseLevel(level string) slog.Level {
switch level {
case "debug":
return slog.LevelDebug
case "info":
return slog.LevelInfo
case "warn":
return slog.LevelWarn
case "error":
return slog.LevelError
default:
return slog.LevelInfo
}
}
// WithRequestID adds a request ID to the context for tracing.
func WithRequestID(ctx context.Context, requestID string) context.Context {
return context.WithValue(ctx, requestIDKey, requestID)
}
// FromContext extracts the request ID from context, or returns empty string.
func FromContext(ctx context.Context) string {
if id, ok := ctx.Value(requestIDKey).(string); ok {
return id
}
return ""
}
// LogAttrsWithTrace adds trace context to log attributes for correlation.
func LogAttrsWithTrace(ctx context.Context, attrs ...any) []any {
spanCtx := trace.SpanFromContext(ctx).SpanContext()
if spanCtx.IsValid() {
attrs = append(attrs,
slog.String("trace_id", spanCtx.TraceID().String()),
slog.String("span_id", spanCtx.SpanID().String()),
)
}
return attrs
}
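// A minimal usage sketch (not part of the file above); the format, level,
// and request ID values are illustrative. Assumes imports of context and
// log/slog alongside this logger package.
log := logger.New("json", "debug")
ctx := logger.WithRequestID(context.Background(), "req-123")
log.Info("handling request",
	logger.LogAttrsWithTrace(ctx,
		slog.String("request_id", logger.FromContext(ctx)),
	)...)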

View File

@@ -0,0 +1,98 @@
package observability
import (
	"sync"

	"github.com/ajac-zero/latticelm/internal/conversation"
	"github.com/ajac-zero/latticelm/internal/providers"
	"github.com/prometheus/client_golang/prometheus"
	sdktrace "go.opentelemetry.io/otel/sdk/trace"
)
// ProviderRegistry defines the interface for provider registries.
// This matches the interface expected by the server.
type ProviderRegistry interface {
Get(name string) (providers.Provider, bool)
Models() []struct{ Provider, Model string }
ResolveModelID(model string) string
Default(model string) (providers.Provider, error)
}
// WrapProviderRegistry wraps all providers in a registry with observability.
func WrapProviderRegistry(registry ProviderRegistry, metricsRegistry *prometheus.Registry, tp *sdktrace.TracerProvider) ProviderRegistry {
if registry == nil {
return nil
}
	// We can't modify the wrapped registry's internal map directly, so we
	// return a new instrumented registry that wraps providers lazily as
	// they are retrieved.
return &InstrumentedRegistry{
base: registry,
metrics: metricsRegistry,
tracer: tp,
wrappedProviders: make(map[string]providers.Provider),
}
}
// InstrumentedRegistry wraps a provider registry to return instrumented providers.
type InstrumentedRegistry struct {
	base    ProviderRegistry
	metrics *prometheus.Registry
	tracer  *sdktrace.TracerProvider

	// mu guards wrappedProviders: Get and Default are called from concurrent
	// request handlers, and an unguarded map write would race.
	mu               sync.Mutex
	wrappedProviders map[string]providers.Provider
}
// Get returns an instrumented provider by entry name.
func (r *InstrumentedRegistry) Get(name string) (providers.Provider, bool) {
	r.mu.Lock()
	defer r.mu.Unlock()
	// Check if we've already wrapped this provider
	if wrapped, ok := r.wrappedProviders[name]; ok {
		return wrapped, true
	}
// Get the base provider
p, ok := r.base.Get(name)
if !ok {
return nil, false
}
// Wrap it
wrapped := NewInstrumentedProvider(p, r.metrics, r.tracer)
r.wrappedProviders[name] = wrapped
return wrapped, true
}
// Default returns the instrumented provider for the given model name.
func (r *InstrumentedRegistry) Default(model string) (providers.Provider, error) {
	p, err := r.base.Default(model)
	if err != nil {
		return nil, err
	}
	r.mu.Lock()
	defer r.mu.Unlock()
	// Check if we've already wrapped this provider
	name := p.Name()
	if wrapped, ok := r.wrappedProviders[name]; ok {
		return wrapped, nil
	}
// Wrap it
wrapped := NewInstrumentedProvider(p, r.metrics, r.tracer)
r.wrappedProviders[name] = wrapped
return wrapped, nil
}
// Models returns the list of configured models and their provider entry names.
func (r *InstrumentedRegistry) Models() []struct{ Provider, Model string } {
return r.base.Models()
}
// ResolveModelID returns the provider_model_id for a model.
func (r *InstrumentedRegistry) ResolveModelID(model string) string {
return r.base.ResolveModelID(model)
}
// WrapConversationStore wraps a conversation store with observability.
func WrapConversationStore(store conversation.Store, backend string, metricsRegistry *prometheus.Registry, tp *sdktrace.TracerProvider) conversation.Store {
if store == nil {
return nil
}
return NewInstrumentedStore(store, backend, metricsRegistry, tp)
}
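// A minimal wiring sketch (not part of the file above) from the caller's
// side, assuming reg, store, metricsReg, and tp were constructed earlier
// during startup; "redis" names the store backend used for metric labels.
instrumented := observability.WrapProviderRegistry(reg, metricsReg, tp)
store = observability.WrapConversationStore(store, "redis", metricsReg, tp)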

View File

@@ -0,0 +1,186 @@
package observability
import (
"github.com/prometheus/client_golang/prometheus"
)
var (
// HTTP Metrics
httpRequestsTotal = prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "http_requests_total",
Help: "Total number of HTTP requests",
},
[]string{"method", "path", "status"},
)
httpRequestDuration = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Name: "http_request_duration_seconds",
Help: "HTTP request latency in seconds",
Buckets: []float64{0.01, 0.05, 0.1, 0.5, 1, 2.5, 5, 10, 30},
},
[]string{"method", "path", "status"},
)
httpRequestSize = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Name: "http_request_size_bytes",
Help: "HTTP request size in bytes",
Buckets: prometheus.ExponentialBuckets(100, 10, 7), // 100B to 100MB
},
[]string{"method", "path"},
)
httpResponseSize = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Name: "http_response_size_bytes",
Help: "HTTP response size in bytes",
Buckets: prometheus.ExponentialBuckets(100, 10, 7), // 100B to 100MB
},
[]string{"method", "path"},
)
// Provider Metrics
providerRequestsTotal = prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "provider_requests_total",
Help: "Total number of provider requests",
},
[]string{"provider", "model", "operation", "status"},
)
providerRequestDuration = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Name: "provider_request_duration_seconds",
Help: "Provider request latency in seconds",
Buckets: []float64{0.1, 0.5, 1, 2, 5, 10, 20, 30, 60},
},
[]string{"provider", "model", "operation"},
)
providerTokensTotal = prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "provider_tokens_total",
Help: "Total number of tokens processed",
},
[]string{"provider", "model", "type"}, // type: input, output
)
providerStreamTTFB = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Name: "provider_stream_ttfb_seconds",
Help: "Time to first byte for streaming requests in seconds",
Buckets: []float64{0.05, 0.1, 0.5, 1, 2, 5, 10},
},
[]string{"provider", "model"},
)
providerStreamChunks = prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "provider_stream_chunks_total",
Help: "Total number of stream chunks received",
},
[]string{"provider", "model"},
)
providerStreamDuration = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Name: "provider_stream_duration_seconds",
Help: "Total duration of streaming requests in seconds",
Buckets: []float64{0.1, 0.5, 1, 2, 5, 10, 20, 30, 60},
},
[]string{"provider", "model"},
)
// Conversation Store Metrics
conversationOperationsTotal = prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "conversation_operations_total",
Help: "Total number of conversation store operations",
},
[]string{"operation", "backend", "status"},
)
conversationOperationDuration = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Name: "conversation_operation_duration_seconds",
Help: "Conversation store operation latency in seconds",
Buckets: []float64{0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1},
},
[]string{"operation", "backend"},
)
conversationActiveCount = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: "conversation_active_count",
Help: "Number of active conversations",
},
[]string{"backend"},
)
// Circuit Breaker Metrics
circuitBreakerState = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: "circuit_breaker_state",
Help: "Circuit breaker state (0=closed, 1=open, 2=half-open)",
},
[]string{"provider"},
)
circuitBreakerStateTransitions = prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "circuit_breaker_state_transitions_total",
Help: "Total number of circuit breaker state transitions",
},
[]string{"provider", "from", "to"},
)
)
// InitMetrics registers all metrics with a new Prometheus registry.
func InitMetrics() *prometheus.Registry {
registry := prometheus.NewRegistry()
// Register HTTP metrics
registry.MustRegister(httpRequestsTotal)
registry.MustRegister(httpRequestDuration)
registry.MustRegister(httpRequestSize)
registry.MustRegister(httpResponseSize)
// Register provider metrics
registry.MustRegister(providerRequestsTotal)
registry.MustRegister(providerRequestDuration)
registry.MustRegister(providerTokensTotal)
registry.MustRegister(providerStreamTTFB)
registry.MustRegister(providerStreamChunks)
registry.MustRegister(providerStreamDuration)
// Register conversation store metrics
registry.MustRegister(conversationOperationsTotal)
registry.MustRegister(conversationOperationDuration)
registry.MustRegister(conversationActiveCount)
// Register circuit breaker metrics
registry.MustRegister(circuitBreakerState)
registry.MustRegister(circuitBreakerStateTransitions)
return registry
}
// RecordCircuitBreakerStateChange records a circuit breaker state transition.
func RecordCircuitBreakerStateChange(provider, from, to string) {
// Record the transition
circuitBreakerStateTransitions.WithLabelValues(provider, from, to).Inc()
// Update the current state gauge
var stateValue float64
switch to {
case "closed":
stateValue = 0
case "open":
stateValue = 1
case "half-open":
stateValue = 2
}
circuitBreakerState.WithLabelValues(provider).Set(stateValue)
}
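// A minimal sketch (not part of the file above) of serving the registry
// returned by InitMetrics, using the standard
// github.com/prometheus/client_golang/prometheus/promhttp handler; the
// /metrics path is conventional.
reg := observability.InitMetrics()
mux := http.NewServeMux()
mux.Handle("/metrics", promhttp.HandlerFor(reg, promhttp.HandlerOpts{}))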

View File

@@ -0,0 +1,62 @@
package observability
import (
"net/http"
"strconv"
"time"
"github.com/prometheus/client_golang/prometheus"
)
// MetricsMiddleware creates a middleware that records HTTP metrics.
func MetricsMiddleware(next http.Handler, registry *prometheus.Registry, _ interface{}) http.Handler {
if registry == nil {
// If metrics are not enabled, pass through without modification
return next
}
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
start := time.Now()
// Record request size
if r.ContentLength > 0 {
httpRequestSize.WithLabelValues(r.Method, r.URL.Path).Observe(float64(r.ContentLength))
}
// Wrap response writer to capture status code and response size
wrapped := &metricsResponseWriter{
ResponseWriter: w,
statusCode: http.StatusOK,
bytesWritten: 0,
}
// Call the next handler
next.ServeHTTP(wrapped, r)
// Record metrics after request completes
duration := time.Since(start).Seconds()
status := strconv.Itoa(wrapped.statusCode)
httpRequestsTotal.WithLabelValues(r.Method, r.URL.Path, status).Inc()
httpRequestDuration.WithLabelValues(r.Method, r.URL.Path, status).Observe(duration)
httpResponseSize.WithLabelValues(r.Method, r.URL.Path).Observe(float64(wrapped.bytesWritten))
})
}
// metricsResponseWriter wraps http.ResponseWriter to capture status code and bytes written.
type metricsResponseWriter struct {
http.ResponseWriter
statusCode int
bytesWritten int
}
func (w *metricsResponseWriter) WriteHeader(statusCode int) {
w.statusCode = statusCode
w.ResponseWriter.WriteHeader(statusCode)
}
func (w *metricsResponseWriter) Write(b []byte) (int, error) {
n, err := w.ResponseWriter.Write(b)
w.bytesWritten += n
return n, err
}
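// A minimal chaining sketch (not part of the file above), continuing the
// /metrics wiring earlier; the middleware's third parameter is currently
// unused, so nil is passed for it here.
handler := observability.MetricsMiddleware(mux, reg, nil)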

View File

@@ -0,0 +1,208 @@
package observability
import (
"context"
"time"
"github.com/ajac-zero/latticelm/internal/api"
"github.com/ajac-zero/latticelm/internal/providers"
"github.com/prometheus/client_golang/prometheus"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/codes"
sdktrace "go.opentelemetry.io/otel/sdk/trace"
"go.opentelemetry.io/otel/trace"
)
// InstrumentedProvider wraps a provider with metrics and tracing.
type InstrumentedProvider struct {
base providers.Provider
registry *prometheus.Registry
tracer trace.Tracer
}
// NewInstrumentedProvider wraps a provider with observability.
func NewInstrumentedProvider(p providers.Provider, registry *prometheus.Registry, tp *sdktrace.TracerProvider) providers.Provider {
var tracer trace.Tracer
if tp != nil {
tracer = tp.Tracer("llm-gateway")
}
return &InstrumentedProvider{
base: p,
registry: registry,
tracer: tracer,
}
}
// Name returns the name of the underlying provider.
func (p *InstrumentedProvider) Name() string {
return p.base.Name()
}
// Generate wraps the provider's Generate method with metrics and tracing.
func (p *InstrumentedProvider) Generate(ctx context.Context, messages []api.Message, req *api.ResponseRequest) (*api.ProviderResult, error) {
// Start span if tracing is enabled
if p.tracer != nil {
var span trace.Span
ctx, span = p.tracer.Start(ctx, "provider.generate",
trace.WithSpanKind(trace.SpanKindClient),
trace.WithAttributes(
attribute.String("provider.name", p.base.Name()),
attribute.String("provider.model", req.Model),
),
)
defer span.End()
}
// Record start time
start := time.Now()
// Call underlying provider
result, err := p.base.Generate(ctx, messages, req)
// Record metrics
duration := time.Since(start).Seconds()
status := "success"
if err != nil {
status = "error"
if p.tracer != nil {
span := trace.SpanFromContext(ctx)
span.RecordError(err)
span.SetStatus(codes.Error, err.Error())
}
} else if result != nil {
// Add token attributes to span
if p.tracer != nil {
span := trace.SpanFromContext(ctx)
span.SetAttributes(
attribute.Int64("provider.input_tokens", int64(result.Usage.InputTokens)),
attribute.Int64("provider.output_tokens", int64(result.Usage.OutputTokens)),
attribute.Int64("provider.total_tokens", int64(result.Usage.TotalTokens)),
)
span.SetStatus(codes.Ok, "")
}
// Record token metrics
if p.registry != nil {
providerTokensTotal.WithLabelValues(p.base.Name(), req.Model, "input").Add(float64(result.Usage.InputTokens))
providerTokensTotal.WithLabelValues(p.base.Name(), req.Model, "output").Add(float64(result.Usage.OutputTokens))
}
}
// Record request metrics
if p.registry != nil {
providerRequestsTotal.WithLabelValues(p.base.Name(), req.Model, "generate", status).Inc()
providerRequestDuration.WithLabelValues(p.base.Name(), req.Model, "generate").Observe(duration)
}
return result, err
}
// GenerateStream wraps the provider's GenerateStream method with metrics and tracing.
func (p *InstrumentedProvider) GenerateStream(ctx context.Context, messages []api.Message, req *api.ResponseRequest) (<-chan *api.ProviderStreamDelta, <-chan error) {
	// Start span if tracing is enabled. The span must outlive this method:
	// deferring span.End() here would end the span as soon as the channels
	// are returned, so it is ended in the forwarding goroutine instead.
	var span trace.Span
	if p.tracer != nil {
		ctx, span = p.tracer.Start(ctx, "provider.generate_stream",
			trace.WithSpanKind(trace.SpanKindClient),
			trace.WithAttributes(
				attribute.String("provider.name", p.base.Name()),
				attribute.String("provider.model", req.Model),
			),
		)
	}
// Record start time
start := time.Now()
var ttfb time.Duration
firstChunk := true
// Create instrumented channels
baseChan, baseErrChan := p.base.GenerateStream(ctx, messages, req)
outChan := make(chan *api.ProviderStreamDelta)
outErrChan := make(chan error, 1)
// Metrics tracking
var chunkCount int64
var totalInputTokens, totalOutputTokens int64
var streamErr error
	go func() {
		defer close(outChan)
		defer close(outErrChan)
		if span != nil {
			// End the stream span only after the stream fully completes.
			defer span.End()
		}
for {
select {
case delta, ok := <-baseChan:
if !ok {
// Stream finished - record final metrics
duration := time.Since(start).Seconds()
status := "success"
if streamErr != nil {
status = "error"
if p.tracer != nil {
span := trace.SpanFromContext(ctx)
span.RecordError(streamErr)
span.SetStatus(codes.Error, streamErr.Error())
}
} else {
if p.tracer != nil {
span := trace.SpanFromContext(ctx)
span.SetAttributes(
attribute.Int64("provider.input_tokens", totalInputTokens),
attribute.Int64("provider.output_tokens", totalOutputTokens),
attribute.Int64("provider.chunk_count", chunkCount),
attribute.Float64("provider.ttfb_seconds", ttfb.Seconds()),
)
span.SetStatus(codes.Ok, "")
}
// Record token metrics
if p.registry != nil && (totalInputTokens > 0 || totalOutputTokens > 0) {
providerTokensTotal.WithLabelValues(p.base.Name(), req.Model, "input").Add(float64(totalInputTokens))
providerTokensTotal.WithLabelValues(p.base.Name(), req.Model, "output").Add(float64(totalOutputTokens))
}
}
// Record stream metrics
if p.registry != nil {
providerRequestsTotal.WithLabelValues(p.base.Name(), req.Model, "generate_stream", status).Inc()
providerStreamDuration.WithLabelValues(p.base.Name(), req.Model).Observe(duration)
providerStreamChunks.WithLabelValues(p.base.Name(), req.Model).Add(float64(chunkCount))
if ttfb > 0 {
providerStreamTTFB.WithLabelValues(p.base.Name(), req.Model).Observe(ttfb.Seconds())
}
}
return
}
// Record TTFB on first chunk
if firstChunk {
ttfb = time.Since(start)
firstChunk = false
}
chunkCount++
// Track token usage
if delta.Usage != nil {
totalInputTokens = int64(delta.Usage.InputTokens)
totalOutputTokens = int64(delta.Usage.OutputTokens)
}
// Forward the delta
outChan <- delta
case err, ok := <-baseErrChan:
if ok && err != nil {
streamErr = err
outErrChan <- err
}
return
}
}
}()
return outChan, outErrChan
}

View File

@@ -0,0 +1,250 @@
package observability
import (
"context"
"time"
"github.com/ajac-zero/latticelm/internal/api"
"github.com/ajac-zero/latticelm/internal/conversation"
"github.com/prometheus/client_golang/prometheus"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/codes"
sdktrace "go.opentelemetry.io/otel/sdk/trace"
"go.opentelemetry.io/otel/trace"
)
// InstrumentedStore wraps a conversation store with metrics and tracing.
type InstrumentedStore struct {
base conversation.Store
registry *prometheus.Registry
tracer trace.Tracer
backend string
}
// NewInstrumentedStore wraps a conversation store with observability.
func NewInstrumentedStore(s conversation.Store, backend string, registry *prometheus.Registry, tp *sdktrace.TracerProvider) conversation.Store {
var tracer trace.Tracer
if tp != nil {
tracer = tp.Tracer("llm-gateway")
}
// Initialize gauge with current size
if registry != nil {
conversationActiveCount.WithLabelValues(backend).Set(float64(s.Size()))
}
return &InstrumentedStore{
base: s,
registry: registry,
tracer: tracer,
backend: backend,
}
}
// Get wraps the store's Get method with metrics and tracing.
func (s *InstrumentedStore) Get(ctx context.Context, id string) (*conversation.Conversation, error) {
// Start span if tracing is enabled
if s.tracer != nil {
var span trace.Span
ctx, span = s.tracer.Start(ctx, "conversation.get",
trace.WithAttributes(
attribute.String("conversation.id", id),
attribute.String("conversation.backend", s.backend),
),
)
defer span.End()
}
// Record start time
start := time.Now()
// Call underlying store
conv, err := s.base.Get(ctx, id)
// Record metrics
duration := time.Since(start).Seconds()
status := "success"
if err != nil {
status = "error"
if s.tracer != nil {
span := trace.SpanFromContext(ctx)
span.RecordError(err)
span.SetStatus(codes.Error, err.Error())
}
} else {
if s.tracer != nil {
span := trace.SpanFromContext(ctx)
if conv != nil {
span.SetAttributes(
attribute.Int("conversation.message_count", len(conv.Messages)),
attribute.String("conversation.model", conv.Model),
)
}
span.SetStatus(codes.Ok, "")
}
}
if s.registry != nil {
conversationOperationsTotal.WithLabelValues("get", s.backend, status).Inc()
conversationOperationDuration.WithLabelValues("get", s.backend).Observe(duration)
}
return conv, err
}
// Create wraps the store's Create method with metrics and tracing.
func (s *InstrumentedStore) Create(ctx context.Context, id string, model string, messages []api.Message) (*conversation.Conversation, error) {
// Start span if tracing is enabled
if s.tracer != nil {
var span trace.Span
ctx, span = s.tracer.Start(ctx, "conversation.create",
trace.WithAttributes(
attribute.String("conversation.id", id),
attribute.String("conversation.backend", s.backend),
attribute.String("conversation.model", model),
attribute.Int("conversation.initial_messages", len(messages)),
),
)
defer span.End()
}
// Record start time
start := time.Now()
// Call underlying store
conv, err := s.base.Create(ctx, id, model, messages)
// Record metrics
duration := time.Since(start).Seconds()
status := "success"
if err != nil {
status = "error"
if s.tracer != nil {
span := trace.SpanFromContext(ctx)
span.RecordError(err)
span.SetStatus(codes.Error, err.Error())
}
} else {
if s.tracer != nil {
span := trace.SpanFromContext(ctx)
span.SetStatus(codes.Ok, "")
}
}
if s.registry != nil {
conversationOperationsTotal.WithLabelValues("create", s.backend, status).Inc()
conversationOperationDuration.WithLabelValues("create", s.backend).Observe(duration)
// Update active count
conversationActiveCount.WithLabelValues(s.backend).Set(float64(s.base.Size()))
}
return conv, err
}
// Append wraps the store's Append method with metrics and tracing.
func (s *InstrumentedStore) Append(ctx context.Context, id string, messages ...api.Message) (*conversation.Conversation, error) {
// Start span if tracing is enabled
if s.tracer != nil {
var span trace.Span
ctx, span = s.tracer.Start(ctx, "conversation.append",
trace.WithAttributes(
attribute.String("conversation.id", id),
attribute.String("conversation.backend", s.backend),
attribute.Int("conversation.appended_messages", len(messages)),
),
)
defer span.End()
}
// Record start time
start := time.Now()
// Call underlying store
conv, err := s.base.Append(ctx, id, messages...)
// Record metrics
duration := time.Since(start).Seconds()
status := "success"
if err != nil {
status = "error"
if s.tracer != nil {
span := trace.SpanFromContext(ctx)
span.RecordError(err)
span.SetStatus(codes.Error, err.Error())
}
} else {
if s.tracer != nil {
span := trace.SpanFromContext(ctx)
if conv != nil {
span.SetAttributes(
attribute.Int("conversation.total_messages", len(conv.Messages)),
)
}
span.SetStatus(codes.Ok, "")
}
}
if s.registry != nil {
conversationOperationsTotal.WithLabelValues("append", s.backend, status).Inc()
conversationOperationDuration.WithLabelValues("append", s.backend).Observe(duration)
}
return conv, err
}
// Delete wraps the store's Delete method with metrics and tracing.
func (s *InstrumentedStore) Delete(ctx context.Context, id string) error {
// Start span if tracing is enabled
if s.tracer != nil {
var span trace.Span
ctx, span = s.tracer.Start(ctx, "conversation.delete",
trace.WithAttributes(
attribute.String("conversation.id", id),
attribute.String("conversation.backend", s.backend),
),
)
defer span.End()
}
// Record start time
start := time.Now()
// Call underlying store
err := s.base.Delete(ctx, id)
// Record metrics
duration := time.Since(start).Seconds()
status := "success"
if err != nil {
status = "error"
if s.tracer != nil {
span := trace.SpanFromContext(ctx)
span.RecordError(err)
span.SetStatus(codes.Error, err.Error())
}
} else {
if s.tracer != nil {
span := trace.SpanFromContext(ctx)
span.SetStatus(codes.Ok, "")
}
}
if s.registry != nil {
conversationOperationsTotal.WithLabelValues("delete", s.backend, status).Inc()
conversationOperationDuration.WithLabelValues("delete", s.backend).Observe(duration)
// Update active count
conversationActiveCount.WithLabelValues(s.backend).Set(float64(s.base.Size()))
}
return err
}
// Size returns the size of the underlying store.
func (s *InstrumentedStore) Size() int {
return s.base.Size()
}
// Close wraps the store's Close method.
func (s *InstrumentedStore) Close() error {
return s.base.Close()
}

View File

@@ -0,0 +1,104 @@
package observability
import (
"context"
"fmt"
"github.com/ajac-zero/latticelm/internal/config"
"go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc"
"go.opentelemetry.io/otel/exporters/stdout/stdouttrace"
"go.opentelemetry.io/otel/sdk/resource"
sdktrace "go.opentelemetry.io/otel/sdk/trace"
semconv "go.opentelemetry.io/otel/semconv/v1.24.0"
"google.golang.org/grpc"
"google.golang.org/grpc/credentials/insecure"
)
// InitTracer initializes the OpenTelemetry tracer provider.
func InitTracer(cfg config.TracingConfig) (*sdktrace.TracerProvider, error) {
// Create resource with service information
res, err := resource.Merge(
resource.Default(),
resource.NewWithAttributes(
semconv.SchemaURL,
semconv.ServiceName(cfg.ServiceName),
),
)
if err != nil {
return nil, fmt.Errorf("failed to create resource: %w", err)
}
// Create exporter
var exporter sdktrace.SpanExporter
switch cfg.Exporter.Type {
case "otlp":
exporter, err = createOTLPExporter(cfg.Exporter)
if err != nil {
return nil, fmt.Errorf("failed to create OTLP exporter: %w", err)
}
case "stdout":
exporter, err = stdouttrace.New(
stdouttrace.WithPrettyPrint(),
)
if err != nil {
return nil, fmt.Errorf("failed to create stdout exporter: %w", err)
}
default:
return nil, fmt.Errorf("unsupported exporter type: %s", cfg.Exporter.Type)
}
// Create sampler
sampler := createSampler(cfg.Sampler)
// Create tracer provider
tp := sdktrace.NewTracerProvider(
sdktrace.WithBatcher(exporter),
sdktrace.WithResource(res),
sdktrace.WithSampler(sampler),
)
return tp, nil
}
// createOTLPExporter creates an OTLP gRPC exporter.
func createOTLPExporter(cfg config.ExporterConfig) (sdktrace.SpanExporter, error) {
opts := []otlptracegrpc.Option{
otlptracegrpc.WithEndpoint(cfg.Endpoint),
}
if cfg.Insecure {
opts = append(opts, otlptracegrpc.WithTLSCredentials(insecure.NewCredentials()))
}
if len(cfg.Headers) > 0 {
opts = append(opts, otlptracegrpc.WithHeaders(cfg.Headers))
}
	// Block dialing until the connection is established so endpoint errors
	// surface at startup rather than on the first export.
	opts = append(opts, otlptracegrpc.WithDialOption(grpc.WithBlock()))
return otlptracegrpc.New(context.Background(), opts...)
}
// createSampler creates a sampler based on the configuration.
func createSampler(cfg config.SamplerConfig) sdktrace.Sampler {
switch cfg.Type {
case "always":
return sdktrace.AlwaysSample()
case "never":
return sdktrace.NeverSample()
case "probability":
return sdktrace.TraceIDRatioBased(cfg.Rate)
default:
// Default to 10% sampling
return sdktrace.TraceIDRatioBased(0.1)
}
}
// Shutdown gracefully shuts down the tracer provider.
func Shutdown(ctx context.Context, tp *sdktrace.TracerProvider) error {
if tp == nil {
return nil
}
return tp.Shutdown(ctx)
}
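// A minimal startup sketch (not part of the file above), assuming the parsed
// gateway config exposes a config.TracingConfig as cfg.Tracing; the 5s
// shutdown budget is illustrative.
tp, err := observability.InitTracer(cfg.Tracing)
if err != nil {
	log.Fatalf("init tracer: %v", err)
}
defer func() {
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()
	_ = observability.Shutdown(ctx, tp)
}()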

View File

@@ -0,0 +1,85 @@
package observability
import (
"net/http"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/codes"
"go.opentelemetry.io/otel/propagation"
sdktrace "go.opentelemetry.io/otel/sdk/trace"
"go.opentelemetry.io/otel/trace"
)
// TracingMiddleware creates a middleware that adds OpenTelemetry tracing to HTTP requests.
func TracingMiddleware(next http.Handler, tp *sdktrace.TracerProvider) http.Handler {
if tp == nil {
// If tracing is not enabled, pass through without modification
return next
}
// Set up W3C Trace Context propagation
otel.SetTextMapPropagator(propagation.TraceContext{})
tracer := tp.Tracer("llm-gateway")
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
// Extract trace context from incoming request headers
ctx := otel.GetTextMapPropagator().Extract(r.Context(), propagation.HeaderCarrier(r.Header))
// Start a new span
ctx, span := tracer.Start(ctx, "HTTP "+r.Method+" "+r.URL.Path,
trace.WithSpanKind(trace.SpanKindServer),
trace.WithAttributes(
attribute.String("http.method", r.Method),
attribute.String("http.route", r.URL.Path),
attribute.String("http.scheme", r.URL.Scheme),
attribute.String("http.host", r.Host),
attribute.String("http.user_agent", r.Header.Get("User-Agent")),
),
)
defer span.End()
// Add request ID to span if present
if requestID := r.Header.Get("X-Request-ID"); requestID != "" {
span.SetAttributes(attribute.String("http.request_id", requestID))
}
// Create a response writer wrapper to capture status code
wrapped := &statusResponseWriter{
ResponseWriter: w,
statusCode: http.StatusOK,
}
// Inject trace context into request for downstream services
r = r.WithContext(ctx)
// Call the next handler
next.ServeHTTP(wrapped, r)
// Record the status code in the span
span.SetAttributes(attribute.Int("http.status_code", wrapped.statusCode))
// Set span status based on HTTP status code
if wrapped.statusCode >= 400 {
span.SetStatus(codes.Error, http.StatusText(wrapped.statusCode))
} else {
span.SetStatus(codes.Ok, "")
}
})
}
// statusResponseWriter wraps http.ResponseWriter to capture the status code.
type statusResponseWriter struct {
http.ResponseWriter
statusCode int
}
func (w *statusResponseWriter) WriteHeader(statusCode int) {
w.statusCode = statusCode
w.ResponseWriter.WriteHeader(statusCode)
}
func (w *statusResponseWriter) Write(b []byte) (int, error) {
return w.ResponseWriter.Write(b)
}

View File

@@ -0,0 +1,145 @@
package providers
import (
"context"
"fmt"
"time"
"github.com/sony/gobreaker"
"github.com/ajac-zero/latticelm/internal/api"
)
// CircuitBreakerProvider wraps a Provider with circuit breaker functionality.
type CircuitBreakerProvider struct {
provider Provider
cb *gobreaker.CircuitBreaker
}
// CircuitBreakerConfig holds configuration for the circuit breaker.
type CircuitBreakerConfig struct {
// MaxRequests is the maximum number of requests allowed to pass through
// when the circuit breaker is half-open. Default: 3
MaxRequests uint32
// Interval is the cyclic period of the closed state for the circuit breaker
// to clear the internal Counts. Default: 30s
Interval time.Duration
// Timeout is the period of the open state, after which the state becomes half-open.
// Default: 60s
Timeout time.Duration
// MinRequests is the minimum number of requests needed before evaluating failure ratio.
// Default: 5
MinRequests uint32
// FailureRatio is the ratio of failures that will trip the circuit breaker.
// Default: 0.5 (50%)
FailureRatio float64
// OnStateChange is an optional callback invoked when circuit breaker state changes.
// Parameters: provider name, from state, to state
OnStateChange func(provider, from, to string)
}
// DefaultCircuitBreakerConfig returns a sensible default configuration.
func DefaultCircuitBreakerConfig() CircuitBreakerConfig {
return CircuitBreakerConfig{
MaxRequests: 3,
Interval: 30 * time.Second,
Timeout: 60 * time.Second,
MinRequests: 5,
FailureRatio: 0.5,
}
}
// NewCircuitBreakerProvider wraps a provider with circuit breaker functionality.
func NewCircuitBreakerProvider(provider Provider, cfg CircuitBreakerConfig) *CircuitBreakerProvider {
providerName := provider.Name()
settings := gobreaker.Settings{
Name: fmt.Sprintf("%s-circuit-breaker", providerName),
MaxRequests: cfg.MaxRequests,
Interval: cfg.Interval,
Timeout: cfg.Timeout,
ReadyToTrip: func(counts gobreaker.Counts) bool {
// Only trip if we have enough requests to be statistically meaningful
if counts.Requests < cfg.MinRequests {
return false
}
failureRatio := float64(counts.TotalFailures) / float64(counts.Requests)
return failureRatio >= cfg.FailureRatio
},
OnStateChange: func(name string, from gobreaker.State, to gobreaker.State) {
// Call the callback if provided
if cfg.OnStateChange != nil {
cfg.OnStateChange(providerName, from.String(), to.String())
}
},
}
return &CircuitBreakerProvider{
provider: provider,
cb: gobreaker.NewCircuitBreaker(settings),
}
}
// Name returns the underlying provider name.
func (p *CircuitBreakerProvider) Name() string {
return p.provider.Name()
}
// Generate wraps the provider's Generate method with circuit breaker protection.
func (p *CircuitBreakerProvider) Generate(ctx context.Context, messages []api.Message, req *api.ResponseRequest) (*api.ProviderResult, error) {
result, err := p.cb.Execute(func() (interface{}, error) {
return p.provider.Generate(ctx, messages, req)
})
if err != nil {
return nil, err
}
return result.(*api.ProviderResult), nil
}
// GenerateStream wraps the provider's GenerateStream method with circuit breaker protection.
func (p *CircuitBreakerProvider) GenerateStream(ctx context.Context, messages []api.Message, req *api.ResponseRequest) (<-chan *api.ProviderStreamDelta, <-chan error) {
	// For streaming, check the circuit breaker state before initiating the
	// stream; if the circuit is open, return an error immediately.
state := p.cb.State()
if state == gobreaker.StateOpen {
errChan := make(chan error, 1)
deltaChan := make(chan *api.ProviderStreamDelta)
errChan <- gobreaker.ErrOpenState
close(deltaChan)
close(errChan)
return deltaChan, errChan
}
// If circuit is closed or half-open, attempt the stream
deltaChan, errChan := p.provider.GenerateStream(ctx, messages, req)
// Wrap the error channel to report successes/failures to circuit breaker
wrappedErrChan := make(chan error, 1)
go func() {
defer close(wrappedErrChan)
// Wait for the error channel to signal completion
if err := <-errChan; err != nil {
// Record failure in circuit breaker
p.cb.Execute(func() (interface{}, error) {
return nil, err
})
wrappedErrChan <- err
} else {
// Record success in circuit breaker
p.cb.Execute(func() (interface{}, error) {
return nil, nil
})
}
}()
return deltaChan, wrappedErrChan
}
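// A minimal sketch (not part of the file above) of wrapping a provider with
// a tighter breaker than the defaults; base stands in for whatever Provider
// implementation is being protected, and the callback is the one defined in
// the observability package of this changeset.
cfg := providers.DefaultCircuitBreakerConfig()
cfg.Timeout = 30 * time.Second // probe the provider again sooner than the 60s default
cfg.OnStateChange = observability.RecordCircuitBreakerStateChange
wrapped := providers.NewCircuitBreakerProvider(base, cfg)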

View File

@@ -0,0 +1,363 @@
package google
import (
"encoding/json"
"testing"
"github.com/ajac-zero/latticelm/internal/api"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"google.golang.org/genai"
)
func TestParseTools(t *testing.T) {
tests := []struct {
name string
toolsJSON string
expectError bool
validate func(t *testing.T, tools []*genai.Tool)
}{
{
name: "flat format tool",
toolsJSON: `[{
"type": "function",
"name": "get_weather",
"description": "Get the weather for a location",
"parameters": {
"type": "object",
"properties": {
"location": {"type": "string"}
},
"required": ["location"]
}
}]`,
validate: func(t *testing.T, tools []*genai.Tool) {
require.Len(t, tools, 1, "should have one tool")
require.Len(t, tools[0].FunctionDeclarations, 1, "should have one function declaration")
assert.Equal(t, "get_weather", tools[0].FunctionDeclarations[0].Name)
assert.Equal(t, "Get the weather for a location", tools[0].FunctionDeclarations[0].Description)
},
},
{
name: "nested format tool",
toolsJSON: `[{
"type": "function",
"function": {
"name": "get_time",
"description": "Get current time",
"parameters": {
"type": "object",
"properties": {
"timezone": {"type": "string"}
}
}
}
}]`,
validate: func(t *testing.T, tools []*genai.Tool) {
require.Len(t, tools, 1, "should have one tool")
require.Len(t, tools[0].FunctionDeclarations, 1, "should have one function declaration")
assert.Equal(t, "get_time", tools[0].FunctionDeclarations[0].Name)
assert.Equal(t, "Get current time", tools[0].FunctionDeclarations[0].Description)
},
},
{
name: "multiple tools",
toolsJSON: `[
{"name": "tool1", "description": "First tool"},
{"name": "tool2", "description": "Second tool"}
]`,
validate: func(t *testing.T, tools []*genai.Tool) {
require.Len(t, tools, 1, "should consolidate into one tool")
require.Len(t, tools[0].FunctionDeclarations, 2, "should have two function declarations")
},
},
{
name: "tool without description",
toolsJSON: `[{
"name": "simple_tool",
"parameters": {"type": "object"}
}]`,
validate: func(t *testing.T, tools []*genai.Tool) {
require.Len(t, tools, 1, "should have one tool")
assert.Equal(t, "simple_tool", tools[0].FunctionDeclarations[0].Name)
assert.Empty(t, tools[0].FunctionDeclarations[0].Description)
},
},
{
name: "tool without parameters",
toolsJSON: `[{
"name": "paramless_tool",
"description": "No params"
}]`,
validate: func(t *testing.T, tools []*genai.Tool) {
require.Len(t, tools, 1, "should have one tool")
assert.Nil(t, tools[0].FunctionDeclarations[0].ParametersJsonSchema)
},
},
{
name: "tool without name (should skip)",
toolsJSON: `[{
"description": "No name tool",
"parameters": {"type": "object"}
}]`,
validate: func(t *testing.T, tools []*genai.Tool) {
assert.Nil(t, tools, "should return nil when no valid tools")
},
},
{
name: "nil tools",
toolsJSON: "",
expectError: false,
validate: func(t *testing.T, tools []*genai.Tool) {
assert.Nil(t, tools, "should return nil for empty tools")
},
},
{
name: "invalid JSON",
toolsJSON: `{not valid json}`,
expectError: true,
},
{
name: "empty array",
toolsJSON: `[]`,
validate: func(t *testing.T, tools []*genai.Tool) {
assert.Nil(t, tools, "should return nil for empty array")
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
var req api.ResponseRequest
if tt.toolsJSON != "" {
req.Tools = json.RawMessage(tt.toolsJSON)
}
tools, err := parseTools(&req)
if tt.expectError {
assert.Error(t, err, "expected an error")
return
}
require.NoError(t, err, "unexpected error")
if tt.validate != nil {
tt.validate(t, tools)
}
})
}
}
func TestParseToolChoice(t *testing.T) {
tests := []struct {
name string
choiceJSON string
expectError bool
validate func(t *testing.T, config *genai.ToolConfig)
}{
{
name: "auto mode",
choiceJSON: `"auto"`,
validate: func(t *testing.T, config *genai.ToolConfig) {
require.NotNil(t, config, "config should not be nil")
require.NotNil(t, config.FunctionCallingConfig, "function calling config should be set")
assert.Equal(t, genai.FunctionCallingConfigModeAuto, config.FunctionCallingConfig.Mode)
},
},
{
name: "none mode",
choiceJSON: `"none"`,
validate: func(t *testing.T, config *genai.ToolConfig) {
require.NotNil(t, config, "config should not be nil")
assert.Equal(t, genai.FunctionCallingConfigModeNone, config.FunctionCallingConfig.Mode)
},
},
{
name: "required mode",
choiceJSON: `"required"`,
validate: func(t *testing.T, config *genai.ToolConfig) {
require.NotNil(t, config, "config should not be nil")
assert.Equal(t, genai.FunctionCallingConfigModeAny, config.FunctionCallingConfig.Mode)
},
},
{
name: "any mode",
choiceJSON: `"any"`,
validate: func(t *testing.T, config *genai.ToolConfig) {
require.NotNil(t, config, "config should not be nil")
assert.Equal(t, genai.FunctionCallingConfigModeAny, config.FunctionCallingConfig.Mode)
},
},
{
name: "specific function",
choiceJSON: `{"type": "function", "function": {"name": "get_weather"}}`,
validate: func(t *testing.T, config *genai.ToolConfig) {
require.NotNil(t, config, "config should not be nil")
assert.Equal(t, genai.FunctionCallingConfigModeAny, config.FunctionCallingConfig.Mode)
require.Len(t, config.FunctionCallingConfig.AllowedFunctionNames, 1)
assert.Equal(t, "get_weather", config.FunctionCallingConfig.AllowedFunctionNames[0])
},
},
{
name: "nil tool choice",
choiceJSON: "",
validate: func(t *testing.T, config *genai.ToolConfig) {
assert.Nil(t, config, "should return nil for empty choice")
},
},
{
name: "unknown string mode",
choiceJSON: `"unknown_mode"`,
expectError: true,
},
{
name: "invalid JSON",
choiceJSON: `{invalid}`,
expectError: true,
},
{
name: "unsupported object format",
choiceJSON: `{"type": "unsupported"}`,
expectError: true,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
var req api.ResponseRequest
if tt.choiceJSON != "" {
req.ToolChoice = json.RawMessage(tt.choiceJSON)
}
config, err := parseToolChoice(&req)
if tt.expectError {
assert.Error(t, err, "expected an error")
return
}
require.NoError(t, err, "unexpected error")
if tt.validate != nil {
tt.validate(t, config)
}
})
}
}
func TestExtractToolCalls(t *testing.T) {
tests := []struct {
name string
setup func() *genai.GenerateContentResponse
validate func(t *testing.T, toolCalls []api.ToolCall)
}{
{
name: "single tool call",
setup: func() *genai.GenerateContentResponse {
args := map[string]interface{}{
"location": "San Francisco",
}
return &genai.GenerateContentResponse{
Candidates: []*genai.Candidate{
{
Content: &genai.Content{
Parts: []*genai.Part{
{
FunctionCall: &genai.FunctionCall{
ID: "call_123",
Name: "get_weather",
Args: args,
},
},
},
},
},
},
}
},
validate: func(t *testing.T, toolCalls []api.ToolCall) {
require.Len(t, toolCalls, 1)
assert.Equal(t, "call_123", toolCalls[0].ID)
assert.Equal(t, "get_weather", toolCalls[0].Name)
assert.Contains(t, toolCalls[0].Arguments, "location")
},
},
{
name: "tool call without ID generates one",
setup: func() *genai.GenerateContentResponse {
return &genai.GenerateContentResponse{
Candidates: []*genai.Candidate{
{
Content: &genai.Content{
Parts: []*genai.Part{
{
FunctionCall: &genai.FunctionCall{
Name: "get_time",
Args: map[string]interface{}{},
},
},
},
},
},
},
}
},
validate: func(t *testing.T, toolCalls []api.ToolCall) {
require.Len(t, toolCalls, 1)
assert.NotEmpty(t, toolCalls[0].ID, "should generate ID")
assert.Contains(t, toolCalls[0].ID, "call_")
},
},
{
name: "response with nil candidates",
setup: func() *genai.GenerateContentResponse {
return &genai.GenerateContentResponse{
Candidates: nil,
}
},
validate: func(t *testing.T, toolCalls []api.ToolCall) {
assert.Nil(t, toolCalls)
},
},
{
name: "empty candidates",
setup: func() *genai.GenerateContentResponse {
return &genai.GenerateContentResponse{
Candidates: []*genai.Candidate{},
}
},
validate: func(t *testing.T, toolCalls []api.ToolCall) {
assert.Nil(t, toolCalls)
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
resp := tt.setup()
toolCalls := extractToolCalls(resp)
tt.validate(t, toolCalls)
})
}
}
func TestGenerateRandomID(t *testing.T) {
t.Run("generates non-empty ID", func(t *testing.T) {
id := generateRandomID()
assert.NotEmpty(t, id)
assert.Equal(t, 24, len(id), "ID should be 24 characters")
})
t.Run("generates unique IDs", func(t *testing.T) {
id1 := generateRandomID()
id2 := generateRandomID()
assert.NotEqual(t, id1, id2, "IDs should be unique")
})
t.Run("only contains valid characters", func(t *testing.T) {
id := generateRandomID()
for _, c := range id {
assert.True(t, (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9'),
"ID should only contain lowercase letters and numbers")
}
})
}

View File

@@ -21,7 +21,7 @@ type Provider struct {
 }

 // New constructs a Provider using the Google AI API with API key authentication.
-func New(cfg config.ProviderConfig) *Provider {
+func New(cfg config.ProviderConfig) (*Provider, error) {
 	var client *genai.Client
 	if cfg.APIKey != "" {
 		var err error
@@ -29,20 +29,19 @@ func New(cfg config.ProviderConfig) *Provider {
 			APIKey: cfg.APIKey,
 		})
 		if err != nil {
-			// Log error but don't fail construction - will fail on Generate
-			fmt.Printf("warning: failed to create google client: %v\n", err)
+			return nil, fmt.Errorf("failed to create google client: %w", err)
 		}
 	}

 	return &Provider{
 		cfg:    cfg,
 		client: client,
-	}
+	}, nil
 }

 // NewVertexAI constructs a Provider targeting Vertex AI.
 // Vertex AI uses the same genai SDK but with GCP project/location configuration
 // and Application Default Credentials (ADC) or service account authentication.
-func NewVertexAI(vertexCfg config.VertexAIConfig) *Provider {
+func NewVertexAI(vertexCfg config.VertexAIConfig) (*Provider, error) {
 	var client *genai.Client
 	if vertexCfg.Project != "" && vertexCfg.Location != "" {
 		var err error
@@ -52,8 +51,7 @@ func NewVertexAI(vertexCfg config.VertexAIConfig) *Provider {
 			Backend: genai.BackendVertexAI,
 		})
 		if err != nil {
-			// Log error but don't fail construction - will fail on Generate
-			fmt.Printf("warning: failed to create vertex ai client: %v\n", err)
+			return nil, fmt.Errorf("failed to create vertex ai client: %w", err)
 		}
 	}

 	return &Provider{
@@ -62,7 +60,7 @@ func NewVertexAI(vertexCfg config.VertexAIConfig) *Provider {
 			APIKey: "",
 		},
 		client: client,
-	}
+	}, nil
 }

 func (p *Provider) Name() string { return Name }

View File

@@ -0,0 +1,227 @@
package openai
import (
"encoding/json"
"testing"
"github.com/ajac-zero/latticelm/internal/api"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func TestParseTools(t *testing.T) {
tests := []struct {
name string
toolsJSON string
expectError bool
validate func(t *testing.T, tools []interface{})
}{
{
name: "single tool with all fields",
toolsJSON: `[{
"type": "function",
"name": "get_weather",
"description": "Get the weather for a location",
"parameters": {
"type": "object",
"properties": {
"location": {
"type": "string",
"description": "The city and state"
},
"units": {
"type": "string",
"enum": ["celsius", "fahrenheit"]
}
},
"required": ["location"]
}
}]`,
validate: func(t *testing.T, tools []interface{}) {
require.Len(t, tools, 1, "should have exactly one tool")
},
},
{
name: "multiple tools",
toolsJSON: `[
{
"name": "get_weather",
"description": "Get weather",
"parameters": {"type": "object"}
},
{
"name": "get_time",
"description": "Get current time",
"parameters": {"type": "object"}
}
]`,
validate: func(t *testing.T, tools []interface{}) {
assert.Len(t, tools, 2, "should have two tools")
},
},
{
name: "tool without description",
toolsJSON: `[{
"name": "simple_tool",
"parameters": {"type": "object"}
}]`,
validate: func(t *testing.T, tools []interface{}) {
assert.Len(t, tools, 1, "should have one tool")
},
},
{
name: "tool without parameters",
toolsJSON: `[{
"name": "paramless_tool",
"description": "A tool without params"
}]`,
validate: func(t *testing.T, tools []interface{}) {
assert.Len(t, tools, 1, "should have one tool")
},
},
{
name: "nil tools",
toolsJSON: "",
expectError: false,
validate: func(t *testing.T, tools []interface{}) {
assert.Nil(t, tools, "should return nil for empty tools")
},
},
{
name: "invalid JSON",
toolsJSON: `{invalid json}`,
expectError: true,
},
{
name: "empty array",
toolsJSON: `[]`,
validate: func(t *testing.T, tools []interface{}) {
assert.Nil(t, tools, "should return nil for empty array")
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
var req api.ResponseRequest
if tt.toolsJSON != "" {
req.Tools = json.RawMessage(tt.toolsJSON)
}
tools, err := parseTools(&req)
if tt.expectError {
assert.Error(t, err, "expected an error")
return
}
require.NoError(t, err, "unexpected error")
if tt.validate != nil {
// Convert to []interface{} for validation
var toolsInterface []interface{}
for _, tool := range tools {
toolsInterface = append(toolsInterface, tool)
}
tt.validate(t, toolsInterface)
}
})
}
}
func TestParseToolChoice(t *testing.T) {
tests := []struct {
name string
choiceJSON string
expectError bool
validate func(t *testing.T, choice interface{})
}{
{
name: "auto string",
choiceJSON: `"auto"`,
validate: func(t *testing.T, choice interface{}) {
assert.NotNil(t, choice, "choice should not be nil")
},
},
{
name: "none string",
choiceJSON: `"none"`,
validate: func(t *testing.T, choice interface{}) {
assert.NotNil(t, choice, "choice should not be nil")
},
},
{
name: "required string",
choiceJSON: `"required"`,
validate: func(t *testing.T, choice interface{}) {
assert.NotNil(t, choice, "choice should not be nil")
},
},
{
name: "specific function",
choiceJSON: `{"type": "function", "function": {"name": "get_weather"}}`,
validate: func(t *testing.T, choice interface{}) {
assert.NotNil(t, choice, "choice should not be nil for specific function")
},
},
{
name: "nil tool choice",
choiceJSON: "",
validate: func(t *testing.T, choice interface{}) {
// Empty choice is valid
},
},
{
name: "invalid JSON",
choiceJSON: `{invalid}`,
expectError: true,
},
{
name: "unsupported format (object without proper structure)",
choiceJSON: `{"invalid": "structure"}`,
validate: func(t *testing.T, choice interface{}) {
// Currently accepts any object even if structure is wrong
// This is documenting actual behavior
assert.NotNil(t, choice, "choice is created even with invalid structure")
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
var req api.ResponseRequest
if tt.choiceJSON != "" {
req.ToolChoice = json.RawMessage(tt.choiceJSON)
}
choice, err := parseToolChoice(&req)
if tt.expectError {
assert.Error(t, err, "expected an error")
return
}
require.NoError(t, err, "unexpected error")
if tt.validate != nil {
tt.validate(t, choice)
}
})
}
}
func TestExtractToolCalls(t *testing.T) {
	// Placeholder: exercising extractToolCalls requires constructing
	// openai.ChatCompletionMessage values, which are not mocked yet. The
	// subtest body is intentionally empty and asserts nothing.
	t.Run("nil message returns nil", func(t *testing.T) {
		// TODO: mock openai.ChatCompletionMessage and assert nil is returned.
	})
}
func TestExtractToolCallDelta(t *testing.T) {
	// Placeholder: exercising extractToolCallDelta requires constructing
	// openai.ChatCompletionChunkChoice values, which are not mocked yet.
	t.Run("empty delta returns nil", func(t *testing.T) {
		// TODO: mock openai.ChatCompletionChunkChoice and assert nil is returned.
	})
}

View File

@@ -28,6 +28,16 @@ type Registry struct {
 // NewRegistry constructs provider implementations from configuration.
 func NewRegistry(entries map[string]config.ProviderEntry, models []config.ModelEntry) (*Registry, error) {
+return NewRegistryWithCircuitBreaker(entries, models, nil)
+}
+// NewRegistryWithCircuitBreaker constructs provider implementations with circuit breaker support.
+// The onStateChange callback is invoked when circuit breaker state changes.
+func NewRegistryWithCircuitBreaker(
+entries map[string]config.ProviderEntry,
+models []config.ModelEntry,
+onStateChange func(provider, from, to string),
+) (*Registry, error) {
 reg := &Registry{
 providers: make(map[string]Provider),
 models: make(map[string]string),
@@ -35,13 +45,18 @@ func NewRegistry(entries map[string]config.ProviderEntry, models []config.ModelE
 modelList: models,
 }
+// Use default circuit breaker configuration
+cbConfig := DefaultCircuitBreakerConfig()
+cbConfig.OnStateChange = onStateChange
 for name, entry := range entries {
 p, err := buildProvider(entry)
 if err != nil {
 return nil, fmt.Errorf("provider %q: %w", name, err)
 }
 if p != nil {
-reg.providers[name] = p
+// Wrap provider with circuit breaker
+reg.providers[name] = NewCircuitBreakerProvider(p, cbConfig)
 }
 }
@@ -97,7 +112,7 @@ func buildProvider(entry config.ProviderEntry) (Provider, error) {
 return googleprovider.New(config.ProviderConfig{
 APIKey: entry.APIKey,
 Endpoint: entry.Endpoint,
-}), nil
+})
 case "vertexai":
 if entry.Project == "" || entry.Location == "" {
 return nil, fmt.Errorf("project and location are required for vertexai")
@@ -105,7 +120,7 @@ func buildProvider(entry config.ProviderEntry) (Provider, error) {
 return googleprovider.NewVertexAI(config.VertexAIConfig{
 Project: entry.Project,
 Location: entry.Location,
-}), nil
+})
 default:
 return nil, fmt.Errorf("unknown provider type %q", entry.Type)
 }
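The new `NewRegistryWithCircuitBreaker` constructor threads an `onStateChange` callback down to every wrapped provider. A minimal wiring sketch, assuming a caller inside the same module; the `buildRegistry` helper is hypothetical:

```go
package main

import (
	"log/slog"

	"github.com/ajac-zero/latticelm/internal/config"
	"github.com/ajac-zero/latticelm/internal/providers"
)

// buildRegistry wires the circuit-breaker-aware constructor with a callback
// that logs every state transition (closed -> open -> half-open).
func buildRegistry(entries map[string]config.ProviderEntry, models []config.ModelEntry) (*providers.Registry, error) {
	onStateChange := func(provider, from, to string) {
		slog.Warn("circuit breaker state change",
			slog.String("provider", provider),
			slog.String("from", from),
			slog.String("to", to),
		)
	}
	return providers.NewRegistryWithCircuitBreaker(entries, models, onStateChange)
}
```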

View File

@@ -0,0 +1,640 @@
package providers
import (
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/ajac-zero/latticelm/internal/config"
)
func TestNewRegistry(t *testing.T) {
tests := []struct {
name string
entries map[string]config.ProviderEntry
models []config.ModelEntry
expectError bool
errorMsg string
validate func(t *testing.T, reg *Registry)
}{
{
name: "valid config with OpenAI",
entries: map[string]config.ProviderEntry{
"openai": {
Type: "openai",
APIKey: "sk-test",
},
},
models: []config.ModelEntry{
{Name: "gpt-4", Provider: "openai"},
},
validate: func(t *testing.T, reg *Registry) {
assert.Len(t, reg.providers, 1)
assert.Contains(t, reg.providers, "openai")
assert.Equal(t, "openai", reg.models["gpt-4"])
},
},
{
name: "valid config with multiple providers",
entries: map[string]config.ProviderEntry{
"openai": {
Type: "openai",
APIKey: "sk-test",
},
"anthropic": {
Type: "anthropic",
APIKey: "sk-ant-test",
},
},
models: []config.ModelEntry{
{Name: "gpt-4", Provider: "openai"},
{Name: "claude-3", Provider: "anthropic"},
},
validate: func(t *testing.T, reg *Registry) {
assert.Len(t, reg.providers, 2)
assert.Contains(t, reg.providers, "openai")
assert.Contains(t, reg.providers, "anthropic")
assert.Equal(t, "openai", reg.models["gpt-4"])
assert.Equal(t, "anthropic", reg.models["claude-3"])
},
},
{
name: "no providers returns error",
entries: map[string]config.ProviderEntry{
"openai": {
Type: "openai",
APIKey: "", // Missing API key
},
},
models: []config.ModelEntry{},
expectError: true,
errorMsg: "no providers configured",
},
{
name: "Azure OpenAI without endpoint returns error",
entries: map[string]config.ProviderEntry{
"azure": {
Type: "azureopenai",
APIKey: "test-key",
},
},
models: []config.ModelEntry{},
expectError: true,
errorMsg: "endpoint is required",
},
{
name: "Azure OpenAI with endpoint succeeds",
entries: map[string]config.ProviderEntry{
"azure": {
Type: "azureopenai",
APIKey: "test-key",
Endpoint: "https://test.openai.azure.com",
APIVersion: "2024-02-15-preview",
},
},
models: []config.ModelEntry{
{Name: "gpt-4-azure", Provider: "azure"},
},
validate: func(t *testing.T, reg *Registry) {
assert.Len(t, reg.providers, 1)
assert.Contains(t, reg.providers, "azure")
},
},
{
name: "Azure Anthropic without endpoint returns error",
entries: map[string]config.ProviderEntry{
"azure-anthropic": {
Type: "azureanthropic",
APIKey: "test-key",
},
},
models: []config.ModelEntry{},
expectError: true,
errorMsg: "endpoint is required",
},
{
name: "Azure Anthropic with endpoint succeeds",
entries: map[string]config.ProviderEntry{
"azure-anthropic": {
Type: "azureanthropic",
APIKey: "test-key",
Endpoint: "https://test.anthropic.azure.com",
},
},
models: []config.ModelEntry{
{Name: "claude-3-azure", Provider: "azure-anthropic"},
},
validate: func(t *testing.T, reg *Registry) {
assert.Len(t, reg.providers, 1)
assert.Contains(t, reg.providers, "azure-anthropic")
},
},
{
name: "Google provider",
entries: map[string]config.ProviderEntry{
"google": {
Type: "google",
APIKey: "test-key",
},
},
models: []config.ModelEntry{
{Name: "gemini-pro", Provider: "google"},
},
validate: func(t *testing.T, reg *Registry) {
assert.Len(t, reg.providers, 1)
assert.Contains(t, reg.providers, "google")
},
},
{
name: "Vertex AI without project/location returns error",
entries: map[string]config.ProviderEntry{
"vertex": {
Type: "vertexai",
},
},
models: []config.ModelEntry{},
expectError: true,
errorMsg: "project and location are required",
},
{
name: "Vertex AI with project and location succeeds",
entries: map[string]config.ProviderEntry{
"vertex": {
Type: "vertexai",
Project: "my-project",
Location: "us-central1",
},
},
models: []config.ModelEntry{
{Name: "gemini-pro-vertex", Provider: "vertex"},
},
validate: func(t *testing.T, reg *Registry) {
assert.Len(t, reg.providers, 1)
assert.Contains(t, reg.providers, "vertex")
},
},
{
name: "unknown provider type returns error",
entries: map[string]config.ProviderEntry{
"unknown": {
Type: "unknown-type",
APIKey: "test-key",
},
},
models: []config.ModelEntry{},
expectError: true,
errorMsg: "unknown provider type",
},
{
name: "provider with no API key is skipped",
entries: map[string]config.ProviderEntry{
"openai-no-key": {
Type: "openai",
APIKey: "",
},
"anthropic-with-key": {
Type: "anthropic",
APIKey: "sk-ant-test",
},
},
models: []config.ModelEntry{
{Name: "claude-3", Provider: "anthropic-with-key"},
},
validate: func(t *testing.T, reg *Registry) {
assert.Len(t, reg.providers, 1)
assert.Contains(t, reg.providers, "anthropic-with-key")
assert.NotContains(t, reg.providers, "openai-no-key")
},
},
{
name: "model with provider_model_id",
entries: map[string]config.ProviderEntry{
"azure": {
Type: "azureopenai",
APIKey: "test-key",
Endpoint: "https://test.openai.azure.com",
},
},
models: []config.ModelEntry{
{
Name: "gpt-4",
Provider: "azure",
ProviderModelID: "gpt-4-deployment-name",
},
},
validate: func(t *testing.T, reg *Registry) {
assert.Equal(t, "gpt-4-deployment-name", reg.providerModelIDs["gpt-4"])
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
reg, err := NewRegistry(tt.entries, tt.models)
if tt.expectError {
assert.Error(t, err)
if tt.errorMsg != "" {
assert.Contains(t, err.Error(), tt.errorMsg)
}
return
}
require.NoError(t, err)
require.NotNil(t, reg)
if tt.validate != nil {
tt.validate(t, reg)
}
})
}
}
func TestRegistry_Get(t *testing.T) {
reg, err := NewRegistry(
map[string]config.ProviderEntry{
"openai": {
Type: "openai",
APIKey: "sk-test",
},
"anthropic": {
Type: "anthropic",
APIKey: "sk-ant-test",
},
},
[]config.ModelEntry{
{Name: "gpt-4", Provider: "openai"},
},
)
require.NoError(t, err)
tests := []struct {
name string
providerKey string
expectFound bool
validate func(t *testing.T, p Provider)
}{
{
name: "existing provider",
providerKey: "openai",
expectFound: true,
validate: func(t *testing.T, p Provider) {
assert.Equal(t, "openai", p.Name())
},
},
{
name: "another existing provider",
providerKey: "anthropic",
expectFound: true,
validate: func(t *testing.T, p Provider) {
assert.Equal(t, "anthropic", p.Name())
},
},
{
name: "nonexistent provider",
providerKey: "nonexistent",
expectFound: false,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
p, found := reg.Get(tt.providerKey)
if tt.expectFound {
assert.True(t, found)
require.NotNil(t, p)
if tt.validate != nil {
tt.validate(t, p)
}
} else {
assert.False(t, found)
assert.Nil(t, p)
}
})
}
}
func TestRegistry_Models(t *testing.T) {
tests := []struct {
name string
models []config.ModelEntry
validate func(t *testing.T, models []struct{ Provider, Model string })
}{
{
name: "single model",
models: []config.ModelEntry{
{Name: "gpt-4", Provider: "openai"},
},
validate: func(t *testing.T, models []struct{ Provider, Model string }) {
require.Len(t, models, 1)
assert.Equal(t, "gpt-4", models[0].Model)
assert.Equal(t, "openai", models[0].Provider)
},
},
{
name: "multiple models",
models: []config.ModelEntry{
{Name: "gpt-4", Provider: "openai"},
{Name: "claude-3", Provider: "anthropic"},
{Name: "gemini-pro", Provider: "google"},
},
validate: func(t *testing.T, models []struct{ Provider, Model string }) {
require.Len(t, models, 3)
modelNames := make([]string, len(models))
for i, m := range models {
modelNames[i] = m.Model
}
assert.Contains(t, modelNames, "gpt-4")
assert.Contains(t, modelNames, "claude-3")
assert.Contains(t, modelNames, "gemini-pro")
},
},
{
name: "no models",
models: []config.ModelEntry{},
validate: func(t *testing.T, models []struct{ Provider, Model string }) {
assert.Len(t, models, 0)
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
reg, err := NewRegistry(
map[string]config.ProviderEntry{
"openai": {
Type: "openai",
APIKey: "sk-test",
},
"anthropic": {
Type: "anthropic",
APIKey: "sk-ant-test",
},
"google": {
Type: "google",
APIKey: "test-key",
},
},
tt.models,
)
require.NoError(t, err)
models := reg.Models()
if tt.validate != nil {
tt.validate(t, models)
}
})
}
}
func TestRegistry_ResolveModelID(t *testing.T) {
reg, err := NewRegistry(
map[string]config.ProviderEntry{
"openai": {
Type: "openai",
APIKey: "sk-test",
},
"azure": {
Type: "azureopenai",
APIKey: "test-key",
Endpoint: "https://test.openai.azure.com",
},
},
[]config.ModelEntry{
{Name: "gpt-4", Provider: "openai"},
{Name: "gpt-4-azure", Provider: "azure", ProviderModelID: "gpt-4-deployment"},
},
)
require.NoError(t, err)
tests := []struct {
name string
modelName string
expected string
}{
{
name: "model without provider_model_id returns model name",
modelName: "gpt-4",
expected: "gpt-4",
},
{
name: "model with provider_model_id returns provider_model_id",
modelName: "gpt-4-azure",
expected: "gpt-4-deployment",
},
{
name: "unknown model returns model name",
modelName: "unknown-model",
expected: "unknown-model",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result := reg.ResolveModelID(tt.modelName)
assert.Equal(t, tt.expected, result)
})
}
}
func TestRegistry_Default(t *testing.T) {
tests := []struct {
name string
setupReg func() *Registry
modelName string
expectError bool
errorMsg string
validate func(t *testing.T, p Provider)
}{
{
name: "returns provider for known model",
setupReg: func() *Registry {
reg, _ := NewRegistry(
map[string]config.ProviderEntry{
"openai": {
Type: "openai",
APIKey: "sk-test",
},
"anthropic": {
Type: "anthropic",
APIKey: "sk-ant-test",
},
},
[]config.ModelEntry{
{Name: "gpt-4", Provider: "openai"},
{Name: "claude-3", Provider: "anthropic"},
},
)
return reg
},
modelName: "gpt-4",
validate: func(t *testing.T, p Provider) {
assert.Equal(t, "openai", p.Name())
},
},
{
name: "returns first provider for unknown model",
setupReg: func() *Registry {
reg, _ := NewRegistry(
map[string]config.ProviderEntry{
"openai": {
Type: "openai",
APIKey: "sk-test",
},
},
[]config.ModelEntry{
{Name: "gpt-4", Provider: "openai"},
},
)
return reg
},
modelName: "unknown-model",
validate: func(t *testing.T, p Provider) {
assert.NotNil(t, p)
// Should return first available provider
},
},
{
name: "returns first provider for empty model name",
setupReg: func() *Registry {
reg, _ := NewRegistry(
map[string]config.ProviderEntry{
"openai": {
Type: "openai",
APIKey: "sk-test",
},
},
[]config.ModelEntry{
{Name: "gpt-4", Provider: "openai"},
},
)
return reg
},
modelName: "",
validate: func(t *testing.T, p Provider) {
assert.NotNil(t, p)
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
reg := tt.setupReg()
p, err := reg.Default(tt.modelName)
if tt.expectError {
assert.Error(t, err)
if tt.errorMsg != "" {
assert.Contains(t, err.Error(), tt.errorMsg)
}
return
}
require.NoError(t, err)
require.NotNil(t, p)
if tt.validate != nil {
tt.validate(t, p)
}
})
}
}
func TestBuildProvider(t *testing.T) {
tests := []struct {
name string
entry config.ProviderEntry
expectError bool
errorMsg string
expectNil bool
validate func(t *testing.T, p Provider)
}{
{
name: "OpenAI provider",
entry: config.ProviderEntry{
Type: "openai",
APIKey: "sk-test",
},
validate: func(t *testing.T, p Provider) {
assert.Equal(t, "openai", p.Name())
},
},
{
name: "OpenAI provider with custom endpoint",
entry: config.ProviderEntry{
Type: "openai",
APIKey: "sk-test",
Endpoint: "https://custom.openai.com",
},
validate: func(t *testing.T, p Provider) {
assert.Equal(t, "openai", p.Name())
},
},
{
name: "Anthropic provider",
entry: config.ProviderEntry{
Type: "anthropic",
APIKey: "sk-ant-test",
},
validate: func(t *testing.T, p Provider) {
assert.Equal(t, "anthropic", p.Name())
},
},
{
name: "Google provider",
entry: config.ProviderEntry{
Type: "google",
APIKey: "test-key",
},
validate: func(t *testing.T, p Provider) {
assert.Equal(t, "google", p.Name())
},
},
{
name: "provider without API key returns nil",
entry: config.ProviderEntry{
Type: "openai",
APIKey: "",
},
expectNil: true,
},
{
name: "unknown provider type",
entry: config.ProviderEntry{
Type: "unknown",
APIKey: "test-key",
},
expectError: true,
errorMsg: "unknown provider type",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
p, err := buildProvider(tt.entry)
if tt.expectError {
assert.Error(t, err)
if tt.errorMsg != "" {
assert.Contains(t, err.Error(), tt.errorMsg)
}
return
}
require.NoError(t, err)
if tt.expectNil {
assert.Nil(t, p)
return
}
require.NotNil(t, p)
if tt.validate != nil {
tt.validate(t, p)
}
})
}
}

View File

@@ -0,0 +1,135 @@
package ratelimit
import (
"log/slog"
"net/http"
"strings"
"sync"
"time"
"golang.org/x/time/rate"
)
// Config defines rate limiting configuration.
type Config struct {
// RequestsPerSecond is the number of requests allowed per second per IP.
RequestsPerSecond float64
// Burst is the maximum burst size allowed.
Burst int
// Enabled controls whether rate limiting is active.
Enabled bool
}
// Middleware provides per-IP rate limiting using a token bucket algorithm.
type Middleware struct {
limiters map[string]*rate.Limiter
mu sync.RWMutex
config Config
logger *slog.Logger
}
// New creates a new rate limiting middleware.
func New(config Config, logger *slog.Logger) *Middleware {
m := &Middleware{
limiters: make(map[string]*rate.Limiter),
config: config,
logger: logger,
}
// Start cleanup goroutine to remove old limiters
if config.Enabled {
go m.cleanupLimiters()
}
return m
}
// Handler wraps an http.Handler with rate limiting.
func (m *Middleware) Handler(next http.Handler) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if !m.config.Enabled {
next.ServeHTTP(w, r)
return
}
// Extract client IP (handle X-Forwarded-For for proxies)
ip := m.getClientIP(r)
limiter := m.getLimiter(ip)
if !limiter.Allow() {
m.logger.Warn("rate limit exceeded",
slog.String("ip", ip),
slog.String("path", r.URL.Path),
)
w.Header().Set("Content-Type", "application/json")
w.Header().Set("Retry-After", "1")
w.WriteHeader(http.StatusTooManyRequests)
w.Write([]byte(`{"error":"rate limit exceeded","message":"too many requests"}`))
return
}
next.ServeHTTP(w, r)
})
}
// getLimiter returns the rate limiter for a given IP, creating one if needed.
func (m *Middleware) getLimiter(ip string) *rate.Limiter {
m.mu.RLock()
limiter, exists := m.limiters[ip]
m.mu.RUnlock()
if exists {
return limiter
}
m.mu.Lock()
defer m.mu.Unlock()
// Double-check after acquiring write lock
limiter, exists = m.limiters[ip]
if exists {
return limiter
}
limiter = rate.NewLimiter(rate.Limit(m.config.RequestsPerSecond), m.config.Burst)
m.limiters[ip] = limiter
return limiter
}
// getClientIP extracts the client IP from the request.
func (m *Middleware) getClientIP(r *http.Request) string {
// Check X-Forwarded-For header (for proxies/load balancers).
// It can be a comma-separated list; the first entry is the client.
if xff := r.Header.Get("X-Forwarded-For"); xff != "" {
first, _, _ := strings.Cut(xff, ",")
return strings.TrimSpace(first)
}
// Check X-Real-IP header
if xri := r.Header.Get("X-Real-IP"); xri != "" {
return xri
}
// Fall back to RemoteAddr
return r.RemoteAddr
}
// cleanupLimiters periodically removes unused limiters to prevent memory leaks.
func (m *Middleware) cleanupLimiters() {
ticker := time.NewTicker(5 * time.Minute)
defer ticker.Stop()
for range ticker.C {
m.mu.Lock()
// Clear all limiters periodically
// In production, you might want a more sophisticated LRU cache
m.limiters = make(map[string]*rate.Limiter)
m.mu.Unlock()
m.logger.Debug("cleaned up rate limiters")
}
}
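The cleanup goroutine above throws away every limiter at once, which also resets buckets for clients that are actively sending. A sketch of the per-entry eviction the comment alludes to, keyed on last use; all names here are hypothetical:

```go
package ratelimit

import (
	"sync"
	"time"

	"golang.org/x/time/rate"
)

// limiterEntry pairs a limiter with its last-use time so stale entries can be
// evicted individually instead of resetting every client at once.
type limiterEntry struct {
	limiter  *rate.Limiter
	lastSeen time.Time
}

type evictingLimiters struct {
	mu      sync.Mutex
	entries map[string]*limiterEntry
	rps     rate.Limit
	burst   int
}

func newEvictingLimiters(rps rate.Limit, burst int) *evictingLimiters {
	return &evictingLimiters{
		entries: make(map[string]*limiterEntry),
		rps:     rps,
		burst:   burst,
	}
}

// get returns the limiter for ip, creating it if needed, and stamps last use.
func (e *evictingLimiters) get(ip string) *rate.Limiter {
	e.mu.Lock()
	defer e.mu.Unlock()
	ent, ok := e.entries[ip]
	if !ok {
		ent = &limiterEntry{limiter: rate.NewLimiter(e.rps, e.burst)}
		e.entries[ip] = ent
	}
	ent.lastSeen = time.Now()
	return ent.limiter
}

// evictOlderThan removes only limiters that have been idle longer than maxIdle.
func (e *evictingLimiters) evictOlderThan(maxIdle time.Duration) {
	e.mu.Lock()
	defer e.mu.Unlock()
	cutoff := time.Now().Add(-maxIdle)
	for ip, ent := range e.entries {
		if ent.lastSeen.Before(cutoff) {
			delete(e.entries, ip)
		}
	}
}
```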

View File

@@ -0,0 +1,175 @@
package ratelimit
import (
"log/slog"
"net/http"
"net/http/httptest"
"os"
"testing"
"time"
)
func TestRateLimitMiddleware(t *testing.T) {
logger := slog.New(slog.NewTextHandler(os.Stdout, nil))
tests := []struct {
name string
config Config
requestCount int
expectedAllowed int
expectedRateLimited int
}{
{
name: "disabled rate limiting allows all requests",
config: Config{
Enabled: false,
RequestsPerSecond: 1,
Burst: 1,
},
requestCount: 10,
expectedAllowed: 10,
expectedRateLimited: 0,
},
{
name: "enabled rate limiting enforces limits",
config: Config{
Enabled: true,
RequestsPerSecond: 1,
Burst: 2,
},
requestCount: 5,
expectedAllowed: 2, // Burst allows 2 immediately
expectedRateLimited: 3,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
middleware := New(tt.config, logger)
handler := middleware.Handler(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusOK)
}))
allowed := 0
rateLimited := 0
for i := 0; i < tt.requestCount; i++ {
req := httptest.NewRequest("GET", "/test", nil)
req.RemoteAddr = "192.168.1.1:1234"
w := httptest.NewRecorder()
handler.ServeHTTP(w, req)
if w.Code == http.StatusOK {
allowed++
} else if w.Code == http.StatusTooManyRequests {
rateLimited++
}
}
if allowed != tt.expectedAllowed {
t.Errorf("expected %d allowed requests, got %d", tt.expectedAllowed, allowed)
}
if rateLimited != tt.expectedRateLimited {
t.Errorf("expected %d rate limited requests, got %d", tt.expectedRateLimited, rateLimited)
}
})
}
}
func TestGetClientIP(t *testing.T) {
logger := slog.New(slog.NewTextHandler(os.Stdout, nil))
middleware := New(Config{Enabled: false}, logger)
tests := []struct {
name string
headers map[string]string
remoteAddr string
expectedIP string
}{
{
name: "uses X-Forwarded-For if present",
headers: map[string]string{"X-Forwarded-For": "203.0.113.1, 198.51.100.1"},
remoteAddr: "192.168.1.1:1234",
expectedIP: "203.0.113.1",
},
{
name: "uses X-Real-IP if X-Forwarded-For not present",
headers: map[string]string{"X-Real-IP": "203.0.113.1"},
remoteAddr: "192.168.1.1:1234",
expectedIP: "203.0.113.1",
},
{
name: "uses RemoteAddr as fallback",
headers: map[string]string{},
remoteAddr: "192.168.1.1:1234",
expectedIP: "192.168.1.1:1234",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
req := httptest.NewRequest("GET", "/test", nil)
req.RemoteAddr = tt.remoteAddr
for k, v := range tt.headers {
req.Header.Set(k, v)
}
ip := middleware.getClientIP(req)
if ip != tt.expectedIP {
t.Errorf("expected IP %q, got %q", tt.expectedIP, ip)
}
})
}
}
func TestRateLimitRefill(t *testing.T) {
logger := slog.New(slog.NewTextHandler(os.Stdout, nil))
config := Config{
Enabled: true,
RequestsPerSecond: 10, // 10 requests per second
Burst: 5,
}
middleware := New(config, logger)
handler := middleware.Handler(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusOK)
}))
// Use up the burst
for i := 0; i < 5; i++ {
req := httptest.NewRequest("GET", "/test", nil)
req.RemoteAddr = "192.168.1.1:1234"
w := httptest.NewRecorder()
handler.ServeHTTP(w, req)
if w.Code != http.StatusOK {
t.Errorf("request %d should be allowed, got status %d", i, w.Code)
}
}
// Next request should be rate limited
req := httptest.NewRequest("GET", "/test", nil)
req.RemoteAddr = "192.168.1.1:1234"
w := httptest.NewRecorder()
handler.ServeHTTP(w, req)
if w.Code != http.StatusTooManyRequests {
t.Errorf("expected rate limit, got status %d", w.Code)
}
// Wait for tokens to refill (100ms = 1 token at 10/s)
time.Sleep(150 * time.Millisecond)
// Should be allowed now
req = httptest.NewRequest("GET", "/test", nil)
req.RemoteAddr = "192.168.1.1:1234"
w = httptest.NewRecorder()
handler.ServeHTTP(w, req)
if w.Code != http.StatusOK {
t.Errorf("request should be allowed after refill, got status %d", w.Code)
}
}
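One caveat the tests above surface: `getClientIP` trusts `X-Forwarded-For` unconditionally, and that header is client-controlled, so a caller can pick its own rate-limit bucket. A hedged sketch of a stricter variant that only honors the header when the direct peer is a known proxy; `trustedProxies` is a hypothetical parameter:

```go
package ratelimit

import (
	"net"
	"net/http"
	"strings"
)

// clientIPTrusted honors X-Forwarded-For only when the socket peer is a
// trusted proxy; otherwise the spoofable header is ignored.
func clientIPTrusted(r *http.Request, trustedProxies map[string]bool) string {
	host, _, err := net.SplitHostPort(r.RemoteAddr)
	if err != nil {
		host = r.RemoteAddr
	}
	if !trustedProxies[host] {
		// Direct connection: trust only the socket address.
		return host
	}
	if xff := r.Header.Get("X-Forwarded-For"); xff != "" {
		first, _, _ := strings.Cut(xff, ",")
		return strings.TrimSpace(first)
	}
	return host
}
```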

91
internal/server/health.go Normal file
View File

@@ -0,0 +1,91 @@
package server
import (
"context"
"encoding/json"
"net/http"
"time"
)
// HealthStatus represents the health check response.
type HealthStatus struct {
Status string `json:"status"`
Timestamp int64 `json:"timestamp"`
Checks map[string]string `json:"checks,omitempty"`
}
// handleHealth returns a basic health check endpoint.
// This is suitable for Kubernetes liveness probes.
func (s *GatewayServer) handleHealth(w http.ResponseWriter, r *http.Request) {
if r.Method != http.MethodGet {
http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
return
}
status := HealthStatus{
Status: "healthy",
Timestamp: time.Now().Unix(),
}
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(http.StatusOK)
if err := json.NewEncoder(w).Encode(status); err != nil {
s.logger.ErrorContext(r.Context(), "failed to encode health response", "error", err.Error())
}
}
// handleReady returns a readiness check that verifies dependencies.
// This is suitable for Kubernetes readiness probes and load balancer health checks.
func (s *GatewayServer) handleReady(w http.ResponseWriter, r *http.Request) {
if r.Method != http.MethodGet {
http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
return
}
checks := make(map[string]string)
allHealthy := true
// Check conversation store connectivity
ctx, cancel := context.WithTimeout(r.Context(), 2*time.Second)
defer cancel()
// Test conversation store by attempting a simple operation
testID := "health_check_test"
_, err := s.convs.Get(ctx, testID)
if err != nil {
checks["conversation_store"] = "unhealthy: " + err.Error()
allHealthy = false
} else {
checks["conversation_store"] = "healthy"
}
// Check if at least one provider is configured
models := s.registry.Models()
if len(models) == 0 {
checks["providers"] = "unhealthy: no providers configured"
allHealthy = false
} else {
checks["providers"] = "healthy"
}
status := HealthStatus{
Timestamp: time.Now().Unix(),
Checks: checks,
}
if allHealthy {
status.Status = "ready"
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(http.StatusOK)
} else {
status.Status = "not_ready"
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(http.StatusServiceUnavailable)
}
if err := json.NewEncoder(w).Encode(status); err != nil {
s.logger.ErrorContext(r.Context(), "failed to encode ready response", "error", err.Error())
}
}
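A small client-side sketch of exercising `/ready` from a deployment smoke test. The `probeReady` helper and base URL are assumptions outside the gateway module; the struct mirrors `HealthStatus` above:

```go
package smoke

import (
	"context"
	"encoding/json"
	"fmt"
	"net/http"
)

// healthStatus mirrors the gateway's HealthStatus response shape.
type healthStatus struct {
	Status string            `json:"status"`
	Checks map[string]string `json:"checks,omitempty"`
}

// probeReady calls the readiness endpoint and reports failing checks.
func probeReady(ctx context.Context, base string) error {
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, base+"/ready", nil)
	if err != nil {
		return err
	}
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return err
	}
	defer resp.Body.Close()
	var st healthStatus
	if err := json.NewDecoder(resp.Body).Decode(&st); err != nil {
		return err
	}
	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("not ready (%d): %v", resp.StatusCode, st.Checks)
	}
	return nil
}
```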

View File

@@ -0,0 +1,150 @@
package server
import (
"encoding/json"
"log/slog"
"net/http"
"net/http/httptest"
"os"
"testing"
)
func TestHealthEndpoint(t *testing.T) {
logger := slog.New(slog.NewTextHandler(os.Stdout, nil))
registry := newMockRegistry()
convStore := newMockConversationStore()
server := New(registry, convStore, logger)
tests := []struct {
name string
method string
expectedStatus int
}{
{
name: "GET returns healthy status",
method: http.MethodGet,
expectedStatus: http.StatusOK,
},
{
name: "POST returns method not allowed",
method: http.MethodPost,
expectedStatus: http.StatusMethodNotAllowed,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
req := httptest.NewRequest(tt.method, "/health", nil)
w := httptest.NewRecorder()
server.handleHealth(w, req)
if w.Code != tt.expectedStatus {
t.Errorf("expected status %d, got %d", tt.expectedStatus, w.Code)
}
if tt.expectedStatus == http.StatusOK {
var status HealthStatus
if err := json.NewDecoder(w.Body).Decode(&status); err != nil {
t.Fatalf("failed to decode response: %v", err)
}
if status.Status != "healthy" {
t.Errorf("expected status 'healthy', got %q", status.Status)
}
if status.Timestamp == 0 {
t.Error("expected non-zero timestamp")
}
}
})
}
}
func TestReadyEndpoint(t *testing.T) {
logger := slog.New(slog.NewTextHandler(os.Stdout, nil))
tests := []struct {
name string
setupRegistry func() *mockRegistry
convStore *mockConversationStore
expectedStatus int
expectedReady bool
}{
{
name: "returns ready when all checks pass",
setupRegistry: func() *mockRegistry {
reg := newMockRegistry()
reg.addModel("test-model", "test-provider")
return reg
},
convStore: newMockConversationStore(),
expectedStatus: http.StatusOK,
expectedReady: true,
},
{
name: "returns not ready when no providers configured",
setupRegistry: func() *mockRegistry {
return newMockRegistry()
},
convStore: newMockConversationStore(),
expectedStatus: http.StatusServiceUnavailable,
expectedReady: false,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
server := New(tt.setupRegistry(), tt.convStore, logger)
req := httptest.NewRequest(http.MethodGet, "/ready", nil)
w := httptest.NewRecorder()
server.handleReady(w, req)
if w.Code != tt.expectedStatus {
t.Errorf("expected status %d, got %d", tt.expectedStatus, w.Code)
}
var status HealthStatus
if err := json.NewDecoder(w.Body).Decode(&status); err != nil {
t.Fatalf("failed to decode response: %v", err)
}
if tt.expectedReady {
if status.Status != "ready" {
t.Errorf("expected status 'ready', got %q", status.Status)
}
} else {
if status.Status != "not_ready" {
t.Errorf("expected status 'not_ready', got %q", status.Status)
}
}
if status.Timestamp == 0 {
t.Error("expected non-zero timestamp")
}
if status.Checks == nil {
t.Error("expected checks map to be present")
}
})
}
}
func TestReadyEndpointMethodNotAllowed(t *testing.T) {
logger := slog.New(slog.NewTextHandler(os.Stdout, nil))
registry := newMockRegistry()
convStore := newMockConversationStore()
server := New(registry, convStore, logger)
req := httptest.NewRequest(http.MethodPost, "/ready", nil)
w := httptest.NewRecorder()
server.handleReady(w, req)
if w.Code != http.StatusMethodNotAllowed {
t.Errorf("expected status %d, got %d", http.StatusMethodNotAllowed, w.Code)
}
}

View File

@@ -0,0 +1,91 @@
package server
import (
"encoding/json"
"log/slog"
"net/http"
"runtime/debug"
"github.com/ajac-zero/latticelm/internal/logger"
)
// MaxRequestBodyBytes is the maximum size allowed for request bodies (10MB)
const MaxRequestBodyBytes = 10 * 1024 * 1024
// PanicRecoveryMiddleware recovers from panics in HTTP handlers and logs them
// instead of crashing the server. Returns 500 Internal Server Error to the client.
func PanicRecoveryMiddleware(next http.Handler, log *slog.Logger) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
defer func() {
if err := recover(); err != nil {
// Capture stack trace
stack := debug.Stack()
// Log the panic with full context
log.ErrorContext(r.Context(), "panic recovered in HTTP handler",
logger.LogAttrsWithTrace(r.Context(),
slog.String("request_id", logger.FromContext(r.Context())),
slog.String("method", r.Method),
slog.String("path", r.URL.Path),
slog.String("remote_addr", r.RemoteAddr),
slog.Any("panic", err),
slog.String("stack", string(stack)),
)...,
)
// Return 500 to client
http.Error(w, "Internal Server Error", http.StatusInternalServerError)
}
}()
next.ServeHTTP(w, r)
})
}
// RequestSizeLimitMiddleware enforces a maximum request body size to prevent
// DoS attacks via oversized payloads. Reads past the limit fail with an error
// that handlers should map to 413 (see the request decoding in responses.go).
func RequestSizeLimitMiddleware(next http.Handler, maxBytes int64) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
// Only limit body size for requests that have a body
if r.Method == http.MethodPost || r.Method == http.MethodPut || r.Method == http.MethodPatch {
// Wrap the request body with a size limiter
r.Body = http.MaxBytesReader(w, r.Body, maxBytes)
}
next.ServeHTTP(w, r)
})
}
// ErrorRecoveryMiddleware is a placeholder for translating request-size errors
// into HTTP responses. http.MaxBytesReader only surfaces its error on the next
// body read, so in practice the JSON decoder returns it and the handler maps it
// to a status code; this middleware currently just delegates and is kept for
// future extensibility.
func ErrorRecoveryMiddleware(next http.Handler, log *slog.Logger) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
next.ServeHTTP(w, r)
})
}
// WriteJSONError is a helper function to safely write JSON error responses,
// handling any encoding errors that might occur.
func WriteJSONError(w http.ResponseWriter, log *slog.Logger, message string, statusCode int) {
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(statusCode)
// Marshal through encoding/json so quotes or control characters in the
// message are escaped instead of corrupting the response body.
body, err := json.Marshal(map[string]map[string]string{
"error": {"message": message},
})
if err == nil {
_, err = w.Write(body)
}
if err != nil {
// If we can't even write the error response, log it
log.Error("failed to write error response",
slog.String("original_message", message),
slog.Int("status_code", statusCode),
slog.String("write_error", err.Error()),
)
}
}
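A minimal sketch of how these middlewares might be chained at server startup; panic recovery sits outermost so it also covers the size-limit wrapper, matching the ordering in the integration test below. The listen address is a placeholder:

```go
package main

import (
	"log/slog"
	"net/http"
	"os"

	"github.com/ajac-zero/latticelm/internal/server"
)

func main() {
	log := slog.New(slog.NewJSONHandler(os.Stdout, nil))
	mux := http.NewServeMux()
	// ...GatewayServer.RegisterRoutes(mux) would go here...

	// Panic recovery wraps the size limiter so a panic anywhere in the chain
	// is caught and logged instead of killing the process.
	handler := server.PanicRecoveryMiddleware(
		server.RequestSizeLimitMiddleware(mux, server.MaxRequestBodyBytes),
		log,
	)
	_ = http.ListenAndServe(":8080", handler)
}
```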

View File

@@ -0,0 +1,341 @@
package server
import (
"bytes"
"encoding/json"
"fmt"
"io"
"log/slog"
"net/http"
"net/http/httptest"
"strings"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func TestPanicRecoveryMiddleware(t *testing.T) {
tests := []struct {
name string
handler http.HandlerFunc
expectPanic bool
expectedStatus int
expectedBody string
}{
{
name: "no panic - request succeeds",
handler: func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusOK)
w.Write([]byte("success"))
},
expectPanic: false,
expectedStatus: http.StatusOK,
expectedBody: "success",
},
{
name: "panic with string - recovers gracefully",
handler: func(w http.ResponseWriter, r *http.Request) {
panic("something went wrong")
},
expectPanic: true,
expectedStatus: http.StatusInternalServerError,
expectedBody: "Internal Server Error\n",
},
{
name: "panic with error - recovers gracefully",
handler: func(w http.ResponseWriter, r *http.Request) {
panic(io.ErrUnexpectedEOF)
},
expectPanic: true,
expectedStatus: http.StatusInternalServerError,
expectedBody: "Internal Server Error\n",
},
{
name: "panic with struct - recovers gracefully",
handler: func(w http.ResponseWriter, r *http.Request) {
panic(struct{ msg string }{msg: "bad things"})
},
expectPanic: true,
expectedStatus: http.StatusInternalServerError,
expectedBody: "Internal Server Error\n",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
// Create a buffer to capture logs
var buf bytes.Buffer
logger := slog.New(slog.NewJSONHandler(&buf, nil))
// Wrap the handler with panic recovery
wrapped := PanicRecoveryMiddleware(tt.handler, logger)
// Create request and recorder
req := httptest.NewRequest(http.MethodGet, "/test", nil)
rec := httptest.NewRecorder()
// Execute the handler (should not panic even if inner handler does)
wrapped.ServeHTTP(rec, req)
// Verify response
assert.Equal(t, tt.expectedStatus, rec.Code)
assert.Equal(t, tt.expectedBody, rec.Body.String())
// Verify logging if panic was expected
if tt.expectPanic {
logOutput := buf.String()
assert.Contains(t, logOutput, "panic recovered in HTTP handler")
assert.Contains(t, logOutput, "stack")
}
})
}
}
func TestRequestSizeLimitMiddleware(t *testing.T) {
const maxSize = 100 // 100 bytes for testing
tests := []struct {
name string
method string
bodySize int
expectedStatus int
shouldSucceed bool
}{
{
name: "small POST request - succeeds",
method: http.MethodPost,
bodySize: 50,
expectedStatus: http.StatusOK,
shouldSucceed: true,
},
{
name: "exact size POST request - succeeds",
method: http.MethodPost,
bodySize: maxSize,
expectedStatus: http.StatusOK,
shouldSucceed: true,
},
{
name: "oversized POST request - fails",
method: http.MethodPost,
bodySize: maxSize + 1,
expectedStatus: http.StatusBadRequest,
shouldSucceed: false,
},
{
name: "large POST request - fails",
method: http.MethodPost,
bodySize: maxSize * 2,
expectedStatus: http.StatusBadRequest,
shouldSucceed: false,
},
{
name: "oversized PUT request - fails",
method: http.MethodPut,
bodySize: maxSize + 1,
expectedStatus: http.StatusBadRequest,
shouldSucceed: false,
},
{
name: "oversized PATCH request - fails",
method: http.MethodPatch,
bodySize: maxSize + 1,
expectedStatus: http.StatusBadRequest,
shouldSucceed: false,
},
{
name: "GET request - no size limit applied",
method: http.MethodGet,
bodySize: maxSize + 1,
expectedStatus: http.StatusOK,
shouldSucceed: true,
},
{
name: "DELETE request - no size limit applied",
method: http.MethodDelete,
bodySize: maxSize + 1,
expectedStatus: http.StatusOK,
shouldSucceed: true,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
// Create a handler that tries to read the body
handler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
body, err := io.ReadAll(r.Body)
if err != nil {
http.Error(w, err.Error(), http.StatusBadRequest)
return
}
w.WriteHeader(http.StatusOK)
fmt.Fprintf(w, "read %d bytes", len(body))
})
// Wrap with size limit middleware
wrapped := RequestSizeLimitMiddleware(handler, maxSize)
// Create request with body of specified size
bodyContent := strings.Repeat("a", tt.bodySize)
req := httptest.NewRequest(tt.method, "/test", strings.NewReader(bodyContent))
rec := httptest.NewRecorder()
// Execute
wrapped.ServeHTTP(rec, req)
// Verify response
assert.Equal(t, tt.expectedStatus, rec.Code)
if tt.shouldSucceed {
assert.Contains(t, rec.Body.String(), "read")
} else {
// For methods with body, should get an error
assert.NotContains(t, rec.Body.String(), "read")
}
})
}
}
func TestRequestSizeLimitMiddleware_WithJSONDecoding(t *testing.T) {
const maxSize = 1024 // 1KB
tests := []struct {
name string
payload interface{}
expectedStatus int
shouldDecode bool
}{
{
name: "small JSON payload - succeeds",
payload: map[string]string{
"message": "hello",
},
expectedStatus: http.StatusOK,
shouldDecode: true,
},
{
name: "large JSON payload - fails",
payload: map[string]string{
"message": strings.Repeat("x", maxSize+100),
},
expectedStatus: http.StatusBadRequest,
shouldDecode: false,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
// Create a handler that decodes JSON
handler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
var data map[string]string
if err := json.NewDecoder(r.Body).Decode(&data); err != nil {
http.Error(w, err.Error(), http.StatusBadRequest)
return
}
w.WriteHeader(http.StatusOK)
json.NewEncoder(w).Encode(map[string]string{"status": "decoded"})
})
// Wrap with size limit middleware
wrapped := RequestSizeLimitMiddleware(handler, maxSize)
// Create request
body, err := json.Marshal(tt.payload)
require.NoError(t, err)
req := httptest.NewRequest(http.MethodPost, "/test", bytes.NewReader(body))
req.Header.Set("Content-Type", "application/json")
rec := httptest.NewRecorder()
// Execute
wrapped.ServeHTTP(rec, req)
// Verify response
assert.Equal(t, tt.expectedStatus, rec.Code)
if tt.shouldDecode {
assert.Contains(t, rec.Body.String(), "decoded")
}
})
}
}
func TestWriteJSONError(t *testing.T) {
tests := []struct {
name string
message string
statusCode int
expectedBody string
}{
{
name: "simple error message",
message: "something went wrong",
statusCode: http.StatusBadRequest,
expectedBody: `{"error":{"message":"something went wrong"}}`,
},
{
name: "internal server error",
message: "internal error",
statusCode: http.StatusInternalServerError,
expectedBody: `{"error":{"message":"internal error"}}`,
},
{
name: "unauthorized error",
message: "unauthorized",
statusCode: http.StatusUnauthorized,
expectedBody: `{"error":{"message":"unauthorized"}}`,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
var buf bytes.Buffer
logger := slog.New(slog.NewJSONHandler(&buf, nil))
rec := httptest.NewRecorder()
WriteJSONError(rec, logger, tt.message, tt.statusCode)
assert.Equal(t, tt.statusCode, rec.Code)
assert.Equal(t, "application/json", rec.Header().Get("Content-Type"))
assert.Equal(t, tt.expectedBody, rec.Body.String())
})
}
}
func TestPanicRecoveryMiddleware_Integration(t *testing.T) {
// Test that panic recovery works in a more realistic scenario
// with multiple middleware layers
var logBuf bytes.Buffer
logger := slog.New(slog.NewJSONHandler(&logBuf, nil))
// Create a chain of middleware
finalHandler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
// Simulate a panic deep in the stack
panic("unexpected error in business logic")
})
// Wrap with multiple middleware layers
wrapped := PanicRecoveryMiddleware(
RequestSizeLimitMiddleware(
finalHandler,
1024,
),
logger,
)
req := httptest.NewRequest(http.MethodPost, "/test", strings.NewReader("test"))
rec := httptest.NewRecorder()
// Should not panic
wrapped.ServeHTTP(rec, req)
// Should return 500
assert.Equal(t, http.StatusInternalServerError, rec.Code)
assert.Equal(t, "Internal Server Error\n", rec.Body.String())
// Should log the panic
logOutput := logBuf.String()
assert.Contains(t, logOutput, "panic recovered")
assert.Contains(t, logOutput, "unexpected error in business logic")
}
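The gateway currently detects an oversized body by matching the error string (see the responses.go diff below). On Go 1.19+ a sturdier option is `errors.As` with `*http.MaxBytesError`; a sketch, where the `decodeBody` helper is hypothetical:

```go
package server

import (
	"encoding/json"
	"errors"
	"net/http"
)

// decodeBody decodes JSON and maps the MaxBytesReader error to 413 via
// errors.As, avoiding a fragile comparison against the error string.
func decodeBody(w http.ResponseWriter, r *http.Request, dst any) bool {
	if err := json.NewDecoder(r.Body).Decode(dst); err != nil {
		var maxErr *http.MaxBytesError
		if errors.As(err, &maxErr) {
			http.Error(w, "request body too large", http.StatusRequestEntityTooLarge)
			return false
		}
		http.Error(w, "invalid JSON payload", http.StatusBadRequest)
		return false
	}
	return true
}
```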

View File

@@ -0,0 +1,336 @@
package server
import (
"context"
"fmt"
"log/slog"
"reflect"
"sync"
"unsafe"
"github.com/ajac-zero/latticelm/internal/api"
"github.com/ajac-zero/latticelm/internal/config"
"github.com/ajac-zero/latticelm/internal/conversation"
"github.com/ajac-zero/latticelm/internal/providers"
)
// mockProvider implements providers.Provider for testing
type mockProvider struct {
name string
generateFunc func(ctx context.Context, messages []api.Message, req *api.ResponseRequest) (*api.ProviderResult, error)
streamFunc func(ctx context.Context, messages []api.Message, req *api.ResponseRequest) (<-chan *api.ProviderStreamDelta, <-chan error)
generateCalled int
streamCalled int
mu sync.Mutex
}
func newMockProvider(name string) *mockProvider {
return &mockProvider{
name: name,
}
}
func (m *mockProvider) Name() string {
return m.name
}
func (m *mockProvider) Generate(ctx context.Context, messages []api.Message, req *api.ResponseRequest) (*api.ProviderResult, error) {
m.mu.Lock()
m.generateCalled++
m.mu.Unlock()
if m.generateFunc != nil {
return m.generateFunc(ctx, messages, req)
}
return &api.ProviderResult{
ID: "mock-id",
Model: req.Model,
Text: "mock response",
Usage: api.Usage{
InputTokens: 10,
OutputTokens: 20,
TotalTokens: 30,
},
}, nil
}
func (m *mockProvider) GenerateStream(ctx context.Context, messages []api.Message, req *api.ResponseRequest) (<-chan *api.ProviderStreamDelta, <-chan error) {
m.mu.Lock()
m.streamCalled++
m.mu.Unlock()
if m.streamFunc != nil {
return m.streamFunc(ctx, messages, req)
}
// Default behavior: send a simple text stream
deltaChan := make(chan *api.ProviderStreamDelta, 3)
errChan := make(chan error, 1)
go func() {
defer close(deltaChan)
defer close(errChan)
deltaChan <- &api.ProviderStreamDelta{
Model: req.Model,
Text: "Hello",
}
deltaChan <- &api.ProviderStreamDelta{
Text: " world",
}
deltaChan <- &api.ProviderStreamDelta{
Done: true,
}
}()
return deltaChan, errChan
}
func (m *mockProvider) getGenerateCalled() int {
m.mu.Lock()
defer m.mu.Unlock()
return m.generateCalled
}
func (m *mockProvider) getStreamCalled() int {
m.mu.Lock()
defer m.mu.Unlock()
return m.streamCalled
}
// buildTestRegistry creates a providers.Registry for testing with mock providers
// Uses reflection to inject mock providers into the registry
func buildTestRegistry(mockProviders map[string]providers.Provider, modelConfigs []config.ModelEntry) *providers.Registry {
// Create empty registry
reg := &providers.Registry{}
// Use reflection to set private fields
regValue := reflect.ValueOf(reg).Elem()
// Set providers field
providersField := regValue.FieldByName("providers")
providersPtr := unsafe.Pointer(providersField.UnsafeAddr())
*(*map[string]providers.Provider)(providersPtr) = mockProviders
// Set modelList field
modelListField := regValue.FieldByName("modelList")
modelListPtr := unsafe.Pointer(modelListField.UnsafeAddr())
*(*[]config.ModelEntry)(modelListPtr) = modelConfigs
// Set models map (model name -> provider name)
modelsField := regValue.FieldByName("models")
modelsPtr := unsafe.Pointer(modelsField.UnsafeAddr())
modelsMap := make(map[string]string)
for _, m := range modelConfigs {
modelsMap[m.Name] = m.Provider
}
*(*map[string]string)(modelsPtr) = modelsMap
// Set providerModelIDs map
providerModelIDsField := regValue.FieldByName("providerModelIDs")
providerModelIDsPtr := unsafe.Pointer(providerModelIDsField.UnsafeAddr())
providerModelIDsMap := make(map[string]string)
for _, m := range modelConfigs {
if m.ProviderModelID != "" {
providerModelIDsMap[m.Name] = m.ProviderModelID
}
}
*(*map[string]string)(providerModelIDsPtr) = providerModelIDsMap
return reg
}
// mockConversationStore implements conversation.Store for testing
type mockConversationStore struct {
conversations map[string]*conversation.Conversation
createErr error
getErr error
appendErr error
deleteErr error
mu sync.Mutex
}
func newMockConversationStore() *mockConversationStore {
return &mockConversationStore{
conversations: make(map[string]*conversation.Conversation),
}
}
func (m *mockConversationStore) Get(ctx context.Context, id string) (*conversation.Conversation, error) {
m.mu.Lock()
defer m.mu.Unlock()
if m.getErr != nil {
return nil, m.getErr
}
conv, ok := m.conversations[id]
if !ok {
return nil, nil
}
return conv, nil
}
func (m *mockConversationStore) Create(ctx context.Context, id string, model string, messages []api.Message) (*conversation.Conversation, error) {
m.mu.Lock()
defer m.mu.Unlock()
if m.createErr != nil {
return nil, m.createErr
}
conv := &conversation.Conversation{
ID: id,
Model: model,
Messages: messages,
}
m.conversations[id] = conv
return conv, nil
}
func (m *mockConversationStore) Append(ctx context.Context, id string, messages ...api.Message) (*conversation.Conversation, error) {
m.mu.Lock()
defer m.mu.Unlock()
if m.appendErr != nil {
return nil, m.appendErr
}
conv, ok := m.conversations[id]
if !ok {
return nil, nil
}
conv.Messages = append(conv.Messages, messages...)
return conv, nil
}
func (m *mockConversationStore) Delete(ctx context.Context, id string) error {
m.mu.Lock()
defer m.mu.Unlock()
if m.deleteErr != nil {
return m.deleteErr
}
delete(m.conversations, id)
return nil
}
func (m *mockConversationStore) Size() int {
m.mu.Lock()
defer m.mu.Unlock()
return len(m.conversations)
}
func (m *mockConversationStore) Close() error {
return nil
}
func (m *mockConversationStore) setConversation(id string, conv *conversation.Conversation) {
m.mu.Lock()
defer m.mu.Unlock()
m.conversations[id] = conv
}
// mockLogger captures log output for testing
type mockLogger struct {
logs []string
mu sync.Mutex
}
func newMockLogger() *mockLogger {
return &mockLogger{
logs: []string{},
}
}
func (m *mockLogger) Printf(format string, args ...interface{}) {
m.mu.Lock()
defer m.mu.Unlock()
m.logs = append(m.logs, fmt.Sprintf(format, args...))
}
func (m *mockLogger) getLogs() []string {
m.mu.Lock()
defer m.mu.Unlock()
return append([]string{}, m.logs...)
}
func (m *mockLogger) asLogger() *slog.Logger {
return slog.New(slog.NewTextHandler(m, &slog.HandlerOptions{
Level: slog.LevelDebug,
}))
}
func (m *mockLogger) Write(p []byte) (n int, err error) {
m.mu.Lock()
defer m.mu.Unlock()
m.logs = append(m.logs, string(p))
return len(p), nil
}
// mockRegistry is a simple mock for providers.Registry
type mockRegistry struct {
providers map[string]providers.Provider
models map[string]string // model name -> provider name
mu sync.RWMutex
}
func newMockRegistry() *mockRegistry {
return &mockRegistry{
providers: make(map[string]providers.Provider),
models: make(map[string]string),
}
}
func (m *mockRegistry) Get(name string) (providers.Provider, bool) {
m.mu.RLock()
defer m.mu.RUnlock()
p, ok := m.providers[name]
return p, ok
}
func (m *mockRegistry) Default(model string) (providers.Provider, error) {
m.mu.RLock()
defer m.mu.RUnlock()
providerName, ok := m.models[model]
if !ok {
return nil, fmt.Errorf("no provider configured for model %s", model)
}
p, ok := m.providers[providerName]
if !ok {
return nil, fmt.Errorf("provider %s not found", providerName)
}
return p, nil
}
func (m *mockRegistry) Models() []struct{ Provider, Model string } {
m.mu.RLock()
defer m.mu.RUnlock()
var models []struct{ Provider, Model string }
for modelName, providerName := range m.models {
models = append(models, struct{ Provider, Model string }{
Model: modelName,
Provider: providerName,
})
}
return models
}
func (m *mockRegistry) ResolveModelID(model string) string {
// Simple implementation - just return the model name as-is
return model
}
func (m *mockRegistry) addProvider(name string, provider providers.Provider) {
m.mu.Lock()
defer m.mu.Unlock()
m.providers[name] = provider
}
func (m *mockRegistry) addModel(model, provider string) {
m.mu.Lock()
defer m.mu.Unlock()
m.models[model] = provider
}
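Worth noting: once `GatewayServer` accepts the `ProviderRegistry` interface (introduced in the responses.go diff below), tests can inject `mockRegistry` directly, and the `unsafe`/`reflect`-based `buildTestRegistry` becomes unnecessary. A sketch of a test constructor built only from the mocks above; `newTestServer` is hypothetical:

```go
// newTestServer assembles a GatewayServer entirely from the mocks in this
// file, avoiding the unsafe-based registry construction.
func newTestServer() *GatewayServer {
	reg := newMockRegistry()
	reg.addProvider("mock", newMockProvider("mock"))
	reg.addModel("test-model", "mock")
	return New(reg, newMockConversationStore(), newMockLogger().asLogger())
}
```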

View File

@@ -2,28 +2,39 @@ package server
import ( import (
"encoding/json" "encoding/json"
"errors"
"fmt" "fmt"
"log" "log/slog"
"net/http" "net/http"
"strings" "strings"
"time" "time"
"github.com/google/uuid" "github.com/google/uuid"
"github.com/sony/gobreaker"
"github.com/ajac-zero/latticelm/internal/api" "github.com/ajac-zero/latticelm/internal/api"
"github.com/ajac-zero/latticelm/internal/conversation" "github.com/ajac-zero/latticelm/internal/conversation"
"github.com/ajac-zero/latticelm/internal/logger"
"github.com/ajac-zero/latticelm/internal/providers" "github.com/ajac-zero/latticelm/internal/providers"
) )
// ProviderRegistry is an interface for provider registries.
type ProviderRegistry interface {
Get(name string) (providers.Provider, bool)
Models() []struct{ Provider, Model string }
ResolveModelID(model string) string
Default(model string) (providers.Provider, error)
}
// GatewayServer hosts the Open Responses API for the gateway. // GatewayServer hosts the Open Responses API for the gateway.
type GatewayServer struct { type GatewayServer struct {
registry *providers.Registry registry ProviderRegistry
convs conversation.Store convs conversation.Store
logger *log.Logger logger *slog.Logger
} }
// New creates a GatewayServer bound to the provider registry. // New creates a GatewayServer bound to the provider registry.
func New(registry *providers.Registry, convs conversation.Store, logger *log.Logger) *GatewayServer { func New(registry ProviderRegistry, convs conversation.Store, logger *slog.Logger) *GatewayServer {
return &GatewayServer{ return &GatewayServer{
registry: registry, registry: registry,
convs: convs, convs: convs,
@@ -31,10 +42,17 @@ func New(registry *providers.Registry, convs conversation.Store, logger *log.Log
} }
} }
// isCircuitBreakerError checks if the error is from a circuit breaker.
func isCircuitBreakerError(err error) bool {
return errors.Is(err, gobreaker.ErrOpenState) || errors.Is(err, gobreaker.ErrTooManyRequests)
}
// RegisterRoutes wires the HTTP handlers onto the provided mux. // RegisterRoutes wires the HTTP handlers onto the provided mux.
func (s *GatewayServer) RegisterRoutes(mux *http.ServeMux) { func (s *GatewayServer) RegisterRoutes(mux *http.ServeMux) {
mux.HandleFunc("/v1/responses", s.handleResponses) mux.HandleFunc("/v1/responses", s.handleResponses)
mux.HandleFunc("/v1/models", s.handleModels) mux.HandleFunc("/v1/models", s.handleModels)
mux.HandleFunc("/health", s.handleHealth)
mux.HandleFunc("/ready", s.handleReady)
} }
func (s *GatewayServer) handleModels(w http.ResponseWriter, r *http.Request) { func (s *GatewayServer) handleModels(w http.ResponseWriter, r *http.Request) {
@@ -58,7 +76,14 @@ func (s *GatewayServer) handleModels(w http.ResponseWriter, r *http.Request) {
} }
w.Header().Set("Content-Type", "application/json") w.Header().Set("Content-Type", "application/json")
_ = json.NewEncoder(w).Encode(resp) if err := json.NewEncoder(w).Encode(resp); err != nil {
s.logger.ErrorContext(r.Context(), "failed to encode models response",
logger.LogAttrsWithTrace(r.Context(),
slog.String("request_id", logger.FromContext(r.Context())),
slog.String("error", err.Error()),
)...,
)
}
} }
func (s *GatewayServer) handleResponses(w http.ResponseWriter, r *http.Request) { func (s *GatewayServer) handleResponses(w http.ResponseWriter, r *http.Request) {
@@ -69,6 +94,11 @@ func (s *GatewayServer) handleResponses(w http.ResponseWriter, r *http.Request)
var req api.ResponseRequest var req api.ResponseRequest
if err := json.NewDecoder(r.Body).Decode(&req); err != nil { if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
// Check if error is due to request size limit
if err.Error() == "http: request body too large" {
http.Error(w, "request body too large", http.StatusRequestEntityTooLarge)
return
}
http.Error(w, "invalid JSON payload", http.StatusBadRequest) http.Error(w, "invalid JSON payload", http.StatusBadRequest)
return return
} }
@@ -84,13 +114,23 @@ func (s *GatewayServer) handleResponses(w http.ResponseWriter, r *http.Request)
// Build full message history from previous conversation // Build full message history from previous conversation
var historyMsgs []api.Message var historyMsgs []api.Message
if req.PreviousResponseID != nil && *req.PreviousResponseID != "" { if req.PreviousResponseID != nil && *req.PreviousResponseID != "" {
conv, err := s.convs.Get(*req.PreviousResponseID) conv, err := s.convs.Get(r.Context(), *req.PreviousResponseID)
if err != nil { if err != nil {
s.logger.Printf("error retrieving conversation: %v", err) s.logger.ErrorContext(r.Context(), "failed to retrieve conversation",
logger.LogAttrsWithTrace(r.Context(),
slog.String("request_id", logger.FromContext(r.Context())),
slog.String("conversation_id", *req.PreviousResponseID),
slog.String("error", err.Error()),
)...,
)
http.Error(w, "error retrieving conversation", http.StatusInternalServerError) http.Error(w, "error retrieving conversation", http.StatusInternalServerError)
return return
} }
if conv == nil { if conv == nil {
s.logger.WarnContext(r.Context(), "conversation not found",
slog.String("request_id", logger.FromContext(r.Context())),
slog.String("conversation_id", *req.PreviousResponseID),
)
http.Error(w, "conversation not found", http.StatusNotFound) http.Error(w, "conversation not found", http.StatusNotFound)
return return
} }
@@ -132,8 +172,21 @@ func (s *GatewayServer) handleResponses(w http.ResponseWriter, r *http.Request)
func (s *GatewayServer) handleSyncResponse(w http.ResponseWriter, r *http.Request, provider providers.Provider, providerMsgs []api.Message, resolvedReq *api.ResponseRequest, origReq *api.ResponseRequest, storeMsgs []api.Message) { func (s *GatewayServer) handleSyncResponse(w http.ResponseWriter, r *http.Request, provider providers.Provider, providerMsgs []api.Message, resolvedReq *api.ResponseRequest, origReq *api.ResponseRequest, storeMsgs []api.Message) {
result, err := provider.Generate(r.Context(), providerMsgs, resolvedReq) result, err := provider.Generate(r.Context(), providerMsgs, resolvedReq)
if err != nil { if err != nil {
s.logger.Printf("provider %s error: %v", provider.Name(), err) s.logger.ErrorContext(r.Context(), "provider generation failed",
http.Error(w, "provider error", http.StatusBadGateway) logger.LogAttrsWithTrace(r.Context(),
slog.String("request_id", logger.FromContext(r.Context())),
slog.String("provider", provider.Name()),
slog.String("model", resolvedReq.Model),
slog.String("error", err.Error()),
)...,
)
// Check if error is from circuit breaker
if isCircuitBreakerError(err) {
http.Error(w, "service temporarily unavailable - circuit breaker open", http.StatusServiceUnavailable)
} else {
http.Error(w, "provider error", http.StatusBadGateway)
}
return return
} }
@@ -146,17 +199,43 @@ func (s *GatewayServer) handleSyncResponse(w http.ResponseWriter, r *http.Reques
ToolCalls: result.ToolCalls, ToolCalls: result.ToolCalls,
} }
allMsgs := append(storeMsgs, assistantMsg) allMsgs := append(storeMsgs, assistantMsg)
-	if _, err := s.convs.Create(responseID, result.Model, allMsgs); err != nil {
-		s.logger.Printf("error storing conversation: %v", err)
+	if _, err := s.convs.Create(r.Context(), responseID, result.Model, allMsgs); err != nil {
+		s.logger.ErrorContext(r.Context(), "failed to store conversation",
+			logger.LogAttrsWithTrace(r.Context(),
+				slog.String("request_id", logger.FromContext(r.Context())),
+				slog.String("response_id", responseID),
+				slog.String("error", err.Error()),
+			)...,
+		)
 		// Don't fail the response if storage fails
 	}
+	s.logger.InfoContext(r.Context(), "response generated",
+		logger.LogAttrsWithTrace(r.Context(),
+			slog.String("request_id", logger.FromContext(r.Context())),
+			slog.String("provider", provider.Name()),
+			slog.String("model", result.Model),
+			slog.String("response_id", responseID),
+			slog.Int("input_tokens", result.Usage.InputTokens),
+			slog.Int("output_tokens", result.Usage.OutputTokens),
+			slog.Bool("has_tool_calls", len(result.ToolCalls) > 0),
+		)...,
+	)
 	// Build spec-compliant response
 	resp := s.buildResponse(origReq, result, provider.Name(), responseID)
 	w.Header().Set("Content-Type", "application/json")
 	w.WriteHeader(http.StatusOK)
-	_ = json.NewEncoder(w).Encode(resp)
+	if err := json.NewEncoder(w).Encode(resp); err != nil {
+		s.logger.ErrorContext(r.Context(), "failed to encode response",
+			logger.LogAttrsWithTrace(r.Context(),
+				slog.String("request_id", logger.FromContext(r.Context())),
+				slog.String("response_id", responseID),
+				slog.String("error", err.Error()),
+			)...,
+		)
+	}
 }

 func (s *GatewayServer) handleStreamingResponse(w http.ResponseWriter, r *http.Request, provider providers.Provider, providerMsgs []api.Message, resolvedReq *api.ResponseRequest, origReq *api.ResponseRequest, storeMsgs []api.Message) {
@@ -327,13 +406,31 @@ loop:
 			}
 			break loop
 		case <-r.Context().Done():
-			s.logger.Printf("client disconnected")
+			s.logger.InfoContext(r.Context(), "client disconnected",
+				slog.String("request_id", logger.FromContext(r.Context())),
+			)
 			return
 		}
 	}

 	if streamErr != nil {
-		s.logger.Printf("stream error: %v", streamErr)
+		s.logger.ErrorContext(r.Context(), "stream error",
+			logger.LogAttrsWithTrace(r.Context(),
+				slog.String("request_id", logger.FromContext(r.Context())),
+				slog.String("provider", provider.Name()),
+				slog.String("model", origReq.Model),
+				slog.String("error", streamErr.Error()),
+			)...,
+		)
+		// Determine error type based on circuit breaker state
+		errorType := "server_error"
+		errorMessage := streamErr.Error()
+		if isCircuitBreakerError(streamErr) {
+			errorType = "circuit_breaker_open"
+			errorMessage = "service temporarily unavailable - circuit breaker open"
+		}
 		failedResp := s.buildResponse(origReq, &api.ProviderResult{
 			Model: origReq.Model,
 		}, provider.Name(), responseID)
@@ -341,8 +438,8 @@ loop:
 		failedResp.CompletedAt = nil
 		failedResp.Output = []api.OutputItem{}
 		failedResp.Error = &api.ResponseError{
-			Type:    "server_error",
-			Message: streamErr.Error(),
+			Type:    errorType,
+			Message: errorMessage,
 		}
 		s.sendSSE(w, flusher, &seq, "response.failed", &api.StreamEvent{
 			Type: "response.failed",
@@ -468,10 +565,22 @@ loop:
 			ToolCalls: toolCalls,
 		}
 		allMsgs := append(storeMsgs, assistantMsg)
-		if _, err := s.convs.Create(responseID, model, allMsgs); err != nil {
-			s.logger.Printf("error storing conversation: %v", err)
+		if _, err := s.convs.Create(r.Context(), responseID, model, allMsgs); err != nil {
+			s.logger.ErrorContext(r.Context(), "failed to store conversation",
+				slog.String("request_id", logger.FromContext(r.Context())),
+				slog.String("response_id", responseID),
+				slog.String("error", err.Error()),
+			)
 			// Don't fail the response if storage fails
 		}
+		s.logger.InfoContext(r.Context(), "streaming response completed",
+			slog.String("request_id", logger.FromContext(r.Context())),
+			slog.String("provider", provider.Name()),
+			slog.String("model", model),
+			slog.String("response_id", responseID),
+			slog.Bool("has_tool_calls", len(toolCalls) > 0),
+		)
 	}
 }
@@ -480,7 +589,10 @@ func (s *GatewayServer) sendSSE(w http.ResponseWriter, flusher http.Flusher, seq
 	*seq++
 	data, err := json.Marshal(event)
 	if err != nil {
-		s.logger.Printf("failed to marshal SSE event: %v", err)
+		s.logger.Error("failed to marshal SSE event",
+			slog.String("event_type", eventType),
+			slog.String("error", err.Error()),
+		)
 		return
 	}
 	fmt.Fprintf(w, "event: %s\ndata: %s\n\n", eventType, data)
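The streaming error path above calls `isCircuitBreakerError`, whose definition sits in the suppressed server diff below. As a rough sketch of what such a helper usually amounts to, assuming the breaker package exports a sentinel error (the `circuitbreaker` import path and `ErrCircuitOpen` name are placeholders, not the repository's actual identifiers):

```go
package server

import (
	"errors"

	// Placeholder import path; the real package is part of the suppressed diff.
	"example.com/llm-gateway/internal/circuitbreaker"
)

// isCircuitBreakerError reports whether err came from an open circuit breaker,
// letting the handler map it to the "circuit_breaker_open" response error type.
func isCircuitBreakerError(err error) bool {
	// errors.Is unwraps the chain, so a sentinel wrapped with extra context
	// by the provider layer is still detected.
	return errors.Is(err, circuitbreaker.ErrCircuitOpen)
}
```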

File diff suppressed because it is too large

352
k8s/README.md Normal file
View File

@@ -0,0 +1,352 @@
# Kubernetes Deployment Guide
This directory contains Kubernetes manifests for deploying the LLM Gateway to production.
## Prerequisites
- Kubernetes cluster (v1.24+)
- `kubectl` configured
- Container registry access
- (Optional) Prometheus Operator for monitoring
- (Optional) cert-manager for TLS certificates
- (Optional) nginx-ingress-controller or cloud load balancer
## Quick Start
### 1. Build and Push Docker Image
```bash
# Build the image
docker build -t your-registry/llm-gateway:v1.0.0 .
# Push to registry
docker push your-registry/llm-gateway:v1.0.0
```
### 2. Configure Secrets
**Option A: Using kubectl**
```bash
kubectl create namespace llm-gateway
kubectl create secret generic llm-gateway-secrets \
--from-literal=GOOGLE_API_KEY="your-key" \
--from-literal=ANTHROPIC_API_KEY="your-key" \
--from-literal=OPENAI_API_KEY="your-key" \
--from-literal=OIDC_AUDIENCE="your-client-id" \
-n llm-gateway
```
**Option B: Using External Secrets Operator (Recommended)**
- Uncomment the ExternalSecret in `secret.yaml`
- Configure your SecretStore (AWS Secrets Manager, Vault, etc.)
### 3. Update Configuration
Edit `configmap.yaml`:
- Update Redis connection string if using external Redis
- Configure observability endpoints (Tempo, Prometheus)
- Adjust rate limits as needed
- Set OIDC issuer and audience
Edit `ingress.yaml`:
- Replace `llm-gateway.example.com` with your domain
- Configure TLS certificate annotations
Edit `kustomization.yaml`:
- Update image registry and tag
### 4. Deploy
**Using Kustomize (Recommended):**
```bash
kubectl apply -k k8s/
```
**Using kubectl directly:**
```bash
kubectl apply -f k8s/namespace.yaml
kubectl apply -f k8s/serviceaccount.yaml
kubectl apply -f k8s/secret.yaml
kubectl apply -f k8s/configmap.yaml
kubectl apply -f k8s/redis.yaml
kubectl apply -f k8s/deployment.yaml
kubectl apply -f k8s/service.yaml
kubectl apply -f k8s/ingress.yaml
kubectl apply -f k8s/hpa.yaml
kubectl apply -f k8s/pdb.yaml
kubectl apply -f k8s/networkpolicy.yaml
```
**With Prometheus Operator:**
```bash
kubectl apply -f k8s/servicemonitor.yaml
kubectl apply -f k8s/prometheusrule.yaml
```
### 5. Verify Deployment
```bash
# Check pods
kubectl get pods -n llm-gateway
# Check services
kubectl get svc -n llm-gateway
# Check ingress
kubectl get ingress -n llm-gateway
# View logs
kubectl logs -n llm-gateway -l app=llm-gateway --tail=100 -f
# Check health
kubectl port-forward -n llm-gateway svc/llm-gateway 8080:80
curl http://localhost:8080/health
```
## Architecture Overview
```
┌─────────────────────────────────────────────────────────┐
│                    Internet/Clients                     │
└───────────────────────┬─────────────────────────────────┘
                        ▼
┌─────────────────────────────────────────────────────────┐
│                   Ingress Controller                    │
│                (nginx/ALB/GCE with TLS)                 │
└───────────────────────┬─────────────────────────────────┘
                        ▼
┌─────────────────────────────────────────────────────────┐
│                  LLM Gateway Service                    │
│                      (ClusterIP)                        │
└───────────────────────┬─────────────────────────────────┘
        ┌───────────────┼───────────────┐
        ▼               ▼               ▼
┌──────────────┐ ┌──────────────┐ ┌──────────────┐
│   Gateway    │ │   Gateway    │ │   Gateway    │
│    Pod 1     │ │    Pod 2     │ │    Pod 3     │
└──────┬───────┘ └──────┬───────┘ └──────┬───────┘
       │                │                │
       └────────────────┼────────────────┘
        ┌───────────────┼───────────────┐
        ▼               ▼               ▼
┌──────────────┐ ┌──────────────┐ ┌──────────────┐
│    Redis     │ │  Prometheus  │ │    Tempo     │
│ (Persistent) │ │  (Metrics)   │ │   (Traces)   │
└──────────────┘ └──────────────┘ └──────────────┘
```
## Resource Specifications
### Default Resources
- **Requests**: 100m CPU, 128Mi memory
- **Limits**: 1000m CPU, 512Mi memory
- **Replicas**: 3 (min), 20 (max with HPA)
### Scaling
- HPA scales on CPU (70%) and memory (80%) utilization
- A PodDisruptionBudget keeps at least 2 replicas available during voluntary disruptions
## Configuration Options
### Environment Variables (from Secret)
- `GOOGLE_API_KEY`: Google AI API key
- `ANTHROPIC_API_KEY`: Anthropic API key
- `OPENAI_API_KEY`: OpenAI API key
- `OIDC_AUDIENCE`: OIDC client ID for authentication
### ConfigMap Settings
See `configmap.yaml` for full configuration options:
- Server address
- Logging format and level
- Rate limiting
- Observability (metrics/tracing)
- Provider endpoints
- Conversation storage
- Authentication
## Security
### Security Features
- Non-root container execution (UID 1000)
- Read-only root filesystem
- No privilege escalation
- All capabilities dropped
- Network policies for ingress/egress control
- SeccompProfile: RuntimeDefault
### TLS/HTTPS
- Ingress configured with TLS
- Uses cert-manager for automatic certificate provisioning
- Force SSL redirect enabled
### Secrets Management
**Never commit secrets to git!**
Production options:
1. **External Secrets Operator** (Recommended)
- AWS Secrets Manager
- HashiCorp Vault
- Google Secret Manager
2. **Sealed Secrets**
- Encrypted secrets in git
3. **Manual kubectl secrets**
- Created outside of git
## Monitoring
### Metrics
- Exposed on `/metrics` endpoint
- Scraped by Prometheus via ServiceMonitor
- Key metrics:
- HTTP request rate, latency, errors
- Provider request rate, latency, token usage
- Conversation store operations
- Rate limiting hits
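The metric names match those referenced in `prometheusrule.yaml`, so they can be queried directly in Prometheus, for example:

```promql
# Request rate by status code over the last 5 minutes
sum(rate(http_requests_total{namespace="llm-gateway"}[5m])) by (status_code)

# Per-provider error ratio
sum(rate(provider_requests_total{namespace="llm-gateway",status="error"}[5m])) by (provider)
  /
sum(rate(provider_requests_total{namespace="llm-gateway"}[5m])) by (provider)
```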
### Alerts
See `prometheusrule.yaml` for configured alerts:
- High error rate
- High latency
- Provider failures
- Pod down
- High memory usage
- Rate limit threshold exceeded
- Conversation store errors
### Logs
Structured JSON logs with:
- Request IDs
- Trace context (trace_id, span_id)
- Log levels (debug/info/warn/error)
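An illustrative log line with these fields (field names follow the slog attributes used by the gateway; the values here are made up):

```json
{"time":"2026-03-05T07:21:13Z","level":"INFO","msg":"response generated","request_id":"req_abc123","trace_id":"4bf92f3577b34da6a3ce929d0e0e4736","span_id":"00f067aa0ba902b7","provider":"anthropic","model":"claude-3-5-sonnet-20241022","input_tokens":128,"output_tokens":512}
```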
View logs:
```bash
kubectl logs -n llm-gateway -l app=llm-gateway --tail=100 -f
```
## Maintenance
### Rolling Updates
```bash
# Update image
kubectl set image deployment/llm-gateway gateway=your-registry/llm-gateway:v1.0.1 -n llm-gateway
# Check rollout status
kubectl rollout status deployment/llm-gateway -n llm-gateway
# Rollback if needed
kubectl rollout undo deployment/llm-gateway -n llm-gateway
```
### Scaling
```bash
# Manual scale
kubectl scale deployment/llm-gateway --replicas=5 -n llm-gateway
# HPA will auto-scale within min/max bounds (3-20)
```
### Configuration Updates
```bash
# Edit ConfigMap
kubectl edit configmap llm-gateway-config -n llm-gateway
# Restart pods to pick up changes
kubectl rollout restart deployment/llm-gateway -n llm-gateway
```
### Debugging
```bash
# Exec into pod
kubectl exec -it -n llm-gateway deployment/llm-gateway -- /bin/sh
# Port forward for local access
kubectl port-forward -n llm-gateway svc/llm-gateway 8080:80
# Check events
kubectl get events -n llm-gateway --sort-by='.lastTimestamp'
```
## Production Considerations
### High Availability
- Minimum 3 replicas across availability zones
- Pod anti-affinity rules spread pods across nodes
- PodDisruptionBudget ensures service availability during disruptions
### Performance
- Adjust resource limits based on load testing
- Configure HPA thresholds based on traffic patterns
- Use node affinity for GPU nodes if needed
### Cost Optimization
- Use spot/preemptible instances for non-critical workloads
- Set appropriate resource requests/limits
- Monitor token usage and implement quotas
### Disaster Recovery
- Redis persistence (the bundled `redis.yaml` disables RDB and AOF by default; enable persistence or use managed Redis)
- Regular backups of conversation data
- Multi-region deployment for geo-redundancy
- Document runbooks for incident response
## Cloud-Specific Notes
### AWS EKS
- Use AWS Load Balancer Controller for ALB
- Configure IRSA for service account
- Use ElastiCache for Redis
- Store secrets in AWS Secrets Manager
### GCP GKE
- Use GKE Ingress for GCLB
- Configure Workload Identity
- Use Memorystore for Redis
- Store secrets in Google Secret Manager
### Azure AKS
- Use Azure Application Gateway Ingress Controller
- Configure Azure AD Workload Identity
- Use Azure Cache for Redis
- Store secrets in Azure Key Vault
## Troubleshooting
### Common Issues
**Pods not starting:**
```bash
kubectl describe pod -n llm-gateway -l app=llm-gateway
kubectl logs -n llm-gateway -l app=llm-gateway --previous
```
**Health check failures:**
```bash
kubectl port-forward -n llm-gateway deployment/llm-gateway 8080:8080
curl http://localhost:8080/health
curl http://localhost:8080/ready
```
**Provider connection issues:**
- Verify API keys in secrets
- Check network policies allow egress
- Verify provider endpoints are accessible
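To check egress and endpoint reachability from inside the cluster, a throwaway debug pod works well (assumes the `curlimages/curl` image is pullable; note the NetworkPolicy in `networkpolicy.yaml` only restricts pods labeled `app: llm-gateway`, so the test pod itself is unaffected):

```bash
kubectl run -n llm-gateway netcheck --rm -it --restart=Never \
  --image=curlimages/curl:8.7.1 -- \
  curl -sI --max-time 10 https://api.anthropic.com
```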
**Redis connection issues:**
```bash
kubectl exec -it -n llm-gateway redis-0 -- redis-cli ping
```
## Additional Resources
- [Kubernetes Documentation](https://kubernetes.io/docs/)
- [Prometheus Operator](https://github.com/prometheus-operator/prometheus-operator)
- [cert-manager](https://cert-manager.io/)
- [External Secrets Operator](https://external-secrets.io/)

76
k8s/configmap.yaml Normal file
View File

@@ -0,0 +1,76 @@
apiVersion: v1
kind: ConfigMap
metadata:
name: llm-gateway-config
namespace: llm-gateway
labels:
app: llm-gateway
data:
config.yaml: |
server:
address: ":8080"
logging:
format: "json"
level: "info"
rate_limit:
enabled: true
requests_per_second: 10
burst: 20
observability:
enabled: true
metrics:
enabled: true
path: "/metrics"
tracing:
enabled: true
service_name: "llm-gateway"
sampler:
type: "probability"
rate: 0.1
exporter:
type: "otlp"
endpoint: "tempo.observability.svc.cluster.local:4317"
insecure: true
providers:
google:
type: "google"
api_key: "${GOOGLE_API_KEY}"
endpoint: "https://generativelanguage.googleapis.com"
anthropic:
type: "anthropic"
api_key: "${ANTHROPIC_API_KEY}"
endpoint: "https://api.anthropic.com"
openai:
type: "openai"
api_key: "${OPENAI_API_KEY}"
endpoint: "https://api.openai.com"
conversations:
store: "redis"
ttl: "1h"
dsn: "redis://redis.llm-gateway.svc.cluster.local:6379/0"
auth:
enabled: true
issuer: "https://accounts.google.com"
audience: "${OIDC_AUDIENCE}"
models:
- name: "gemini-1.5-flash"
provider: "google"
- name: "gemini-1.5-pro"
provider: "google"
- name: "claude-3-5-sonnet-20241022"
provider: "anthropic"
- name: "claude-3-5-haiku-20241022"
provider: "anthropic"
- name: "gpt-4o"
provider: "openai"
- name: "gpt-4o-mini"
provider: "openai"

168
k8s/deployment.yaml Normal file
View File

@@ -0,0 +1,168 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: llm-gateway
namespace: llm-gateway
labels:
app: llm-gateway
version: v1
spec:
replicas: 3
strategy:
type: RollingUpdate
rollingUpdate:
maxSurge: 1
maxUnavailable: 0
selector:
matchLabels:
app: llm-gateway
template:
metadata:
labels:
app: llm-gateway
version: v1
annotations:
prometheus.io/scrape: "true"
prometheus.io/port: "8080"
prometheus.io/path: "/metrics"
spec:
serviceAccountName: llm-gateway
securityContext:
runAsNonRoot: true
runAsUser: 1000
runAsGroup: 1000
fsGroup: 1000
seccompProfile:
type: RuntimeDefault
containers:
- name: gateway
image: llm-gateway:latest # Replace with your registry/image:tag
imagePullPolicy: IfNotPresent
ports:
- name: http
containerPort: 8080
protocol: TCP
env:
# Provider API Keys from Secret
- name: GOOGLE_API_KEY
valueFrom:
secretKeyRef:
name: llm-gateway-secrets
key: GOOGLE_API_KEY
- name: ANTHROPIC_API_KEY
valueFrom:
secretKeyRef:
name: llm-gateway-secrets
key: ANTHROPIC_API_KEY
- name: OPENAI_API_KEY
valueFrom:
secretKeyRef:
name: llm-gateway-secrets
key: OPENAI_API_KEY
- name: OIDC_AUDIENCE
valueFrom:
secretKeyRef:
name: llm-gateway-secrets
key: OIDC_AUDIENCE
# Optional: Pod metadata
- name: POD_NAME
valueFrom:
fieldRef:
fieldPath: metadata.name
- name: POD_NAMESPACE
valueFrom:
fieldRef:
fieldPath: metadata.namespace
- name: POD_IP
valueFrom:
fieldRef:
fieldPath: status.podIP
resources:
requests:
cpu: 100m
memory: 128Mi
limits:
cpu: 1000m
memory: 512Mi
livenessProbe:
httpGet:
path: /health
port: http
scheme: HTTP
initialDelaySeconds: 10
periodSeconds: 30
timeoutSeconds: 5
successThreshold: 1
failureThreshold: 3
readinessProbe:
httpGet:
path: /ready
port: http
scheme: HTTP
initialDelaySeconds: 5
periodSeconds: 10
timeoutSeconds: 5
successThreshold: 1
failureThreshold: 3
startupProbe:
httpGet:
path: /health
port: http
scheme: HTTP
initialDelaySeconds: 0
periodSeconds: 5
timeoutSeconds: 3
successThreshold: 1
failureThreshold: 30
volumeMounts:
- name: config
mountPath: /app/config
readOnly: true
- name: tmp
mountPath: /tmp
securityContext:
allowPrivilegeEscalation: false
readOnlyRootFilesystem: true
runAsNonRoot: true
runAsUser: 1000
capabilities:
drop:
- ALL
volumes:
- name: config
configMap:
name: llm-gateway-config
- name: tmp
emptyDir: {}
# Affinity rules for better distribution
affinity:
podAntiAffinity:
preferredDuringSchedulingIgnoredDuringExecution:
- weight: 100
podAffinityTerm:
labelSelector:
matchExpressions:
- key: app
operator: In
values:
- llm-gateway
topologyKey: kubernetes.io/hostname
# Tolerations (if needed for specific node pools)
# tolerations:
# - key: "workload-type"
# operator: "Equal"
# value: "llm"
# effect: "NoSchedule"

63
k8s/hpa.yaml Normal file
View File

@@ -0,0 +1,63 @@
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
name: llm-gateway
namespace: llm-gateway
labels:
app: llm-gateway
spec:
scaleTargetRef:
apiVersion: apps/v1
kind: Deployment
name: llm-gateway
minReplicas: 3
maxReplicas: 20
behavior:
scaleDown:
stabilizationWindowSeconds: 300
policies:
- type: Percent
value: 50
periodSeconds: 60
- type: Pods
value: 2
periodSeconds: 60
selectPolicy: Min
scaleUp:
stabilizationWindowSeconds: 0
policies:
- type: Percent
value: 100
periodSeconds: 30
- type: Pods
value: 4
periodSeconds: 30
selectPolicy: Max
metrics:
# CPU-based scaling
- type: Resource
resource:
name: cpu
target:
type: Utilization
averageUtilization: 70
# Memory-based scaling
- type: Resource
resource:
name: memory
target:
type: Utilization
averageUtilization: 80
# Custom metrics (requires metrics-server and custom metrics API)
# - type: Pods
# pods:
# metric:
# name: http_requests_per_second
# target:
# type: AverageValue
# averageValue: "1000"

66
k8s/ingress.yaml Normal file
View File

@@ -0,0 +1,66 @@
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
name: llm-gateway
namespace: llm-gateway
labels:
app: llm-gateway
annotations:
# General annotations
kubernetes.io/ingress.class: "nginx"
# TLS configuration
cert-manager.io/cluster-issuer: "letsencrypt-prod"
# Security headers
nginx.ingress.kubernetes.io/force-ssl-redirect: "true"
nginx.ingress.kubernetes.io/ssl-protocols: "TLSv1.2 TLSv1.3"
# Rate limiting (supplement application-level rate limiting)
nginx.ingress.kubernetes.io/limit-rps: "100"
nginx.ingress.kubernetes.io/limit-connections: "50"
# Request size limit (10MB)
nginx.ingress.kubernetes.io/proxy-body-size: "10m"
# Timeouts
nginx.ingress.kubernetes.io/proxy-connect-timeout: "60"
nginx.ingress.kubernetes.io/proxy-send-timeout: "120"
nginx.ingress.kubernetes.io/proxy-read-timeout: "120"
# CORS (if needed)
# nginx.ingress.kubernetes.io/enable-cors: "true"
# nginx.ingress.kubernetes.io/cors-allow-origin: "https://yourdomain.com"
# nginx.ingress.kubernetes.io/cors-allow-methods: "GET, POST, OPTIONS"
# nginx.ingress.kubernetes.io/cors-allow-credentials: "true"
# For AWS ALB Ingress Controller (alternative to nginx)
# kubernetes.io/ingress.class: "alb"
# alb.ingress.kubernetes.io/scheme: "internet-facing"
# alb.ingress.kubernetes.io/target-type: "ip"
# alb.ingress.kubernetes.io/listen-ports: '[{"HTTP": 80}, {"HTTPS": 443}]'
# alb.ingress.kubernetes.io/ssl-redirect: '443'
# alb.ingress.kubernetes.io/certificate-arn: "arn:aws:acm:region:account:certificate/xxx"
# For GKE Ingress (alternative to nginx)
# kubernetes.io/ingress.class: "gce"
# kubernetes.io/ingress.global-static-ip-name: "llm-gateway-ip"
# ingress.gcp.kubernetes.io/pre-shared-cert: "llm-gateway-cert"
spec:
tls:
- hosts:
- llm-gateway.example.com # Replace with your domain
secretName: llm-gateway-tls
rules:
- host: llm-gateway.example.com # Replace with your domain
http:
paths:
- path: /
pathType: Prefix
backend:
service:
name: llm-gateway
port:
number: 80

46
k8s/kustomization.yaml Normal file
View File

@@ -0,0 +1,46 @@
# Kustomize configuration for easy deployment
# Usage: kubectl apply -k k8s/
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
namespace: llm-gateway
resources:
- namespace.yaml
- serviceaccount.yaml
- configmap.yaml
- secret.yaml
- deployment.yaml
- service.yaml
- ingress.yaml
- hpa.yaml
- pdb.yaml
- networkpolicy.yaml
- redis.yaml
- servicemonitor.yaml
- prometheusrule.yaml
# Common labels applied to all resources
commonLabels:
app.kubernetes.io/name: llm-gateway
app.kubernetes.io/component: api-gateway
app.kubernetes.io/part-of: llm-platform
# Images to be used (customize for your registry)
images:
- name: llm-gateway
newName: your-registry/llm-gateway
newTag: latest
# ConfigMap generator (alternative to configmap.yaml)
# configMapGenerator:
# - name: llm-gateway-config
# files:
# - config.yaml
# Secret generator (for local development only)
# secretGenerator:
# - name: llm-gateway-secrets
# envs:
# - secrets.env

7
k8s/namespace.yaml Normal file
View File

@@ -0,0 +1,7 @@
apiVersion: v1
kind: Namespace
metadata:
name: llm-gateway
labels:
app: llm-gateway
environment: production

83
k8s/networkpolicy.yaml Normal file
View File

@@ -0,0 +1,83 @@
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
name: llm-gateway
namespace: llm-gateway
labels:
app: llm-gateway
spec:
podSelector:
matchLabels:
app: llm-gateway
policyTypes:
- Ingress
- Egress
ingress:
# Allow traffic from ingress controller
- from:
- namespaceSelector:
matchLabels:
name: ingress-nginx
ports:
- protocol: TCP
port: 8080
# Allow traffic from within the namespace (for debugging/testing)
- from:
- podSelector: {}
ports:
- protocol: TCP
port: 8080
# Allow Prometheus scraping
- from:
- namespaceSelector:
matchLabels:
name: observability
podSelector:
matchLabels:
app: prometheus
ports:
- protocol: TCP
port: 8080
egress:
# Allow DNS
- to:
- namespaceSelector: {}
podSelector:
matchLabels:
k8s-app: kube-dns
ports:
- protocol: UDP
port: 53
# Allow Redis access
- to:
- podSelector:
matchLabels:
app: redis
ports:
- protocol: TCP
port: 6379
# Allow external provider API access (OpenAI, Anthropic, Google)
- to:
- namespaceSelector: {}
ports:
- protocol: TCP
port: 443
# Allow OTLP tracing export
- to:
- namespaceSelector:
matchLabels:
name: observability
podSelector:
matchLabels:
app: tempo
ports:
- protocol: TCP
port: 4317

13
k8s/pdb.yaml Normal file
View File

@@ -0,0 +1,13 @@
apiVersion: policy/v1
kind: PodDisruptionBudget
metadata:
name: llm-gateway
namespace: llm-gateway
labels:
app: llm-gateway
spec:
minAvailable: 2
selector:
matchLabels:
app: llm-gateway
unhealthyPodEvictionPolicy: AlwaysAllow

122
k8s/prometheusrule.yaml Normal file
View File

@@ -0,0 +1,122 @@
# PrometheusRule for alerting
# Requires Prometheus Operator to be installed
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
name: llm-gateway
namespace: llm-gateway
labels:
app: llm-gateway
prometheus: kube-prometheus
spec:
groups:
- name: llm-gateway.rules
interval: 30s
rules:
# High error rate
- alert: LLMGatewayHighErrorRate
expr: |
(
sum(rate(http_requests_total{namespace="llm-gateway",status_code=~"5.."}[5m]))
/
sum(rate(http_requests_total{namespace="llm-gateway"}[5m]))
) > 0.05
for: 5m
labels:
severity: warning
component: llm-gateway
annotations:
summary: "High error rate in LLM Gateway"
description: "Error rate is {{ $value | humanizePercentage }} (threshold: 5%)"
# High latency
- alert: LLMGatewayHighLatency
expr: |
histogram_quantile(0.95,
sum(rate(http_request_duration_seconds_bucket{namespace="llm-gateway"}[5m])) by (le)
) > 10
for: 5m
labels:
severity: warning
component: llm-gateway
annotations:
summary: "High latency in LLM Gateway"
description: "P95 latency is {{ $value }}s (threshold: 10s)"
# Provider errors
- alert: LLMProviderHighErrorRate
expr: |
(
sum(rate(provider_requests_total{namespace="llm-gateway",status="error"}[5m])) by (provider)
/
sum(rate(provider_requests_total{namespace="llm-gateway"}[5m])) by (provider)
) > 0.10
for: 5m
labels:
severity: warning
component: llm-gateway
annotations:
summary: "High error rate for provider {{ $labels.provider }}"
description: "Error rate is {{ $value | humanizePercentage }} (threshold: 10%)"
# Pod down
- alert: LLMGatewayPodDown
expr: |
up{job="llm-gateway",namespace="llm-gateway"} == 0
for: 2m
labels:
severity: critical
component: llm-gateway
annotations:
summary: "LLM Gateway pod is down"
description: "Pod {{ $labels.pod }} has been down for more than 2 minutes"
# High memory usage
- alert: LLMGatewayHighMemoryUsage
expr: |
(
container_memory_working_set_bytes{namespace="llm-gateway",container="gateway"}
/
container_spec_memory_limit_bytes{namespace="llm-gateway",container="gateway"}
) > 0.85
for: 5m
labels:
severity: warning
component: llm-gateway
annotations:
summary: "High memory usage in LLM Gateway"
description: "Memory usage is {{ $value | humanizePercentage }} (threshold: 85%)"
# Rate limit threshold
- alert: LLMGatewayHighRateLimitHitRate
expr: |
(
sum(rate(http_requests_total{namespace="llm-gateway",status_code="429"}[5m]))
/
sum(rate(http_requests_total{namespace="llm-gateway"}[5m]))
) > 0.20
for: 10m
labels:
severity: info
component: llm-gateway
annotations:
summary: "High rate limit hit rate"
description: "{{ $value | humanizePercentage }} of requests are being rate limited"
# Conversation store errors
- alert: LLMGatewayConversationStoreErrors
expr: |
(
sum(rate(conversation_store_operations_total{namespace="llm-gateway",status="error"}[5m]))
/
sum(rate(conversation_store_operations_total{namespace="llm-gateway"}[5m]))
) > 0.05
for: 5m
labels:
severity: warning
component: llm-gateway
annotations:
summary: "High error rate in conversation store"
description: "Error rate is {{ $value | humanizePercentage }} (threshold: 5%)"

131
k8s/redis.yaml Normal file
View File

@@ -0,0 +1,131 @@
# Simple Redis deployment for conversation storage
# For production, consider using:
# - Redis Operator (e.g., Redis Enterprise Operator)
# - Managed Redis (AWS ElastiCache, GCP Memorystore, Azure Cache for Redis)
# - Redis Cluster for high availability
apiVersion: v1
kind: ConfigMap
metadata:
name: redis-config
namespace: llm-gateway
labels:
app: redis
data:
redis.conf: |
maxmemory 256mb
maxmemory-policy allkeys-lru
save ""
appendonly no
---
apiVersion: apps/v1
kind: StatefulSet
metadata:
name: redis
namespace: llm-gateway
labels:
app: redis
spec:
serviceName: redis
replicas: 1
selector:
matchLabels:
app: redis
template:
metadata:
labels:
app: redis
spec:
securityContext:
runAsNonRoot: true
runAsUser: 999
fsGroup: 999
seccompProfile:
type: RuntimeDefault
containers:
- name: redis
image: redis:7.2-alpine
imagePullPolicy: IfNotPresent
command:
- redis-server
- /etc/redis/redis.conf
ports:
- name: redis
containerPort: 6379
protocol: TCP
resources:
requests:
cpu: 100m
memory: 128Mi
limits:
cpu: 500m
memory: 512Mi
livenessProbe:
tcpSocket:
port: redis
initialDelaySeconds: 10
periodSeconds: 10
timeoutSeconds: 5
failureThreshold: 3
readinessProbe:
exec:
command:
- redis-cli
- ping
initialDelaySeconds: 5
periodSeconds: 5
timeoutSeconds: 3
failureThreshold: 3
volumeMounts:
- name: config
mountPath: /etc/redis
- name: data
mountPath: /data
securityContext:
allowPrivilegeEscalation: false
readOnlyRootFilesystem: true
runAsNonRoot: true
runAsUser: 999
capabilities:
drop:
- ALL
volumes:
- name: config
configMap:
name: redis-config
volumeClaimTemplates:
- metadata:
name: data
spec:
accessModes: ["ReadWriteOnce"]
resources:
requests:
storage: 10Gi
---
apiVersion: v1
kind: Service
metadata:
name: redis
namespace: llm-gateway
labels:
app: redis
spec:
type: ClusterIP
clusterIP: None
selector:
app: redis
ports:
- name: redis
port: 6379
targetPort: redis
protocol: TCP

46
k8s/secret.yaml Normal file
View File

@@ -0,0 +1,46 @@
apiVersion: v1
kind: Secret
metadata:
name: llm-gateway-secrets
namespace: llm-gateway
labels:
app: llm-gateway
type: Opaque
stringData:
# IMPORTANT: Replace these with actual values or use external secret management
# For production, use:
# - kubectl create secret generic llm-gateway-secrets --from-literal=...
# - External Secrets Operator with AWS Secrets Manager/HashiCorp Vault
# - Sealed Secrets
GOOGLE_API_KEY: "your-google-api-key-here"
ANTHROPIC_API_KEY: "your-anthropic-api-key-here"
OPENAI_API_KEY: "your-openai-api-key-here"
OIDC_AUDIENCE: "your-client-id.apps.googleusercontent.com"
---
# Example using External Secrets Operator (commented out)
# apiVersion: external-secrets.io/v1beta1
# kind: ExternalSecret
# metadata:
# name: llm-gateway-secrets
# namespace: llm-gateway
# spec:
# refreshInterval: 1h
# secretStoreRef:
# name: aws-secrets-manager
# kind: SecretStore
# target:
# name: llm-gateway-secrets
# creationPolicy: Owner
# data:
# - secretKey: GOOGLE_API_KEY
# remoteRef:
# key: prod/llm-gateway/google-api-key
# - secretKey: ANTHROPIC_API_KEY
# remoteRef:
# key: prod/llm-gateway/anthropic-api-key
# - secretKey: OPENAI_API_KEY
# remoteRef:
# key: prod/llm-gateway/openai-api-key
# - secretKey: OIDC_AUDIENCE
# remoteRef:
# key: prod/llm-gateway/oidc-audience

40
k8s/service.yaml Normal file
View File

@@ -0,0 +1,40 @@
apiVersion: v1
kind: Service
metadata:
name: llm-gateway
namespace: llm-gateway
labels:
app: llm-gateway
annotations:
# For cloud load balancers (uncomment as needed)
# service.beta.kubernetes.io/aws-load-balancer-type: "nlb"
# cloud.google.com/neg: '{"ingress": true}'
spec:
type: ClusterIP
selector:
app: llm-gateway
ports:
- name: http
port: 80
targetPort: http
protocol: TCP
sessionAffinity: None
---
# Headless service for pod-to-pod communication (if needed)
apiVersion: v1
kind: Service
metadata:
name: llm-gateway-headless
namespace: llm-gateway
labels:
app: llm-gateway
spec:
type: ClusterIP
clusterIP: None
selector:
app: llm-gateway
ports:
- name: http
port: 8080
targetPort: http
protocol: TCP

14
k8s/serviceaccount.yaml Normal file
View File

@@ -0,0 +1,14 @@
apiVersion: v1
kind: ServiceAccount
metadata:
name: llm-gateway
namespace: llm-gateway
labels:
app: llm-gateway
annotations:
# For GKE Workload Identity
# iam.gke.io/gcp-service-account: llm-gateway@PROJECT_ID.iam.gserviceaccount.com
# For EKS IRSA (IAM Roles for Service Accounts)
# eks.amazonaws.com/role-arn: arn:aws:iam::ACCOUNT_ID:role/llm-gateway-role
automountServiceAccountToken: true

35
k8s/servicemonitor.yaml Normal file
View File

@@ -0,0 +1,35 @@
# ServiceMonitor for Prometheus Operator
# Requires Prometheus Operator to be installed
# https://github.com/prometheus-operator/prometheus-operator
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
name: llm-gateway
namespace: llm-gateway
labels:
app: llm-gateway
prometheus: kube-prometheus
spec:
selector:
matchLabels:
app: llm-gateway
endpoints:
- port: http
path: /metrics
interval: 30s
scrapeTimeout: 10s
relabelings:
# Add namespace label
- sourceLabels: [__meta_kubernetes_namespace]
targetLabel: namespace
# Add pod label
- sourceLabels: [__meta_kubernetes_pod_name]
targetLabel: pod
# Add service label
- sourceLabels: [__meta_kubernetes_service_name]
targetLabel: service

126
run-tests.sh Executable file
View File

@@ -0,0 +1,126 @@
#!/bin/bash
# Test runner script for LatticeLM Gateway
# Usage: ./run-tests.sh [option]
#
# Options:
# all - Run all tests (default)
# coverage - Run tests with coverage report
# verbose - Run tests with verbose output
# config - Run config tests only
# providers - Run provider tests only
# conv - Run conversation tests only
# watch - Watch mode (requires entr)
set -e
COLOR_GREEN='\033[0;32m'
COLOR_BLUE='\033[0;34m'
COLOR_YELLOW='\033[1;33m'
COLOR_RED='\033[0;31m'
COLOR_RESET='\033[0m'
print_header() {
echo -e "${COLOR_BLUE}========================================${COLOR_RESET}"
echo -e "${COLOR_BLUE}$1${COLOR_RESET}"
echo -e "${COLOR_BLUE}========================================${COLOR_RESET}"
}
print_success() {
echo -e "${COLOR_GREEN}$1${COLOR_RESET}"
}
print_error() {
echo -e "${COLOR_RED}$1${COLOR_RESET}"
}
print_info() {
echo -e "${COLOR_YELLOW} $1${COLOR_RESET}"
}
run_all_tests() {
print_header "Running All Tests"
go test ./internal/... || exit 1
print_success "All tests passed!"
}
run_verbose_tests() {
print_header "Running Tests (Verbose)"
go test ./internal/... -v || exit 1
print_success "All tests passed!"
}
run_coverage_tests() {
print_header "Running Tests with Coverage"
go test ./internal/... -cover -coverprofile=coverage.out || exit 1
print_success "Tests passed! Generating HTML report..."
go tool cover -html=coverage.out -o coverage.html
print_success "Coverage report generated: coverage.html"
print_info "Open coverage.html in your browser to view detailed coverage"
}
run_config_tests() {
print_header "Running Config Tests"
go test ./internal/config -v -cover || exit 1
print_success "Config tests passed!"
}
run_provider_tests() {
print_header "Running Provider Tests"
go test ./internal/providers/... -v -cover || exit 1
print_success "Provider tests passed!"
}
run_conversation_tests() {
print_header "Running Conversation Tests"
go test ./internal/conversation -v -cover || exit 1
print_success "Conversation tests passed!"
}
run_watch_mode() {
if ! command -v entr &> /dev/null; then
print_error "entr is not installed. Install it with: apt-get install entr"
exit 1
fi
print_header "Running Tests in Watch Mode"
print_info "Watching for file changes... (Press Ctrl+C to stop)"
find ./internal -name '*.go' | entr -c sh -c 'go test ./internal/... || true'
}
# Main script
case "${1:-all}" in
all)
run_all_tests
;;
coverage)
run_coverage_tests
;;
verbose)
run_verbose_tests
;;
config)
run_config_tests
;;
providers)
run_provider_tests
;;
conv)
run_conversation_tests
;;
watch)
run_watch_mode
;;
*)
echo "Usage: $0 {all|coverage|verbose|config|providers|conv|watch}"
echo ""
echo "Options:"
echo " all - Run all tests (default)"
echo " coverage - Run tests with coverage report"
echo " verbose - Run tests with verbose output"
echo " config - Run config tests only"
echo " providers - Run provider tests only"
echo " conv - Run conversation tests only"
echo " watch - Watch mode (requires entr)"
exit 1
;;
esac

98
test_security_fixes.sh Executable file
View File

@@ -0,0 +1,98 @@
#!/bin/bash
# Test script to verify security fixes are working
# Usage: ./test_security_fixes.sh [server_url]
SERVER_URL="${1:-http://localhost:8080}"
GREEN='\033[0;32m'
RED='\033[0;31m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color
echo "Testing security improvements on $SERVER_URL"
echo "================================================"
echo ""
# Test 1: Request size limit
echo -e "${YELLOW}Test 1: Request Size Limit${NC}"
echo "Sending a request with 11MB payload (exceeds 10MB limit)..."
# Generate large payload
LARGE_PAYLOAD=$(python3 -c "import json; print(json.dumps({'model': 'test', 'input': 'x' * 11000000}))" 2>/dev/null || \
perl -e 'print "{\"model\":\"test\",\"input\":\"" . ("x" x 11000000) . "\"}"')
HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" -X POST "$SERVER_URL/v1/responses" \
-H "Content-Type: application/json" \
-d "$LARGE_PAYLOAD" \
--max-time 5 2>/dev/null)
if [ "$HTTP_CODE" = "413" ]; then
echo -e "${GREEN}✓ PASS: Received HTTP 413 (Request Entity Too Large)${NC}"
else
echo -e "${RED}✗ FAIL: Expected 413, got $HTTP_CODE${NC}"
fi
echo ""
# Test 2: Normal request size
echo -e "${YELLOW}Test 2: Normal Request Size${NC}"
echo "Sending a small valid request..."
HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" -X POST "$SERVER_URL/v1/responses" \
-H "Content-Type: application/json" \
-d '{"model":"test","input":"hello"}' \
--max-time 5 2>/dev/null)
# Expected: either 400 (invalid model) or 502 (provider error), but NOT 413
if [ "$HTTP_CODE" != "413" ]; then
echo -e "${GREEN}✓ PASS: Request not rejected by size limit (HTTP $HTTP_CODE)${NC}"
else
echo -e "${RED}✗ FAIL: Small request incorrectly rejected with 413${NC}"
fi
echo ""
# Test 3: Health endpoint
echo -e "${YELLOW}Test 3: Health Endpoint${NC}"
echo "Checking /health endpoint..."
RESPONSE=$(curl -s -X GET "$SERVER_URL/health" --max-time 5 2>/dev/null)
HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" -X GET "$SERVER_URL/health" --max-time 5 2>/dev/null)
if [ "$HTTP_CODE" = "200" ] && echo "$RESPONSE" | grep -q "healthy"; then
echo -e "${GREEN}✓ PASS: Health endpoint responding correctly${NC}"
else
echo -e "${RED}✗ FAIL: Health endpoint not responding correctly (HTTP $HTTP_CODE)${NC}"
fi
echo ""
# Test 4: Ready endpoint
echo -e "${YELLOW}Test 4: Ready Endpoint${NC}"
echo "Checking /ready endpoint..."
HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" -X GET "$SERVER_URL/ready" --max-time 5 2>/dev/null)
if [ "$HTTP_CODE" = "200" ] || [ "$HTTP_CODE" = "503" ]; then
echo -e "${GREEN}✓ PASS: Ready endpoint responding (HTTP $HTTP_CODE)${NC}"
else
echo -e "${RED}✗ FAIL: Ready endpoint not responding correctly (HTTP $HTTP_CODE)${NC}"
fi
echo ""
# Test 5: Models endpoint
echo -e "${YELLOW}Test 5: Models Endpoint${NC}"
echo "Checking /v1/models endpoint..."
RESPONSE=$(curl -s -X GET "$SERVER_URL/v1/models" --max-time 5 2>/dev/null)
HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" -X GET "$SERVER_URL/v1/models" --max-time 5 2>/dev/null)
if [ "$HTTP_CODE" = "200" ] && echo "$RESPONSE" | grep -q "object"; then
echo -e "${GREEN}✓ PASS: Models endpoint responding correctly${NC}"
else
echo -e "${RED}✗ FAIL: Models endpoint not responding correctly (HTTP $HTTP_CODE)${NC}"
fi
echo ""
echo "================================================"
echo -e "${GREEN}Testing complete!${NC}"
echo ""
echo "Note: Panic recovery cannot be tested externally without"
echo "causing intentional server errors. It has been verified"
echo "through unit tests in middleware_test.go"