---
# Server settings.
server:
  address: ":8080"
  # Maximum request body size in bytes (default: 10485760 bytes = 10 MiB).
  max_request_body_size: 10485760

# Logging settings.
logging:
  format: "json"  # "json" for production, "text" for development
  level: "info"  # "debug", "info", "warn", or "error"

# Rate limiting settings.
rate_limit:
  enabled: false  # Enable rate limiting (recommended for production)
  requests_per_second: 10  # Max requests per second per IP (default: 10)
  burst: 20  # Maximum burst size (default: 20)

# Observability settings: a top-level switch plus separate metrics and
# tracing sections, each with its own `enabled` flag.
observability:
  enabled: false  # Enable observability features (metrics and tracing)

  metrics:
    enabled: false  # Enable Prometheus metrics
    path: "/metrics"  # Metrics endpoint path (default: /metrics)

  tracing:
    enabled: false  # Enable OpenTelemetry tracing
    service_name: "llm-gateway"  # Service name for traces (default: llm-gateway)
    sampler:
      type: "probability"  # Sampling type: "always", "never", "probability"
      # Sample rate for probability sampler (0.0 to 1.0, default: 0.1 = 10%)
      rate: 0.1
    exporter:
      # Exporter type: "otlp" (production), "stdout" (development)
      type: "otlp"
      endpoint: "localhost:4317"  # OTLP collector endpoint (gRPC)
      insecure: true  # Use insecure connection (for development)
      # headers:  # Optional: custom headers for authentication
      #   authorization: "Bearer your-token-here"

# Admin UI and API.
# Note: this example switches the admin UI/API on, although the documented
# default is false.
admin:
  enabled: true  # Enable admin UI and API (default: false)

# Upstream providers, keyed by provider name.
# The api_key values below are placeholders; replace them with real
# credentials and avoid committing real keys to version control.
providers:
  google:
    type: "google"
    api_key: "YOUR_GOOGLE_API_KEY"
    endpoint: "https://generativelanguage.googleapis.com"
  anthropic:
    type: "anthropic"
    api_key: "YOUR_ANTHROPIC_API_KEY"
    endpoint: "https://api.anthropic.com"
  openai:
    type: "openai"
    api_key: "YOUR_OPENAI_API_KEY"
    endpoint: "https://api.openai.com"

  # Vertex AI (Google Cloud) - optional
  # Uses Application Default Credentials (ADC) or service account
  # vertexai:
  #   type: "vertexai"
  #   project: "your-gcp-project-id"
  #   location: "us-central1"  # or other GCP region

  # Azure OpenAI - optional
  # azureopenai:
  #   type: "azureopenai"
  #   api_key: "YOUR_AZURE_OPENAI_API_KEY"
  #   endpoint: "https://your-resource.openai.azure.com"
  #   api_version: "2024-12-01-preview"

  # Azure-hosted Anthropic (Microsoft Foundry) - optional
  # azureanthropic:
  #   type: "azureanthropic"
  #   api_key: "YOUR_AZURE_ANTHROPIC_API_KEY"
  #   endpoint: "https://your-resource.services.ai.azure.com/anthropic"

# Conversation storage - optional (uncomment to enable)
# conversations:
#   store: "sql"  # "memory" (default), "sql", or "redis"
#   ttl: "1h"  # conversation expiration (default: 1h)
#   # SQL driver: "sqlite3", "mysql", "pgx" (required for sql store)
#   driver: "sqlite3"
#   # connection string (required for sql/redis store)
#   dsn: "conversations.db"
#   # MySQL example:
#   # driver: "mysql"
#   # dsn: "user:password@tcp(localhost:3306)/dbname?parseTime=true"
#   # PostgreSQL example:
#   # driver: "pgx"
#   # dsn: "postgres://user:password@localhost:5432/dbname?sslmode=disable"
#   # Redis example:
#   # store: "redis"
#   # dsn: "redis://:password@localhost:6379/0"

# Models exposed by the gateway. Each entry pairs a model name with a
# provider; the provider values used here ("google", "anthropic", "openai")
# match the keys defined under `providers`.
models:
  - name: "gemini-1.5-flash"
    provider: "google"
  - name: "claude-3-5-sonnet"
    provider: "anthropic"
  - name: "gpt-4o-mini"
    provider: "openai"
  # - name: "gemini-2.0-flash-exp"
  #   provider: "vertexai"  # Use Vertex AI instead of Google AI API
  # - name: "gpt-4o"
  #   provider: "azureopenai"
  #   provider_model_id: "my-gpt4o-deployment"  # optional: defaults to name
  # - name: "claude-sonnet-4-5-20250514"
  #   provider: "azureanthropic"