From 610b6c3367e407cb6323eb012b0f3f894da61649 Mon Sep 17 00:00:00 2001
From: Anibal Angulo
Date: Fri, 6 Mar 2026 16:26:25 +0000
Subject: [PATCH 1/3] Add deployment guides

---
 Dockerfile                |   24 +-
 README.md                 |  764 +++++++++++++++++++++----
 docs/DOCKER_DEPLOYMENT.md |  471 ++++++++++++++++
 docs/README.md            |   74 +++
 k8s/README.md             | 1122 +++++++++++++++++++++++++++-----------
 5 files changed, 2051 insertions(+), 404 deletions(-)
 create mode 100644 docs/DOCKER_DEPLOYMENT.md
 create mode 100644 docs/README.md

diff --git a/Dockerfile b/Dockerfile
index 51d348e..0914e82 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,9 +1,23 @@
 # Multi-stage build for Go LLM Gateway
-# Stage 1: Build the Go binary
+
+# Stage 1: Build the frontend
+FROM node:18-alpine AS frontend-builder
+
+WORKDIR /frontend
+
+# Copy package files for better caching
+COPY frontend/admin/package*.json ./
+RUN npm ci
+
+# Copy frontend source and build
+COPY frontend/admin/ ./
+RUN npm run build
+
+# Stage 2: Build the Go binary
 FROM golang:alpine AS builder

 # Install build dependencies
-RUN apk add --no-cache git ca-certificates tzdata
+RUN apk add --no-cache git ca-certificates tzdata gcc musl-dev

 WORKDIR /build

 # Copy go mod files first for better layer caching
 COPY go.mod go.sum ./
 RUN go mod download

 # Copy source code
 COPY . .
+# Copy pre-built frontend assets from stage 1 +COPY --from=frontend-builder /frontend/dist ./internal/admin/dist + # Build the binary with optimizations # CGO is required for SQLite support -RUN apk add --no-cache gcc musl-dev && \ - CGO_ENABLED=1 GOOS=linux GOARCH=amd64 go build \ +RUN CGO_ENABLED=1 GOOS=linux GOARCH=amd64 go build \ -ldflags='-w -s -extldflags "-static"' \ -a -installsuffix cgo \ -o gateway \ diff --git a/README.md b/README.md index f0781d4..26290d9 100644 --- a/README.md +++ b/README.md @@ -1,16 +1,47 @@ # latticelm +> A production-ready LLM proxy gateway written in Go with enterprise features + +## Table of Contents + +- [Overview](#overview) +- [Supported Providers](#supported-providers) +- [Key Features](#key-features) +- [Status](#status) +- [Use Cases](#use-cases) +- [Architecture](#architecture) +- [Quick Start](#quick-start) +- [API Standard](#api-standard) +- [API Reference](#api-reference) +- [Tech Stack](#tech-stack) +- [Project Structure](#project-structure) +- [Configuration](#configuration) +- [Chat Client](#chat-client) +- [Conversation Management](#conversation-management) +- [Observability](#observability) +- [Circuit Breakers](#circuit-breakers) +- [Azure OpenAI](#azure-openai) +- [Azure Anthropic](#azure-anthropic-microsoft-foundry) +- [Admin Web UI](#admin-web-ui) +- [Deployment](#deployment) +- [Authentication](#authentication) +- [Production Features](#production-features) +- [Roadmap](#roadmap) +- [Documentation](#documentation) +- [Contributing](#contributing) +- [License](#license) + ## Overview -A lightweight LLM proxy gateway written in Go that provides a unified API interface for multiple LLM providers. Similar to LiteLLM, but built natively in Go using each provider's official SDK. +A production-ready LLM proxy gateway written in Go that provides a unified API interface for multiple LLM providers. 
Similar to LiteLLM, but built natively in Go using each provider's official SDK with enterprise features including rate limiting, circuit breakers, observability, and authentication. -## Purpose +## Supported Providers -Simplify LLM integration by exposing a single, consistent API that routes requests to different providers: - **OpenAI** (GPT models) -- **Azure OpenAI** (Azure-deployed models) -- **Anthropic** (Claude) -- **Google Generative AI** (Gemini) +- **Azure OpenAI** (Azure-deployed OpenAI models) +- **Anthropic** (Claude models) +- **Azure Anthropic** (Microsoft Foundry-hosted Claude models) +- **Google Generative AI** (Gemini models) - **Vertex AI** (Google Cloud-hosted Gemini models) Instead of managing multiple SDK integrations in your application, call one endpoint and let the gateway handle provider-specific implementations. @@ -31,11 +62,24 @@ latticelm (unified API) ## Key Features +### Core Functionality - **Single API interface** for multiple LLM providers - **Native Go SDKs** for optimal performance and type safety - **Provider abstraction** - switch providers without changing client code -- **Lightweight** - minimal overhead, fast routing -- **Easy configuration** - manage API keys and provider settings centrally +- **Streaming support** - Server-Sent Events for all providers +- **Conversation tracking** - Efficient context management with `previous_response_id` + +### Production Features +- **Circuit breakers** - Automatic failure detection and recovery per provider +- **Rate limiting** - Per-IP token bucket algorithm with configurable limits +- **OAuth2/OIDC authentication** - Support for Google, Auth0, and any OIDC provider +- **Observability** - Prometheus metrics and OpenTelemetry tracing +- **Health checks** - Kubernetes-compatible liveness and readiness endpoints +- **Admin Web UI** - Built-in dashboard for monitoring and configuration + +### Configuration +- **Easy setup** - YAML configuration with environment variable overrides +- 
**Flexible storage** - In-memory, SQLite, MySQL, PostgreSQL, or Redis for conversations ## Use Cases @@ -45,43 +89,70 @@ latticelm (unified API) - A/B testing across different models - Centralized LLM access for microservices -## πŸŽ‰ Status: **WORKING!** +## Status -βœ… **All providers integrated with official Go SDKs:** +**Production Ready** - All core features implemented and tested. + +### Provider Integration +βœ… All providers use official Go SDKs: - OpenAI β†’ `github.com/openai/openai-go/v3` - Azure OpenAI β†’ `github.com/openai/openai-go/v3` (with Azure auth) - Anthropic β†’ `github.com/anthropics/anthropic-sdk-go` -- Google β†’ `google.golang.org/genai` +- Azure Anthropic β†’ `github.com/anthropics/anthropic-sdk-go` (with Azure auth) +- Google Gen AI β†’ `google.golang.org/genai` - Vertex AI β†’ `google.golang.org/genai` (with GCP auth) -βœ… **Compiles successfully** (36MB binary) -βœ… **Provider auto-selection** (gptβ†’Azure/OpenAI, claudeβ†’Anthropic, geminiβ†’Google) -βœ… **Configuration system** (YAML with env var support) -βœ… **Streaming support** (Server-Sent Events for all providers) -βœ… **OAuth2/OIDC authentication** (Google, Auth0, any OIDC provider) -βœ… **Terminal chat client** (Python with Rich UI, PEP 723) -βœ… **Conversation tracking** (previous_response_id for efficient context) -βœ… **Rate limiting** (Per-IP token bucket with configurable limits) -βœ… **Health & readiness endpoints** (Kubernetes-compatible health checks) -βœ… **Admin Web UI** (Dashboard with system info, health checks, provider status) +### Features +βœ… Provider auto-selection (gptβ†’OpenAI, claudeβ†’Anthropic, geminiβ†’Google) +βœ… Streaming responses (Server-Sent Events) +βœ… Conversation tracking with `previous_response_id` +βœ… OAuth2/OIDC authentication +βœ… Rate limiting with token bucket algorithm +βœ… Circuit breakers for fault tolerance +βœ… Observability (Prometheus metrics + OpenTelemetry tracing) +βœ… Health & readiness endpoints +βœ… Admin Web UI dashboard 
+βœ… Terminal chat client (Python with Rich UI) ## Quick Start +### Prerequisites + +- Go 1.21+ (for building from source) +- Docker (optional, for containerized deployment) +- Node.js 18+ (optional, for Admin UI development) + +### Running Locally + ```bash -# 1. Set API keys +# 1. Clone the repository +git clone https://github.com/yourusername/latticelm.git +cd latticelm + +# 2. Set API keys export OPENAI_API_KEY="your-key" export ANTHROPIC_API_KEY="your-key" export GOOGLE_API_KEY="your-key" -# 2. Build (includes Admin UI) -cd latticelm +# 3. Copy and configure settings (optional) +cp config.example.yaml config.yaml +# Edit config.yaml to customize settings + +# 4. Build (includes Admin UI) make build-all -# 3. Run +# 5. Run ./bin/llm-gateway -# 4. Test (non-streaming) -curl -X POST http://localhost:8080/v1/chat/completions \ +# Gateway starts on http://localhost:8080 +# Admin UI available at http://localhost:8080/admin/ +``` + +### Testing the API + +**Non-streaming request:** +```bash +curl -X POST http://localhost:8080/v1/responses \ -H "Content-Type: application/json" \ -d '{ "model": "gpt-4o-mini", @@ -92,9 +163,11 @@ curl -X POST http://localhost:8080/v1/chat/completions \ } ] }' +``` -# 5. Test streaming -curl -X POST http://localhost:8080/v1/chat/completions \ +**Streaming request:** +```bash +curl -X POST http://localhost:8080/v1/responses \ -H "Content-Type: application/json" \ -N \ -d '{ @@ -109,6 +182,20 @@ curl -X POST http://localhost:8080/v1/chat/completions \ }' ``` +### Development Mode + +Run backend and frontend separately for live reloading: + +```bash +# Terminal 1: Backend with auto-reload +make dev-backend + +# Terminal 2: Frontend dev server +make dev-frontend +``` + +Frontend runs on `http://localhost:5173` with hot module replacement. 
+ ## API Standard This gateway implements the **[Open Responses](https://www.openresponses.org)** specification β€” an open-source, multi-provider API standard for LLM interfaces based on OpenAI's Responses API. @@ -125,64 +212,245 @@ By following the Open Responses spec, this gateway ensures: For full specification details, see: **https://www.openresponses.org** +## API Reference + +### Core Endpoints + +#### POST /v1/responses +Create a chat completion response (streaming or non-streaming). + +**Request body:** +```json +{ + "model": "gpt-4o-mini", + "stream": false, + "input": [ + { + "role": "user", + "content": [{"type": "input_text", "text": "Hello!"}] + } + ], + "previous_response_id": "optional-conversation-id", + "provider": "optional-explicit-provider" +} +``` + +**Response (non-streaming):** +```json +{ + "id": "resp_abc123", + "object": "response", + "model": "gpt-4o-mini", + "provider": "openai", + "output": [ + { + "role": "assistant", + "content": [{"type": "text", "text": "Hello! How can I help you?"}] + } + ], + "usage": { + "input_tokens": 10, + "output_tokens": 8 + } +} +``` + +**Response (streaming):** +Server-Sent Events with `data: {...}` lines containing deltas. + +#### GET /v1/models +List available models. + +**Response:** +```json +{ + "object": "list", + "data": [ + {"id": "gpt-4o-mini", "provider": "openai"}, + {"id": "claude-3-5-sonnet", "provider": "anthropic"}, + {"id": "gemini-1.5-flash", "provider": "google"} + ] +} +``` + +### Health Endpoints + +#### GET /health +Liveness probe (always returns 200 if server is running). + +**Response:** +```json +{ + "status": "healthy", + "timestamp": 1709438400 +} +``` + +#### GET /ready +Readiness probe (checks conversation store and providers). + +**Response:** +```json +{ + "status": "ready", + "timestamp": 1709438400, + "checks": { + "conversation_store": "healthy", + "providers": "healthy" + } +} +``` + +Returns 503 if any check fails. 
+ +### Admin Endpoints + +#### GET /admin/ +Web dashboard (when admin UI is enabled). + +#### GET /admin/api/info +System information. + +#### GET /admin/api/health +Detailed health status. + +#### GET /admin/api/config +Current configuration (secrets masked). + +### Observability Endpoints + +#### GET /metrics +Prometheus metrics (when observability is enabled). + ## Tech Stack - **Language:** Go - **API Specification:** [Open Responses](https://www.openresponses.org) -- **SDKs:** - - `google.golang.org/genai` (Google Generative AI) - - Anthropic Go SDK - - OpenAI Go SDK -- **Transport:** RESTful HTTP (potentially gRPC in the future) - -## Status - -🚧 **In Development** - Project specification and initial setup phase. - -## Getting Started - -1. **Copy the example config** and fill in provider API keys: - - ```bash - cp config.example.yaml config.yaml - ``` - - You can also override API keys via environment variables (`GOOGLE_API_KEY`, `ANTHROPIC_API_KEY`, `OPENAI_API_KEY`). - -2. **Run the gateway** using the default configuration path: - - ```bash - go run ./cmd/gateway --config config.yaml - ``` - - The server listens on the address configured under `server.address` (defaults to `:8080`). - -3. **Call the Open Responses endpoint**: - - ```bash - curl -X POST http://localhost:8080/v1/responses \ - -H 'Content-Type: application/json' \ - -d '{ - "model": "gpt-4o-mini", - "input": [ - {"role": "user", "content": [{"type": "input_text", "text": "Hello!"}]} - ] - }' - ``` - - Include `"provider": "anthropic"` (or `google`, `openai`) to pin a provider; otherwise the gateway infers it from the model name. 
+- **Official SDKs:** + - `google.golang.org/genai` (Google Generative AI & Vertex AI) + - `github.com/anthropics/anthropic-sdk-go` (Anthropic & Azure Anthropic) + - `github.com/openai/openai-go/v3` (OpenAI & Azure OpenAI) +- **Observability:** + - Prometheus for metrics + - OpenTelemetry for distributed tracing +- **Resilience:** + - Circuit breakers via `github.com/sony/gobreaker` + - Token bucket rate limiting +- **Transport:** RESTful HTTP with Server-Sent Events for streaming ## Project Structure -- `cmd/gateway`: Entry point that loads configuration, wires providers, and starts the HTTP server. -- `internal/config`: YAML configuration loader with environment overrides for API keys. -- `internal/api`: Open Responses request/response types and validation helpers. -- `internal/server`: HTTP handlers that expose `/v1/responses`. -- `internal/providers`: Provider abstractions plus provider-specific scaffolding in `google`, `anthropic`, and `openai` subpackages. +``` +latticelm/ +β”œβ”€β”€ cmd/gateway/ # Main application entry point +β”œβ”€β”€ internal/ +β”‚ β”œβ”€β”€ admin/ # Admin UI backend and embedded frontend +β”‚ β”œβ”€β”€ api/ # Open Responses types and validation +β”‚ β”œβ”€β”€ auth/ # OAuth2/OIDC authentication +β”‚ β”œβ”€β”€ config/ # YAML configuration loader +β”‚ β”œβ”€β”€ conversation/ # Conversation tracking and storage +β”‚ β”œβ”€β”€ logger/ # Structured logging setup +β”‚ β”œβ”€β”€ metrics/ # Prometheus metrics +β”‚ β”œβ”€β”€ providers/ # Provider implementations +β”‚ β”‚ β”œβ”€β”€ anthropic/ +β”‚ β”‚ β”œβ”€β”€ azureanthropic/ +β”‚ β”‚ β”œβ”€β”€ azureopenai/ +β”‚ β”‚ β”œβ”€β”€ google/ +β”‚ β”‚ β”œβ”€β”€ openai/ +β”‚ β”‚ └── vertexai/ +β”‚ β”œβ”€β”€ ratelimit/ # Rate limiting implementation +β”‚ β”œβ”€β”€ server/ # HTTP server and handlers +β”‚ └── tracing/ # OpenTelemetry tracing +β”œβ”€β”€ frontend/admin/ # Vue.js Admin UI +β”œβ”€β”€ k8s/ # Kubernetes manifests +β”œβ”€β”€ tests/ # Integration tests +β”œβ”€β”€ config.example.yaml # Example 
configuration +β”œβ”€β”€ Makefile # Build and development tasks +└── README.md +``` + +## Configuration + +The gateway uses a YAML configuration file with support for environment variable overrides. + +### Basic Configuration + +```yaml +server: + address: ":8080" + max_request_body_size: 10485760 # 10MB + +logging: + format: "json" # or "text" for development + level: "info" # debug, info, warn, error + +# Configure providers (API keys can use ${ENV_VAR} syntax) +providers: + openai: + type: "openai" + api_key: "${OPENAI_API_KEY}" + anthropic: + type: "anthropic" + api_key: "${ANTHROPIC_API_KEY}" + google: + type: "google" + api_key: "${GOOGLE_API_KEY}" + +# Map model names to providers +models: + - name: "gpt-4o-mini" + provider: "openai" + - name: "claude-3-5-sonnet" + provider: "anthropic" + - name: "gemini-1.5-flash" + provider: "google" +``` + +### Advanced Configuration + +```yaml +# Rate limiting +rate_limit: + enabled: true + requests_per_second: 10 + burst: 20 + +# Authentication +auth: + enabled: true + issuer: "https://accounts.google.com" + audience: "your-client-id.apps.googleusercontent.com" + +# Observability +observability: + enabled: true + metrics: + enabled: true + path: "/metrics" + tracing: + enabled: true + service_name: "llm-gateway" + exporter: + type: "otlp" + endpoint: "localhost:4317" + +# Conversation storage +conversations: + store: "sql" # memory, sql, or redis + ttl: "1h" + driver: "sqlite3" + dsn: "conversations.db" + +# Admin UI +admin: + enabled: true +``` + +See `config.example.yaml` for complete configuration options with detailed comments. 
## Chat Client -Interactive terminal chat interface with beautiful Rich UI: +Interactive terminal chat interface with beautiful Rich UI powered by Python and the Rich library: ```bash # Basic usage @@ -196,20 +464,118 @@ You> /model claude You> /models # List all available models ``` -The chat client automatically uses `previous_response_id` to reduce token usage by only sending new messages instead of the full conversation history. +Features: +- **Syntax highlighting** for code blocks +- **Markdown rendering** for formatted responses +- **Model switching** on the fly with `/model` command +- **Conversation history** with automatic `previous_response_id` tracking +- **Streaming responses** with real-time display -See **[CHAT_CLIENT.md](./CHAT_CLIENT.md)** for full documentation. +The chat client uses [PEP 723](https://peps.python.org/pep-0723/) inline script metadata, so `uv run` automatically installs dependencies. ## Conversation Management -The gateway implements conversation tracking using `previous_response_id` from the Open Responses spec: +The gateway implements efficient conversation tracking using `previous_response_id` from the Open Responses spec: -- πŸ“‰ **Reduced token usage** - Only send new messages -- ⚑ **Smaller requests** - Less bandwidth -- 🧠 **Server-side context** - Gateway maintains history -- ⏰ **Auto-expire** - Conversations expire after 1 hour +- πŸ“‰ **Reduced token usage** - Only send new messages, not full history +- ⚑ **Smaller requests** - Less bandwidth and faster responses +- 🧠 **Server-side context** - Gateway maintains conversation state +- ⏰ **Auto-expire** - Conversations expire after configurable TTL (default: 1 hour) -See **[CONVERSATIONS.md](./CONVERSATIONS.md)** for details. 
+### Storage Options
+
+Choose from multiple storage backends:
+
+```yaml
+conversations:
+  store: "memory"  # "memory", "sql", or "redis"
+  ttl: "1h"        # Conversation expiration
+
+  # SQLite (default for sql)
+  driver: "sqlite3"
+  dsn: "conversations.db"
+
+  # MySQL
+  # driver: "mysql"
+  # dsn: "user:password@tcp(localhost:3306)/dbname?parseTime=true"
+
+  # PostgreSQL
+  # driver: "pgx"
+  # dsn: "postgres://user:password@localhost:5432/dbname?sslmode=disable"
+
+  # Redis
+  # store: "redis"
+  # dsn: "redis://:password@localhost:6379/0"
+```
+
+## Observability
+
+The gateway provides comprehensive observability through Prometheus metrics and OpenTelemetry tracing.
+
+### Metrics
+
+Enable Prometheus metrics to monitor gateway performance:
+
+```yaml
+observability:
+  enabled: true
+  metrics:
+    enabled: true
+    path: "/metrics"  # Default endpoint
+```
+
+Available metrics include:
+- Request counts and latencies per provider and model
+- Error rates and types
+- Circuit breaker state changes
+- Rate limit hits
+- Conversation store operations
+
+Access metrics at `http://localhost:8080/metrics` (Prometheus scrape format).
+
+### Tracing
+
+Enable OpenTelemetry tracing for distributed request tracking:
+
+```yaml
+observability:
+  enabled: true
+  tracing:
+    enabled: true
+    service_name: "llm-gateway"
+    sampler:
+      type: "probability"  # "always", "never", or "probability"
+      rate: 0.1            # Sample 10% of requests
+    exporter:
+      type: "otlp"                # Send to OpenTelemetry Collector
+      endpoint: "localhost:4317"  # gRPC endpoint
+      insecure: true              # Disables TLS; set to false (TLS) in production
+```
+
+Traces include:
+- End-to-end request flow
+- Provider API calls
+- Conversation store lookups
+- Circuit breaker operations
+- Authentication checks
+
+Use with Jaeger, Zipkin, or any OpenTelemetry-compatible backend.
+
+## Circuit Breakers
+
+The gateway automatically wraps each provider with a circuit breaker for fault tolerance. When a provider experiences failures, the circuit breaker cycles through three states:
+
+1. 
**Closed state** - Normal operation, requests pass through +2. **Open state** - Fast-fail after threshold reached, returns errors immediately +3. **Half-open state** - Allows test requests to check if provider recovered + +Default configuration (per provider): +- **Max requests in half-open**: 3 +- **Interval**: 60 seconds (resets failure count) +- **Timeout**: 30 seconds (open β†’ half-open transition) +- **Failure ratio**: 0.5 (50% failures trips circuit) + +Circuit breaker state changes are logged and exposed via metrics. ## Azure OpenAI @@ -235,7 +601,33 @@ export AZURE_OPENAI_ENDPOINT="https://your-resource.openai.azure.com" ./gateway ``` -The `provider_model_id` field lets you map a friendly model name to the actual provider identifier (e.g., an Azure deployment name). If omitted, the model `name` is used directly. See **[AZURE_OPENAI.md](./AZURE_OPENAI.md)** for complete setup guide. +The `provider_model_id` field lets you map a friendly model name to the actual provider identifier (e.g., an Azure deployment name). If omitted, the model `name` is used directly. + +## Azure Anthropic (Microsoft Foundry) + +The gateway supports Azure-hosted Anthropic models through Microsoft's AI Foundry: + +```yaml +providers: + azureanthropic: + type: "azureanthropic" + api_key: "${AZURE_ANTHROPIC_API_KEY}" + endpoint: "https://your-resource.services.ai.azure.com/anthropic" + +models: + - name: "claude-sonnet-4-5" + provider: "azureanthropic" + provider_model_id: "claude-sonnet-4-5-20250514" # optional +``` + +```bash +export AZURE_ANTHROPIC_API_KEY="..." +export AZURE_ANTHROPIC_ENDPOINT="https://your-resource.services.ai.azure.com/anthropic" + +./gateway +``` + +Azure Anthropic provides Claude models with Azure's compliance, security, and regional deployment options. ## Admin Web UI @@ -277,11 +669,94 @@ make dev-frontend Frontend dev server runs on `http://localhost:5173` and proxies API requests to backend. 
+## Deployment + +### Docker + +**See the [Docker Deployment Guide](./docs/DOCKER_DEPLOYMENT.md)** for complete instructions on using pre-built images. + +Build and run with Docker: + +```bash +# Build Docker image (includes Admin UI automatically) +docker build -t llm-gateway:latest . + +# Run container +docker run -d \ + --name llm-gateway \ + -p 8080:8080 \ + -e GOOGLE_API_KEY="your-key" \ + -e ANTHROPIC_API_KEY="your-key" \ + -e OPENAI_API_KEY="your-key" \ + llm-gateway:latest + +# Check status +docker logs llm-gateway +``` + +The Docker build uses a multi-stage process that automatically builds the frontend, so you don't need Node.js installed locally. + +**Using Docker Compose:** + +```yaml +version: '3.8' +services: + llm-gateway: + build: . + ports: + - "8080:8080" + environment: + - OPENAI_API_KEY=${OPENAI_API_KEY} + - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY} + - GOOGLE_API_KEY=${GOOGLE_API_KEY} + restart: unless-stopped +``` + +```bash +docker-compose up -d +``` + +The Docker image: +- Uses 3-stage build (frontend β†’ backend β†’ runtime) for minimal size (~50MB) +- Automatically builds and embeds the Admin UI +- Runs as non-root user (UID 1000) for security +- Includes health checks for orchestration +- No need for Node.js or Go installed locally + +### Kubernetes + +Production-ready Kubernetes manifests are available in the `k8s/` directory: + +```bash +# Deploy to Kubernetes +kubectl apply -k k8s/ + +# Or deploy individual manifests +kubectl apply -f k8s/namespace.yaml +kubectl apply -f k8s/deployment.yaml +kubectl apply -f k8s/service.yaml +kubectl apply -f k8s/ingress.yaml +``` + +Features included: +- **High availability** - 3+ replicas with pod anti-affinity +- **Auto-scaling** - HorizontalPodAutoscaler (3-20 replicas) +- **Security** - Non-root, read-only filesystem, network policies +- **Monitoring** - ServiceMonitor and PrometheusRule for Prometheus Operator +- **Storage** - Redis StatefulSet for conversation persistence +- **Ingress** - TLS with 
cert-manager integration + +See **[k8s/README.md](./k8s/README.md)** for complete deployment guide including: +- Cloud-specific configurations (AWS EKS, GCP GKE, Azure AKS) +- Secrets management (External Secrets Operator, Sealed Secrets) +- Monitoring and alerting setup +- Troubleshooting guide + ## Authentication -The gateway supports OAuth2/OIDC authentication. See **[AUTH.md](./AUTH.md)** for setup instructions. +The gateway supports OAuth2/OIDC authentication for securing API access. -**Quick example with Google OAuth:** +### Configuration ```yaml auth: @@ -349,12 +824,109 @@ The readiness endpoint verifies: - At least one provider is configured - Returns 503 if any check fails -## Next Steps +## Roadmap -- βœ… ~~Implement streaming responses~~ -- βœ… ~~Add OAuth2/OIDC authentication~~ -- βœ… ~~Implement conversation tracking with previous_response_id~~ -- ⬜ Add structured logging, tracing, and request-level metrics -- ⬜ Support tool/function calling -- ⬜ Persistent conversation storage (Redis/database) -- ⬜ Expand configuration to support routing policies (cost, latency, failover) +### Completed βœ… +- βœ… Streaming responses (Server-Sent Events) +- βœ… OAuth2/OIDC authentication +- βœ… Conversation tracking with `previous_response_id` +- βœ… Persistent conversation storage (SQL and Redis) +- βœ… Circuit breakers for fault tolerance +- βœ… Rate limiting +- βœ… Observability (Prometheus metrics and OpenTelemetry tracing) +- βœ… Admin Web UI +- βœ… Health and readiness endpoints + +### In Progress 🚧 +- ⬜ Tool/function calling support across providers +- ⬜ Request-level cost tracking and budgets +- ⬜ Advanced routing policies (cost optimization, latency-based, failover) +- ⬜ Multi-tenancy with per-tenant rate limits and quotas +- ⬜ Request caching for identical prompts +- ⬜ Webhook notifications for events (failures, circuit breaker changes) + +## Documentation + +Comprehensive guides and documentation are available in the `/docs` directory: + +- **[Docker 
Deployment Guide](./docs/DOCKER_DEPLOYMENT.md)** - Deploy with pre-built images or build from source +- **[Kubernetes Deployment Guide](./k8s/README.md)** - Production deployment with Kubernetes +- **[Admin UI Documentation](./docs/ADMIN_UI.md)** - Using the web dashboard +- **[Configuration Reference](./config.example.yaml)** - All configuration options explained + +See the **[docs directory README](./docs/README.md)** for a complete documentation index. + +## Contributing + +Contributions are welcome! Here's how you can help: + +### Reporting Issues + +- **Bug reports**: Include steps to reproduce, expected vs actual behavior, and environment details +- **Feature requests**: Describe the use case and why it would be valuable +- **Security issues**: Email security concerns privately (don't open public issues) + +### Development Workflow + +1. **Fork and clone** the repository +2. **Create a branch** for your feature: `git checkout -b feature/your-feature-name` +3. **Make your changes** with clear, atomic commits +4. **Add tests** for new functionality +5. **Run tests**: `make test` +6. **Run linter**: `make lint` +7. **Update documentation** if needed +8. **Submit a pull request** with a clear description + +### Code Standards + +- Follow Go best practices and idioms +- Write tests for new features and bug fixes +- Keep functions small and focused +- Use meaningful variable names +- Add comments for complex logic +- Run `go fmt` before committing + +### Testing + +```bash +# Run all tests +make test + +# Run specific package tests +go test ./internal/providers/... + +# Run with coverage +make test-coverage + +# Run integration tests (requires API keys) +make test-integration +``` + +### Adding a New Provider + +1. Create provider implementation in `internal/providers/yourprovider/` +2. Implement the `Provider` interface +3. Add provider registration in `internal/providers/providers.go` +4. Add configuration support in `internal/config/` +5. 
Add tests and update documentation + +## License + +MIT License - see the repository for details. + +## Acknowledgments + +- Built with official SDKs from OpenAI, Anthropic, and Google +- Inspired by [LiteLLM](https://github.com/BerriAI/litellm) +- Implements the [Open Responses](https://www.openresponses.org) specification +- Uses [gobreaker](https://github.com/sony/gobreaker) for circuit breaker functionality + +## Support + +- **Documentation**: Check this README and the files in `/docs` +- **Issues**: Open a GitHub issue for bugs or feature requests +- **Discussions**: Use GitHub Discussions for questions and community support + +--- + +**Made with ❀️ in Go** diff --git a/docs/DOCKER_DEPLOYMENT.md b/docs/DOCKER_DEPLOYMENT.md new file mode 100644 index 0000000..0917549 --- /dev/null +++ b/docs/DOCKER_DEPLOYMENT.md @@ -0,0 +1,471 @@ +# Docker Deployment Guide + +> Deploy the LLM Gateway using pre-built Docker images or build your own. + +## Table of Contents + +- [Quick Start](#quick-start) +- [Using Pre-Built Images](#using-pre-built-images) +- [Configuration](#configuration) +- [Docker Compose](#docker-compose) +- [Building from Source](#building-from-source) +- [Production Considerations](#production-considerations) +- [Troubleshooting](#troubleshooting) + +## Quick Start + +Pull and run the latest image: + +```bash +docker run -d \ + --name llm-gateway \ + -p 8080:8080 \ + -e OPENAI_API_KEY="sk-your-key" \ + -e ANTHROPIC_API_KEY="sk-ant-your-key" \ + -e GOOGLE_API_KEY="your-key" \ + ghcr.io/yourusername/llm-gateway:latest + +# Verify it's running +curl http://localhost:8080/health +``` + +## Using Pre-Built Images + +Images are automatically built and published via GitHub Actions on every release. 
+ +### Available Tags + +- `latest` - Latest stable release +- `v1.2.3` - Specific version tags +- `main` - Latest commit on main branch (unstable) +- `sha-abc1234` - Specific commit SHA + +### Pull from Registry + +```bash +# Pull latest stable +docker pull ghcr.io/yourusername/llm-gateway:latest + +# Pull specific version +docker pull ghcr.io/yourusername/llm-gateway:v1.2.3 + +# List local images +docker images | grep llm-gateway +``` + +### Basic Usage + +```bash +docker run -d \ + --name llm-gateway \ + -p 8080:8080 \ + --env-file .env \ + ghcr.io/yourusername/llm-gateway:latest +``` + +## Configuration + +### Environment Variables + +Create a `.env` file with your API keys: + +```bash +# Required: At least one provider +OPENAI_API_KEY=sk-your-openai-key +ANTHROPIC_API_KEY=sk-ant-your-anthropic-key +GOOGLE_API_KEY=your-google-key + +# Optional: Server settings +SERVER_ADDRESS=:8080 +LOGGING_LEVEL=info +LOGGING_FORMAT=json + +# Optional: Features +ADMIN_ENABLED=true +RATE_LIMIT_ENABLED=true +RATE_LIMIT_REQUESTS_PER_SECOND=10 +RATE_LIMIT_BURST=20 + +# Optional: Auth +AUTH_ENABLED=false +AUTH_ISSUER=https://accounts.google.com +AUTH_AUDIENCE=your-client-id.apps.googleusercontent.com + +# Optional: Observability +OBSERVABILITY_ENABLED=false +OBSERVABILITY_METRICS_ENABLED=false +OBSERVABILITY_TRACING_ENABLED=false +``` + +Run with environment file: + +```bash +docker run -d \ + --name llm-gateway \ + -p 8080:8080 \ + --env-file .env \ + ghcr.io/yourusername/llm-gateway:latest +``` + +### Using Config File + +For more complex configurations, use a YAML config file: + +```bash +# Create config from example +cp config.example.yaml config.yaml +# Edit config.yaml with your settings + +# Mount config file into container +docker run -d \ + --name llm-gateway \ + -p 8080:8080 \ + -v $(pwd)/config.yaml:/app/config.yaml:ro \ + ghcr.io/yourusername/llm-gateway:latest \ + --config /app/config.yaml +``` + +### Persistent Storage + +For persistent conversation storage with 
SQLite: + +```bash +docker run -d \ + --name llm-gateway \ + -p 8080:8080 \ + -v llm-gateway-data:/app/data \ + -e OPENAI_API_KEY="your-key" \ + -e CONVERSATIONS_STORE=sql \ + -e CONVERSATIONS_DRIVER=sqlite3 \ + -e CONVERSATIONS_DSN=/app/data/conversations.db \ + ghcr.io/yourusername/llm-gateway:latest +``` + +## Docker Compose + +The project includes a production-ready `docker-compose.yaml` file. + +### Basic Setup + +```bash +# Create .env file with API keys +cat > .env < Production-ready Kubernetes manifests for deploying the LLM Gateway with high availability, monitoring, and security. -## Prerequisites +## Table of Contents -- Kubernetes cluster (v1.24+) -- `kubectl` configured -- Container registry access -- (Optional) Prometheus Operator for monitoring -- (Optional) cert-manager for TLS certificates -- (Optional) nginx-ingress-controller or cloud load balancer +- [Quick Start](#quick-start) +- [Prerequisites](#prerequisites) +- [Deployment](#deployment) +- [Configuration](#configuration) +- [Secrets Management](#secrets-management) +- [Monitoring](#monitoring) +- [Storage Options](#storage-options) +- [Scaling](#scaling) +- [Updates and Rollbacks](#updates-and-rollbacks) +- [Security](#security) +- [Cloud Provider Guides](#cloud-provider-guides) +- [Troubleshooting](#troubleshooting) ## Quick Start -### 1. Build and Push Docker Image +Deploy with default settings using pre-built images: ```bash -# Build the image -docker build -t your-registry/llm-gateway:v1.0.0 . 
+# Update kustomization.yaml with your image +cd k8s/ +vim kustomization.yaml # Set image to ghcr.io/yourusername/llm-gateway:v1.0.0 -# Push to registry -docker push your-registry/llm-gateway:v1.0.0 +# Create secrets +kubectl create namespace llm-gateway +kubectl create secret generic llm-gateway-secrets \ + --from-literal=OPENAI_API_KEY="sk-your-key" \ + --from-literal=ANTHROPIC_API_KEY="sk-ant-your-key" \ + --from-literal=GOOGLE_API_KEY="your-key" \ + -n llm-gateway + +# Deploy +kubectl apply -k . + +# Verify +kubectl get pods -n llm-gateway +kubectl logs -n llm-gateway -l app=llm-gateway ``` -### 2. Configure Secrets +## Prerequisites + +- **Kubernetes**: v1.24+ cluster +- **kubectl**: Configured and authenticated +- **Container images**: Access to `ghcr.io/yourusername/llm-gateway` + +**Optional but recommended:** +- **Prometheus Operator**: For metrics and alerting +- **cert-manager**: For automatic TLS certificates +- **Ingress Controller**: nginx, ALB, or GCE +- **External Secrets Operator**: For secrets management + +## Deployment + +### Using Kustomize (Recommended) -**Option A: Using kubectl** ```bash -kubectl create namespace llm-gateway +# Review and customize +cd k8s/ +vim kustomization.yaml # Update image, namespace, etc. +vim configmap.yaml # Configure gateway settings +vim ingress.yaml # Set your domain +# Deploy all resources +kubectl apply -k . 
+ +# Deploy with Kustomize overlays +kubectl apply -k overlays/production/ +``` + +### Using kubectl + +```bash +kubectl apply -f namespace.yaml +kubectl apply -f serviceaccount.yaml +kubectl apply -f secret.yaml +kubectl apply -f configmap.yaml +kubectl apply -f redis.yaml +kubectl apply -f deployment.yaml +kubectl apply -f service.yaml +kubectl apply -f ingress.yaml +kubectl apply -f hpa.yaml +kubectl apply -f pdb.yaml +kubectl apply -f networkpolicy.yaml +``` + +### With Monitoring + +If Prometheus Operator is installed: + +```bash +kubectl apply -f servicemonitor.yaml +kubectl apply -f prometheusrule.yaml +``` + +## Configuration + +### Image Configuration + +Update `kustomization.yaml`: + +```yaml +images: + - name: llm-gateway + newName: ghcr.io/yourusername/llm-gateway + newTag: v1.2.3 # Or 'latest', 'main', 'sha-abc123' +``` + +### Gateway Configuration + +Edit `configmap.yaml` for gateway settings: + +```yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: llm-gateway-config +data: + config.yaml: | + server: + address: ":8080" + + logging: + level: info + format: json + + rate_limit: + enabled: true + requests_per_second: 10 + burst: 20 + + observability: + enabled: true + metrics: + enabled: true + tracing: + enabled: true + exporter: + type: otlp + endpoint: tempo:4317 + + conversations: + store: redis + dsn: redis://redis:6379/0 + ttl: 1h +``` + +### Resource Limits + +Default resources (adjust based on load testing): + +```yaml +resources: + requests: + cpu: 100m + memory: 128Mi + limits: + cpu: 1000m + memory: 512Mi +``` + +### Ingress Configuration + +Edit `ingress.yaml` for your domain: + +```yaml +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: llm-gateway + annotations: + cert-manager.io/cluster-issuer: letsencrypt-prod + nginx.ingress.kubernetes.io/ssl-redirect: "true" +spec: + ingressClassName: nginx + tls: + - hosts: + - llm-gateway.yourdomain.com + secretName: llm-gateway-tls + rules: + - host: llm-gateway.yourdomain.com 
+ http: + paths: + - path: / + pathType: Prefix + backend: + service: + name: llm-gateway + port: + number: 80 +``` + +## Secrets Management + +### Option 1: kubectl (Development) + +```bash kubectl create secret generic llm-gateway-secrets \ - --from-literal=GOOGLE_API_KEY="your-key" \ - --from-literal=ANTHROPIC_API_KEY="your-key" \ - --from-literal=OPENAI_API_KEY="your-key" \ + --from-literal=OPENAI_API_KEY="sk-..." \ + --from-literal=ANTHROPIC_API_KEY="sk-ant-..." \ + --from-literal=GOOGLE_API_KEY="..." \ --from-literal=OIDC_AUDIENCE="your-client-id" \ -n llm-gateway ``` -**Option B: Using External Secrets Operator (Recommended)** -- Uncomment the ExternalSecret in `secret.yaml` -- Configure your SecretStore (AWS Secrets Manager, Vault, etc.) +### Option 2: External Secrets Operator (Production) -### 3. Update Configuration +Install ESO, then create ExternalSecret: -Edit `configmap.yaml`: -- Update Redis connection string if using external Redis -- Configure observability endpoints (Tempo, Prometheus) -- Adjust rate limits as needed -- Set OIDC issuer and audience - -Edit `ingress.yaml`: -- Replace `llm-gateway.example.com` with your domain -- Configure TLS certificate annotations - -Edit `kustomization.yaml`: -- Update image registry and tag - -### 4. Deploy - -**Using Kustomize (Recommended):** -```bash -kubectl apply -k k8s/ +```yaml +apiVersion: external-secrets.io/v1beta1 +kind: ExternalSecret +metadata: + name: llm-gateway-secrets + namespace: llm-gateway +spec: + refreshInterval: 1h + secretStoreRef: + name: aws-secretsmanager # or vault, gcpsm, etc. 
+ kind: ClusterSecretStore + target: + name: llm-gateway-secrets + data: + - secretKey: OPENAI_API_KEY + remoteRef: + key: llm-gateway/openai-key + - secretKey: ANTHROPIC_API_KEY + remoteRef: + key: llm-gateway/anthropic-key + - secretKey: GOOGLE_API_KEY + remoteRef: + key: llm-gateway/google-key ``` -**Using kubectl directly:** +### Option 3: Sealed Secrets + ```bash -kubectl apply -f k8s/namespace.yaml -kubectl apply -f k8s/serviceaccount.yaml -kubectl apply -f k8s/secret.yaml -kubectl apply -f k8s/configmap.yaml -kubectl apply -f k8s/redis.yaml -kubectl apply -f k8s/deployment.yaml -kubectl apply -f k8s/service.yaml -kubectl apply -f k8s/ingress.yaml -kubectl apply -f k8s/hpa.yaml -kubectl apply -f k8s/pdb.yaml -kubectl apply -f k8s/networkpolicy.yaml +# Encrypt secrets +echo -n "sk-your-key" | kubectl create secret generic llm-gateway-secrets \ + --dry-run=client --from-file=OPENAI_API_KEY=/dev/stdin -o yaml | \ + kubeseal -o yaml > sealed-secret.yaml + +# Commit sealed-secret.yaml to git +kubectl apply -f sealed-secret.yaml ``` -**With Prometheus Operator:** -```bash -kubectl apply -f k8s/servicemonitor.yaml -kubectl apply -f k8s/prometheusrule.yaml +## Monitoring + +### Metrics + +ServiceMonitor for Prometheus Operator: + +```yaml +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: llm-gateway +spec: + selector: + matchLabels: + app: llm-gateway + endpoints: + - port: http + path: /metrics + interval: 30s ``` -### 5. 
Verify Deployment +**Available metrics:** +- `gateway_requests_total` - Total requests by provider/model +- `gateway_request_duration_seconds` - Request latency histogram +- `gateway_provider_errors_total` - Errors by provider +- `gateway_circuit_breaker_state` - Circuit breaker state changes +- `gateway_rate_limit_hits_total` - Rate limit violations + +### Alerts + +PrometheusRule with common alerts: + +```yaml +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: llm-gateway-alerts +spec: + groups: + - name: llm-gateway + interval: 30s + rules: + - alert: HighErrorRate + expr: rate(gateway_requests_total{status=~"5.."}[5m]) > 0.05 + for: 5m + annotations: + summary: High error rate detected + + - alert: PodDown + expr: kube_deployment_status_replicas_available{deployment="llm-gateway"} < 2 + for: 5m + annotations: + summary: Less than 2 gateway pods running +``` + +### Logging + +View logs: ```bash -# Check pods +# Tail logs +kubectl logs -n llm-gateway -l app=llm-gateway -f + +# Filter by level +kubectl logs -n llm-gateway -l app=llm-gateway | jq 'select(.level=="error")' + +# Search logs +kubectl logs -n llm-gateway -l app=llm-gateway | grep "circuit.*open" +``` + +### Tracing + +Configure OpenTelemetry collector: + +```yaml +observability: + tracing: + enabled: true + exporter: + type: otlp + endpoint: tempo:4317 # or jaeger-collector:4317 +``` + +## Storage Options + +### In-Memory (Default) + +No persistence, lost on pod restart: + +```yaml +conversations: + store: memory +``` + +### Redis (Recommended) + +Deploy Redis StatefulSet: + +```bash +kubectl apply -f redis.yaml +``` + +Configure gateway: + +```yaml +conversations: + store: redis + dsn: redis://redis:6379/0 + ttl: 1h +``` + +### External Redis + +For production, use managed Redis: + +```yaml +conversations: + store: redis + dsn: redis://:password@redis.example.com:6379/0 + ttl: 1h +``` + +**Cloud providers:** +- **AWS**: ElastiCache for Redis +- **GCP**: Memorystore for 
Redis +- **Azure**: Azure Cache for Redis + +### PostgreSQL + +```yaml +conversations: + store: sql + driver: pgx + dsn: postgres://user:pass@postgres:5432/llm_gateway?sslmode=require + ttl: 1h +``` + +## Scaling + +### Horizontal Pod Autoscaler + +Default HPA configuration: + +```yaml +apiVersion: autoscaling/v2 +kind: HorizontalPodAutoscaler +metadata: + name: llm-gateway +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: llm-gateway + minReplicas: 3 + maxReplicas: 20 + metrics: + - type: Resource + resource: + name: cpu + target: + type: Utilization + averageUtilization: 70 + - type: Resource + resource: + name: memory + target: + type: Utilization + averageUtilization: 80 +``` + +Monitor HPA: + +```bash +kubectl get hpa -n llm-gateway +kubectl describe hpa llm-gateway -n llm-gateway +``` + +### Manual Scaling + +```bash +# Scale to specific replica count +kubectl scale deployment/llm-gateway --replicas=10 -n llm-gateway + +# Check status +kubectl get deployment llm-gateway -n llm-gateway +``` + +### Pod Disruption Budget + +Ensures availability during disruptions: + +```yaml +apiVersion: policy/v1 +kind: PodDisruptionBudget +metadata: + name: llm-gateway +spec: + minAvailable: 2 + selector: + matchLabels: + app: llm-gateway +``` + +## Updates and Rollbacks + +### Rolling Updates + +```bash +# Update image +kubectl set image deployment/llm-gateway \ + gateway=ghcr.io/yourusername/llm-gateway:v1.2.3 \ + -n llm-gateway + +# Watch rollout +kubectl rollout status deployment/llm-gateway -n llm-gateway + +# Pause rollout if issues +kubectl rollout pause deployment/llm-gateway -n llm-gateway + +# Resume rollout +kubectl rollout resume deployment/llm-gateway -n llm-gateway +``` + +### Rollback + +```bash +# Rollback to previous version +kubectl rollout undo deployment/llm-gateway -n llm-gateway + +# Rollback to specific revision +kubectl rollout history deployment/llm-gateway -n llm-gateway +kubectl rollout undo deployment/llm-gateway 
--to-revision=3 -n llm-gateway
+```
+
+### Blue-Green Deployment
+
+```bash
+# Deploy new version with different label
+kubectl apply -f deployment-v2.yaml
+
+# Test new version
+kubectl port-forward -n llm-gateway deployment/llm-gateway-v2 8080:8080
+
+# Switch service to new version
+kubectl patch service llm-gateway -n llm-gateway \
+  -p '{"spec":{"selector":{"version":"v2"}}}'
+
+# Delete the old deployment (llm-gateway) after verifying v2
+kubectl delete deployment llm-gateway -n llm-gateway
+```
+
+## Security
+
+### Pod Security
+
+Deployment includes security best practices:
+
+```yaml
+securityContext:
+  runAsNonRoot: true
+  runAsUser: 1000
+  fsGroup: 1000
+  seccompProfile:
+    type: RuntimeDefault
+
+containers:
+  - name: gateway
+    securityContext:
+      allowPrivilegeEscalation: false
+      readOnlyRootFilesystem: true
+      capabilities:
+        drop:
+          - ALL
+```
+
+### Network Policies
+
+Restrict traffic to/from gateway pods:
+
+```yaml
+apiVersion: networking.k8s.io/v1
+kind: NetworkPolicy
+metadata:
+  name: llm-gateway
+spec:
+  podSelector:
+    matchLabels:
+      app: llm-gateway
+  policyTypes:
+    - Ingress
+    - Egress
+  ingress:
+    - from:
+        - namespaceSelector:
+            matchLabels:
+              name: ingress-nginx
+      ports:
+        - protocol: TCP
+          port: 8080
+  egress:
+    - to: # Allow DNS
+        - namespaceSelector: {}
+          podSelector:
+            matchLabels:
+              k8s-app: kube-dns
+      ports:
+        - protocol: UDP
+          port: 53
+    - to: # Allow Redis
+        - podSelector:
+            matchLabels:
+              app: redis
+      ports:
+        - protocol: TCP
+          port: 6379
+    - to: # Allow external LLM providers (HTTPS)
+        - namespaceSelector: {}
+      ports:
+        - protocol: TCP
+          port: 443
+```
+
+### RBAC
+
+ServiceAccount with minimal permissions:
+
+```yaml
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: llm-gateway
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: Role
+metadata:
+  name: llm-gateway
+rules:
+  - apiGroups: [""]
+    resources: ["configmaps"]
+    verbs: ["get", "list", "watch"]
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: RoleBinding
+metadata:
+  name: 
llm-gateway +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: llm-gateway +subjects: + - kind: ServiceAccount + name: llm-gateway +``` + +## Cloud Provider Guides + +### AWS EKS + +```bash +# Install AWS Load Balancer Controller +kubectl apply -k "github.com/aws/eks-charts/stable/aws-load-balancer-controller//crds?ref=master" +helm install aws-load-balancer-controller eks/aws-load-balancer-controller \ + -n kube-system \ + --set clusterName=my-cluster + +# Update ingress for ALB +# Add annotations to ingress.yaml: +metadata: + annotations: + kubernetes.io/ingress.class: alb + alb.ingress.kubernetes.io/scheme: internet-facing + alb.ingress.kubernetes.io/target-type: ip +``` + +**IRSA for secrets:** + +```bash +# Create IAM role and associate with ServiceAccount +eksctl create iamserviceaccount \ + --name llm-gateway \ + --namespace llm-gateway \ + --cluster my-cluster \ + --attach-policy-arn arn:aws:iam::aws:policy/SecretsManagerReadWrite \ + --approve +``` + +**ElastiCache Redis:** + +```yaml +conversations: + store: redis + dsn: redis://my-cluster.cache.amazonaws.com:6379/0 +``` + +### GCP GKE + +```bash +# Enable Workload Identity +gcloud container clusters update my-cluster \ + --workload-pool=PROJECT_ID.svc.id.goog + +# Create service account with Secret Manager access +gcloud iam service-accounts create llm-gateway + +gcloud projects add-iam-policy-binding PROJECT_ID \ + --member "serviceAccount:llm-gateway@PROJECT_ID.iam.gserviceaccount.com" \ + --role "roles/secretmanager.secretAccessor" + +# Bind K8s SA to GCP SA +kubectl annotate serviceaccount llm-gateway \ + -n llm-gateway \ + iam.gke.io/gcp-service-account=llm-gateway@PROJECT_ID.iam.gserviceaccount.com +``` + +**Memorystore Redis:** + +```yaml +conversations: + store: redis + dsn: redis://10.0.0.3:6379/0 # Private IP from Memorystore +``` + +### Azure AKS + +```bash +# Install Application Gateway Ingress Controller +az aks enable-addons \ + --resource-group myResourceGroup \ + --name 
myAKSCluster \ + --addons ingress-appgw \ + --appgw-name myApplicationGateway + +# Configure Azure AD Workload Identity +az aks update \ + --resource-group myResourceGroup \ + --name myAKSCluster \ + --enable-oidc-issuer \ + --enable-workload-identity +``` + +**Azure Key Vault with ESO:** + +```yaml +apiVersion: external-secrets.io/v1beta1 +kind: SecretStore +metadata: + name: azure-keyvault +spec: + provider: + azurekv: + authType: WorkloadIdentity + vaultUrl: https://my-vault.vault.azure.net +``` + +## Troubleshooting + +### Pods Not Starting + +```bash +# Check pod status kubectl get pods -n llm-gateway -# Check services -kubectl get svc -n llm-gateway +# Describe pod for events +kubectl describe pod llm-gateway-xxx -n llm-gateway -# Check ingress -kubectl get ingress -n llm-gateway +# Check logs +kubectl logs -n llm-gateway llm-gateway-xxx -# View logs -kubectl logs -n llm-gateway -l app=llm-gateway --tail=100 -f +# Check previous container logs (if crashed) +kubectl logs -n llm-gateway llm-gateway-xxx --previous +``` -# Check health +**Common issues:** +- Image pull errors: Check registry credentials +- CrashLoopBackOff: Check logs for startup errors +- Pending: Check resource quotas and node capacity + +### Health Check Failures + +```bash +# Port-forward to test locally kubectl port-forward -n llm-gateway svc/llm-gateway 8080:80 + +# Test endpoints curl http://localhost:8080/health +curl http://localhost:8080/ready + +# Check from inside pod +kubectl exec -n llm-gateway deployment/llm-gateway -- wget -O- http://localhost:8080/health +``` + +### Provider Connection Issues + +```bash +# Test egress from pod +kubectl exec -n llm-gateway deployment/llm-gateway -- wget -O- https://api.openai.com + +# Check secrets +kubectl get secret llm-gateway-secrets -n llm-gateway -o jsonpath='{.data.OPENAI_API_KEY}' | base64 -d + +# Verify network policies +kubectl get networkpolicy -n llm-gateway +kubectl describe networkpolicy llm-gateway -n llm-gateway +``` + +### Redis 
Connection Issues + +```bash +# Test Redis connectivity +kubectl exec -n llm-gateway deployment/llm-gateway -- nc -zv redis 6379 + +# Connect to Redis +kubectl exec -it -n llm-gateway redis-0 -- redis-cli + +# Check Redis logs +kubectl logs -n llm-gateway redis-0 +``` + +### Performance Issues + +```bash +# Check resource usage +kubectl top pods -n llm-gateway +kubectl top nodes + +# Check HPA status +kubectl describe hpa llm-gateway -n llm-gateway + +# Check for throttling +kubectl describe pod llm-gateway-xxx -n llm-gateway | grep -i throttl +``` + +### Debug Container + +For distroless/minimal images: + +```bash +# Use ephemeral debug container +kubectl debug -it -n llm-gateway llm-gateway-xxx --image=busybox --target=gateway + +# Or use debug pod +kubectl run debug --rm -it --image=nicolaka/netshoot -n llm-gateway -- /bin/bash +``` + +## Useful Commands + +```bash +# View all resources +kubectl get all -n llm-gateway + +# Check deployment status +kubectl rollout status deployment/llm-gateway -n llm-gateway + +# Tail logs from all pods +kubectl logs -n llm-gateway -l app=llm-gateway -f --max-log-requests=10 + +# Get events +kubectl get events -n llm-gateway --sort-by='.lastTimestamp' + +# Check resource quotas +kubectl describe resourcequota -n llm-gateway + +# Export current config +kubectl get deployment llm-gateway -n llm-gateway -o yaml > deployment-backup.yaml + +# Force pod restart +kubectl rollout restart deployment/llm-gateway -n llm-gateway + +# Delete and recreate deployment +kubectl delete deployment llm-gateway -n llm-gateway +kubectl apply -f deployment.yaml ``` ## Architecture Overview ``` -β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” -β”‚ Internet/Clients β”‚ 
-β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ - β”‚ - β–Ό -β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” -β”‚ Ingress Controller β”‚ -β”‚ (nginx/ALB/GCE with TLS) β”‚ -β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ - β”‚ - β–Ό -β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” -β”‚ LLM Gateway Service β”‚ -β”‚ (LoadBalancer) β”‚ -β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ - β”‚ - β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” - β–Ό β–Ό β–Ό -β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” -β”‚ Gateway β”‚ β”‚ Gateway β”‚ β”‚ Gateway β”‚ -β”‚ Pod 1 β”‚ β”‚ Pod 2 β”‚ β”‚ Pod 3 β”‚ -β””β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”˜ - β”‚ β”‚ β”‚ - β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ - β”‚ - β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” - β–Ό β–Ό β–Ό -β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” -β”‚ Redis β”‚ β”‚ Prometheus β”‚ β”‚ Tempo β”‚ -β”‚ 
(Persistent) β”‚ β”‚ (Metrics) β”‚ β”‚ (Traces) β”‚ -β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ -``` - -## Resource Specifications - -### Default Resources -- **Requests**: 100m CPU, 128Mi memory -- **Limits**: 1000m CPU, 512Mi memory -- **Replicas**: 3 (min), 20 (max with HPA) - -### Scaling -- HPA scales based on CPU (70%) and memory (80%) -- PodDisruptionBudget ensures minimum 2 replicas during disruptions - -## Configuration Options - -### Environment Variables (from Secret) -- `GOOGLE_API_KEY`: Google AI API key -- `ANTHROPIC_API_KEY`: Anthropic API key -- `OPENAI_API_KEY`: OpenAI API key -- `OIDC_AUDIENCE`: OIDC client ID for authentication - -### ConfigMap Settings -See `configmap.yaml` for full configuration options: -- Server address -- Logging format and level -- Rate limiting -- Observability (metrics/tracing) -- Provider endpoints -- Conversation storage -- Authentication - -## Security - -### Security Features -- Non-root container execution (UID 1000) -- Read-only root filesystem -- No privilege escalation -- All capabilities dropped -- Network policies for ingress/egress control -- SeccompProfile: RuntimeDefault - -### TLS/HTTPS -- Ingress configured with TLS -- Uses cert-manager for automatic certificate provisioning -- Force SSL redirect enabled - -### Secrets Management -**Never commit secrets to git!** - -Production options: -1. **External Secrets Operator** (Recommended) - - AWS Secrets Manager - - HashiCorp Vault - - Google Secret Manager - -2. **Sealed Secrets** - - Encrypted secrets in git - -3. 
**Manual kubectl secrets** - - Created outside of git - -## Monitoring - -### Metrics -- Exposed on `/metrics` endpoint -- Scraped by Prometheus via ServiceMonitor -- Key metrics: - - HTTP request rate, latency, errors - - Provider request rate, latency, token usage - - Conversation store operations - - Rate limiting hits - -### Alerts -See `prometheusrule.yaml` for configured alerts: -- High error rate -- High latency -- Provider failures -- Pod down -- High memory usage -- Rate limit threshold exceeded -- Conversation store errors - -### Logs -Structured JSON logs with: -- Request IDs -- Trace context (trace_id, span_id) -- Log levels (debug/info/warn/error) - -View logs: -```bash -kubectl logs -n llm-gateway -l app=llm-gateway --tail=100 -f -``` - -## Maintenance - -### Rolling Updates -```bash -# Update image -kubectl set image deployment/llm-gateway gateway=your-registry/llm-gateway:v1.0.1 -n llm-gateway - -# Check rollout status -kubectl rollout status deployment/llm-gateway -n llm-gateway - -# Rollback if needed -kubectl rollout undo deployment/llm-gateway -n llm-gateway -``` - -### Scaling -```bash -# Manual scale -kubectl scale deployment/llm-gateway --replicas=5 -n llm-gateway - -# HPA will auto-scale within min/max bounds (3-20) -``` - -### Configuration Updates -```bash -# Edit ConfigMap -kubectl edit configmap llm-gateway-config -n llm-gateway - -# Restart pods to pick up changes -kubectl rollout restart deployment/llm-gateway -n llm-gateway -``` - -### Debugging -```bash -# Exec into pod -kubectl exec -it -n llm-gateway deployment/llm-gateway -- /bin/sh - -# Port forward for local access -kubectl port-forward -n llm-gateway svc/llm-gateway 8080:80 - -# Check events -kubectl get events -n llm-gateway --sort-by='.lastTimestamp' -``` - -## Production Considerations - -### High Availability -- Minimum 3 replicas across availability zones -- Pod anti-affinity rules spread pods across nodes -- PodDisruptionBudget ensures service availability during 
disruptions - -### Performance -- Adjust resource limits based on load testing -- Configure HPA thresholds based on traffic patterns -- Use node affinity for GPU nodes if needed - -### Cost Optimization -- Use spot/preemptible instances for non-critical workloads -- Set appropriate resource requests/limits -- Monitor token usage and implement quotas - -### Disaster Recovery -- Redis persistence (if using StatefulSet) -- Regular backups of conversation data -- Multi-region deployment for geo-redundancy -- Document runbooks for incident response - -## Cloud-Specific Notes - -### AWS EKS -- Use AWS Load Balancer Controller for ALB -- Configure IRSA for service account -- Use ElastiCache for Redis -- Store secrets in AWS Secrets Manager - -### GCP GKE -- Use GKE Ingress for GCLB -- Configure Workload Identity -- Use Memorystore for Redis -- Store secrets in Google Secret Manager - -### Azure AKS -- Use Azure Application Gateway Ingress Controller -- Configure Azure AD Workload Identity -- Use Azure Cache for Redis -- Store secrets in Azure Key Vault - -## Troubleshooting - -### Common Issues - -**Pods not starting:** -```bash -kubectl describe pod -n llm-gateway -l app=llm-gateway -kubectl logs -n llm-gateway -l app=llm-gateway --previous -``` - -**Health check failures:** -```bash -kubectl port-forward -n llm-gateway deployment/llm-gateway 8080:8080 -curl http://localhost:8080/health -curl http://localhost:8080/ready -``` - -**Provider connection issues:** -- Verify API keys in secrets -- Check network policies allow egress -- Verify provider endpoints are accessible - -**Redis connection issues:** -```bash -kubectl exec -it -n llm-gateway redis-0 -- redis-cli ping +β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” +β”‚ Internet / Load Balancer β”‚ 
+β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ + β”‚ + β–Ό + β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” + β”‚ Ingress Controller β”‚ + β”‚ (TLS/SSL) β”‚ + β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ + β”‚ + β–Ό + β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” + β”‚ Gateway Service β”‚ + β”‚ (ClusterIP:80) β”‚ + β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ + β”‚ + β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” + β–Ό β–Ό β–Ό + β”Œβ”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β” + β”‚ Pod β”‚ β”‚ Pod β”‚ β”‚ Pod β”‚ + β”‚ 1 β”‚ β”‚ 2 β”‚ β”‚ 3 β”‚ + β””β”€β”€β”¬β”€β”€β”˜ β””β”€β”€β”¬β”€β”€β”˜ β””β”€β”€β”¬β”€β”€β”˜ + β”‚ β”‚ β”‚ + β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ + β”‚ + β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” + β–Ό β–Ό β–Ό + β”Œβ”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β” + β”‚Redis β”‚ β”‚Prom β”‚ β”‚Tempo β”‚ + β””β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”˜ ``` ## Additional Resources -- [Kubernetes Documentation](https://kubernetes.io/docs/) -- [Prometheus Operator](https://github.com/prometheus-operator/prometheus-operator) -- [cert-manager](https://cert-manager.io/) +- [Main Documentation](../README.md) +- [Docker Deployment](../docs/DOCKER_DEPLOYMENT.md) +- [Kubernetes Best Practices](https://kubernetes.io/docs/concepts/configuration/overview/) +- [Prometheus Operator](https://prometheus-operator.dev/) - [External Secrets Operator](https://external-secrets.io/) +- [cert-manager](https://cert-manager.io/) From 89c7e3ac85d09bebc2a4c0f46cc2a01675fe5260 Mon Sep 17 00:00:00 2001 From: Anibal Angulo Date: Fri, 6 Mar 2026 21:31:51 +0000 Subject: [PATCH 2/3] Add 
fail-fast on init for missing provider credentials --- cmd/gateway/main.go | 21 ++++++ cmd/gateway/main_test.go | 57 +++++++++++++++ frontend/admin/vite.config.ts | 1 + internal/config/config.go | 25 ++++++- internal/config/config_test.go | 42 ++++++++--- internal/observability/metrics_middleware.go | 15 ++++ .../middleware_response_writer_test.go | 65 ++++++++++++++++++ internal/observability/tracing_middleware.go | 17 ++++- internal/providers/providers.go | 5 ++ internal/providers/providers_test.go | 58 ++++++++++++++-- internal/server/server.go | 10 +-- internal/server/streaming_writer_test.go | 53 ++++++++++++++ scripts/__pycache__/chat.cpython-312.pyc | Bin 0 -> 20974 bytes scripts/chat.py | 49 +++++++++++++ 14 files changed, 398 insertions(+), 20 deletions(-) create mode 100644 cmd/gateway/main_test.go create mode 100644 internal/observability/middleware_response_writer_test.go create mode 100644 internal/server/streaming_writer_test.go create mode 100644 scripts/__pycache__/chat.cpython-312.pyc diff --git a/cmd/gateway/main.go b/cmd/gateway/main.go index 94d0fef..2bc134f 100644 --- a/cmd/gateway/main.go +++ b/cmd/gateway/main.go @@ -155,6 +155,11 @@ func main() { // Register admin endpoints if enabled if cfg.Admin.Enabled { + // Check if frontend dist exists + if _, err := os.Stat("internal/admin/dist"); os.IsNotExist(err) { + log.Fatalf("admin UI enabled but frontend dist not found") + } + buildInfo := admin.BuildInfo{ Version: "dev", BuildTime: time.Now().Format(time.RFC3339), @@ -348,23 +353,39 @@ func initConversationStore(cfg config.ConversationConfig, logger *slog.Logger) ( return conversation.NewMemoryStore(ttl), "memory", nil } } + type responseWriter struct { http.ResponseWriter statusCode int bytesWritten int + wroteHeader bool } func (rw *responseWriter) WriteHeader(code int) { + if rw.wroteHeader { + return + } + rw.wroteHeader = true rw.statusCode = code rw.ResponseWriter.WriteHeader(code) } func (rw *responseWriter) Write(b []byte) (int, error) 
{ + if !rw.wroteHeader { + rw.wroteHeader = true + rw.statusCode = http.StatusOK + } n, err := rw.ResponseWriter.Write(b) rw.bytesWritten += n return n, err } +func (rw *responseWriter) Flush() { + if flusher, ok := rw.ResponseWriter.(http.Flusher); ok { + flusher.Flush() + } +} + func loggingMiddleware(next http.Handler, logger *slog.Logger) http.Handler { return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { start := time.Now() diff --git a/cmd/gateway/main_test.go b/cmd/gateway/main_test.go new file mode 100644 index 0000000..c08cf50 --- /dev/null +++ b/cmd/gateway/main_test.go @@ -0,0 +1,57 @@ +package main + +import ( + "net/http" + "net/http/httptest" + "testing" + + "github.com/stretchr/testify/assert" +) + +var _ http.Flusher = (*responseWriter)(nil) + +type countingFlusherRecorder struct { + *httptest.ResponseRecorder + flushCount int +} + +func newCountingFlusherRecorder() *countingFlusherRecorder { + return &countingFlusherRecorder{ResponseRecorder: httptest.NewRecorder()} +} + +func (r *countingFlusherRecorder) Flush() { + r.flushCount++ +} + +func TestResponseWriterWriteHeaderOnlyOnce(t *testing.T) { + rec := httptest.NewRecorder() + rw := &responseWriter{ResponseWriter: rec, statusCode: http.StatusOK} + + rw.WriteHeader(http.StatusCreated) + rw.WriteHeader(http.StatusInternalServerError) + + assert.Equal(t, http.StatusCreated, rec.Code) + assert.Equal(t, http.StatusCreated, rw.statusCode) +} + +func TestResponseWriterWriteSetsImplicitStatus(t *testing.T) { + rec := httptest.NewRecorder() + rw := &responseWriter{ResponseWriter: rec, statusCode: http.StatusOK} + + n, err := rw.Write([]byte("ok")) + + assert.NoError(t, err) + assert.Equal(t, 2, n) + assert.Equal(t, http.StatusOK, rec.Code) + assert.Equal(t, http.StatusOK, rw.statusCode) + assert.Equal(t, 2, rw.bytesWritten) +} + +func TestResponseWriterFlushDelegates(t *testing.T) { + rec := newCountingFlusherRecorder() + rw := &responseWriter{ResponseWriter: rec, statusCode: 
http.StatusOK} + + rw.Flush() + + assert.Equal(t, 1, rec.flushCount) +} diff --git a/frontend/admin/vite.config.ts b/frontend/admin/vite.config.ts index 4c37cb7..c5182bd 100644 --- a/frontend/admin/vite.config.ts +++ b/frontend/admin/vite.config.ts @@ -6,6 +6,7 @@ export default defineConfig({ base: '/admin/', server: { port: 5173, + allowedHosts: ['.coder.ia-innovacion.work', 'localhost'], proxy: { '/admin/api': { target: 'http://localhost:8080', diff --git a/internal/config/config.go b/internal/config/config.go index d32c46e..89d6334 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -172,9 +172,32 @@ func Load(path string) (*Config, error) { func (cfg *Config) validate() error { for _, m := range cfg.Models { - if _, ok := cfg.Providers[m.Provider]; !ok { + providerEntry, ok := cfg.Providers[m.Provider] + if !ok { return fmt.Errorf("model %q references unknown provider %q", m.Name, m.Provider) } + + switch providerEntry.Type { + case "openai", "anthropic", "google", "azureopenai", "azureanthropic": + if providerEntry.APIKey == "" { + return fmt.Errorf("model %q references provider %q (%s) without api_key", m.Name, m.Provider, providerEntry.Type) + } + } + + switch providerEntry.Type { + case "azureopenai", "azureanthropic": + if providerEntry.Endpoint == "" { + return fmt.Errorf("model %q references provider %q (%s) without endpoint", m.Name, m.Provider, providerEntry.Type) + } + case "vertexai": + if providerEntry.Project == "" || providerEntry.Location == "" { + return fmt.Errorf("model %q references provider %q (vertexai) without project/location", m.Name, m.Provider) + } + case "openai", "anthropic", "google": + // No additional required fields. 
+ default: + return fmt.Errorf("model %q references provider %q with unknown type %q", m.Name, m.Provider, providerEntry.Type) + } } return nil } diff --git a/internal/config/config_test.go b/internal/config/config_test.go index 867b4b2..2615f29 100644 --- a/internal/config/config_test.go +++ b/internal/config/config_test.go @@ -103,7 +103,7 @@ server: address: ":8080" providers: azure: - type: azure_openai + type: azureopenai api_key: azure-key endpoint: https://my-resource.openai.azure.com api_version: "2024-02-15-preview" @@ -113,7 +113,7 @@ models: provider_model_id: gpt-4-deployment `, validate: func(t *testing.T, cfg *Config) { - assert.Equal(t, "azure_openai", cfg.Providers["azure"].Type) + assert.Equal(t, "azureopenai", cfg.Providers["azure"].Type) assert.Equal(t, "azure-key", cfg.Providers["azure"].APIKey) assert.Equal(t, "https://my-resource.openai.azure.com", cfg.Providers["azure"].Endpoint) assert.Equal(t, "2024-02-15-preview", cfg.Providers["azure"].APIVersion) @@ -126,7 +126,7 @@ server: address: ":8080" providers: vertex: - type: vertex_ai + type: vertexai project: my-gcp-project location: us-central1 models: @@ -135,7 +135,7 @@ models: provider_model_id: gemini-1.5-pro `, validate: func(t *testing.T, cfg *Config) { - assert.Equal(t, "vertex_ai", cfg.Providers["vertex"].Type) + assert.Equal(t, "vertexai", cfg.Providers["vertex"].Type) assert.Equal(t, "my-gcp-project", cfg.Providers["vertex"].Project) assert.Equal(t, "us-central1", cfg.Providers["vertex"].Location) }, @@ -208,6 +208,20 @@ models: configYAML: `invalid: yaml: content: [unclosed`, expectError: true, }, + { + name: "model references provider without required API key", + configYAML: ` +server: + address: ":8080" +providers: + openai: + type: openai +models: + - name: gpt-4 + provider: openai +`, + expectError: true, + }, { name: "multiple models same provider", configYAML: ` @@ -283,7 +297,7 @@ func TestConfigValidate(t *testing.T) { name: "valid config", config: Config{ Providers: 
map[string]ProviderEntry{ - "openai": {Type: "openai"}, + "openai": {Type: "openai", APIKey: "test-key"}, }, Models: []ModelEntry{ {Name: "gpt-4", Provider: "openai"}, @@ -295,7 +309,7 @@ func TestConfigValidate(t *testing.T) { name: "model references unknown provider", config: Config{ Providers: map[string]ProviderEntry{ - "openai": {Type: "openai"}, + "openai": {Type: "openai", APIKey: "test-key"}, }, Models: []ModelEntry{ {Name: "gpt-4", Provider: "unknown"}, @@ -303,6 +317,18 @@ func TestConfigValidate(t *testing.T) { }, expectError: true, }, + { + name: "model references provider without api key", + config: Config{ + Providers: map[string]ProviderEntry{ + "openai": {Type: "openai"}, + }, + Models: []ModelEntry{ + {Name: "gpt-4", Provider: "openai"}, + }, + }, + expectError: true, + }, { name: "no models", config: Config{ @@ -317,8 +343,8 @@ func TestConfigValidate(t *testing.T) { name: "multiple models multiple providers", config: Config{ Providers: map[string]ProviderEntry{ - "openai": {Type: "openai"}, - "anthropic": {Type: "anthropic"}, + "openai": {Type: "openai", APIKey: "test-key"}, + "anthropic": {Type: "anthropic", APIKey: "ant-key"}, }, Models: []ModelEntry{ {Name: "gpt-4", Provider: "openai"}, diff --git a/internal/observability/metrics_middleware.go b/internal/observability/metrics_middleware.go index 8537935..fdb98f4 100644 --- a/internal/observability/metrics_middleware.go +++ b/internal/observability/metrics_middleware.go @@ -48,15 +48,30 @@ type metricsResponseWriter struct { http.ResponseWriter statusCode int bytesWritten int + wroteHeader bool } func (w *metricsResponseWriter) WriteHeader(statusCode int) { + if w.wroteHeader { + return + } + w.wroteHeader = true w.statusCode = statusCode w.ResponseWriter.WriteHeader(statusCode) } func (w *metricsResponseWriter) Write(b []byte) (int, error) { + if !w.wroteHeader { + w.wroteHeader = true + w.statusCode = http.StatusOK + } n, err := w.ResponseWriter.Write(b) w.bytesWritten += n return n, err } + 
+func (w *metricsResponseWriter) Flush() { + if flusher, ok := w.ResponseWriter.(http.Flusher); ok { + flusher.Flush() + } +} diff --git a/internal/observability/middleware_response_writer_test.go b/internal/observability/middleware_response_writer_test.go new file mode 100644 index 0000000..14d0cb3 --- /dev/null +++ b/internal/observability/middleware_response_writer_test.go @@ -0,0 +1,65 @@ +package observability + +import ( + "net/http" + "net/http/httptest" + "testing" + + "github.com/stretchr/testify/assert" +) + +var _ http.Flusher = (*metricsResponseWriter)(nil) +var _ http.Flusher = (*statusResponseWriter)(nil) + +type testFlusherRecorder struct { + *httptest.ResponseRecorder + flushCount int +} + +func newTestFlusherRecorder() *testFlusherRecorder { + return &testFlusherRecorder{ResponseRecorder: httptest.NewRecorder()} +} + +func (r *testFlusherRecorder) Flush() { + r.flushCount++ +} + +func TestMetricsResponseWriterWriteHeaderOnlyOnce(t *testing.T) { + rec := httptest.NewRecorder() + rw := &metricsResponseWriter{ResponseWriter: rec, statusCode: http.StatusOK} + + rw.WriteHeader(http.StatusAccepted) + rw.WriteHeader(http.StatusInternalServerError) + + assert.Equal(t, http.StatusAccepted, rec.Code) + assert.Equal(t, http.StatusAccepted, rw.statusCode) +} + +func TestMetricsResponseWriterFlushDelegates(t *testing.T) { + rec := newTestFlusherRecorder() + rw := &metricsResponseWriter{ResponseWriter: rec, statusCode: http.StatusOK} + + rw.Flush() + + assert.Equal(t, 1, rec.flushCount) +} + +func TestStatusResponseWriterWriteHeaderOnlyOnce(t *testing.T) { + rec := httptest.NewRecorder() + rw := &statusResponseWriter{ResponseWriter: rec, statusCode: http.StatusOK} + + rw.WriteHeader(http.StatusNoContent) + rw.WriteHeader(http.StatusInternalServerError) + + assert.Equal(t, http.StatusNoContent, rec.Code) + assert.Equal(t, http.StatusNoContent, rw.statusCode) +} + +func TestStatusResponseWriterFlushDelegates(t *testing.T) { + rec := newTestFlusherRecorder() + rw 
:= &statusResponseWriter{ResponseWriter: rec, statusCode: http.StatusOK} + + rw.Flush() + + assert.Equal(t, 1, rec.flushCount) +} diff --git a/internal/observability/tracing_middleware.go b/internal/observability/tracing_middleware.go index c1b426e..9feae16 100644 --- a/internal/observability/tracing_middleware.go +++ b/internal/observability/tracing_middleware.go @@ -72,14 +72,29 @@ func TracingMiddleware(next http.Handler, tp *sdktrace.TracerProvider) http.Hand // statusResponseWriter wraps http.ResponseWriter to capture the status code. type statusResponseWriter struct { http.ResponseWriter - statusCode int + statusCode int + wroteHeader bool } func (w *statusResponseWriter) WriteHeader(statusCode int) { + if w.wroteHeader { + return + } + w.wroteHeader = true w.statusCode = statusCode w.ResponseWriter.WriteHeader(statusCode) } func (w *statusResponseWriter) Write(b []byte) (int, error) { + if !w.wroteHeader { + w.wroteHeader = true + w.statusCode = http.StatusOK + } return w.ResponseWriter.Write(b) } + +func (w *statusResponseWriter) Flush() { + if flusher, ok := w.ResponseWriter.(http.Flusher); ok { + flusher.Flush() + } +} diff --git a/internal/providers/providers.go b/internal/providers/providers.go index bd807bc..639fcda 100644 --- a/internal/providers/providers.go +++ b/internal/providers/providers.go @@ -136,6 +136,9 @@ func (r *Registry) Get(name string) (Provider, bool) { func (r *Registry) Models() []struct{ Provider, Model string } { var out []struct{ Provider, Model string } for _, m := range r.modelList { + if _, ok := r.providers[m.Provider]; !ok { + continue + } out = append(out, struct{ Provider, Model string }{Provider: m.Provider, Model: m.Name}) } return out @@ -156,7 +159,9 @@ func (r *Registry) Default(model string) (Provider, error) { if p, ok := r.providers[providerName]; ok { return p, nil } + return nil, fmt.Errorf("model %q is mapped to provider %q, but that provider is not available", model, providerName) } + return nil, 
fmt.Errorf("model %q not configured", model) } for _, p := range r.providers { diff --git a/internal/providers/providers_test.go b/internal/providers/providers_test.go index 49b8595..367b6f0 100644 --- a/internal/providers/providers_test.go +++ b/internal/providers/providers_test.go @@ -475,7 +475,7 @@ func TestRegistry_Default(t *testing.T) { }, }, { - name: "returns first provider for unknown model", + name: "returns error for unknown model", setupReg: func() *Registry { reg, _ := NewRegistry( map[string]config.ProviderEntry{ @@ -490,11 +490,34 @@ func TestRegistry_Default(t *testing.T) { ) return reg }, - modelName: "unknown-model", - validate: func(t *testing.T, p Provider) { - assert.NotNil(t, p) - // Should return first available provider + modelName: "unknown-model", + expectError: true, + errorMsg: "not configured", + }, + { + name: "returns error for model whose provider is unavailable", + setupReg: func() *Registry { + reg, _ := NewRegistry( + map[string]config.ProviderEntry{ + "openai": { + Type: "openai", + APIKey: "", // unavailable provider + }, + "google": { + Type: "google", + APIKey: "test-key", + }, + }, + []config.ModelEntry{ + {Name: "gpt-4", Provider: "openai"}, + {Name: "gemini-pro", Provider: "google"}, + }, + ) + return reg }, + modelName: "gpt-4", + expectError: true, + errorMsg: "not available", }, { name: "returns first provider for empty model name", @@ -542,6 +565,31 @@ func TestRegistry_Default(t *testing.T) { } } +func TestRegistry_Models_FiltersUnavailableProviders(t *testing.T) { + reg, err := NewRegistry( + map[string]config.ProviderEntry{ + "openai": { + Type: "openai", + APIKey: "", // unavailable provider + }, + "google": { + Type: "google", + APIKey: "test-key", + }, + }, + []config.ModelEntry{ + {Name: "gpt-4", Provider: "openai"}, + {Name: "gemini-pro", Provider: "google"}, + }, + ) + require.NoError(t, err) + + models := reg.Models() + require.Len(t, models, 1) + assert.Equal(t, "gemini-pro", models[0].Model) + 
assert.Equal(t, "google", models[0].Provider) +} + func TestBuildProvider(t *testing.T) { tests := []struct { name string diff --git a/internal/server/server.go b/internal/server/server.go index 0dcb490..5190944 100644 --- a/internal/server/server.go +++ b/internal/server/server.go @@ -239,17 +239,17 @@ func (s *GatewayServer) handleSyncResponse(w http.ResponseWriter, r *http.Reques } func (s *GatewayServer) handleStreamingResponse(w http.ResponseWriter, r *http.Request, provider providers.Provider, providerMsgs []api.Message, resolvedReq *api.ResponseRequest, origReq *api.ResponseRequest, storeMsgs []api.Message) { - w.Header().Set("Content-Type", "text/event-stream") - w.Header().Set("Cache-Control", "no-cache") - w.Header().Set("Connection", "keep-alive") - w.WriteHeader(http.StatusOK) - flusher, ok := w.(http.Flusher) if !ok { http.Error(w, "streaming not supported", http.StatusInternalServerError) return } + w.Header().Set("Content-Type", "text/event-stream") + w.Header().Set("Cache-Control", "no-cache") + w.Header().Set("Connection", "keep-alive") + w.WriteHeader(http.StatusOK) + responseID := generateID("resp_") itemID := generateID("msg_") seq := 0 diff --git a/internal/server/streaming_writer_test.go b/internal/server/streaming_writer_test.go new file mode 100644 index 0000000..95dc3b2 --- /dev/null +++ b/internal/server/streaming_writer_test.go @@ -0,0 +1,53 @@ +package server + +import ( + "io" + "log/slog" + "net/http" + "net/http/httptest" + "testing" + + "github.com/stretchr/testify/assert" +) + +type nonFlusherRecorder struct { + recorder *httptest.ResponseRecorder + writeHeaderCalls int +} + +func newNonFlusherRecorder() *nonFlusherRecorder { + return &nonFlusherRecorder{recorder: httptest.NewRecorder()} +} + +func (w *nonFlusherRecorder) Header() http.Header { + return w.recorder.Header() +} + +func (w *nonFlusherRecorder) Write(b []byte) (int, error) { + return w.recorder.Write(b) +} + +func (w *nonFlusherRecorder) WriteHeader(statusCode int) { + 
w.writeHeaderCalls++ + w.recorder.WriteHeader(statusCode) +} + +func (w *nonFlusherRecorder) StatusCode() int { + return w.recorder.Code +} + +func (w *nonFlusherRecorder) BodyString() string { + return w.recorder.Body.String() +} + +func TestHandleStreamingResponseWithoutFlusherWritesSingleErrorHeader(t *testing.T) { + s := New(nil, nil, slog.New(slog.NewTextHandler(io.Discard, nil))) + req := httptest.NewRequest(http.MethodPost, "/v1/responses", nil) + w := newNonFlusherRecorder() + + s.handleStreamingResponse(w, req, nil, nil, nil, nil, nil) + + assert.Equal(t, 1, w.writeHeaderCalls) + assert.Equal(t, http.StatusInternalServerError, w.StatusCode()) + assert.Contains(t, w.BodyString(), "streaming not supported") +} diff --git a/scripts/__pycache__/chat.cpython-312.pyc b/scripts/__pycache__/chat.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..066ed18f78cad3ca2e37f54c65f99fa54e6ab4b9 GIT binary patch literal 20974 zcmch9ZEzdOmDu3>`-=n#kOavQzabHzzQ4aHQlBf4+!f_rQKyAL%!mXD0`vfs#4)JV zo%d|YtILwMt3<`x5_Q?DrLxK`x9pVZN~KQTIA_^O&VWD}@K~G3=dE)2M-p-+$1ZK9 zF7NeV0FVOh^(B>&*wfSfy8Cta>(}qSevSWRGU+LJ{>Sn6gS%d)s9)iS^yspYr#&)? 
zxtxok6|9rhT##45ATV{T^MabGe^c34DW{H7Ov9TL)A+tz9G|`_ zris;EkTcDY?_yfsr248EH-t58Ewg^Og`zmEoMLO<&km=I8kdjD-5dB{>U&vkBpCLG z=)j;qMhC+&mb>5&u=IrpM~D2eSTMkbM!b6cnW%q&-L0qL86A%eM#7|kcXYf+67BJf zaUpsz78~8&))tBc{Gq`}G`4%^#+@6BgO5ZQHbf7wfJ)G_*}KtmA>@w^df3aO#o=R- zVKz)Rvd`MF^x zaw!aPRgXW+h9IKu;UXiWF-+?9_lHFMWD!bTq=nqO_l3f|%AyN5}eOoS#(KN{@_%V!_c+FhB~l z(*7_*Gr^0&D1dqeb)GzLR5;8qF>s7KIBRS4Q_`Efywr5Kj4?;T`|B16X_ zG$4j3yA^^mHa^Mi`?`1;#iIXuyYa&2Kr%#^@L{n@y!L708_S)6r2j5WEm%8Is0F!YmCmf2OxX z(2}B%z`xRqm0%V&NVl-w0dFha3_#8BY6l!(4%)idOTM!aZWz-Jk0IJ^C!vp#aI@P> zcd{4!FkMj`Nhs%)Mx+`>M9{?|k&qAT7Ic1YU~B{sj|yszjg4{PKz4NlAVq25PXT{V zpA$|P2OdY(^;>b^hsYr&v+IjEcaitjZRY+AAqyqni)RT~^ zDM(e3)OJYKlGM7mlF^ZvBL+B#&rl(b*)&Ga7=p6*voNPA(z1cnR{2Lu8sqYBQ||yj z%$NVeW#X#199v*ax0LxZFg|92>#4YG2zs*mL7pvf&k1!5N0&T9S}B%)OI&k8^G7hz z3_&kfKT_$qnz1n@w-k9C|8Q+EiluRC^;o#={HDW~*}zx~)+>-yBt#a!NHB!ZIW`L9 zM*D$z&>{oyauz^}tHhuR0~ZD*7?fgQ$Dj-YoJ>K*BJ1O=omqxL_m9&&%SHIoGvVPd zu)RF1+D!v<0cyn-SJ%G|E3Cr`6+rfU=jp-7CE!Lh*$(LFcr?b2h^w(hT5fc1DbwLd zjJ_}y3XRjjkjG9kupndM@)qk?0x-*)CG2oe05P?o9Et+>RWV~DqftQttbxM? 
zBq*Q{f-V@1AXD?l+*+<4QEk9r)fmCXDfWSDgIE-412;f?p095kjEt~t0ob{?wtZ>=oJf_vlrX;Z ziKX;y{u)2~g$4J6b%|F_Co6kXmNN-L=sd)N(mrc zN)IwC{C+wQ!3_%7SB}|AARrFr*khdX@WMi-}wG3IYmjQkyWTz**=m4ooAxTlm@ zAk>n>VSs`!rPNj|$I_&6fn2qe%c$Th$2`X=x4KJ^wegC!i<@|LIN}**LAv6$1Gu2^ zj|P44-Rsr@%RtUFN-{XCOi%~N*nnulMp4p?a=6$84af>8eF-L9IZ+=QM%ju%Q6$QS zE{GNE0LUn!56hCjt=sr;`02)0}#16D-PjS^Npuj5YzmkUXbk%*=vB2(!laZVP;sbppm+& z#EuL4V0d&4WH0t|?5-&KfzXH_S*cFR;cVix#$YLcIJXZF5Y+xrkiQwh>I1rff$~Z9 z3ua$*JRHbsqeq;21<2ik0!UoYFrv`MH9~vESN9;~;H+PT^>~`fivn>fmJ_L@)yb8t zF{d(^)hsmBk(<*va1F@G(#~nhE#nWg2LiCNXi+twL9ye4{UAg{>8Sq#3)?K}d;Jkm zqfU3el!XEbg6I{2Skw#qydKnxa5Ux*2S5d2f`J(4g+2%xpa-lDRVW1Cj?}AjF!ngnbOM1uEz%03U6&)G|-vS@*6v*cg$>=)uc*W z62_Ltmh!3SOvkJvWpO98ZgCD&>_wof6_PU&2M)#4uR}l zrVEmAZn!5DYrwz;l&eP>S8|3}b{Z%uULc>2E8Qx7%i+sJr=y9)MN5n90%(%7KflH< z_=CXnXwow;>O$h$BC}whC!}^ zC^8IOK-m>k=7S}R+NQJ~MUjv$oVa2T--2qQxHdtm0f}P&^#0k#xyn@O)=6!~YQJHg zHqTt1bEK@BCRLe|@*Cc1?`+weE>*I5QuA{|$>Y-UsY~D7n=sa9EaG&om`5aUGLM&G z{{C<0ae$JRhqxIIP!kIHQ^pmY)Ok>oCSYSLN&~9={4&a6*m^kG^@=rNRWG&!^&f-! 
z)oYYd7t~L5wCLLPs~HWWmHIKEiE9A1Ev_uKX%vVC8fhbt-ZJX?2_VV!-ujWc2iE>J5NdBn0ZYM8Jgfz(zreh{ge=VeS+JfQR+ZQ+prj)Q3 zk!95|r-r>~eqXBeV5X|}_Wqmu=NjhK09>_as#9=OWt??sr#tC%->aWv-giH8Ze7+< z6^CR?8mesFQVC@(dHd3}OH+KxQkT%y5#G6Cv7wAj7TZB!4PbbcO#t%)rl*L@hhfK= zl5@jxV0F9+NEulnfUkavks+gu%RmZMy^~Xew^1Oq$e;#f)2kjveM=5Yk{Zg%n|T0E z7efh=JfQq*_CidKQWRpo>KQVwN`TKH^NVRrZ4j9ByRv^Fn@}-Y{3o$;83oKi4SyQ= z)4rh^vc6av7WV&y(a^a4RP(0&6%0tYKu0J{hxkP3Wi?S{BGNgyw}+50tN2 zl?0y>h7*Rkx)_E~s_w;7klIir)c^*d9902L74Nh8Pps3DH!8tcEGwa2uC_R}7V80{ zW2|uvV_SilI$9Rja$pP^!VP%U6W2TJgWfGg>}cxOm*R?UJt_D6y+jAs1mMv;>J8S5>MmgQ}XM><2CivCRl5i zKtnzYyh%n)$*%v?WyM+QlB}MJp@E1zLv%J$@M@qqwTuc<6Na-`YWNZ*mUGj*yDJj* z6iR8L3`buA`@d+$i;6aX?{AQS5lh@}P)_B`&hv2E)TB<+boix7Uld zDXWX5%^nYd4hgf%bM+G_G?I z%p&|7gW{?-711dOKt@vGmU zNv(pcg*5zJ;})R=`w9eoSwD%?b&k^kbV7u zK@c>+ypMgNv5;tTjEtVYOSvn!{ZJgWaXjH6swZ~c0r-qNIXCiYSSkz!=iLpbAC&OjI08%Zbz7U7c{k!LS5H4n)c9SOy3zIiwO4AvVl; zFbi#gf*M2>P=DPPQKTVLY98@l_T~Eq21}`*f_nH8oE1bxK}t_%4|^IFY`K~8Wfg3W z#YQe*Bc^PUuOBS1!y>vyQW!dn2Nc=E2&{^O?S_h4ARtOXJ|Myp*8>@RMWJA{LP`Hy zEc`EFC4)Q!wi0v3UN)(FVs^}UQ|1;hY0lWdwt4)Ct?Y(t+Lg97ByA0e=DiDN7LMNE znrJ-s$aefmbsbn$ZlAw-ep#icESXY+)y3{imo`2uZG7CcVd}`tj_K|vRXb+3-rjR_ zPr7PjvT9?xYDW_Pw=Xn7Fs1#SjVd`ITPmk2U6Us=bX~$&odIl4JVBGsEsH2h?V8>- zt9w}Le&Y6g;JW9UF=eV+(p8>hm1n*>Rkc6kYD&92Ntb7CB<($z^d4MvwP#$Ov}(`RSfQe~T#l!~(E#|T`)70jR92g|Hzw_knVP!Wp_`!$-SD37jxN*a1>-|=IT#((#pRNnI6QYrv!rYc&Nw^P=2sAZX^O=)XG(%O)5HD+ARnc9s|Wm5@MX)9ee zQ}&vlZ`?fgJK3kb`v?i>F?#m$nqln4&_Q`7})5e;lv1YdRyQTAe->FS(Y)`bm zlBhksXza~YRZpJ$Rdw@}4%;zK8`IXhq_uAL+t3lsaQ|rLFZzYkkJG zIaAv*XTSMIrltYVZY=pNq771OP0NxU5j=J6RN6=e24Qh=xCeyJBI2-u6Y}CPqnuDOstFZr1UZqh(7;p7G|R~D zsU=}^8w!zYiNv7s-BN&{_{W7}J)`@7A{4iPP^>3Hu_12g1kwLI>x(Rz0ztXq&f`4o?fF2EwzzjZ~8CQRPc%)bmowh)UV$@|LW7N4~ zj7oB2B!`kcP_ilCD=8LNU!GC56$TGpW6fFdBzV)@lIu(XrW z$JTDFz%)ObnD;kO`cIorEOmd4Qboo-*Jt5RnE@P!2{R~kITd4Qy)=sXH;CV*2q~>k z{KioMwF~8%RZrY3!GSm}zh0-LbYKL)f70B6!lojMRaOBgte}_x&y>eics8D|cO0#) zc2JOuw9h!eMBpq`YvUSFf!1M9*C41s)WUGsb*^8 
zTJZK-p{z#baV1gqK5zf&7#=r~C!VwDfoisHm9l3cJw;{D47~C4)?O#|8fvUBpoFbb zoB21+d?6)3iG~%*9v;t;N2)VdB8$z0HEt<7YmERY&ViL_DjcsrpzQ6ETFbxVmOPad zD0_NbSUeRX9&+TRz0B7)dFpP{*f!ow*^X|;`)rF1Qt;%`K)^A=69YZ=U$=# zzfd&{fy)`_HDPcvz)U=lA*zj}EpkJEFwqvdVT|=d&;@ECQFKZ;_5uj^gFf)1@`-*| zy>Lu}`UQuo9XUMXMj*ylN?MP%zyj+9r@WY-hibB?eL1@)Z^@;J2MsS)fHqez_?@u~ z(LcB_Gy}HWymCpDPYr^BV#hb-E6n=M5pC6%^NJFY;ieF8EXr5pN)#A=z2J4l@~!8< zSL{5|z|f}z&aDQ-VawGxo9velsMvmAF1~K<_%+(C~qj=ONj(lDRCCHNzM9r1^a}2sL_zMiag29^@Alo9U zDfgEUbKAIsm_X>}s~Gzl1cEuQZKADRFpB04A2}=Ht|5peublcqa8|&59aEJ^)Z8Ri z)QJI_@C9>@wnXFqUt!L53~+r$Z4l(u&}>Z9M!O-4uUesvt~^BkC#>MVD*lK*nyIdx z>;hZ*Sk5C z!ket{rYp83@&ADZH3U=YPpa#u4rd%LbjwLJwm)>VXPixQjUQ~dyCLb^He<#^0K-kg zyT|5UPc`g)lDXv)ksH9#w6c zXTLY};n4RYA4VS3rnbKZ+F^6+dqa1I(oH*)O*=C!o)7eQ^&eR7S~8xt%!Z90?7qAE zgMD}Rfl|7wX6D4~*n9jP{=JDi6DilWB_mZ?D`MXLu%i2!nsU`cEAF3FZCxn)yXx;( zKUkmI)eE{dl!6jT*S7iLl2LcwQf-M$-ef#>-&D+16|^k^T}hc zr}p_~kIxM!8+K{=8@4BQvFU~Z(BmQF`R~B{38p7$JM=t?JIsfNTs>8a|6ne~u&%uVUO(K6;eRY| z-vO^5Sz#%zkjd3E__`F#h8$t=&z^pVTTK{?jIJk5iMdV?5!IVcbdL zRjQsw4Ir9-P?`ZILp?>s;RYNSfooJQ7o5Q4z@$Q~pQFJL2mB8hGVo}v&t?-&Z{S;qIb=@-65!r0V5=A0&9Z$u`QonqJ zBpX69UG?ORK5#un0fIS>AbC&-hg0UFr5IUU0V;>QQ0+MjzKs)#D~fSAp~m+mnIwd} zP5kB~Y%DNHBxz!dr~MbfzXsh3Y2vIG6+Ia6&Op|k*UMKEdGd(phXa267f2N%_-^{S zS`;L^cx!vEu2XoUCEEMkNJPmIaA^$3{C5>_MiY#I2b>`a?$EwL)(?WcU=-irY%>m7 zs_si+NK^*K{o%WCorI+FT2di>yi-sOa4Z{k>qQSqwTJ}@RneHBJA667k}DaaG>60> z=s^mH1A)-kNEo~@AS;p5tP9$2lT9h)0`1xjAt54Y)C)LmN zl&K`)T)&`vuz%6mclB^asY@&ENu_=2)!8EnrF~JkA)nMYdx<2q{mSM5-ytpW-kV}? 
zgr~!^?0bWE1|MnLVAm{h!Y-&Xe(ltspPFiXq;&rNnS(NxeM-pz1FZ!;{u0h@^ZxnM z3+hDqfko}X$6E7eODf2I9!2Bh^~ysP)JGM%4w>Sky0Sxi6(8+YK|BCixhJ=-;a*tr zbxq}6zC>S5@Uulu0MCSMjq3>FI>s#o9`uMP=EF(Wu_nKk@+$;ZY8%uJzJ$eA7NeJx zQid2y!I3f2{B z0Z}h+$*+!HOzGwR6|~L03jwdg6uJwxC+;pL{%Z&XLudYq5qA%hQTO4@7=R60yj9fY z){vFJA!;0wo~Si>G4>$_Ux5JBUP9a7g*bSmV)Hm9K++;T+eCUM8Ci1vKIDHD$@vOI za^(EjXiGTV^RWlsMdR5*sy#QS7D+Tuwdd!WNz&$DTgxVu8H45Ro!53w9Z4CgX84rB zoiUQ*jG2a+i#J>6lym2kEqfjr_b#a@TREH%70|Khp95&*9@K}h15fX3S5jdy6Av6N zw4^4D2VL0}edv?-}y6x<;$y0_8o+idHou28Cg6 z_mjnPJ4Y_9U`GV1HWro>n^1tU6UUQ0aYeEHtC-KuP_B$r7R;g9sI=ZD zly4}bO|U9)F|B%-5(x@CB65cY#k^-2TOJbnQnRWe_~cB*(krP=2JCTDTOsj;UknnFgj242YTutqu&&c`uov)>TcyC(|s| zKs>pCtFC^eIR-A;0!K)jR_8+S-hOl*jUh+@fnF-+K6e zL#d6Vlp(fD>W=F#F>M z=Wt}2khD(aPer#yX^Oag&6z z^d{b0&q0?!!GS!WOKb&=rZ|4q)rU%-pD%;X0h-bk6&!Cp7h`#?1KRH_Xn)(9?RS9o zbVP#8zhR6!A?H{=MT)@@Sk5^c{nf;luYXy>U`4-c)@=9i>VD^Hnw4sjG$`oH38{4$ zOFPI3#mDkUeh$u3*MAs0Sx`!WxQ1 zyy2*L>yke6%1Jo+=!Fu3a*z#;@*0naC@B1TQF#K*4cy%5mh9~uawS{5te(>W-B^fR z%HvDh@mB@JUm9pR1Qr;Ua|1^I4ly!#JaD^BG5)^nWWnr|61E>MJ0MCZ-F)fiXD2 zpuxsesLPVGBL1*g6{?u$bHh!pchJYBpMc2LPGIyMB7#s~EV7$EM^w=Byz^X}7}Jvh zhKzAG6pCCr&zGNT%SQF5rK2s#CQ`hh0K5d*dER)g?Yy3-0+8iL)e#e*t&tHp=!)X6 zO%OcKw~=Y2_o9(%zlhg)+C!fP?@3VQN#-ipXddb$D8>cK2!W2~!NrNngZD-T2EZ1O zo4-6HQ4U@J#8k=CFb|8`_$v5Sc#zzD0bm z7{2l)a5YFJVA#O)05lPd(wHu2US&gY_#!G^uz>ys7Z8{&I516!CJA0WFdFl0jqnEX z?ZKbv5af|fd}ScyA7fb07SA>hT(%CgG0*0Wo40P-xOp>QQII$24-Svzv+Tuw>4P_C zQ#X0HdEmz%+!{d%?QP|C7?L)*j}SMwD~gQ@DpISU7K`!vY;h>cSLsCpCtkB>BV%Ic zMJHMgB@+0Z4lQ_CM&U;^Vqlq7wc((`3rBGb1#F3uk47R)|2VsjUb;Q~R*t9vl5m($ z#KF%|Y<7@g(vGO{748C_<5KgnIIMtB9~IM}99hAkkeLp*YAd zN@`?0AQS0GP#z=PG>bE@&sWc@#l+vBoq%s7W-{@YVsZmOI^@Z&Ts%_cE3@oFT)u>* zh--K^&1)r&!fUbvz}qBniI32NCVLMUe$+)|w+tXcI>wu_(7mYQ(~wNM$#2Le10Q?A z_D&NJmdQ0;F7_k)YX`74kR9!&xxdCvIf+xG6Xe7pAaTV)!H0K|vRe zWN!j;1_E>evQenyP}hT>mx&Ieof58e<7S9=uF$U?l5lyUgY+dh6h`wP_k9fB#DLgm z$U(u&_(@K%QS%4WC~?mQizP51l!gS`OYC@m#LqEgZ{o&AV}kZ@_YpFhZnK~!0|FLP 
z(pzHY!JY}$+^(LrQ*z(IWRvs@5`;1Zv$)QrtL1`;B>ViKAY9Uidt`X68E%uq{Wd`t z7-6zk$`lZVVSyihA>hOpJBmgI(MT$VqH#$%;tz(oUqkM<(4O?F675NkO=W3QL(uBx3?zh}B* zO4D1D^wu=JD@pH4(R)%=dlOpcvYs-!BtvTXpDk+7Kh_%F)?U+2*{&O&XbfND|ANO; zYyD0AHPaJk%bYFg+>my9lTPoXDN|8>yXIz1qOAF9SI$cNt;QSfY4?&`rlaxHT6;%3 z!!J>?^_7{*=9yZExg9^R+cm_y?zJHb1P{JiTKd`fdvvqb5>Alta4GwC+MiIK&bDXV(6&Q_{^M%h%FrS+7pB2!kEF_&aY zyct_-rlciP-n4AdmZ`6vf-_2kWl7#6)0I4~5KpZe4y7tOz;V2^I$c_yEUjNGZG<~| z&5$FwWeG<;n26dc2+hWN|(1J%Ue?A>!%b?O3M?K zt;y2XjJ@i{sp(URx|dBz zh&*^LQTK&M_E)iDY&zrKk!aj8)s=(V_JjQ&1{1IP6WjU|{X>bm;YapR7K=>Vwsc!Z zvaKVrp=0XEqP-RO4=*a_Lii*Wzi15 z!jK)U?nm|>82B<589X?r-?GrSa4}H_H6Q&Y&WXWvNS3iW&AhnIa&7Tz%Eg| z^Q8yv54I%AyBD=RIl_P2@O1+@La=}KETo|9b#P*B>dWJ>ao+a5>JO{uEejJ5UjFIX zAD?~jQljRywDoM#diJx&r7i?7KzZ-N%L^TenuBR;d(zte8Ju!M?q}dKVOnNaRu8JGH~t|1~|(3 zqisRE7d8PnP2bOb3K_uS;qDBCFeNhCvRx}vKDARa>n|w7FDU)TRP}$O=wDOrk16_N zs`6v1?qkaJF;xR0_{rF+;3`DM@#QCW7m#^%qu@H6sXqojMl~gnTiuwc-v&2g%1hvC z2!3oQAIoLtD)Xl`RK>>S`VxnJS?5rxmo}5S>N1X#P}dV{WyaNzslNp9)g?;`Rh4;J zrA1H^fNp8l$*-!GdS&Zm9kLlX1jqMW=X~jWVBUGROZ;AlJ|*eDJ0`oLkX0>H7%m;s tT4fb87oSq_`n1Uaf$OAPR Any: + """Access object attributes safely for both SDK objects and dicts.""" + if obj is None: + return default + if isinstance(obj, dict): + return obj.get(key, default) + return getattr(obj, key, default) + + def _extract_stream_error(self, event: Any) -> str: + """Extract error message from a response.failed event.""" + response = self._get_attr(event, "response") + error = self._get_attr(response, "error") + message = self._get_attr(error, "message") + if message: + return str(message) + return "streaming request failed" + + def _extract_completed_text(self, event: Any) -> str: + """Extract assistant output text from a response.completed event.""" + response = self._get_attr(event, "response") + output_items = self._get_attr(response, "output", []) or [] + + text_parts = [] + for item in output_items: + if self._get_attr(item, "type") != "message": + continue + for part in 
self._get_attr(item, "content", []) or []: + if self._get_attr(part, "type") == "output_text": + text = self._get_attr(part, "text", "") + if text: + text_parts.append(str(text)) + + return "".join(text_parts) def _sync_response(self, model: str) -> str: """Non-streaming response with tool support.""" @@ -225,6 +260,7 @@ class ChatClient: while iteration < max_iterations: iteration += 1 assistant_text = "" + stream_error = None tool_calls = {} # Dict to track tool calls by item_id tool_calls_list = [] # Final list of completed tool calls assistant_content = [] @@ -244,6 +280,15 @@ class ChatClient: if event.type == "response.output_text.delta": assistant_text += event.delta live.update(Markdown(assistant_text)) + elif event.type == "response.completed": + # Some providers may emit final text only in response.completed. + if not assistant_text: + completed_text = self._extract_completed_text(event) + if completed_text: + assistant_text = completed_text + live.update(Markdown(assistant_text)) + elif event.type == "response.failed": + stream_error = self._extract_stream_error(event) elif event.type == "response.output_item.added": if hasattr(event, 'item') and event.item.type == "function_call": # Start tracking a new tool call @@ -270,6 +315,10 @@ class ChatClient: except json.JSONDecodeError: self.console.print(f"[red]Error parsing tool arguments JSON[/red]") + if stream_error: + self.console.print(f"[bold red]Error:[/bold red] {stream_error}") + return "" + # Build assistant content if assistant_text: assistant_content.append({"type": "output_text", "text": assistant_text}) From 9bf562bf3ac8d5102eb856ceb63d364ac3357773 Mon Sep 17 00:00:00 2001 From: Anibal Angulo Date: Fri, 6 Mar 2026 23:03:21 +0000 Subject: [PATCH 3/3] Add chat client to admin UI --- frontend/admin/package-lock.json | 22 + frontend/admin/package.json | 5 +- frontend/admin/src/router.ts | 6 + frontend/admin/src/views/Chat.vue | 550 +++++++++++++++++++++++++ frontend/admin/src/views/Dashboard.vue | 
28 +- frontend/admin/vite.config.ts | 4 + scripts/chat.py | 2 +- 7 files changed, 613 insertions(+), 4 deletions(-) create mode 100644 frontend/admin/src/views/Chat.vue diff --git a/frontend/admin/package-lock.json b/frontend/admin/package-lock.json index 2341e79..b50320e 100644 --- a/frontend/admin/package-lock.json +++ b/frontend/admin/package-lock.json @@ -9,6 +9,7 @@ "version": "0.1.0", "dependencies": { "axios": "^1.6.0", + "openai": "^6.27.0", "vue": "^3.4.0", "vue-router": "^4.2.0" }, @@ -1438,6 +1439,27 @@ "node": "^10 || ^12 || ^13.7 || ^14 || >=15.0.1" } }, + "node_modules/openai": { + "version": "6.27.0", + "resolved": "https://registry.npmjs.org/openai/-/openai-6.27.0.tgz", + "integrity": "sha512-osTKySlrdYrLYTt0zjhY8yp0JUBmWDCN+Q+QxsV4xMQnnoVFpylgKGgxwN8sSdTNw0G4y+WUXs4eCMWpyDNWZQ==", + "license": "Apache-2.0", + "bin": { + "openai": "bin/cli" + }, + "peerDependencies": { + "ws": "^8.18.0", + "zod": "^3.25 || ^4.0" + }, + "peerDependenciesMeta": { + "ws": { + "optional": true + }, + "zod": { + "optional": true + } + } + }, "node_modules/path-browserify": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/path-browserify/-/path-browserify-1.0.1.tgz", diff --git a/frontend/admin/package.json b/frontend/admin/package.json index 3ca81dc..59cad53 100644 --- a/frontend/admin/package.json +++ b/frontend/admin/package.json @@ -9,9 +9,10 @@ "preview": "vite preview" }, "dependencies": { + "axios": "^1.6.0", + "openai": "^6.27.0", "vue": "^3.4.0", - "vue-router": "^4.2.0", - "axios": "^1.6.0" + "vue-router": "^4.2.0" }, "devDependencies": { "@vitejs/plugin-vue": "^5.0.0", diff --git a/frontend/admin/src/router.ts b/frontend/admin/src/router.ts index 2df7166..429f38d 100644 --- a/frontend/admin/src/router.ts +++ b/frontend/admin/src/router.ts @@ -1,5 +1,6 @@ import { createRouter, createWebHistory } from 'vue-router' import Dashboard from './views/Dashboard.vue' +import Chat from './views/Chat.vue' const router = createRouter({ history: 
createWebHistory('/admin/'), @@ -8,6 +9,11 @@ const router = createRouter({ path: '/', name: 'dashboard', component: Dashboard + }, + { + path: '/chat', + name: 'chat', + component: Chat } ] }) diff --git a/frontend/admin/src/views/Chat.vue b/frontend/admin/src/views/Chat.vue new file mode 100644 index 0000000..1816062 --- /dev/null +++ b/frontend/admin/src/views/Chat.vue @@ -0,0 +1,550 @@ + + + + + diff --git a/frontend/admin/src/views/Dashboard.vue b/frontend/admin/src/views/Dashboard.vue index 4f73255..5b4896e 100644 --- a/frontend/admin/src/views/Dashboard.vue +++ b/frontend/admin/src/views/Dashboard.vue @@ -1,7 +1,10 @@