#!/usr/bin/env python3
# /// script
# requires-python = ">=3.11"
# dependencies = [
#   "rich>=13.7.0",
#   "httpx>=0.27.0",
# ]
# ///
"""Terminal chat interface for go-llm-gateway.

Usage:
    python chat.py
    python chat.py --url http://localhost:8080
    python chat.py --model gemini-2.0-flash-exp
    python chat.py --token $(gcloud auth print-identity-token)
"""

import argparse
import json
import sys
from typing import Optional

import httpx
from rich.console import Console
from rich.live import Live
from rich.markdown import Markdown
from rich.panel import Panel
from rich.prompt import Prompt
from rich.table import Table


class ChatClient:
    """Stateful chat client that keeps the conversation history across turns.

    Each call to :meth:`chat` appends the user turn and the assistant reply
    to ``self.messages``, which is sent in full as the ``input`` of every
    request so the gateway sees the whole conversation.
    """

    def __init__(self, base_url: str, token: Optional[str] = None):
        self.base_url = base_url.rstrip("/")  # normalize so path joins don't double the slash
        self.token = token
        self.messages: list[dict] = []  # alternating user/assistant turns
        self.console = Console()

    def _headers(self) -> dict:
        """Build request headers, adding Bearer auth only when a token is set."""
        headers = {"Content-Type": "application/json"}
        if self.token:
            headers["Authorization"] = f"Bearer {self.token}"
        return headers

    def chat(self, user_message: str, model: str, stream: bool = True):
        """Send one user message and return the assistant's reply text.

        Args:
            user_message: Plain-text user input for this turn.
            model: Model identifier passed through to the gateway.
            stream: When True, render the reply incrementally via SSE.

        Raises:
            httpx.HTTPStatusError: On a non-2xx response from the gateway.

        The user turn is appended to history *before* the request; if the
        request fails, it is rolled back so a failed turn does not leave an
        unanswered user message in the history sent on the next request.
        """
        self.messages.append({
            "role": "user",
            "content": [{"type": "input_text", "text": user_message}]
        })

        payload = {
            "model": model,
            "input": self.messages,
            "stream": stream
        }

        try:
            if stream:
                return self._stream_response(payload, model)
            return self._sync_response(payload, model)
        except Exception:
            # Roll back the user turn: the caller catches request errors and
            # keeps chatting, and a dangling user message would corrupt the
            # conversation sent on the next turn.
            self.messages.pop()
            raise

    def _sync_response(self, payload: dict, model: str) -> str:
        """Non-streaming request; returns the full assistant text at once."""
        with self.console.status(f"[bold blue]Thinking ({model})..."):
            resp = httpx.post(
                f"{self.base_url}/v1/responses",
                json=payload,
                headers=self._headers(),
                timeout=60.0
            )
            resp.raise_for_status()

        data = resp.json()
        # Concatenate every output_text block across all output messages.
        assistant_text = ""
        for msg in data.get("output", []):
            for block in msg.get("content", []):
                if block.get("type") == "output_text":
                    assistant_text += block.get("text", "")

        # Record the assistant turn so the next request carries full history.
        self.messages.append({
            "role": "assistant",
            "content": [{"type": "output_text", "text": assistant_text}]
        })
        return assistant_text

    def _stream_response(self, payload: dict, model: str) -> str:
        """Streaming request over SSE with live markdown rendering.

        Parses ``data: ...`` lines, accumulates ``output_text`` deltas, and
        re-renders the partial markdown on each chunk. Stops when a chunk
        carries a truthy ``done`` flag.
        """
        assistant_text = ""

        with httpx.stream(
            "POST",
            f"{self.base_url}/v1/responses",
            json=payload,
            headers=self._headers(),
            timeout=60.0
        ) as resp:
            resp.raise_for_status()

            with Live(console=self.console, refresh_per_second=10) as live:
                for line in resp.iter_lines():
                    # Only SSE data lines carry payloads; skip comments/blanks.
                    if not line.startswith("data: "):
                        continue

                    data_str = line[6:]  # Remove "data: " prefix
                    try:
                        chunk = json.loads(data_str)
                    except json.JSONDecodeError:
                        # Tolerate non-JSON keep-alive/terminator lines.
                        continue

                    if chunk.get("done"):
                        break

                    delta = chunk.get("delta", {})
                    for block in delta.get("content", []):
                        if block.get("type") == "output_text":
                            assistant_text += block.get("text", "")

                    # Render markdown in real-time as text accumulates.
                    live.update(Markdown(assistant_text))

        # Record the assistant turn so the next request carries full history.
        self.messages.append({
            "role": "assistant",
            "content": [{"type": "output_text", "text": assistant_text}]
        })
        return assistant_text

    def clear_history(self):
        """Clear conversation history."""
        self.messages = []


def print_models_table(base_url: str, headers: dict):
    """Fetch and print available models from the gateway."""
    console = Console()
    try:
        resp = httpx.get(f"{base_url}/v1/models", headers=headers, timeout=10)
        resp.raise_for_status()
        data = resp.json().get("data", [])
    except Exception as e:
        # Best-effort listing: report and return rather than crash the REPL.
        console.print(f"[red]Failed to fetch models: {e}[/red]")
        return

    table = Table(title="Available Models", show_header=True, header_style="bold magenta")
    table.add_column("Provider", style="cyan")
    table.add_column("Model ID", style="green")
    for model in data:
        table.add_row(model.get("provider", ""), model.get("id", ""))
    console.print(table)


def main():
    """Run the interactive chat REPL (slash commands + LLM turns)."""
    parser = argparse.ArgumentParser(description="Chat with go-llm-gateway")
    parser.add_argument("--url", default="http://localhost:8080", help="Gateway URL")
    parser.add_argument("--model", default="gemini-2.0-flash-exp", help="Model to use")
    parser.add_argument("--token", help="Auth token (Bearer)")
    parser.add_argument("--no-stream", action="store_true", help="Disable streaming")
    args = parser.parse_args()

    console = Console()
    client = ChatClient(args.url, args.token)
    current_model = args.model
    stream_enabled = not args.no_stream

    # Welcome banner
    console.print(Panel.fit(
        "[bold cyan]go-llm-gateway Chat Interface[/bold cyan]\n"
        f"Connected to: [green]{args.url}[/green]\n"
        f"Model: [yellow]{current_model}[/yellow]\n"
        f"Streaming: [{'green' if stream_enabled else 'red'}]{stream_enabled}[/]\n\n"
        "Commands:\n"
        "  [bold]/model <name>[/bold] - Switch model\n"
        "  [bold]/models[/bold] - List available models\n"
        "  [bold]/stream[/bold] - Toggle streaming\n"
        "  [bold]/clear[/bold] - Clear conversation\n"
        "  [bold]/quit[/bold] or [bold]/exit[/bold] - Exit\n"
        "  [bold]/help[/bold] - Show this help",
        title="Welcome",
        border_style="cyan"
    ))

    # Short aliases accepted by /model, expanded to full model IDs.
    model_aliases = {
        "gpt4": "gpt-4o",
        "gpt4-mini": "gpt-4o-mini",
        "o1": "o1",
        "claude": "claude-3-5-sonnet-20241022",
        "haiku": "claude-3-5-haiku-20241022",
        "gemini": "gemini-2.0-flash-exp",
        "gemini-pro": "gemini-1.5-pro"
    }

    while True:
        try:
            user_input = Prompt.ask("\n[bold blue]You[/bold blue]")
            if not user_input.strip():
                continue

            # Handle slash commands locally; everything else goes to the LLM.
            if user_input.startswith("/"):
                cmd_parts = user_input.split(maxsplit=1)
                cmd = cmd_parts[0].lower()

                if cmd in ["/quit", "/exit"]:
                    console.print("[yellow]Goodbye! 👋[/yellow]")
                    break
                elif cmd == "/help":
                    console.print(Panel(
                        "[bold]Commands:[/bold]\n"
                        "  /model <name> - Switch model\n"
                        "  /models - List available models\n"
                        "  /stream - Toggle streaming\n"
                        "  /clear - Clear conversation\n"
                        "  /quit - Exit",
                        title="Help",
                        border_style="cyan"
                    ))
                elif cmd == "/models":
                    print_models_table(args.url, client._headers())
                elif cmd == "/model":
                    if len(cmd_parts) < 2:
                        console.print("[red]Usage: /model <name>[/red]")
                        continue
                    new_model = cmd_parts[1]
                    # Expand known aliases; pass unknown names through as-is.
                    new_model = model_aliases.get(new_model, new_model)
                    current_model = new_model
                    console.print(f"[green]Switched to model: {current_model}[/green]")
                elif cmd == "/stream":
                    stream_enabled = not stream_enabled
                    console.print(f"[green]Streaming {'enabled' if stream_enabled else 'disabled'}[/green]")
                elif cmd == "/clear":
                    client.clear_history()
                    console.print("[green]Conversation history cleared[/green]")
                else:
                    console.print(f"[red]Unknown command: {cmd}[/red]")
                continue

            # Send message to LLM
            try:
                console.print(f"\n[bold green]Assistant ({current_model})[/bold green]")
                response = client.chat(user_input, current_model, stream=stream_enabled)
                if not stream_enabled:
                    # Streaming renders incrementally; non-streaming renders here.
                    console.print(Markdown(response))
            except httpx.HTTPStatusError as e:
                console.print(f"[bold red]Error {e.response.status_code}:[/bold red] {e.response.text}")
            except Exception as e:
                console.print(f"[bold red]Error:[/bold red] {e}")

        except KeyboardInterrupt:
            # Ctrl-C cancels the current prompt but keeps the session alive.
            console.print("\n[yellow]Use /quit to exit[/yellow]")
        except EOFError:
            break


if __name__ == "__main__":
    main()