#!/usr/bin/env python3
# /// script
# requires-python = ">=3.11"
# dependencies = [
#     "rich>=13.7.0",
#     "httpx>=0.27.0",
# ]
# ///

"""
Terminal chat interface for go-llm-gateway.

Usage:
    python chat.py
    python chat.py --url http://localhost:8080
    python chat.py --model gemini-2.0-flash-exp
    python chat.py --token $(gcloud auth print-identity-token)
"""

import argparse
import json
import sys
from typing import Optional

import httpx
from rich.console import Console
from rich.live import Live
from rich.markdown import Markdown
from rich.panel import Panel
from rich.prompt import Prompt
from rich.table import Table


class ChatClient:
    """Stateful client for the gateway's /v1/responses endpoint.

    Keeps the full conversation history and re-sends it on every turn.
    """

    def __init__(self, base_url: str, token: Optional[str] = None):
        self.base_url = base_url.rstrip("/")
        self.token = token
        # Full conversation history; sent as the "input" field of every request.
        self.messages: list[dict] = []
        self.console = Console()

    def _headers(self) -> dict:
        """Build request headers, attaching the Bearer token when configured."""
        headers = {"Content-Type": "application/json"}
        if self.token:
            headers["Authorization"] = f"Bearer {self.token}"
        return headers

    def chat(self, user_message: str, model: str, stream: bool = True) -> str:
        """Send a chat message and return the assistant's reply text.

        Args:
            user_message: The user's turn, appended to the running history.
            model: Model identifier forwarded to the gateway.
            stream: Use SSE streaming with live rendering when True.

        Raises:
            httpx.HTTPStatusError: On a non-2xx response.

        The user message is appended to history before the request; on any
        failure it is rolled back so a failed request does not poison the
        history sent on subsequent turns (fixes history pollution bug).
        """
        self.messages.append({
            "role": "user",
            "content": [{"type": "input_text", "text": user_message}]
        })

        payload = {
            "model": model,
            "input": self.messages,
            "stream": stream
        }

        try:
            if stream:
                return self._stream_response(payload, model)
            return self._sync_response(payload, model)
        except Exception:
            # Roll back the just-appended user message so history stays
            # consistent with what the server actually accepted.
            self.messages.pop()
            raise

    def _sync_response(self, payload: dict, model: str) -> str:
        """Non-streaming request; returns the concatenated output text."""
        with self.console.status(f"[bold blue]Thinking ({model})..."):
            resp = httpx.post(
                f"{self.base_url}/v1/responses",
                json=payload,
                headers=self._headers(),
                timeout=60.0
            )
            resp.raise_for_status()

        data = resp.json()
        assistant_text = ""

        # Concatenate every output_text block across all output messages.
        for msg in data.get("output", []):
            for block in msg.get("content", []):
                if block.get("type") == "output_text":
                    assistant_text += block.get("text", "")

        self.messages.append({
            "role": "assistant",
            "content": [{"type": "output_text", "text": assistant_text}]
        })

        return assistant_text

    def _stream_response(self, payload: dict, model: str) -> str:
        """Streaming request with live Markdown rendering of the reply."""
        assistant_text = ""

        with httpx.stream(
            "POST",
            f"{self.base_url}/v1/responses",
            json=payload,
            headers=self._headers(),
            timeout=60.0
        ) as resp:
            # BUG FIX: a streamed response body is not loaded automatically,
            # so accessing e.response.text in the caller's HTTPStatusError
            # handler would raise httpx.ResponseNotRead. Load the body first
            # when we are about to raise.
            if resp.status_code >= 400:
                resp.read()
            resp.raise_for_status()

            with Live(console=self.console, refresh_per_second=10) as live:
                for line in resp.iter_lines():
                    if not line.startswith("data: "):
                        continue

                    data_str = line[6:]  # Remove "data: " SSE prefix

                    try:
                        chunk = json.loads(data_str)
                    except json.JSONDecodeError:
                        # Ignore keep-alives / non-JSON frames.
                        continue

                    if chunk.get("done"):
                        break

                    delta = chunk.get("delta", {})
                    for block in delta.get("content", []):
                        if block.get("type") == "output_text":
                            assistant_text += block.get("text", "")

                    # Render markdown in real-time as chunks arrive.
                    live.update(Markdown(assistant_text))

        self.messages.append({
            "role": "assistant",
            "content": [{"type": "output_text", "text": assistant_text}]
        })

        return assistant_text

    def clear_history(self):
        """Clear conversation history."""
        self.messages = []


def print_models_table():
    """Print the table of known models, their IDs, and shortcut aliases."""
    table = Table(title="Available Models", show_header=True, header_style="bold magenta")
    table.add_column("Provider", style="cyan")
    table.add_column("Model ID", style="green")
    table.add_column("Alias", style="yellow")

    table.add_row("OpenAI", "gpt-4o", "gpt4")
    table.add_row("OpenAI", "gpt-4o-mini", "gpt4-mini")
    table.add_row("OpenAI", "o1", "o1")
    table.add_row("Anthropic", "claude-3-5-sonnet-20241022", "claude")
    table.add_row("Anthropic", "claude-3-5-haiku-20241022", "haiku")
    table.add_row("Google", "gemini-2.0-flash-exp", "gemini")
    table.add_row("Google", "gemini-1.5-pro", "gemini-pro")

    Console().print(table)


def main():
    """Interactive REPL: read user input, dispatch /commands, relay chat turns."""
    parser = argparse.ArgumentParser(description="Chat with go-llm-gateway")
    parser.add_argument("--url", default="http://localhost:8080", help="Gateway URL")
    parser.add_argument("--model", default="gemini-2.0-flash-exp", help="Model to use")
    parser.add_argument("--token", help="Auth token (Bearer)")
    parser.add_argument("--no-stream", action="store_true", help="Disable streaming")
    args = parser.parse_args()

    console = Console()
    client = ChatClient(args.url, args.token)
    current_model = args.model
    stream_enabled = not args.no_stream

    # Welcome banner
    console.print(Panel.fit(
        "[bold cyan]go-llm-gateway Chat Interface[/bold cyan]\n"
        f"Connected to: [green]{args.url}[/green]\n"
        f"Model: [yellow]{current_model}[/yellow]\n"
        f"Streaming: [{'green' if stream_enabled else 'red'}]{stream_enabled}[/]\n\n"
        "Commands:\n"
        "  [bold]/model [/bold] - Switch model\n"
        "  [bold]/models[/bold] - List available models\n"
        "  [bold]/stream[/bold] - Toggle streaming\n"
        "  [bold]/clear[/bold] - Clear conversation\n"
        "  [bold]/quit[/bold] or [bold]/exit[/bold] - Exit\n"
        "  [bold]/help[/bold] - Show this help",
        title="Welcome",
        border_style="cyan"
    ))

    # Short aliases accepted by /model, resolved to full model IDs.
    model_aliases = {
        "gpt4": "gpt-4o",
        "gpt4-mini": "gpt-4o-mini",
        "o1": "o1",
        "claude": "claude-3-5-sonnet-20241022",
        "haiku": "claude-3-5-haiku-20241022",
        "gemini": "gemini-2.0-flash-exp",
        "gemini-pro": "gemini-1.5-pro"
    }

    while True:
        try:
            user_input = Prompt.ask("\n[bold blue]You[/bold blue]")

            if not user_input.strip():
                continue

            # Handle slash commands locally; anything else goes to the LLM.
            if user_input.startswith("/"):
                cmd_parts = user_input.split(maxsplit=1)
                cmd = cmd_parts[0].lower()

                if cmd in ["/quit", "/exit"]:
                    console.print("[yellow]Goodbye! 👋[/yellow]")
                    break

                elif cmd == "/help":
                    console.print(Panel(
                        "[bold]Commands:[/bold]\n"
                        "  /model  - Switch model\n"
                        "  /models - List available models\n"
                        "  /stream - Toggle streaming\n"
                        "  /clear  - Clear conversation\n"
                        "  /quit   - Exit",
                        title="Help",
                        border_style="cyan"
                    ))

                elif cmd == "/models":
                    print_models_table()

                elif cmd == "/model":
                    if len(cmd_parts) < 2:
                        console.print("[red]Usage: /model [/red]")
                        continue

                    new_model = cmd_parts[1]
                    # Resolve alias to full model ID; pass through unknown names.
                    new_model = model_aliases.get(new_model, new_model)
                    current_model = new_model
                    console.print(f"[green]Switched to model: {current_model}[/green]")

                elif cmd == "/stream":
                    stream_enabled = not stream_enabled
                    console.print(f"[green]Streaming {'enabled' if stream_enabled else 'disabled'}[/green]")

                elif cmd == "/clear":
                    client.clear_history()
                    console.print("[green]Conversation history cleared[/green]")

                else:
                    console.print(f"[red]Unknown command: {cmd}[/red]")

                continue

            # Send message to LLM
            try:
                console.print(f"\n[bold green]Assistant ({current_model})[/bold green]")
                response = client.chat(user_input, current_model, stream=stream_enabled)

                if not stream_enabled:
                    # Streaming already rendered live; render once for sync mode.
                    console.print(Markdown(response))

            except httpx.HTTPStatusError as e:
                console.print(f"[bold red]Error {e.response.status_code}:[/bold red] {e.response.text}")
            except Exception as e:
                console.print(f"[bold red]Error:[/bold red] {e}")

        except KeyboardInterrupt:
            # Ctrl-C cancels the current prompt but keeps the session alive.
            console.print("\n[yellow]Use /quit to exit[/yellow]")
        except EOFError:
            break


if __name__ == "__main__":
    main()