import { responseResourceSchema, type ResponseResource } from "./schemas.ts";
import { parseSSEStream, type SSEParseResult } from "./sse-parser.ts";

/** Connection settings used to reach the API under test. */
export interface TestConfig {
  /** Base URL of the server; the request path `/v1/responses` is appended to it. */
  baseUrl: string;
  /** Credential sent with each request; when empty, no auth header is sent. */
  apiKey: string;
  /** Name of the HTTP header that carries the credential. */
  authHeaderName: string;
  /** When true, the credential is sent as `Bearer <apiKey>` instead of the raw key. */
  useBearerPrefix: boolean;
  /** Model identifier placed in every request body. */
  model: string;
}

/** Progress or outcome record for a single test. */
export interface TestResult {
  id: string;
  name: string;
  description: string;
  status: "pending" | "running" | "passed" | "failed";
  /** Elapsed wall-clock time in milliseconds. */
  duration?: number;
  /** Request body associated with this result. */
  request?: unknown;
  /** Parsed response, or the raw payload/error text when the test failed early. */
  response?: unknown;
  /** Human-readable failure messages; empty when the test passed. */
  errors?: string[];
  /** Number of SSE events received (streaming tests only). */
  streamEvents?: number;
}

/** Extra information handed to validators alongside the parsed response. */
interface ValidatorContext {
  /** Whether the request was made in streaming mode. */
  streaming: boolean;
  /** Result of parsing the SSE stream, when one was consumed. */
  sseResult?: SSEParseResult;
}

/**
 * A semantic check run against a schema-valid response.
 *
 * @returns A list of error messages; an empty list means the check passed.
 */
type ResponseValidator = (
  response: ResponseResource,
  context: ValidatorContext,
) => string[];

/** Declarative description of one test scenario. */
export interface TestTemplate {
  id: string;
  name: string;
  description: string;
  /** Builds the JSON request body for this scenario from the active config. */
  getRequest: (config: TestConfig) => Record<string, unknown>;
  /** When true, the request is sent in streaming mode. Treated as false when omitted. */
  streaming?: boolean;
  /** Semantic checks applied after schema validation succeeds. */
  validators: ResponseValidator[];
}

// ============================================================
// Validators
// ============================================================

/** Fails when the response has a missing or empty `output` list. */
const hasOutput: ResponseValidator = (response) => {
  if (!response.output || response.output.length === 0) {
    return ["Response has no output items"];
  }
  return [];
};

/**
 * Builds a validator that requires at least one output item of the given type.
 *
 * @param type - The `type` value an output item must have.
 */
const hasOutputType =
  (type: string): ResponseValidator =>
  (response) => {
    // A missing `output` yields undefined here, which is reported as "none found".
    const hasType = response.output?.some((item) => item.type === type);
    if (!hasType) {
      return [`Expected output item of type "${type}" but none found`];
    }
    return [];
  };

/** Fails unless the response status is exactly "completed". */
const completedStatus: ResponseValidator = (response) => {
  if (response.status !== "completed") {
    return [`Expected status "completed" but got "${response.status}"`];
  }
  return [];
};

/** In streaming mode, fails when no SSE events were received; otherwise a no-op. */
const streamingEvents: ResponseValidator = (_, context) => {
  if (!context.streaming) return [];
  if (!context.sseResult || context.sseResult.events.length === 0) {
    return ["No streaming events received"];
  }
  return [];
};

/** In streaming mode, surfaces the errors collected by the SSE parser. */
const streamingSchema: ResponseValidator = (_, context) => {
  if (!context.streaming || !context.sseResult) return [];
  return context.sseResult.errors;
};
// ============================================================
// Test Templates
// ============================================================

/** Built-in test scenarios, each pairing a request builder with its validators. */
export const testTemplates: TestTemplate[] = [
  {
    id: "basic-response",
    name: "Basic Text Response",
    description: "Simple user message, validates ResponseResource schema",
    getRequest: (config) => ({
      model: config.model,
      input: [
        {
          type: "message",
          role: "user",
          content: [{ type: "input_text", text: "Say hello in exactly 3 words." }],
        },
      ],
    }),
    validators: [hasOutput, completedStatus],
  },
  {
    id: "streaming-response",
    name: "Streaming Response",
    description: "Validates SSE streaming events and final response",
    streaming: true,
    getRequest: (config) => ({
      model: config.model,
      input: [
        {
          type: "message",
          role: "user",
          content: [{ type: "input_text", text: "Count from 1 to 5." }],
        },
      ],
    }),
    validators: [streamingEvents, streamingSchema, completedStatus],
  },
  {
    id: "system-prompt",
    name: "System Prompt",
    description: "Include system instructions via the instructions field",
    getRequest: (config) => ({
      model: config.model,
      instructions: "You are a pirate. Always respond in pirate speak.",
      input: [
        {
          type: "message",
          role: "user",
          content: [{ type: "input_text", text: "Say hello." }],
        },
      ],
    }),
    validators: [hasOutput, completedStatus],
  },
  {
    id: "tool-calling",
    name: "Tool Calling",
    description: "Define a function tool and verify function_call output",
    getRequest: (config) => ({
      model: config.model,
      input: [
        {
          type: "message",
          role: "user",
          content: [
            {
              type: "input_text",
              text: "What's the weather like in San Francisco?",
            },
          ],
        },
      ],
      tools: [
        {
          type: "function",
          name: "get_weather",
          description: "Get the current weather for a location",
          parameters: {
            type: "object",
            properties: {
              location: {
                type: "string",
                description: "The city and state, e.g. San Francisco, CA",
              },
            },
            required: ["location"],
          },
        },
      ],
    }),
    // No completedStatus check here: only the presence of a function_call item is required.
    validators: [hasOutput, hasOutputType("function_call")],
  },
  {
    id: "image-input",
    name: "Image Input",
    description: "Send image URL in user content",
    getRequest: (config) => ({
      model: config.model,
      input: [
        {
          type: "message",
          role: "user",
          content: [
            {
              type: "input_text",
              text: "What do you see in this image? Answer in one sentence.",
            },
            {
              type: "input_image",
              // Inline PNG supplied as a base64 data URL.
              image_url:
                "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAIAAAD8GO2jAAABmklEQVR42tyWAaTyUBzFew/eG4AHz+MBSAHKBiJRGFKwIgQQJKLUIioBIhCAiCAAEizAQIAECaASqFFJq84nudjnaqvuPnxzgP9xfrq5938csPn7PwHTKSoViCIEAYEAMhmoKsU2mUCWEQqB5xEMIp/HaGQG2G6RSuH9HQ7H34rFrtPbdz4jl6PbwmEsl3QA1mt4vcRKk8dz9eg6IpF7tt9fzGY0gCgafFRFo5Blc5vLhf3eCOj1yNhM5GRMVK0aATxPZoz09YXjkQDmczJgquGQAPp9WwCNBgG027YACgUC6HRsAZRKBDAY2AJoNv/ZnwzA6WScznG3p4UAymXGAEkyXrTFAh8fLAGqagQAyGaZpYsi7bHTNPz8MEj//LxuFPo+UBS8vb0KaLXubrRa7aX0RMLCykwmn0z3+XA4WACcTpCkh9MFAZpmuVXo+mO/w+/HZvNgbblcUCxaSo/Hyck80Yu6XXDcvfVZr79cvMZjuN2U9O9vKAqjZrfbIZ0mV4TUi9Xqz6jddNy//7+e3n8Fhf/Llo2kxi8AQyGRoDkmAhAAAAAASUVORK5CYII=",
            },
          ],
        },
      ],
    }),
    validators: [hasOutput, completedStatus],
  },
  {
    id: "multi-turn",
    name: "Multi-turn Conversation",
    description: "Send assistant + user messages as conversation history",
    getRequest: (config) => ({
      model: config.model,
      input: [
        {
          type: "message",
          role: "user",
          content: [{ type: "input_text", text: "My name is Alice." }],
        },
        {
          type: "message",
          role: "assistant",
          content: [
            {
              type: "output_text",
              text: "Hello Alice! Nice to meet you. How can I help you today?",
            },
          ],
        },
        {
          type: "message",
          role: "user",
          content: [{ type: "input_text", text: "What is my name?" }],
        },
      ],
    }),
    validators: [hasOutput, completedStatus],
  },
];

// ============================================================
// Test Runner
// ============================================================

/**
 * POSTs a JSON body to `<baseUrl>/v1/responses`.
 *
 * @param config - Target server and credential settings.
 * @param body - Request payload; a `stream` field is always added (or overwritten).
 * @param streaming - Value sent as the `stream` field.
 * @returns The raw fetch `Response`; the status is not checked here.
 */
async function makeRequest(
  config: TestConfig,
  body: Record<string, unknown>,
  streaming = false,
): Promise<Response> {
  const headers: Record<string, string> = {
    "Content-Type": "application/json",
  };

  // Only attach credentials when a key is configured.
  if (config.apiKey) {
    headers[config.authHeaderName] = config.useBearerPrefix
      ? `Bearer ${config.apiKey}`
      : config.apiKey;
  }

  // Strip trailing slashes so "http://host/" does not produce "http://host//v1/responses".
  const baseUrl = config.baseUrl.replace(/\/+$/, "");

  return fetch(`${baseUrl}/v1/responses`, {
    method: "POST",
    headers,
    body: JSON.stringify({ ...body, stream: streaming }),
  });
}

/**
 * Runs one template end to end: sends the request, reads the body (JSON or SSE),
 * validates it against `responseResourceSchema`, then applies the template's validators.
 *
 * Failures while sending the request or reading the response are caught and
 * reported as a "failed" result.
 *
 * @param template - Scenario to execute.
 * @param config - Target server and credential settings.
 * @returns The final result, with `duration` measured up to the point the result is produced.
 */
async function runTest(
  template: TestTemplate,
  config: TestConfig,
): Promise<TestResult> {
  const startTime = Date.now();
  const elapsed = (): number => Date.now() - startTime;

  const requestBody = template.getRequest(config);
  const streaming = template.streaming ?? false;

  // Computed once so every result path reports the request the same way.
  const reportedRequest = streaming
    ? { ...requestBody, stream: true }
    : requestBody;

  const base = {
    id: template.id,
    name: template.name,
    description: template.description,
  };

  try {
    const response = await makeRequest(config, requestBody, streaming);

    if (!response.ok) {
      const errorText = await response.text();
      return {
        ...base,
        status: "failed",
        duration: elapsed(),
        request: reportedRequest,
        response: errorText,
        errors: [`HTTP ${response.status}: ${errorText}`],
      };
    }

    let rawData: unknown;
    let sseResult: SSEParseResult | undefined;

    if (streaming) {
      // For streams, the payload to validate is the final response the parser extracted.
      sseResult = await parseSSEStream(response);
      rawData = sseResult.finalResponse;
    } else {
      rawData = await response.json();
    }

    // Schema validation with Zod
    const parseResult = responseResourceSchema.safeParse(rawData);

    if (!parseResult.success) {
      return {
        ...base,
        status: "failed",
        duration: elapsed(),
        request: reportedRequest,
        response: rawData,
        errors: parseResult.error.issues.map(
          (issue) => `${issue.path.join(".")}: ${issue.message}`,
        ),
        streamEvents: sseResult?.events.length,
      };
    }

    // Semantic validators
    const context: ValidatorContext = { streaming, sseResult };
    const errors = template.validators.flatMap((validate) =>
      validate(parseResult.data, context),
    );

    return {
      ...base,
      status: errors.length === 0 ? "passed" : "failed",
      duration: elapsed(),
      request: reportedRequest,
      response: parseResult.data,
      errors,
      streamEvents: sseResult?.events.length,
    };
  } catch (error) {
    return {
      ...base,
      status: "failed",
      duration: elapsed(),
      request: reportedRequest,
      errors: [error instanceof Error ? error.message : String(error)],
    };
  }
}

/**
 * Starts every template in `testTemplates` concurrently and waits for all of them.
 *
 * @param config - Target server and credential settings.
 * @param onProgress - Called once per test with a "running" record when it starts,
 *   and once more with its final result.
 * @returns Final results in the same order as `testTemplates`.
 */
export async function runAllTests(
  config: TestConfig,
  onProgress: (result: TestResult) => void,
): Promise<TestResult[]> {
  const promises = testTemplates.map(async (template) => {
    onProgress({
      id: template.id,
      name: template.name,
      description: template.description,
      status: "running",
    });

    const result = await runTest(template, config);
    onProgress(result);
    return result;
  });

  return Promise.all(promises);
}