// TypeScript · 371 lines · 10 KiB
import { responseResourceSchema, type ResponseResource } from "./schemas.ts";
|
|
import { parseSSEStream, type SSEParseResult } from "./sse-parser.ts";
|
|
|
|
/**
 * Connection settings shared by every compliance test request.
 */
export interface TestConfig {
  /** Root URL of the server under test; the `/v1/responses` path is appended to it. */
  baseUrl: string;

  /** Credential sent with each request. When empty, no auth header is added. */
  apiKey: string;

  /** Name of the HTTP header that carries the credential. */
  authHeaderName: string;

  /** When true the credential is sent as `Bearer <apiKey>`; otherwise it is sent verbatim. */
  useBearerPrefix: boolean;

  /** Model identifier placed in the `model` field of each request body. */
  model: string;
}
|
|
|
|
/**
 * Snapshot of one test's state, used both for progress updates and for the
 * final outcome.
 */
export interface TestResult {
  /** Identifier copied from the originating template. */
  id: string;

  /** Display name copied from the originating template. */
  name: string;

  /** Short explanation copied from the originating template. */
  description: string;

  /** Lifecycle state of the test. */
  status: "pending" | "running" | "passed" | "failed";

  /** Elapsed wall-clock time in milliseconds, as measured by the runner. */
  duration?: number;

  /** Request body associated with the test. */
  request?: unknown;

  /** Response payload: the validated resource, the raw data, or the error text. */
  response?: unknown;

  /** Human-readable problems; empty or absent when nothing went wrong. */
  errors?: string[];

  /** Number of SSE events received (streaming tests only). */
  streamEvents?: number;
}
|
|
|
|
/**
 * Extra information handed to each validator alongside the parsed response.
 */
interface ValidatorContext {
  /** Whether the test was run in streaming mode. */
  streaming: boolean;

  /** Parsed SSE stream, when one was read; absent for non-streaming tests. */
  sseResult?: SSEParseResult;
}
|
|
|
|
/**
 * A semantic check applied to a schema-valid response.
 *
 * Returns a list of human-readable problems; an empty list means the check
 * passed.
 */
type ResponseValidator = (
  response: ResponseResource,
  context: ValidatorContext,
) => string[];
|
|
|
|
/**
 * Declarative description of one compliance test: what to send and how to
 * judge the response.
 */
export interface TestTemplate {
  /** Identifier carried through to the test's results. */
  id: string;

  /** Display name carried through to the test's results. */
  name: string;

  /** Short explanation of what the test exercises. */
  description: string;

  /** Builds the request body for the given configuration. */
  getRequest: (config: TestConfig) => Record<string, unknown>;

  /** Run the request in streaming (SSE) mode; treated as false when omitted. */
  streaming?: boolean;

  /** Semantic checks run, in order, once the response passes schema validation. */
  validators: ResponseValidator[];
}
|
|
|
|
// ============================================================
|
|
// Validators
|
|
// ============================================================
|
|
|
|
/** Reports a problem when `output` is missing or contains no items. */
const hasOutput: ResponseValidator = (response) => {
  const isEmpty = !response.output || response.output.length === 0;
  return isEmpty ? ["Response has no output items"] : [];
};
|
|
|
|
/**
 * Builds a validator that requires at least one output item whose `type`
 * equals the given value.
 *
 * @param type - The output item type that must be present.
 */
const hasOutputType = (type: string): ResponseValidator => {
  return (response) => {
    const found = response.output?.some((item) => item.type === type) ?? false;
    return found ? [] : [`Expected output item of type "${type}" but none found`];
  };
};
|
|
|
|
/** Reports a problem unless the response's `status` is exactly "completed". */
const completedStatus: ResponseValidator = (response) =>
  response.status === "completed"
    ? []
    : [`Expected status "completed" but got "${response.status}"`];
|
|
|
|
/**
 * For streaming tests, reports a problem when no SSE events were captured.
 * Non-streaming tests always pass this check.
 */
const streamingEvents: ResponseValidator = (_response, context) => {
  if (!context.streaming) {
    return [];
  }

  const stream = context.sseResult;
  const receivedAny = stream ? stream.events.length !== 0 : false;
  return receivedAny ? [] : ["No streaming events received"];
};
|
|
|
|
/**
 * For streaming tests, surfaces the errors collected while parsing the SSE
 * stream. Yields nothing when the test is not streaming or no stream was parsed.
 */
const streamingSchema: ResponseValidator = (_response, context) =>
  context.streaming && context.sseResult ? context.sseResult.errors : [];
|
|
|
|
// ============================================================
|
|
// Test Templates
|
|
// ============================================================
|
|
|
|
/** Builds a user message holding a single `input_text` content part. */
const userTextMessage = (text: string) => ({
  type: "message",
  role: "user",
  content: [{ type: "input_text", text }],
});

/** Small inline PNG (base64 data URL) used by the image-input test. */
const SAMPLE_IMAGE_DATA_URL =
  "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAIAAAD8GO2jAAABmklEQVR42tyWAaTyUBzFew/eG4AHz+MBSAHKBiJRGFKwIgQQJKLUIioBIhCAiCAAEizAQIAECaASqFFJq84nudjnaqvuPnxzgP9xfrq5938csPn7PwHTKSoViCIEAYEAMhmoKsU2mUCWEQqB5xEMIp/HaGQG2G6RSuH9HQ7H34rFrtPbdz4jl6PbwmEsl3QA1mt4vcRKk8dz9eg6IpF7tt9fzGY0gCgafFRFo5Blc5vLhf3eCOj1yNhM5GRMVK0aATxPZoz09YXjkQDmczJgquGQAPp9WwCNBgG027YACgUC6HRsAZRKBDAY2AJoNv/ZnwzA6WScznG3p4UAymXGAEkyXrTFAh8fLAGqagQAyGaZpYsi7bHTNPz8MEj//LxuFPo+UBS8vb0KaLXubrRa7aX0RMLCykwmn0z3+XA4WACcTpCkh9MFAZpmuVXo+mO/w+/HZvNgbblcUCxaSo/Hyck80Yu6XXDcvfVZr79cvMZjuN2U9O9vKAqjZrfbIZ0mV4TUi9Xqz6jddNy//7+e3n8Fhf/Llo2kxi8AQyGRoDkmAhAAAAAASUVORK5CYII=";

/**
 * The built-in compliance tests, in the order they are reported.
 * Each entry pairs a request builder with the validators applied to its response.
 */
export const testTemplates: TestTemplate[] = [
  // Plain single-turn text request.
  {
    id: "basic-response",
    name: "Basic Text Response",
    description: "Simple user message, validates ResponseResource schema",
    getRequest: ({ model }) => ({
      model,
      input: [userTextMessage("Say hello in exactly 3 words.")],
    }),
    validators: [hasOutput, completedStatus],
  },

  // Same shape of request, delivered over SSE.
  {
    id: "streaming-response",
    name: "Streaming Response",
    description: "Validates SSE streaming events and final response",
    streaming: true,
    getRequest: ({ model }) => ({
      model,
      input: [userTextMessage("Count from 1 to 5.")],
    }),
    validators: [streamingEvents, streamingSchema, completedStatus],
  },

  // Top-level `instructions` field alongside the user input.
  {
    id: "system-prompt",
    name: "System Prompt",
    description: "Include system instructions via the instructions field",
    getRequest: ({ model }) => ({
      model,
      instructions: "You are a pirate. Always respond in pirate speak.",
      input: [userTextMessage("Say hello.")],
    }),
    validators: [hasOutput, completedStatus],
  },

  // Function tool definition; the response must contain a function_call item.
  {
    id: "tool-calling",
    name: "Tool Calling",
    description: "Define a function tool and verify function_call output",
    getRequest: ({ model }) => ({
      model,
      input: [userTextMessage("What's the weather like in San Francisco?")],
      tools: [
        {
          type: "function",
          name: "get_weather",
          description: "Get the current weather for a location",
          parameters: {
            type: "object",
            properties: {
              location: {
                type: "string",
                description: "The city and state, e.g. San Francisco, CA",
              },
            },
            required: ["location"],
          },
        },
      ],
    }),
    validators: [hasOutput, hasOutputType("function_call")],
  },

  // Text plus an inline image in the same user message.
  {
    id: "image-input",
    name: "Image Input",
    description: "Send image URL in user content",
    getRequest: ({ model }) => ({
      model,
      input: [
        {
          type: "message",
          role: "user",
          content: [
            {
              type: "input_text",
              text: "What do you see in this image? Answer in one sentence.",
            },
            {
              type: "input_image",
              image_url: SAMPLE_IMAGE_DATA_URL,
            },
          ],
        },
      ],
    }),
    validators: [hasOutput, completedStatus],
  },

  // Prior user and assistant turns supplied as history before the new question.
  {
    id: "multi-turn",
    name: "Multi-turn Conversation",
    description: "Send assistant + user messages as conversation history",
    getRequest: ({ model }) => ({
      model,
      input: [
        userTextMessage("My name is Alice."),
        {
          type: "message",
          role: "assistant",
          content: [
            {
              type: "output_text",
              text: "Hello Alice! Nice to meet you. How can I help you today?",
            },
          ],
        },
        userTextMessage("What is my name?"),
      ],
    }),
    validators: [hasOutput, completedStatus],
  },
];
|
|
|
|
// ============================================================
|
|
// Test Runner
|
|
// ============================================================
|
|
|
|
/**
 * POSTs a request body to the `/v1/responses` endpoint of the configured server.
 *
 * The body is sent as JSON with an explicit `stream` flag. When an API key is
 * configured it is placed in the configured auth header, optionally prefixed
 * with `Bearer `.
 *
 * @param config - Connection settings (base URL, credential, header options).
 * @param body - Request payload; `stream` is added or overwritten on a copy.
 * @param streaming - Value of the `stream` flag sent to the server.
 * @returns The `fetch` response; the body has not been read yet.
 */
async function makeRequest(
  config: TestConfig,
  body: Record<string, unknown>,
  streaming = false,
): Promise<Response> {
  const headers: Record<string, string> = {
    "Content-Type": "application/json",
  };

  if (config.apiKey) {
    const authValue = config.useBearerPrefix
      ? `Bearer ${config.apiKey}`
      : config.apiKey;
    headers[config.authHeaderName] = authValue;
  }

  // A base URL entered with a trailing slash would otherwise produce
  // "<host>//v1/responses", which servers may treat as a different route.
  const baseUrl = config.baseUrl.replace(/\/+$/, "");

  return fetch(`${baseUrl}/v1/responses`, {
    method: "POST",
    headers,
    body: JSON.stringify({ ...body, stream: streaming }),
  });
}
|
|
|
|
/**
 * Executes one test template against the configured server and reports the outcome.
 *
 * Steps: send the request, fail on a non-OK HTTP status, read the body (plain
 * JSON, or the final response extracted from the SSE stream), validate it
 * against `responseResourceSchema`, then run the template's validators.
 *
 * Errors thrown while sending the request or reading the response are caught
 * and converted into a "failed" result carrying the error message.
 *
 * @param template - Test definition supplying the request and validators.
 * @param config - Connection settings for the server under test.
 * @returns The populated result; `status` is "passed" only when the schema
 *   check succeeds and no validator reports a problem.
 */
async function runTest(
  template: TestTemplate,
  config: TestConfig,
): Promise<TestResult> {
  const startTime = Date.now();
  const requestBody = template.getRequest(config);
  const streaming = template.streaming ?? false;

  // Fields that are identical in every outcome.
  const identity = {
    id: template.id,
    name: template.name,
    description: template.description,
  };
  // Report the request the same way in every branch, so failures of a
  // streaming test still show that streaming was requested.
  const request = streaming ? { ...requestBody, stream: true } : requestBody;
  const elapsedMs = (): number => Date.now() - startTime;

  try {
    const response = await makeRequest(config, requestBody, streaming);

    if (!response.ok) {
      const errorText = await response.text();
      return {
        ...identity,
        status: "failed",
        duration: elapsedMs(),
        request,
        response: errorText,
        errors: [`HTTP ${response.status}: ${errorText}`],
      };
    }

    let rawData: unknown;
    let sseResult: SSEParseResult | undefined;

    if (streaming) {
      sseResult = await parseSSEStream(response);
      rawData = sseResult.finalResponse;
    } else {
      rawData = await response.json();
    }

    // fetch() resolves once headers arrive; measure only after the body or
    // stream has been fully read so the duration covers the whole exchange.
    const duration = elapsedMs();
    const streamEvents = sseResult?.events.length;

    // Schema validation with Zod
    const parseResult = responseResourceSchema.safeParse(rawData);
    if (!parseResult.success) {
      return {
        ...identity,
        status: "failed",
        duration,
        request,
        response: rawData,
        errors: parseResult.error.issues.map((issue) => {
          // An issue on the top-level value has an empty path; label it
          // instead of emitting a message that starts with ": ".
          const location =
            issue.path.length > 0 ? issue.path.join(".") : "(root)";
          return `${location}: ${issue.message}`;
        }),
        streamEvents,
      };
    }

    // Semantic validators
    const context: ValidatorContext = { streaming, sseResult };
    const errors = template.validators.flatMap((validate) =>
      validate(parseResult.data, context),
    );

    return {
      ...identity,
      status: errors.length === 0 ? "passed" : "failed",
      duration,
      request,
      response: parseResult.data,
      errors,
      streamEvents,
    };
  } catch (error) {
    return {
      ...identity,
      status: "failed",
      duration: elapsedMs(),
      request,
      errors: [error instanceof Error ? error.message : String(error)],
    };
  }
}
|
|
|
|
/**
 * Runs every entry of `testTemplates` concurrently.
 *
 * `onProgress` is called twice per test: once with a "running" placeholder
 * before the test starts, and once with the finished result.
 *
 * @param config - Connection settings passed to each test.
 * @param onProgress - Callback notified of each state change.
 * @returns All results, in the same order as `testTemplates`.
 */
export async function runAllTests(
  config: TestConfig,
  onProgress: (result: TestResult) => void,
): Promise<TestResult[]> {
  const execute = async (template: TestTemplate): Promise<TestResult> => {
    const { id, name, description } = template;
    onProgress({ id, name, description, status: "running" });

    const outcome = await runTest(template, config);
    onProgress(outcome);
    return outcome;
  };

  const pending = testTemplates.map((template) => execute(template));
  return Promise.all(pending);
}
|