Add Open Responses compliance tests
This commit is contained in:
370
tests/src/compliance-tests.ts
Normal file
370
tests/src/compliance-tests.ts
Normal file
@@ -0,0 +1,370 @@
|
||||
import { responseResourceSchema, type ResponseResource } from "./schemas.ts";
|
||||
import { parseSSEStream, type SSEParseResult } from "./sse-parser.ts";
|
||||
|
||||
/**
 * Connection and model settings shared by every compliance test.
 */
export interface TestConfig {
  /** Server base URL; the runner appends `/v1/responses` to it. */
  baseUrl: string;
  /** API key value. When empty, no authentication header is sent. */
  apiKey: string;
  /** Name of the HTTP header that carries the API key. */
  authHeaderName: string;
  /** When true the key is sent as `Bearer <apiKey>`; otherwise it is sent verbatim. */
  useBearerPrefix: boolean;
  /** Model identifier placed in the `model` field of each request body. */
  model: string;
}
|
||||
|
||||
/**
 * Outcome (or in-progress state) of a single compliance test.
 */
export interface TestResult {
  /** Identifier of the template this result belongs to. */
  id: string;
  /** Human-readable test name, copied from the template. */
  name: string;
  /** Short description of what the test checks, copied from the template. */
  description: string;
  /** Lifecycle state of the test. */
  status: "pending" | "running" | "passed" | "failed";
  /** Elapsed time in milliseconds (difference of two `Date.now()` readings). */
  duration?: number;
  /** Request body associated with the test, kept for reporting. */
  request?: unknown;
  /** Response payload (parsed data, raw data, or error text), kept for reporting. */
  response?: unknown;
  /** Failure messages; an empty list accompanies a passing test. */
  errors?: string[];
  /** Number of SSE events parsed, set only for streaming tests. */
  streamEvents?: number;
}
|
||||
|
||||
/**
 * Extra information handed to every validator alongside the parsed response.
 */
interface ValidatorContext {
  /** Whether the request was made in streaming mode. */
  streaming: boolean;
  /** Result of parsing the SSE stream; absent for non-streaming requests. */
  sseResult?: SSEParseResult;
}
|
||||
|
||||
/**
 * A semantic check run against a schema-valid response.
 *
 * Returns a list of failure messages; an empty list means the check passed.
 */
type ResponseValidator = (
  response: ResponseResource,
  context: ValidatorContext,
) => string[];
|
||||
|
||||
/**
 * Declarative definition of one compliance test.
 */
export interface TestTemplate {
  /** Stable identifier of the test. */
  id: string;
  /** Human-readable test name. */
  name: string;
  /** Short description of what the test checks. */
  description: string;
  /** Builds the request body for this test from the run configuration. */
  getRequest: (config: TestConfig) => Record<string, unknown>;
  /** When true the request is sent in streaming mode; treated as false if omitted. */
  streaming?: boolean;
  /** Semantic checks applied after the response passes schema validation. */
  validators: ResponseValidator[];
}
|
||||
|
||||
// ============================================================
|
||||
// Validators
|
||||
// ============================================================
|
||||
|
||||
/**
 * Validator: the response must carry at least one output item.
 */
const hasOutput: ResponseValidator = (response) => {
  const items = response.output;
  const isEmpty = !items || items.length === 0;
  return isEmpty ? ["Response has no output items"] : [];
};
|
||||
|
||||
/**
 * Builds a validator requiring at least one output item whose `type`
 * equals the given value.
 *
 * @param type - Output item type to look for.
 * @returns A validator that reports one error when no such item exists
 *   (including when the response has no output list).
 */
const hasOutputType = (type: string): ResponseValidator => {
  const notFound = `Expected output item of type "${type}" but none found`;
  return (response) => {
    const found = response.output?.some((entry) => entry.type === type);
    return found ? [] : [notFound];
  };
};
|
||||
|
||||
/**
 * Validator: the response `status` must be exactly "completed".
 */
const completedStatus: ResponseValidator = ({ status }) =>
  status === "completed"
    ? []
    : [`Expected status "completed" but got "${status}"`];
|
||||
|
||||
/**
 * Validator: a streaming request must have produced at least one SSE event.
 * Non-streaming requests always pass.
 */
const streamingEvents: ResponseValidator = (_response, context) => {
  if (context.streaming) {
    const received = context.sseResult ? context.sseResult.events.length : 0;
    if (received === 0) {
      return ["No streaming events received"];
    }
  }
  return [];
};
|
||||
|
||||
/**
 * Validator: surfaces the errors collected while parsing the SSE stream.
 * Passes trivially when the request was not streaming or no parse result exists.
 */
const streamingSchema: ResponseValidator = (_response, context) =>
  context.streaming && context.sseResult ? context.sseResult.errors : [];
|
||||
|
||||
// ============================================================
|
||||
// Test Templates
|
||||
// ============================================================
|
||||
|
||||
/** Builds an `input_text` content part. */
const inputText = (text: string) => ({ type: "input_text", text });

/** Wraps content parts in a `message` input item for the given role. */
const messageItem = (
  role: "user" | "assistant",
  content: Array<Record<string, unknown>>,
) => ({
  type: "message",
  role,
  content,
});

/** Builds a user message holding a single text part. */
const userText = (text: string) => messageItem("user", [inputText(text)]);

/** Small inline PNG, sent as a data URL by the image-input test. */
const SAMPLE_IMAGE_DATA_URL =
  "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAIAAAD8GO2jAAABmklEQVR42tyWAaTyUBzFew/eG4AHz+MBSAHKBiJRGFKwIgQQJKLUIioBIhCAiCAAEizAQIAECaASqFFJq84nudjnaqvuPnxzgP9xfrq5938csPn7PwHTKSoViCIEAYEAMhmoKsU2mUCWEQqB5xEMIp/HaGQG2G6RSuH9HQ7H34rFrtPbdz4jl6PbwmEsl3QA1mt4vcRKk8dz9eg6IpF7tt9fzGY0gCgafFRFo5Blc5vLhf3eCOj1yNhM5GRMVK0aATxPZoz09YXjkQDmczJgquGQAPp9WwCNBgG027YACgUC6HRsAZRKBDAY2AJoNv/ZnwzA6WScznG3p4UAymXGAEkyXrTFAh8fLAGqagQAyGaZpYsi7bHTNPz8MEj//LxuFPo+UBS8vb0KaLXubrRa7aX0RMLCykwmn0z3+XA4WACcTpCkh9MFAZpmuVXo+mO/w+/HZvNgbblcUCxaSo/Hyck80Yu6XXDcvfVZr79cvMZjuN2U9O9vKAqjZrfbIZ0mV4TUi9Xqz6jddNy//7+e3n8Fhf/Llo2kxi8AQyGRoDkmAhAAAAAASUVORK5CYII=";

/** Builds a fresh `get_weather` function tool definition for the tool-calling test. */
const weatherTool = () => ({
  type: "function",
  name: "get_weather",
  description: "Get the current weather for a location",
  parameters: {
    type: "object",
    properties: {
      location: {
        type: "string",
        description: "The city and state, e.g. San Francisco, CA",
      },
    },
    required: ["location"],
  },
});

/**
 * The compliance test suite: each entry describes one request to send and
 * the semantic validators to apply to its response.
 */
export const testTemplates: TestTemplate[] = [
  {
    id: "basic-response",
    name: "Basic Text Response",
    description: "Simple user message, validates ResponseResource schema",
    getRequest: (config) => ({
      model: config.model,
      input: [userText("Say hello in exactly 3 words.")],
    }),
    validators: [hasOutput, completedStatus],
  },

  {
    id: "streaming-response",
    name: "Streaming Response",
    description: "Validates SSE streaming events and final response",
    streaming: true,
    getRequest: (config) => ({
      model: config.model,
      input: [userText("Count from 1 to 5.")],
    }),
    validators: [streamingEvents, streamingSchema, completedStatus],
  },

  {
    id: "system-prompt",
    name: "System Prompt",
    description: "Include system instructions via the instructions field",
    getRequest: (config) => ({
      model: config.model,
      instructions: "You are a pirate. Always respond in pirate speak.",
      input: [userText("Say hello.")],
    }),
    validators: [hasOutput, completedStatus],
  },

  {
    id: "tool-calling",
    name: "Tool Calling",
    description: "Define a function tool and verify function_call output",
    getRequest: (config) => ({
      model: config.model,
      input: [userText("What's the weather like in San Francisco?")],
      tools: [weatherTool()],
    }),
    validators: [hasOutput, hasOutputType("function_call")],
  },

  {
    id: "image-input",
    name: "Image Input",
    description: "Send image URL in user content",
    getRequest: (config) => ({
      model: config.model,
      input: [
        messageItem("user", [
          inputText("What do you see in this image? Answer in one sentence."),
          { type: "input_image", image_url: SAMPLE_IMAGE_DATA_URL },
        ]),
      ],
    }),
    validators: [hasOutput, completedStatus],
  },

  {
    id: "multi-turn",
    name: "Multi-turn Conversation",
    description: "Send assistant + user messages as conversation history",
    getRequest: (config) => ({
      model: config.model,
      input: [
        userText("My name is Alice."),
        messageItem("assistant", [
          {
            type: "output_text",
            text: "Hello Alice! Nice to meet you. How can I help you today?",
          },
        ]),
        userText("What is my name?"),
      ],
    }),
    validators: [hasOutput, completedStatus],
  },
];
|
||||
|
||||
// ============================================================
|
||||
// Test Runner
|
||||
// ============================================================
|
||||
|
||||
/**
 * POSTs a JSON request body to the `/v1/responses` endpoint of the
 * configured server.
 *
 * Trailing slashes on `config.baseUrl` are dropped before the path is
 * appended, so "http://host" and "http://host/" address the same endpoint.
 *
 * @param config - Supplies the base URL and the authentication settings.
 *   When `apiKey` is empty no authentication header is sent.
 * @param body - Request payload; its `stream` field is overwritten with
 *   the value of `streaming`.
 * @param streaming - Value sent as the `stream` field. Defaults to false.
 * @returns The fetch `Response`; the HTTP status is not inspected here.
 */
async function makeRequest(
  config: TestConfig,
  body: Record<string, unknown>,
  streaming = false,
): Promise<Response> {
  const headers: Record<string, string> = {
    "Content-Type": "application/json",
  };

  if (config.apiKey) {
    headers[config.authHeaderName] = config.useBearerPrefix
      ? `Bearer ${config.apiKey}`
      : config.apiKey;
  }

  // Avoid a doubled slash ("http://host//v1/responses") when the
  // configured base URL already ends with one.
  const baseUrl = config.baseUrl.replace(/\/+$/, "");

  return fetch(`${baseUrl}/v1/responses`, {
    method: "POST",
    headers,
    body: JSON.stringify({ ...body, stream: streaming }),
  });
}
|
||||
|
||||
/**
 * Runs a single compliance test: sends the template's request, validates
 * the response against the ResponseResource schema, then applies the
 * template's semantic validators.
 *
 * Failures in the request or in reading the response are reported as a
 * "failed" result instead of being thrown. An exception thrown by
 * `template.getRequest` itself still propagates to the caller.
 *
 * @param template - The test definition to execute.
 * @param config - Connection and model settings for the request.
 * @returns The test outcome, with status "passed" or "failed".
 */
async function runTest(
  template: TestTemplate,
  config: TestConfig,
): Promise<TestResult> {
  const startTime = Date.now();
  const requestBody = template.getRequest(config);
  const streaming = template.streaming ?? false;

  // Fields shared by every result this function can return.
  const base = {
    id: template.id,
    name: template.name,
    description: template.description,
  };
  // Report the request the same way on every path: streaming tests always
  // show the `stream: true` flag, including in HTTP-error and exception results.
  const reportedRequest = streaming
    ? { ...requestBody, stream: true }
    : requestBody;

  try {
    const response = await makeRequest(config, requestBody, streaming);
    // Taken as soon as the fetch promise resolves; time spent reading the
    // body or the event stream afterwards is not included.
    const duration = Date.now() - startTime;

    if (!response.ok) {
      const errorText = await response.text();
      return {
        ...base,
        status: "failed",
        duration,
        request: reportedRequest,
        response: errorText,
        errors: [`HTTP ${response.status}: ${errorText}`],
      };
    }

    let rawData: unknown;
    let sseResult: SSEParseResult | undefined;

    if (streaming) {
      sseResult = await parseSSEStream(response);
      rawData = sseResult.finalResponse;

      if (rawData == null) {
        // Without a final response there is nothing to validate against the
        // schema; say so explicitly and include the stream parse errors.
        return {
          ...base,
          status: "failed",
          duration,
          request: reportedRequest,
          response: rawData,
          errors: [
            "Stream ended without a final response",
            ...sseResult.errors,
          ],
          streamEvents: sseResult.events.length,
        };
      }
    } else {
      rawData = await response.json();
    }

    // Schema validation with Zod
    const parseResult = responseResourceSchema.safeParse(rawData);
    if (!parseResult.success) {
      return {
        ...base,
        status: "failed",
        duration,
        request: reportedRequest,
        response: rawData,
        errors: parseResult.error.issues.map(
          (issue) => `${issue.path.join(".")}: ${issue.message}`,
        ),
        streamEvents: sseResult?.events.length,
      };
    }

    // Semantic validators
    const context: ValidatorContext = { streaming, sseResult };
    const errors = template.validators.flatMap((validate) =>
      validate(parseResult.data, context),
    );

    return {
      ...base,
      status: errors.length === 0 ? "passed" : "failed",
      duration,
      request: reportedRequest,
      response: parseResult.data,
      errors,
      streamEvents: sseResult?.events.length,
    };
  } catch (error) {
    // Network failures, invalid JSON bodies, and validator exceptions land here.
    return {
      ...base,
      status: "failed",
      duration: Date.now() - startTime,
      request: reportedRequest,
      errors: [error instanceof Error ? error.message : String(error)],
    };
  }
}
|
||||
|
||||
/**
 * Starts every template in `testTemplates` at once and resolves when all
 * of them have finished.
 *
 * @param config - Connection and model settings passed to each test.
 * @param onProgress - Called twice per test: first with a "running"
 *   placeholder, then with the final result.
 * @returns The final results, in the same order as `testTemplates`.
 */
export async function runAllTests(
  config: TestConfig,
  onProgress: (result: TestResult) => void,
): Promise<TestResult[]> {
  const runWithProgress = async (
    template: TestTemplate,
  ): Promise<TestResult> => {
    const { id, name, description } = template;
    onProgress({ id, name, description, status: "running" });

    const outcome = await runTest(template, config);
    onProgress(outcome);
    return outcome;
  };

  return Promise.all(testTemplates.map((template) => runWithProgress(template)));
}
|
||||
Reference in New Issue
Block a user