// File: latticelm/tests/src/compliance-tests.ts
// 371 lines, 10 KiB, TypeScript

import { responseResourceSchema, type ResponseResource } from "./schemas.ts";
import { parseSSEStream, type SSEParseResult } from "./sse-parser.ts";
/**
 * Connection and authentication settings shared by every compliance test
 * request.
 */
export interface TestConfig {
  /** Base URL of the server under test; `/v1/responses` is appended to it. */
  baseUrl: string;

  /** API key used for authentication; when empty, no auth header is sent. */
  apiKey: string;

  /** Name of the HTTP header that carries the API key. */
  authHeaderName: string;

  /** When true, the key is sent as `Bearer <key>` instead of the bare key. */
  useBearerPrefix: boolean;

  /** Model identifier placed in the `model` field of each request body. */
  model: string;
}
/**
 * Outcome (or in-progress state) of a single compliance test, as reported
 * to progress callbacks and returned by the runner.
 */
export interface TestResult {
  /** Identifier of the template that produced this result. */
  id: string;

  /** Human-readable test name, copied from the template. */
  name: string;

  /** Short explanation of what the test checks, copied from the template. */
  description: string;

  /** Lifecycle state of the test. */
  status: "pending" | "running" | "passed" | "failed";

  /** Elapsed time in milliseconds (difference of `Date.now()` readings). */
  duration?: number;

  /** Request body associated with the test, when one was built. */
  request?: unknown;

  /** Response payload: parsed data, raw data, or error text on HTTP failure. */
  response?: unknown;

  /** Validation or transport error messages; empty when the test passed. */
  errors?: string[];

  /** Number of SSE events received, for streaming tests. */
  streamEvents?: number;
}
/** Extra information handed to each validator alongside the parsed response. */
interface ValidatorContext {
  /** Whether the test was executed as a streaming request. */
  streaming: boolean;

  /** Parsed SSE stream; only set when the response was read as a stream. */
  sseResult?: SSEParseResult;
}
/**
 * A semantic check run against a schema-valid response.
 *
 * Returns a list of human-readable error messages; an empty list means the
 * check passed.
 */
type ResponseValidator = (response: ResponseResource, context: ValidatorContext) => string[];
/** Declarative description of one compliance test. */
export interface TestTemplate {
  /** Unique identifier, copied into the resulting `TestResult`. */
  id: string;

  /** Human-readable test name. */
  name: string;

  /** Short explanation of what the test exercises. */
  description: string;

  /** Builds the request body for this test from the active configuration. */
  getRequest: (config: TestConfig) => Record<string, unknown>;

  /** When true the request is sent as a stream; treated as false if omitted. */
  streaming?: boolean;

  /** Semantic checks applied after the response passes schema validation. */
  validators: ResponseValidator[];
}
// ============================================================
// Validators
// ============================================================
/** Fails when the response carries no output items (missing or empty list). */
const hasOutput: ResponseValidator = (response) => {
  const items = response.output;
  const isMissingOrEmpty = !items || items.length === 0;
  return isMissingOrEmpty ? ["Response has no output items"] : [];
};
/**
 * Creates a validator that requires at least one output item whose `type`
 * equals the given value.
 *
 * @param type - Output item type that must be present.
 * @returns A validator reporting one error when no such item exists.
 */
const hasOutputType = (type: string): ResponseValidator => {
  return (response) => {
    // A missing output list counts as "not found", same as an empty one.
    const found = response.output?.some((item) => item.type === type) ?? false;
    return found ? [] : [`Expected output item of type "${type}" but none found`];
  };
};
/** Fails unless the response status is exactly "completed". */
const completedStatus: ResponseValidator = ({ status }) =>
  status === "completed"
    ? []
    : [`Expected status "completed" but got "${status}"`];
/**
 * For streaming tests, fails when no SSE events were captured (no parse
 * result, or an empty event list). Always passes for non-streaming tests.
 */
const streamingEvents: ResponseValidator = (_response, { streaming, sseResult }) => {
  const nothingReceived = !sseResult || sseResult.events.length === 0;
  if (streaming && nothingReceived) {
    return ["No streaming events received"];
  }
  return [];
};
/**
 * For streaming tests with a parse result, surfaces the errors recorded by
 * the SSE parser. Passes (empty list) in every other case.
 */
const streamingSchema: ResponseValidator = (_response, context) => {
  const { streaming, sseResult } = context;
  return streaming && sseResult ? sseResult.errors : [];
};
// ============================================================
// Test Templates
// ============================================================
/** Builds a single `input_text` content part carrying the given text. */
const inputTextPart = (text: string) => ({ type: "input_text", text });

/** Wraps the given content parts in a user-role `message` input item. */
const userMessage = (...content: Array<Record<string, unknown>>) => ({
  type: "message",
  role: "user",
  content,
});

/**
 * The compliance suite: each entry describes one request to send and the
 * semantic validators to run on the schema-valid response.
 */
export const testTemplates: TestTemplate[] = [
  // Plain single-turn text request.
  {
    id: "basic-response",
    name: "Basic Text Response",
    description: "Simple user message, validates ResponseResource schema",
    getRequest: ({ model }) => ({
      model,
      input: [userMessage(inputTextPart("Say hello in exactly 3 words."))],
    }),
    validators: [hasOutput, completedStatus],
  },

  // Same shape of request, but sent as a stream and checked for SSE events.
  {
    id: "streaming-response",
    name: "Streaming Response",
    description: "Validates SSE streaming events and final response",
    streaming: true,
    getRequest: ({ model }) => ({
      model,
      input: [userMessage(inputTextPart("Count from 1 to 5."))],
    }),
    validators: [streamingEvents, streamingSchema, completedStatus],
  },

  // System-level guidance supplied through the `instructions` field.
  {
    id: "system-prompt",
    name: "System Prompt",
    description: "Include system instructions via the instructions field",
    getRequest: ({ model }) => ({
      model,
      instructions: "You are a pirate. Always respond in pirate speak.",
      input: [userMessage(inputTextPart("Say hello."))],
    }),
    validators: [hasOutput, completedStatus],
  },

  // Function tool definition; expects a function_call item in the output.
  {
    id: "tool-calling",
    name: "Tool Calling",
    description: "Define a function tool and verify function_call output",
    getRequest: ({ model }) => ({
      model,
      input: [
        userMessage(inputTextPart("What's the weather like in San Francisco?")),
      ],
      tools: [
        {
          type: "function",
          name: "get_weather",
          description: "Get the current weather for a location",
          parameters: {
            type: "object",
            properties: {
              location: {
                type: "string",
                description: "The city and state, e.g. San Francisco, CA",
              },
            },
            required: ["location"],
          },
        },
      ],
    }),
    validators: [hasOutput, hasOutputType("function_call")],
  },

  // Text plus an inline (base64 data URL) PNG image in one user message.
  {
    id: "image-input",
    name: "Image Input",
    description: "Send image URL in user content",
    getRequest: ({ model }) => ({
      model,
      input: [
        userMessage(
          inputTextPart("What do you see in this image? Answer in one sentence."),
          {
            type: "input_image",
            image_url:
              "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAIAAAD8GO2jAAABmklEQVR42tyWAaTyUBzFew/eG4AHz+MBSAHKBiJRGFKwIgQQJKLUIioBIhCAiCAAEizAQIAECaASqFFJq84nudjnaqvuPnxzgP9xfrq5938csPn7PwHTKSoViCIEAYEAMhmoKsU2mUCWEQqB5xEMIp/HaGQG2G6RSuH9HQ7H34rFrtPbdz4jl6PbwmEsl3QA1mt4vcRKk8dz9eg6IpF7tt9fzGY0gCgafFRFo5Blc5vLhf3eCOj1yNhM5GRMVK0aATxPZoz09YXjkQDmczJgquGQAPp9WwCNBgG027YACgUC6HRsAZRKBDAY2AJoNv/ZnwzA6WScznG3p4UAymXGAEkyXrTFAh8fLAGqagQAyGaZpYsi7bHTNPz8MEj//LxuFPo+UBS8vb0KaLXubrRa7aX0RMLCykwmn0z3+XA4WACcTpCkh9MFAZpmuVXo+mO/w+/HZvNgbblcUCxaSo/Hyck80Yu6XXDcvfVZr79cvMZjuN2U9O9vKAqjZrfbIZ0mV4TUi9Xqz6jddNy//7+e3n8Fhf/Llo2kxi8AQyGRoDkmAhAAAAAASUVORK5CYII=",
          },
        ),
      ],
    }),
    validators: [hasOutput, completedStatus],
  },

  // Prior user/assistant turns passed as history before the final question.
  {
    id: "multi-turn",
    name: "Multi-turn Conversation",
    description: "Send assistant + user messages as conversation history",
    getRequest: ({ model }) => ({
      model,
      input: [
        userMessage(inputTextPart("My name is Alice.")),
        {
          type: "message",
          role: "assistant",
          content: [
            {
              type: "output_text",
              text: "Hello Alice! Nice to meet you. How can I help you today?",
            },
          ],
        },
        userMessage(inputTextPart("What is my name?")),
      ],
    }),
    validators: [hasOutput, completedStatus],
  },
];
// ============================================================
// Test Runner
// ============================================================
/**
 * Sends one POST request to the `/v1/responses` endpoint of the server
 * under test.
 *
 * @param config - Connection settings (base URL, auth header, API key).
 * @param body - Request payload; a `stream` field is added (or overridden)
 *   to match `streaming`.
 * @param streaming - Whether to request a streamed response.
 * @returns The raw fetch `Response`; the caller decides how to read the body.
 */
async function makeRequest(
  config: TestConfig,
  body: Record<string, unknown>,
  streaming = false,
): Promise<Response> {
  const headers: Record<string, string> = {
    "Content-Type": "application/json",
  };

  // Only attach credentials when a key was actually configured.
  if (config.apiKey) {
    headers[config.authHeaderName] = config.useBearerPrefix
      ? `Bearer ${config.apiKey}`
      : config.apiKey;
  }

  // Strip trailing slashes so a base URL such as "http://host/" does not
  // produce "http://host//v1/responses".
  const baseUrl = config.baseUrl.replace(/\/+$/, "");

  return fetch(`${baseUrl}/v1/responses`, {
    method: "POST",
    headers,
    body: JSON.stringify({ ...body, stream: streaming }),
  });
}
/**
 * Executes a single compliance test: builds the request, sends it, validates
 * the response against the Zod schema, then runs the template's semantic
 * validators.
 *
 * Every failure, including a throwing `getRequest`, a network error, or an
 * unreadable body, is turned into a `"failed"` result, so the returned
 * promise is not expected to reject.
 *
 * @param template - The test to execute.
 * @param config - Connection settings for the server under test.
 * @returns The result, with `duration` measured up to the point the body was
 *   fully read (or the failure occurred).
 */
async function runTest(
  template: TestTemplate,
  config: TestConfig,
): Promise<TestResult> {
  const startTime = Date.now();
  const elapsed = (): number => Date.now() - startTime;
  const streaming = template.streaming ?? false;

  // Fields shared by every result this function can return.
  const identity = {
    id: template.id,
    name: template.name,
    description: template.description,
  };

  // Request as shown in the result; stays undefined if getRequest throws.
  let reportedRequest: Record<string, unknown> | undefined;

  try {
    // Built inside the try so a faulty template fails only its own test.
    const requestBody = template.getRequest(config);
    reportedRequest = streaming
      ? { ...requestBody, stream: true }
      : requestBody;

    const response = await makeRequest(config, requestBody, streaming);
    if (!response.ok) {
      const errorText = await response.text();
      return {
        ...identity,
        status: "failed",
        duration: elapsed(),
        request: reportedRequest,
        response: errorText,
        errors: [`HTTP ${response.status}: ${errorText}`],
      };
    }

    let rawData: unknown;
    let sseResult: SSEParseResult | undefined;
    if (streaming) {
      sseResult = await parseSSEStream(response);
      rawData = sseResult.finalResponse;
    } else {
      rawData = await response.json();
    }
    // Taken after the body is consumed so streamed responses are timed in
    // full, not just up to the response headers.
    const duration = elapsed();

    // Schema validation with Zod
    const parseResult = responseResourceSchema.safeParse(rawData);
    if (!parseResult.success) {
      return {
        ...identity,
        status: "failed",
        duration,
        request: reportedRequest,
        response: rawData,
        errors: parseResult.error.issues.map(
          (issue) => `${issue.path.join(".")}: ${issue.message}`,
        ),
        streamEvents: sseResult?.events.length,
      };
    }

    // Semantic validators
    const context: ValidatorContext = { streaming, sseResult };
    const errors = template.validators.flatMap((validate) =>
      validate(parseResult.data, context),
    );

    return {
      ...identity,
      status: errors.length === 0 ? "passed" : "failed",
      duration,
      request: reportedRequest,
      response: parseResult.data,
      errors,
      streamEvents: sseResult?.events.length,
    };
  } catch (error) {
    return {
      ...identity,
      status: "failed",
      duration: elapsed(),
      request: reportedRequest,
      errors: [error instanceof Error ? error.message : String(error)],
    };
  }
}
/**
 * Runs every template in `testTemplates` concurrently.
 *
 * For each template, `onProgress` is called once with a `"running"`
 * placeholder before the test starts and once more with the final result.
 *
 * @param config - Connection settings for the server under test.
 * @param onProgress - Callback notified of each status change.
 * @returns All final results, in the same order as `testTemplates`.
 */
export async function runAllTests(
  config: TestConfig,
  onProgress: (result: TestResult) => void,
): Promise<TestResult[]> {
  const runWithProgress = async (template: TestTemplate): Promise<TestResult> => {
    const { id, name, description } = template;
    onProgress({ id, name, description, status: "running" });

    const outcome = await runTest(template, config);
    onProgress(outcome);
    return outcome;
  };

  // Kick off every test first, then wait for all of them together.
  const pending = testTemplates.map((template) => runWithProgress(template));
  return Promise.all(pending);
}