First commit

This commit is contained in:
Anibal Angulo
2026-02-18 19:57:43 +00:00
commit a53f8fcf62
115 changed files with 9957 additions and 0 deletions

View File

@@ -0,0 +1,95 @@
# Search Evaluation
This package contains scripts to evaluate the performance of the vector search component.
## Evaluation
The `search-eval` script evaluates search performance. It can source data from either BigQuery or local files.
### Local File Evaluation
To run the evaluation using a local file, use the `--input-file` option.
```bash
uv run search-eval -- --input-file /path/to/your/data.csv
```
Or for a SQLite database:
```bash
uv run search-eval -- --input-file /path/to/your/data.db
```
#### Input File Structures
**CSV File**
The CSV file must contain the following columns:
| Column | Description |
|--------|-----------------------------------------------|
| `input` | The question to be used for the search query. |
| `source` | The expected document path for the question. |
**SQLite Database**
The SQLite database must contain a table named `evaluation_data` with the following columns:
| Column | Description |
|--------|-----------------------------------------------|
| `input` | The question to be used for the search query. |
| `source` | The expected document path for the question. |
### BigQuery Evaluation
The `search-eval-bq` script evaluates search performance using data sourced from and written to BigQuery.
### BigQuery Table Structures
#### Input Table
The input table must contain the following columns:
| Column | Type | Description |
| --------------- | ------- | --------------------------------------------------------------------------- |
| `id` | STRING | A unique identifier for each question. |
| `question` | STRING | The question to be used for the search query. |
| `document_path` | STRING | The expected document path for the given question. |
| `question_type` | STRING | The type of question. Rows where `question_type` is 'Unanswerable' are ignored. |
#### Output Table
The output table will be created by the script if it doesn't exist, or appended to if it does. It will have the following structure:
| Column | Type | Description |
| ------------------------ | --------- | ------------------------------------------------------------------------ |
| `id` | STRING | The unique identifier for the question from the input table. |
| `question` | STRING | The question used for the search query. |
| `expected_document` | STRING | The expected document for the given question. |
| `retrieved_documents` | STRING[] | An array of document IDs retrieved from the vector search. |
| `retrieved_distances` | FLOAT64[] | An array of distance scores for the retrieved documents. |
| `is_expected_in_results` | BOOLEAN | A flag indicating whether the expected document was in the search results. |
| `evaluation_timestamp` | TIMESTAMP | The timestamp of when the evaluation was run. |
### Usage
To run the BigQuery evaluation script, use the `uv run search-eval-bq` command with the following options:
```bash
uv run search-eval-bq -- --input-table <project.dataset.table> --output-table <project.dataset.table> [--project-id <gcp-project-id>]
```
**Arguments:**
* `--input-table`: **(Required)** The full BigQuery table name for the input data (e.g., `my-gcp-project.my_dataset.questions`).
* `--output-table`: **(Required)** The full BigQuery table name for the output results (e.g., `my-gcp-project.my_dataset.eval_results`).
* `--project-id`: (Optional) The Google Cloud project ID. If not provided, it will use the `project_id` from the `config.yaml` file.
**Example:**
```bash
uv run search-eval-bq -- \
--input-table "my-gcp-project.search_eval.synthetic_questions" \
--output-table "my-gcp-project.search_eval.results" \
--project-id "my-gcp-project"
```

View File

@@ -0,0 +1,27 @@
[project]
name = "search-eval"
version = "0.1.0"
description = "Scripts to evaluate the performance of the vector search component."
readme = "README.md"
authors = [
{ name = "Anibal Angulo", email = "a8065384@banorte.com" }
]
requires-python = ">=3.12"
dependencies = [
"embedder",
"ranx>=0.3.21",
"google-cloud-bigquery",
"pandas-gbq",
"kfp>=1.4.0",
"requests-toolbelt>=1.0.0",
]
[project.scripts]
search-eval = "search_eval.cli:app"
[build-system]
requires = ["uv_build>=0.8.3,<0.9.0"]
build-backend = "uv_build"
[tool.uv.sources]
embedder = { workspace = true }

View File

@@ -0,0 +1,46 @@
from typing import Annotated
import typer
from .main import evaluate
app = typer.Typer()


@app.command()
def main(
    input_file: str | None = typer.Option(
        None,
        "-i",
        "--input-file",
        help="Path to a local CSV or SQLite file for evaluation data. "
        "If not provided, data will be loaded from BigQuery.",
    ),
    output_file: str | None = typer.Option(
        None,
        "-o",
        "--output-file",
        help="Path to save the detailed results as a CSV file. "
        "If not provided, results will be saved to BigQuery.",
    ),
    run_id: str | None = typer.Option(
        None,
        help="Optional: The specific run_id to filter the evaluation data by."
    ),
):
    """Evaluates the search metrics by loading data from BigQuery or a local file."""
    # Thin CLI wrapper: all logic lives in .main.evaluate.
    evaluate(input_file=input_file, output_file=output_file, run_id=run_id)


if __name__ == "__main__":
    app()

View File

@@ -0,0 +1,305 @@
import pathlib
import sqlite3
import pandas as pd
from embedder.vertex_ai import VertexAIEmbedder
from google.cloud import bigquery
from ranx import Qrels, Run
from ranx import evaluate as ranx
from rich.console import Console
from rich.progress import track
from rich.table import Table
from vector_search.vertex_ai import GoogleCloudVectorSearch
from rag_eval.config import settings as config
def load_data_from_local_file(
    file_path: str, console: Console, run_id: str | None = None
) -> pd.DataFrame:
    """Load evaluation data from a local CSV or SQLite file.

    Args:
        file_path: Path to a ``.csv`` or ``.db``/``.sqlite`` file. SQLite
            databases must contain a table named ``evaluation_data``.
        console: Rich console used for status and error reporting.
        run_id: Optional run identifier; when provided and a ``run_id``
            column exists, only matching rows are kept.

    Returns:
        DataFrame with ``question``, ``document_path`` and string ``id``
        columns, with rows containing NaN dropped.

    Raises:
        FileNotFoundError: If ``file_path`` does not exist.
        ValueError: If the file type is unsupported or the required
            'input' and 'source' columns are missing.
    """
    console.print(f"[bold green]Loading data from {file_path}...[/bold green]")
    path = pathlib.Path(file_path)
    if not path.exists():
        console.print(f"[bold red]Error: File not found at {file_path}[/bold red]")
        # A bare `raise` outside an `except` block only yields
        # "RuntimeError: No active exception to re-raise"; raise explicitly.
        raise FileNotFoundError(f"File not found at {file_path}")
    if path.suffix == ".csv":
        try:
            df = pd.read_csv(path)
        except Exception as e:
            console.print(
                f"[bold red]An error occurred while reading the CSV file: {e}[/bold red]"
            )
            raise
    elif path.suffix in [".db", ".sqlite"]:
        try:
            con = sqlite3.connect(path)
            try:
                # The expected table name is 'evaluation_data' (see README).
                df = pd.read_sql("SELECT * FROM evaluation_data", con)
            finally:
                # Close the connection even if the query fails.
                con.close()
        except Exception as e:
            console.print(
                f"[bold red]An error occurred while reading the SQLite DB: {e}[/bold red]"
            )
            raise
    else:
        console.print(
            f"[bold red]Unsupported file type: {path.suffix}. Please use .csv or .db/.sqlite[/bold red]"
        )
        raise ValueError(f"Unsupported file type: {path.suffix}")
    # Standardize column names and derive a stable, 1-based string id.
    if "input" in df.columns and "source" in df.columns:
        df = df.rename(columns={"input": "question", "source": "document_path"})
        df["id"] = (df.index + 1).astype(str)
    else:
        console.print(
            "[bold red]Error: The input file must contain 'input' and 'source' columns.[/bold red]"
        )
        raise ValueError("The input file must contain 'input' and 'source' columns.")
    if run_id:
        if "run_id" in df.columns:
            df = df[df["run_id"] == run_id].copy()
            console.print(f"Filtered data for run_id: [bold cyan]{run_id}[/bold cyan]")
            if df.empty:
                console.print(
                    f"[bold yellow]Warning: No data found for run_id '{run_id}' in {file_path}.[/bold yellow]"
                )
        else:
            # Best-effort: fall back to all rows rather than failing.
            console.print(
                f"[bold yellow]Warning: --run-id provided, but 'run_id' column not found in {file_path}. Using all data.[/bold yellow]"
            )
    df.dropna(inplace=True)
    console.print(f"Loaded {len(df)} questions for evaluation.")
    return df
def load_data_from_bigquery(
    console: Console, run_id: str | None = None
) -> pd.DataFrame:
    """Load evaluation data from the configured BigQuery table.

    Queries the ``synth_gen`` table (from config), aliasing ``input`` →
    ``question`` and ``source`` → ``document_path``, and excluding rows whose
    ``type`` is 'Unanswerable'.

    Args:
        console: Rich console used for status and error reporting.
        run_id: Optional run identifier used to filter the source rows
            (requires a ``run_id`` column in the table).

    Returns:
        DataFrame with ``question``, ``document_path`` and ``id`` columns,
        with rows containing NaN dropped. May be empty.

    Raises:
        Exception: Re-raises any BigQuery client error after printing a
            diagnostic message.
    """
    console.print("[bold green]Loading data from BigQuery...[/bold green]")
    bq_project_id = config.bigquery.project_id or config.project_id
    client = bigquery.Client(project=bq_project_id)
    table_ref = f"{bq_project_id}.{config.bigquery.dataset_id}.{config.bigquery.table_ids['synth_gen']}"
    console.print(f"Querying table: [bold cyan]{table_ref}[/bold cyan]")
    query = f"""
        SELECT
            input AS question,
            source AS document_path,
            ROW_NUMBER() OVER() as id
        FROM
            `{table_ref}`
        WHERE
            `type` != 'Unanswerable'
    """
    job_config = None
    if run_id:
        console.print(f"Filtering for run_id: [bold cyan]{run_id}[/bold cyan]")
        # Use a query parameter instead of interpolating run_id into the SQL
        # string (the CLI-supplied value was previously a SQL-injection vector).
        query += " AND run_id = @run_id"
        job_config = bigquery.QueryJobConfig(
            query_parameters=[
                bigquery.ScalarQueryParameter("run_id", "STRING", run_id)
            ]
        )
    try:
        df = client.query(query, job_config=job_config).to_dataframe()
    except Exception as e:
        if "Not found" in str(e):
            console.print(f"[bold red]Error: Table {table_ref} not found.[/bold red]")
            console.print(
                "Please ensure the table exists and the configuration in 'config.yaml' is correct."
            )
            raise
        elif "unrecognized name: run_id" in str(e).lower():
            console.print(
                "[bold red]Error: The BigQuery table must contain a 'run_id' column when using the --run-id flag.[/bold red]"
            )
            raise
        else:
            console.print(
                f"[bold red]An error occurred while querying BigQuery: {e}[/bold red]"
            )
            raise
    df.dropna(inplace=True)
    console.print(f"Loaded {len(df)} questions for evaluation.")
    if df.empty:
        console.print(
            f"[bold yellow]Warning: No data found for run_id '{run_id}' in BigQuery.[/bold yellow]"
        )
    return df
def run_evaluation(
    df: pd.DataFrame, console: Console
) -> pd.DataFrame:
    """Runs the search evaluation on the given dataframe.

    For every row, embeds the question with Vertex AI, queries the vector
    search index for the top 10 documents, then scores the retrieval with
    ranx (precision/recall/F1/nDCG/MRR at k=1,3,5,10) and prints a summary
    table.

    Args:
        df: Evaluation data; must contain 'id', 'question' and
            'document_path' columns.
        console: Rich console used for progress display and the results table.

    Returns:
        DataFrame with one row per question containing the retrieved
        documents, their distances, and whether the expected document was
        among the results.
    """
    agent_config = config.agent
    index_config = config.index
    console.print(
        f"Embedding Model: [bold cyan]{agent_config.embedding_model}[/bold cyan]"
    )
    console.print(f"Index Name: [bold cyan]{index_config.name}[/bold cyan]")
    # Initialize the embedder and vector search client, and attach the
    # deployed index endpoint before querying.
    embedder = VertexAIEmbedder(
        project=config.project_id,
        location=config.location,
        model_name=agent_config.embedding_model
    )
    vector_search = GoogleCloudVectorSearch(
        project_id=config.project_id,
        location=config.location,
        bucket=config.bucket,
        index_name=index_config.name,
    )
    vector_search.load_index_endpoint(index_config.endpoint)
    # Build ranx qrels: each question id maps to its single relevant document
    # (the file name portion of document_path) with relevance 1.
    qrels_data = {}
    for _, row in track(df.iterrows(), total=len(df), description="Preparing qrels..."):
        doc_path = str(row["document_path"]).split("/")[-1].strip()
        qrels_data[str(row["id"])] = {doc_path: 1}
    qrels = Qrels(qrels_data)
    # Build the ranx run (retrieved doc -> distance score per question) and,
    # in the same pass, collect per-question detail rows for the report.
    run_data = {}
    detailed_results_list = []
    for _, row in track(df.iterrows(), total=len(df), description="Preparing run..."):
        question_embedding = embedder.generate_embedding(row["question"])
        results = vector_search.run_query(
            deployed_index_id=index_config.deployment,
            query=question_embedding,
            limit=10,
        )
        run_data[str(row["id"])] = {
            result["id"]: result["distance"] for result in results
        }
        retrieved_docs = [result["id"] for result in results]
        retrieved_distances = [result["distance"] for result in results]
        expected_doc = str(row["document_path"]).split("/")[-1].strip()
        detailed_results_list.append(
            {
                "agent": agent_config.name,
                "id": row["id"],
                "input": row["question"],
                "expected_document": expected_doc,
                "retrieved_documents": retrieved_docs,
                "retrieved_distances": retrieved_distances,
                "is_expected_in_results": expected_doc in retrieved_docs,
            }
        )
    run = Run(run_data)
    # Score the run at each cutoff with the five standard retrieval metrics.
    k_values = [1, 3, 5, 10]
    metrics = []
    for k in k_values:
        metrics.extend(
            [f"precision@{k}", f"recall@{k}", f"f1@{k}", f"ndcg@{k}", f"mrr@{k}"]
        )
    with console.status("[bold green]Running evaluation..."):
        results = ranx(qrels, run, metrics)
    # Render one row per k; missing metrics display as "N/A".
    table = Table(title=f"Search Metrics @k for Agent: {agent_config.name}")
    table.add_column("k", justify="right", style="cyan")
    table.add_column("Precision@k", justify="right")
    table.add_column("Recall@k", justify="right")
    table.add_column("F1@k", justify="right")
    table.add_column("nDCG@k", justify="right")
    table.add_column("MRR@k", justify="right")
    for k in k_values:
        precision = results.get(f"precision@{k}")
        recall = results.get(f"recall@{k}")
        f1 = results.get(f"f1@{k}")
        ndcg = results.get(f"ndcg@{k}")
        mrr = results.get(f"mrr@{k}")
        table.add_row(
            str(k),
            f"{precision:.4f}" if precision is not None else "N/A",
            f"{recall:.4f}" if recall is not None else "N/A",
            f"{f1:.4f}" if f1 is not None else "N/A",
            f"{ndcg:.4f}" if ndcg is not None else "N/A",
            f"{mrr:.4f}" if mrr is not None else "N/A",
        )
    console.print(table)
    return pd.DataFrame(detailed_results_list)
def evaluate(
    input_file: str | None = None,
    output_file: str | None = None,
    run_id: str | None = None,
):
    """Core logic for evaluating search metrics.

    Loads evaluation data (local file or BigQuery), runs the search
    evaluation, and saves detailed results (CSV file or BigQuery table).

    Args:
        input_file: Optional path to a local CSV/SQLite source; when None,
            data is loaded from BigQuery.
        output_file: Optional CSV path for the detailed results; when None,
            results are appended to the configured BigQuery table.
        run_id: Optional run identifier used to filter the source data.

    Raises:
        ValueError: If the loaded dataframe is empty or the configured
            agent has no index.
    """
    console = Console()
    if input_file:
        df = load_data_from_local_file(input_file, console, run_id)
    else:
        df = load_data_from_bigquery(console, run_id)
    if df.empty:
        raise ValueError("Dataframe is empty")
    # Guard clause: without an index there is nothing to evaluate. The
    # original bare `raise` here (outside any except block) would only
    # produce "RuntimeError: No active exception to re-raise".
    if not config.index:
        console.print(
            f"[yellow]Skipping agent '{config.agent.name}' as it has no index configured.[/yellow]"
        )
        raise ValueError(f"Agent '{config.agent.name}' has no index configured.")
    console.print(
        f"[bold blue]Running evaluation for agent: {config.agent.name}[/bold blue]"
    )
    results_df = run_evaluation(df, console)
    if output_file:
        console.print(
            f"Saving detailed results to CSV file: [bold cyan]{output_file}[/bold cyan]"
        )
        try:
            results_df.to_csv(output_file, index=False)
            console.print(
                f"Successfully saved {len(results_df)} rows to [bold green]{output_file}[/bold green]"
            )
        except Exception as e:
            console.print(
                f"[bold red]An error occurred while saving to CSV: {e}[/bold red]"
            )
            raise
    else:
        # Save detailed results to BigQuery, appending to the table if it
        # already exists.
        project_id = config.bigquery.project_id or config.project_id
        dataset_id = config.bigquery.dataset_id
        table_name = config.bigquery.table_ids["search_eval"]
        table_id = f"{project_id}.{dataset_id}.{table_name}"
        console.print(
            f"Saving detailed results to BigQuery table: [bold cyan]{table_id}[/bold cyan]"
        )
        try:
            # NOTE(review): DataFrame.to_gbq is deprecated in recent pandas;
            # consider pandas_gbq.to_gbq(...) when upgrading.
            results_df.to_gbq(
                destination_table=f"{dataset_id}.{table_name}",
                project_id=project_id,
                if_exists="append",
            )
            console.print(
                f"Successfully saved {len(results_df)} rows to [bold green]{table_id}[/bold green]"
            )
        except Exception as e:
            console.print(
                f"[bold red]An error occurred while saving to BigQuery: {e}[/bold red]"
            )
            raise