First commit

apps/search-eval/README.md (new file, 95 lines)
@@ -0,0 +1,95 @@
# Search Evaluation

This package contains scripts to evaluate the performance of the vector search component.

## Evaluation

The `search-eval` script evaluates search performance. It can source data from either BigQuery or local files.

### Local File Evaluation

To run the evaluation using a local file, use the `--input-file` option.

```bash
uv run search-eval -- --input-file /path/to/your/data.csv
```

Or, for a SQLite database:

```bash
uv run search-eval -- --input-file /path/to/your/data.db
```

#### Input File Structures

**CSV File**

The CSV file must contain the following columns:

| Column   | Description                                   |
|----------|-----------------------------------------------|
| `input`  | The question to be used for the search query. |
| `source` | The expected document path for the question.  |
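For illustration, a matching CSV can be produced with pandas (already a dependency of this package); the question and document path below are made-up placeholders:

```python
import pandas as pd

# Hypothetical example row: 'input' holds the query, 'source' the expected document path.
rows = pd.DataFrame(
    {
        "input": ["What is the grace period for card payments?"],
        "source": ["docs/cards/payments.pdf"],
    }
)
rows.to_csv("data.csv", index=False)
```
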
**SQLite Database**

The SQLite database must contain a table named `evaluation_data` with the following columns:

| Column   | Description                                   |
|----------|-----------------------------------------------|
| `input`  | The question to be used for the search query. |
| `source` | The expected document path for the question.  |
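Similarly, a minimal SQLite input can be sketched with the standard library (the file name and row below are placeholders):

```python
import sqlite3

# Create the expected 'evaluation_data' table with one hypothetical row.
con = sqlite3.connect("data.db")
con.execute("CREATE TABLE IF NOT EXISTS evaluation_data (input TEXT, source TEXT)")
con.execute(
    "INSERT INTO evaluation_data (input, source) VALUES (?, ?)",
    ("What is the grace period for card payments?", "docs/cards/payments.pdf"),
)
con.commit()
con.close()
```
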
### BigQuery Evaluation

The `search-eval-bq` script evaluates search performance using data sourced from and written to BigQuery.

### BigQuery Table Structures

#### Input Table

The input table must contain the following columns:

| Column          | Type   | Description                                                                     |
| --------------- | ------ | ------------------------------------------------------------------------------- |
| `id`            | STRING | A unique identifier for each question.                                          |
| `question`      | STRING | The question to be used for the search query.                                   |
| `document_path` | STRING | The expected document path for the given question.                              |
| `question_type` | STRING | The type of question. Rows where `question_type` is 'Unanswerable' are ignored. |
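The column spec above can be expressed as BigQuery DDL. This sketch only builds the statement as a string for reference (the table name is a placeholder) and does not submit it to BigQuery:

```python
# Hypothetical table name; substitute your own project.dataset.table.
input_table = "my-gcp-project.my_dataset.questions"

# DDL mirroring the input-table columns documented above.
ddl = f"""
CREATE TABLE IF NOT EXISTS `{input_table}` (
  id STRING,
  question STRING,
  document_path STRING,
  question_type STRING
)
"""
```
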
#### Output Table

The output table will be created by the script if it doesn't exist, or appended to if it does. It has the following structure:

| Column                   | Type      | Description                                                                |
| ------------------------ | --------- | -------------------------------------------------------------------------- |
| `id`                     | STRING    | The unique identifier for the question from the input table.               |
| `question`               | STRING    | The question used for the search query.                                    |
| `expected_document`      | STRING    | The expected document for the given question.                              |
| `retrieved_documents`    | STRING[]  | An array of document IDs retrieved from the vector search.                 |
| `retrieved_distances`    | FLOAT64[] | An array of distance scores for the retrieved documents.                   |
| `is_expected_in_results` | BOOLEAN   | A flag indicating whether the expected document was in the search results. |
| `evaluation_timestamp`   | TIMESTAMP | The timestamp of when the evaluation was run.                              |

### Usage

To run the BigQuery evaluation script, use the `uv run search-eval-bq` command with the following options:

```bash
uv run search-eval-bq -- --input-table <project.dataset.table> --output-table <project.dataset.table> [--project-id <gcp-project-id>]
```

**Arguments:**

* `--input-table`: **(Required)** The full BigQuery table name for the input data (e.g., `my-gcp-project.my_dataset.questions`).
* `--output-table`: **(Required)** The full BigQuery table name for the output results (e.g., `my-gcp-project.my_dataset.eval_results`).
* `--project-id`: (Optional) The Google Cloud project ID. If not provided, the `project_id` from the `config.yaml` file is used.

**Example:**

```bash
uv run search-eval-bq -- \
    --input-table "my-gcp-project.search_eval.synthetic_questions" \
    --output-table "my-gcp-project.search_eval.results" \
    --project-id "my-gcp-project"
```
apps/search-eval/pyproject.toml (new file, 27 lines)
@@ -0,0 +1,27 @@
[project]
name = "search-eval"
version = "0.1.0"
description = "Add your description here"
readme = "README.md"
authors = [
    { name = "Anibal Angulo", email = "a8065384@banorte.com" }
]
requires-python = ">=3.12"
dependencies = [
    "embedder",
    "ranx>=0.3.21",
    "google-cloud-bigquery",
    "pandas-gbq",
    "kfp>=1.4.0",
    "requests-toolbelt>=1.0.0",
]

[project.scripts]
search-eval = "search_eval.cli:app"

[build-system]
requires = ["uv_build>=0.8.3,<0.9.0"]
build-backend = "uv_build"

[tool.uv.sources]
embedder = { workspace = true }
apps/search-eval/src/search_eval/__init__.py (new file, empty)

apps/search-eval/src/search_eval/cli.py (new file, 46 lines)
@@ -0,0 +1,46 @@
from typing import Annotated

import typer

from .main import evaluate

app = typer.Typer()


@app.command()
def main(
    input_file: Annotated[
        str | None,
        typer.Option(
            "-i",
            "--input-file",
            help="Path to a local CSV or SQLite file for evaluation data. "
            "If not provided, data will be loaded from BigQuery.",
        ),
    ] = None,
    output_file: Annotated[
        str | None,
        typer.Option(
            "-o",
            "--output-file",
            help="Path to save the detailed results as a CSV file. "
            "If not provided, results will be saved to BigQuery.",
        ),
    ] = None,
    run_id: Annotated[
        str | None,
        typer.Option(
            help="Optional: The specific run_id to filter the evaluation data by."
        ),
    ] = None,
):
    """Evaluates the search metrics by loading data from BigQuery or a local file."""
    evaluate(
        input_file=input_file,
        output_file=output_file,
        run_id=run_id,
    )


if __name__ == "__main__":
    app()
apps/search-eval/src/search_eval/main.py (new file, 305 lines)
@@ -0,0 +1,305 @@
import pathlib
import sqlite3

import pandas as pd
from embedder.vertex_ai import VertexAIEmbedder
from google.cloud import bigquery
from ranx import Qrels, Run
from ranx import evaluate as ranx_evaluate
from rich.console import Console
from rich.progress import track
from rich.table import Table
from vector_search.vertex_ai import GoogleCloudVectorSearch

from rag_eval.config import settings as config


def load_data_from_local_file(
    file_path: str, console: Console, run_id: str | None = None
) -> pd.DataFrame:
    """Loads evaluation data from a local CSV or SQLite file."""
    console.print(f"[bold green]Loading data from {file_path}...[/bold green]")
    path = pathlib.Path(file_path)
    if not path.exists():
        console.print(f"[bold red]Error: File not found at {file_path}[/bold red]")
        raise FileNotFoundError(file_path)

    if path.suffix == ".csv":
        try:
            df = pd.read_csv(path)
        except Exception as e:
            console.print(
                f"[bold red]An error occurred while reading the CSV file: {e}[/bold red]"
            )
            raise
    elif path.suffix in [".db", ".sqlite"]:
        try:
            con = sqlite3.connect(path)
            # The evaluation data is expected in the 'evaluation_data' table.
            df = pd.read_sql("SELECT * FROM evaluation_data", con)
            con.close()
        except Exception as e:
            console.print(
                f"[bold red]An error occurred while reading the SQLite DB: {e}[/bold red]"
            )
            raise
    else:
        console.print(
            f"[bold red]Unsupported file type: {path.suffix}. Please use .csv or .db/.sqlite[/bold red]"
        )
        raise ValueError(f"Unsupported file type: {path.suffix}")

    # Standardize column names and add a string ID column.
    if "input" in df.columns and "source" in df.columns:
        df = df.rename(columns={"input": "question", "source": "document_path"})
        df["id"] = df.index + 1
        df["id"] = df["id"].astype(str)
    else:
        console.print(
            "[bold red]Error: The input file must contain 'input' and 'source' columns.[/bold red]"
        )
        raise ValueError("The input file must contain 'input' and 'source' columns.")

    if run_id:
        if "run_id" in df.columns:
            df = df[df["run_id"] == run_id].copy()
            console.print(f"Filtered data for run_id: [bold cyan]{run_id}[/bold cyan]")
            if df.empty:
                console.print(
                    f"[bold yellow]Warning: No data found for run_id '{run_id}' in {file_path}.[/bold yellow]"
                )
        else:
            console.print(
                f"[bold yellow]Warning: --run-id provided, but 'run_id' column not found in {file_path}. Using all data.[/bold yellow]"
            )

    df.dropna(inplace=True)
    console.print(f"Loaded {len(df)} questions for evaluation.")
    return df


def load_data_from_bigquery(
    console: Console, run_id: str | None = None
) -> pd.DataFrame:
    """Loads evaluation data from the BigQuery table."""
    console.print("[bold green]Loading data from BigQuery...[/bold green]")
    bq_project_id = config.bigquery.project_id or config.project_id
    client = bigquery.Client(project=bq_project_id)
    table_ref = f"{bq_project_id}.{config.bigquery.dataset_id}.{config.bigquery.table_ids['synth_gen']}"

    console.print(f"Querying table: [bold cyan]{table_ref}[/bold cyan]")
    query = f"""
        SELECT
            input AS question,
            source AS document_path,
            ROW_NUMBER() OVER() as id
        FROM
            `{table_ref}`
        WHERE
            `type` != 'Unanswerable'
    """
    if run_id:
        console.print(f"Filtering for run_id: [bold cyan]{run_id}[/bold cyan]")
        query += f" AND run_id = '{run_id}'"

    try:
        df = client.query(query).to_dataframe()
    except Exception as e:
        if "Not found" in str(e):
            console.print(f"[bold red]Error: Table {table_ref} not found.[/bold red]")
            console.print(
                "Please ensure the table exists and the configuration in 'config.yaml' is correct."
            )
            raise
        elif "unrecognized name: run_id" in str(e).lower():
            console.print(
                "[bold red]Error: The BigQuery table must contain a 'run_id' column when using the --run-id flag.[/bold red]"
            )
            raise
        else:
            console.print(
                f"[bold red]An error occurred while querying BigQuery: {e}[/bold red]"
            )
            raise

    df.dropna(inplace=True)
    console.print(f"Loaded {len(df)} questions for evaluation.")
    if df.empty:
        console.print(
            f"[bold yellow]Warning: No data found for run_id '{run_id}' in BigQuery.[/bold yellow]"
        )
    return df


def run_evaluation(
    df: pd.DataFrame, console: Console
) -> pd.DataFrame:
    """Runs the search evaluation on the given dataframe."""
    agent_config = config.agent
    index_config = config.index
    console.print(
        f"Embedding Model: [bold cyan]{agent_config.embedding_model}[/bold cyan]"
    )
    console.print(f"Index Name: [bold cyan]{index_config.name}[/bold cyan]")

    # Initialize the embedder and the vector search client.
    embedder = VertexAIEmbedder(
        project=config.project_id,
        location=config.location,
        model_name=agent_config.embedding_model,
    )
    vector_search = GoogleCloudVectorSearch(
        project_id=config.project_id,
        location=config.location,
        bucket=config.bucket,
        index_name=index_config.name,
    )
    vector_search.load_index_endpoint(index_config.endpoint)

    # Prepare qrels: each question maps to its single expected document.
    qrels_data = {}
    for _, row in track(df.iterrows(), total=len(df), description="Preparing qrels..."):
        doc_path = str(row["document_path"]).split("/")[-1].strip()
        qrels_data[str(row["id"])] = {doc_path: 1}
    qrels = Qrels(qrels_data)

    # Prepare the run: embed each question and query the vector index.
    run_data = {}
    detailed_results_list = []
    for _, row in track(df.iterrows(), total=len(df), description="Preparing run..."):
        question_embedding = embedder.generate_embedding(row["question"])
        results = vector_search.run_query(
            deployed_index_id=index_config.deployment,
            query=question_embedding,
            limit=10,
        )
        run_data[str(row["id"])] = {
            result["id"]: result["distance"] for result in results
        }

        retrieved_docs = [result["id"] for result in results]
        retrieved_distances = [result["distance"] for result in results]
        expected_doc = str(row["document_path"]).split("/")[-1].strip()

        detailed_results_list.append(
            {
                "agent": agent_config.name,
                "id": row["id"],
                "input": row["question"],
                "expected_document": expected_doc,
                "retrieved_documents": retrieved_docs,
                "retrieved_distances": retrieved_distances,
                "is_expected_in_results": expected_doc in retrieved_docs,
            }
        )
    run = Run(run_data)

    # Evaluate at several cutoffs.
    k_values = [1, 3, 5, 10]
    metrics = []
    for k in k_values:
        metrics.extend(
            [f"precision@{k}", f"recall@{k}", f"f1@{k}", f"ndcg@{k}", f"mrr@{k}"]
        )

    with console.status("[bold green]Running evaluation..."):
        results = ranx_evaluate(qrels, run, metrics)

    # Render the metrics table.
    table = Table(title=f"Search Metrics @k for Agent: {agent_config.name}")
    table.add_column("k", justify="right", style="cyan")
    table.add_column("Precision@k", justify="right")
    table.add_column("Recall@k", justify="right")
    table.add_column("F1@k", justify="right")
    table.add_column("nDCG@k", justify="right")
    table.add_column("MRR@k", justify="right")

    for k in k_values:
        precision = results.get(f"precision@{k}")
        recall = results.get(f"recall@{k}")
        f1 = results.get(f"f1@{k}")
        ndcg = results.get(f"ndcg@{k}")
        mrr = results.get(f"mrr@{k}")
        table.add_row(
            str(k),
            f"{precision:.4f}" if precision is not None else "N/A",
            f"{recall:.4f}" if recall is not None else "N/A",
            f"{f1:.4f}" if f1 is not None else "N/A",
            f"{ndcg:.4f}" if ndcg is not None else "N/A",
            f"{mrr:.4f}" if mrr is not None else "N/A",
        )
    console.print(table)

    return pd.DataFrame(detailed_results_list)


def evaluate(
    input_file: str | None = None,
    output_file: str | None = None,
    run_id: str | None = None,
):
    """Core logic for evaluating search metrics."""
    console = Console()
    if input_file:
        df = load_data_from_local_file(input_file, console, run_id)
    else:
        df = load_data_from_bigquery(console, run_id)

    if df.empty:
        raise ValueError("Dataframe is empty")

    if config.index:
        console.print(
            f"[bold blue]Running evaluation for agent: {config.agent.name}[/bold blue]"
        )
        results_df = run_evaluation(df, console)
    else:
        console.print(
            f"[yellow]Skipping agent '{config.agent.name}' as it has no index configured.[/yellow]"
        )
        raise ValueError(f"Agent '{config.agent.name}' has no index configured.")

    final_results_df = results_df

    if output_file:
        console.print(
            f"Saving detailed results to CSV file: [bold cyan]{output_file}[/bold cyan]"
        )
        try:
            final_results_df.to_csv(output_file, index=False)
            console.print(
                f"Successfully saved {len(final_results_df)} rows to [bold green]{output_file}[/bold green]"
            )
        except Exception as e:
            console.print(
                f"[bold red]An error occurred while saving to CSV: {e}[/bold red]"
            )
            raise
    else:
        # Save detailed results to BigQuery.
        project_id = config.bigquery.project_id or config.project_id
        dataset_id = config.bigquery.dataset_id
        table_name = config.bigquery.table_ids["search_eval"]
        table_id = f"{project_id}.{dataset_id}.{table_name}"

        console.print(
            f"Saving detailed results to BigQuery table: [bold cyan]{table_id}[/bold cyan]"
        )
        try:
            final_results_df.to_gbq(
                destination_table=f"{dataset_id}.{table_name}",
                project_id=project_id,
                if_exists="append",
            )
            console.print(
                f"Successfully saved {len(final_results_df)} rows to [bold green]{table_id}[/bold green]"
            )
        except Exception as e:
            console.print(
                f"[bold red]An error occurred while saving to BigQuery: {e}[/bold red]"
            )
            raise
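The metric names passed to ranx in `run_evaluation` follow the standard IR definitions. As a sanity check, precision@k and MRR for a single query can be computed by hand (the document names below are made up):

```python
def precision_at_k(retrieved: list[str], relevant: set[str], k: int) -> float:
    # Fraction of the top-k retrieved documents that are relevant.
    top_k = retrieved[:k]
    return sum(1 for doc in top_k if doc in relevant) / k


def mrr(retrieved: list[str], relevant: set[str]) -> float:
    # Reciprocal rank of the first relevant document (0.0 if none is retrieved).
    for rank, doc in enumerate(retrieved, start=1):
        if doc in relevant:
            return 1.0 / rank
    return 0.0


# Hypothetical results: the expected document appears at rank 2.
retrieved = ["doc_b.pdf", "doc_a.pdf", "doc_c.pdf"]
relevant = {"doc_a.pdf"}
```

With one relevant document per query, as in this evaluation, precision@1 here is 0.0 and MRR is 0.5.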