First commit

Anibal Angulo
2026-02-18 19:57:43 +00:00
commit a53f8fcf62
115 changed files with 9957 additions and 0 deletions

apps/index-gen/README.md (0 additions)

@@ -0,0 +1,34 @@
[project]
name = "index-gen"
version = "0.1.0"
description = "Add your description here"
readme = "README.md"
authors = [
{ name = "Anibal Angulo", email = "a8065384@banorte.com" }
]
requires-python = ">=3.12"
dependencies = [
"chunker",
"document-converter",
"embedder",
"file-storage",
"llm",
"utils",
"vector-search",
]
[project.scripts]
index-gen = "index_gen.cli:app"
[build-system]
requires = ["uv_build>=0.8.12,<0.9.0"]
build-backend = "uv_build"
[tool.uv.sources]
file-storage = { workspace = true }
vector-search = { workspace = true }
utils = { workspace = true }
embedder = { workspace = true }
chunker = { workspace = true }
document-converter = { workspace = true }
llm = { workspace = true }


@@ -0,0 +1,2 @@
def main() -> None:
print("Hello from index-gen!")


@@ -0,0 +1,68 @@
import logging
import tempfile
from pathlib import Path
import typer
from index_gen.main import (
aggregate_vectors,
build_gcs_path,
create_vector_index,
gather_files,
process_file,
)
from rag_eval.config import settings
app = typer.Typer()
@app.command()
def run_ingestion():
"""Main function for the CLI script."""
logging.basicConfig(level=logging.INFO)
agent_config = settings.agent
index_config = settings.index
if not agent_config or not index_config:
raise ValueError("Agent or index configuration not found in config.yaml")
# Gather files
files = gather_files(index_config.origin)
# Build output paths
contents_output_dir = build_gcs_path(index_config.data, "/contents")
vectors_output_dir = build_gcs_path(index_config.data, "/vectors")
aggregated_vectors_gcs_path = build_gcs_path(
index_config.data, "/vectors/vectors.json"
)
with tempfile.TemporaryDirectory() as temp_dir:
temp_dir_path = Path(temp_dir)
vector_artifact_paths = []
# Process files and create local artifacts
for i, file in enumerate(files):
artifact_path = temp_dir_path / f"vectors_{i}.jsonl"
vector_artifact_paths.append(artifact_path)
process_file(
file,
agent_config.embedding_model,
contents_output_dir,
artifact_path, # Pass the local path
index_config.chunk_limit,
)
# Aggregate the local artifacts into one file in GCS
aggregate_vectors(
vector_artifacts=vector_artifact_paths,
output_gcs_path=aggregated_vectors_gcs_path,
)
# Create vector index
create_vector_index(vectors_output_dir)
if __name__ == "__main__":
app()


@@ -0,0 +1,225 @@
"""
This script defines a Kubeflow Pipeline (KFP) for ingesting and processing documents.
The pipeline is designed to run on Vertex AI Pipelines and consists of the following steps:
1. **Gather Files**: Scans a GCS directory for PDF files to process.
2. **Process Files (in parallel)**: For each PDF file found, this step:
a. Converts the PDF to Markdown text.
b. Chunks the text if it's too long.
c. Generates a vector embedding for each chunk using a Vertex AI embedding model.
d. Saves the markdown content and the vector embedding to separate GCS output paths.
"""
import json
import logging
import os
import tempfile
from pathlib import Path
from rag_eval.config import settings
def build_gcs_path(base_path: str, suffix: str) -> str:
"""Builds a GCS path by appending a suffix."""
return f"{base_path}{suffix}"
def gather_files(
input_dir: str,
) -> list:
"""Gathers all PDF file paths from a GCS directory."""
from google.cloud import storage
logging.getLogger().setLevel(logging.INFO)
gcs_client = storage.Client()
bucket_name, prefix = input_dir.replace("gs://", "").split("/", 1)
bucket = gcs_client.bucket(bucket_name)
blob_list = bucket.list_blobs(prefix=prefix)
pdf_files = [
f"gs://{bucket_name}/{blob.name}"
for blob in blob_list
if blob.name.endswith(".pdf")
]
logging.info(f"Found {len(pdf_files)} PDF files in {input_dir}")
return pdf_files
def process_file(
file_path: str,
model_name: str,
contents_output_dir: str,
vectors_output_file: Path,
chunk_limit: int,
):
"""
Processes a single PDF file: converts to markdown, chunks, and generates embeddings.
The vector embeddings are written to a local JSONL file.
"""
# Imports are inside the function as KFP serializes this function
from pathlib import Path
from chunker.contextual_chunker import ContextualChunker
from document_converter.markdown import MarkdownConverter
from embedder.vertex_ai import VertexAIEmbedder
from google.cloud import storage
from llm.vertex_ai import VertexAILLM
from utils.normalize_filenames import normalize_string
logging.getLogger().setLevel(logging.INFO)
# Initialize converters and embedders
converter = MarkdownConverter()
embedder = VertexAIEmbedder(model_name=model_name, project=settings.project_id, location=settings.location)
llm = VertexAILLM(project=settings.project_id, location=settings.location)
chunker = ContextualChunker(llm_client=llm, max_chunk_size=chunk_limit)
gcs_client = storage.Client()
file_id = normalize_string(Path(file_path).stem)
local_path = Path(f"/tmp/{Path(file_path).name}")
with open(vectors_output_file, "w", encoding="utf-8") as f:
try:
# Download file from GCS
bucket_name, blob_name = file_path.replace("gs://", "").split("/", 1)
bucket = gcs_client.bucket(bucket_name)
blob = bucket.blob(blob_name)
blob.download_to_filename(local_path)
logging.info(f"Processing file: {file_path}")
# Process the downloaded file
markdown_content = converter.process_file(local_path)
def upload_to_gcs(bucket_name, blob_name, data):
bucket = gcs_client.bucket(bucket_name)
blob = bucket.blob(blob_name)
blob.upload_from_string(data, content_type="text/markdown; charset=utf-8")
# Determine output bucket and paths for markdown
contents_bucket_name, contents_prefix = contents_output_dir.replace(
"gs://", ""
).split("/", 1)
if len(markdown_content) > chunk_limit:
chunks = chunker.process_text(markdown_content)
for i, chunk in enumerate(chunks):
chunk_id = f"{file_id}_{i}"
embedding = embedder.generate_embedding(chunk["page_content"])
# Upload markdown chunk
md_blob_name = f"{contents_prefix}/{chunk_id}.md"
upload_to_gcs(
contents_bucket_name, md_blob_name, chunk["page_content"]
)
# Write vector to local JSONL file
json_line = json.dumps({"id": chunk_id, "embedding": embedding})
f.write(json_line + '\n')
else:
embedding = embedder.generate_embedding(markdown_content)
# Upload markdown
md_blob_name = f"{contents_prefix}/{file_id}.md"
upload_to_gcs(contents_bucket_name, md_blob_name, markdown_content)
# Write vector to local JSONL file
json_line = json.dumps({"id": file_id, "embedding": embedding})
f.write(json_line + '\n')
except Exception as e:
logging.error(f"Failed to process file {file_path}: {e}", exc_info=True)
raise
finally:
# Clean up the downloaded file
if os.path.exists(local_path):
os.remove(local_path)
def aggregate_vectors(
vector_artifacts: list, # This will be a list of paths to the artifact files
output_gcs_path: str,
):
"""
Aggregates multiple JSONL artifact files into a single JSONL file in GCS.
"""
from google.cloud import storage
logging.getLogger().setLevel(logging.INFO)
# Create a temporary file to aggregate all vector data
with tempfile.NamedTemporaryFile(
mode="w", delete=False, encoding="utf-8"
) as temp_agg_file:
logging.info(f"Aggregating vectors into temporary file: {temp_agg_file.name}")
for artifact_path in vector_artifacts:
with open(artifact_path, "r", encoding="utf-8") as f:
# Each line is a complete JSON object
for line in f:
temp_agg_file.write(line) # line already includes newline
temp_file_path = temp_agg_file.name
logging.info("Uploading aggregated file to GCS...")
gcs_client = storage.Client()
bucket_name, blob_name = output_gcs_path.replace("gs://", "").split("/", 1)
bucket = gcs_client.bucket(bucket_name)
blob = bucket.blob(blob_name)
blob.upload_from_filename(temp_file_path, content_type="application/json; charset=utf-8")
logging.info(f"Successfully uploaded aggregated vectors to {output_gcs_path}")
# Clean up the temporary file (os is imported at module level)
os.remove(temp_file_path)
def create_vector_index(
vectors_dir: str,
):
"""Creates and deploys a Vertex AI Vector Search Index."""
from vector_search.vertex_ai import GoogleCloudVectorSearch
from rag_eval.config import settings as config
logging.getLogger().setLevel(logging.INFO)
try:
index_config = config.index
logging.info(
f"Initializing Vertex AI client for project '{config.project_id}' in '{config.location}'..."
)
vector_search = GoogleCloudVectorSearch(
project_id=config.project_id,
location=config.location,
bucket=config.bucket,
index_name=index_config.name,
)
logging.info(f"Starting creation of index '{index_config.name}'...")
vector_search.create_index(
name=index_config.name,
content_path=vectors_dir,
dimensions=index_config.dimensions,
)
logging.info(f"Index '{index_config.name}' created successfully.")
logging.info("Deploying index to a new endpoint...")
vector_search.deploy_index(
index_name=index_config.name, machine_type=index_config.machine_type
)
logging.info("Index deployed successfully!")
logging.info(f"Endpoint name: {vector_search.index_endpoint.display_name}")
logging.info(
f"Endpoint resource name: {vector_search.index_endpoint.resource_name}"
)
except Exception as e:
logging.error(f"An error occurred during index creation or deployment: {e}", exc_info=True)
raise


@@ -0,0 +1,31 @@
# Integration Layer CLI
This package provides a command-line interface (CLI) to interact with the integration layer API deployed on Cloud Run.
## Installation
Install the package and its dependencies using `uv`:
```bash
uv pip install -e .
```
## Usage
The CLI provides two main commands: `send` and `chat`.
### `send`
Sends a single message to the API.
```bash
int-layer send "My message" --telefono "1234567890"
```
### `chat`
Starts an interactive chat session.
```bash
int-layer chat --telefono "1234567890"
```
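### Programmatic use
The same request can be issued from Python. Below is a minimal sketch using `IntegrationLayerClient` from this package; it assumes `gcloud` is installed and you are authenticated, and the argument values are illustrative:
```python
import subprocess

from integration_layer.main import IntegrationLayerClient

# Reuse gcloud for an identity token, as the CLI does.
token = subprocess.run(
    ["gcloud", "auth", "print-identity-token"],
    capture_output=True,
    text=True,
    check=True,
).stdout.strip()

client = IntegrationLayerClient()
# Illustrative values; `telefono` doubles as the session ID.
response = client.call(
    token=token,
    mensaje="My message",
    telefono="1234567890",
    nickname="User",
    canal="sigma",
)
print(response)
```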


@@ -0,0 +1,21 @@
[project]
name = "integration-layer"
version = "0.1.0"
description = "A CLI to interact with the integration layer API."
readme = "README.md"
authors = [
{ name = "Anibal Angulo", email = "a8065384@banorte.com" }
]
requires-python = ">=3.12"
dependencies = [
"requests",
"typer",
"rich"
]
[project.scripts]
int-layer = "integration_layer.cli:app"
[build-system]
requires = ["uv_build>=0.8.12,<0.9.0"]
build-backend = "uv_build"


@@ -0,0 +1,79 @@
import random
import string
import subprocess
import typer
from rich import print
from rich.prompt import Prompt
from .main import IntegrationLayerClient
app = typer.Typer()
def get_auth_token() -> str:
"""Gets the gcloud auth token."""
try:
result = subprocess.run(
["gcloud", "auth", "print-identity-token"],
capture_output=True,
text=True,
check=True,
)
return result.stdout.strip()
except (subprocess.CalledProcessError, FileNotFoundError) as e:
print(f"[bold red]Error getting gcloud token:[/bold red] {e}")
print("Please ensure 'gcloud' is installed and you are authenticated.")
raise typer.Exit(code=1)
@app.command()
def send(
message: str = typer.Argument(..., help="The message to send."),
telefono: str = typer.Option(..., "--telefono", "-t", help="User's phone number (session ID)."),
nickname: str = typer.Option("User", "--nickname", "-n", help="User's nickname."),
canal: str = typer.Option("sigma", "--canal", "-c", help="Channel for the request."),
):
"""
Sends a single message to the Integration Layer.
"""
try:
client = IntegrationLayerClient()
token = get_auth_token()
response = client.call(token=token, mensaje=message, telefono=telefono, nickname=nickname, canal=canal)
print(response)
except Exception as e:
print(f"[bold red]Error:[/bold red] {e}")
raise typer.Exit(code=1)
@app.command()
def chat(
telefono: str = typer.Option(None, "--telefono", "-t", help="User's phone number to start the session. If not provided, a random one will be generated."),
nickname: str = typer.Option("User", "--nickname", "-n", help="User's nickname."),
canal: str = typer.Option("sigma", "--canal", "-c", help="Channel for the request."),
):
"""
Starts an interactive chat with the Integration Layer.
"""
if not telefono:
telefono = "".join(random.choices(string.digits, k=10))
print(f"[bold yellow]No phone number provided. Using random session ID:[/] {telefono}")
try:
client = IntegrationLayerClient()
print("[bold green]Starting a new chat session. Type 'exit' or 'quit' to end.[/bold green]")
while True:
message = Prompt.ask("You")
if message.lower() in ["exit", "quit"]:
print("[bold yellow]Ending chat session.[/bold yellow]")
break
token = get_auth_token()
response = client.call(token=token, mensaje=message, telefono=telefono, nickname=nickname, canal=canal)
print(f"Agent: {response}")
except Exception as e:
print(f"[bold red]Error:[/bold red] {e}")
raise typer.Exit(code=1)
if __name__ == "__main__":
app()


@@ -0,0 +1,43 @@
import requests
class IntegrationLayerClient:
"""A class to interact with the Integration Layer API."""
def __init__(self):
"""Initializes the IntegrationLayerClient."""
self.endpoint = "https://34.111.169.196/api/v1/dialogflow/detect-intent"
def call(self, token: str, mensaje: str, telefono: str, nickname: str, canal: str) -> dict:
"""
Sends a message to the Integration Layer.
Args:
token: The gcloud auth token.
mensaje: The message to send.
telefono: The user's phone number (acts as session ID).
nickname: The user's nickname.
canal: The channel (e.g., 'sigma').
Returns:
A dictionary containing the server's response.
"""
headers = {
"Authorization": f"Bearer {token}",
"Content-Type": "application/json",
}
data = {
"mensaje": mensaje,
"usuario": {
"telefono": telefono,
"nickname": nickname,
},
"canal": canal,
}
try:
response = requests.post(self.endpoint, headers=headers, json=data, timeout=60)
response.raise_for_status()
return response.json()
except requests.exceptions.RequestException as e:
raise RuntimeError(f"Failed to connect to Integration Layer: {e}") from e


@@ -0,0 +1,30 @@
# Keypoint Evaluator
This application evaluates a RAG (Retrieval-Augmented Generation) system based on the keypoint methodology from the RAGEval paper.
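For each question, keypoints are extracted from the expected answer and each one is classified against the generated answer as relevant, irrelevant, or incorrect. The reported scores are the per-question fractions of each class: completeness (relevant / total), hallucination (incorrect / total), and irrelevance (irrelevant / total), plus a separate 0.0 to 1.0 conciseness score.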
## How to use
To run the evaluation, execute the following command from the root directory of the project:
```bash
uv run keypoint-eval --input-file <PATH_TO_EVALUATION_DATA>
```
### Arguments
* `--input-file, -i`: Path to a local CSV or SQLite file with the evaluation data. If not provided, data is loaded from BigQuery.
* `--output-file, -o`: Optional path to save the results as a CSV file. If not provided, results are saved to BigQuery.
* `--run-id`: Optional: the specific run_id to filter the evaluation data by.
* `--agent-name, -a`: Optional: the name of a specific agent to run. Use `dialogflow` to run the Dialogflow agent.
The application reads the evaluation data, runs the keypoint evaluation, and writes the results to the chosen output.
## Input File Structure
The input file can be a CSV file or a SQLite database (where the table name matches the file stem).
The data must contain the following columns:
* `input`: The user's question.
* `expected_output`: The ground truth or expected answer.
* `category` (optional): The category of the question.
Rows whose optional `type` column equals `Unanswerable` are filtered out, and an optional `run_id` column enables filtering with `--run-id`. If `conversation_id` and `turn` columns are present, the data is evaluated as multi-turn conversations.
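For example, a minimal CSV input could look like this (illustrative rows only):
```csv
input,expected_output,category
"¿Cómo puedo sacar un adelanto de nómina?","Debes recibir tu nómina en Banorte y no tener otro adelanto vigente.",Factual
```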


@@ -0,0 +1,17 @@
[project]
name = "keypoint-eval"
version = "0.1.0"
description = "Add your description here"
readme = "README.md"
authors = [
{ name = "Anibal Angulo", email = "a8065384@banorte.com" }
]
requires-python = ">=3.12"
dependencies = []
[project.scripts]
keypoint-eval = "keypoint_eval.cli:app"
[build-system]
requires = ["uv_build>=0.8.3,<0.9.0"]
build-backend = "uv_build"


@@ -0,0 +1,2 @@
def main() -> None:
print("Hello from keypoint-eval!")


@@ -0,0 +1,58 @@
import warnings
from typing import Annotated
import typer
from .main import run_keypoint_evaluation
warnings.filterwarnings("ignore")
app = typer.Typer(name="keypoint-eval")
@app.command()
def main(
input_file: Annotated[
str | None,
typer.Option(
"--input-file",
"-i",
help="Path to a local CSV or SQLite file for evaluation data. "
"If not provided, data will be loaded from BigQuery.",
),
] = None,
output_file: Annotated[
str | None,
typer.Option(
"--output-file",
"-o",
help="Optional: Path to save the output CSV file. "
"If not provided, results will be saved to BigQuery.",
),
] = None,
run_id: Annotated[
str | None,
typer.Option(
help="Optional: The specific run_id to filter the evaluation data by."
),
] = None,
agent_name: Annotated[
str | None,
typer.Option(
"-a",
"--agent-name",
help="Optional: The name of a specific agent to run. Use 'dialogflow' to run the Dialogflow agent.",
),
] = None,
):
"""CLI for running keypoint-based evaluation."""
run_keypoint_evaluation(
input_file=input_file,
output_file=output_file,
run_id=run_id,
agent_name=agent_name,
)
if __name__ == "__main__":
app()


@@ -0,0 +1,330 @@
from datetime import datetime
from typing import Literal
from llm.vertex_ai import VertexAILLM
from pydantic import BaseModel, Field
from rich.console import Console
from rich.panel import Panel
from rich.progress import BarColumn, Progress, SpinnerColumn, TextColumn
from rich.text import Text
from rag_eval.config import settings
class KeypointMetricPrompt(BaseModel):
name: str
description: str
template: str
class KeyPointResponse(BaseModel):
keypoints: list[str]
class KeyPointEval(BaseModel):
keypoint: str
analysis: str
category: Literal["relevant", "irrelevant", "incorrect"]
class KeyPointEvalList(BaseModel):
evals: list[KeyPointEval]
def _count(self, category: str) -> int:
return sum(1 for e in self.evals if e.category == category)
def count_relevant(self) -> int:
return self._count("relevant")
def count_irrelevant(self) -> int:
return self._count("irrelevant")
def count_incorrect(self) -> int:
return self._count("incorrect")
def keypoint_details(self) -> list[dict]:
return [e.model_dump() for e in self.evals]
class ConcisenessScore(BaseModel):
score: float = Field(
description="A score from 0.0 to 1.0 evaluating the conciseness of the answer."
)
class KeypointRAGEvaluator:
"""
RAG system evaluator based on the keypoint methodology from the RAGEval paper.
It focuses on 3 main metrics:
- Completeness: How well the answer captures the keypoints of the ideal answer
- Hallucination: Identification of content that contradicts the keypoints
- Irrelevance: Proportion of keypoints that are neither covered nor contradicted
"""
def __init__(self, console: Console, model: str = "gemini-2.0-flash"):
self.metrics_results = []
self.console = console
self.llm = VertexAILLM(project=settings.project_id, location=settings.location)
self.model = model
def evaluate_conciseness(self, query: str, answer: str) -> float:
"""Evaluates the conciseness of a generated answer."""
prompt = f"""Evaluate the conciseness of the following generated answer in response to the user's query.
The score should be a single float from 0.0 to 1.0, where 1.0 is perfectly concise and direct, and 0.0 is extremely verbose and full of conversational fluff.
Only consider the conciseness, not the correctness of the answer.
User Query: {query}
Generated Answer: {answer}
"""
try:
response = self.llm.structured_generation(
model=self.model,
prompt=prompt,
response_model=ConcisenessScore,
system_prompt="You are an expert evaluator focused on the conciseness and directness of answers. You output a single float score and nothing else.",
)
return response.score
except Exception as e:
self.console.print(
f"[bold red]Error during conciseness evaluation: {str(e)}[/bold red]"
)
return 0.0 # Return a neutral score in case of error
def extract_keypoints(self, question: str, ground_truth: str) -> list[str]:
"""
Extracts keypoints from the reference answer and adds standard keypoints
to verify adherence to the Banorte domain.
Args:
question: The user's question
ground_truth: The ideal or reference answer
Returns:
List of extracted keypoints plus the standard domain keypoints
"""
prompt = f"""En esta tarea, se te dará una pregunta y una respuesta ideal. Basado en la respuesta ideal,
necesitas resumir los puntos clave necesarios para responder la pregunta.
<ejemplo>
<pregunta>
Cómo puedo sacar un adelanto de nómina?
</pregunta>
<respuesta>
¡Hola! 👋 Sacar un Adelanto de Nómina con Banorte es muy fácil y
puede ayudarte con liquidez al instante. Aquí te explico cómo
funciona:
Es un monto de hasta $10,000 MXN que puedes usar para lo que
necesites, sin intereses y con una comisión fija del 7%. Lo puedes
contratar directamente desde la aplicación móvil de Banorte. Los
pagos se ajustan a la frecuencia de tu nómina y se cargan
automáticamente a tu cuenta.
Los principales requisitos son:
* Recibir tu nómina en Banorte y no tener otro adelanto vigente.
* Tener un ingreso neto mensual mayor a $2,000 MXN.
* Tener entre 18 y 74 años con 11 meses.
* Contar con un buen historial en Buró de Crédito.
¡Espero que esta información te sea muy útil! 😊
</respuesta>
<puntos clave>
[
"Recibir tu nómina en Banorte",
"No tener otro adelanto vigente",
"Tener entre 18 y 74 años con 11 meses",
"Contar con buen historial en Buró de Crédito",
]
</puntos clave>
</ejemplo>
<real>
<pregunta>
{question}
</pregunta>
<respuesta>
{ground_truth}
</respuesta>
</real>
"""
try:
response = self.llm.structured_generation(
model=self.model,
prompt=prompt,
response_model=KeyPointResponse,
system_prompt="Eres un asistente experto en extraer puntos clave informativos de respuestas.",
)
return response.keypoints
except Exception as e:
self.console.print(
f"[bold red]Error al extraer keypoints: {str(e)}[/bold red]"
)
raise
def evaluate_keypoints(
self,
generated_answer: str,
keypoints: list[str],
) -> tuple[dict[str, float], list[dict]]:
"""
Evaluates a generated answer against the extracted keypoints.
Args:
generated_answer: Answer generated by the RAG system
keypoints: List of keypoints from the ideal answer
Returns:
Dictionary with the metric scores and a detailed list with the classification of each keypoint
"""
prompt = f"""En esta tarea, recibirás una respuesta real y múltiples puntos clave
extraídos de una respuesta ideal. Tu objetivo es evaluar la calidad y concisión de la respuesta generada.
Para cada punto clave, proporciona un breve análisis y concluye con una de las siguientes clasificaciones:
[[[ Relevante ]]] - La respuesta generada aborda el punto clave de manera precisa, correcta y directa. La información es fácil de encontrar y no está oculta por un exceso de texto innecesario o "fluff" conversacional (saludos, despedidas, jerga, etc.).
[[[ Irrelevante ]]] - La respuesta generada omite por completo el punto clave o no contiene ninguna información relacionada con él. También se considera Irrelevante si la información del punto clave está presente, pero tan oculta por el "fluff" que un usuario tendría dificultades para encontrarla.
[[[ Incorrecto ]]] - La respuesta generada contiene información relacionada con el punto clave pero es incorrecta, contradice el punto clave, o podría confundir o desinformar al usuario.
**Criterio de Evaluación:**
Sé estricto con el "fluff". Una respuesta ideal es tanto correcta como concisa. El exceso de texto conversacional que no aporta valor a la respuesta debe penalizarse. Si la información clave está presente pero la respuesta es innecesariamente larga y verbosa, considera rebajar su clasificación de Relevante a Irrelevante.
Respuesta Generada: {generated_answer}
Puntos Clave de la Respuesta ideal:
{"\n".join([f"{i + 1}. {kp}" for i, kp in enumerate(keypoints)])}
"""
try:
response = self.llm.structured_generation(
model=self.model,
prompt=prompt,
response_model=KeyPointEvalList,
system_prompt="Eres un evaluador experto de respuestas basadas en puntos clave, capaz de detectar si la información es relevante, irrelevante o incorrecta. Adoptas una postura favorable cuando evalúas la utilidad de las respuestas para los usuarios.",
)
relevant_count = response.count_relevant()
irrelevant_count = response.count_irrelevant()
incorrect_count = response.count_incorrect()
total_keypoints = len(keypoints)
completeness = (
relevant_count / total_keypoints if total_keypoints > 0 else 0
)
hallucination = (
incorrect_count / total_keypoints if total_keypoints > 0 else 0
)
irrelevance = (
irrelevant_count / total_keypoints if total_keypoints > 0 else 0
)
keypoint_details = response.keypoint_details()
metrics = {
"completeness": completeness,
"hallucination": hallucination,
"irrelevance": irrelevance,
}
return metrics, keypoint_details
except Exception as e:
self.console.print(
f"[bold red]Error al evaluar keypoints: {str(e)}[/bold red]"
)
raise
def evaluate_rag_pipeline(
self,
query: str,
response: str,
ground_truth: str,
retrieved_contexts: list[str],
verbose: bool = True,
) -> dict:
"""
Evaluates a RAG pipeline using the keypoint methodology.
Args:
query: The user's question
response: Answer generated by the RAG system
ground_truth: The ideal or reference answer
retrieved_contexts: Contexts retrieved to generate the answer
verbose: Whether to show evaluation details
Returns:
Dictionary with the evaluation results
"""
try:
if verbose:
self.console.print(
Panel(
Text(
f"Question: {query}\n\nAnswer: {response}", justify="left"
),
title="[bold blue]Evaluating[/bold blue]",
border_style="blue",
)
)
with Progress(
SpinnerColumn(),
TextColumn("[progress.description]{task.description}"),
BarColumn(),
transient=True,
console=self.console,
disable=not verbose,
) as progress:
task = progress.add_task("Evaluation", total=2)
progress.update(task, description="Extracting keypoints...")
keypoints = self.extract_keypoints(query, ground_truth)
progress.advance(task)
if verbose:
self.console.print(
f"\nSe han extraído {len(keypoints)} puntos clave:"
)
for i, kp in enumerate(keypoints):
self.console.print(f"{i + 1}. {kp}")
progress.update(task, description="Evaluating keypoints...")
metrics, keypoint_details = self.evaluate_keypoints(response, keypoints)
progress.advance(task)
results = {
"query": query,
"response": response,
"ground_truth": ground_truth,
"retrieved_contexts": retrieved_contexts,
"completeness": metrics["completeness"],
"hallucination": metrics["hallucination"],
"irrelevance": metrics["irrelevance"],
"keypoints": keypoints,
"keypoint_details": keypoint_details,
"timestamp": datetime.now(),
}
if verbose:
self.console.print("\nResultados de la evaluación:")
self.console.print(f"Completeness: {metrics['completeness']:.3f}")
self.console.print(f"Hallucination: {metrics['hallucination']:.3f}")
self.console.print(f"Irrelevance: {metrics['irrelevance']:.3f}")
self.console.print("\nDetalles de la evaluación por punto clave:")
for i, detail in enumerate(keypoint_details):
self.console.print(f"\nKeypoint {i + 1}: {detail['keypoint']}")
self.console.print(f"Categoría: {detail['category']}")
self.metrics_results.append(results)
return results
except Exception as e:
self.console.print(f"[bold red]Error en la evaluación: {str(e)}[/bold red]")
raise

View File

@@ -0,0 +1,132 @@
import pathlib
import sqlite3
import pandas as pd
from google.cloud import bigquery
from rich.console import Console
from rag_eval.config import settings as config
def load_data_from_local_file(
file_path: str, console: Console, run_id: str | None = None
) -> pd.DataFrame:
"""Loads evaluation data from a local CSV or SQLite file and returns a DataFrame."""
console.print(f"Loading data from {file_path}...")
path = pathlib.Path(file_path)
if not path.exists():
raise Exception(f"Error: File not found at {file_path}")
if path.suffix == ".csv":
try:
df = pd.read_csv(path)
except Exception as e:
raise Exception(f"An error occurred while reading the CSV file: {e}")
elif path.suffix in [".db", ".sqlite"]:
try:
con = sqlite3.connect(path)
# Assuming table name is the file stem
table_name = path.stem
df = pd.read_sql(f"SELECT * FROM {table_name}", con)
con.close()
except Exception as e:
raise Exception(f"An error occurred while reading the SQLite DB: {e}")
else:
raise Exception(
f"Unsupported file type: {path.suffix}. Please use .csv or .db/.sqlite"
)
# Check for required columns
if (
"input" not in df.columns
or "expected_output" not in df.columns
):
raise Exception(
"Error: The input file must contain 'input' and 'expected_output' columns."
)
df["agent"] = config.agent.name
print(f"{run_id=}")
if run_id:
if "run_id" in df.columns:
df = df[df["run_id"] == run_id].copy()
console.print(f"Filtered data for run_id: {run_id}")
if df.empty:
console.print(
f"[yellow]Warning: No data found for run_id '{run_id}' in {file_path}.[/yellow]"
)
else:
console.print(
f"[yellow]Warning: --run-id provided, but 'run_id' column not found in {file_path}. Using all data.[/yellow]"
)
# Filter out unanswerable questions if 'type' column exists
if "type" in df.columns:
df = df[df["type"] != "Unanswerable"].copy()
df.dropna(subset=["input", "expected_output"], inplace=True)
console.print(f"Loaded {len(df)} questions for evaluation from {file_path}.")
return df
def load_data_from_bigquery(console: Console, run_id: str | None = None) -> pd.DataFrame:
"""Loads evaluation data from the BigQuery table and returns a DataFrame."""
console.print("Loading data from BigQuery...")
bq_project_id = config.bigquery.project_id or config.project_id
client = bigquery.Client(project=bq_project_id)
table_ref = f"{bq_project_id}.{config.bigquery.dataset_id}.{config.bigquery.table_ids['synth_gen']}"
console.print(f"Querying table: {table_ref}")
try:
table = client.get_table(table_ref)
all_columns = [schema.name for schema in table.schema]
select_cols = ["input", "expected_output"]
if "category" in all_columns:
select_cols.append("category")
query_parts = [f"SELECT {', '.join(select_cols)}", f"FROM `{table_ref}`"]
# Build WHERE clauses
where_clauses = []
if "type" in all_columns:
where_clauses.append("type != 'Unanswerable'")
if run_id:
if "run_id" in all_columns:
where_clauses.append(f"run_id = '{run_id}'")
console.print(f"Filtering data for run_id: {run_id}")
else:
console.print(
"[yellow]Warning: --run-id provided, but 'run_id' column not found in BigQuery table. Using all data.[/yellow]"
)
if where_clauses:
query_parts.append("WHERE " + " AND ".join(where_clauses))
query = "\n".join(query_parts)
df = client.query(query).to_dataframe()
except Exception as e:
if "Not found" in str(e):
console.print(f"[bold red]Error: Table {table_ref} not found.[/bold red]")
console.print(
"Please ensure the table exists and the configuration in 'config.yaml' is correct."
)
raise
else:
console.print(
f"[bold red]An error occurred while querying BigQuery: {e}[/bold red]"
)
raise
df.dropna(subset=["input", "expected_output"], inplace=True)
df["agent"] = config.agent.name
console.print(f"Loaded {len(df)} questions for evaluation.")
if run_id and df.empty:
console.print(
f"[yellow]Warning: No data found for run_id '{run_id}' in BigQuery.[/yellow]"
)
return df


@@ -0,0 +1,347 @@
import json
import uuid
from datetime import datetime
import pandas as pd
from dialogflow.main import DialogflowAgent as OriginalDialogflowAgent
from google.api_core import exceptions as google_exceptions
from rich.console import Console
from rich.panel import Panel
from rich.progress import BarColumn, Progress, SpinnerColumn, TextColumn
from rich.table import Table
from rag_eval.agent import Agent
from rag_eval.config import settings as config
from . import loaders
from .evaluator import KeypointRAGEvaluator
class DialogflowEvalAgent:
"""Adapter for DialogflowAgent to be used in evaluation."""
def __init__(self, session_id: str | None = None):
self.agent = OriginalDialogflowAgent()
self.session_id = session_id or str(uuid.uuid4())
def call(self, query: str) -> str:
"""Calls the Dialogflow agent with the session ID and returns only the response text."""
response = self.agent.call(query, session_id=self.session_id)
return response.get("response_text", "")
def run_keypoint_evaluation(
input_file: str | None = None,
output_file: str | None = None,
run_id: str | None = None,
agent_name: str | None = None,
):
"""
Runs keypoint-based evaluation for each agent found in the input data.
Handles both single-turn and multi-turn conversational data.
"""
console = Console()
# --- Introduction Panel ---
intro_panel = Panel(
f"""
[bold]Input File:[/bold] [cyan]{input_file or 'BigQuery'}[/cyan]
[bold]Output File:[/bold] [cyan]{output_file or 'BigQuery'}[/cyan]
[bold]Run ID:[/bold] [cyan]{run_id or 'Not specified'}[/cyan]
[bold]Agent Name:[/bold] [cyan]{agent_name or 'All'}[/cyan]
""",
title="[bold magenta]Keypoint Evaluation Run[/bold magenta]",
expand=False,
border_style="magenta",
)
console.print(intro_panel)
try:
if input_file:
df = loaders.load_data_from_local_file(input_file, console, run_id=run_id)
else:
df = loaders.load_data_from_bigquery(console, run_id=run_id)
except Exception as e:
console.print(
f"[bold red]An unexpected error occurred during data loading: {e}[/bold red]"
)
raise
if run_id is None:
run_id = f"run_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
if df.empty:
console.print("[bold red]No data loaded, exiting.[/bold red]")
return
# --- Set up agents to evaluate ---
evaluables = []
if agent_name:
if agent_name == "dialogflow":
evaluables.append(
{"name": "dialogflow", "agent_class": DialogflowEvalAgent, "is_special": True}
)
console.print("[bold green]Agent 'dialogflow' selected for evaluation.[/bold green]")
elif agent_name == config.agent.name:
evaluables.append(
{"name": config.agent.name, "agent_class": Agent, "is_special": False}
)
else:
console.print(
f"[bold red]Error: Agent '{agent_name}' not found in the configuration.[/bold red]"
)
raise ValueError(f"Agent '{agent_name}' not found in the configuration")
else:
evaluables.append(
{"name": config.agent.name, "agent_class": Agent, "is_special": False}
)
all_agents_results = []
total_skipped_questions = 0
# --- Check for conversational data ---
is_conversational = "conversation_id" in df.columns and "turn" in df.columns
if is_conversational:
df.sort_values(by=["conversation_id", "turn"], inplace=True)
conversations = df.groupby("conversation_id")
console.print(f"Found [bold cyan]{len(conversations)}[/bold cyan] conversations to evaluate.")
progress_total = len(df)
else:
console.print(f"Found [bold cyan]{len(df)}[/bold cyan] single questions to evaluate.")
conversations = [(None, df)] # Treat all rows as one big group
progress_total = len(df)
with Progress(
SpinnerColumn(),
TextColumn("[progress.description]{task.description}"),
BarColumn(),
TextColumn("[progress.percentage]{task.percentage:>3.0f}%"),
console=console,
) as progress:
task = progress.add_task(
"[green]Processing evaluations...[/green]",
total=progress_total,
)
for conversation_id, conversation_df in conversations:
if is_conversational:
console.print(
Panel(
f"Evaluating conversation: [bold blue]{conversation_id}[/bold blue]",
expand=False,
border_style="blue",
)
)
for evaluable in evaluables:
agent_name_for_results = evaluable["name"]
# Initialize agent and history for each conversation
if evaluable["is_special"]:
rag_agent = evaluable["agent_class"](session_id=str(uuid.uuid4()))
else:
rag_agent = evaluable["agent_class"]()
history = []
evaluator = KeypointRAGEvaluator(console)
for _, row in conversation_df.iterrows():
query = row["input"]
ground_truth = row["expected_output"]
progress.update(
task, description=f"Agent: {agent_name_for_results}, Conv: {conversation_id or 'N/A'}"
)
try:
# Step 1: Call agent to get the response
if is_conversational and not evaluable["is_special"]:
# For standard agent in conversational mode, manage history
history.append({"role": "user", "content": query})
response = rag_agent.call(history)
history.append({"role": "assistant", "content": response})
else:
# For special agents or single-turn mode
response = rag_agent.call(query)
# Step 2: Evaluate the response
eval_result = evaluator.evaluate_rag_pipeline(
query=query,
response=response,
ground_truth=ground_truth,
retrieved_contexts=[],
verbose=False,
)
# Step 3: Evaluate conciseness
conciseness_score = evaluator.evaluate_conciseness(query, response)
eval_result["conciseness"] = conciseness_score
eval_result["agent"] = agent_name_for_results
# Add conversational info if present
if is_conversational:
eval_result["conversation_id"] = conversation_id
eval_result["turn"] = row["turn"]
all_agents_results.append(eval_result)
except google_exceptions.FailedPrecondition as e:
if "Token limit exceeded" in str(e):
total_skipped_questions += 1
console.print(
Panel(
f"[bold]Query:[/bold]\n[white]{query}[/white]",
title="[yellow]Skipping Question (Token Limit Exceeded)[/yellow]",
expand=False,
border_style="yellow",
)
)
else:
raise
finally:
progress.advance(task)
if not all_agents_results:
console.print("[bold red]No evaluation results were generated.[/bold red]")
return
final_df = pd.DataFrame(all_agents_results)
# --- Summary Table ---
summary_df = (
final_df.groupby("agent")[["completeness", "hallucination", "irrelevance", "conciseness"]]
.mean()
.reset_index()
)
table = Table(
title="[bold green]Keypoint Evaluation Summary[/bold green]",
show_header=True,
header_style="bold magenta",
)
table.add_column("Agent", justify="left", style="cyan", no_wrap=True)
table.add_column("Completeness", justify="right", style="magenta")
table.add_column("Hallucination", justify="right", style="green")
table.add_column("Irrelevance", justify="right", style="yellow")
table.add_column("Conciseness", justify="right", style="cyan")
for _, row in summary_df.iterrows():
table.add_row(
row["agent"],
f"{row['completeness']:.4f}",
f"{row['hallucination']:.4f}",
f"{row['irrelevance']:.4f}",
f"{row['conciseness']:.4f}",
)
console.print(table)
# --- Skipped Questions Summary ---
if total_skipped_questions > 0:
console.print(
Panel(
f"[bold yellow]Total questions skipped due to token limit: {total_skipped_questions}[/bold yellow]",
title="[bold]Skipped Questions[/bold]",
expand=False,
border_style="yellow",
)
)
if "timestamp" in final_df.columns:
final_df["timestamp"] = pd.to_datetime(final_df["timestamp"]).dt.tz_localize(
None
)
if output_file:
for col in ["keypoints", "keypoint_details", "retrieved_contexts"]:
if col in final_df.columns:
final_df[col] = final_df[col].apply(json.dumps)
console.print(f"Saving results to CSV file: [bold cyan]{output_file}[/bold cyan]")
final_df.to_csv(output_file, index=False, encoding="utf-8-sig")
output_panel = Panel(
f"Successfully saved {len(final_df)} rows to [bold green]{output_file}[/bold green]",
title="[bold green]Output[/bold green]",
expand=False,
border_style="green",
)
console.print(output_panel)
else:
project_id = config.bigquery.project_id or config.project_id
dataset_id = config.bigquery.dataset_id
table_name = config.bigquery.table_ids["keypoint_eval"]
table_id = f"{project_id}.{dataset_id}.{table_name}"
bq_schema = [
{"name": "run_id", "type": "STRING"},
{"name": "query", "type": "STRING"},
{"name": "response", "type": "STRING"},
{"name": "ground_truth", "type": "STRING"},
{"name": "retrieved_contexts", "type": "STRING", "mode": "REPEATED"},
{"name": "completeness", "type": "FLOAT"},
{"name": "hallucination", "type": "FLOAT"},
{"name": "irrelevance", "type": "FLOAT"},
{"name": "conciseness", "type": "FLOAT"},
{"name": "keypoints", "type": "STRING", "mode": "REPEATED"},
{
"name": "keypoint_details",
"type": "RECORD",
"mode": "REPEATED",
"fields": [
{"name": "keypoint", "type": "STRING"},
{"name": "analysis", "type": "STRING"},
{"name": "category", "type": "STRING"},
],
},
{"name": "timestamp", "type": "TIMESTAMP"},
{"name": "agent", "type": "STRING"},
{"name": "error", "type": "STRING"},
{"name": "conversation_id", "type": "STRING"},
{"name": "turn", "type": "INTEGER"},
]
final_df["run_id"] = run_id
bq_column_names = [col["name"] for col in bq_schema]
for col_name in bq_column_names:
if col_name not in final_df.columns:
final_df[col_name] = None
final_df["completeness"] = final_df["completeness"].fillna(0.0)
final_df["hallucination"] = final_df["hallucination"].fillna(0.0)
final_df["irrelevance"] = final_df["irrelevance"].fillna(0.0)
final_df["error"] = final_df["error"].fillna("")
for col_name in ["retrieved_contexts", "keypoints", "keypoint_details"]:
if col_name in final_df.columns:
# Ensure any non-list items (like NaN or None) become an empty list
final_df[col_name] = [
item if isinstance(item, list) else [] for item in final_df[col_name]
]
final_df_for_bq = final_df[bq_column_names].copy()
console.print(f"Saving results to BigQuery table: [bold cyan]{table_id}[/bold cyan]")
try:
final_df_for_bq.to_gbq(
destination_table=f"{dataset_id}.{table_name}",
project_id=project_id,
if_exists="append",
table_schema=bq_schema,
)
console.print(
f"Successfully saved {len(final_df_for_bq)} rows to [bold green]{table_id}[/bold green]"
)
except Exception as e:
console.print(
f"[bold red]An error occurred while saving to BigQuery: {e}[/bold red]"
)
console.print("DataFrame schema used for upload:")
console.print(final_df_for_bq.info())
raise


@@ -0,0 +1,95 @@
# Search Evaluation
This package contains scripts to evaluate the performance of the vector search component.
## Evaluation
The `search-eval` script evaluates search performance. It can source data from either BigQuery or local files.
### Local File Evaluation
To run the evaluation using a local file, use the `--input-file` option.
```bash
uv run search-eval -- --input-file /path/to/your/data.csv
```
Or for a SQLite database:
```bash
uv run search-eval -- --input-file /path/to/your/data.db
```
#### Input File Structures
**CSV File**
The CSV file must contain the following columns:
| Column | Description |
|--------|-----------------------------------------------|
| `input` | The question to be used for the search query. |
| `source` | The expected document path for the question. |
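For example, a minimal input CSV might look like this (the document path is illustrative):
```csv
input,source
"¿Qué requisitos tiene el adelanto de nómina?",documents/processed/adelanto_nomina.pdf
```
Note that only the final path segment of `source` is compared against the retrieved document IDs.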
**SQLite Database**
The SQLite database must contain a table named `evaluation_data` with the following columns:
| Column | Description |
|--------|-----------------------------------------------|
| `input` | The question to be used for the search query. |
| `source` | The expected document path for the question. |
### BigQuery Evaluation
The `search-eval-bq` script evaluates search performance using data sourced from and written to BigQuery.
### BigQuery Table Structures
#### Input Table
The input table must contain the following columns:
| Column | Type | Description |
| --------------- | ------- | --------------------------------------------------------------------------- |
| `id` | STRING | A unique identifier for each question. |
| `question` | STRING | The question to be used for the search query. |
| `document_path` | STRING | The expected document path for the given question. |
| `question_type` | STRING | The type of question. Rows where `question_type` is 'Unanswerable' are ignored. |
#### Output Table
The output table will be created by the script if it doesn't exist, or appended to if it does. It will have the following structure:
| Column | Type | Description |
| ------------------------ | --------- | ------------------------------------------------------------------------ |
| `id` | STRING | The unique identifier for the question from the input table. |
| `question` | STRING | The question used for the search query. |
| `expected_document` | STRING | The expected document for the given question. |
| `retrieved_documents` | STRING[] | An array of document IDs retrieved from the vector search. |
| `retrieved_distances` | FLOAT64[] | An array of distance scores for the retrieved documents. |
| `is_expected_in_results` | BOOLEAN | A flag indicating whether the expected document was in the search results. |
| `evaluation_timestamp` | TIMESTAMP | The timestamp of when the evaluation was run. |
### Usage
To run the BigQuery evaluation script, use the `uv run search-eval-bq` command with the following options:
```bash
uv run search-eval-bq -- --input-table <project.dataset.table> --output-table <project.dataset.table> [--project-id <gcp-project-id>]
```
**Arguments:**
* `--input-table`: **(Required)** The full BigQuery table name for the input data (e.g., `my-gcp-project.my_dataset.questions`).
* `--output-table`: **(Required)** The full BigQuery table name for the output results (e.g., `my-gcp-project.my_dataset.eval_results`).
* `--project-id`: (Optional) The Google Cloud project ID. If not provided, it will use the `project_id` from the `config.yaml` file.
**Example:**
```bash
uv run search-eval-bq -- \
--input-table "my-gcp-project.search_eval.synthetic_questions" \
--output-table "my-gcp-project.search_eval.results" \
--project-id "my-gcp-project"
```


@@ -0,0 +1,27 @@
[project]
name = "search-eval"
version = "0.1.0"
description = "Add your description here"
readme = "README.md"
authors = [
{ name = "Anibal Angulo", email = "a8065384@banorte.com" }
]
requires-python = ">=3.12"
dependencies = [
"embedder",
"vector-search",
"ranx>=0.3.21",
"google-cloud-bigquery",
"pandas-gbq",
"kfp>=1.4.0",
"requests-toolbelt>=1.0.0",
]
[project.scripts]
search-eval = "search_eval.cli:app"
[build-system]
requires = ["uv_build>=0.8.3,<0.9.0"]
build-backend = "uv_build"
[tool.uv.sources]
embedder = { workspace = true }
vector-search = { workspace = true }


@@ -0,0 +1,46 @@
from typing import Annotated
import typer
from .main import evaluate
app = typer.Typer()
@app.command()
def main(
input_file: Annotated[
str | None,
typer.Option(
"-i",
"--input-file",
help="Path to a local CSV or SQLite file for evaluation data. "
"If not provided, data will be loaded from BigQuery.",
),
] = None,
output_file: Annotated[
str | None,
typer.Option(
"-o",
"--output-file",
help="Path to save the detailed results as a CSV file. "
"If not provided, results will be saved to BigQuery.",
),
] = None,
run_id: Annotated[
str | None,
typer.Option(
help="Optional: The specific run_id to filter the evaluation data by."
),
] = None,
):
"""Evaluates the search metrics by loading data from BigQuery or a local file."""
evaluate(
input_file=input_file,
output_file=output_file,
run_id=run_id,
)
if __name__ == "__main__":
app()


@@ -0,0 +1,305 @@
import pathlib
import sqlite3
import pandas as pd
from embedder.vertex_ai import VertexAIEmbedder
from google.cloud import bigquery
from ranx import Qrels, Run
from ranx import evaluate as ranx_evaluate
from rich.console import Console
from rich.progress import track
from rich.table import Table
from vector_search.vertex_ai import GoogleCloudVectorSearch
from rag_eval.config import settings as config
def load_data_from_local_file(
file_path: str, console: Console, run_id: str | None = None
) -> pd.DataFrame:
"""Loads evaluation data from a local CSV or SQLite file."""
console.print(f"[bold green]Loading data from {file_path}...[/bold green]")
path = pathlib.Path(file_path)
if not path.exists():
console.print(f"[bold red]Error: File not found at {file_path}[/bold red]")
raise FileNotFoundError(f"File not found at {file_path}")
if path.suffix == ".csv":
try:
df = pd.read_csv(path)
except Exception as e:
console.print(
f"[bold red]An error occurred while reading the CSV file: {e}[/bold red]"
)
raise
elif path.suffix in [".db", ".sqlite"]:
try:
con = sqlite3.connect(path)
# Assuming table name is 'evaluation_data'
df = pd.read_sql("SELECT * FROM evaluation_data", con)
con.close()
except Exception as e:
console.print(
f"[bold red]An error occurred while reading the SQLite DB: {e}[/bold red]"
)
raise
else:
console.print(
f"[bold red]Unsupported file type: {path.suffix}. Please use .csv or .db/.sqlite[/bold red]"
)
raise ValueError(f"Unsupported file type: {path.suffix}. Please use .csv or .db/.sqlite")
# Standardize column names and add ID
if "input" in df.columns and "source" in df.columns:
df = df.rename(columns={"input": "question", "source": "document_path"})
df["id"] = df.index + 1
df["id"] = df["id"].astype(str)
else:
console.print(
"[bold red]Error: The input file must contain 'input' and 'source' columns.[/bold red]"
)
raise ValueError("The input file must contain 'input' and 'source' columns.")
if run_id:
if "run_id" in df.columns:
df = df[df["run_id"] == run_id].copy()
console.print(f"Filtered data for run_id: [bold cyan]{run_id}[/bold cyan]")
if df.empty:
console.print(
f"[bold yellow]Warning: No data found for run_id '{run_id}' in {file_path}.[/bold yellow]"
)
else:
console.print(
f"[bold yellow]Warning: --run-id provided, but 'run_id' column not found in {file_path}. Using all data.[/bold yellow]"
)
df.dropna(inplace=True)
console.print(f"Loaded {len(df)} questions for evaluation.")
return df
def load_data_from_bigquery(
console: Console, run_id: str | None = None
) -> pd.DataFrame:
"""Loads evaluation data from the BigQuery table."""
console.print("[bold green]Loading data from BigQuery...[/bold green]")
bq_project_id = config.bigquery.project_id or config.project_id
client = bigquery.Client(project=bq_project_id)
table_ref = f"{bq_project_id}.{config.bigquery.dataset_id}.{config.bigquery.table_ids['synth_gen']}"
console.print(f"Querying table: [bold cyan]{table_ref}[/bold cyan]")
query = f"""
SELECT
input AS question,
source AS document_path,
ROW_NUMBER() OVER() as id
FROM
`{table_ref}`
WHERE
`type` != 'Unanswerable'
"""
if run_id:
console.print(f"Filtering for run_id: [bold cyan]{run_id}[/bold cyan]")
query += f" AND run_id = '{run_id}'"
try:
df = client.query(query).to_dataframe()
except Exception as e:
if "Not found" in str(e):
console.print(f"[bold red]Error: Table {table_ref} not found.[/bold red]")
console.print(
"Please ensure the table exists and the configuration in 'config.yaml' is correct."
)
raise
elif "unrecognized name: run_id" in str(e).lower():
console.print(
"[bold red]Error: The BigQuery table must contain a 'run_id' column when using the --run-id flag.[/bold red]"
)
raise
else:
console.print(
f"[bold red]An error occurred while querying BigQuery: {e}[/bold red]"
)
raise
df.dropna(inplace=True)
console.print(f"Loaded {len(df)} questions for evaluation.")
if df.empty:
console.print(
f"[bold yellow]Warning: No data found for run_id '{run_id}' in BigQuery.[/bold yellow]"
)
return df
def run_evaluation(
df: pd.DataFrame, console: Console
) -> pd.DataFrame:
"""Runs the search evaluation on the given dataframe."""
agent_config = config.agent
index_config = config.index
console.print(
f"Embedding Model: [bold cyan]{agent_config.embedding_model}[/bold cyan]"
)
console.print(f"Index Name: [bold cyan]{index_config.name}[/bold cyan]")
# Initialize the embedder and vector search
embedder = VertexAIEmbedder(
project=config.project_id,
location=config.location,
model_name=agent_config.embedding_model
)
vector_search = GoogleCloudVectorSearch(
project_id=config.project_id,
location=config.location,
bucket=config.bucket,
index_name=index_config.name,
)
vector_search.load_index_endpoint(index_config.endpoint)
# Prepare qrels
qrels_data = {}
for _, row in track(df.iterrows(), total=len(df), description="Preparing qrels..."):
doc_path = str(row["document_path"]).split("/")[-1].strip()
qrels_data[str(row["id"])] = {doc_path: 1}
qrels = Qrels(qrels_data)
# Prepare run
run_data = {}
detailed_results_list = []
for _, row in track(df.iterrows(), total=len(df), description="Preparing run..."):
question_embedding = embedder.generate_embedding(row["question"])
results = vector_search.run_query(
deployed_index_id=index_config.deployment,
query=question_embedding,
limit=10,
)
run_data[str(row["id"])] = {
result["id"]: result["distance"] for result in results
}
retrieved_docs = [result["id"] for result in results]
retrieved_distances = [result["distance"] for result in results]
expected_doc = str(row["document_path"]).split("/")[-1].strip()
# print(f"expected doc: {expected_doc}")
# print(f"retrieved docs: {retrieved_docs}")
detailed_results_list.append(
{
"agent": agent_config.name,
"id": row["id"],
"input": row["question"],
"expected_document": expected_doc,
"retrieved_documents": retrieved_docs,
"retrieved_distances": retrieved_distances,
"is_expected_in_results": expected_doc in retrieved_docs,
}
)
run = Run(run_data)
# Evaluate
k_values = [1, 3, 5, 10]
metrics = []
for k in k_values:
metrics.extend(
[f"precision@{k}", f"recall@{k}", f"f1@{k}", f"ndcg@{k}", f"mrr@{k}"]
)
with console.status("[bold green]Running evaluation..."):
results = ranx_evaluate(qrels, run, metrics)
# Create tables
table = Table(title=f"Search Metrics @k for Agent: {agent_config.name}")
table.add_column("k", justify="right", style="cyan")
table.add_column("Precision@k", justify="right")
table.add_column("Recall@k", justify="right")
table.add_column("F1@k", justify="right")
table.add_column("nDCG@k", justify="right")
table.add_column("MRR@k", justify="right")
for k in k_values:
precision = results.get(f"precision@{k}")
recall = results.get(f"recall@{k}")
f1 = results.get(f"f1@{k}")
ndcg = results.get(f"ndcg@{k}")
mrr = results.get(f"mrr@{k}")
table.add_row(
str(k),
f"{precision:.4f}" if precision is not None else "N/A",
f"{recall:.4f}" if recall is not None else "N/A",
f"{f1:.4f}" if f1 is not None else "N/A",
f"{ndcg:.4f}" if ndcg is not None else "N/A",
f"{mrr:.4f}" if mrr is not None else "N/A",
)
console.print(table)
return pd.DataFrame(detailed_results_list)
def evaluate(
input_file: str | None = None,
output_file: str | None = None,
run_id: str | None = None,
):
"""Core logic for evaluating search metrics."""
console = Console()
if input_file:
df = load_data_from_local_file(input_file, console, run_id)
else:
df = load_data_from_bigquery(console, run_id)
if df.empty:
raise Exception("Dataframe is empty")
if config.index:
console.print(
f"[bold blue]Running evaluation for agent: {config.agent.name}[/bold blue]"
)
results_df = run_evaluation(df, console)
else:
console.print(
f"[yellow]Skipping agent '{config.agent.name}' as it has no index configured.[/yellow]"
)
raise ValueError(f"Agent '{config.agent.name}' has no index configured")
final_results_df = results_df
if output_file:
console.print(
f"Saving detailed results to CSV file: [bold cyan]{output_file}[/bold cyan]"
)
try:
final_results_df.to_csv(output_file, index=False)
console.print(
f"Successfully saved {len(final_results_df)} rows to [bold green]{output_file}[/bold green]"
)
except Exception as e:
console.print(
f"[bold red]An error occurred while saving to CSV: {e}[/bold red]"
)
raise
else:
# Save detailed results to BigQuery
project_id = config.bigquery.project_id or config.project_id
dataset_id = config.bigquery.dataset_id
table_name = config.bigquery.table_ids["search_eval"]
table_id = f"{project_id}.{dataset_id}.{table_name}"
console.print(
f"Saving detailed results to BigQuery table: [bold cyan]{table_id}[/bold cyan]"
)
try:
final_results_df.to_gbq(
destination_table=f"{dataset_id}.{table_name}",
project_id=project_id,
if_exists="append",
)
console.print(
f"Successfully saved {len(final_results_df)} rows to [bold green]{table_id}[/bold green]"
)
except Exception as e:
console.print(
f"[bold red]An error occurred while saving to BigQuery: {e}[/bold red]"
)
raise

apps/synth-gen/README.md (28 additions)

@@ -0,0 +1,28 @@
# Synthetic Question Generator
This application generates a set of synthetic questions from documents stored in Google Cloud Storage (GCS) and saves them to a local CSV file. For each document, it generates one question for each predefined question type (Factual, Summarization, etc.).
The output CSV is structured for easy uploading to a BigQuery table with the following schema: `input` (STRING), `expected_output` (STRING), `source` (STRING), `type` (STRING).
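For example (illustrative rows only), the generated file might contain:
```csv
input,expected_output,source,type
"¿Qué es un adelanto de nómina?","Es un monto de hasta $10,000 MXN que puedes usar para lo que necesites.",documents/processed/adelanto_nomina.md,Factual
```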
## Usage
The script is run from the command line. You need to provide the path to the source documents within your GCS bucket and a path for the output CSV file.
### Command
```bash
uv run python -m synth_gen.main [OPTIONS] GCS_PATH
```
### Arguments
* `GCS_PATH`: (Required) The path to the directory in your GCS bucket where the source markdown files are located (e.g., `documents/markdown/`).
* `--output-csv, -o`: (Required) The local file path where the generated questions will be saved in CSV format.
### Example
```bash
uv run python -m synth_gen.main documents/processed/ --output-csv synthetic_questions.csv
```
This command will fetch all documents from the `gs://<your-bucket-name>/documents/processed/` directory, generate questions for each, and save them to a file named `synthetic_questions.csv` in the current directory.


@@ -0,0 +1,22 @@
[project]
name = "synth-gen"
version = "0.1.0"
description = "Add your description here"
readme = "README.md"
authors = [
{ name = "Anibal Angulo", email = "a8065384@banorte.com" }
]
requires-python = ">=3.12"
dependencies = [
"llm",
]
[project.scripts]
synth-gen = "synth_gen.main:app"
[build-system]
requires = ["uv_build>=0.8.3,<0.9.0"]
build-backend = "uv_build"
[tool.uv.sources]
llm = { workspace = true }


@@ -0,0 +1,2 @@
def main() -> None:
print("Hello from synth-gen!")


@@ -0,0 +1,349 @@
import datetime
import os
import random
from typing import Annotated, Any, Dict, List
import pandas as pd
import typer
from file_storage.google_cloud import GoogleCloudFileStorage
from llm.vertex_ai import VertexAILLM
from pydantic import BaseModel
from rich.console import Console
from rich.progress import track
from rag_eval.config import Settings
# --- Configuration ---
PROMPT_TEMPLATE = """
Eres un experto en generación de preguntas sintéticas. Tu tarea es crear preguntas sintéticas en español basadas en documentos de referencia proporcionados.
## INSTRUCCIONES:
### Requisitos obligatorios:
1. **Idioma**: La pregunta DEBE estar completamente en español
2. **Basada en documentos**: La pregunta DEBE poder responderse ÚNICAMENTE con la información contenida en los documentos proporcionados
3. **Tipo de pregunta**: Sigue estrictamente la definición del tipo de pregunta especificado
4. **Identificación de fuentes**: Incluye el ID de fuente de todos los documentos necesarios para responder la pregunta
5. **Salida esperada**: Incluye la respuesta perfecta basada en los documentos necesarios para responder la pregunta
### Tono de pregunta:
La pregunta debe ser similar a la que haría un usuario sin contexto sobre el sistema o la información disponible. Ingenuo y curioso.
### Tipo de pregunta solicitado:
**Tipo**: {qtype}
**Definición**: {qtype_def}
### Documentos de referencia:
{context}
Por favor, genera una pregunta siguiendo estas instrucciones.
""".strip()
RESPONSE_SCHEMA = {
"type": "object",
"properties": {
"pregunta": {
"type": "string",
},
"expected_output": {
"type": "string",
},
"ids": {"type": "array", "items": {"type": "string"}},
},
"required": ["pregunta", "expected_output", "ids"],
}

class ResponseSchema(BaseModel):
    pregunta: str
    expected_output: str
    ids: List[str]

class Turn(BaseModel):
    pregunta: str
    expected_output: str

class MultiStepResponseSchema(BaseModel):
    conversation: List[Turn]

MULTI_STEP_PROMPT_TEMPLATE = """
Eres un experto en la generación de conversaciones sintéticas. Tu tarea es crear una conversación en español con múltiples turnos basada en los documentos de referencia proporcionados.
## INSTRUCCIONES:
### Requisitos obligatorios:
1. **Idioma**: La conversación DEBE estar completamente en español.
2. **Basada en documentos**: Todas las respuestas DEBEN poder responderse ÚNICAMENTE con la información contenida en los documentos de referencia.
3. **Número de turnos**: La conversación debe tener exactamente {num_turns} turnos. Un turno consiste en una pregunta del usuario y una respuesta del asistente.
4. **Flujo conversacional**: Las preguntas deben seguir un orden lógico, como si un usuario estuviera explorando un tema paso a paso. La segunda pregunta debe ser una continuación de la primera, y así sucesivamente.
5. **Salida esperada**: Proporciona la respuesta perfecta para cada pregunta, basada en los documentos de referencia.
### Tono de las preguntas:
Las preguntas deben ser similares a las que haría un usuario sin contexto sobre el sistema o la información disponible. Deben ser ingenuas y curiosas.
### Documentos de referencia:
{context}
Por favor, genera una conversación de {num_turns} turnos siguiendo estas instrucciones.
""".strip()

QUESTION_TYPE_MAP = {
"Factual": "Questions targeting specific details within a reference (e.g., a companys profit in a report, a verdict in a legal case, or symptoms in a medical record) to test RAGs retrieval accuracy.",
"Summarization": "Questions that require comprehensive answers, covering all relevant information, to mainly evaluate the recall rate of RAG retrieval.",
"Multi-hop Reasoning": "Questions involve logical relationships among events and details within adocument, forming a reasoning chain to assess RAGs logical reasoning ability.",
"Unanswerable": "Questions arise from potential information loss during the schema-to-article generation, where no corresponding information fragment exists, or the information is insufficient for an answer.",
}

# --- Core Logic ---
def generate_synthetic_question(
    llm: VertexAILLM,
    file_content: str,
    file_path: str,
    q_type: str,
    q_def: str,
    language_model: str,
) -> ResponseSchema:
    """Generates a single synthetic question using the LLM."""
prompt = PROMPT_TEMPLATE.format(
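        # NOTE: PROMPT_TEMPLATE defines no {id} placeholder, so the id keyword on
        # the next line is silently ignored by str.format.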
context=file_content, id=file_path, qtype=q_type, qtype_def=q_def
)
response = llm.structured_generation(
model=language_model,
prompt=prompt,
response_model=ResponseSchema,
)
return response

def generate_synthetic_conversation(
llm: VertexAILLM,
file_content: str,
file_path: str,
num_turns: int,
language_model: str,
) -> MultiStepResponseSchema:
"""Generates a synthetic conversation with multiple turns using the LLM."""
prompt = MULTI_STEP_PROMPT_TEMPLATE.format(
context=file_content, num_turns=num_turns
)
response = llm.structured_generation(
model=language_model,
prompt=prompt,
response_model=MultiStepResponseSchema,
)
return response

app = typer.Typer()

def generate(
    num_questions: int,
    output_csv: str | None = None,
    num_turns: int = 1,
) -> str:
"""
Core logic for generating a specified number of synthetic questions.
"""
console = Console()
settings = Settings()
llm = VertexAILLM(project=settings.project_id, location=settings.location)
storage = GoogleCloudFileStorage(bucket=settings.bucket)
run_id = datetime.datetime.now(datetime.timezone.utc).strftime("%Y%m%d-%H%M%S")
console.print(f"[bold yellow]Generated Run ID: {run_id}[/bold yellow]")
all_rows = []
if not settings.index:
console.print("[yellow]Skipping as no index is configured.[/yellow]")
return ""
gcs_path = f"{settings.index.name}/contents/"
console.print(f"[green]Fetching files from GCS path: {gcs_path}[/green]")
try:
all_files = storage.list_files(path=gcs_path)
console.print(f"Found {len(all_files)} total files to process.")
except Exception as e:
console.print(f"[bold red]Error listing files: {e}[/bold red]")
return ""
if not all_files:
console.print("[yellow]No files found. Skipping.[/yellow]")
return ""
files_to_process = random.sample(
all_files, k=min(num_questions, len(all_files))
)
console.print(
f"Randomly selected {len(files_to_process)} files to generate questions from."
)
for file_path in track(files_to_process, description="Generating questions..."):
try:
file_content = storage.get_file_stream(file_path).read().decode("utf-8-sig")
q_type, q_def = random.choice(list(QUESTION_TYPE_MAP.items()))
if num_turns > 1:
conversation_data = None
for attempt in range(3): # Retry up to 3 times
conversation_data = generate_synthetic_conversation(
llm,
file_content,
file_path,
num_turns,
settings.agent.language_model,
)
if (
conversation_data
and conversation_data.conversation
and len(conversation_data.conversation) == num_turns
):
break # Success
console.print(
f"[yellow]Failed to generate valid conversation for {os.path.basename(file_path)}. Retrying ({attempt + 1}/3)...[/yellow]"
)
conversation_data = None
if not conversation_data:
console.print(
f"[bold red]Failed to generate valid conversation for {os.path.basename(file_path)} after 3 attempts. Skipping.[/bold red]"
)
continue
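                # Random 5-digit ID grouping this conversation's turns; not guaranteed unique across runs.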
conversation_id = str(random.randint(10000, 99999))
for i, turn in enumerate(conversation_data.conversation):
row = {
"input": turn.pregunta,
"expected_output": turn.expected_output,
"source": os.path.splitext(os.path.basename(file_path))[0],
"type": "Multi-turn",
"agent": settings.agent.name,
"run_id": run_id,
"conversation_id": conversation_id,
"turn": i + 1,
}
all_rows.append(row)
else: # Single turn generation
generated_data = None
for attempt in range(3): # Retry up to 3 times
generated_data = generate_synthetic_question(
llm,
file_content,
file_path,
q_type,
q_def,
settings.agent.language_model,
)
if (
generated_data
and generated_data.expected_output
and generated_data.expected_output.strip()
):
break # Success, exit retry loop
console.print(
f"[yellow]Empty answer for {q_type} on {os.path.basename(file_path)}. Retrying ({attempt + 1}/3)...[/yellow]"
)
generated_data = None # Reset to indicate failure
if not generated_data:
console.print(
f"[bold red]Failed to generate valid answer for {q_type} on {os.path.basename(file_path)} after 3 attempts. Skipping.[/bold red]"
)
continue
row = {
"input": generated_data.pregunta,
"expected_output": generated_data.expected_output,
"source": os.path.splitext(os.path.basename(file_path))[0],
"type": q_type,
"agent": settings.agent.name,
"run_id": run_id,
}
all_rows.append(row)
except Exception as e:
console.print(f"[bold red]Error processing file {file_path}: {e}[/bold red]")
if not all_rows:
console.print("[bold yellow]No questions were generated.[/bold yellow]")
return ""
df = pd.DataFrame(all_rows)
if output_csv:
console.print(
f"\n[bold green]Saving {len(df)} generated questions to {output_csv}...[/bold green]"
)
df.to_csv(output_csv, index=False, encoding="utf-8-sig")
console.print("[bold green]Synthetic question generation complete.[/bold green]")
else:
console.print(
f"\n[bold green]Saving {len(df)} generated questions to BigQuery...[/bold green]"
)
project_id = settings.bigquery.project_id or settings.project_id
dataset_id = settings.bigquery.dataset_id
table_name = settings.bigquery.table_ids["synth_gen"]
table_id = f"{project_id}.{dataset_id}.{table_name}"
console.print(f"Saving to BigQuery table: [bold cyan]{table_id}[/bold cyan]")
try:
# Ensure new columns exist for all rows before upload
if "conversation_id" not in df.columns:
df["conversation_id"] = None
if "turn" not in df.columns:
df["turn"] = None
df.to_gbq(
destination_table=f"{dataset_id}.{table_name}",
project_id=project_id,
if_exists="append",
)
console.print(
f"Successfully saved {len(df)} rows to [bold green]{table_id}[/bold green]"
)
except Exception as e:
console.print(
f"[bold red]An error occurred while saving to BigQuery: {e}[/bold red]"
)
raise typer.Exit(code=1)
console.print(f"[bold yellow]Finished run with ID: {run_id}[/bold yellow]")
return run_id

@app.command()
def main(
num_questions: Annotated[
int,
typer.Option(
"--num-questions", "-n", help="Number of questions to generate."
),
] = 10,
    output_csv: Annotated[
        str | None,
        typer.Option(
            "--output-csv", "-o", help="Optional: Path to save the output CSV file."
        ),
    ] = None,
num_turns: Annotated[
int,
typer.Option(
"--num-turns",
"-t",
help="Number of conversational turns to generate.",
),
] = 1,
):
"""
Generates a specified number of synthetic questions and saves them to BigQuery (default) or a local CSV file.
"""
generate(
num_questions=num_questions, output_csv=output_csv, num_turns=num_turns
)

if __name__ == "__main__":
app()