First commit

This commit is contained in:
Anibal Angulo
2026-02-18 19:57:43 +00:00
commit a53f8fcf62
115 changed files with 9957 additions and 0 deletions

View File

@@ -0,0 +1,80 @@
import asyncio
import os
import logging
import typer
import random
from google import genai
from google.genai import types
from dotenv import load_dotenv
from embedder.vertex_ai import VertexAIEmbedder
load_dotenv()  # pull GOOGLE_CLOUD_* settings from a local .env file, if present

# Target Google Cloud project/region for the Vertex AI embedding calls.
project = os.getenv("GOOGLE_CLOUD_PROJECT")
location = os.getenv("GOOGLE_CLOUD_LOCATION")

# Embedding model exercised by this diagnostic script.
MODEL_NAME = "gemini-embedding-001"

# Sample Spanish banking questions used as embedding inputs.
CONTENT_LIST = [
    "¿Cuáles son los beneficios de una tarjeta de crédito?",
    "¿Cómo puedo abrir una cuenta de ahorros?",
    "¿Qué es una hipoteca y cómo funciona?",
    "¿Cuáles son las tasas de interés para un préstamo personal?",
    "¿Cómo puedo solicitar un préstamo para un coche?",
    "¿Qué es la banca en línea y cómo me registro?",
    "¿Cómo puedo reportar una tarjeta de crédito perdida o robada?",
    "¿Qué es el phishing y cómo puedo protegerme?",
    "¿Cuáles son los diferentes tipos de cuentas corrientes que ofrecen?",
    "¿Cómo puedo transferir dinero a una cuenta internacional?",
]

# NOTE(review): declared but not referenced anywhere in this file — confirm the
# embedder applies the task type internally, otherwise this is dead code.
TASK_TYPE = "RETRIEVAL_DOCUMENT"

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

app = typer.Typer()

logger.info(f"Initializing GenAI Client for project '{project}' in '{location}'")
# Single shared embedder instance reused by every concurrent request below.
embedder = VertexAIEmbedder(MODEL_NAME, project, location)
async def embed_content_task():
    """Fire one embedding request for a randomly chosen prompt.

    Uses the module-level ``embedder`` so all concurrent tasks share one client.
    """
    await embedder.async_generate_embedding(random.choice(CONTENT_LIST))
async def run_test(concurrency: int):
    """Continuously calls the embedding API and tracks requests.

    Sends batches of ``concurrency`` concurrent requests in an infinite loop,
    printing stats and stopping on the first batch that raises (or on Ctrl+C
    handled by the caller).

    Args:
        concurrency: Number of requests to run in parallel per batch.
    """
    total_requests = 0
    logger.info(f"Starting diagnostic test with {concurrency} concurrent requests on model '{MODEL_NAME}'.")
    logger.info("Press Ctrl+C to stop.")
    while True:
        # Wrap the coroutines in real tasks so stragglers can be cancelled on failure.
        tasks = [asyncio.create_task(embed_content_task()) for _ in range(concurrency)]
        try:
            await asyncio.gather(*tasks)
            total_requests += concurrency
            logger.info(f"Successfully completed batch. Total requests so far: {total_requests}")
        except Exception as e:
            # gather() propagates only the first failure; cancel and drain the
            # remaining tasks so they don't linger and emit
            # "Task exception was never retrieved" warnings on shutdown.
            for task in tasks:
                task.cancel()
            await asyncio.gather(*tasks, return_exceptions=True)
            logger.error("Caught an error. Stopping test.")
            print("\n--- STATS ---")
            print(f"Total successful requests: {total_requests}")
            print(f"Concurrent requests during failure: {concurrency}")
            print(f"Error Type: {e.__class__.__name__}")
            print(f"Error Details: {e}")
            print("-------------")
            break
@app.command()
def main(
    concurrency: int = typer.Option(
        10, "--concurrency", "-c", help="Number of concurrent requests to send in each batch."
    ),
):
    """CLI entry point: run the embedding load test until failure or Ctrl+C."""
    try:
        asyncio.run(run_test(concurrency))
    except KeyboardInterrupt:
        # Ctrl+C during asyncio.run() surfaces here; exit cleanly, no traceback.
        logger.info("\nKeyboard interrupt received. Exiting.")


if __name__ == "__main__":
    app()

View File

@@ -0,0 +1,98 @@
import asyncio
import logging
import random
import typer
import httpx
# Sample Spanish banking questions sent as queries to the RAG endpoint.
CONTENT_LIST = [
    "¿Cuáles son los beneficios de una tarjeta de crédito?",
    "¿Cómo puedo abrir una cuenta de ahorros?",
    "¿Qué es una hipoteca y cómo funciona?",
    "¿Cuáles son las tasas de interés para un préstamo personal?",
    "¿Cómo puedo solicitar un préstamo para un coche?",
    "¿Qué es la banca en línea y cómo me registro?",
    "¿Cómo puedo reportar una tarjeta de crédito perdida o robada?",
    "¿Qué es el phishing y cómo puedo protegerme?",
    "¿Cuáles son los diferentes tipos de cuentas corrientes que ofrecen?",
    "¿Cómo puedo transferir dinero a una cuenta internacional?",
]

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Typer application exposing the `main` command below.
app = typer.Typer()
async def call_rag_endpoint_task(client: httpx.AsyncClient, url: str):
    """Send one randomly chosen question to the RAG endpoint and log the reply.

    Raises httpx.HTTPStatusError on non-2xx responses (via raise_for_status),
    letting the caller's error handling classify the failure.
    """
    question = random.choice(CONTENT_LIST)
    payload = {"sessionInfo": {"parameters": {"query": question}}}

    response = await client.post(url, json=payload)
    response.raise_for_status()  # Raise an exception for bad status codes

    body = response.json()
    response_text = body["sessionInfo"]["parameters"]["response"]
    logger.info(f"Question: {question[:50]}... Response: {response_text[:100]}...")
async def run_test(concurrency: int, url: str, timeout_seconds: float):
    """Continuously fire batches of concurrent requests at the RAG endpoint.

    Runs until the first failing batch; handlers go from most to least
    specific (timeout -> HTTP status -> transport -> anything else), each
    logging details and ending the loop.
    """
    total_requests = 0
    logger.info(f"Starting diagnostic test with {concurrency} concurrent requests on endpoint '{url}'.")
    logger.info(f"Request timeout is set to {timeout_seconds} seconds.")
    logger.info("Press Ctrl+C to stop.")

    async with httpx.AsyncClient(timeout=httpx.Timeout(timeout_seconds)) as client:
        while True:
            batch = [call_rag_endpoint_task(client, url) for _ in range(concurrency)]
            try:
                await asyncio.gather(*batch)
            except httpx.TimeoutException as e:
                # Must precede RequestError: TimeoutException is a subclass of it.
                logger.error(f"A request timed out: {e.request.method} {e.request.url}")
                logger.error("Consider increasing the timeout with the --timeout option.")
                break
            except httpx.HTTPStatusError as e:
                logger.error(f"An HTTP error occurred: {e.response.status_code} - {e.request.method} {e.request.url}")
                logger.error(f"Response body: {e.response.text}")
                break
            except httpx.RequestError as e:
                logger.error(f"A request error occurred: {e.request.method} {e.request.url}")
                logger.error(f"Error details: {e}")
                break
            except Exception as e:
                logger.error("Caught an unexpected error. Stopping test.")
                print("\n--- STATS ---")
                print(f"Total successful requests: {total_requests}")
                print(f"Concurrent requests during failure: {concurrency}")
                print(f"Error Type: {e.__class__.__name__}")
                print(f"Error Details: {e}")
                print("-------------")
                break
            else:
                # Success path: count the batch and keep going.
                total_requests += concurrency
                logger.info(f"Successfully completed batch. Total requests so far: {total_requests}")
@app.command()
def main(
    concurrency: int = typer.Option(
        10, "--concurrency", "-c", help="Number of concurrent requests to send in each batch."
    ),
    url: str = typer.Option(
        "http://127.0.0.1:8000/sigma-rag", "--url", "-u", help="The URL of the RAG endpoint to test."
    ),
    timeout_seconds: float = typer.Option(
        30.0, "--timeout", "-t", help="Request timeout in seconds."
    )
):
    """CLI entry point: run the RAG endpoint load test until failure or Ctrl+C."""
    try:
        asyncio.run(run_test(concurrency, url, timeout_seconds))
    except KeyboardInterrupt:
        # Ctrl+C during asyncio.run() surfaces here; exit cleanly, no traceback.
        logger.info("\nKeyboard interrupt received. Exiting.")


if __name__ == "__main__":
    app()

91
scripts/stress_test.py Normal file
View File

@@ -0,0 +1,91 @@
import requests
import time
import random
import concurrent.futures
import threading
# URL of the RAG endpoint under stress test.
url = "http://localhost:8000/sigma-rag"

# Spanish banking questions cycled through as request payloads.
spanish_questions = [
    "¿Cuáles son los beneficios de una tarjeta de crédito?",
    "¿Cómo puedo abrir una cuenta de ahorros?",
    "¿Qué es una hipoteca y cómo funciona?",
    "¿Cuáles son las tasas de interés para un préstamo personal?",
    "¿Cómo puedo solicitar un préstamo para un coche?",
    "¿Qué es la banca en línea y cómo me registro?",
    "¿Cómo puedo reportar una tarjeta de crédito perdida o robada?",
    "¿Qué es el phishing y cómo puedo protegerme?",
    "¿Cuáles son los diferentes tipos de cuentas corrientes que ofrecen?",
    "¿Cómo puedo transferir dinero a una cuenta internacional?",
]

# Shared flag: any worker (or Ctrl+C) sets it to tell all threads to stop.
stop_event = threading.Event()
def send_request(question, request_id):
    """POST one question to the endpoint; set the stop flag on a 500 or a transport error."""
    if stop_event.is_set():
        return

    data = {"sessionInfo": {"parameters": {"query": question}}}
    try:
        # Only the network call can raise RequestException; keep the try minimal.
        response = requests.post(url, json=data)
    except requests.exceptions.RequestException as e:
        if not stop_event.is_set():
            print(f"Request {request_id}: An error occurred: {e}")
            stop_event.set()
        return

    if stop_event.is_set():
        return
    if response.status_code == 500:
        print(f"Request {request_id}: Received 500 error with question: '{question}'.")
        print("Stopping stress test.")
        stop_event.set()
    else:
        print(f"Request {request_id}: Successful with status code {response.status_code}.")
def main():
    """Runs the stress test with parallel requests.

    Keeps exactly ``num_workers`` requests in flight: each time a future
    finishes it is removed from the set and (unless stopping) replaced by a
    new submission with a fresh request id.
    """
    num_workers = 30  # Number of parallel requests
    print(f"Starting stress test with {num_workers} parallel workers. Press Ctrl+C to stop.")
    with concurrent.futures.ThreadPoolExecutor(max_workers=num_workers) as executor:
        # Prime the pool with one in-flight request per worker (ids 1..num_workers).
        futures = {
            executor.submit(send_request, random.choice(spanish_questions), i)
            for i in range(1, num_workers + 1)
        }
        request_id_counter = num_workers + 1
        try:
            while not stop_event.is_set():
                # Wait for any future to complete
                done, _ = concurrent.futures.wait(
                    futures, return_when=concurrent.futures.FIRST_COMPLETED
                )
                for future in done:
                    # Remove the completed future
                    futures.remove(future)
                    # If we are not stopping, submit a new one
                    if not stop_event.is_set():
                        futures.add(
                            executor.submit(
                                send_request,
                                random.choice(spanish_questions),
                                request_id_counter,
                            )
                        )
                        request_id_counter += 1
        except KeyboardInterrupt:
            print("\nKeyboard interrupt received. Stopping threads.")
            stop_event.set()
    # Leaving the `with` block waits for in-flight requests to finish.
    print("Stress test finished.")


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,84 @@
import typer
from google.cloud import aiplatform
from typing_extensions import Annotated
from rag_eval.config import settings
# Typer application exposing the `main` command below.
app = typer.Typer()
@app.command()
def main(
    pipeline_spec_path: Annotated[
        str,
        typer.Option(
            "--pipeline-spec-path",
            "-p",
            help="Path to the compiled pipeline YAML file.",
        ),
    ],
    input_table: Annotated[
        str,
        typer.Option(
            "--input-table",
            "-i",
            help="Full BigQuery table name for input (e.g., 'project.dataset.table')",
        ),
    ],
    output_table: Annotated[
        str,
        typer.Option(
            "--output-table",
            "-o",
            help="Full BigQuery table name for output (e.g., 'project.dataset.table')",
        ),
    ],
    project_id: Annotated[
        str,
        typer.Option(
            "--project-id",
            help="Google Cloud project ID.",
        ),
    ] = settings.project_id,
    location: Annotated[
        str,
        typer.Option(
            "--location",
            help="Google Cloud location for the pipeline job.",
        ),
    ] = settings.location,
    display_name: Annotated[
        str,
        typer.Option(
            "--display-name",
            help="Display name for the pipeline job.",
        ),
    ] = "search-eval-pipeline-job",
    service_account: Annotated[
        str,
        typer.Option(
            "--service-account",
            help="Service account the pipeline job runs as.",
        ),
    ] = "sa-cicd-gitlab@bnt-orquestador-cognitivo-dev.iam.gserviceaccount.com",
):
    """Submits a Vertex AI pipeline job.

    Builds a PipelineJob from the compiled spec, forwards the BigQuery
    input/output tables as pipeline parameters, and submits it under the
    given service account (previously hard-coded; now overridable per run).
    """
    # Runtime parameters forwarded to the compiled pipeline.
    parameter_values = {
        "project_id": project_id,
        "location": location,
        "input_table": input_table,
        "output_table": output_table,
    }
    job = aiplatform.PipelineJob(
        display_name=display_name,
        template_path=pipeline_spec_path,
        pipeline_root=f"gs://{settings.bucket}/pipeline_root",
        parameter_values=parameter_values,
        project=project_id,
        location=location,
    )
    print(f"Submitting pipeline job with parameters: {parameter_values}")
    job.submit(service_account=service_account)
    # NOTE(review): _dashboard_uri() is a private aiplatform API; convenient,
    # but it may break across SDK versions.
    print(f"Pipeline job submitted. You can view it at: {job._dashboard_uri()}")
# Script entry point: dispatch to the Typer CLI.
if __name__ == "__main__":
    app()

42
scripts/test_rerank.py Normal file
View File

@@ -0,0 +1,42 @@
from google.cloud import discoveryengine_v1 as discoveryengine

# Ad-hoc script: exercises the Discovery Engine semantic ranking API with a
# small hard-coded example and prints the ranked records.
project_id = "bnt-orquestador-cognitivo-dev"

client = discoveryengine.RankServiceClient()

# The full resource name of the ranking config.
# Format: projects/{project_id}/locations/{location}/rankingConfigs/default_ranking_config
ranking_config = client.ranking_config_path(
    project=project_id,
    location="global",
    ranking_config="default_ranking_config",
)

# Rank three candidate passages against the query; only record "2" is about
# the Google Gemini model, so it should surface first.
request = discoveryengine.RankRequest(
    ranking_config=ranking_config,
    model="semantic-ranker-default@latest",
    top_n=10,  # larger than the 3 records supplied, so all come back ranked
    query="What is Google Gemini?",
    records=[
        discoveryengine.RankingRecord(
            id="1",
            title="Gemini",
            content="The Gemini zodiac symbol often depicts two figures standing side-by-side.",
        ),
        discoveryengine.RankingRecord(
            id="2",
            title="Gemini",
            content="Gemini is a cutting edge large language model created by Google.",
        ),
        discoveryengine.RankingRecord(
            id="3",
            title="Gemini Constellation",
            content="Gemini is a constellation that can be seen in the night sky.",
        ),
    ],
)
response = client.rank(request=request)

# Handle the response
print(response)

12
scripts/test_server.py Normal file
View File

@@ -0,0 +1,12 @@
import requests

# Smoke-test the /sigma-rag endpoint with a single hard-coded question and
# dump the JSON reply.
url = "http://localhost:8000/sigma-rag"
data = {
    "sessionInfo": {"parameters": {"query": "What are the benefits of a credit card?"}}
}

response = requests.post(url, json=data)

print("Response from /sigma-rag:")
print(response.json())