Add linting/formatting

This commit is contained in:
Anibal Angulo
2026-02-23 17:23:22 +00:00
parent fd07f4a3e3
commit 099f6a50d1
7 changed files with 109 additions and 35 deletions

1
.gitignore vendored
View File

@@ -216,3 +216,4 @@ __marimo__/
.streamlit/secrets.toml
ref/
tmp/

View File

@@ -24,3 +24,9 @@ va-evaluator = "va_evaluator.cli:app"
[build-system]
requires = ["uv_build>=0.8.3,<0.9.0"]
build-backend = "uv_build"
[dependency-groups]
dev = [
"ruff>=0.15.2",
"ty>=0.0.18",
]

View File

@@ -22,8 +22,8 @@ class IndexSettings(BaseModel):
class AgentSettings(BaseModel):
name: str = "default"
language_model: str = "gemini-2.0-flash"
embedding_model: str = "text-embedding-004"
language_model: str = "gemini-2.5-flash"
embedding_model: str = "gemini-embedding-001"
class BigQuerySettings(BaseModel):
@@ -41,7 +41,9 @@ class BigQuerySettings(BaseModel):
class Settings(BaseSettings):
model_config = SettingsConfigDict(env_nested_delimiter="__")
model_config = SettingsConfigDict(
env_nested_delimiter="__", env_file=".env", extra="ignore"
)
project_id: str
location: str = "us-central1"

View File

@@ -186,9 +186,7 @@ class KeypointRAGEvaluator:
generated_answer: str,
keypoints: list[str],
) -> tuple[dict[str, float], list[dict]]:
keypoints_list = "\n".join(
[f"{i + 1}. {kp}" for i, kp in enumerate(keypoints)]
)
keypoints_list = "\n".join([f"{i + 1}. {kp}" for i, kp in enumerate(keypoints)])
prompt = EVALUATE_KEYPOINTS_PROMPT.format(
generated_answer=generated_answer,
keypoints_list=keypoints_list,
@@ -343,14 +341,13 @@ def load_data_from_bigquery(
df.dropna(subset=["input", "expected_output", "response"], inplace=True)
console.print(f"Loaded {len(df)} rows for evaluation.")
if df.empty:
console.print(
"[bold yellow]Warning: No data found in BigQuery.[/bold yellow]"
)
console.print("[bold yellow]Warning: No data found in BigQuery.[/bold yellow]")
return df
# --- Core Logic ---
def evaluate(
input_file: str | None = None,
output_file: str | None = None,
@@ -429,9 +426,7 @@ def evaluate(
)
)
else:
console.print(
f"[bold red]Error evaluating row: {e}[/bold red]"
)
console.print(f"[bold red]Error evaluating row: {e}[/bold red]")
all_results.append(
{
"query": row["input"],
@@ -555,4 +550,3 @@ def evaluate(
f"[bold red]An error occurred while saving to BigQuery: {e}[/bold red]"
)
raise typer.Exit(code=1)

View File

@@ -253,4 +253,3 @@ def evaluate(
f"[bold red]An error occurred while saving to BigQuery: {e}[/bold red]"
)
raise typer.Exit(code=1)

View File

@@ -4,12 +4,12 @@ import random
import pandas as pd
import typer
import vertexai
from google import genai
from google.cloud import storage
from google.genai import types
from pydantic import BaseModel
from rich.console import Console
from rich.progress import track
from vertexai.generative_models import GenerationConfig, GenerativeModel
from va_evaluator.config import Settings
@@ -89,20 +89,25 @@ class MultiStepResponseSchema(BaseModel):
def generate_structured(
model: GenerativeModel,
client: genai.Client,
model: str,
prompt: str,
response_model: type[BaseModel],
) -> BaseModel:
generation_config = GenerationConfig(
response_mime_type="application/json",
response_schema=response_model,
response = client.models.generate_content(
model=model,
contents=prompt,
config=types.GenerateContentConfig(
response_mime_type="application/json",
response_schema=response_model,
),
)
response = model.generate_content(prompt, generation_config=generation_config)
return response_model.model_validate_json(response.text)
def generate_synthetic_question(
model: GenerativeModel,
client: genai.Client,
model: str,
file_content: str,
file_path: str,
q_type: str,
@@ -111,11 +116,12 @@ def generate_synthetic_question(
prompt = PROMPT_TEMPLATE.format(
context=file_content, id=file_path, qtype=q_type, qtype_def=q_def
)
return generate_structured(model, prompt, ResponseSchema)
return generate_structured(client, model, prompt, ResponseSchema)
def generate_synthetic_conversation(
model: GenerativeModel,
client: genai.Client,
model: str,
file_content: str,
file_path: str,
num_turns: int,
@@ -123,7 +129,7 @@ def generate_synthetic_conversation(
prompt = MULTI_STEP_PROMPT_TEMPLATE.format(
context=file_content, num_turns=num_turns
)
return generate_structured(model, prompt, MultiStepResponseSchema)
return generate_structured(client, model, prompt, MultiStepResponseSchema)
def generate(
@@ -134,8 +140,12 @@ def generate(
console = Console()
settings = Settings()
vertexai.init(project=settings.project_id, location=settings.location)
model = GenerativeModel(settings.agent.language_model)
client = genai.Client(
vertexai=True,
project=settings.project_id,
location=settings.location,
)
model_name = settings.agent.language_model
gcs_client = storage.Client(project=settings.project_id)
bucket = gcs_client.bucket(settings.require_bucket)
@@ -165,9 +175,7 @@ def generate(
console.print("[yellow]No files found. Skipping.[/yellow]")
return ""
files_to_process = random.sample(
all_files, k=min(num_questions, len(all_files))
)
files_to_process = random.sample(all_files, k=min(num_questions, len(all_files)))
console.print(
f"Randomly selected {len(files_to_process)} files to generate questions from."
)
@@ -182,7 +190,7 @@ def generate(
conversation_data = None
for attempt in range(3):
conversation_data = generate_synthetic_conversation(
model, file_content, file_path, num_turns
client, model_name, file_content, file_path, num_turns
)
if (
conversation_data
@@ -219,7 +227,7 @@ def generate(
generated_data = None
for attempt in range(3):
generated_data = generate_synthetic_question(
model, file_content, file_path, q_type, q_def
client, model_name, file_content, file_path, q_type, q_def
)
if (
generated_data
@@ -249,7 +257,9 @@ def generate(
all_rows.append(row)
except Exception as e:
console.print(f"[bold red]Error processing file {file_path}: {e}[/bold red]")
console.print(
f"[bold red]Error processing file {file_path}: {e}[/bold red]"
)
if not all_rows:
console.print("[bold yellow]No questions were generated.[/bold yellow]")
@@ -262,7 +272,9 @@ def generate(
f"\n[bold green]Saving {len(df)} generated questions to {output_csv}...[/bold green]"
)
df.to_csv(output_csv, index=False, encoding="utf-8-sig")
console.print("[bold green]Synthetic question generation complete.[/bold green]")
console.print(
"[bold green]Synthetic question generation complete.[/bold green]"
)
else:
console.print(
f"\n[bold green]Saving {len(df)} generated questions to BigQuery...[/bold green]"
@@ -295,4 +307,3 @@ def generate(
console.print(f"[bold yellow]Finished run with ID: {run_id}[/bold yellow]")
return run_id

61
uv.lock generated
View File

@@ -2337,6 +2337,31 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/64/8d/0133e4eb4beed9e425d9a98ed6e081a55d195481b7632472be1af08d2f6b/rsa-4.9.1-py3-none-any.whl", hash = "sha256:68635866661c6836b8d39430f97a996acbd61bfa49406748ea243539fe239762", size = 34696, upload-time = "2025-04-16T09:51:17.142Z" },
]
[[package]]
name = "ruff"
version = "0.15.2"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/06/04/eab13a954e763b0606f460443fcbf6bb5a0faf06890ea3754ff16523dce5/ruff-0.15.2.tar.gz", hash = "sha256:14b965afee0969e68bb871eba625343b8673375f457af4abe98553e8bbb98342", size = 4558148, upload-time = "2026-02-19T22:32:20.271Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/2f/70/3a4dc6d09b13cb3e695f28307e5d889b2e1a66b7af9c5e257e796695b0e6/ruff-0.15.2-py3-none-linux_armv6l.whl", hash = "sha256:120691a6fdae2f16d65435648160f5b81a9625288f75544dc40637436b5d3c0d", size = 10430565, upload-time = "2026-02-19T22:32:41.824Z" },
{ url = "https://files.pythonhosted.org/packages/71/0b/bb8457b56185ece1305c666dc895832946d24055be90692381c31d57466d/ruff-0.15.2-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:a89056d831256099658b6bba4037ac6dd06f49d194199215befe2bb10457ea5e", size = 10820354, upload-time = "2026-02-19T22:32:07.366Z" },
{ url = "https://files.pythonhosted.org/packages/2d/c1/e0532d7f9c9e0b14c46f61b14afd563298b8b83f337b6789ddd987e46121/ruff-0.15.2-py3-none-macosx_11_0_arm64.whl", hash = "sha256:e36dee3a64be0ebd23c86ffa3aa3fd3ac9a712ff295e192243f814a830b6bd87", size = 10170767, upload-time = "2026-02-19T22:32:13.188Z" },
{ url = "https://files.pythonhosted.org/packages/47/e8/da1aa341d3af017a21c7a62fb5ec31d4e7ad0a93ab80e3a508316efbcb23/ruff-0.15.2-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a9fb47b6d9764677f8c0a193c0943ce9a05d6763523f132325af8a858eadc2b9", size = 10529591, upload-time = "2026-02-19T22:32:02.547Z" },
{ url = "https://files.pythonhosted.org/packages/93/74/184fbf38e9f3510231fbc5e437e808f0b48c42d1df9434b208821efcd8d6/ruff-0.15.2-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f376990f9d0d6442ea9014b19621d8f2aaf2b8e39fdbfc79220b7f0c596c9b80", size = 10260771, upload-time = "2026-02-19T22:32:36.938Z" },
{ url = "https://files.pythonhosted.org/packages/05/ac/605c20b8e059a0bc4b42360414baa4892ff278cec1c91fff4be0dceedefd/ruff-0.15.2-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2dcc987551952d73cbf5c88d9fdee815618d497e4df86cd4c4824cc59d5dd75f", size = 11045791, upload-time = "2026-02-19T22:32:31.642Z" },
{ url = "https://files.pythonhosted.org/packages/fd/52/db6e419908f45a894924d410ac77d64bdd98ff86901d833364251bd08e22/ruff-0.15.2-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:42a47fd785cbe8c01b9ff45031af875d101b040ad8f4de7bbb716487c74c9a77", size = 11879271, upload-time = "2026-02-19T22:32:29.305Z" },
{ url = "https://files.pythonhosted.org/packages/3e/d8/7992b18f2008bdc9231d0f10b16df7dda964dbf639e2b8b4c1b4e91b83af/ruff-0.15.2-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cbe9f49354866e575b4c6943856989f966421870e85cd2ac94dccb0a9dcb2fea", size = 11303707, upload-time = "2026-02-19T22:32:22.492Z" },
{ url = "https://files.pythonhosted.org/packages/d7/02/849b46184bcfdd4b64cde61752cc9a146c54759ed036edd11857e9b8443b/ruff-0.15.2-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b7a672c82b5f9887576087d97be5ce439f04bbaf548ee987b92d3a7dede41d3a", size = 11149151, upload-time = "2026-02-19T22:32:44.234Z" },
{ url = "https://files.pythonhosted.org/packages/70/04/f5284e388bab60d1d3b99614a5a9aeb03e0f333847e2429bebd2aaa1feec/ruff-0.15.2-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:72ecc64f46f7019e2bcc3cdc05d4a7da958b629a5ab7033195e11a438403d956", size = 11091132, upload-time = "2026-02-19T22:32:24.691Z" },
{ url = "https://files.pythonhosted.org/packages/fa/ae/88d844a21110e14d92cf73d57363fab59b727ebeabe78009b9ccb23500af/ruff-0.15.2-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:8dcf243b15b561c655c1ef2f2b0050e5d50db37fe90115507f6ff37d865dc8b4", size = 10504717, upload-time = "2026-02-19T22:32:26.75Z" },
{ url = "https://files.pythonhosted.org/packages/64/27/867076a6ada7f2b9c8292884ab44d08fd2ba71bd2b5364d4136f3cd537e1/ruff-0.15.2-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:dab6941c862c05739774677c6273166d2510d254dac0695c0e3f5efa1b5585de", size = 10263122, upload-time = "2026-02-19T22:32:10.036Z" },
{ url = "https://files.pythonhosted.org/packages/e7/ef/faf9321d550f8ebf0c6373696e70d1758e20ccdc3951ad7af00c0956be7c/ruff-0.15.2-py3-none-musllinux_1_2_i686.whl", hash = "sha256:1b9164f57fc36058e9a6806eb92af185b0697c9fe4c7c52caa431c6554521e5c", size = 10735295, upload-time = "2026-02-19T22:32:39.227Z" },
{ url = "https://files.pythonhosted.org/packages/2f/55/e8089fec62e050ba84d71b70e7834b97709ca9b7aba10c1a0b196e493f97/ruff-0.15.2-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:80d24fcae24d42659db7e335b9e1531697a7102c19185b8dc4a028b952865fd8", size = 11241641, upload-time = "2026-02-19T22:32:34.617Z" },
{ url = "https://files.pythonhosted.org/packages/23/01/1c30526460f4d23222d0fabd5888868262fd0e2b71a00570ca26483cd993/ruff-0.15.2-py3-none-win32.whl", hash = "sha256:fd5ff9e5f519a7e1bd99cbe8daa324010a74f5e2ebc97c6242c08f26f3714f6f", size = 10507885, upload-time = "2026-02-19T22:32:15.635Z" },
{ url = "https://files.pythonhosted.org/packages/5c/10/3d18e3bbdf8fc50bbb4ac3cc45970aa5a9753c5cb51bf9ed9a3cd8b79fa3/ruff-0.15.2-py3-none-win_amd64.whl", hash = "sha256:d20014e3dfa400f3ff84830dfb5755ece2de45ab62ecea4af6b7262d0fb4f7c5", size = 11623725, upload-time = "2026-02-19T22:32:04.947Z" },
{ url = "https://files.pythonhosted.org/packages/6d/78/097c0798b1dab9f8affe73da9642bb4500e098cb27fd8dc9724816ac747b/ruff-0.15.2-py3-none-win_arm64.whl", hash = "sha256:cabddc5822acdc8f7b5527b36ceac55cc51eec7b1946e60181de8fe83ca8876e", size = 10941649, upload-time = "2026-02-19T22:32:18.108Z" },
]
[[package]]
name = "scipy"
version = "1.17.0"
@@ -2500,6 +2525,30 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/36/75/661b406371f96622975eb25f9e70945d97fbe6b8e5af40342c59191962a3/trec_car_tools-2.6-py3-none-any.whl", hash = "sha256:e6f0373259e1c234222da7270ab54ca7af7a6f8d0dd32b13e158c1659d3991cf", size = 8414, upload-time = "2022-02-01T16:37:22.102Z" },
]
[[package]]
name = "ty"
version = "0.0.18"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/74/15/9682700d8d60fdca7afa4febc83a2354b29cdcd56e66e19c92b521db3b39/ty-0.0.18.tar.gz", hash = "sha256:04ab7c3db5dcbcdac6ce62e48940d3a0124f377c05499d3f3e004e264ae94b83", size = 5214774, upload-time = "2026-02-20T21:51:31.173Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/ae/d8/920460d4c22ea68fcdeb0b2fb53ea2aeb9c6d7875bde9278d84f2ac767b6/ty-0.0.18-py3-none-linux_armv6l.whl", hash = "sha256:4e5e91b0a79857316ef893c5068afc4b9872f9d257627d9bc8ac4d2715750d88", size = 10280825, upload-time = "2026-02-20T21:51:25.03Z" },
{ url = "https://files.pythonhosted.org/packages/83/56/62587de582d3d20d78fcdddd0594a73822ac5a399a12ef512085eb7a4de6/ty-0.0.18-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:ee0e578b3f8416e2d5416da9553b78fd33857868aa1384cb7fefeceee5ff102d", size = 10118324, upload-time = "2026-02-20T21:51:22.27Z" },
{ url = "https://files.pythonhosted.org/packages/2f/2d/dbdace8d432a0755a7417f659bfd5b8a4261938ecbdfd7b42f4c454f5aa9/ty-0.0.18-py3-none-macosx_11_0_arm64.whl", hash = "sha256:3f7a0487d36b939546a91d141f7fc3dbea32fab4982f618d5b04dc9d5b6da21e", size = 9605861, upload-time = "2026-02-20T21:51:16.066Z" },
{ url = "https://files.pythonhosted.org/packages/6b/d9/de11c0280f778d5fc571393aada7fe9b8bc1dd6a738f2e2c45702b8b3150/ty-0.0.18-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d5e2fa8d45f57ca487a470e4bf66319c09b561150e98ae2a6b1a97ef04c1a4eb", size = 10092701, upload-time = "2026-02-20T21:51:26.862Z" },
{ url = "https://files.pythonhosted.org/packages/0f/94/068d4d591d791041732171e7b63c37a54494b2e7d28e88d2167eaa9ad875/ty-0.0.18-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d75652e9e937f7044b1aca16091193e7ef11dac1c7ec952b7fb8292b7ba1f5f2", size = 10109203, upload-time = "2026-02-20T21:51:11.59Z" },
{ url = "https://files.pythonhosted.org/packages/34/e4/526a4aa56dc0ca2569aaa16880a1ab105c3b416dd70e87e25a05688999f3/ty-0.0.18-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:563c868edceb8f6ddd5e91113c17d3676b028f0ed380bdb3829b06d9beb90e58", size = 10614200, upload-time = "2026-02-20T21:51:20.298Z" },
{ url = "https://files.pythonhosted.org/packages/fd/3d/b68ab20a34122a395880922587fbfc3adf090d22e0fb546d4d20fe8c2621/ty-0.0.18-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:502e2a1f948bec563a0454fc25b074bf5cf041744adba8794d024277e151d3b0", size = 11153232, upload-time = "2026-02-20T21:51:14.121Z" },
{ url = "https://files.pythonhosted.org/packages/68/ea/678243c042343fcda7e6af36036c18676c355878dcdcd517639586d2cf9e/ty-0.0.18-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cc881dea97021a3aa29134a476937fd8054775c4177d01b94db27fcfb7aab65b", size = 10832934, upload-time = "2026-02-20T21:51:32.92Z" },
{ url = "https://files.pythonhosted.org/packages/d8/bd/7f8d647cef8b7b346c0163230a37e903c7461c7248574840b977045c77df/ty-0.0.18-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:421fcc3bc64cab56f48edb863c7c1c43649ec4d78ff71a1acb5366ad723b6021", size = 10700888, upload-time = "2026-02-20T21:51:09.673Z" },
{ url = "https://files.pythonhosted.org/packages/6e/06/cb3620dc48c5d335ba7876edfef636b2f4498eff4a262ff90033b9e88408/ty-0.0.18-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:0fe5038a7136a0e638a2fb1ad06e3d3c4045314c6ba165c9c303b9aeb4623d6c", size = 10078965, upload-time = "2026-02-20T21:51:07.678Z" },
{ url = "https://files.pythonhosted.org/packages/60/27/c77a5a84533fa3b685d592de7b4b108eb1f38851c40fac4e79cc56ec7350/ty-0.0.18-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:d123600a52372677613a719bbb780adeb9b68f47fb5f25acb09171de390e0035", size = 10134659, upload-time = "2026-02-20T21:51:18.311Z" },
{ url = "https://files.pythonhosted.org/packages/43/6e/60af6b88c73469e628ba5253a296da6984e0aa746206f3034c31f1a04ed1/ty-0.0.18-py3-none-musllinux_1_2_i686.whl", hash = "sha256:bb4bc11d32a1bf96a829bf6b9696545a30a196ac77bbc07cc8d3dfee35e03723", size = 10297494, upload-time = "2026-02-20T21:51:39.631Z" },
{ url = "https://files.pythonhosted.org/packages/33/90/612dc0b68224c723faed6adac2bd3f930a750685db76dfe17e6b9e534a83/ty-0.0.18-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:dda2efbf374ba4cd704053d04e32f2f784e85c2ddc2400006b0f96f5f7e4b667", size = 10791944, upload-time = "2026-02-20T21:51:37.13Z" },
{ url = "https://files.pythonhosted.org/packages/0d/da/f4ada0fd08a9e4138fe3fd2bcd3797753593f423f19b1634a814b9b2a401/ty-0.0.18-py3-none-win32.whl", hash = "sha256:c5768607c94977dacddc2f459ace6a11a408a0f57888dd59abb62d28d4fee4f7", size = 9677964, upload-time = "2026-02-20T21:51:42.039Z" },
{ url = "https://files.pythonhosted.org/packages/5e/fa/090ed9746e5c59fc26d8f5f96dc8441825171f1f47752f1778dad690b08b/ty-0.0.18-py3-none-win_amd64.whl", hash = "sha256:b78d0fa1103d36fc2fce92f2092adace52a74654ab7884d54cdaec8eb5016a4d", size = 10636576, upload-time = "2026-02-20T21:51:29.159Z" },
{ url = "https://files.pythonhosted.org/packages/92/4f/5dd60904c8105cda4d0be34d3a446c180933c76b84ae0742e58f02133713/ty-0.0.18-py3-none-win_arm64.whl", hash = "sha256:01770c3c82137c6b216aa3251478f0b197e181054ee92243772de553d3586398", size = 10095449, upload-time = "2026-02-20T21:51:34.914Z" },
]
[[package]]
name = "typer"
version = "0.24.1"
@@ -2581,6 +2630,12 @@ dependencies = [
{ name = "typer" },
]
[package.dev-dependencies]
dev = [
{ name = "ruff" },
{ name = "ty" },
]
[package.metadata]
requires-dist = [
{ name = "google-cloud-aiplatform" },
@@ -2596,6 +2651,12 @@ requires-dist = [
{ name = "typer" },
]
[package.metadata.requires-dev]
dev = [
{ name = "ruff", specifier = ">=0.15.2" },
{ name = "ty", specifier = ">=0.0.18" },
]
[[package]]
name = "warc3-wet"
version = "0.2.5"