First commit

This commit is contained in:
Anibal Angulo
2026-02-18 19:57:43 +00:00
commit a53f8fcf62
115 changed files with 9957 additions and 0 deletions

View File

@@ -0,0 +1 @@
3.12

0
packages/llm/README.md Normal file
View File

View File

@@ -0,0 +1,18 @@
[project]
name = "llm"
version = "0.1.0"
description = "LLM client abstractions with a Vertex AI (Gemini) implementation"
readme = "README.md"
authors = [
{ name = "Anibal Angulo", email = "a8065384@banorte.com" }
]
requires-python = ">=3.12"
dependencies = [
"google-genai>=1.20.0",
"pydantic>=2.11.7",
"tenacity>=9.1.2",
]
[build-system]
requires = ["uv_build>=0.8.3,<0.9.0"]
build-backend = "uv_build"

View File

@@ -0,0 +1,2 @@
def hello() -> str:
    """Return the package's greeting string."""
    greeting = "Hello from llm!"
    return greeting

View File

@@ -0,0 +1,128 @@
from abc import ABC, abstractmethod
from typing import Any, Type, TypeVar
from pydantic import BaseModel, field_validator
class ToolCall(BaseModel):
    """A single tool/function invocation requested by the model.
    Attributes:
        name: Name of the tool the model wants to call.
        arguments: Keyword arguments for the call, parsed from the response.
    """
    name: str
    arguments: dict
class Usage(BaseModel):
    """Token usage for a single model call, with cost estimation.

    Attributes:
        prompt_tokens: Tokens in the input prompt.
        thought_tokens: Tokens spent on model "thinking" (reasoning).
        response_tokens: Tokens in the final response.
    """

    prompt_tokens: int | None = 0
    thought_tokens: int | None = 0
    response_tokens: int | None = 0

    @field_validator("prompt_tokens", "thought_tokens", "response_tokens", mode="before")
    @classmethod
    def _validate_tokens(cls, v: int | None) -> int:
        # The API may report None for absent counters; normalize to 0.
        return v or 0

    def __add__(self, other: "Usage") -> "Usage":
        """Return a new Usage with the token counts of both operands summed."""
        return Usage(
            prompt_tokens=self.prompt_tokens + other.prompt_tokens,
            thought_tokens=self.thought_tokens + other.thought_tokens,
            response_tokens=self.response_tokens + other.response_tokens,
        )

    def get_cost(self, name: str, fx_rate: float = 18.65) -> float:
        """Estimate the cost of this usage, converted to local currency.

        Args:
            name: Model name; one of "gemini-2.5-pro" or "gemini-2.5-flash".
            fx_rate: USD-to-local-currency conversion rate (default 18.65,
                presumably USD->MXN — confirm against current rates).

        Returns:
            Estimated cost: USD list prices per million tokens, times fx_rate.

        Raises:
            ValueError: If ``name`` is not a supported model.
        """
        million = 1000000
        if name == "gemini-2.5-pro":
            # Pro pricing has a higher tier above 200k prompt tokens.
            if self.prompt_tokens > 200000:
                input_rate, output_rate = 2.5, 15
            else:
                input_rate, output_rate = 1.25, 10
        elif name == "gemini-2.5-flash":
            input_rate, output_rate = 0.30, 2.5
        else:
            raise ValueError(f"Invalid model: {name}")
        input_cost = self.prompt_tokens * (input_rate / million)
        # Thought and response tokens are billed at the same output rate.
        output_cost = self.thought_tokens * (output_rate / million) + self.response_tokens * (output_rate / million)
        return (input_cost + output_cost) * fx_rate
class Generation(BaseModel):
    """A class to represent a single generation from a model.
    Attributes:
        text: The generated text, if the model returned text.
        tool_calls: Tool invocations requested by the model, if any.
        usage: Token usage metadata for the call (a Usage model).
        extra: Provider-specific payload (e.g. raw response content).
    """
    text: str | None = None
    tool_calls: list[ToolCall] | None = None
    usage: Usage = Usage()
    extra: dict = {}
# Generic bound for structured-generation response models: any pydantic model.
T = TypeVar("T", bound=BaseModel)
class BaseLLM(ABC):
    """An abstract base class for all LLMs."""

    @abstractmethod
    def generate(
        self,
        model: str,
        prompt: Any,
        tools: list | None = None,
        system_prompt: str | None = None,
        tool_mode: str = "AUTO",
    ) -> Generation:
        """Generates text from a prompt.

        Args:
            model: The model to use for generation.
            prompt: The prompt to generate text from.
            tools: An optional list of tools to use for generation.
            system_prompt: An optional system prompt to guide the model's behavior.
            tool_mode: Function-calling mode forwarded to the provider
                (added to match async_generate and concrete implementations).

        Returns:
            A Generation object containing the generated text and usage metadata.
        """
        ...

    @abstractmethod
    def structured_generation(
        self,
        model: str,
        prompt: Any,
        response_model: Type[T],
        tools: list | None = None,
        system_prompt: str | None = None,
    ) -> T:
        """Generates structured data from a prompt.

        Args:
            model: The model to use for generation.
            prompt: The prompt to generate text from.
            response_model: The pydantic model to parse the response into.
            tools: An optional list of tools to use for generation.
            system_prompt: An optional system prompt to guide the model's behavior
                (added to match concrete implementations).

        Returns:
            An instance of the provided pydantic model.
        """
        ...

    @abstractmethod
    async def async_generate(
        self,
        model: str,
        prompt: Any,
        tools: list | None = None,
        system_prompt: str | None = None,
        tool_mode: str = "AUTO",
    ) -> Generation:
        """Asynchronously generates text from a prompt.

        Args:
            model: The model to use for generation.
            prompt: The prompt to generate text from.
            tools: An optional list of tools to use for generation.
            system_prompt: An optional system prompt to guide the model's behavior.
            tool_mode: Function-calling mode forwarded to the provider.

        Returns:
            A Generation object containing the generated text and usage metadata.
        """
        ...

View File

View File

@@ -0,0 +1,181 @@
import logging
from typing import Any, Type
from google import genai
from google.genai import types
from tenacity import retry, stop_after_attempt, wait_exponential
from rag_eval.config import settings
from .base import BaseLLM, Generation, T, ToolCall, Usage
logger = logging.getLogger(__name__)
class VertexAILLM(BaseLLM):
    """A class for interacting with the Vertex AI API."""

    def __init__(
        self, project: str | None = None, location: str | None = None, thinking: int = 0
    ) -> None:
        """Initializes the VertexAILLM client.

        Args:
            project: The Google Cloud project ID; falls back to settings.project_id.
            location: The Google Cloud location; falls back to settings.location.
            thinking: Thinking-token budget forwarded to the model (0 by default).
        """
        self.client = genai.Client(
            vertexai=True,
            project=project or settings.project_id,
            location=location or settings.location,
        )
        self.thinking_budget = thinking

    def _build_config(
        self,
        tools: list | None,
        system_prompt: str | None,
        tool_mode: str,
    ) -> types.GenerateContentConfig:
        """Builds the GenerateContentConfig shared by sync and async generation."""
        return types.GenerateContentConfig(
            # Normalize None to [] to preserve the previous default behavior.
            tools=tools if tools is not None else [],
            system_instruction=system_prompt,
            thinking_config=types.ThinkingConfig(
                thinking_budget=self.thinking_budget
            ),
            tool_config=types.ToolConfig(
                function_calling_config=types.FunctionCallingConfig(
                    mode=tool_mode
                )
            ),
        )

    # NOTE(review): tenacity retry decorators were present but commented out on
    # generate/structured_generation/async_generate; re-enable if transient API
    # failures need automatic retries.
    def generate(
        self,
        model: str,
        prompt: Any,
        tools: list | None = None,
        system_prompt: str | None = None,
        tool_mode: str = "AUTO",
    ) -> Generation:
        """Generates text using the specified model and prompt.

        Args:
            model: The name of the model to use for generation.
            prompt: The prompt to use for generation.
            tools: A list of tools to use for generation.
            system_prompt: An optional system prompt to guide the model's behavior.
            tool_mode: Function-calling mode (e.g. "AUTO").

        Returns:
            A Generation object containing the generated text and usage metadata.
        """
        logger.debug("Entering VertexAILLM.generate")
        # Lazy %-style args: avoid formatting cost when DEBUG is off.
        logger.debug("Model: %s, Tool Mode: %s", model, tool_mode)
        logger.debug("System prompt: %s", system_prompt)
        logger.debug("Calling Vertex AI API: models.generate_content...")
        response = self.client.models.generate_content(
            model=model,
            contents=prompt,
            config=self._build_config(tools, system_prompt, tool_mode),
        )
        logger.debug("Received response from Vertex AI API.")
        logger.debug("API Response: %s", response)
        return self._create_generation(response)

    def structured_generation(
        self,
        model: str,
        prompt: Any,
        response_model: Type[T],
        system_prompt: str | None = None,
        tools: list | None = None,
    ) -> T:
        """Generates structured data from a prompt.

        Args:
            model: The model to use for generation.
            prompt: The prompt to generate text from.
            response_model: The pydantic model to parse the response into.
            system_prompt: An optional system prompt to guide the model's behavior.
            tools: An optional list of tools to use for generation.

        Returns:
            An instance of the provided pydantic model.
        """
        config = types.GenerateContentConfig(
            response_mime_type="application/json",
            response_schema=response_model,
            system_instruction=system_prompt,
            tools=tools,
        )
        response: types.GenerateContentResponse = (
            self.client.models.generate_content(
                model=model, contents=prompt, config=config
            )
        )
        return response_model.model_validate_json(response.text)

    async def async_generate(
        self,
        model: str,
        prompt: Any,
        tools: list | None = None,
        system_prompt: str | None = None,
        tool_mode: str = "AUTO",
    ) -> Generation:
        """Asynchronously generates text using the specified model and prompt.

        Args:
            model: The name of the model to use for generation.
            prompt: The prompt to use for generation.
            tools: A list of tools to use for generation.
            system_prompt: An optional system prompt to guide the model's behavior.
            tool_mode: Function-calling mode (e.g. "AUTO").

        Returns:
            A Generation object containing the generated text and usage metadata.
        """
        response = await self.client.aio.models.generate_content(
            model=model,
            contents=prompt,
            config=self._build_config(tools, system_prompt, tool_mode),
        )
        return self._create_generation(response)

    def _create_generation(self, response) -> Generation:
        """Converts a raw API response into a Generation.

        Extracts token usage, then returns either the tool calls or the text
        of the first candidate.
        """
        logger.debug("Creating Generation object from API response.")
        meta = response.usage_metadata
        usage = Usage(
            prompt_tokens=meta.prompt_token_count,
            thought_tokens=meta.thoughts_token_count or 0,
            response_tokens=meta.candidates_token_count,
        )
        logger.debug("usage=%r", usage)
        logger.debug("response=%r", response)
        candidate = response.candidates[0]
        tool_calls = [
            ToolCall(name=fn.name, arguments=fn.args)
            for part in candidate.content.parts
            if (fn := part.function_call)
        ]
        if tool_calls:
            logger.debug("Found %d tool calls.", len(tool_calls))
            return Generation(
                tool_calls=tool_calls,
                usage=usage,
                extra={"original_content": candidate.content},
            )
        logger.debug("No tool calls found, returning text response.")
        return Generation(text=candidate.content.parts[0].text, usage=usage)