First commit

2026-02-22 15:25:27 +00:00
commit 35d5a65b17
70 changed files with 4298 additions and 0 deletions
--- a/packages/llm/.python-version
+++ b/packages/llm/.python-version
@@ -0,0 +1 @@
+3.10
--- a/packages/llm/README.md
+++ b/packages/llm/README.md
--- a/packages/llm/pyproject.toml
+++ b/packages/llm/pyproject.toml
@@ -0,0 +1,18 @@
+[project]
+name = "llm"
+version = "0.1.0"
+description = "Add your description here"
+readme = "README.md"
+authors = [
+    { name = "Anibal Angulo", email = "a8065384@banorte.com" }
+]
+requires-python = ">=3.12"
+dependencies = [
+    "google-genai>=1.20.0",
+    "pydantic>=2.11.7",
+    "tenacity>=9.1.2",
+]
+
+[build-system]
+requires = ["uv_build>=0.8.3,<0.9.0"]
+build-backend = "uv_build"
--- a/packages/llm/src/llm/init.py
+++ b/packages/llm/src/llm/init.py
@@ -0,0 +1,2 @@
+def hello() -> str:
+    """Return the package's fixed greeting string."""
+    greeting = "Hello from llm!"
+    return greeting
--- a/packages/llm/src/llm/base.py
+++ b/packages/llm/src/llm/base.py
@@ -0,0 +1,128 @@
+from abc import ABC, abstractmethod
+from typing import Any, Type, TypeVar
+
+from pydantic import BaseModel, field_validator
+
+
+class ToolCall(BaseModel):
+    """A single tool/function invocation: the tool's name and its arguments."""
+
+    # Name of the tool (function) to call.
+    name: str
+    # Arguments for the call, as a plain dict.
+    arguments: dict
+
+class Usage(BaseModel):
+    """Token counters for one or more model calls.
+
+    A ``None`` passed for any counter is normalised to ``0`` by the
+    validator, so two ``Usage`` objects can be added field by field.
+    """
+
+    prompt_tokens: int | None = 0
+    thought_tokens: int | None = 0
+    response_tokens: int | None = 0
+
+    @field_validator("prompt_tokens", "thought_tokens", "response_tokens", mode="before")
+    @classmethod
+    def _validate_tokens(cls, v: int | None) -> int:
+        """Replace a falsy count (``None`` or ``0``) with ``0``."""
+        return v or 0
+
+    def __add__(self, other) -> "Usage":
+        """Return a new ``Usage`` whose counters are the field-wise sums."""
+        return Usage(
+            prompt_tokens=self.prompt_tokens + other.prompt_tokens,
+            thought_tokens=self.thought_tokens + other.thought_tokens,
+            response_tokens=self.response_tokens + other.response_tokens,
+        )
+
+    def get_cost(self, name: str) -> float:
+        """Estimate the cost of this usage for the model ``name``.
+
+        Per-million-token rates are hard-coded for ``gemini-2.5-pro`` and
+        ``gemini-2.5-flash``. Thought tokens are charged at the same rate as
+        response tokens. The sum is multiplied by 18.65 before returning.
+
+        Args:
+            name: Model name; ``"gemini-2.5-pro"`` or ``"gemini-2.5-flash"``.
+
+        Returns:
+            The estimated cost as a float.
+
+        Raises:
+            ValueError: If ``name`` is not one of the supported models.
+        """
+        million = 1000000
+        # NOTE(review): 18.65 looks like a currency conversion factor
+        # (USD to MXN?) — confirm with the owner and consider making it configurable.
+        conversion = 18.65
+        if name == "gemini-2.5-pro":
+            # Prompts above 200,000 tokens use the higher pair of rates.
+            if self.prompt_tokens > 200000:
+                input_rate, output_rate = 2.5 / million, 15 / million
+            else:
+                input_rate, output_rate = 1.25 / million, 10 / million
+        elif name == "gemini-2.5-flash":
+            input_rate, output_rate = 0.30 / million, 2.5 / million
+        else:
+            # ValueError is still caught by callers that catch Exception.
+            raise ValueError("Invalid model")
+        input_cost = self.prompt_tokens * input_rate
+        output_cost = self.thought_tokens * output_rate + self.response_tokens * output_rate
+        return (input_cost + output_cost) * conversion
+
+
+
+class Generation(BaseModel):
+    """A class to represent a single generation from a model.
+
+    Attributes:
+        text: The generated text, or None when no text was set.
+        tool_calls: The tool calls attached to the generation, or None.
+        usage: A Usage object holding the token counts for the generation.
+        extra: A free-form dict of additional data attached to the generation.
+    """
+
+    text: str | None = None
+    tool_calls: list[ToolCall] | None = None
+    # NOTE(review): the two defaults below are mutable objects. Pydantic is
+    # documented to copy field defaults per instance, so they should not be
+    # shared between Generation objects — confirm for the pinned version.
+    usage: Usage = Usage()
+    extra: dict = {}
+
+
+# Type variable constrained to pydantic BaseModel subclasses; lets a method
+# that receives a model class declare that it returns an instance of it.
+T = TypeVar("T", bound=BaseModel)
+
+
+class BaseLLM(ABC):
+    """An abstract base class for all LLMs.
+
+    Subclasses must implement ``generate``, ``structured_generation`` and
+    ``async_generate``.
+    """
+
+    @abstractmethod
+    def generate(
+        self,
+        model: str,
+        prompt: Any,
+        tools: list | None = None,
+        system_prompt: str | None = None,
+    ) -> Generation:
+        """Generates text from a prompt.
+
+        Args:
+            model: The model to use for generation.
+            prompt: The prompt to generate text from.
+            tools: An optional list of tools to use for generation.
+            system_prompt: An optional system prompt to guide the model's behavior.
+
+        Returns:
+            A Generation object containing the generated text and usage metadata.
+        """
+        ...
+
+    @abstractmethod
+    def structured_generation(
+        self,
+        model: str,
+        prompt: Any,
+        response_model: Type[T],
+        tools: list | None = None,
+    ) -> T:
+        """Generates structured data from a prompt.
+
+        Args:
+            model: The model to use for generation.
+            prompt: The prompt to generate text from.
+            response_model: The pydantic model to parse the response into.
+            tools: An optional list of tools to use for generation.
+
+        Returns:
+            An instance of the provided pydantic model.
+        """
+        ...
+
+    @abstractmethod
+    async def async_generate(
+        self,
+        model: str,
+        prompt: Any,
+        tools: list | None = None,
+        system_prompt: str | None = None,
+        tool_mode: str = "AUTO",
+    ) -> Generation:
+        """Asynchronously generates text from a prompt.
+
+        Args:
+            model: The model to use for generation.
+            prompt: The prompt to generate text from.
+            tools: An optional list of tools to use for generation.
+            system_prompt: An optional system prompt to guide the model's behavior.
+            tool_mode: The tool-calling mode, "AUTO" by default. The accepted
+                values are defined by the implementing subclass.
+
+        Returns:
+            A Generation object containing the generated text and usage metadata.
+        """
+        ...
--- a/packages/llm/src/llm/py.typed
+++ b/packages/llm/src/llm/py.typed
--- a/packages/llm/src/llm/vertex_ai.py
+++ b/packages/llm/src/llm/vertex_ai.py
@@ -0,0 +1,181 @@
+import logging
+from typing import Any, Type
+
+from google import genai
+from google.genai import types
+from tenacity import retry, stop_after_attempt, wait_exponential
+
+from rag_eval.config import settings
+
+from .base import BaseLLM, Generation, T, ToolCall, Usage
+
+# Module-level logger named after this module; no handlers or levels are
+# configured here.
+logger = logging.getLogger(__name__)
+
+
+class VertexAILLM(BaseLLM):
+    """A class for interacting with the Vertex AI API."""
+
+    # NOTE(review): retries are currently disabled. Each public method used to
+    # carry a commented-out decorator:
+    #   @retry(wait=wait_exponential(multiplier=1, min=2, max=60),
+    #          stop=stop_after_attempt(3), reraise=True)
+    # Re-enable it deliberately (ideally restricted to transient errors) or
+    # drop the tenacity imports.
+
+    def __init__(
+        self, project: str | None = None, location: str | None = None, thinking: int = 0
+    ) -> None:
+        """Initializes the VertexAILLM client.
+
+        Args:
+            project: The Google Cloud project ID. Falls back to
+                ``settings.project_id`` when omitted or empty.
+            location: The Google Cloud location. Falls back to
+                ``settings.location`` when omitted or empty.
+            thinking: The thinking budget sent with every ``generate`` and
+                ``async_generate`` request.
+        """
+        self.client = genai.Client(
+            vertexai=True,
+            project=project or settings.project_id,
+            location=location or settings.location,
+        )
+        self.thinking_budget = thinking
+
+    def _build_config(
+        self,
+        tools: list | None,
+        system_prompt: str | None,
+        tool_mode: str,
+    ) -> types.GenerateContentConfig:
+        """Builds the request config shared by ``generate`` and ``async_generate``."""
+        return types.GenerateContentConfig(
+            # A fresh list per call replaces the former shared mutable default.
+            tools=tools if tools is not None else [],
+            system_instruction=system_prompt,
+            thinking_config=types.ThinkingConfig(
+                thinking_budget=self.thinking_budget
+            ),
+            tool_config=types.ToolConfig(
+                function_calling_config=types.FunctionCallingConfig(
+                    mode=tool_mode
+                )
+            ),
+        )
+
+    def generate(
+        self,
+        model: str,
+        prompt: Any,
+        tools: list | None = None,
+        system_prompt: str | None = None,
+        tool_mode: str = "AUTO",
+    ) -> Generation:
+        """Generates text using the specified model and prompt.
+
+        Args:
+            model: The name of the model to use for generation.
+            prompt: The prompt to use for generation.
+            tools: A list of tools to use for generation; ``None`` means no tools.
+            system_prompt: An optional system prompt to guide the model's behavior.
+            tool_mode: The function-calling mode forwarded to
+                ``FunctionCallingConfig``.
+
+        Returns:
+            A Generation object containing the generated text and usage metadata.
+        """
+        # Lazy %-style arguments: the (potentially large) values are only
+        # formatted when debug logging is enabled.
+        logger.debug("Entering VertexAILLM.generate")
+        logger.debug("Model: %s, Tool Mode: %s", model, tool_mode)
+        logger.debug("System prompt: %s", system_prompt)
+        logger.debug("Calling Vertex AI API: models.generate_content...")
+        response = self.client.models.generate_content(
+            model=model,
+            contents=prompt,
+            config=self._build_config(tools, system_prompt, tool_mode),
+        )
+        logger.debug("Received response from Vertex AI API.")
+        logger.debug("API Response: %s", response)
+
+        return self._create_generation(response)
+
+    def structured_generation(
+        self,
+        model: str,
+        prompt: Any,
+        response_model: Type[T],
+        system_prompt: str | None = None,
+        tools: list | None = None,
+    ) -> T:
+        """Generates structured data from a prompt.
+
+        The request asks for a JSON response matching ``response_model`` and
+        the response text is parsed with ``response_model.model_validate_json``.
+        No thinking or tool configuration is sent by this method.
+
+        Args:
+            model: The model to use for generation.
+            prompt: The prompt to generate text from.
+            response_model: The pydantic model to parse the response into.
+            system_prompt: An optional system prompt to guide the model's behavior.
+            tools: An optional list of tools to use for generation.
+
+        Returns:
+            An instance of the provided pydantic model.
+        """
+        config = genai.types.GenerateContentConfig(
+            response_mime_type="application/json",
+            response_schema=response_model,
+            system_instruction=system_prompt,
+            tools=tools,
+        )
+
+        response: genai.types.GenerateContentResponse = (
+            self.client.models.generate_content(
+                model=model, contents=prompt, config=config
+            )
+        )
+
+        return response_model.model_validate_json(response.text)
+
+    async def async_generate(
+        self,
+        model: str,
+        prompt: Any,
+        tools: list | None = None,
+        system_prompt: str | None = None,
+        tool_mode: str = "AUTO",
+    ) -> Generation:
+        """Asynchronously generates text using the specified model and prompt.
+
+        Args:
+            model: The name of the model to use for generation.
+            prompt: The prompt to use for generation.
+            tools: A list of tools to use for generation; ``None`` means no tools.
+            system_prompt: An optional system prompt to guide the model's behavior.
+            tool_mode: The function-calling mode forwarded to
+                ``FunctionCallingConfig``.
+
+        Returns:
+            A Generation object containing the generated text and usage metadata.
+        """
+        response = await self.client.aio.models.generate_content(
+            model=model,
+            contents=prompt,
+            config=self._build_config(tools, system_prompt, tool_mode),
+        )
+
+        return self._create_generation(response)
+
+    def _create_generation(self, response) -> Generation:
+        """Converts a ``generate_content`` response into a ``Generation``.
+
+        Only the first candidate is inspected. If it contains function calls
+        they are returned as ``tool_calls`` and the candidate's content is
+        kept under ``extra["original_content"]``. Otherwise the text of all
+        its parts is joined into ``text``. Missing usage metadata yields
+        zero counters, and a response without candidates, content or parts
+        yields ``text=None`` instead of raising.
+        """
+        logger.debug("Creating Generation object from API response.")
+        metadata = response.usage_metadata
+        if metadata is None:
+            usage = Usage()
+        else:
+            usage = Usage(
+                prompt_tokens=metadata.prompt_token_count,
+                thought_tokens=metadata.thoughts_token_count or 0,
+                response_tokens=metadata.candidates_token_count,
+            )
+
+        logger.debug("usage=%r", usage)
+        logger.debug("response=%r", response)
+
+        # Any of candidates / content / parts may be absent on the response.
+        candidates = response.candidates or []
+        content = candidates[0].content if candidates else None
+        parts = (content.parts if content is not None else None) or []
+        if not parts:
+            logger.warning("Vertex AI response contains no content parts.")
+
+        tool_calls = [
+            # A call without arguments may carry args=None; ToolCall needs a dict.
+            ToolCall(name=fn.name, arguments=fn.args or {})
+            for part in parts
+            if (fn := part.function_call)
+        ]
+
+        if tool_calls:
+            logger.debug("Found %d tool calls.", len(tool_calls))
+            return Generation(
+                tool_calls=tool_calls,
+                usage=usage,
+                extra={"original_content": content},
+            )
+
+        logger.debug("No tool calls found, returning text response.")
+        # Join every text part rather than returning only the first one.
+        texts = [part.text for part in parts if part.text is not None]
+        text = "".join(texts) if texts else None
+        return Generation(text=text, usage=usage)