Merge pull request 'Replace local Tool call with MCP implementation' (#2) from mcp into main
Reviewed-on: va/legacy-rag#2
This commit was merged in pull request #2.
This commit is contained in:
@@ -1,2 +1,3 @@
|
|||||||
Use `uv` for project management.
|
Use `uv` for project management.
|
||||||
Use `uv run ruff check` for linting, and `uv run ty check` for type checking
|
Use `uv run ruff check` for linting, and `uv run ty check` for type checking
|
||||||
|
Use `uv run pytest` for testing.
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
[project]
|
[project]
|
||||||
name = "rag-eval"
|
name = "va-agent"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
description = "Add your description here"
|
description = "Add your description here"
|
||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
@@ -9,28 +9,20 @@ authors = [
|
|||||||
]
|
]
|
||||||
requires-python = "~=3.12.0"
|
requires-python = "~=3.12.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"aiohttp>=3.13.3",
|
|
||||||
"gcloud-aio-auth>=5.4.2",
|
|
||||||
"gcloud-aio-storage>=9.6.1",
|
|
||||||
"google-adk>=1.14.1",
|
"google-adk>=1.14.1",
|
||||||
"google-cloud-aiplatform>=1.126.1",
|
"google-cloud-firestore>=2.23.0",
|
||||||
"google-cloud-storage>=2.19.0",
|
|
||||||
"pydantic-settings[yaml]>=2.13.1",
|
"pydantic-settings[yaml]>=2.13.1",
|
||||||
"structlog>=25.5.0",
|
|
||||||
]
|
]
|
||||||
|
|
||||||
[project.scripts]
|
|
||||||
ragops = "rag_eval.cli:app"
|
|
||||||
|
|
||||||
[build-system]
|
[build-system]
|
||||||
requires = ["uv_build>=0.8.3,<0.9.0"]
|
requires = ["uv_build>=0.8.3,<0.9.0"]
|
||||||
build-backend = "uv_build"
|
build-backend = "uv_build"
|
||||||
|
|
||||||
[dependency-groups]
|
[dependency-groups]
|
||||||
dev = [
|
dev = [
|
||||||
"clai>=1.62.0",
|
|
||||||
"marimo>=0.20.1",
|
|
||||||
"pytest>=8.4.1",
|
"pytest>=8.4.1",
|
||||||
|
"pytest-asyncio>=1.3.0",
|
||||||
|
"pytest-sugar>=1.1.1",
|
||||||
"ruff>=0.12.10",
|
"ruff>=0.12.10",
|
||||||
"ty>=0.0.1a19",
|
"ty>=0.0.1a19",
|
||||||
]
|
]
|
||||||
@@ -43,4 +35,10 @@ exclude = ["scripts"]
|
|||||||
|
|
||||||
[tool.ruff.lint]
|
[tool.ruff.lint]
|
||||||
select = ['ALL']
|
select = ['ALL']
|
||||||
ignore = ['D203', 'D213', 'COM812']
|
ignore = [
|
||||||
|
'D203', # one-blank-line-before-class
|
||||||
|
'D213', # multi-line-summary-second-line
|
||||||
|
'COM812', # missing-trailing-comma
|
||||||
|
'ANN401', # dynamically-typed-any
|
||||||
|
'ERA001', # commented-out-code
|
||||||
|
]
|
||||||
|
|||||||
@@ -1,59 +0,0 @@
|
|||||||
# Copyright 2026 Google LLC
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
|
|
||||||
"""ADK agent with vector search RAG tool."""
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import os
|
|
||||||
|
|
||||||
from google.adk.agents.llm_agent import Agent
|
|
||||||
|
|
||||||
from .config_helper import settings
|
|
||||||
from .vector_search_tool import VectorSearchTool
|
|
||||||
|
|
||||||
# Set environment variables for Google GenAI Client to use Vertex AI
|
|
||||||
os.environ["GOOGLE_CLOUD_PROJECT"] = settings.project_id
|
|
||||||
os.environ["GOOGLE_CLOUD_LOCATION"] = settings.location
|
|
||||||
|
|
||||||
# Create vector search tool with configuration
|
|
||||||
vector_search_tool = VectorSearchTool(
|
|
||||||
name='conocimiento',
|
|
||||||
description='Search the vector index for company products and services information',
|
|
||||||
embedder=settings.embedder,
|
|
||||||
project_id=settings.project_id,
|
|
||||||
location=settings.location,
|
|
||||||
bucket=settings.bucket,
|
|
||||||
index_name=settings.index_name,
|
|
||||||
index_endpoint=settings.index_endpoint,
|
|
||||||
index_deployed_id=settings.index_deployed_id,
|
|
||||||
similarity_top_k=5,
|
|
||||||
min_similarity_threshold=0.6,
|
|
||||||
relative_threshold_factor=0.9,
|
|
||||||
)
|
|
||||||
|
|
||||||
# Create agent with vector search tool
|
|
||||||
# Configure model with Vertex AI fully qualified path
|
|
||||||
model_path = (
|
|
||||||
f'projects/{settings.project_id}/locations/{settings.location}/'
|
|
||||||
f'publishers/google/models/{settings.agent_language_model}'
|
|
||||||
)
|
|
||||||
|
|
||||||
root_agent = Agent(
|
|
||||||
model=model_path,
|
|
||||||
name=settings.agent_name,
|
|
||||||
description='A helpful assistant for user questions.',
|
|
||||||
instruction=settings.agent_instructions,
|
|
||||||
tools=[vector_search_tool],
|
|
||||||
)
|
|
||||||
@@ -1,68 +0,0 @@
|
|||||||
"""Abstract base class for vector search providers."""
|
|
||||||
|
|
||||||
from abc import ABC, abstractmethod
|
|
||||||
from typing import Any, TypedDict
|
|
||||||
|
|
||||||
|
|
||||||
class SearchResult(TypedDict):
|
|
||||||
"""A single vector search result."""
|
|
||||||
|
|
||||||
id: str
|
|
||||||
distance: float
|
|
||||||
content: str
|
|
||||||
|
|
||||||
|
|
||||||
class BaseVectorSearch(ABC):
|
|
||||||
"""Abstract base class for a vector search provider.
|
|
||||||
|
|
||||||
This class defines the standard interface for creating a vector search
|
|
||||||
index and running queries against it.
|
|
||||||
"""
|
|
||||||
|
|
||||||
@abstractmethod
|
|
||||||
def create_index(
|
|
||||||
self, name: str, content_path: str, **kwargs: Any # noqa: ANN401
|
|
||||||
) -> None:
|
|
||||||
"""Create a new vector search index with the provided content.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
name: The desired name for the new index.
|
|
||||||
content_path: Path to the data used to populate the index.
|
|
||||||
**kwargs: Additional provider-specific arguments.
|
|
||||||
|
|
||||||
"""
|
|
||||||
...
|
|
||||||
|
|
||||||
@abstractmethod
|
|
||||||
def update_index(
|
|
||||||
self, index_name: str, content_path: str, **kwargs: Any # noqa: ANN401
|
|
||||||
) -> None:
|
|
||||||
"""Update an existing vector search index with new content.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
index_name: The name of the index to update.
|
|
||||||
content_path: Path to the data used to populate the index.
|
|
||||||
**kwargs: Additional provider-specific arguments.
|
|
||||||
|
|
||||||
"""
|
|
||||||
...
|
|
||||||
|
|
||||||
@abstractmethod
|
|
||||||
def run_query(
|
|
||||||
self,
|
|
||||||
deployed_index_id: str,
|
|
||||||
query: list[float],
|
|
||||||
limit: int,
|
|
||||||
) -> list[SearchResult]:
|
|
||||||
"""Run a similarity search query against the index.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
deployed_index_id: The ID of the deployed index.
|
|
||||||
query: The embedding vector for the search query.
|
|
||||||
limit: Maximum number of nearest neighbors to return.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
A list of matched items with id, distance, and content.
|
|
||||||
|
|
||||||
"""
|
|
||||||
...
|
|
||||||
@@ -1,120 +0,0 @@
|
|||||||
# Copyright 2026 Google LLC
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
|
|
||||||
"""Configuration helper for ADK agent with vector search."""
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import os
|
|
||||||
from dataclasses import dataclass
|
|
||||||
from functools import cached_property
|
|
||||||
|
|
||||||
import vertexai
|
|
||||||
from pydantic_settings import (
|
|
||||||
BaseSettings,
|
|
||||||
PydanticBaseSettingsSource,
|
|
||||||
SettingsConfigDict,
|
|
||||||
YamlConfigSettingsSource,
|
|
||||||
)
|
|
||||||
from vertexai.language_models import TextEmbeddingModel
|
|
||||||
|
|
||||||
CONFIG_FILE_PATH = os.getenv("CONFIG_YAML", "config.yaml")
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class EmbeddingResult:
|
|
||||||
"""Result from embedding a query."""
|
|
||||||
|
|
||||||
embeddings: list[list[float]]
|
|
||||||
|
|
||||||
|
|
||||||
class VertexAIEmbedder:
|
|
||||||
"""Embedder using Vertex AI TextEmbeddingModel."""
|
|
||||||
|
|
||||||
def __init__(self, model_name: str, project_id: str, location: str) -> None:
|
|
||||||
"""Initialize the embedder.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
model_name: Name of the embedding model (e.g., 'text-embedding-004')
|
|
||||||
project_id: GCP project ID
|
|
||||||
location: GCP location
|
|
||||||
|
|
||||||
"""
|
|
||||||
vertexai.init(project=project_id, location=location)
|
|
||||||
self.model = TextEmbeddingModel.from_pretrained(model_name)
|
|
||||||
|
|
||||||
async def embed_query(self, query: str) -> EmbeddingResult:
|
|
||||||
"""Embed a single query string.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
query: Text to embed
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
EmbeddingResult with embeddings list
|
|
||||||
|
|
||||||
"""
|
|
||||||
embeddings = self.model.get_embeddings([query])
|
|
||||||
return EmbeddingResult(embeddings=[list(embeddings[0].values)])
|
|
||||||
|
|
||||||
|
|
||||||
class AgentSettings(BaseSettings):
|
|
||||||
"""Settings for ADK agent with vector search."""
|
|
||||||
|
|
||||||
# Google Cloud settings
|
|
||||||
project_id: str
|
|
||||||
location: str
|
|
||||||
bucket: str
|
|
||||||
|
|
||||||
# Agent configuration
|
|
||||||
agent_name: str
|
|
||||||
agent_instructions: str
|
|
||||||
agent_language_model: str
|
|
||||||
agent_embedding_model: str
|
|
||||||
|
|
||||||
# Vector index configuration
|
|
||||||
index_name: str
|
|
||||||
index_deployed_id: str
|
|
||||||
index_endpoint: str
|
|
||||||
|
|
||||||
model_config = SettingsConfigDict(
|
|
||||||
yaml_file=CONFIG_FILE_PATH,
|
|
||||||
extra="ignore", # Ignore extra fields from config.yaml
|
|
||||||
)
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def settings_customise_sources(
|
|
||||||
cls,
|
|
||||||
settings_cls: type[BaseSettings],
|
|
||||||
init_settings: PydanticBaseSettingsSource, # noqa: ARG003
|
|
||||||
env_settings: PydanticBaseSettingsSource,
|
|
||||||
dotenv_settings: PydanticBaseSettingsSource, # noqa: ARG003
|
|
||||||
file_secret_settings: PydanticBaseSettingsSource, # noqa: ARG003
|
|
||||||
) -> tuple[PydanticBaseSettingsSource, ...]:
|
|
||||||
"""Use env vars and YAML as settings sources."""
|
|
||||||
return (
|
|
||||||
env_settings,
|
|
||||||
YamlConfigSettingsSource(settings_cls),
|
|
||||||
)
|
|
||||||
|
|
||||||
@cached_property
|
|
||||||
def embedder(self) -> VertexAIEmbedder:
|
|
||||||
"""Return an embedder configured for the agent's embedding model."""
|
|
||||||
return VertexAIEmbedder(
|
|
||||||
model_name=self.agent_embedding_model,
|
|
||||||
project_id=self.project_id,
|
|
||||||
location=self.location,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
settings = AgentSettings.model_validate({})
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
"""File storage provider implementations."""
|
|
||||||
@@ -1,56 +0,0 @@
|
|||||||
"""Abstract base class for file storage providers."""
|
|
||||||
|
|
||||||
from abc import ABC, abstractmethod
|
|
||||||
from typing import BinaryIO
|
|
||||||
|
|
||||||
|
|
||||||
class BaseFileStorage(ABC):
|
|
||||||
"""Abstract base class for a remote file processor.
|
|
||||||
|
|
||||||
Defines the interface for listing and processing files from
|
|
||||||
a remote source.
|
|
||||||
"""
|
|
||||||
|
|
||||||
@abstractmethod
|
|
||||||
def upload_file(
|
|
||||||
self,
|
|
||||||
file_path: str,
|
|
||||||
destination_blob_name: str,
|
|
||||||
content_type: str | None = None,
|
|
||||||
) -> None:
|
|
||||||
"""Upload a file to the remote source.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
file_path: The local path to the file to upload.
|
|
||||||
destination_blob_name: Name of the file in remote storage.
|
|
||||||
content_type: The content type of the file.
|
|
||||||
|
|
||||||
"""
|
|
||||||
...
|
|
||||||
|
|
||||||
@abstractmethod
|
|
||||||
def list_files(self, path: str | None = None) -> list[str]:
|
|
||||||
"""List files from a remote location.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
path: Path to a specific file or directory. If None,
|
|
||||||
recursively lists all files in the bucket.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
A list of file paths.
|
|
||||||
|
|
||||||
"""
|
|
||||||
...
|
|
||||||
|
|
||||||
@abstractmethod
|
|
||||||
def get_file_stream(self, file_name: str) -> BinaryIO:
|
|
||||||
"""Get a file from the remote source as a file-like object.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
file_name: The name of the file to retrieve.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
A file-like object containing the file data.
|
|
||||||
|
|
||||||
"""
|
|
||||||
...
|
|
||||||
@@ -1,188 +0,0 @@
|
|||||||
"""Google Cloud Storage file storage implementation."""
|
|
||||||
|
|
||||||
import asyncio
|
|
||||||
import io
|
|
||||||
import logging
|
|
||||||
from typing import BinaryIO
|
|
||||||
|
|
||||||
import aiohttp
|
|
||||||
from gcloud.aio.storage import Storage
|
|
||||||
from google.cloud import storage
|
|
||||||
|
|
||||||
from .base import BaseFileStorage
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
HTTP_TOO_MANY_REQUESTS = 429
|
|
||||||
HTTP_SERVER_ERROR = 500
|
|
||||||
|
|
||||||
|
|
||||||
class GoogleCloudFileStorage(BaseFileStorage):
|
|
||||||
"""File storage backed by Google Cloud Storage."""
|
|
||||||
|
|
||||||
def __init__(self, bucket: str) -> None: # noqa: D107
|
|
||||||
self.bucket_name = bucket
|
|
||||||
|
|
||||||
self.storage_client = storage.Client()
|
|
||||||
self.bucket_client = self.storage_client.bucket(self.bucket_name)
|
|
||||||
self._aio_session: aiohttp.ClientSession | None = None
|
|
||||||
self._aio_storage: Storage | None = None
|
|
||||||
self._cache: dict[str, bytes] = {}
|
|
||||||
|
|
||||||
def upload_file(
|
|
||||||
self,
|
|
||||||
file_path: str,
|
|
||||||
destination_blob_name: str,
|
|
||||||
content_type: str | None = None,
|
|
||||||
) -> None:
|
|
||||||
"""Upload a file to Cloud Storage.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
file_path: The local path to the file to upload.
|
|
||||||
destination_blob_name: Name of the blob in the bucket.
|
|
||||||
content_type: The content type of the file.
|
|
||||||
|
|
||||||
"""
|
|
||||||
blob = self.bucket_client.blob(destination_blob_name)
|
|
||||||
blob.upload_from_filename(
|
|
||||||
file_path,
|
|
||||||
content_type=content_type,
|
|
||||||
if_generation_match=0,
|
|
||||||
)
|
|
||||||
self._cache.pop(destination_blob_name, None)
|
|
||||||
|
|
||||||
def list_files(self, path: str | None = None) -> list[str]:
|
|
||||||
"""List all files at the given path in the bucket.
|
|
||||||
|
|
||||||
If path is None, recursively lists all files.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
path: Prefix to filter files by.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
A list of blob names.
|
|
||||||
|
|
||||||
"""
|
|
||||||
blobs = self.storage_client.list_blobs(
|
|
||||||
self.bucket_name, prefix=path,
|
|
||||||
)
|
|
||||||
return [blob.name for blob in blobs]
|
|
||||||
|
|
||||||
def get_file_stream(self, file_name: str) -> BinaryIO:
|
|
||||||
"""Get a file as a file-like object, using cache.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
file_name: The blob name to retrieve.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
A BytesIO stream with the file contents.
|
|
||||||
|
|
||||||
"""
|
|
||||||
if file_name not in self._cache:
|
|
||||||
blob = self.bucket_client.blob(file_name)
|
|
||||||
self._cache[file_name] = blob.download_as_bytes()
|
|
||||||
file_stream = io.BytesIO(self._cache[file_name])
|
|
||||||
file_stream.name = file_name
|
|
||||||
return file_stream
|
|
||||||
|
|
||||||
def _get_aio_session(self) -> aiohttp.ClientSession:
|
|
||||||
if self._aio_session is None or self._aio_session.closed:
|
|
||||||
connector = aiohttp.TCPConnector(
|
|
||||||
limit=300, limit_per_host=50,
|
|
||||||
)
|
|
||||||
timeout = aiohttp.ClientTimeout(total=60)
|
|
||||||
self._aio_session = aiohttp.ClientSession(
|
|
||||||
timeout=timeout, connector=connector,
|
|
||||||
)
|
|
||||||
return self._aio_session
|
|
||||||
|
|
||||||
def _get_aio_storage(self) -> Storage:
|
|
||||||
if self._aio_storage is None:
|
|
||||||
self._aio_storage = Storage(
|
|
||||||
session=self._get_aio_session(),
|
|
||||||
)
|
|
||||||
return self._aio_storage
|
|
||||||
|
|
||||||
async def async_get_file_stream(
|
|
||||||
self, file_name: str, max_retries: int = 3,
|
|
||||||
) -> BinaryIO:
|
|
||||||
"""Get a file asynchronously with retry on transient errors.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
file_name: The blob name to retrieve.
|
|
||||||
max_retries: Maximum number of retry attempts.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
A BytesIO stream with the file contents.
|
|
||||||
|
|
||||||
Raises:
|
|
||||||
TimeoutError: If all retry attempts fail.
|
|
||||||
|
|
||||||
"""
|
|
||||||
if file_name in self._cache:
|
|
||||||
file_stream = io.BytesIO(self._cache[file_name])
|
|
||||||
file_stream.name = file_name
|
|
||||||
return file_stream
|
|
||||||
|
|
||||||
storage_client = self._get_aio_storage()
|
|
||||||
last_exception: Exception | None = None
|
|
||||||
|
|
||||||
for attempt in range(max_retries):
|
|
||||||
try:
|
|
||||||
self._cache[file_name] = await storage_client.download(
|
|
||||||
self.bucket_name, file_name,
|
|
||||||
)
|
|
||||||
file_stream = io.BytesIO(self._cache[file_name])
|
|
||||||
file_stream.name = file_name
|
|
||||||
except TimeoutError as exc:
|
|
||||||
last_exception = exc
|
|
||||||
logger.warning(
|
|
||||||
"Timeout downloading gs://%s/%s (attempt %d/%d)",
|
|
||||||
self.bucket_name,
|
|
||||||
file_name,
|
|
||||||
attempt + 1,
|
|
||||||
max_retries,
|
|
||||||
)
|
|
||||||
except aiohttp.ClientResponseError as exc:
|
|
||||||
last_exception = exc
|
|
||||||
if (
|
|
||||||
exc.status == HTTP_TOO_MANY_REQUESTS
|
|
||||||
or exc.status >= HTTP_SERVER_ERROR
|
|
||||||
):
|
|
||||||
logger.warning(
|
|
||||||
"HTTP %d downloading gs://%s/%s "
|
|
||||||
"(attempt %d/%d)",
|
|
||||||
exc.status,
|
|
||||||
self.bucket_name,
|
|
||||||
file_name,
|
|
||||||
attempt + 1,
|
|
||||||
max_retries,
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
raise
|
|
||||||
else:
|
|
||||||
return file_stream
|
|
||||||
|
|
||||||
if attempt < max_retries - 1:
|
|
||||||
delay = 0.5 * (2**attempt)
|
|
||||||
await asyncio.sleep(delay)
|
|
||||||
|
|
||||||
msg = (
|
|
||||||
f"Failed to download gs://{self.bucket_name}/{file_name} "
|
|
||||||
f"after {max_retries} attempts"
|
|
||||||
)
|
|
||||||
raise TimeoutError(msg) from last_exception
|
|
||||||
|
|
||||||
def delete_files(self, path: str) -> None:
|
|
||||||
"""Delete all files at the given path in the bucket.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
path: Prefix of blobs to delete.
|
|
||||||
|
|
||||||
"""
|
|
||||||
blobs = self.storage_client.list_blobs(
|
|
||||||
self.bucket_name, prefix=path,
|
|
||||||
)
|
|
||||||
for blob in blobs:
|
|
||||||
blob.delete()
|
|
||||||
self._cache.pop(blob.name, None)
|
|
||||||
@@ -1,176 +0,0 @@
|
|||||||
# Copyright 2026 Google LLC
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
|
|
||||||
"""A retrieval tool that uses Vertex AI Vector Search (not RAG Engine)."""
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import logging
|
|
||||||
from typing import Any
|
|
||||||
from typing import TYPE_CHECKING
|
|
||||||
|
|
||||||
from google.adk.tools.tool_context import ToolContext
|
|
||||||
from typing_extensions import override
|
|
||||||
|
|
||||||
from .vertex_ai import GoogleCloudVectorSearch
|
|
||||||
|
|
||||||
from google.adk.tools.retrieval.base_retrieval_tool import BaseRetrievalTool
|
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
|
||||||
from .config_helper import VertexAIEmbedder
|
|
||||||
|
|
||||||
logger = logging.getLogger('google_adk.' + __name__)
|
|
||||||
|
|
||||||
|
|
||||||
class VectorSearchTool(BaseRetrievalTool):
|
|
||||||
"""A retrieval tool using Vertex AI Vector Search (not RAG Engine).
|
|
||||||
|
|
||||||
This tool uses GoogleCloudVectorSearch to query a vector index directly,
|
|
||||||
which is useful when Vertex AI RAG Engine is not available in your GCP project.
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(
|
|
||||||
self,
|
|
||||||
*,
|
|
||||||
name: str,
|
|
||||||
description: str,
|
|
||||||
embedder: VertexAIEmbedder,
|
|
||||||
project_id: str,
|
|
||||||
location: str,
|
|
||||||
bucket: str,
|
|
||||||
index_name: str,
|
|
||||||
index_endpoint: str,
|
|
||||||
index_deployed_id: str,
|
|
||||||
similarity_top_k: int = 5,
|
|
||||||
min_similarity_threshold: float = 0.6,
|
|
||||||
relative_threshold_factor: float = 0.9,
|
|
||||||
):
|
|
||||||
"""Initialize the VectorSearchTool.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
name: Tool name for function declaration
|
|
||||||
description: Tool description for LLM
|
|
||||||
embedder: Embedder instance for query embedding
|
|
||||||
project_id: GCP project ID
|
|
||||||
location: GCP location (e.g., 'us-central1')
|
|
||||||
bucket: GCS bucket for content storage
|
|
||||||
index_name: Vector search index name
|
|
||||||
index_endpoint: Resource name of index endpoint
|
|
||||||
index_deployed_id: Deployed index ID
|
|
||||||
similarity_top_k: Number of results to retrieve (default: 5)
|
|
||||||
min_similarity_threshold: Minimum similarity score 0.0-1.0 (default: 0.6)
|
|
||||||
relative_threshold_factor: Factor of max similarity for dynamic filtering (default: 0.9)
|
|
||||||
"""
|
|
||||||
super().__init__(name=name, description=description)
|
|
||||||
|
|
||||||
self.embedder = embedder
|
|
||||||
self.index_endpoint = index_endpoint
|
|
||||||
self.index_deployed_id = index_deployed_id
|
|
||||||
self.similarity_top_k = similarity_top_k
|
|
||||||
self.min_similarity_threshold = min_similarity_threshold
|
|
||||||
self.relative_threshold_factor = relative_threshold_factor
|
|
||||||
|
|
||||||
# Initialize vector search (endpoint loaded lazily on first use)
|
|
||||||
self.vector_search = GoogleCloudVectorSearch(
|
|
||||||
project_id=project_id,
|
|
||||||
location=location,
|
|
||||||
bucket=bucket,
|
|
||||||
index_name=index_name,
|
|
||||||
)
|
|
||||||
self._endpoint_loaded = False
|
|
||||||
|
|
||||||
logger.info(
|
|
||||||
'VectorSearchTool initialized with index=%s, deployed_id=%s',
|
|
||||||
index_name,
|
|
||||||
index_deployed_id,
|
|
||||||
)
|
|
||||||
|
|
||||||
@override
|
|
||||||
async def run_async(
|
|
||||||
self,
|
|
||||||
*,
|
|
||||||
args: dict[str, Any],
|
|
||||||
tool_context: ToolContext,
|
|
||||||
) -> Any:
|
|
||||||
"""Execute vector search with the user's query.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
args: Dictionary containing 'query' key
|
|
||||||
tool_context: Tool execution context
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Formatted search results as XML-like documents or error message
|
|
||||||
"""
|
|
||||||
query = args['query']
|
|
||||||
logger.debug('VectorSearchTool query: %s', query)
|
|
||||||
|
|
||||||
try:
|
|
||||||
# Load index endpoint on first use (lazy loading)
|
|
||||||
if not self._endpoint_loaded:
|
|
||||||
self.vector_search.load_index_endpoint(self.index_endpoint)
|
|
||||||
self._endpoint_loaded = True
|
|
||||||
logger.info('Index endpoint loaded successfully')
|
|
||||||
|
|
||||||
# Embed the query using the configured embedder
|
|
||||||
embedding_result = await self.embedder.embed_query(query)
|
|
||||||
query_embedding = list(embedding_result.embeddings[0])
|
|
||||||
|
|
||||||
# Run vector search
|
|
||||||
search_results = await self.vector_search.async_run_query(
|
|
||||||
deployed_index_id=self.index_deployed_id,
|
|
||||||
query=query_embedding,
|
|
||||||
limit=self.similarity_top_k,
|
|
||||||
)
|
|
||||||
|
|
||||||
# Apply similarity filtering (dual threshold approach)
|
|
||||||
if search_results:
|
|
||||||
# Dynamic threshold based on max similarity
|
|
||||||
max_similarity = max(r['distance'] for r in search_results)
|
|
||||||
dynamic_cutoff = max_similarity * self.relative_threshold_factor
|
|
||||||
|
|
||||||
# Filter by both absolute and relative thresholds
|
|
||||||
search_results = [
|
|
||||||
result
|
|
||||||
for result in search_results
|
|
||||||
if (
|
|
||||||
result['distance'] > dynamic_cutoff
|
|
||||||
and result['distance'] > self.min_similarity_threshold
|
|
||||||
)
|
|
||||||
]
|
|
||||||
|
|
||||||
logger.debug(
|
|
||||||
'VectorSearchTool results: %d documents after filtering',
|
|
||||||
len(search_results),
|
|
||||||
)
|
|
||||||
|
|
||||||
# Format results
|
|
||||||
if not search_results:
|
|
||||||
return (
|
|
||||||
f"No matching documents found for query: '{query}' "
|
|
||||||
f'(min_threshold={self.min_similarity_threshold})'
|
|
||||||
)
|
|
||||||
|
|
||||||
# Format as XML-like documents (matching pydantic_ai pattern)
|
|
||||||
formatted_results = [
|
|
||||||
f'<document {i} name={result["id"]}>\n'
|
|
||||||
f'{result["content"]}\n'
|
|
||||||
f'</document {i}>'
|
|
||||||
for i, result in enumerate(search_results, start=1)
|
|
||||||
]
|
|
||||||
|
|
||||||
return '\n'.join(formatted_results)
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error('VectorSearchTool error: %s', e, exc_info=True)
|
|
||||||
return f'Error during vector search: {str(e)}'
|
|
||||||
@@ -1,310 +0,0 @@
|
|||||||
"""Google Cloud Vertex AI Vector Search implementation."""
|
|
||||||
|
|
||||||
import asyncio
|
|
||||||
from collections.abc import Sequence
|
|
||||||
from typing import Any
|
|
||||||
from uuid import uuid4
|
|
||||||
|
|
||||||
import aiohttp
|
|
||||||
import google.auth
|
|
||||||
import google.auth.credentials
|
|
||||||
import google.auth.transport.requests
|
|
||||||
from gcloud.aio.auth import Token
|
|
||||||
from google.cloud import aiplatform
|
|
||||||
|
|
||||||
from .file_storage.google_cloud import GoogleCloudFileStorage
|
|
||||||
from .base import BaseVectorSearch, SearchResult
|
|
||||||
|
|
||||||
|
|
||||||
class GoogleCloudVectorSearch(BaseVectorSearch):
|
|
||||||
"""A vector search provider using Vertex AI Vector Search."""
|
|
||||||
|
|
||||||
def __init__(
|
|
||||||
self,
|
|
||||||
project_id: str,
|
|
||||||
location: str,
|
|
||||||
bucket: str,
|
|
||||||
index_name: str | None = None,
|
|
||||||
) -> None:
|
|
||||||
"""Initialize the GoogleCloudVectorSearch client.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
project_id: The Google Cloud project ID.
|
|
||||||
location: The Google Cloud location (e.g., 'us-central1').
|
|
||||||
bucket: The GCS bucket to use for file storage.
|
|
||||||
index_name: The name of the index.
|
|
||||||
|
|
||||||
"""
|
|
||||||
aiplatform.init(project=project_id, location=location)
|
|
||||||
self.project_id = project_id
|
|
||||||
self.location = location
|
|
||||||
self.storage = GoogleCloudFileStorage(bucket=bucket)
|
|
||||||
self.index_name = index_name
|
|
||||||
self._credentials: google.auth.credentials.Credentials | None = None
|
|
||||||
self._aio_session: aiohttp.ClientSession | None = None
|
|
||||||
self._async_token: Token | None = None
|
|
||||||
|
|
||||||
def _get_auth_headers(self) -> dict[str, str]:
|
|
||||||
if self._credentials is None:
|
|
||||||
self._credentials, _ = google.auth.default(
|
|
||||||
scopes=["https://www.googleapis.com/auth/cloud-platform"],
|
|
||||||
)
|
|
||||||
if not self._credentials.token or self._credentials.expired:
|
|
||||||
self._credentials.refresh(
|
|
||||||
google.auth.transport.requests.Request(),
|
|
||||||
)
|
|
||||||
return {
|
|
||||||
"Authorization": f"Bearer {self._credentials.token}",
|
|
||||||
"Content-Type": "application/json",
|
|
||||||
}
|
|
||||||
|
|
||||||
async def _async_get_auth_headers(self) -> dict[str, str]:
|
|
||||||
if self._async_token is None:
|
|
||||||
self._async_token = Token(
|
|
||||||
session=self._get_aio_session(),
|
|
||||||
scopes=[
|
|
||||||
"https://www.googleapis.com/auth/cloud-platform",
|
|
||||||
],
|
|
||||||
)
|
|
||||||
access_token = await self._async_token.get()
|
|
||||||
return {
|
|
||||||
"Authorization": f"Bearer {access_token}",
|
|
||||||
"Content-Type": "application/json",
|
|
||||||
}
|
|
||||||
|
|
||||||
def _get_aio_session(self) -> aiohttp.ClientSession:
|
|
||||||
if self._aio_session is None or self._aio_session.closed:
|
|
||||||
connector = aiohttp.TCPConnector(
|
|
||||||
limit=300, limit_per_host=50,
|
|
||||||
)
|
|
||||||
timeout = aiohttp.ClientTimeout(total=60)
|
|
||||||
self._aio_session = aiohttp.ClientSession(
|
|
||||||
timeout=timeout, connector=connector,
|
|
||||||
)
|
|
||||||
return self._aio_session
|
|
||||||
|
|
||||||
def create_index(
|
|
||||||
self,
|
|
||||||
name: str,
|
|
||||||
content_path: str,
|
|
||||||
*,
|
|
||||||
dimensions: int = 3072,
|
|
||||||
approximate_neighbors_count: int = 150,
|
|
||||||
distance_measure_type: str = "DOT_PRODUCT_DISTANCE",
|
|
||||||
**kwargs: Any, # noqa: ANN401, ARG002
|
|
||||||
) -> None:
|
|
||||||
"""Create a new Vertex AI Vector Search index.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
name: The display name for the new index.
|
|
||||||
content_path: GCS URI to the embeddings JSON file.
|
|
||||||
dimensions: Number of dimensions in embedding vectors.
|
|
||||||
approximate_neighbors_count: Neighbors to find per vector.
|
|
||||||
distance_measure_type: The distance measure to use.
|
|
||||||
**kwargs: Additional arguments.
|
|
||||||
|
|
||||||
"""
|
|
||||||
index = aiplatform.MatchingEngineIndex.create_tree_ah_index(
|
|
||||||
display_name=name,
|
|
||||||
contents_delta_uri=content_path,
|
|
||||||
dimensions=dimensions,
|
|
||||||
approximate_neighbors_count=approximate_neighbors_count,
|
|
||||||
distance_measure_type=distance_measure_type, # type: ignore[arg-type]
|
|
||||||
leaf_node_embedding_count=1000,
|
|
||||||
leaf_nodes_to_search_percent=10,
|
|
||||||
)
|
|
||||||
self.index = index
|
|
||||||
|
|
||||||
def update_index(
|
|
||||||
self, index_name: str, content_path: str, **kwargs: Any, # noqa: ANN401, ARG002
|
|
||||||
) -> None:
|
|
||||||
"""Update an existing Vertex AI Vector Search index.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
index_name: The resource name of the index to update.
|
|
||||||
content_path: GCS URI to the new embeddings JSON file.
|
|
||||||
**kwargs: Additional arguments.
|
|
||||||
|
|
||||||
"""
|
|
||||||
index = aiplatform.MatchingEngineIndex(index_name=index_name)
|
|
||||||
index.update_embeddings(
|
|
||||||
contents_delta_uri=content_path,
|
|
||||||
)
|
|
||||||
self.index = index
|
|
||||||
|
|
||||||
def deploy_index(
|
|
||||||
self,
|
|
||||||
index_name: str,
|
|
||||||
machine_type: str = "e2-standard-2",
|
|
||||||
) -> None:
|
|
||||||
"""Deploy a Vertex AI Vector Search index to an endpoint.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
index_name: The name of the index to deploy.
|
|
||||||
machine_type: The machine type for the endpoint.
|
|
||||||
|
|
||||||
"""
|
|
||||||
index_endpoint = aiplatform.MatchingEngineIndexEndpoint.create(
|
|
||||||
display_name=f"{index_name}-endpoint",
|
|
||||||
public_endpoint_enabled=True,
|
|
||||||
)
|
|
||||||
index_endpoint.deploy_index(
|
|
||||||
index=self.index,
|
|
||||||
deployed_index_id=(
|
|
||||||
f"{index_name.replace('-', '_')}_deployed_{uuid4().hex}"
|
|
||||||
),
|
|
||||||
machine_type=machine_type,
|
|
||||||
)
|
|
||||||
self.index_endpoint = index_endpoint
|
|
||||||
|
|
||||||
def load_index_endpoint(self, endpoint_name: str) -> None:
|
|
||||||
"""Load an existing Vertex AI Vector Search index endpoint.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
endpoint_name: The resource name of the index endpoint.
|
|
||||||
|
|
||||||
"""
|
|
||||||
self.index_endpoint = aiplatform.MatchingEngineIndexEndpoint(
|
|
||||||
endpoint_name,
|
|
||||||
)
|
|
||||||
if not self.index_endpoint.public_endpoint_domain_name:
|
|
||||||
msg = (
|
|
||||||
"The index endpoint does not have a public endpoint. "
|
|
||||||
"Ensure the endpoint is configured for public access."
|
|
||||||
)
|
|
||||||
raise ValueError(msg)
|
|
||||||
|
|
||||||
def run_query(
|
|
||||||
self,
|
|
||||||
deployed_index_id: str,
|
|
||||||
query: list[float],
|
|
||||||
limit: int,
|
|
||||||
) -> list[SearchResult]:
|
|
||||||
"""Run a similarity search query against the deployed index.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
deployed_index_id: The ID of the deployed index.
|
|
||||||
query: The embedding vector for the search query.
|
|
||||||
limit: Maximum number of nearest neighbors to return.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
A list of matched items with id, distance, and content.
|
|
||||||
|
|
||||||
"""
|
|
||||||
response = self.index_endpoint.find_neighbors(
|
|
||||||
deployed_index_id=deployed_index_id,
|
|
||||||
queries=[query],
|
|
||||||
num_neighbors=limit,
|
|
||||||
)
|
|
||||||
results = []
|
|
||||||
for neighbor in response[0]:
|
|
||||||
file_path = (
|
|
||||||
f"{self.index_name}/contents/{neighbor.id}.md"
|
|
||||||
)
|
|
||||||
content = (
|
|
||||||
self.storage.get_file_stream(file_path)
|
|
||||||
.read()
|
|
||||||
.decode("utf-8")
|
|
||||||
)
|
|
||||||
results.append(
|
|
||||||
SearchResult(
|
|
||||||
id=neighbor.id,
|
|
||||||
distance=float(neighbor.distance or 0),
|
|
||||||
content=content,
|
|
||||||
),
|
|
||||||
)
|
|
||||||
return results
|
|
||||||
|
|
||||||
async def async_run_query(
|
|
||||||
self,
|
|
||||||
deployed_index_id: str,
|
|
||||||
query: Sequence[float],
|
|
||||||
limit: int,
|
|
||||||
) -> list[SearchResult]:
|
|
||||||
"""Run an async similarity search via the REST API.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
deployed_index_id: The ID of the deployed index.
|
|
||||||
query: The embedding vector for the search query.
|
|
||||||
limit: Maximum number of nearest neighbors to return.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
A list of matched items with id, distance, and content.
|
|
||||||
|
|
||||||
"""
|
|
||||||
domain = self.index_endpoint.public_endpoint_domain_name
|
|
||||||
endpoint_id = self.index_endpoint.name.split("/")[-1]
|
|
||||||
url = (
|
|
||||||
f"https://{domain}/v1/projects/{self.project_id}"
|
|
||||||
f"/locations/{self.location}"
|
|
||||||
f"/indexEndpoints/{endpoint_id}:findNeighbors"
|
|
||||||
)
|
|
||||||
payload = {
|
|
||||||
"deployed_index_id": deployed_index_id,
|
|
||||||
"queries": [
|
|
||||||
{
|
|
||||||
"datapoint": {"feature_vector": list(query)},
|
|
||||||
"neighbor_count": limit,
|
|
||||||
},
|
|
||||||
],
|
|
||||||
}
|
|
||||||
|
|
||||||
headers = await self._async_get_auth_headers()
|
|
||||||
session = self._get_aio_session()
|
|
||||||
async with session.post(
|
|
||||||
url, json=payload, headers=headers,
|
|
||||||
) as response:
|
|
||||||
response.raise_for_status()
|
|
||||||
data = await response.json()
|
|
||||||
|
|
||||||
neighbors = (
|
|
||||||
data.get("nearestNeighbors", [{}])[0].get("neighbors", [])
|
|
||||||
)
|
|
||||||
content_tasks = []
|
|
||||||
for neighbor in neighbors:
|
|
||||||
datapoint_id = neighbor["datapoint"]["datapointId"]
|
|
||||||
file_path = (
|
|
||||||
f"{self.index_name}/contents/{datapoint_id}.md"
|
|
||||||
)
|
|
||||||
content_tasks.append(
|
|
||||||
self.storage.async_get_file_stream(file_path),
|
|
||||||
)
|
|
||||||
|
|
||||||
file_streams = await asyncio.gather(*content_tasks)
|
|
||||||
results: list[SearchResult] = []
|
|
||||||
for neighbor, stream in zip(
|
|
||||||
neighbors, file_streams, strict=True,
|
|
||||||
):
|
|
||||||
results.append(
|
|
||||||
SearchResult(
|
|
||||||
id=neighbor["datapoint"]["datapointId"],
|
|
||||||
distance=neighbor["distance"],
|
|
||||||
content=stream.read().decode("utf-8"),
|
|
||||||
),
|
|
||||||
)
|
|
||||||
return results
|
|
||||||
|
|
||||||
def delete_index(self, index_name: str) -> None:
|
|
||||||
"""Delete a Vertex AI Vector Search index.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
index_name: The resource name of the index.
|
|
||||||
|
|
||||||
"""
|
|
||||||
index = aiplatform.MatchingEngineIndex(index_name)
|
|
||||||
index.delete()
|
|
||||||
|
|
||||||
def delete_index_endpoint(
|
|
||||||
self, index_endpoint_name: str,
|
|
||||||
) -> None:
|
|
||||||
"""Delete a Vertex AI Vector Search index endpoint.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
index_endpoint_name: The resource name of the endpoint.
|
|
||||||
|
|
||||||
"""
|
|
||||||
index_endpoint = aiplatform.MatchingEngineIndexEndpoint(
|
|
||||||
index_endpoint_name,
|
|
||||||
)
|
|
||||||
index_endpoint.undeploy_all()
|
|
||||||
index_endpoint.delete(force=True)
|
|
||||||
@@ -1,79 +0,0 @@
|
|||||||
import asyncio
|
|
||||||
import logging
|
|
||||||
import os
|
|
||||||
import random
|
|
||||||
|
|
||||||
import typer
|
|
||||||
from dotenv import load_dotenv
|
|
||||||
from embedder.vertex_ai import VertexAIEmbedder
|
|
||||||
|
|
||||||
load_dotenv()
|
|
||||||
project = os.getenv("GOOGLE_CLOUD_PROJECT")
|
|
||||||
location = os.getenv("GOOGLE_CLOUD_LOCATION")
|
|
||||||
|
|
||||||
MODEL_NAME = "gemini-embedding-001"
|
|
||||||
CONTENT_LIST = [
|
|
||||||
"¿Cuáles son los beneficios de una tarjeta de crédito?",
|
|
||||||
"¿Cómo puedo abrir una cuenta de ahorros?",
|
|
||||||
"¿Qué es una hipoteca y cómo funciona?",
|
|
||||||
"¿Cuáles son las tasas de interés para un préstamo personal?",
|
|
||||||
"¿Cómo puedo solicitar un préstamo para un coche?",
|
|
||||||
"¿Qué es la banca en línea y cómo me registro?",
|
|
||||||
"¿Cómo puedo reportar una tarjeta de crédito perdida o robada?",
|
|
||||||
"¿Qué es el phishing y cómo puedo protegerme?",
|
|
||||||
"¿Cuáles son los diferentes tipos de cuentas corrientes que ofrecen?",
|
|
||||||
"¿Cómo puedo transferir dinero a una cuenta internacional?",
|
|
||||||
]
|
|
||||||
TASK_TYPE = "RETRIEVAL_DOCUMENT"
|
|
||||||
|
|
||||||
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
app = typer.Typer()
|
|
||||||
|
|
||||||
logger.info(f"Initializing GenAI Client for project '{project}' in '{location}'")
|
|
||||||
embedder = VertexAIEmbedder(MODEL_NAME, project, location)
|
|
||||||
|
|
||||||
async def embed_content_task():
|
|
||||||
"""A single task to send one embedding request using the global client."""
|
|
||||||
content_to_embed = random.choice(CONTENT_LIST)
|
|
||||||
await embedder.async_generate_embedding(content_to_embed)
|
|
||||||
|
|
||||||
async def run_test(concurrency: int):
|
|
||||||
"""Continuously calls the embedding API and tracks requests."""
|
|
||||||
total_requests = 0
|
|
||||||
|
|
||||||
logger.info(f"Starting diagnostic test with {concurrency} concurrent requests on model '{MODEL_NAME}'.")
|
|
||||||
logger.info("Press Ctrl+C to stop.")
|
|
||||||
|
|
||||||
while True:
|
|
||||||
# Create tasks, passing project_id and location
|
|
||||||
tasks = [embed_content_task() for _ in range(concurrency)]
|
|
||||||
|
|
||||||
try:
|
|
||||||
await asyncio.gather(*tasks)
|
|
||||||
total_requests += concurrency
|
|
||||||
logger.info(f"Successfully completed batch. Total requests so far: {total_requests}")
|
|
||||||
except Exception as e:
|
|
||||||
logger.error("Caught an error. Stopping test.")
|
|
||||||
print("\n--- STATS ---")
|
|
||||||
print(f"Total successful requests: {total_requests}")
|
|
||||||
print(f"Concurrent requests during failure: {concurrency}")
|
|
||||||
print(f"Error Type: {e.__class__.__name__}")
|
|
||||||
print(f"Error Details: {e}")
|
|
||||||
print("-------------")
|
|
||||||
break
|
|
||||||
|
|
||||||
@app.command()
|
|
||||||
def main(
|
|
||||||
concurrency: int = typer.Option(
|
|
||||||
10, "--concurrency", "-c", help="Number of concurrent requests to send in each batch."
|
|
||||||
),
|
|
||||||
):
|
|
||||||
try:
|
|
||||||
asyncio.run(run_test(concurrency))
|
|
||||||
except KeyboardInterrupt:
|
|
||||||
logger.info("\nKeyboard interrupt received. Exiting.")
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
app()
|
|
||||||
@@ -1,99 +0,0 @@
|
|||||||
import asyncio
|
|
||||||
import logging
|
|
||||||
import random
|
|
||||||
|
|
||||||
import httpx
|
|
||||||
import typer
|
|
||||||
|
|
||||||
CONTENT_LIST = [
|
|
||||||
"¿Cuáles son los beneficios de una tarjeta de crédito?",
|
|
||||||
"¿Cómo puedo abrir una cuenta de ahorros?",
|
|
||||||
"¿Qué es una hipoteca y cómo funciona?",
|
|
||||||
"¿Cuáles son las tasas de interés para un préstamo personal?",
|
|
||||||
"¿Cómo puedo solicitar un préstamo para un coche?",
|
|
||||||
"¿Qué es la banca en línea y cómo me registro?",
|
|
||||||
"¿Cómo puedo reportar una tarjeta de crédito perdida o robada?",
|
|
||||||
"¿Qué es el phishing y cómo puedo protegerme?",
|
|
||||||
"¿Cuáles son los diferentes tipos de cuentas corrientes que ofrecen?",
|
|
||||||
"¿Cómo puedo transferir dinero a una cuenta internacional?",
|
|
||||||
]
|
|
||||||
|
|
||||||
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
app = typer.Typer()
|
|
||||||
|
|
||||||
async def call_rag_endpoint_task(client: httpx.AsyncClient, url: str):
|
|
||||||
"""A single task to send one request to the RAG endpoint."""
|
|
||||||
question = random.choice(CONTENT_LIST)
|
|
||||||
json_payload = {
|
|
||||||
"sessionInfo": {
|
|
||||||
"parameters": {
|
|
||||||
"query": question
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
response = await client.post(url, json=json_payload)
|
|
||||||
response.raise_for_status() # Raise an exception for bad status codes
|
|
||||||
response_data = response.json()
|
|
||||||
response_text = response_data["sessionInfo"]["parameters"]["response"]
|
|
||||||
logger.info(f"Question: {question[:50]}... Response: {response_text[:100]}...")
|
|
||||||
|
|
||||||
async def run_test(concurrency: int, url: str, timeout_seconds: float):
|
|
||||||
"""Continuously calls the RAG endpoint and tracks requests."""
|
|
||||||
total_requests = 0
|
|
||||||
|
|
||||||
logger.info(f"Starting diagnostic test with {concurrency} concurrent requests on endpoint '{url}'.")
|
|
||||||
logger.info(f"Request timeout is set to {timeout_seconds} seconds.")
|
|
||||||
logger.info("Press Ctrl+C to stop.")
|
|
||||||
|
|
||||||
timeout = httpx.Timeout(timeout_seconds)
|
|
||||||
async with httpx.AsyncClient(timeout=timeout) as client:
|
|
||||||
while True:
|
|
||||||
tasks = [call_rag_endpoint_task(client, url) for _ in range(concurrency)]
|
|
||||||
|
|
||||||
try:
|
|
||||||
await asyncio.gather(*tasks)
|
|
||||||
total_requests += concurrency
|
|
||||||
logger.info(f"Successfully completed batch. Total requests so far: {total_requests}")
|
|
||||||
except httpx.TimeoutException as e:
|
|
||||||
logger.error(f"A request timed out: {e.request.method} {e.request.url}")
|
|
||||||
logger.error("Consider increasing the timeout with the --timeout option.")
|
|
||||||
break
|
|
||||||
except httpx.HTTPStatusError as e:
|
|
||||||
logger.error(f"An HTTP error occurred: {e.response.status_code} - {e.request.method} {e.request.url}")
|
|
||||||
logger.error(f"Response body: {e.response.text}")
|
|
||||||
break
|
|
||||||
except httpx.RequestError as e:
|
|
||||||
logger.error(f"A request error occurred: {e.request.method} {e.request.url}")
|
|
||||||
logger.error(f"Error details: {e}")
|
|
||||||
break
|
|
||||||
except Exception as e:
|
|
||||||
logger.error("Caught an unexpected error. Stopping test.")
|
|
||||||
print("\n--- STATS ---")
|
|
||||||
print(f"Total successful requests: {total_requests}")
|
|
||||||
print(f"Concurrent requests during failure: {concurrency}")
|
|
||||||
print(f"Error Type: {e.__class__.__name__}")
|
|
||||||
print(f"Error Details: {e}")
|
|
||||||
print("-------------")
|
|
||||||
break
|
|
||||||
|
|
||||||
@app.command()
|
|
||||||
def main(
|
|
||||||
concurrency: int = typer.Option(
|
|
||||||
10, "--concurrency", "-c", help="Number of concurrent requests to send in each batch."
|
|
||||||
),
|
|
||||||
url: str = typer.Option(
|
|
||||||
"http://127.0.0.1:8000/sigma-rag", "--url", "-u", help="The URL of the RAG endpoint to test."
|
|
||||||
),
|
|
||||||
timeout_seconds: float = typer.Option(
|
|
||||||
30.0, "--timeout", "-t", help="Request timeout in seconds."
|
|
||||||
)
|
|
||||||
):
|
|
||||||
try:
|
|
||||||
asyncio.run(run_test(concurrency, url, timeout_seconds))
|
|
||||||
except KeyboardInterrupt:
|
|
||||||
logger.info("\nKeyboard interrupt received. Exiting.")
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
app()
|
|
||||||
@@ -1,91 +0,0 @@
|
|||||||
import concurrent.futures
|
|
||||||
import random
|
|
||||||
import threading
|
|
||||||
|
|
||||||
import requests
|
|
||||||
|
|
||||||
# URL for the endpoint
|
|
||||||
url = "http://localhost:8000/sigma-rag"
|
|
||||||
|
|
||||||
# List of Spanish banking questions
|
|
||||||
spanish_questions = [
|
|
||||||
"¿Cuáles son los beneficios de una tarjeta de crédito?",
|
|
||||||
"¿Cómo puedo abrir una cuenta de ahorros?",
|
|
||||||
"¿Qué es una hipoteca y cómo funciona?",
|
|
||||||
"¿Cuáles son las tasas de interés para un préstamo personal?",
|
|
||||||
"¿Cómo puedo solicitar un préstamo para un coche?",
|
|
||||||
"¿Qué es la banca en línea y cómo me registro?",
|
|
||||||
"¿Cómo puedo reportar una tarjeta de crédito perdida o robada?",
|
|
||||||
"¿Qué es el phishing y cómo puedo protegerme?",
|
|
||||||
"¿Cuáles son los diferentes tipos de cuentas corrientes que ofrecen?",
|
|
||||||
"¿Cómo puedo transferir dinero a una cuenta internacional?",
|
|
||||||
]
|
|
||||||
|
|
||||||
# A threading Event to signal all threads to stop
|
|
||||||
stop_event = threading.Event()
|
|
||||||
|
|
||||||
def send_request(question, request_id):
|
|
||||||
"""Sends a single request and handles the response."""
|
|
||||||
if stop_event.is_set():
|
|
||||||
return
|
|
||||||
|
|
||||||
data = {"sessionInfo": {"parameters": {"query": question}}}
|
|
||||||
try:
|
|
||||||
response = requests.post(url, json=data)
|
|
||||||
|
|
||||||
if stop_event.is_set():
|
|
||||||
return
|
|
||||||
|
|
||||||
if response.status_code == 500:
|
|
||||||
print(f"Request {request_id}: Received 500 error with question: '{question}'.")
|
|
||||||
print("Stopping stress test.")
|
|
||||||
stop_event.set()
|
|
||||||
else:
|
|
||||||
print(f"Request {request_id}: Successful with status code {response.status_code}.")
|
|
||||||
|
|
||||||
except requests.exceptions.RequestException as e:
|
|
||||||
if not stop_event.is_set():
|
|
||||||
print(f"Request {request_id}: An error occurred: {e}")
|
|
||||||
stop_event.set()
|
|
||||||
|
|
||||||
def main():
|
|
||||||
"""Runs the stress test with parallel requests."""
|
|
||||||
num_workers = 30 # Number of parallel requests
|
|
||||||
print(f"Starting stress test with {num_workers} parallel workers. Press Ctrl+C to stop.")
|
|
||||||
|
|
||||||
with concurrent.futures.ThreadPoolExecutor(max_workers=num_workers) as executor:
|
|
||||||
futures = {
|
|
||||||
executor.submit(send_request, random.choice(spanish_questions), i)
|
|
||||||
for i in range(1, num_workers + 1)
|
|
||||||
}
|
|
||||||
request_id_counter = num_workers + 1
|
|
||||||
|
|
||||||
try:
|
|
||||||
while not stop_event.is_set():
|
|
||||||
# Wait for any future to complete
|
|
||||||
done, _ = concurrent.futures.wait(
|
|
||||||
futures, return_when=concurrent.futures.FIRST_COMPLETED
|
|
||||||
)
|
|
||||||
|
|
||||||
for future in done:
|
|
||||||
# Remove the completed future
|
|
||||||
futures.remove(future)
|
|
||||||
|
|
||||||
# If we are not stopping, submit a new one
|
|
||||||
if not stop_event.is_set():
|
|
||||||
futures.add(
|
|
||||||
executor.submit(
|
|
||||||
send_request,
|
|
||||||
random.choice(spanish_questions),
|
|
||||||
request_id_counter,
|
|
||||||
)
|
|
||||||
)
|
|
||||||
request_id_counter += 1
|
|
||||||
except KeyboardInterrupt:
|
|
||||||
print("\nKeyboard interrupt received. Stopping threads.")
|
|
||||||
stop_event.set()
|
|
||||||
|
|
||||||
print("Stress test finished.")
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
main()
|
|
||||||
@@ -1,84 +0,0 @@
|
|||||||
from typing import Annotated
|
|
||||||
|
|
||||||
import typer
|
|
||||||
from google.cloud import aiplatform
|
|
||||||
|
|
||||||
from rag_eval.config import settings
|
|
||||||
|
|
||||||
app = typer.Typer()
|
|
||||||
|
|
||||||
|
|
||||||
@app.command()
|
|
||||||
def main(
|
|
||||||
pipeline_spec_path: Annotated[
|
|
||||||
str,
|
|
||||||
typer.Option(
|
|
||||||
"--pipeline-spec-path",
|
|
||||||
"-p",
|
|
||||||
help="Path to the compiled pipeline YAML file.",
|
|
||||||
),
|
|
||||||
],
|
|
||||||
input_table: Annotated[
|
|
||||||
str,
|
|
||||||
typer.Option(
|
|
||||||
"--input-table",
|
|
||||||
"-i",
|
|
||||||
help="Full BigQuery table name for input (e.g., 'project.dataset.table')",
|
|
||||||
),
|
|
||||||
],
|
|
||||||
output_table: Annotated[
|
|
||||||
str,
|
|
||||||
typer.Option(
|
|
||||||
"--output-table",
|
|
||||||
"-o",
|
|
||||||
help="Full BigQuery table name for output (e.g., 'project.dataset.table')",
|
|
||||||
),
|
|
||||||
],
|
|
||||||
project_id: Annotated[
|
|
||||||
str,
|
|
||||||
typer.Option(
|
|
||||||
"--project-id",
|
|
||||||
help="Google Cloud project ID.",
|
|
||||||
),
|
|
||||||
] = settings.project_id,
|
|
||||||
location: Annotated[
|
|
||||||
str,
|
|
||||||
typer.Option(
|
|
||||||
"--location",
|
|
||||||
help="Google Cloud location for the pipeline job.",
|
|
||||||
),
|
|
||||||
] = settings.location,
|
|
||||||
display_name: Annotated[
|
|
||||||
str,
|
|
||||||
typer.Option(
|
|
||||||
"--display-name",
|
|
||||||
help="Display name for the pipeline job.",
|
|
||||||
),
|
|
||||||
] = "search-eval-pipeline-job",
|
|
||||||
):
|
|
||||||
"""Submits a Vertex AI pipeline job."""
|
|
||||||
parameter_values = {
|
|
||||||
"project_id": project_id,
|
|
||||||
"location": location,
|
|
||||||
"input_table": input_table,
|
|
||||||
"output_table": output_table,
|
|
||||||
}
|
|
||||||
|
|
||||||
job = aiplatform.PipelineJob(
|
|
||||||
display_name=display_name,
|
|
||||||
template_path=pipeline_spec_path,
|
|
||||||
pipeline_root=f"gs://{settings.bucket}/pipeline_root",
|
|
||||||
parameter_values=parameter_values,
|
|
||||||
project=project_id,
|
|
||||||
location=location,
|
|
||||||
)
|
|
||||||
|
|
||||||
print(f"Submitting pipeline job with parameters: {parameter_values}")
|
|
||||||
job.submit(
|
|
||||||
service_account="sa-cicd-gitlab@bnt-orquestador-cognitivo-dev.iam.gserviceaccount.com"
|
|
||||||
)
|
|
||||||
print(f"Pipeline job submitted. You can view it at: {job._dashboard_uri()}")
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
app()
|
|
||||||
@@ -1,42 +0,0 @@
|
|||||||
from google.cloud import discoveryengine_v1 as discoveryengine
|
|
||||||
|
|
||||||
# TODO(developer): Uncomment these variables before running the sample.
|
|
||||||
project_id = "bnt-orquestador-cognitivo-dev"
|
|
||||||
|
|
||||||
client = discoveryengine.RankServiceClient()
|
|
||||||
|
|
||||||
# The full resource name of the ranking config.
|
|
||||||
# Format: projects/{project_id}/locations/{location}/rankingConfigs/default_ranking_config
|
|
||||||
ranking_config = client.ranking_config_path(
|
|
||||||
project=project_id,
|
|
||||||
location="global",
|
|
||||||
ranking_config="default_ranking_config",
|
|
||||||
)
|
|
||||||
request = discoveryengine.RankRequest(
|
|
||||||
ranking_config=ranking_config,
|
|
||||||
model="semantic-ranker-default@latest",
|
|
||||||
top_n=10,
|
|
||||||
query="What is Google Gemini?",
|
|
||||||
records=[
|
|
||||||
discoveryengine.RankingRecord(
|
|
||||||
id="1",
|
|
||||||
title="Gemini",
|
|
||||||
content="The Gemini zodiac symbol often depicts two figures standing side-by-side.",
|
|
||||||
),
|
|
||||||
discoveryengine.RankingRecord(
|
|
||||||
id="2",
|
|
||||||
title="Gemini",
|
|
||||||
content="Gemini is a cutting edge large language model created by Google.",
|
|
||||||
),
|
|
||||||
discoveryengine.RankingRecord(
|
|
||||||
id="3",
|
|
||||||
title="Gemini Constellation",
|
|
||||||
content="Gemini is a constellation that can be seen in the night sky.",
|
|
||||||
),
|
|
||||||
],
|
|
||||||
)
|
|
||||||
|
|
||||||
response = client.rank(request=request)
|
|
||||||
|
|
||||||
# Handle the response
|
|
||||||
print(response)
|
|
||||||
@@ -1,12 +0,0 @@
|
|||||||
import requests
|
|
||||||
|
|
||||||
# Test the /sigma-rag endpoint
|
|
||||||
url = "http://localhost:8000/sigma-rag"
|
|
||||||
data = {
|
|
||||||
"sessionInfo": {"parameters": {"query": "What are the benefits of a credit card?"}}
|
|
||||||
}
|
|
||||||
|
|
||||||
response = requests.post(url, json=data)
|
|
||||||
|
|
||||||
print("Response from /sigma-rag:")
|
|
||||||
print(response.json())
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
"""RAG evaluation agent package."""
|
|
||||||
@@ -1,92 +0,0 @@
|
|||||||
"""Pydantic AI agent with RAG tool for vector search."""
|
|
||||||
|
|
||||||
import time
|
|
||||||
|
|
||||||
import structlog
|
|
||||||
from pydantic import BaseModel
|
|
||||||
from pydantic_ai import Agent, Embedder, RunContext
|
|
||||||
from pydantic_ai.models.google import GoogleModel
|
|
||||||
|
|
||||||
from rag_eval.config import settings
|
|
||||||
from rag_eval.vector_search.vertex_ai import GoogleCloudVectorSearch
|
|
||||||
|
|
||||||
logger = structlog.get_logger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
class Deps(BaseModel):
|
|
||||||
"""Dependencies injected into the agent at runtime."""
|
|
||||||
|
|
||||||
vector_search: GoogleCloudVectorSearch
|
|
||||||
embedder: Embedder
|
|
||||||
|
|
||||||
model_config = {"arbitrary_types_allowed": True}
|
|
||||||
|
|
||||||
|
|
||||||
model = GoogleModel(
|
|
||||||
settings.agent_language_model,
|
|
||||||
provider=settings.provider,
|
|
||||||
)
|
|
||||||
agent = Agent(
|
|
||||||
model,
|
|
||||||
deps_type=Deps,
|
|
||||||
system_prompt=settings.agent_instructions,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
@agent.tool
|
|
||||||
async def conocimiento(ctx: RunContext[Deps], query: str) -> str:
|
|
||||||
"""Search the vector index for the given query.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
ctx: The run context containing dependencies.
|
|
||||||
query: The query to search for.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
A formatted string containing the search results.
|
|
||||||
|
|
||||||
"""
|
|
||||||
t0 = time.perf_counter()
|
|
||||||
min_sim = 0.6
|
|
||||||
|
|
||||||
query_embedding = await ctx.deps.embedder.embed_query(query)
|
|
||||||
t_embed = time.perf_counter()
|
|
||||||
|
|
||||||
search_results = await ctx.deps.vector_search.async_run_query(
|
|
||||||
deployed_index_id=settings.index_deployed_id,
|
|
||||||
query=list(query_embedding.embeddings[0]),
|
|
||||||
limit=5,
|
|
||||||
)
|
|
||||||
t_search = time.perf_counter()
|
|
||||||
|
|
||||||
if search_results:
|
|
||||||
max_sim = max(r["distance"] for r in search_results)
|
|
||||||
cutoff = max_sim * 0.9
|
|
||||||
search_results = [
|
|
||||||
s
|
|
||||||
for s in search_results
|
|
||||||
if s["distance"] > cutoff and s["distance"] > min_sim
|
|
||||||
]
|
|
||||||
|
|
||||||
logger.info(
|
|
||||||
"conocimiento.timing",
|
|
||||||
embedding_ms=round((t_embed - t0) * 1000, 1),
|
|
||||||
vector_search_ms=round((t_search - t_embed) * 1000, 1),
|
|
||||||
total_ms=round((t_search - t0) * 1000, 1),
|
|
||||||
chunks=[s["id"] for s in search_results],
|
|
||||||
)
|
|
||||||
|
|
||||||
formatted_results = [
|
|
||||||
f"<document {i} name={result['id']}>\n"
|
|
||||||
f"{result['content']}\n"
|
|
||||||
f"</document {i}>"
|
|
||||||
for i, result in enumerate(search_results, start=1)
|
|
||||||
]
|
|
||||||
return "\n".join(formatted_results)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
deps = Deps(
|
|
||||||
vector_search=settings.vector_search,
|
|
||||||
embedder=settings.embedder,
|
|
||||||
)
|
|
||||||
agent.to_cli_sync(deps=deps)
|
|
||||||
@@ -1,92 +0,0 @@
|
|||||||
"""Application settings loaded from YAML and environment variables."""
|
|
||||||
|
|
||||||
import os
|
|
||||||
from functools import cached_property
|
|
||||||
|
|
||||||
from pydantic_ai import Embedder
|
|
||||||
from pydantic_ai.providers.google import GoogleProvider
|
|
||||||
from pydantic_settings import (
|
|
||||||
BaseSettings,
|
|
||||||
PydanticBaseSettingsSource,
|
|
||||||
SettingsConfigDict,
|
|
||||||
YamlConfigSettingsSource,
|
|
||||||
)
|
|
||||||
|
|
||||||
from rag_eval.vector_search.vertex_ai import GoogleCloudVectorSearch
|
|
||||||
|
|
||||||
CONFIG_FILE_PATH = os.getenv("CONFIG_YAML", "config.yaml")
|
|
||||||
|
|
||||||
|
|
||||||
class Settings(BaseSettings):
|
|
||||||
"""Application settings loaded from config.yaml and env vars."""
|
|
||||||
|
|
||||||
project_id: str
|
|
||||||
location: str
|
|
||||||
bucket: str
|
|
||||||
|
|
||||||
agent_name: str
|
|
||||||
agent_instructions: str
|
|
||||||
agent_language_model: str
|
|
||||||
agent_embedding_model: str
|
|
||||||
agent_thinking: int
|
|
||||||
|
|
||||||
index_name: str
|
|
||||||
index_deployed_id: str
|
|
||||||
index_endpoint: str
|
|
||||||
index_dimensions: int
|
|
||||||
index_machine_type: str = "e2-standard-16"
|
|
||||||
index_origin: str
|
|
||||||
index_destination: str
|
|
||||||
index_chunk_limit: int
|
|
||||||
|
|
||||||
|
|
||||||
model_config = SettingsConfigDict(yaml_file=CONFIG_FILE_PATH)
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def settings_customise_sources(
|
|
||||||
cls,
|
|
||||||
settings_cls: type[BaseSettings],
|
|
||||||
init_settings: PydanticBaseSettingsSource, # noqa: ARG003
|
|
||||||
env_settings: PydanticBaseSettingsSource,
|
|
||||||
dotenv_settings: PydanticBaseSettingsSource, # noqa: ARG003
|
|
||||||
file_secret_settings: PydanticBaseSettingsSource, # noqa: ARG003
|
|
||||||
) -> tuple[PydanticBaseSettingsSource, ...]:
|
|
||||||
"""Use env vars and YAML as settings sources."""
|
|
||||||
return (
|
|
||||||
env_settings,
|
|
||||||
YamlConfigSettingsSource(settings_cls),
|
|
||||||
)
|
|
||||||
|
|
||||||
@cached_property
|
|
||||||
def provider(self) -> GoogleProvider:
|
|
||||||
"""Return a Google provider configured for Vertex AI."""
|
|
||||||
return GoogleProvider(
|
|
||||||
project=self.project_id,
|
|
||||||
location=self.location,
|
|
||||||
)
|
|
||||||
|
|
||||||
@cached_property
|
|
||||||
def vector_search(self) -> GoogleCloudVectorSearch:
|
|
||||||
"""Return a configured vector search client."""
|
|
||||||
vs = GoogleCloudVectorSearch(
|
|
||||||
project_id=self.project_id,
|
|
||||||
location=self.location,
|
|
||||||
bucket=self.bucket,
|
|
||||||
index_name=self.index_name,
|
|
||||||
)
|
|
||||||
vs.load_index_endpoint(self.index_endpoint)
|
|
||||||
return vs
|
|
||||||
|
|
||||||
@cached_property
|
|
||||||
def embedder(self) -> Embedder:
|
|
||||||
"""Return an embedder configured for the agent's embedding model."""
|
|
||||||
from pydantic_ai.embeddings.google import GoogleEmbeddingModel # noqa: PLC0415
|
|
||||||
|
|
||||||
model = GoogleEmbeddingModel(
|
|
||||||
self.agent_embedding_model,
|
|
||||||
provider=self.provider,
|
|
||||||
)
|
|
||||||
return Embedder(model)
|
|
||||||
|
|
||||||
|
|
||||||
settings = Settings.model_validate({})
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
"""File storage provider implementations."""
|
|
||||||
@@ -1,56 +0,0 @@
|
|||||||
"""Abstract base class for file storage providers."""
|
|
||||||
|
|
||||||
from abc import ABC, abstractmethod
|
|
||||||
from typing import BinaryIO
|
|
||||||
|
|
||||||
|
|
||||||
class BaseFileStorage(ABC):
|
|
||||||
"""Abstract base class for a remote file processor.
|
|
||||||
|
|
||||||
Defines the interface for listing and processing files from
|
|
||||||
a remote source.
|
|
||||||
"""
|
|
||||||
|
|
||||||
@abstractmethod
|
|
||||||
def upload_file(
|
|
||||||
self,
|
|
||||||
file_path: str,
|
|
||||||
destination_blob_name: str,
|
|
||||||
content_type: str | None = None,
|
|
||||||
) -> None:
|
|
||||||
"""Upload a file to the remote source.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
file_path: The local path to the file to upload.
|
|
||||||
destination_blob_name: Name of the file in remote storage.
|
|
||||||
content_type: The content type of the file.
|
|
||||||
|
|
||||||
"""
|
|
||||||
...
|
|
||||||
|
|
||||||
@abstractmethod
|
|
||||||
def list_files(self, path: str | None = None) -> list[str]:
|
|
||||||
"""List files from a remote location.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
path: Path to a specific file or directory. If None,
|
|
||||||
recursively lists all files in the bucket.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
A list of file paths.
|
|
||||||
|
|
||||||
"""
|
|
||||||
...
|
|
||||||
|
|
||||||
@abstractmethod
|
|
||||||
def get_file_stream(self, file_name: str) -> BinaryIO:
|
|
||||||
"""Get a file from the remote source as a file-like object.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
file_name: The name of the file to retrieve.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
A file-like object containing the file data.
|
|
||||||
|
|
||||||
"""
|
|
||||||
...
|
|
||||||
@@ -1,188 +0,0 @@
|
|||||||
"""Google Cloud Storage file storage implementation."""
|
|
||||||
|
|
||||||
import asyncio
|
|
||||||
import io
|
|
||||||
import logging
|
|
||||||
from typing import BinaryIO
|
|
||||||
|
|
||||||
import aiohttp
|
|
||||||
from gcloud.aio.storage import Storage
|
|
||||||
from google.cloud import storage
|
|
||||||
|
|
||||||
from rag_eval.file_storage.base import BaseFileStorage
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
HTTP_TOO_MANY_REQUESTS = 429
|
|
||||||
HTTP_SERVER_ERROR = 500
|
|
||||||
|
|
||||||
|
|
||||||
class GoogleCloudFileStorage(BaseFileStorage):
|
|
||||||
"""File storage backed by Google Cloud Storage."""
|
|
||||||
|
|
||||||
def __init__(self, bucket: str) -> None: # noqa: D107
|
|
||||||
self.bucket_name = bucket
|
|
||||||
|
|
||||||
self.storage_client = storage.Client()
|
|
||||||
self.bucket_client = self.storage_client.bucket(self.bucket_name)
|
|
||||||
self._aio_session: aiohttp.ClientSession | None = None
|
|
||||||
self._aio_storage: Storage | None = None
|
|
||||||
self._cache: dict[str, bytes] = {}
|
|
||||||
|
|
||||||
def upload_file(
|
|
||||||
self,
|
|
||||||
file_path: str,
|
|
||||||
destination_blob_name: str,
|
|
||||||
content_type: str | None = None,
|
|
||||||
) -> None:
|
|
||||||
"""Upload a file to Cloud Storage.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
file_path: The local path to the file to upload.
|
|
||||||
destination_blob_name: Name of the blob in the bucket.
|
|
||||||
content_type: The content type of the file.
|
|
||||||
|
|
||||||
"""
|
|
||||||
blob = self.bucket_client.blob(destination_blob_name)
|
|
||||||
blob.upload_from_filename(
|
|
||||||
file_path,
|
|
||||||
content_type=content_type,
|
|
||||||
if_generation_match=0,
|
|
||||||
)
|
|
||||||
self._cache.pop(destination_blob_name, None)
|
|
||||||
|
|
||||||
def list_files(self, path: str | None = None) -> list[str]:
|
|
||||||
"""List all files at the given path in the bucket.
|
|
||||||
|
|
||||||
If path is None, recursively lists all files.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
path: Prefix to filter files by.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
A list of blob names.
|
|
||||||
|
|
||||||
"""
|
|
||||||
blobs = self.storage_client.list_blobs(
|
|
||||||
self.bucket_name, prefix=path,
|
|
||||||
)
|
|
||||||
return [blob.name for blob in blobs]
|
|
||||||
|
|
||||||
def get_file_stream(self, file_name: str) -> BinaryIO:
|
|
||||||
"""Get a file as a file-like object, using cache.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
file_name: The blob name to retrieve.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
A BytesIO stream with the file contents.
|
|
||||||
|
|
||||||
"""
|
|
||||||
if file_name not in self._cache:
|
|
||||||
blob = self.bucket_client.blob(file_name)
|
|
||||||
self._cache[file_name] = blob.download_as_bytes()
|
|
||||||
file_stream = io.BytesIO(self._cache[file_name])
|
|
||||||
file_stream.name = file_name
|
|
||||||
return file_stream
|
|
||||||
|
|
||||||
def _get_aio_session(self) -> aiohttp.ClientSession:
|
|
||||||
if self._aio_session is None or self._aio_session.closed:
|
|
||||||
connector = aiohttp.TCPConnector(
|
|
||||||
limit=300, limit_per_host=50,
|
|
||||||
)
|
|
||||||
timeout = aiohttp.ClientTimeout(total=60)
|
|
||||||
self._aio_session = aiohttp.ClientSession(
|
|
||||||
timeout=timeout, connector=connector,
|
|
||||||
)
|
|
||||||
return self._aio_session
|
|
||||||
|
|
||||||
def _get_aio_storage(self) -> Storage:
|
|
||||||
if self._aio_storage is None:
|
|
||||||
self._aio_storage = Storage(
|
|
||||||
session=self._get_aio_session(),
|
|
||||||
)
|
|
||||||
return self._aio_storage
|
|
||||||
|
|
||||||
async def async_get_file_stream(
|
|
||||||
self, file_name: str, max_retries: int = 3,
|
|
||||||
) -> BinaryIO:
|
|
||||||
"""Get a file asynchronously with retry on transient errors.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
file_name: The blob name to retrieve.
|
|
||||||
max_retries: Maximum number of retry attempts.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
A BytesIO stream with the file contents.
|
|
||||||
|
|
||||||
Raises:
|
|
||||||
TimeoutError: If all retry attempts fail.
|
|
||||||
|
|
||||||
"""
|
|
||||||
if file_name in self._cache:
|
|
||||||
file_stream = io.BytesIO(self._cache[file_name])
|
|
||||||
file_stream.name = file_name
|
|
||||||
return file_stream
|
|
||||||
|
|
||||||
storage_client = self._get_aio_storage()
|
|
||||||
last_exception: Exception | None = None
|
|
||||||
|
|
||||||
for attempt in range(max_retries):
|
|
||||||
try:
|
|
||||||
self._cache[file_name] = await storage_client.download(
|
|
||||||
self.bucket_name, file_name,
|
|
||||||
)
|
|
||||||
file_stream = io.BytesIO(self._cache[file_name])
|
|
||||||
file_stream.name = file_name
|
|
||||||
except TimeoutError as exc:
|
|
||||||
last_exception = exc
|
|
||||||
logger.warning(
|
|
||||||
"Timeout downloading gs://%s/%s (attempt %d/%d)",
|
|
||||||
self.bucket_name,
|
|
||||||
file_name,
|
|
||||||
attempt + 1,
|
|
||||||
max_retries,
|
|
||||||
)
|
|
||||||
except aiohttp.ClientResponseError as exc:
|
|
||||||
last_exception = exc
|
|
||||||
if (
|
|
||||||
exc.status == HTTP_TOO_MANY_REQUESTS
|
|
||||||
or exc.status >= HTTP_SERVER_ERROR
|
|
||||||
):
|
|
||||||
logger.warning(
|
|
||||||
"HTTP %d downloading gs://%s/%s "
|
|
||||||
"(attempt %d/%d)",
|
|
||||||
exc.status,
|
|
||||||
self.bucket_name,
|
|
||||||
file_name,
|
|
||||||
attempt + 1,
|
|
||||||
max_retries,
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
raise
|
|
||||||
else:
|
|
||||||
return file_stream
|
|
||||||
|
|
||||||
if attempt < max_retries - 1:
|
|
||||||
delay = 0.5 * (2**attempt)
|
|
||||||
await asyncio.sleep(delay)
|
|
||||||
|
|
||||||
msg = (
|
|
||||||
f"Failed to download gs://{self.bucket_name}/{file_name} "
|
|
||||||
f"after {max_retries} attempts"
|
|
||||||
)
|
|
||||||
raise TimeoutError(msg) from last_exception
|
|
||||||
|
|
||||||
def delete_files(self, path: str) -> None:
|
|
||||||
"""Delete all files at the given path in the bucket.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
path: Prefix of blobs to delete.
|
|
||||||
|
|
||||||
"""
|
|
||||||
blobs = self.storage_client.list_blobs(
|
|
||||||
self.bucket_name, prefix=path,
|
|
||||||
)
|
|
||||||
for blob in blobs:
|
|
||||||
blob.delete()
|
|
||||||
self._cache.pop(blob.name, None)
|
|
||||||
@@ -1,47 +0,0 @@
|
|||||||
"""Structured logging configuration using structlog."""
|
|
||||||
|
|
||||||
import logging
|
|
||||||
import sys
|
|
||||||
|
|
||||||
import structlog
|
|
||||||
|
|
||||||
|
|
||||||
def setup_logging(*, json: bool = True, level: int = logging.INFO) -> None:
|
|
||||||
"""Configure structlog with JSON or console output."""
|
|
||||||
shared_processors: list[structlog.types.Processor] = [
|
|
||||||
structlog.contextvars.merge_contextvars,
|
|
||||||
structlog.stdlib.add_log_level,
|
|
||||||
structlog.stdlib.add_logger_name,
|
|
||||||
structlog.processors.TimeStamper(fmt="iso"),
|
|
||||||
structlog.stdlib.ProcessorFormatter.wrap_for_formatter,
|
|
||||||
]
|
|
||||||
|
|
||||||
if json:
|
|
||||||
formatter = structlog.stdlib.ProcessorFormatter(
|
|
||||||
processors=[
|
|
||||||
structlog.stdlib.ProcessorFormatter.remove_processors_meta,
|
|
||||||
structlog.processors.JSONRenderer(),
|
|
||||||
],
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
formatter = structlog.stdlib.ProcessorFormatter(
|
|
||||||
processors=[
|
|
||||||
structlog.stdlib.ProcessorFormatter.remove_processors_meta,
|
|
||||||
structlog.dev.ConsoleRenderer(),
|
|
||||||
],
|
|
||||||
)
|
|
||||||
|
|
||||||
handler = logging.StreamHandler(sys.stdout)
|
|
||||||
handler.setFormatter(formatter)
|
|
||||||
|
|
||||||
root = logging.getLogger()
|
|
||||||
root.handlers.clear()
|
|
||||||
root.addHandler(handler)
|
|
||||||
root.setLevel(level)
|
|
||||||
|
|
||||||
structlog.configure(
|
|
||||||
processors=shared_processors,
|
|
||||||
logger_factory=structlog.stdlib.LoggerFactory(),
|
|
||||||
wrapper_class=structlog.stdlib.BoundLogger,
|
|
||||||
cache_logger_on_first_use=True,
|
|
||||||
)
|
|
||||||
@@ -1,61 +0,0 @@
|
|||||||
"""FastAPI server exposing the RAG agent endpoint."""
|
|
||||||
|
|
||||||
import time
|
|
||||||
from typing import Literal
|
|
||||||
from uuid import uuid4
|
|
||||||
|
|
||||||
import structlog
|
|
||||||
from fastapi import FastAPI
|
|
||||||
from pydantic import BaseModel
|
|
||||||
|
|
||||||
from rag_eval.agent import Deps, agent
|
|
||||||
from rag_eval.config import settings
|
|
||||||
from rag_eval.logging import setup_logging
|
|
||||||
|
|
||||||
logger = structlog.get_logger(__name__)
|
|
||||||
|
|
||||||
setup_logging()
|
|
||||||
|
|
||||||
app = FastAPI(title="RAG Agent")
|
|
||||||
|
|
||||||
|
|
||||||
class Message(BaseModel):
|
|
||||||
"""A single chat message."""
|
|
||||||
|
|
||||||
role: Literal["system", "user", "assistant"]
|
|
||||||
content: str
|
|
||||||
|
|
||||||
|
|
||||||
class AgentRequest(BaseModel):
|
|
||||||
"""Request body for the agent endpoint."""
|
|
||||||
|
|
||||||
messages: list[Message]
|
|
||||||
|
|
||||||
|
|
||||||
class AgentResponse(BaseModel):
|
|
||||||
"""Response body from the agent endpoint."""
|
|
||||||
|
|
||||||
response: str
|
|
||||||
|
|
||||||
|
|
||||||
@app.post("/agent")
|
|
||||||
async def run_agent(request: AgentRequest) -> AgentResponse:
|
|
||||||
"""Run the RAG agent with the provided messages."""
|
|
||||||
request_id = uuid4().hex[:8]
|
|
||||||
structlog.contextvars.clear_contextvars()
|
|
||||||
structlog.contextvars.bind_contextvars(request_id=request_id)
|
|
||||||
|
|
||||||
prompt = request.messages[-1].content
|
|
||||||
logger.info("request.start", prompt_length=len(prompt))
|
|
||||||
t0 = time.perf_counter()
|
|
||||||
|
|
||||||
deps = Deps(
|
|
||||||
vector_search=settings.vector_search,
|
|
||||||
embedder=settings.embedder,
|
|
||||||
)
|
|
||||||
result = await agent.run(prompt, deps=deps)
|
|
||||||
|
|
||||||
elapsed = round((time.perf_counter() - t0) * 1000, 1)
|
|
||||||
logger.info("request.end", elapsed_ms=elapsed)
|
|
||||||
|
|
||||||
return AgentResponse(response=result.output)
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
"""Vector search provider implementations."""
|
|
||||||
@@ -1,68 +0,0 @@
|
|||||||
"""Abstract base class for vector search providers."""
|
|
||||||
|
|
||||||
from abc import ABC, abstractmethod
|
|
||||||
from typing import Any, TypedDict
|
|
||||||
|
|
||||||
|
|
||||||
class SearchResult(TypedDict):
|
|
||||||
"""A single vector search result."""
|
|
||||||
|
|
||||||
id: str
|
|
||||||
distance: float
|
|
||||||
content: str
|
|
||||||
|
|
||||||
|
|
||||||
class BaseVectorSearch(ABC):
|
|
||||||
"""Abstract base class for a vector search provider.
|
|
||||||
|
|
||||||
This class defines the standard interface for creating a vector search
|
|
||||||
index and running queries against it.
|
|
||||||
"""
|
|
||||||
|
|
||||||
@abstractmethod
|
|
||||||
def create_index(
|
|
||||||
self, name: str, content_path: str, **kwargs: Any # noqa: ANN401
|
|
||||||
) -> None:
|
|
||||||
"""Create a new vector search index with the provided content.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
name: The desired name for the new index.
|
|
||||||
content_path: Path to the data used to populate the index.
|
|
||||||
**kwargs: Additional provider-specific arguments.
|
|
||||||
|
|
||||||
"""
|
|
||||||
...
|
|
||||||
|
|
||||||
@abstractmethod
|
|
||||||
def update_index(
|
|
||||||
self, index_name: str, content_path: str, **kwargs: Any # noqa: ANN401
|
|
||||||
) -> None:
|
|
||||||
"""Update an existing vector search index with new content.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
index_name: The name of the index to update.
|
|
||||||
content_path: Path to the data used to populate the index.
|
|
||||||
**kwargs: Additional provider-specific arguments.
|
|
||||||
|
|
||||||
"""
|
|
||||||
...
|
|
||||||
|
|
||||||
@abstractmethod
|
|
||||||
def run_query(
|
|
||||||
self,
|
|
||||||
deployed_index_id: str,
|
|
||||||
query: list[float],
|
|
||||||
limit: int,
|
|
||||||
) -> list[SearchResult]:
|
|
||||||
"""Run a similarity search query against the index.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
deployed_index_id: The ID of the deployed index.
|
|
||||||
query: The embedding vector for the search query.
|
|
||||||
limit: Maximum number of nearest neighbors to return.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
A list of matched items with id, distance, and content.
|
|
||||||
|
|
||||||
"""
|
|
||||||
...
|
|
||||||
@@ -1,310 +0,0 @@
|
|||||||
"""Google Cloud Vertex AI Vector Search implementation."""
|
|
||||||
|
|
||||||
import asyncio
|
|
||||||
from collections.abc import Sequence
|
|
||||||
from typing import Any
|
|
||||||
from uuid import uuid4
|
|
||||||
|
|
||||||
import aiohttp
|
|
||||||
import google.auth
|
|
||||||
import google.auth.credentials
|
|
||||||
import google.auth.transport.requests
|
|
||||||
from gcloud.aio.auth import Token
|
|
||||||
from google.cloud import aiplatform
|
|
||||||
|
|
||||||
from rag_eval.file_storage.google_cloud import GoogleCloudFileStorage
|
|
||||||
from rag_eval.vector_search.base import BaseVectorSearch, SearchResult
|
|
||||||
|
|
||||||
|
|
||||||
class GoogleCloudVectorSearch(BaseVectorSearch):
|
|
||||||
"""A vector search provider using Vertex AI Vector Search."""
|
|
||||||
|
|
||||||
def __init__(
|
|
||||||
self,
|
|
||||||
project_id: str,
|
|
||||||
location: str,
|
|
||||||
bucket: str,
|
|
||||||
index_name: str | None = None,
|
|
||||||
) -> None:
|
|
||||||
"""Initialize the GoogleCloudVectorSearch client.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
project_id: The Google Cloud project ID.
|
|
||||||
location: The Google Cloud location (e.g., 'us-central1').
|
|
||||||
bucket: The GCS bucket to use for file storage.
|
|
||||||
index_name: The name of the index.
|
|
||||||
|
|
||||||
"""
|
|
||||||
aiplatform.init(project=project_id, location=location)
|
|
||||||
self.project_id = project_id
|
|
||||||
self.location = location
|
|
||||||
self.storage = GoogleCloudFileStorage(bucket=bucket)
|
|
||||||
self.index_name = index_name
|
|
||||||
self._credentials: google.auth.credentials.Credentials | None = None
|
|
||||||
self._aio_session: aiohttp.ClientSession | None = None
|
|
||||||
self._async_token: Token | None = None
|
|
||||||
|
|
||||||
def _get_auth_headers(self) -> dict[str, str]:
|
|
||||||
if self._credentials is None:
|
|
||||||
self._credentials, _ = google.auth.default(
|
|
||||||
scopes=["https://www.googleapis.com/auth/cloud-platform"],
|
|
||||||
)
|
|
||||||
if not self._credentials.token or self._credentials.expired:
|
|
||||||
self._credentials.refresh(
|
|
||||||
google.auth.transport.requests.Request(),
|
|
||||||
)
|
|
||||||
return {
|
|
||||||
"Authorization": f"Bearer {self._credentials.token}",
|
|
||||||
"Content-Type": "application/json",
|
|
||||||
}
|
|
||||||
|
|
||||||
async def _async_get_auth_headers(self) -> dict[str, str]:
|
|
||||||
if self._async_token is None:
|
|
||||||
self._async_token = Token(
|
|
||||||
session=self._get_aio_session(),
|
|
||||||
scopes=[
|
|
||||||
"https://www.googleapis.com/auth/cloud-platform",
|
|
||||||
],
|
|
||||||
)
|
|
||||||
access_token = await self._async_token.get()
|
|
||||||
return {
|
|
||||||
"Authorization": f"Bearer {access_token}",
|
|
||||||
"Content-Type": "application/json",
|
|
||||||
}
|
|
||||||
|
|
||||||
def _get_aio_session(self) -> aiohttp.ClientSession:
|
|
||||||
if self._aio_session is None or self._aio_session.closed:
|
|
||||||
connector = aiohttp.TCPConnector(
|
|
||||||
limit=300, limit_per_host=50,
|
|
||||||
)
|
|
||||||
timeout = aiohttp.ClientTimeout(total=60)
|
|
||||||
self._aio_session = aiohttp.ClientSession(
|
|
||||||
timeout=timeout, connector=connector,
|
|
||||||
)
|
|
||||||
return self._aio_session
|
|
||||||
|
|
||||||
def create_index(
|
|
||||||
self,
|
|
||||||
name: str,
|
|
||||||
content_path: str,
|
|
||||||
*,
|
|
||||||
dimensions: int = 3072,
|
|
||||||
approximate_neighbors_count: int = 150,
|
|
||||||
distance_measure_type: str = "DOT_PRODUCT_DISTANCE",
|
|
||||||
**kwargs: Any, # noqa: ANN401, ARG002
|
|
||||||
) -> None:
|
|
||||||
"""Create a new Vertex AI Vector Search index.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
name: The display name for the new index.
|
|
||||||
content_path: GCS URI to the embeddings JSON file.
|
|
||||||
dimensions: Number of dimensions in embedding vectors.
|
|
||||||
approximate_neighbors_count: Neighbors to find per vector.
|
|
||||||
distance_measure_type: The distance measure to use.
|
|
||||||
**kwargs: Additional arguments.
|
|
||||||
|
|
||||||
"""
|
|
||||||
index = aiplatform.MatchingEngineIndex.create_tree_ah_index(
|
|
||||||
display_name=name,
|
|
||||||
contents_delta_uri=content_path,
|
|
||||||
dimensions=dimensions,
|
|
||||||
approximate_neighbors_count=approximate_neighbors_count,
|
|
||||||
distance_measure_type=distance_measure_type, # type: ignore[arg-type]
|
|
||||||
leaf_node_embedding_count=1000,
|
|
||||||
leaf_nodes_to_search_percent=10,
|
|
||||||
)
|
|
||||||
self.index = index
|
|
||||||
|
|
||||||
def update_index(
|
|
||||||
self, index_name: str, content_path: str, **kwargs: Any, # noqa: ANN401, ARG002
|
|
||||||
) -> None:
|
|
||||||
"""Update an existing Vertex AI Vector Search index.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
index_name: The resource name of the index to update.
|
|
||||||
content_path: GCS URI to the new embeddings JSON file.
|
|
||||||
**kwargs: Additional arguments.
|
|
||||||
|
|
||||||
"""
|
|
||||||
index = aiplatform.MatchingEngineIndex(index_name=index_name)
|
|
||||||
index.update_embeddings(
|
|
||||||
contents_delta_uri=content_path,
|
|
||||||
)
|
|
||||||
self.index = index
|
|
||||||
|
|
||||||
def deploy_index(
|
|
||||||
self,
|
|
||||||
index_name: str,
|
|
||||||
machine_type: str = "e2-standard-2",
|
|
||||||
) -> None:
|
|
||||||
"""Deploy a Vertex AI Vector Search index to an endpoint.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
index_name: The name of the index to deploy.
|
|
||||||
machine_type: The machine type for the endpoint.
|
|
||||||
|
|
||||||
"""
|
|
||||||
index_endpoint = aiplatform.MatchingEngineIndexEndpoint.create(
|
|
||||||
display_name=f"{index_name}-endpoint",
|
|
||||||
public_endpoint_enabled=True,
|
|
||||||
)
|
|
||||||
index_endpoint.deploy_index(
|
|
||||||
index=self.index,
|
|
||||||
deployed_index_id=(
|
|
||||||
f"{index_name.replace('-', '_')}_deployed_{uuid4().hex}"
|
|
||||||
),
|
|
||||||
machine_type=machine_type,
|
|
||||||
)
|
|
||||||
self.index_endpoint = index_endpoint
|
|
||||||
|
|
||||||
def load_index_endpoint(self, endpoint_name: str) -> None:
|
|
||||||
"""Load an existing Vertex AI Vector Search index endpoint.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
endpoint_name: The resource name of the index endpoint.
|
|
||||||
|
|
||||||
"""
|
|
||||||
self.index_endpoint = aiplatform.MatchingEngineIndexEndpoint(
|
|
||||||
endpoint_name,
|
|
||||||
)
|
|
||||||
if not self.index_endpoint.public_endpoint_domain_name:
|
|
||||||
msg = (
|
|
||||||
"The index endpoint does not have a public endpoint. "
|
|
||||||
"Ensure the endpoint is configured for public access."
|
|
||||||
)
|
|
||||||
raise ValueError(msg)
|
|
||||||
|
|
||||||
def run_query(
|
|
||||||
self,
|
|
||||||
deployed_index_id: str,
|
|
||||||
query: list[float],
|
|
||||||
limit: int,
|
|
||||||
) -> list[SearchResult]:
|
|
||||||
"""Run a similarity search query against the deployed index.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
deployed_index_id: The ID of the deployed index.
|
|
||||||
query: The embedding vector for the search query.
|
|
||||||
limit: Maximum number of nearest neighbors to return.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
A list of matched items with id, distance, and content.
|
|
||||||
|
|
||||||
"""
|
|
||||||
response = self.index_endpoint.find_neighbors(
|
|
||||||
deployed_index_id=deployed_index_id,
|
|
||||||
queries=[query],
|
|
||||||
num_neighbors=limit,
|
|
||||||
)
|
|
||||||
results = []
|
|
||||||
for neighbor in response[0]:
|
|
||||||
file_path = (
|
|
||||||
f"{self.index_name}/contents/{neighbor.id}.md"
|
|
||||||
)
|
|
||||||
content = (
|
|
||||||
self.storage.get_file_stream(file_path)
|
|
||||||
.read()
|
|
||||||
.decode("utf-8")
|
|
||||||
)
|
|
||||||
results.append(
|
|
||||||
SearchResult(
|
|
||||||
id=neighbor.id,
|
|
||||||
distance=float(neighbor.distance or 0),
|
|
||||||
content=content,
|
|
||||||
),
|
|
||||||
)
|
|
||||||
return results
|
|
||||||
|
|
||||||
async def async_run_query(
|
|
||||||
self,
|
|
||||||
deployed_index_id: str,
|
|
||||||
query: Sequence[float],
|
|
||||||
limit: int,
|
|
||||||
) -> list[SearchResult]:
|
|
||||||
"""Run an async similarity search via the REST API.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
deployed_index_id: The ID of the deployed index.
|
|
||||||
query: The embedding vector for the search query.
|
|
||||||
limit: Maximum number of nearest neighbors to return.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
A list of matched items with id, distance, and content.
|
|
||||||
|
|
||||||
"""
|
|
||||||
domain = self.index_endpoint.public_endpoint_domain_name
|
|
||||||
endpoint_id = self.index_endpoint.name.split("/")[-1]
|
|
||||||
url = (
|
|
||||||
f"https://{domain}/v1/projects/{self.project_id}"
|
|
||||||
f"/locations/{self.location}"
|
|
||||||
f"/indexEndpoints/{endpoint_id}:findNeighbors"
|
|
||||||
)
|
|
||||||
payload = {
|
|
||||||
"deployed_index_id": deployed_index_id,
|
|
||||||
"queries": [
|
|
||||||
{
|
|
||||||
"datapoint": {"feature_vector": list(query)},
|
|
||||||
"neighbor_count": limit,
|
|
||||||
},
|
|
||||||
],
|
|
||||||
}
|
|
||||||
|
|
||||||
headers = await self._async_get_auth_headers()
|
|
||||||
session = self._get_aio_session()
|
|
||||||
async with session.post(
|
|
||||||
url, json=payload, headers=headers,
|
|
||||||
) as response:
|
|
||||||
response.raise_for_status()
|
|
||||||
data = await response.json()
|
|
||||||
|
|
||||||
neighbors = (
|
|
||||||
data.get("nearestNeighbors", [{}])[0].get("neighbors", [])
|
|
||||||
)
|
|
||||||
content_tasks = []
|
|
||||||
for neighbor in neighbors:
|
|
||||||
datapoint_id = neighbor["datapoint"]["datapointId"]
|
|
||||||
file_path = (
|
|
||||||
f"{self.index_name}/contents/{datapoint_id}.md"
|
|
||||||
)
|
|
||||||
content_tasks.append(
|
|
||||||
self.storage.async_get_file_stream(file_path),
|
|
||||||
)
|
|
||||||
|
|
||||||
file_streams = await asyncio.gather(*content_tasks)
|
|
||||||
results: list[SearchResult] = []
|
|
||||||
for neighbor, stream in zip(
|
|
||||||
neighbors, file_streams, strict=True,
|
|
||||||
):
|
|
||||||
results.append(
|
|
||||||
SearchResult(
|
|
||||||
id=neighbor["datapoint"]["datapointId"],
|
|
||||||
distance=neighbor["distance"],
|
|
||||||
content=stream.read().decode("utf-8"),
|
|
||||||
),
|
|
||||||
)
|
|
||||||
return results
|
|
||||||
|
|
||||||
def delete_index(self, index_name: str) -> None:
|
|
||||||
"""Delete a Vertex AI Vector Search index.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
index_name: The resource name of the index.
|
|
||||||
|
|
||||||
"""
|
|
||||||
index = aiplatform.MatchingEngineIndex(index_name)
|
|
||||||
index.delete()
|
|
||||||
|
|
||||||
def delete_index_endpoint(
|
|
||||||
self, index_endpoint_name: str,
|
|
||||||
) -> None:
|
|
||||||
"""Delete a Vertex AI Vector Search index endpoint.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
index_endpoint_name: The resource name of the endpoint.
|
|
||||||
|
|
||||||
"""
|
|
||||||
index_endpoint = aiplatform.MatchingEngineIndexEndpoint(
|
|
||||||
index_endpoint_name,
|
|
||||||
)
|
|
||||||
index_endpoint.undeploy_all()
|
|
||||||
index_endpoint.delete(force=True)
|
|
||||||
@@ -4,7 +4,3 @@ import os
|
|||||||
|
|
||||||
# Ensure the Google GenAI SDK talks to Vertex AI instead of the public Gemini API.
|
# Ensure the Google GenAI SDK talks to Vertex AI instead of the public Gemini API.
|
||||||
os.environ.setdefault("GOOGLE_GENAI_USE_VERTEXAI", "true")
|
os.environ.setdefault("GOOGLE_GENAI_USE_VERTEXAI", "true")
|
||||||
|
|
||||||
from .agent import root_agent
|
|
||||||
|
|
||||||
__all__ = ["root_agent"]
|
|
||||||
29
src/va_agent/agent.py
Normal file
29
src/va_agent/agent.py
Normal file
@@ -0,0 +1,29 @@
|
|||||||
|
"""ADK agent with vector search RAG tool."""
|
||||||
|
|
||||||
|
from google import genai
|
||||||
|
from google.adk.agents.llm_agent import Agent
|
||||||
|
from google.adk.runners import Runner
|
||||||
|
from google.adk.tools.mcp_tool import McpToolset
|
||||||
|
from google.adk.tools.mcp_tool.mcp_session_manager import SseConnectionParams
|
||||||
|
from google.cloud.firestore_v1.async_client import AsyncClient
|
||||||
|
|
||||||
|
from va_agent.config import settings
|
||||||
|
from va_agent.session import FirestoreSessionService
|
||||||
|
|
||||||
|
connection_params = SseConnectionParams(url=settings.mcp_remote_url)
|
||||||
|
toolset = McpToolset(connection_params=connection_params)
|
||||||
|
|
||||||
|
agent = Agent(
|
||||||
|
model=settings.agent_model,
|
||||||
|
name=settings.agent_name,
|
||||||
|
instruction=settings.agent_instructions,
|
||||||
|
tools=[toolset],
|
||||||
|
)
|
||||||
|
|
||||||
|
session_service = FirestoreSessionService(
|
||||||
|
db=AsyncClient(database=settings.firestore_db),
|
||||||
|
compaction_token_threshold=10_000,
|
||||||
|
genai_client=genai.Client(),
|
||||||
|
)
|
||||||
|
|
||||||
|
runner = Runner(app_name="va_agent", agent=agent, session_service=session_service)
|
||||||
53
src/va_agent/config.py
Normal file
53
src/va_agent/config.py
Normal file
@@ -0,0 +1,53 @@
|
|||||||
|
"""Configuration helper for ADK agent."""
|
||||||
|
|
||||||
|
import os
|
||||||
|
|
||||||
|
from pydantic_settings import (
|
||||||
|
BaseSettings,
|
||||||
|
PydanticBaseSettingsSource,
|
||||||
|
SettingsConfigDict,
|
||||||
|
YamlConfigSettingsSource,
|
||||||
|
)
|
||||||
|
|
||||||
|
CONFIG_FILE_PATH = os.getenv("CONFIG_YAML", "config.yaml")
|
||||||
|
|
||||||
|
|
||||||
|
class AgentSettings(BaseSettings):
|
||||||
|
"""Settings for ADK agent with vector search."""
|
||||||
|
|
||||||
|
google_cloud_project: str
|
||||||
|
google_cloud_location: str
|
||||||
|
|
||||||
|
# Agent configuration
|
||||||
|
agent_name: str
|
||||||
|
agent_instructions: str
|
||||||
|
agent_model: str
|
||||||
|
|
||||||
|
# Firestore configuration
|
||||||
|
firestore_db: str
|
||||||
|
|
||||||
|
# MCP configuration
|
||||||
|
mcp_remote_url: str
|
||||||
|
|
||||||
|
model_config = SettingsConfigDict(
|
||||||
|
yaml_file=CONFIG_FILE_PATH,
|
||||||
|
extra="ignore", # Ignore extra fields from config.yaml
|
||||||
|
)
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def settings_customise_sources(
|
||||||
|
cls,
|
||||||
|
settings_cls: type[BaseSettings],
|
||||||
|
init_settings: PydanticBaseSettingsSource, # noqa: ARG003
|
||||||
|
env_settings: PydanticBaseSettingsSource,
|
||||||
|
dotenv_settings: PydanticBaseSettingsSource, # noqa: ARG003
|
||||||
|
file_secret_settings: PydanticBaseSettingsSource, # noqa: ARG003
|
||||||
|
) -> tuple[PydanticBaseSettingsSource, ...]:
|
||||||
|
"""Use env vars and YAML as settings sources."""
|
||||||
|
return (
|
||||||
|
env_settings,
|
||||||
|
YamlConfigSettingsSource(settings_cls),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
settings = AgentSettings.model_validate({})
|
||||||
10
src/va_agent/server.py
Normal file
10
src/va_agent/server.py
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
"""FastAPI server exposing the RAG agent endpoint.
|
||||||
|
|
||||||
|
NOTE: This file is a stub. The rag_eval module was removed in the
|
||||||
|
lean MCP implementation. This file is kept for reference but is not
|
||||||
|
functional.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from fastapi import FastAPI
|
||||||
|
|
||||||
|
app = FastAPI(title="RAG Agent")
|
||||||
582
src/va_agent/session.py
Normal file
582
src/va_agent/session.py
Normal file
@@ -0,0 +1,582 @@
|
|||||||
|
"""Firestore-backed session service for Google ADK."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import logging
|
||||||
|
import time
|
||||||
|
import uuid
|
||||||
|
from typing import TYPE_CHECKING, Any, override
|
||||||
|
|
||||||
|
from google.adk.errors.already_exists_error import AlreadyExistsError
|
||||||
|
from google.adk.events.event import Event
|
||||||
|
from google.adk.sessions import _session_util
|
||||||
|
from google.adk.sessions.base_session_service import (
|
||||||
|
BaseSessionService,
|
||||||
|
GetSessionConfig,
|
||||||
|
ListSessionsResponse,
|
||||||
|
)
|
||||||
|
from google.adk.sessions.session import Session
|
||||||
|
from google.adk.sessions.state import State
|
||||||
|
from google.cloud.firestore_v1.async_transaction import async_transactional
|
||||||
|
from google.cloud.firestore_v1.base_query import FieldFilter
|
||||||
|
from google.cloud.firestore_v1.field_path import FieldPath
|
||||||
|
from google.genai.types import Content, Part
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from google import genai
|
||||||
|
from google.cloud.firestore_v1.async_client import AsyncClient
|
||||||
|
|
||||||
|
logger = logging.getLogger("google_adk." + __name__)
|
||||||
|
|
||||||
|
_COMPACTION_LOCK_TTL = 300 # seconds
|
||||||
|
|
||||||
|
|
||||||
|
@async_transactional
|
||||||
|
async def _try_claim_compaction_txn(transaction: Any, session_ref: Any) -> bool:
|
||||||
|
"""Atomically claim the compaction lock if it is free or stale."""
|
||||||
|
snapshot = await session_ref.get(transaction=transaction)
|
||||||
|
if not snapshot.exists:
|
||||||
|
return False
|
||||||
|
data = snapshot.to_dict() or {}
|
||||||
|
lock_time = data.get("compaction_lock")
|
||||||
|
if lock_time and (time.time() - lock_time) < _COMPACTION_LOCK_TTL:
|
||||||
|
return False
|
||||||
|
transaction.update(session_ref, {"compaction_lock": time.time()})
|
||||||
|
return True
|
||||||
|
|
||||||
|
|
||||||
|
class FirestoreSessionService(BaseSessionService):
|
||||||
|
"""A Firestore-backed implementation of BaseSessionService.
|
||||||
|
|
||||||
|
Firestore document layout (given ``collection_prefix="adk"``)::
|
||||||
|
|
||||||
|
adk_app_states/{app_name}
|
||||||
|
→ app-scoped state key/values
|
||||||
|
|
||||||
|
adk_user_states/{app_name}__{user_id}
|
||||||
|
→ user-scoped state key/values
|
||||||
|
|
||||||
|
adk_sessions/{app_name}__{user_id}__{session_id}
|
||||||
|
→ {app_name, user_id, session_id, state: {…}, last_update_time}
|
||||||
|
└─ events/{event_id} → serialised Event
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__( # noqa: PLR0913
|
||||||
|
self,
|
||||||
|
*,
|
||||||
|
db: AsyncClient,
|
||||||
|
collection_prefix: str = "adk",
|
||||||
|
compaction_token_threshold: int | None = None,
|
||||||
|
compaction_model: str = "gemini-2.5-flash",
|
||||||
|
compaction_keep_recent: int = 10,
|
||||||
|
genai_client: genai.Client | None = None,
|
||||||
|
) -> None:
|
||||||
|
"""Initialize FirestoreSessionService.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
db: Firestore async client
|
||||||
|
collection_prefix: Prefix for Firestore collections
|
||||||
|
compaction_token_threshold: Token count threshold for compaction
|
||||||
|
compaction_model: Model to use for summarization
|
||||||
|
compaction_keep_recent: Number of recent events to keep
|
||||||
|
genai_client: GenAI client for compaction summaries
|
||||||
|
|
||||||
|
"""
|
||||||
|
if compaction_token_threshold is not None and genai_client is None:
|
||||||
|
msg = "genai_client is required when compaction_token_threshold is set."
|
||||||
|
raise ValueError(msg)
|
||||||
|
self._db = db
|
||||||
|
self._prefix = collection_prefix
|
||||||
|
self._compaction_threshold = compaction_token_threshold
|
||||||
|
self._compaction_model = compaction_model
|
||||||
|
self._compaction_keep_recent = compaction_keep_recent
|
||||||
|
self._genai_client = genai_client
|
||||||
|
self._compaction_locks: dict[str, asyncio.Lock] = {}
|
||||||
|
self._active_tasks: set[asyncio.Task] = set()
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Document-reference helpers
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
def _app_state_ref(self, app_name: str) -> Any:
|
||||||
|
return self._db.collection(f"{self._prefix}_app_states").document(app_name)
|
||||||
|
|
||||||
|
def _user_state_ref(self, app_name: str, user_id: str) -> Any:
|
||||||
|
return self._db.collection(f"{self._prefix}_user_states").document(
|
||||||
|
f"{app_name}__{user_id}"
|
||||||
|
)
|
||||||
|
|
||||||
|
def _session_ref(self, app_name: str, user_id: str, session_id: str) -> Any:
|
||||||
|
return self._db.collection(f"{self._prefix}_sessions").document(
|
||||||
|
f"{app_name}__{user_id}__{session_id}"
|
||||||
|
)
|
||||||
|
|
||||||
|
def _events_col(self, app_name: str, user_id: str, session_id: str) -> Any:
|
||||||
|
return self._session_ref(app_name, user_id, session_id).collection("events")
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# State helpers
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
async def _get_app_state(self, app_name: str) -> dict[str, Any]:
|
||||||
|
snap = await self._app_state_ref(app_name).get()
|
||||||
|
return snap.to_dict() or {} if snap.exists else {}
|
||||||
|
|
||||||
|
async def _get_user_state(self, app_name: str, user_id: str) -> dict[str, Any]:
|
||||||
|
snap = await self._user_state_ref(app_name, user_id).get()
|
||||||
|
return snap.to_dict() or {} if snap.exists else {}
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _merge_state(
|
||||||
|
app_state: dict[str, Any],
|
||||||
|
user_state: dict[str, Any],
|
||||||
|
session_state: dict[str, Any],
|
||||||
|
) -> dict[str, Any]:
|
||||||
|
merged = dict(session_state)
|
||||||
|
for key, value in app_state.items():
|
||||||
|
merged[State.APP_PREFIX + key] = value
|
||||||
|
for key, value in user_state.items():
|
||||||
|
merged[State.USER_PREFIX + key] = value
|
||||||
|
return merged
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Compaction helpers
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _events_to_text(events: list[Event]) -> str:
|
||||||
|
lines: list[str] = []
|
||||||
|
for event in events:
|
||||||
|
if event.content and event.content.parts:
|
||||||
|
text = "".join(p.text or "" for p in event.content.parts)
|
||||||
|
if text:
|
||||||
|
role = "User" if event.author == "user" else "Assistant"
|
||||||
|
lines.append(f"{role}: {text}")
|
||||||
|
return "\n\n".join(lines)
|
||||||
|
|
||||||
|
async def _generate_summary(
|
||||||
|
self, existing_summary: str, events: list[Event]
|
||||||
|
) -> str:
|
||||||
|
conversation_text = self._events_to_text(events)
|
||||||
|
previous = (
|
||||||
|
f"Previous summary of earlier conversation:\n{existing_summary}\n\n"
|
||||||
|
if existing_summary
|
||||||
|
else ""
|
||||||
|
)
|
||||||
|
prompt = (
|
||||||
|
"Summarize the following conversation between a user and an "
|
||||||
|
"assistant. Preserve:\n"
|
||||||
|
"- Key decisions and conclusions\n"
|
||||||
|
"- User preferences and requirements\n"
|
||||||
|
"- Important facts, names, and numbers\n"
|
||||||
|
"- The overall topic and direction of the conversation\n"
|
||||||
|
"- Any pending tasks or open questions\n\n"
|
||||||
|
f"{previous}"
|
||||||
|
f"Conversation:\n{conversation_text}\n\n"
|
||||||
|
"Provide a clear, comprehensive summary."
|
||||||
|
)
|
||||||
|
if self._genai_client is None:
|
||||||
|
msg = "genai_client is required for compaction"
|
||||||
|
raise RuntimeError(msg)
|
||||||
|
response = await self._genai_client.aio.models.generate_content(
|
||||||
|
model=self._compaction_model,
|
||||||
|
contents=prompt,
|
||||||
|
)
|
||||||
|
return response.text or ""
|
||||||
|
|
||||||
|
    async def _compact_session(self, session: Session) -> None:
        """Summarize and delete old events, keeping the most recent ones.

        Loads all events in timestamp order; when more than
        ``_compaction_keep_recent`` exist, the older ones are folded into the
        session document's ``conversation_summary`` field and then deleted.
        Summary-generation failure is non-fatal: events are left intact.
        """
        app_name = session.app_name
        user_id = session.user_id
        session_id = session.id

        events_ref = self._events_col(app_name, user_id, session_id)
        query = events_ref.order_by("timestamp")
        event_docs = await query.get()

        # Nothing to do if everything already fits in the keep-recent window.
        if len(event_docs) <= self._compaction_keep_recent:
            return

        all_events = [Event.model_validate(doc.to_dict()) for doc in event_docs]
        events_to_summarize = all_events[: -self._compaction_keep_recent]

        session_snap = await self._session_ref(app_name, user_id, session_id).get()
        existing_summary = (session_snap.to_dict() or {}).get(
            "conversation_summary", ""
        )

        try:
            summary = await self._generate_summary(
                existing_summary, events_to_summarize
            )
        except Exception:
            # Best effort: a failed summary must never cost us events.
            logger.exception("Compaction summary generation failed; skipping.")
            return

        # Write summary BEFORE deleting events so a crash between the two
        # steps leaves safe duplication rather than data loss.
        await self._session_ref(app_name, user_id, session_id).update(
            {"conversation_summary": summary}
        )

        # Delete in chunks of 500 — Firestore's per-batch write limit.
        docs_to_delete = event_docs[: -self._compaction_keep_recent]
        for i in range(0, len(docs_to_delete), 500):
            batch = self._db.batch()
            for doc in docs_to_delete[i : i + 500]:
                batch.delete(doc.reference)
            await batch.commit()

        logger.info(
            "Compacted session %s: summarised %d events, kept %d.",
            session_id,
            len(docs_to_delete),
            self._compaction_keep_recent,
        )
|
||||||
|
|
||||||
|
    async def _guarded_compact(self, session: Session) -> None:
        """Run compaction in the background with per-session locking.

        Two guards are applied in order: a process-local ``asyncio.Lock``
        (cheap, catches concurrent triggers in this process) and a
        Firestore-level ``compaction_lock`` field claimed transactionally
        (catches other service instances). The Firestore lock is always
        released in the ``finally`` block, even if compaction fails.

        NOTE(review): ``_compaction_locks`` grows by one entry per session key
        and is never pruned — presumably acceptable for expected session
        counts; verify if long-lived processes handle many sessions.
        """
        key = f"{session.app_name}__{session.user_id}__{session.id}"
        lock = self._compaction_locks.setdefault(key, asyncio.Lock())

        if lock.locked():
            logger.debug("Compaction already running locally for %s; skipping.", key)
            return

        async with lock:
            session_ref = self._session_ref(
                session.app_name, session.user_id, session.id
            )
            try:
                transaction = self._db.transaction()
                claimed = await _try_claim_compaction_txn(transaction, session_ref)
            except Exception:
                logger.exception("Failed to claim compaction lock for %s", key)
                return

            if not claimed:
                logger.debug(
                    "Compaction lock held by another instance for %s; skipping.",
                    key,
                )
                return

            try:
                await self._compact_session(session)
            except Exception:
                logger.exception("Background compaction failed for %s", key)
            finally:
                # Always release the distributed lock, even on failure.
                try:
                    await session_ref.update({"compaction_lock": None})
                except Exception:
                    logger.exception("Failed to release compaction lock for %s", key)
|
||||||
|
|
||||||
|
async def close(self) -> None:
|
||||||
|
"""Await all in-flight compaction tasks. Call before shutdown."""
|
||||||
|
if self._active_tasks:
|
||||||
|
await asyncio.gather(*self._active_tasks, return_exceptions=True)
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# BaseSessionService implementation
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
    @override
    async def create_session(
        self,
        *,
        app_name: str,
        user_id: str,
        state: dict[str, Any] | None = None,
        session_id: str | None = None,
    ) -> Session:
        """Create a session document and persist any scoped state deltas.

        A blank/whitespace ``session_id`` is treated as absent and a UUID is
        generated. App- and user-scoped entries in *state* are written to
        their shared documents; session-scoped entries stay on the session
        document.

        Raises:
            AlreadyExistsError: if an explicit ``session_id`` already exists.

        NOTE(review): the exists-check and the later ``set`` are not in one
        transaction, so two concurrent creates with the same id could race —
        confirm whether that matters for callers.
        """
        if session_id and session_id.strip():
            session_id = session_id.strip()
            existing = await self._session_ref(app_name, user_id, session_id).get()
            if existing.exists:
                msg = f"Session with id {session_id} already exists."
                raise AlreadyExistsError(msg)
        else:
            session_id = str(uuid.uuid4())

        # Split incoming state by scope prefix (app:/user:/plain).
        state_deltas = _session_util.extract_state_delta(state)  # type: ignore[attr-defined]
        app_state_delta = state_deltas["app"]
        user_state_delta = state_deltas["user"]
        session_state = state_deltas["session"]

        write_coros: list = []
        if app_state_delta:
            write_coros.append(
                self._app_state_ref(app_name).set(app_state_delta, merge=True)
            )
        if user_state_delta:
            write_coros.append(
                self._user_state_ref(app_name, user_id).set(
                    user_state_delta, merge=True
                )
            )

        now = time.time()
        write_coros.append(
            self._session_ref(app_name, user_id, session_id).set(
                {
                    "app_name": app_name,
                    "user_id": user_id,
                    "session_id": session_id,
                    "state": session_state or {},
                    "last_update_time": now,
                }
            )
        )
        # All writes are independent documents; issue them concurrently.
        await asyncio.gather(*write_coros)

        # Re-read shared scopes so the returned Session reflects pre-existing
        # app/user state, not just the deltas written above.
        app_state, user_state = await asyncio.gather(
            self._get_app_state(app_name),
            self._get_user_state(app_name, user_id),
        )
        merged = self._merge_state(app_state, user_state, session_state or {})

        return Session(
            app_name=app_name,
            user_id=user_id,
            id=session_id,
            state=merged,
            last_update_time=now,
        )
|
||||||
|
|
||||||
|
    @override
    async def get_session(
        self,
        *,
        app_name: str,
        user_id: str,
        session_id: str,
        config: GetSessionConfig | None = None,
    ) -> Session | None:
        """Load a session with its events and merged scoped state.

        Returns None when the session document does not exist. When a
        ``conversation_summary`` is present (written by compaction), two
        synthetic events carrying that summary are prepended so the model
        regains earlier context.

        NOTE(review): ``config.after_timestamp`` / ``config.num_recent_events``
        are tested for truthiness, so a value of 0 is ignored — presumably
        intended (0 means "no filter"), but worth confirming.
        """
        snap = await self._session_ref(app_name, user_id, session_id).get()
        if not snap.exists:
            return None

        session_data = snap.to_dict()

        # Build events query
        events_ref = self._events_col(app_name, user_id, session_id)
        query = events_ref
        if config and config.after_timestamp:
            query = query.where(
                filter=FieldFilter("timestamp", ">=", config.after_timestamp)
            )
        query = query.order_by("timestamp")

        # Fetch events and both shared-state documents concurrently.
        event_docs, app_state, user_state = await asyncio.gather(
            query.get(),
            self._get_app_state(app_name),
            self._get_user_state(app_name, user_id),
        )
        events = [Event.model_validate(doc.to_dict()) for doc in event_docs]

        if config and config.num_recent_events:
            events = events[-config.num_recent_events :]

        # Prepend conversation summary as synthetic context events
        conversation_summary = session_data.get("conversation_summary")
        if conversation_summary:
            # Timestamps 0.0/0.001 keep the synthetic pair ordered before
            # any real event.
            summary_event = Event(
                id="summary-context",
                author="user",
                content=Content(
                    role="user",
                    parts=[
                        Part(
                            text=(
                                "[Conversation context from previous"
                                " messages]\n"
                                f"{conversation_summary}"
                            )
                        )
                    ],
                ),
                timestamp=0.0,
                invocation_id="compaction-summary",
            )
            ack_event = Event(
                id="summary-ack",
                author=app_name,
                content=Content(
                    role="model",
                    parts=[
                        Part(
                            text=(
                                "Understood, I have the context from our"
                                " previous conversation and will continue"
                                " accordingly."
                            )
                        )
                    ],
                ),
                timestamp=0.001,
                invocation_id="compaction-summary",
            )
            events = [summary_event, ack_event, *events]

        # Merge scoped state
        merged = self._merge_state(app_state, user_state, session_data.get("state", {}))

        return Session(
            app_name=app_name,
            user_id=user_id,
            id=session_id,
            state=merged,
            events=events,
            last_update_time=session_data.get("last_update_time", 0.0),
        )
|
||||||
|
|
||||||
|
    @override
    async def list_sessions(
        self, *, app_name: str, user_id: str | None = None
    ) -> ListSessionsResponse:
        """List sessions for an app, optionally restricted to one user.

        Returned sessions carry merged scoped state but empty ``events``
        lists (events are only loaded by :meth:`get_session`).
        """
        query = self._db.collection(f"{self._prefix}_sessions").where(
            filter=FieldFilter("app_name", "==", app_name)
        )
        if user_id is not None:
            query = query.where(filter=FieldFilter("user_id", "==", user_id))

        docs = await query.get()
        if not docs:
            return ListSessionsResponse()

        doc_dicts: list[dict[str, Any]] = [doc.to_dict() or {} for doc in docs]

        # Pre-fetch app state and all distinct user states in parallel
        unique_user_ids = list({d["user_id"] for d in doc_dicts})
        app_state, *user_states = await asyncio.gather(
            self._get_app_state(app_name),
            *(self._get_user_state(app_name, uid) for uid in unique_user_ids),
        )
        user_state_cache = dict(zip(unique_user_ids, user_states, strict=False))

        sessions: list[Session] = []
        for data in doc_dicts:
            s_user_id = data["user_id"]
            merged = self._merge_state(
                app_state,
                user_state_cache[s_user_id],
                data.get("state", {}),
            )

            sessions.append(
                Session(
                    app_name=app_name,
                    user_id=s_user_id,
                    id=data["session_id"],
                    state=merged,
                    events=[],
                    last_update_time=data.get("last_update_time", 0.0),
                )
            )

        return ListSessionsResponse(sessions=sessions)
|
||||||
|
|
||||||
|
@override
|
||||||
|
async def delete_session(
|
||||||
|
self, *, app_name: str, user_id: str, session_id: str
|
||||||
|
) -> None:
|
||||||
|
ref = self._session_ref(app_name, user_id, session_id)
|
||||||
|
await self._db.recursive_delete(ref)
|
||||||
|
|
||||||
|
    @override
    async def append_event(self, session: Session, event: Event) -> Event:
        """Persist *event*, apply its state deltas, and maybe trigger compaction.

        Partial (streaming) events are returned unchanged without persistence.
        State deltas are split by scope and written concurrently; the session
        document's ``last_update_time`` is always advanced. When the event's
        ``usage_metadata.total_token_count`` reaches the configured threshold,
        a background compaction task is started (fire-and-forget, tracked in
        ``_active_tasks`` so :meth:`close` can await it).
        """
        if event.partial:
            return event

        t0 = time.monotonic()

        app_name = session.app_name
        user_id = session.user_id
        session_id = session.id

        # Base class: strips temp state, applies delta to in-memory session,
        # appends event to session.events
        event = await super().append_event(session=session, event=event)
        session.last_update_time = event.timestamp

        # Persist event document
        event_data = event.model_dump(mode="json", exclude_none=True)
        await (
            self._events_col(app_name, user_id, session_id)
            .document(event.id)
            .set(event_data)
        )

        # Persist state deltas
        session_ref = self._session_ref(app_name, user_id, session_id)

        if event.actions and event.actions.state_delta:
            state_deltas = _session_util.extract_state_delta(event.actions.state_delta)

            write_coros: list = []
            if state_deltas["app"]:
                write_coros.append(
                    self._app_state_ref(app_name).set(state_deltas["app"], merge=True)
                )
            if state_deltas["user"]:
                write_coros.append(
                    self._user_state_ref(app_name, user_id).set(
                        state_deltas["user"], merge=True
                    )
                )

            if state_deltas["session"]:
                # FieldPath escapes keys so dots in keys don't split into
                # nested-field paths.
                field_updates: dict[str, Any] = {
                    FieldPath("state", k).to_api_repr(): v
                    for k, v in state_deltas["session"].items()
                }
                field_updates["last_update_time"] = event.timestamp
                write_coros.append(session_ref.update(field_updates))
            else:
                write_coros.append(
                    session_ref.update({"last_update_time": event.timestamp})
                )

            await asyncio.gather(*write_coros)
        else:
            await session_ref.update({"last_update_time": event.timestamp})

        # Log token usage
        if event.usage_metadata:
            meta = event.usage_metadata
            logger.info(
                "Token usage for session %s event %s: "
                "prompt=%s, candidates=%s, total=%s",
                session_id,
                event.id,
                meta.prompt_token_count,
                meta.candidates_token_count,
                meta.total_token_count,
            )

        # Trigger compaction if total token count exceeds threshold
        if (
            self._compaction_threshold is not None
            and event.usage_metadata
            and event.usage_metadata.total_token_count
            and event.usage_metadata.total_token_count >= self._compaction_threshold
        ):
            logger.info(
                "Compaction triggered for session %s: "
                "total_token_count=%d >= threshold=%d",
                session_id,
                event.usage_metadata.total_token_count,
                self._compaction_threshold,
            )
            # Fire-and-forget; discard callback prevents the set from growing.
            task = asyncio.create_task(self._guarded_compact(session))
            self._active_tasks.add(task)
            task.add_done_callback(self._active_tasks.discard)

        elapsed = time.monotonic() - t0
        logger.info(
            "append_event completed for session %s event %s in %.3fs",
            session_id,
            event.id,
            elapsed,
        )

        return event
|
||||||
1
tests/__init__.py
Normal file
1
tests/__init__.py
Normal file
@@ -0,0 +1 @@
|
|||||||
|
|
||||||
35
tests/conftest.py
Normal file
35
tests/conftest.py
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
"""Shared fixtures for Firestore session service tests."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import os
|
||||||
|
import uuid
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
import pytest_asyncio
|
||||||
|
from google.cloud.firestore_v1.async_client import AsyncClient
|
||||||
|
|
||||||
|
from va_agent.session import FirestoreSessionService
|
||||||
|
|
||||||
|
# Point the client at a local Firestore emulator unless the caller already
# configured one — tests must never touch a real project.
os.environ.setdefault("FIRESTORE_EMULATOR_HOST", "localhost:8153")


@pytest_asyncio.fixture
async def db():
    """Async Firestore client bound to the emulator's test project."""
    return AsyncClient(project="test-project")
|
||||||
|
|
||||||
|
|
||||||
|
@pytest_asyncio.fixture
async def service(db: AsyncClient):
    """Session service with a unique collection prefix to isolate each test."""
    prefix = f"test_{uuid.uuid4().hex[:8]}"
    return FirestoreSessionService(db=db, collection_prefix=prefix)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def app_name():
    """Random app name so tests cannot collide on shared documents."""
    return f"app_{uuid.uuid4().hex[:8]}"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def user_id():
    """Random user id so tests cannot collide on shared documents."""
    return f"user_{uuid.uuid4().hex[:8]}"
|
||||||
515
tests/test_compaction.py
Normal file
515
tests/test_compaction.py
Normal file
@@ -0,0 +1,515 @@
|
|||||||
|
"""Tests for conversation compaction in FirestoreSessionService."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import time
|
||||||
|
from unittest.mock import AsyncMock, MagicMock, patch
|
||||||
|
import uuid
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
import pytest_asyncio
|
||||||
|
from google import genai
|
||||||
|
from google.adk.events.event import Event
|
||||||
|
from google.cloud.firestore_v1.async_client import AsyncClient
|
||||||
|
from google.genai.types import Content, GenerateContentResponseUsageMetadata, Part
|
||||||
|
|
||||||
|
from va_agent.session import FirestoreSessionService, _try_claim_compaction_txn
|
||||||
|
|
||||||
|
pytestmark = pytest.mark.asyncio
|
||||||
|
|
||||||
|
|
||||||
|
@pytest_asyncio.fixture
async def mock_genai_client():
    """genai.Client stub whose generate_content returns a fixed summary."""
    client = MagicMock(spec=genai.Client)
    response = MagicMock()
    response.text = "Summary of the conversation so far."
    client.aio.models.generate_content = AsyncMock(return_value=response)
    return client
|
||||||
|
|
||||||
|
|
||||||
|
@pytest_asyncio.fixture
async def compaction_service(db: AsyncClient, mock_genai_client):
    """Service with a low token threshold (100) so compaction fires easily."""
    prefix = f"test_{uuid.uuid4().hex[:8]}"
    return FirestoreSessionService(
        db=db,
        collection_prefix=prefix,
        compaction_token_threshold=100,
        compaction_keep_recent=2,
        genai_client=mock_genai_client,
    )
|
||||||
|
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# __init__ validation
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestCompactionInit:
    """Constructor validation of the compaction configuration."""

    async def test_requires_genai_client(self, db):
        # A threshold without a client is a configuration error.
        with pytest.raises(ValueError, match="genai_client is required"):
            FirestoreSessionService(
                db=db,
                compaction_token_threshold=1000,
            )

    async def test_no_threshold_no_client_ok(self, db):
        # With compaction disabled, no genai client is needed.
        svc = FirestoreSessionService(db=db)
        assert svc._compaction_threshold is None
|
||||||
|
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Compaction trigger
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestCompactionTrigger:
    """Threshold-driven triggering of background compaction in append_event."""

    async def test_compaction_triggered_above_threshold(
        self, compaction_service, mock_genai_client, app_name, user_id
    ):
        session = await compaction_service.create_session(
            app_name=app_name, user_id=user_id
        )

        # Add 5 events, last one with usage_metadata above threshold
        base = time.time()
        for i in range(4):
            e = Event(
                author="user" if i % 2 == 0 else app_name,
                content=Content(
                    role="user" if i % 2 == 0 else "model",
                    parts=[Part(text=f"message {i}")],
                ),
                timestamp=base + i,
                invocation_id=f"inv-{i}",
            )
            await compaction_service.append_event(session, e)

        # This event crosses the threshold
        trigger_event = Event(
            author=app_name,
            content=Content(
                role="model", parts=[Part(text="final response")]
            ),
            timestamp=base + 4,
            invocation_id="inv-4",
            usage_metadata=GenerateContentResponseUsageMetadata(
                total_token_count=200,
            ),
        )
        await compaction_service.append_event(session, trigger_event)
        # close() awaits the fire-and-forget compaction task.
        await compaction_service.close()

        # Summary generation should have been called
        mock_genai_client.aio.models.generate_content.assert_called_once()

        # Fetch session: should have summary + only keep_recent events
        fetched = await compaction_service.get_session(
            app_name=app_name, user_id=user_id, session_id=session.id
        )
        # 2 synthetic summary events + 2 kept real events
        assert len(fetched.events) == 4
        assert fetched.events[0].id == "summary-context"
        assert fetched.events[1].id == "summary-ack"
        assert "Summary of the conversation" in fetched.events[0].content.parts[0].text

    async def test_no_compaction_below_threshold(
        self, compaction_service, mock_genai_client, app_name, user_id
    ):
        session = await compaction_service.create_session(
            app_name=app_name, user_id=user_id
        )
        # 50 tokens < threshold of 100 — must not summarize.
        event = Event(
            author=app_name,
            content=Content(
                role="model", parts=[Part(text="short reply")]
            ),
            timestamp=time.time(),
            invocation_id="inv-1",
            usage_metadata=GenerateContentResponseUsageMetadata(
                total_token_count=50,
            ),
        )
        await compaction_service.append_event(session, event)

        mock_genai_client.aio.models.generate_content.assert_not_called()

    async def test_no_compaction_without_usage_metadata(
        self, compaction_service, mock_genai_client, app_name, user_id
    ):
        session = await compaction_service.create_session(
            app_name=app_name, user_id=user_id
        )
        # No usage_metadata at all — trigger condition must short-circuit.
        event = Event(
            author="user",
            content=Content(
                role="user", parts=[Part(text="hello")]
            ),
            timestamp=time.time(),
            invocation_id="inv-1",
        )
        await compaction_service.append_event(session, event)

        mock_genai_client.aio.models.generate_content.assert_not_called()
|
||||||
|
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Compaction with too few events (nothing to compact)
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestCompactionEdgeCases:
    """Compaction with too few events, and resilience to summary failures."""

    async def test_skip_when_fewer_events_than_keep_recent(
        self, compaction_service, mock_genai_client, app_name, user_id
    ):
        session = await compaction_service.create_session(
            app_name=app_name, user_id=user_id
        )
        # Only 2 events, keep_recent=2 → nothing to summarize
        for i in range(2):
            e = Event(
                author="user",
                content=Content(
                    role="user", parts=[Part(text=f"msg {i}")]
                ),
                timestamp=time.time() + i,
                invocation_id=f"inv-{i}",
            )
            await compaction_service.append_event(session, e)

        # Trigger compaction manually even though threshold wouldn't fire
        await compaction_service._compact_session(session)

        mock_genai_client.aio.models.generate_content.assert_not_called()

    async def test_summary_generation_failure_is_non_fatal(
        self, compaction_service, mock_genai_client, app_name, user_id
    ):
        session = await compaction_service.create_session(
            app_name=app_name, user_id=user_id
        )
        for i in range(5):
            e = Event(
                author="user",
                content=Content(
                    role="user", parts=[Part(text=f"msg {i}")]
                ),
                timestamp=time.time() + i,
                invocation_id=f"inv-{i}",
            )
            await compaction_service.append_event(session, e)

        # Make summary generation fail
        mock_genai_client.aio.models.generate_content = AsyncMock(
            side_effect=RuntimeError("API error")
        )

        # Should not raise
        await compaction_service._compact_session(session)

        # All events should still be present
        fetched = await compaction_service.get_session(
            app_name=app_name, user_id=user_id, session_id=session.id
        )
        assert len(fetched.events) == 5
|
||||||
|
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# get_session with summary
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestGetSessionWithSummary:
    """Synthetic summary events are only injected when a summary exists."""

    async def test_no_summary_no_synthetic_events(
        self, compaction_service, app_name, user_id
    ):
        session = await compaction_service.create_session(
            app_name=app_name, user_id=user_id
        )
        event = Event(
            author="user",
            content=Content(
                role="user", parts=[Part(text="hello")]
            ),
            timestamp=time.time(),
            invocation_id="inv-1",
        )
        await compaction_service.append_event(session, event)

        fetched = await compaction_service.get_session(
            app_name=app_name, user_id=user_id, session_id=session.id
        )
        # No conversation_summary stored → only the real event comes back.
        assert len(fetched.events) == 1
        assert fetched.events[0].author == "user"
|
||||||
|
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# _events_to_text
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestEventsToText:
    """Transcript formatting used to build the summarization prompt."""

    async def test_formats_user_and_assistant(self):
        events = [
            Event(
                author="user",
                content=Content(
                    role="user", parts=[Part(text="Hi there")]
                ),
                timestamp=1.0,
                invocation_id="inv-1",
            ),
            Event(
                author="bot",
                content=Content(
                    role="model", parts=[Part(text="Hello!")]
                ),
                timestamp=2.0,
                invocation_id="inv-2",
            ),
        ]
        text = FirestoreSessionService._events_to_text(events)
        assert "User: Hi there" in text
        assert "Assistant: Hello!" in text

    async def test_skips_events_without_text(self):
        # An event with no content contributes nothing to the transcript.
        events = [
            Event(
                author="user",
                timestamp=1.0,
                invocation_id="inv-1",
            ),
        ]
        text = FirestoreSessionService._events_to_text(events)
        assert text == ""
|
||||||
|
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Firestore distributed lock
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestCompactionLock:
    """Transactional claim/release semantics of the Firestore compaction lock."""

    async def test_claim_and_release(
        self, compaction_service, app_name, user_id
    ):
        session = await compaction_service.create_session(
            app_name=app_name, user_id=user_id
        )
        session_ref = compaction_service._session_ref(
            app_name, user_id, session.id
        )

        # Claim the lock
        transaction = compaction_service._db.transaction()
        claimed = await _try_claim_compaction_txn(transaction, session_ref)
        assert claimed is True

        # Lock is now held — second claim should fail
        transaction2 = compaction_service._db.transaction()
        claimed2 = await _try_claim_compaction_txn(transaction2, session_ref)
        assert claimed2 is False

        # Release the lock
        await session_ref.update({"compaction_lock": None})

        # Can claim again after release
        transaction3 = compaction_service._db.transaction()
        claimed3 = await _try_claim_compaction_txn(transaction3, session_ref)
        assert claimed3 is True

    async def test_stale_lock_can_be_reclaimed(
        self, compaction_service, app_name, user_id
    ):
        session = await compaction_service.create_session(
            app_name=app_name, user_id=user_id
        )
        session_ref = compaction_service._session_ref(
            app_name, user_id, session.id
        )

        # Set a stale lock (older than TTL)
        await session_ref.update({"compaction_lock": time.time() - 600})

        # Should be able to reclaim a stale lock
        transaction = compaction_service._db.transaction()
        claimed = await _try_claim_compaction_txn(transaction, session_ref)
        assert claimed is True

    async def test_claim_nonexistent_session(self, compaction_service):
        # Claiming on a missing session document must fail, not create it.
        ref = compaction_service._session_ref("no_app", "no_user", "no_id")
        transaction = compaction_service._db.transaction()
        claimed = await _try_claim_compaction_txn(transaction, ref)
        assert claimed is False
|
||||||
|
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Guarded compact
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestGuardedCompact:
    """Locking behavior of `_guarded_compact`: skip when locked, release on error."""

    @staticmethod
    async def _seed_user_events(compaction_service, session, count):
        # Append `count` plain user messages so the session has history.
        for i in range(count):
            await compaction_service.append_event(
                session,
                Event(
                    author="user",
                    content=Content(
                        role="user", parts=[Part(text=f"msg {i}")]
                    ),
                    timestamp=time.time() + i,
                    invocation_id=f"inv-{i}",
                ),
            )

    async def test_local_lock_skips_concurrent(
        self, compaction_service, mock_genai_client, app_name, user_id
    ):
        """If the in-process lock is already held, compaction is skipped."""
        session = await compaction_service.create_session(
            app_name=app_name, user_id=user_id
        )
        await self._seed_user_events(compaction_service, session, 5)

        # Hold the per-session asyncio lock ourselves; _guarded_compact must bail.
        lock_key = f"{app_name}__{user_id}__{session.id}"
        local_lock = compaction_service._compaction_locks.setdefault(
            lock_key, asyncio.Lock()
        )
        async with local_lock:
            await compaction_service._guarded_compact(session)

        # No summarization call was made.
        mock_genai_client.aio.models.generate_content.assert_not_called()

    async def test_firestore_lock_held_skips(
        self, compaction_service, mock_genai_client, app_name, user_id
    ):
        """A fresh Firestore lock (held by another instance) blocks compaction."""
        session = await compaction_service.create_session(
            app_name=app_name, user_id=user_id
        )
        await self._seed_user_events(compaction_service, session, 5)

        # Write a current-timestamp lock, as another process would.
        doc_ref = compaction_service._session_ref(
            app_name, user_id, session.id
        )
        await doc_ref.update({"compaction_lock": time.time()})

        await compaction_service._guarded_compact(session)

        mock_genai_client.aio.models.generate_content.assert_not_called()

    async def test_claim_failure_logs_and_skips(
        self, compaction_service, mock_genai_client, app_name, user_id
    ):
        """An exception while claiming the lock skips compaction, not raises."""
        session = await compaction_service.create_session(
            app_name=app_name, user_id=user_id
        )

        with patch(
            "va_agent.session._try_claim_compaction_txn",
            side_effect=RuntimeError("Firestore down"),
        ):
            await compaction_service._guarded_compact(session)

        mock_genai_client.aio.models.generate_content.assert_not_called()

    async def test_compaction_failure_releases_lock(
        self, compaction_service, mock_genai_client, app_name, user_id
    ):
        """A crash inside `_compact_session` must not leak the Firestore lock."""
        session = await compaction_service.create_session(
            app_name=app_name, user_id=user_id
        )

        with patch.object(
            compaction_service,
            "_compact_session",
            side_effect=RuntimeError("unexpected crash"),
        ):
            await compaction_service._guarded_compact(session)

        # The lock field must be cleared despite the failure.
        doc_ref = compaction_service._session_ref(
            app_name, user_id, session.id
        )
        snapshot = await doc_ref.get()
        assert snapshot.to_dict().get("compaction_lock") is None

    async def test_lock_release_failure_is_non_fatal(
        self, compaction_service, mock_genai_client, app_name, user_id
    ):
        """A Firestore error while releasing the lock must be swallowed."""
        session = await compaction_service.create_session(
            app_name=app_name, user_id=user_id
        )

        real_session_ref = compaction_service._session_ref

        def ref_with_broken_release(an, uid, sid):
            # Wrap the real document ref so any lock-field update raises.
            ref = real_session_ref(an, uid, sid)
            real_update = ref.update

            async def failing_update(data):
                if "compaction_lock" in data:
                    raise RuntimeError("Firestore write failed")
                return await real_update(data)

            ref.update = failing_update
            return ref

        with patch.object(
            compaction_service,
            "_session_ref",
            side_effect=ref_with_broken_release,
        ):
            # Should not raise despite the lock-release failure.
            await compaction_service._guarded_compact(session)
|
||||||
|
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# close()
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestClose:
    """Shutdown behavior: close() drains in-flight background tasks."""

    async def test_close_no_tasks(self, compaction_service):
        """close() completes cleanly when no background work is pending."""
        await compaction_service.close()

    async def test_close_awaits_tasks(
        self, compaction_service, mock_genai_client, app_name, user_id
    ):
        """close() waits for every scheduled compaction task to finish."""
        session = await compaction_service.create_session(
            app_name=app_name, user_id=user_id
        )
        start = time.time()
        for i in range(4):
            await compaction_service.append_event(
                session,
                Event(
                    author="user",
                    content=Content(
                        role="user", parts=[Part(text=f"msg {i}")]
                    ),
                    timestamp=start + i,
                    invocation_id=f"inv-{i}",
                ),
            )

        # A model event with usage metadata schedules a background task
        # (asserted on _active_tasks below).
        trigger = Event(
            author=app_name,
            content=Content(
                role="model", parts=[Part(text="trigger")]
            ),
            timestamp=start + 4,
            invocation_id="inv-4",
            usage_metadata=GenerateContentResponseUsageMetadata(
                total_token_count=200,
            ),
        )
        await compaction_service.append_event(session, trigger)
        assert len(compaction_service._active_tasks) > 0

        await compaction_service.close()
        assert len(compaction_service._active_tasks) == 0
|
||||||
428
tests/test_firestore_session_service.py
Normal file
428
tests/test_firestore_session_service.py
Normal file
@@ -0,0 +1,428 @@
|
|||||||
|
"""Tests for FirestoreSessionService against the Firestore emulator."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import time
|
||||||
|
import uuid
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
from google.adk.errors.already_exists_error import AlreadyExistsError
|
||||||
|
from google.adk.events.event import Event
|
||||||
|
from google.adk.events.event_actions import EventActions
|
||||||
|
from google.adk.sessions.base_session_service import GetSessionConfig
|
||||||
|
from google.genai.types import Content, Part
|
||||||
|
|
||||||
|
pytestmark = pytest.mark.asyncio
|
||||||
|
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# create_session
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestCreateSession:
    """Behavior of FirestoreSessionService.create_session."""

    async def test_auto_generates_id(self, service, app_name, user_id):
        """Omitting session_id yields a generated id and populated metadata."""
        created = await service.create_session(
            app_name=app_name, user_id=user_id
        )
        assert created.id
        assert created.app_name == app_name
        assert created.user_id == user_id
        assert created.last_update_time > 0

    async def test_custom_id(self, service, app_name, user_id):
        """A caller-supplied session id is used verbatim."""
        created = await service.create_session(
            app_name=app_name,
            user_id=user_id,
            session_id="my-custom-session",
        )
        assert created.id == "my-custom-session"

    async def test_duplicate_id_raises(self, service, app_name, user_id):
        """Re-using an existing session id raises AlreadyExistsError."""
        sid = "dup-session"
        await service.create_session(
            app_name=app_name, user_id=user_id, session_id=sid
        )
        with pytest.raises(AlreadyExistsError):
            await service.create_session(
                app_name=app_name, user_id=user_id, session_id=sid
            )

    async def test_session_state(self, service, app_name, user_id):
        """Initial state passed at creation is visible on the session."""
        created = await service.create_session(
            app_name=app_name, user_id=user_id, state={"count": 42}
        )
        assert created.state["count"] == 42

    async def test_scoped_state(self, service, app_name, user_id):
        """app:/user:-prefixed and plain keys all appear in merged state."""
        created = await service.create_session(
            app_name=app_name,
            user_id=user_id,
            state={
                "app:global_flag": True,
                "user:lang": "es",
                "local_key": "val",
            },
        )
        assert created.state["app:global_flag"] is True
        assert created.state["user:lang"] == "es"
        assert created.state["local_key"] == "val"

    async def test_temp_state_not_persisted(self, service, app_name, user_id):
        """temp:-prefixed keys are not persisted; other keys survive a re-read."""
        created = await service.create_session(
            app_name=app_name,
            user_id=user_id,
            state={"temp:scratch": "gone", "keep": "yes"},
        )
        fetched = await service.get_session(
            app_name=app_name, user_id=user_id, session_id=created.id
        )
        assert "temp:scratch" not in fetched.state
        assert fetched.state["keep"] == "yes"
|
||||||
|
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# get_session
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestGetSession:
    """Behavior of FirestoreSessionService.get_session."""

    async def test_nonexistent_returns_none(self, service, app_name, user_id):
        """An unknown session id yields None rather than an error."""
        result = await service.get_session(
            app_name=app_name, user_id=user_id, session_id="nope"
        )
        assert result is None

    async def test_roundtrip(self, service, app_name, user_id):
        """A created session comes back with the same id, state, and time."""
        created = await service.create_session(
            app_name=app_name, user_id=user_id, state={"foo": "bar"}
        )
        fetched = await service.get_session(
            app_name=app_name, user_id=user_id, session_id=created.id
        )
        assert fetched is not None
        assert fetched.id == created.id
        assert fetched.state["foo"] == "bar"
        # Timestamps survive the Firestore round trip within 10ms.
        assert fetched.last_update_time == pytest.approx(
            created.last_update_time, abs=0.01
        )

    async def test_returns_events(self, service, app_name, user_id):
        """Appended events are returned on a subsequent get."""
        session = await service.create_session(
            app_name=app_name, user_id=user_id
        )
        await service.append_event(
            session,
            Event(
                author="user",
                content=Content(parts=[Part(text="hello")]),
                timestamp=time.time(),
                invocation_id="inv-1",
            ),
        )

        fetched = await service.get_session(
            app_name=app_name, user_id=user_id, session_id=session.id
        )
        assert len(fetched.events) == 1
        assert fetched.events[0].author == "user"

    async def test_num_recent_events(self, service, app_name, user_id):
        """GetSessionConfig.num_recent_events limits the returned events."""
        session = await service.create_session(
            app_name=app_name, user_id=user_id
        )
        for i in range(5):
            await service.append_event(
                session,
                Event(
                    author="user",
                    timestamp=time.time() + i,
                    invocation_id=f"inv-{i}",
                ),
            )

        fetched = await service.get_session(
            app_name=app_name,
            user_id=user_id,
            session_id=session.id,
            config=GetSessionConfig(num_recent_events=2),
        )
        assert len(fetched.events) == 2

    async def test_after_timestamp(self, service, app_name, user_id):
        """GetSessionConfig.after_timestamp filters out older events."""
        session = await service.create_session(
            app_name=app_name, user_id=user_id
        )
        base = time.time()
        for i in range(3):
            await service.append_event(
                session,
                Event(
                    author="user",
                    timestamp=base + i,
                    invocation_id=f"inv-{i}",
                ),
            )

        fetched = await service.get_session(
            app_name=app_name,
            user_id=user_id,
            session_id=session.id,
            config=GetSessionConfig(after_timestamp=base + 1),
        )
        assert len(fetched.events) == 2
|
||||||
|
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# list_sessions
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestListSessions:
    """Behavior of FirestoreSessionService.list_sessions."""

    async def test_empty(self, service, app_name, user_id):
        """A user with no sessions yields an empty (or absent) list."""
        resp = await service.list_sessions(
            app_name=app_name, user_id=user_id
        )
        assert resp.sessions == [] or resp.sessions is None

    async def test_returns_created_sessions(
        self, service, app_name, user_id
    ):
        """Every created session for the user appears in the listing."""
        first = await service.create_session(
            app_name=app_name, user_id=user_id
        )
        second = await service.create_session(
            app_name=app_name, user_id=user_id
        )
        resp = await service.list_sessions(
            app_name=app_name, user_id=user_id
        )
        listed_ids = {s.id for s in resp.sessions}
        assert first.id in listed_ids
        assert second.id in listed_ids

    async def test_filter_by_user(self, service, app_name):
        """Only sessions belonging to the requested user are returned."""
        uid1 = f"user_{uuid.uuid4().hex[:8]}"
        uid2 = f"user_{uuid.uuid4().hex[:8]}"
        await service.create_session(app_name=app_name, user_id=uid1)
        await service.create_session(app_name=app_name, user_id=uid2)

        resp = await service.list_sessions(
            app_name=app_name, user_id=uid1
        )
        assert len(resp.sessions) == 1
        assert resp.sessions[0].user_id == uid1

    async def test_sessions_have_merged_state(
        self, service, app_name, user_id
    ):
        """Listed sessions carry both app-scoped and session-local state."""
        await service.create_session(
            app_name=app_name,
            user_id=user_id,
            state={"app:shared": "yes", "local": "val"},
        )
        resp = await service.list_sessions(
            app_name=app_name, user_id=user_id
        )
        listed = resp.sessions[0]
        assert listed.state["app:shared"] == "yes"
        assert listed.state["local"] == "val"

    async def test_sessions_have_no_events(
        self, service, app_name, user_id
    ):
        """Listing is shallow: per-session event histories are not loaded."""
        session = await service.create_session(
            app_name=app_name, user_id=user_id
        )
        await service.append_event(
            session,
            Event(
                author="user", timestamp=time.time(), invocation_id="inv-1"
            ),
        )

        resp = await service.list_sessions(
            app_name=app_name, user_id=user_id
        )
        assert resp.sessions[0].events == []
|
||||||
|
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# delete_session
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestDeleteSession:
    """Behavior of FirestoreSessionService.delete_session."""

    async def test_delete(self, service, app_name, user_id):
        """A deleted session is no longer retrievable."""
        session = await service.create_session(
            app_name=app_name, user_id=user_id
        )
        await service.delete_session(
            app_name=app_name, user_id=user_id, session_id=session.id
        )
        result = await service.get_session(
            app_name=app_name, user_id=user_id, session_id=session.id
        )
        assert result is None

    async def test_delete_removes_events(self, service, app_name, user_id):
        """Deleting a session that has events still removes it entirely."""
        session = await service.create_session(
            app_name=app_name, user_id=user_id
        )
        await service.append_event(
            session,
            Event(
                author="user", timestamp=time.time(), invocation_id="inv-1"
            ),
        )

        await service.delete_session(
            app_name=app_name, user_id=user_id, session_id=session.id
        )
        result = await service.get_session(
            app_name=app_name, user_id=user_id, session_id=session.id
        )
        assert result is None
|
||||||
|
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# append_event
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestAppendEvent:
    """Behavior of FirestoreSessionService.append_event."""

    async def _append_state_delta(self, service, app_name, user_id, delta):
        # Create a session, append one agent event carrying `delta`,
        # then return the re-fetched session so callers can assert on state.
        session = await service.create_session(
            app_name=app_name, user_id=user_id
        )
        await service.append_event(
            session,
            Event(
                author="agent",
                actions=EventActions(state_delta=delta),
                timestamp=time.time(),
                invocation_id="inv-1",
            ),
        )
        return await service.get_session(
            app_name=app_name, user_id=user_id, session_id=session.id
        )

    async def test_basic(self, service, app_name, user_id):
        """append_event returns the same event with a valid timestamp."""
        session = await service.create_session(
            app_name=app_name, user_id=user_id
        )
        event = Event(
            author="user",
            content=Content(parts=[Part(text="hi")]),
            timestamp=time.time(),
            invocation_id="inv-1",
        )
        returned = await service.append_event(session, event)
        assert returned.id == event.id
        assert returned.timestamp > 0

    async def test_partial_event_not_persisted(
        self, service, app_name, user_id
    ):
        """Streaming partial events are not written to the event log."""
        session = await service.create_session(
            app_name=app_name, user_id=user_id
        )
        await service.append_event(
            session,
            Event(
                author="user",
                partial=True,
                timestamp=time.time(),
                invocation_id="inv-1",
            ),
        )

        fetched = await service.get_session(
            app_name=app_name, user_id=user_id, session_id=session.id
        )
        assert len(fetched.events) == 0

    async def test_session_state_delta(self, service, app_name, user_id):
        """A plain state_delta key lands in session-local state."""
        fetched = await self._append_state_delta(
            service, app_name, user_id, {"counter": 1}
        )
        assert fetched.state["counter"] == 1

    async def test_app_state_delta(self, service, app_name, user_id):
        """An app:-prefixed state_delta key lands in app-scoped state."""
        fetched = await self._append_state_delta(
            service, app_name, user_id, {"app:version": "2.0"}
        )
        assert fetched.state["app:version"] == "2.0"

    async def test_user_state_delta(self, service, app_name, user_id):
        """A user:-prefixed state_delta key lands in user-scoped state."""
        fetched = await self._append_state_delta(
            service, app_name, user_id, {"user:pref": "dark"}
        )
        assert fetched.state["user:pref"] == "dark"

    async def test_updates_last_update_time(
        self, service, app_name, user_id
    ):
        """Appending an event advances the session's last_update_time."""
        session = await service.create_session(
            app_name=app_name, user_id=user_id
        )
        before = session.last_update_time

        await service.append_event(
            session,
            Event(
                author="user",
                timestamp=time.time() + 10,
                invocation_id="inv-1",
            ),
        )

        fetched = await service.get_session(
            app_name=app_name, user_id=user_id, session_id=session.id
        )
        assert fetched.last_update_time > before

    async def test_multiple_events_accumulate(
        self, service, app_name, user_id
    ):
        """Each appended event adds to the persisted history."""
        session = await service.create_session(
            app_name=app_name, user_id=user_id
        )
        for i in range(3):
            await service.append_event(
                session,
                Event(
                    author="user",
                    content=Content(parts=[Part(text=f"msg {i}")]),
                    timestamp=time.time() + i,
                    invocation_id=f"inv-{i}",
                ),
            )

        fetched = await service.get_session(
            app_name=app_name, user_id=user_id, session_id=session.id
        )
        assert len(fetched.events) == 3

    async def test_app_state_shared_across_sessions(
        self, service, app_name, user_id
    ):
        """app:-scoped state written in one session is visible in a new one."""
        first = await service.create_session(
            app_name=app_name, user_id=user_id
        )
        await service.append_event(
            first,
            Event(
                author="agent",
                actions=EventActions(state_delta={"app:shared_val": 99}),
                timestamp=time.time(),
                invocation_id="inv-1",
            ),
        )

        second = await service.create_session(
            app_name=app_name, user_id=user_id
        )
        assert second.state["app:shared_val"] == 99
|
||||||
Reference in New Issue
Block a user