Add docstrings

2025-09-26 15:45:13 +00:00
parent 17fcd3596b
commit b44a209d42
10 changed files with 942 additions and 4 deletions
--- a/src/vector_search_mcp/engine/base_engine.py
+++ b/src/vector_search_mcp/engine/base_engine.py
@@ -1,3 +1,16 @@
+"""Abstract base engine for vector search operations.
+
+This module defines the abstract interface for vector search engines using
+generic types to ensure type safety across different backend implementations.
+
+The BaseEngine class uses two generic type parameters:
+- ResponseType: The raw response type returned by the backend's search API
+- ConditionType: The backend-specific filter/condition type
+
+This design allows each engine implementation to use its native types while
+maintaining a consistent interface for the semantic search workflow.
+"""
+
 from abc import ABC, abstractmethod
 from typing import Generic, TypeVar

@@ -10,13 +23,87 @@ __all__ = ["BaseEngine"]


 class BaseEngine(ABC, Generic[ResponseType, ConditionType]):
+    """Abstract base class for vector search engines.
+
+    This class defines the interface that all vector search engine implementations
+    must follow. It uses generic types to ensure type safety while allowing
+    different backends to use their native response and condition types.
+
+    Type Parameters:
+        ResponseType: The raw response type returned by the backend's search API.
+                     For example, list[ScoredPoint] for Qdrant.
+        ConditionType: The backend-specific filter/condition type.
+                      For example, models.Filter for Qdrant.
+
+    The class implements the Template Method pattern where semantic_search()
+    orchestrates calls to the abstract methods that subclasses must implement.
+
+    Example:
+        >>> class MyEngine(BaseEngine[MyResponse, MyCondition]):
+        ...     def transform_conditions(self, conditions):
+        ...         # Convert generic Condition objects to MyCondition
+        ...         return my_condition
+        ...
+        ...     def transform_response(self, response):
+        ...         # Convert MyResponse to list[SearchRow]
+        ...         return search_rows
+        ...
+        ...     async def run_similarity_query(self, embedding, collection, *args, **kwargs):
+        ...         # Execute backend-specific search
+        ...         return my_response
+
+    """
+
    @abstractmethod
    def transform_conditions(
        self, conditions: list[Condition] | None
-    ) -> ConditionType | None: ...
+    ) -> ConditionType | None:
+        """Transform generic conditions to backend-specific filter format.
+
+        This method converts the generic Condition objects (Match, MatchAny,
+        MatchExclude) into the specific filter format required by the backend
+        vector database.
+
+        Args:
+            conditions: List of generic condition objects to apply, or None
+                       for no filtering.
+
+        Returns:
+            Backend-specific filter object, or None if no conditions provided.
+            The exact type depends on the ConditionType generic parameter.
+
+        Example:
+            For Qdrant, this might convert:
+            >>> conditions = [Match(key="category", value="tech")]
+            >>> qdrant_filter = engine.transform_conditions(conditions)
+            >>> # Returns models.Filter(must=[...])
+
+        """
+        ...

    @abstractmethod
-    def transform_response(self, response: ResponseType) -> list[SearchRow]: ...
+    def transform_response(self, response: ResponseType) -> list[SearchRow]:
+        """Transform backend-specific response to standardized SearchRow format.
+
+        This method converts the raw response from the backend vector database
+        into a list of SearchRow objects with standardized structure.
+
+        Args:
+            response: Raw response from the backend search API. The exact type
+                     depends on the ResponseType generic parameter.
+
+        Returns:
+            List of SearchRow objects containing chunk_id, score, and payload
+            for each search result.
+
+        Example:
+            For Qdrant, this might convert:
+            >>> response = [ScoredPoint(id=1, score=0.9, payload={...})]
+            >>> search_rows = engine.transform_response(response)
+            >>> # Returns [SearchRow(chunk_id="1", score=0.9, payload={...})]
+
+        """
+        ...

    @abstractmethod
    async def run_similarity_query(
@@ -26,7 +113,37 @@ class BaseEngine(ABC, Generic[ResponseType, ConditionType]):
        limit: int = 10,
        conditions: ConditionType | None = None,
        threshold: float | None = None,
-    ) -> ResponseType: ...
+    ) -> ResponseType:
+        """Execute similarity search query against the backend vector database.
+
+        This method performs the actual vector similarity search using the
+        backend's native API. It accepts backend-specific conditions and
+        returns the raw backend response.
+
+        Args:
+            embedding: Query vector as a list of floats.
+            collection: Name of the collection/index to search in.
+            limit: Maximum number of results to return. Defaults to 10.
+            conditions: Backend-specific filter conditions, or None for no filtering.
+            threshold: Minimum similarity score threshold, or None for no threshold.
+
+        Returns:
+            Raw response from the backend API. The exact type depends on the
+            ResponseType generic parameter.
+
+        Example:
+            For Qdrant:
+            >>> response = await engine.run_similarity_query(
+            ...     embedding=[0.1, 0.2, 0.3],
+            ...     collection="documents",
+            ...     limit=5,
+            ...     conditions=models.Filter(...),
+            ...     threshold=0.7
+            ... )
+            >>> # Returns list[models.ScoredPoint]
+
+        """
+        ...

    async def semantic_search(
        self,
@@ -36,6 +153,40 @@ class BaseEngine(ABC, Generic[ResponseType, ConditionType]):
        conditions: list[Condition] | None = None,
        threshold: float | None = None,
    ) -> list[SearchRow]:
+        """Perform semantic search with generic interface.
+
+        This is the main public method that orchestrates the complete search
+        workflow. It handles the conversion between generic types and backend-
+        specific types, making it easy to use regardless of the underlying
+        vector database.
+
+        The method follows this workflow:
+        1. Transform generic conditions to backend-specific format
+        2. Execute the similarity query using backend API
+        3. Transform the response to standardized SearchRow format
+
+        Args:
+            embedding: Query vector as a list of floats.
+            collection: Name of the collection/index to search in.
+            limit: Maximum number of results to return. Defaults to 10.
+            conditions: List of generic filter conditions, or None for no filtering.
+            threshold: Minimum similarity score threshold, or None for no threshold.
+
+        Returns:
+            List of SearchRow objects with chunk_id, score, and payload.
+
+        Example:
+            >>> results = await engine.semantic_search(
+            ...     embedding=[0.1, 0.2, 0.3, 0.4, 0.5],
+            ...     collection="documents",
+            ...     limit=5,
+            ...     conditions=[Match(key="category", value="tech")],
+            ...     threshold=0.7
+            ... )
+            >>> for result in results:
+            ...     print(f"ID: {result.chunk_id}, Score: {result.score}")
+
+        """
        transformed_conditions = self.transform_conditions(conditions)
        response = await self.run_similarity_query(
            embedding, collection, limit, transformed_conditions, threshold