forked from innovacion/searchbox
Add docstrings
This commit is contained in:
@@ -1,3 +1,16 @@
|
||||
"""Abstract base engine for vector search operations.
|
||||
|
||||
This module defines the abstract interface for vector search engines using
|
||||
generic types to ensure type safety across different backend implementations.
|
||||
|
||||
The BaseEngine class uses two generic type parameters:
|
||||
- ResponseType: The raw response type returned by the backend's search API
|
||||
- ConditionType: The backend-specific filter/condition type
|
||||
|
||||
This design allows each engine implementation to use its native types while
|
||||
maintaining a consistent interface for the semantic search workflow.
|
||||
"""
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Generic, TypeVar
|
||||
|
||||
@@ -10,13 +23,87 @@ __all__ = ["BaseEngine"]
|
||||
|
||||
|
||||
class BaseEngine(ABC, Generic[ResponseType, ConditionType]):
|
||||
"""Abstract base class for vector search engines.
|
||||
|
||||
This class defines the interface that all vector search engine implementations
|
||||
must follow. It uses generic types to ensure type safety while allowing
|
||||
different backends to use their native response and condition types.
|
||||
|
||||
Type Parameters:
|
||||
ResponseType: The raw response type returned by the backend's search API.
|
||||
For example, list[ScoredPoint] for Qdrant.
|
||||
ConditionType: The backend-specific filter/condition type.
|
||||
For example, models.Filter for Qdrant.
|
||||
|
||||
The class implements the Template Method pattern where semantic_search()
|
||||
orchestrates calls to the abstract methods that subclasses must implement.
|
||||
|
||||
Example:
|
||||
>>> class MyEngine(BaseEngine[MyResponse, MyCondition]):
|
||||
... def transform_conditions(self, conditions):
|
||||
... # Convert generic Condition objects to MyCondition
|
||||
... return my_condition
|
||||
...
|
||||
... def transform_response(self, response):
|
||||
... # Convert MyResponse to list[SearchRow]
|
||||
... return search_rows
|
||||
...
|
||||
... async def run_similarity_query(self, embedding, collection, limit=10, conditions=None, threshold=None):
|
||||
... # Execute backend-specific search
|
||||
... return my_response
|
||||
|
||||
"""
|
||||
|
||||
@abstractmethod
def transform_conditions(
    self, conditions: list[Condition] | None
) -> ConditionType | None:
    """Transform generic conditions into the backend-specific filter format.

    Implementations convert the generic Condition objects (Match, MatchAny,
    MatchExclude) into the filter representation required by the backend
    vector database.

    Args:
        conditions: Generic condition objects to apply, or None for no
            filtering.

    Returns:
        The backend-specific filter object, or None if no conditions were
        provided. The exact type is the ConditionType generic parameter.

    Example:
        For Qdrant, an implementation might convert:

        >>> conditions = [Match(key="category", value="tech")]
        >>> qdrant_filter = engine.transform_conditions(conditions)
        >>> # Returns models.Filter(must=[...])
    """
    # Abstract hook: the base class provides no behavior of its own.
    ...
|
||||
|
||||
@abstractmethod
def transform_response(self, response: ResponseType) -> list[SearchRow]:
    """Transform a backend-specific response into standardized SearchRow objects.

    Implementations convert the raw response returned by the backend vector
    database into a list of SearchRow objects with a uniform structure.

    Args:
        response: Raw response from the backend search API. The exact type
            is the ResponseType generic parameter.

    Returns:
        A list of SearchRow objects containing chunk_id, score, and payload
        for each search result.

    Example:
        For Qdrant, an implementation might convert:

        >>> response = [ScoredPoint(id=1, score=0.9, payload={...})]
        >>> search_rows = engine.transform_response(response)
        >>> # Returns [SearchRow(chunk_id="1", score=0.9, payload={...})]
    """
    # Abstract hook: the base class provides no behavior of its own.
    ...
|
||||
|
||||
@abstractmethod
|
||||
async def run_similarity_query(
|
||||
@@ -26,7 +113,37 @@ class BaseEngine(ABC, Generic[ResponseType, ConditionType]):
|
||||
limit: int = 10,
|
||||
conditions: ConditionType | None = None,
|
||||
threshold: float | None = None,
|
||||
) -> ResponseType: ...
|
||||
) -> ResponseType:
|
||||
"""Execute similarity search query against the backend vector database.
|
||||
|
||||
This method performs the actual vector similarity search using the
|
||||
backend's native API. It accepts backend-specific conditions and
|
||||
returns the raw backend response.
|
||||
|
||||
Args:
|
||||
embedding: Query vector as a list of floats.
|
||||
collection: Name of the collection/index to search in.
|
||||
limit: Maximum number of results to return. Defaults to 10.
|
||||
conditions: Backend-specific filter conditions, or None for no filtering.
|
||||
threshold: Minimum similarity score threshold, or None for no threshold.
|
||||
|
||||
Returns:
|
||||
Raw response from the backend API. The exact type depends on the
|
||||
ResponseType generic parameter.
|
||||
|
||||
Example:
|
||||
For Qdrant:
|
||||
>>> response = await run_similarity_query(
|
||||
... embedding=[0.1, 0.2, 0.3],
|
||||
... collection="documents",
|
||||
... limit=5,
|
||||
... conditions=models.Filter(...),
|
||||
... threshold=0.7
|
||||
... )
|
||||
>>> # Returns list[models.ScoredPoint]
|
||||
|
||||
"""
|
||||
...
|
||||
|
||||
async def semantic_search(
|
||||
self,
|
||||
@@ -36,6 +153,40 @@ class BaseEngine(ABC, Generic[ResponseType, ConditionType]):
|
||||
conditions: list[Condition] | None = None,
|
||||
threshold: float | None = None,
|
||||
) -> list[SearchRow]:
|
||||
"""Perform semantic search with generic interface.
|
||||
|
||||
This is the main public method that orchestrates the complete search
|
||||
workflow. It handles the conversion between generic types and backend-
|
||||
specific types, making it easy to use regardless of the underlying
|
||||
vector database.
|
||||
|
||||
The method follows this workflow:
|
||||
1. Transform generic conditions to backend-specific format
|
||||
2. Execute the similarity query using backend API
|
||||
3. Transform the response to standardized SearchRow format
|
||||
|
||||
Args:
|
||||
embedding: Query vector as a list of floats.
|
||||
collection: Name of the collection/index to search in.
|
||||
limit: Maximum number of results to return. Defaults to 10.
|
||||
conditions: List of generic filter conditions, or None for no filtering.
|
||||
threshold: Minimum similarity score threshold, or None for no threshold.
|
||||
|
||||
Returns:
|
||||
List of SearchRow objects with chunk_id, score, and payload.
|
||||
|
||||
Example:
|
||||
>>> results = await engine.semantic_search(
|
||||
... embedding=[0.1, 0.2, 0.3, 0.4, 0.5],
|
||||
... collection="documents",
|
||||
... limit=5,
|
||||
... conditions=[Match(key="category", value="tech")],
|
||||
... threshold=0.7
|
||||
... )
|
||||
>>> for result in results:
|
||||
... print(f"ID: {result.chunk_id}, Score: {result.score}")
|
||||
|
||||
"""
|
||||
transformed_conditions = self.transform_conditions(conditions)
|
||||
response = await self.run_similarity_query(
|
||||
embedding, collection, limit, transformed_conditions, threshold
|
||||
|
||||
Reference in New Issue
Block a user