# searchbox/src/vector_search_mcp/engine/qdrant_engine.py

"""Qdrant vector database engine implementation.

This module provides a concrete implementation of the BaseEngine interface
for the Qdrant vector database. It handles the transformation between generic
search conditions and Qdrant-specific filter objects, as well as converting
Qdrant's ScoredPoint responses to standardized SearchRow objects.

The QdrantEngine class is marked as final to prevent inheritance and uses
the generic type parameters
BaseEngine[list[models.ScoredPoint], models.Filter, models.PointStruct]
to ensure type safety with Qdrant's native types.
"""

from collections.abc import Sequence
from typing import final, override

from qdrant_client import AsyncQdrantClient, models

from ..config import Settings
from ..models import Chunk, Condition, Match, MatchAny, MatchExclude, SearchRow
from .base_engine import BaseEngine

__all__ = ["QdrantEngine"]


@final
class QdrantEngine(
    BaseEngine[list[models.ScoredPoint], models.Filter, models.PointStruct]
):
    """Qdrant vector database engine implementation.

    This class provides a concrete implementation of the BaseEngine interface
    specifically for Qdrant vector database operations. It handles:

    - Converting generic Condition objects to Qdrant Filter objects
    - Executing similarity searches using AsyncQdrantClient
    - Transforming ScoredPoint results to SearchRow objects
    - Filtering out results with null payloads
    - Creating collections and upserting chunks as Qdrant points

    Type Parameters:
        ResponseType: list[models.ScoredPoint] - Qdrant's search response format
        ConditionType: models.Filter - Qdrant's filter object format
        Third parameter: models.PointStruct - the point format produced by
            transform_chunk and consumed by run_upload_chunk

    The class is marked as @final to prevent inheritance since it's a concrete
    implementation that should not be extended.

    Example:
        >>> engine = QdrantEngine()
        >>> results = await engine.semantic_search(
        ...     embedding=[0.1, 0.2, 0.3],
        ...     collection="documents",
        ...     conditions=[Match(key="category", value="tech")]
        ... )

    """

    def __init__(self) -> None:
        """Initialize the Qdrant engine with configuration and client.

        Instantiates Settings without arguments (so its values come from
        whatever sources the Settings class defines) and builds an
        AsyncQdrantClient from the resulting ``url`` and ``api_key``.

        No exception handling is done here: anything raised while building
        the settings or the client propagates to the caller.

        """
        self.settings = Settings()  # type: ignore[reportCallArgs]
        self.client = AsyncQdrantClient(
            url=self.settings.url, api_key=self.settings.api_key
        )

    @override
    def transform_conditions(
        self, conditions: list[Condition] | None
    ) -> models.Filter | None:
        """Transform generic conditions to Qdrant Filter objects.

        Converts the generic Condition objects (Match, MatchAny, MatchExclude)
        into Qdrant's Filter format with appropriate FieldCondition objects.

        Args:
            conditions: List of generic condition objects, or None for no filtering.

        Returns:
            Qdrant Filter object with must conditions, or None if ``conditions``
            is None or empty.

        Raises:
            TypeError: If a condition is not a Match, MatchAny or MatchExclude.
                Silently skipping it would run the search with a broader
                filter than the caller asked for.

        Example:
            >>> conditions = [
            ...     Match(key="category", value="tech"),
            ...     MatchAny(key="tags", any=["python", "rust"])
            ... ]
            >>> filter_obj = transform_conditions(conditions)
            >>> # Returns models.Filter(must=[FieldCondition(...), FieldCondition(...)])

        """
        if not conditions:
            return None

        filters: list[models.Condition] = []

        for condition in conditions:
            if isinstance(condition, Match):
                matcher = models.MatchValue(value=condition.value)
            elif isinstance(condition, MatchAny):
                matcher = models.MatchAny(any=condition.any)
            elif isinstance(condition, MatchExclude):
                # "except" is a reserved word in Python, so the field has to
                # be supplied through dict unpacking instead of a keyword.
                matcher = models.MatchExcept(**{"except": condition.exclude})
            else:
                raise TypeError(
                    f"Unsupported condition type: {type(condition).__name__}"
                )

            filters.append(models.FieldCondition(key=condition.key, match=matcher))

        return models.Filter(must=filters)

    @override
    def transform_response(self, response: list[models.ScoredPoint]) -> list[SearchRow]:
        """Transform Qdrant ScoredPoint objects to SearchRow objects.

        Converts Qdrant's native ScoredPoint response format into standardized
        SearchRow objects. Filters out any results with null payloads.

        Args:
            response: List of ScoredPoint objects from Qdrant search response.

        Returns:
            List of SearchRow objects with chunk_id, score, and payload.
            The point id is converted to ``str``; results with null payloads
            are excluded.

        Example:
            >>> scored_points = [
            ...     ScoredPoint(id=1, score=0.9, payload={"text": "example"})
            ... ]
            >>> search_rows = transform_response(scored_points)
            >>> # Returns [SearchRow(chunk_id="1", score=0.9, payload={...})]

        """
        return [
            SearchRow(chunk_id=str(point.id), score=point.score, payload=point.payload)
            for point in response
            if point.payload is not None
        ]

    @override
    async def run_similarity_query(
        self,
        embedding: Sequence[float] | models.NamedVector,
        collection: str,
        limit: int = 10,
        conditions: models.Filter | None = None,
        threshold: float | None = None,
    ) -> list[models.ScoredPoint]:
        """Execute similarity search using Qdrant's search API.

        Performs vector similarity search against the specified Qdrant collection
        using the provided query vector and optional filters. Payloads are
        requested with the results; stored vectors are not.

        Args:
            embedding: Query vector as a sequence of floats or NamedVector object.
            collection: Name of the Qdrant collection to search in.
            limit: Maximum number of results to return. Defaults to 10.
            conditions: Qdrant Filter object for filtering results, or None.
            threshold: Minimum similarity score threshold, or None.

        Returns:
            List of ScoredPoint objects as returned by the client's search call.

        Example:
            >>> results = await run_similarity_query(
            ...     embedding=[0.1, 0.2, 0.3],
            ...     collection="documents",
            ...     limit=5,
            ...     threshold=0.7
            ... )
            >>> # Returns [ScoredPoint(id=..., score=..., payload=...)]

        """
        # NOTE(review): recent qdrant-client releases deprecate `search` in
        # favour of `query_points` -- confirm against the pinned client
        # version before upgrading the dependency.
        return await self.client.search(
            collection_name=collection,
            query_vector=embedding,
            query_filter=conditions,
            limit=limit,
            with_payload=True,
            with_vectors=False,
            score_threshold=threshold,
        )

    @override
    async def create_index(self, name: str, size: int) -> bool:
        """Create a Qdrant collection to act as the index.

        The collection is created with a single vector configuration of the
        given dimensionality, using cosine distance.

        Args:
            name: Name of the collection to create.
            size: Dimensionality of the vectors stored in the collection.

        Returns:
            The boolean result reported by the client's create_collection call.

        """
        return await self.client.create_collection(
            collection_name=name,
            vectors_config=models.VectorParams(
                size=size, distance=models.Distance.COSINE
            ),
        )

    @override
    async def run_upload_chunk(
        self, index_name: str, chunk: models.PointStruct
    ) -> bool:
        """Upsert a single point into a Qdrant collection.

        Args:
            index_name: Name of the collection to write to.
            chunk: Point to upsert, as produced by transform_chunk.

        Returns:
            True if Qdrant reports the update as completed or acknowledged,
            False for any other status.

        """
        result = await self.client.upsert(
            collection_name=index_name,
            points=[chunk],
        )
        # The upsert uses the client's default wait behaviour, under which a
        # successfully applied write is reported as COMPLETED. ACKNOWLEDGED is
        # what the server answers when it has accepted the write but not yet
        # applied it, so both statuses mean the upload was accepted.
        return result.status in (
            models.UpdateStatus.COMPLETED,
            models.UpdateStatus.ACKNOWLEDGED,
        )

    @override
    def transform_chunk(self, chunk: Chunk) -> models.PointStruct:
        """Convert a generic Chunk into a Qdrant PointStruct.

        Args:
            chunk: Chunk whose ``id``, ``vector`` and ``payload`` are copied
                into the point.

        Returns:
            PointStruct carrying the chunk's id and vector, with the payload
            serialized through its ``model_dump()`` method.

        """
        return models.PointStruct(
            id=chunk.id,
            vector=chunk.vector,
            payload=chunk.payload.model_dump(),
        )