Add semantic caching
This commit is contained in:
@@ -67,6 +67,23 @@ async def knowledge_search(
|
||||
{"time_ms": round((t_embed - t0) * 1000, 1)},
|
||||
)
|
||||
|
||||
# Check semantic cache before vector search
|
||||
if app.semantic_cache is not None and source is None:
|
||||
cached = await app.semantic_cache.check(embedding)
|
||||
if cached is not None:
|
||||
t_cache = time.perf_counter()
|
||||
log_structured_entry(
|
||||
"knowledge_search completed from cache",
|
||||
"INFO",
|
||||
{
|
||||
"embedding_ms": f"{round((t_embed - t0) * 1000, 1)}ms",
|
||||
"cache_check_ms": f"{round((t_cache - t_embed) * 1000, 1)}ms",
|
||||
"total_ms": f"{round((t_cache - t0) * 1000, 1)}ms",
|
||||
"cache_hit": True,
|
||||
},
|
||||
)
|
||||
return cached
|
||||
|
||||
# Perform vector search
|
||||
log_structured_entry("Performing vector search", "INFO")
|
||||
try:
|
||||
@@ -98,16 +115,23 @@ async def knowledge_search(
|
||||
"source_filter": source.value if source is not None else None,
|
||||
"results_count": len(filtered_results),
|
||||
"chunks": [s["id"] for s in filtered_results],
|
||||
"cache_hit": False,
|
||||
},
|
||||
)
|
||||
|
||||
# Format and return results
|
||||
formatted = format_search_results(filtered_results)
|
||||
|
||||
if not filtered_results:
|
||||
log_structured_entry(
|
||||
"No results found for query", "INFO", {"query": query[:100]}
|
||||
)
|
||||
|
||||
return format_search_results(filtered_results)
|
||||
# Store in semantic cache (only for unfiltered queries with results)
|
||||
if app.semantic_cache is not None and source is None and filtered_results:
|
||||
await app.semantic_cache.store(query, formatted, embedding)
|
||||
|
||||
return formatted
|
||||
|
||||
except Exception as e: # noqa: BLE001
|
||||
# Catch-all for any unexpected errors
|
||||
|
||||
@@ -62,6 +62,13 @@ class Settings(BaseSettings):
|
||||
log_level: str = "INFO"
|
||||
cloud_logging_enabled: bool = False
|
||||
|
||||
# Semantic cache (Redis)
|
||||
redis_url: str | None = None
|
||||
cache_name: str = "knowledge_search_cache"
|
||||
cache_vector_dims: int = 3072
|
||||
cache_distance_threshold: float = 0.12
|
||||
cache_ttl: int | None = 3600
|
||||
|
||||
@classmethod
|
||||
def settings_customise_sources(
|
||||
cls,
|
||||
|
||||
@@ -9,6 +9,7 @@ if TYPE_CHECKING:
|
||||
|
||||
from .clients.vector_search import GoogleCloudVectorSearch
|
||||
from .config import Settings
|
||||
from .services.semantic_cache import KnowledgeSemanticCache
|
||||
|
||||
|
||||
class SourceNamespace(StrEnum):
|
||||
@@ -34,3 +35,4 @@ class AppContext:
|
||||
vector_search: "GoogleCloudVectorSearch"
|
||||
genai_client: "genai.Client"
|
||||
settings: "Settings"
|
||||
semantic_cache: "KnowledgeSemanticCache | None" = None
|
||||
|
||||
@@ -10,6 +10,7 @@ from .clients.vector_search import GoogleCloudVectorSearch
|
||||
from .config import get_config
|
||||
from .logging import log_structured_entry
|
||||
from .models import AppContext
|
||||
from .services.semantic_cache import KnowledgeSemanticCache
|
||||
from .services.validation import (
|
||||
validate_gcs_access,
|
||||
validate_genai_access,
|
||||
@@ -112,10 +113,34 @@ async def lifespan(_server: FastMCP) -> AsyncIterator[AppContext]:
|
||||
"All validations passed - MCP server initialization complete", "INFO"
|
||||
)
|
||||
|
||||
# Initialize semantic cache if Redis is configured
|
||||
semantic_cache = None
|
||||
if config_for_init.redis_url:
|
||||
try:
|
||||
semantic_cache = KnowledgeSemanticCache(
|
||||
redis_url=config_for_init.redis_url,
|
||||
name=config_for_init.cache_name,
|
||||
vector_dims=config_for_init.cache_vector_dims,
|
||||
distance_threshold=config_for_init.cache_distance_threshold,
|
||||
ttl=config_for_init.cache_ttl,
|
||||
)
|
||||
log_structured_entry(
|
||||
"Semantic cache initialized",
|
||||
"INFO",
|
||||
{"redis_url": config_for_init.redis_url, "cache_name": config_for_init.cache_name},
|
||||
)
|
||||
except Exception as e:
|
||||
log_structured_entry(
|
||||
"Semantic cache initialization failed, continuing without cache",
|
||||
"WARNING",
|
||||
{"error": str(e), "error_type": type(e).__name__},
|
||||
)
|
||||
|
||||
yield AppContext(
|
||||
vector_search=vs,
|
||||
genai_client=genai_client,
|
||||
settings=config,
|
||||
semantic_cache=semantic_cache,
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
|
||||
97 lines — src/knowledge_search_mcp/services/semantic_cache.py (new file)
@@ -0,0 +1,97 @@
|
||||
# ruff: noqa: INP001
|
||||
"""Semantic cache backed by Redis for knowledge search results."""
|
||||
|
||||
from redisvl.extensions.cache.llm.semantic import SemanticCache
|
||||
from redisvl.utils.vectorize.custom import CustomVectorizer
|
||||
|
||||
from ..logging import log_structured_entry
|
||||
|
||||
|
||||
def _stub_embed(content: object) -> list[float]:
|
||||
"""Stub vectorizer so SemanticCache creates an index with the right dims.
|
||||
|
||||
Never called at runtime — we always pass pre-computed vectors to
|
||||
``acheck`` and ``astore``. Only invoked once by ``CustomVectorizer``
|
||||
at init time to discover the dimensionality.
|
||||
"""
|
||||
return [0.0] * _stub_embed.dims # type: ignore[attr-defined]
|
||||
|
||||
|
||||
class KnowledgeSemanticCache:
    """Thin wrapper around RedisVL SemanticCache with FLAT indexing."""

    def __init__(
        self,
        redis_url: str,
        name: str = "knowledge_search_cache",
        vector_dims: int = 3072,
        distance_threshold: float = 0.12,
        ttl: int | None = 3600,
    ) -> None:
        """Create (or attach to) the underlying RedisVL cache index.

        NOTE(review): the dimensionality is communicated to the stub
        vectorizer via a module-level function attribute, so two instances
        with different ``vector_dims`` in one process would share that
        state — confirm a single instance per process is the intended use.
        """
        # Tell the stub embedder how wide the vectors are before
        # CustomVectorizer probes it for dimensionality.
        _stub_embed.dims = vector_dims  # type: ignore[attr-defined]

        self._cache = SemanticCache(
            name=name,
            distance_threshold=distance_threshold,
            ttl=ttl,
            redis_url=redis_url,
            vectorizer=CustomVectorizer(embed=_stub_embed),
            overwrite=False,
        )
        self._name = name

    async def check(
        self,
        embedding: list[float],
    ) -> str | None:
        """Return cached response for a semantically similar query, or None."""
        try:
            matches = await self._cache.acheck(
                vector=embedding,
                num_results=1,
                return_fields=["response", "prompt", "vector_distance"],
            )
        except Exception as exc:
            # Cache problems must never break the search path — degrade
            # to a miss and let the caller do the real vector search.
            log_structured_entry(
                "Semantic cache check failed, skipping cache",
                "WARNING",
                {"error": str(exc), "error_type": type(exc).__name__},
            )
            return None

        if not matches:
            return None

        top = matches[0]
        log_structured_entry(
            "Semantic cache hit",
            "INFO",
            {
                "vector_distance": top.get("vector_distance"),
                "original_prompt": top.get("prompt", "")[:100],
            },
        )
        return top.get("response")

    async def store(
        self,
        query: str,
        response: str,
        embedding: list[float],
        metadata: dict | None = None,
    ) -> None:
        """Store a query/response pair in the cache."""
        try:
            await self._cache.astore(
                prompt=query,
                response=response,
                vector=embedding,
                metadata=metadata,
            )
        except Exception as exc:
            # Best-effort write: log the failure and carry on.
            log_structured_entry(
                "Semantic cache store failed",
                "WARNING",
                {"error": str(exc), "error_type": type(exc).__name__},
            )
|
||||
Reference in New Issue
Block a user