Add semantic caching
This commit is contained in:
@@ -67,6 +67,23 @@ async def knowledge_search(
|
||||
{"time_ms": round((t_embed - t0) * 1000, 1)},
|
||||
)
|
||||
|
||||
# Check semantic cache before vector search
|
||||
if app.semantic_cache is not None and source is None:
|
||||
cached = await app.semantic_cache.check(embedding)
|
||||
if cached is not None:
|
||||
t_cache = time.perf_counter()
|
||||
log_structured_entry(
|
||||
"knowledge_search completed from cache",
|
||||
"INFO",
|
||||
{
|
||||
"embedding_ms": f"{round((t_embed - t0) * 1000, 1)}ms",
|
||||
"cache_check_ms": f"{round((t_cache - t_embed) * 1000, 1)}ms",
|
||||
"total_ms": f"{round((t_cache - t0) * 1000, 1)}ms",
|
||||
"cache_hit": True,
|
||||
},
|
||||
)
|
||||
return cached
|
||||
|
||||
# Perform vector search
|
||||
log_structured_entry("Performing vector search", "INFO")
|
||||
try:
|
||||
@@ -98,16 +115,23 @@ async def knowledge_search(
|
||||
"source_filter": source.value if source is not None else None,
|
||||
"results_count": len(filtered_results),
|
||||
"chunks": [s["id"] for s in filtered_results],
|
||||
"cache_hit": False,
|
||||
},
|
||||
)
|
||||
|
||||
# Format and return results
|
||||
formatted = format_search_results(filtered_results)
|
||||
|
||||
if not filtered_results:
|
||||
log_structured_entry(
|
||||
"No results found for query", "INFO", {"query": query[:100]}
|
||||
)
|
||||
|
||||
return format_search_results(filtered_results)
|
||||
# Store in semantic cache (only for unfiltered queries with results)
|
||||
if app.semantic_cache is not None and source is None and filtered_results:
|
||||
await app.semantic_cache.store(query, formatted, embedding)
|
||||
|
||||
return formatted
|
||||
|
||||
except Exception as e: # noqa: BLE001
|
||||
# Catch-all for any unexpected errors
|
||||
|
||||
@@ -62,6 +62,13 @@ class Settings(BaseSettings):
|
||||
log_level: str = "INFO"
|
||||
cloud_logging_enabled: bool = False
|
||||
|
||||
# Semantic cache (Redis)
|
||||
redis_url: str | None = None
|
||||
cache_name: str = "knowledge_search_cache"
|
||||
cache_vector_dims: int = 3072
|
||||
cache_distance_threshold: float = 0.12
|
||||
cache_ttl: int | None = 3600
|
||||
|
||||
@classmethod
|
||||
def settings_customise_sources(
|
||||
cls,
|
||||
|
||||
@@ -9,6 +9,7 @@ if TYPE_CHECKING:
|
||||
|
||||
from .clients.vector_search import GoogleCloudVectorSearch
|
||||
from .config import Settings
|
||||
from .services.semantic_cache import KnowledgeSemanticCache
|
||||
|
||||
|
||||
class SourceNamespace(StrEnum):
|
||||
@@ -34,3 +35,4 @@ class AppContext:
|
||||
vector_search: "GoogleCloudVectorSearch"
|
||||
genai_client: "genai.Client"
|
||||
settings: "Settings"
|
||||
semantic_cache: "KnowledgeSemanticCache | None" = None
|
||||
|
||||
@@ -10,6 +10,7 @@ from .clients.vector_search import GoogleCloudVectorSearch
|
||||
from .config import get_config
|
||||
from .logging import log_structured_entry
|
||||
from .models import AppContext
|
||||
from .services.semantic_cache import KnowledgeSemanticCache
|
||||
from .services.validation import (
|
||||
validate_gcs_access,
|
||||
validate_genai_access,
|
||||
@@ -112,10 +113,34 @@ async def lifespan(_server: FastMCP) -> AsyncIterator[AppContext]:
|
||||
"All validations passed - MCP server initialization complete", "INFO"
|
||||
)
|
||||
|
||||
# Initialize semantic cache if Redis is configured
|
||||
semantic_cache = None
|
||||
if config_for_init.redis_url:
|
||||
try:
|
||||
semantic_cache = KnowledgeSemanticCache(
|
||||
redis_url=config_for_init.redis_url,
|
||||
name=config_for_init.cache_name,
|
||||
vector_dims=config_for_init.cache_vector_dims,
|
||||
distance_threshold=config_for_init.cache_distance_threshold,
|
||||
ttl=config_for_init.cache_ttl,
|
||||
)
|
||||
log_structured_entry(
|
||||
"Semantic cache initialized",
|
||||
"INFO",
|
||||
{"redis_url": config_for_init.redis_url, "cache_name": config_for_init.cache_name},
|
||||
)
|
||||
except Exception as e:
|
||||
log_structured_entry(
|
||||
"Semantic cache initialization failed, continuing without cache",
|
||||
"WARNING",
|
||||
{"error": str(e), "error_type": type(e).__name__},
|
||||
)
|
||||
|
||||
yield AppContext(
|
||||
vector_search=vs,
|
||||
genai_client=genai_client,
|
||||
settings=config,
|
||||
semantic_cache=semantic_cache,
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
|
||||
97 lines — src/knowledge_search_mcp/services/semantic_cache.py (new file)
@@ -0,0 +1,97 @@
|
||||
# ruff: noqa: INP001
|
||||
"""Semantic cache backed by Redis for knowledge search results."""
|
||||
|
||||
from redisvl.extensions.cache.llm.semantic import SemanticCache
|
||||
from redisvl.utils.vectorize.custom import CustomVectorizer
|
||||
|
||||
from ..logging import log_structured_entry
|
||||
|
||||
|
||||
def _stub_embed(content: object) -> list[float]:
|
||||
"""Stub vectorizer so SemanticCache creates an index with the right dims.
|
||||
|
||||
Never called at runtime — we always pass pre-computed vectors to
|
||||
``acheck`` and ``astore``. Only invoked once by ``CustomVectorizer``
|
||||
at init time to discover the dimensionality.
|
||||
"""
|
||||
return [0.0] * _stub_embed.dims # type: ignore[attr-defined]
|
||||
|
||||
|
||||
class KnowledgeSemanticCache:
    """Thin wrapper around RedisVL SemanticCache with FLAT indexing."""

    def __init__(
        self,
        redis_url: str,
        name: str = "knowledge_search_cache",
        vector_dims: int = 3072,
        distance_threshold: float = 0.12,
        ttl: int | None = 3600,
    ) -> None:
        """Create (or attach to) the underlying RedisVL cache index.

        NOTE(review): the dimensionality is communicated to the stub
        vectorizer via a module-level function attribute, so two instances
        with different ``vector_dims`` in one process would share that
        state — confirm a single instance per process is the intended use.
        """
        # Tell the stub embedder how wide the vectors are before
        # CustomVectorizer probes it for dimensionality.
        _stub_embed.dims = vector_dims  # type: ignore[attr-defined]

        self._cache = SemanticCache(
            name=name,
            distance_threshold=distance_threshold,
            ttl=ttl,
            redis_url=redis_url,
            vectorizer=CustomVectorizer(embed=_stub_embed),
            overwrite=False,
        )
        self._name = name

    async def check(
        self,
        embedding: list[float],
    ) -> str | None:
        """Return cached response for a semantically similar query, or None."""
        try:
            matches = await self._cache.acheck(
                vector=embedding,
                num_results=1,
                return_fields=["response", "prompt", "vector_distance"],
            )
        except Exception as exc:
            # Cache problems must never break the search path — degrade
            # to a miss and let the caller do the real vector search.
            log_structured_entry(
                "Semantic cache check failed, skipping cache",
                "WARNING",
                {"error": str(exc), "error_type": type(exc).__name__},
            )
            return None

        if not matches:
            return None

        top = matches[0]
        log_structured_entry(
            "Semantic cache hit",
            "INFO",
            {
                "vector_distance": top.get("vector_distance"),
                "original_prompt": top.get("prompt", "")[:100],
            },
        )
        return top.get("response")

    async def store(
        self,
        query: str,
        response: str,
        embedding: list[float],
        metadata: dict | None = None,
    ) -> None:
        """Store a query/response pair in the cache."""
        try:
            await self._cache.astore(
                prompt=query,
                response=response,
                vector=embedding,
                metadata=metadata,
            )
        except Exception as exc:
            # Best-effort write: log the failure and carry on.
            log_structured_entry(
                "Semantic cache store failed",
                "WARNING",
                {"error": str(exc), "error_type": type(exc).__name__},
            )
|
||||
Reference in New Issue
Block a user