# Commit: Add semantic caching
# New file: src/knowledge_search_mcp/services/semantic_cache.py (97 lines)
# ruff: noqa: INP001
"""Semantic cache backed by Redis for knowledge search results."""

from redisvl.extensions.cache.llm.semantic import SemanticCache
from redisvl.utils.vectorize.custom import CustomVectorizer

from ..logging import log_structured_entry
def _stub_embed(content: object) -> list[float]:
|
||||
"""Stub vectorizer so SemanticCache creates an index with the right dims.
|
||||
|
||||
Never called at runtime — we always pass pre-computed vectors to
|
||||
``acheck`` and ``astore``. Only invoked once by ``CustomVectorizer``
|
||||
at init time to discover the dimensionality.
|
||||
"""
|
||||
return [0.0] * _stub_embed.dims # type: ignore[attr-defined]
|
||||
|
||||
|
||||
class KnowledgeSemanticCache:
    """Thin wrapper around RedisVL SemanticCache with FLAT indexing."""

    def __init__(
        self,
        redis_url: str,
        name: str = "knowledge_search_cache",
        vector_dims: int = 3072,
        distance_threshold: float = 0.12,
        ttl: int | None = 3600,
    ) -> None:
        """Attach to (or create) the semantic cache index in Redis.

        Args:
            redis_url: Connection URL for the Redis instance.
            name: Index name used for the cache.
            vector_dims: Dimensionality of the query embeddings.
            distance_threshold: Maximum vector distance counted as a hit.
            ttl: Entry time-to-live in seconds, or ``None`` for no expiry.
        """
        # The stub vectorizer is probed exactly once by CustomVectorizer to
        # discover dimensionality; real vectors are always passed explicitly
        # to acheck/astore.
        _stub_embed.dims = vector_dims  # type: ignore[attr-defined]
        self._cache = SemanticCache(
            name=name,
            redis_url=redis_url,
            vectorizer=CustomVectorizer(embed=_stub_embed),
            distance_threshold=distance_threshold,
            ttl=ttl,
            overwrite=False,
        )
        self._name = name

    async def check(
        self,
        embedding: list[float],
    ) -> str | None:
        """Return cached response for a semantically similar query, or None."""
        try:
            matches = await self._cache.acheck(
                vector=embedding,
                num_results=1,
                return_fields=["response", "prompt", "vector_distance"],
            )
        except Exception as exc:  # noqa: BLE001 — cache lookups are best-effort
            # A broken cache must never break the search path: log and miss.
            log_structured_entry(
                "Semantic cache check failed, skipping cache",
                "WARNING",
                {"error": str(exc), "error_type": type(exc).__name__},
            )
            return None

        if not matches:
            return None

        top = matches[0]
        log_structured_entry(
            "Semantic cache hit",
            "INFO",
            {
                "vector_distance": top.get("vector_distance"),
                # Truncate to keep log entries bounded.
                "original_prompt": top.get("prompt", "")[:100],
            },
        )
        return top.get("response")

    async def store(
        self,
        query: str,
        response: str,
        embedding: list[float],
        metadata: dict | None = None,
    ) -> None:
        """Store a query/response pair in the cache."""
        try:
            await self._cache.astore(
                prompt=query,
                response=response,
                vector=embedding,
                metadata=metadata,
            )
        except Exception as exc:  # noqa: BLE001 — cache writes are best-effort
            # Failed writes are logged and dropped; the caller's result stands.
            log_structured_entry(
                "Semantic cache store failed",
                "WARNING",
                {"error": str(exc), "error_type": type(exc).__name__},
            )
# End of committed file.