Add semantic caching

This commit is contained in:
2026-03-04 06:02:24 +00:00
parent 694b060fa4
commit e81aac2e29
9 changed files with 625 additions and 2 deletions

View File

@@ -0,0 +1,97 @@
# ruff: noqa: INP001
"""Semantic cache backed by Redis for knowledge search results."""
from redisvl.extensions.cache.llm.semantic import SemanticCache
from redisvl.utils.vectorize.custom import CustomVectorizer
from ..logging import log_structured_entry
def _stub_embed(content: object) -> list[float]:
"""Stub vectorizer so SemanticCache creates an index with the right dims.
Never called at runtime — we always pass pre-computed vectors to
``acheck`` and ``astore``. Only invoked once by ``CustomVectorizer``
at init time to discover the dimensionality.
"""
return [0.0] * _stub_embed.dims # type: ignore[attr-defined]
class KnowledgeSemanticCache:
    """Thin wrapper around RedisVL SemanticCache with FLAT indexing."""

    def __init__(
        self,
        redis_url: str,
        name: str = "knowledge_search_cache",
        vector_dims: int = 3072,
        distance_threshold: float = 0.12,
        ttl: int | None = 3600,
    ) -> None:
        # Prime the stub with the desired dimensionality; CustomVectorizer
        # probes it once during construction to size the index.
        _stub_embed.dims = vector_dims  # type: ignore[attr-defined]
        self._cache = SemanticCache(
            name=name,
            distance_threshold=distance_threshold,
            ttl=ttl,
            redis_url=redis_url,
            vectorizer=CustomVectorizer(embed=_stub_embed),
            overwrite=False,
        )
        self._name = name

    async def check(self, embedding: list[float]) -> str | None:
        """Return cached response for a semantically similar query, or None."""
        try:
            matches = await self._cache.acheck(
                vector=embedding,
                num_results=1,
                return_fields=["response", "prompt", "vector_distance"],
            )
        except Exception as exc:
            # Cache lookups are best-effort: log the failure and treat it
            # as a miss rather than propagating to the caller.
            log_structured_entry(
                "Semantic cache check failed, skipping cache",
                "WARNING",
                {"error": str(exc), "error_type": type(exc).__name__},
            )
            return None

        if not matches:
            return None

        best = matches[0]
        hit_details = {
            "vector_distance": best.get("vector_distance"),
            "original_prompt": best.get("prompt", "")[:100],
        }
        log_structured_entry("Semantic cache hit", "INFO", hit_details)
        return best.get("response")

    async def store(
        self,
        query: str,
        response: str,
        embedding: list[float],
        metadata: dict | None = None,
    ) -> None:
        """Store a query/response pair in the cache."""
        try:
            await self._cache.astore(
                prompt=query,
                response=response,
                vector=embedding,
                metadata=metadata,
            )
        except Exception as exc:
            # A failed write must never break the caller's request path.
            log_structured_entry(
                "Semantic cache store failed",
                "WARNING",
                {"error": str(exc), "error_type": type(exc).__name__},
            )