Add metadata filtering to vector search using restricts

2026-02-24 02:59:44 +00:00
parent 427de45522
commit 72808b1475
3 changed files with 65 additions and 10 deletions
--- a/main.py
+++ b/main.py
@@ -204,6 +204,7 @@ class GoogleCloudVectorSearch:
        deployed_index_id: str,
        query: Sequence[float],
        limit: int,
+        restricts: list[dict[str, list[str]]] | None = None,
    ) -> list[SearchResult]:
        """Run an async similarity search via the REST API.

@@ -229,14 +230,18 @@ class GoogleCloudVectorSearch:
            f"/locations/{self.location}"
            f"/indexEndpoints/{endpoint_id}:findNeighbors"
        )
+        query_payload = {
+            "datapoint": {"feature_vector": list(query)},
+            "neighbor_count": limit,
+        }
+
+        # Add restricts if provided
+        if restricts:
+            query_payload["restricts"] = restricts
+
        payload = {
            "deployed_index_id": deployed_index_id,
-            "queries": [
-                {
-                    "datapoint": {"feature_vector": list(query)},
-                    "neighbor_count": limit,
-                },
-            ],
+            "queries": [query_payload],
        }

        headers = await self._async_get_auth_headers()
@@ -385,12 +390,16 @@ mcp = FastMCP(
 async def knowledge_search(
    query: str,
    ctx: Context,
+    source_folders: list[str] | None = None,
 ) -> str:
    """Search a knowledge base using a natural-language query.

    Args:
        query: The text query to search for.
        ctx: MCP request context (injected automatically).
+        source_folders: Optional list of source folder paths to filter results.
+                       If provided, only documents from these folders will be returned.
+                       Example: ["Educacion Financiera", "Productos y Servicios"]

    Returns:
        A formatted string containing matched documents with id and content.
@@ -413,13 +422,31 @@ async def knowledge_search(
    embedding = response.embeddings[0].values
    t_embed = time.perf_counter()

+    # Build restricts for source folder filtering if provided
+    restricts = None
+    if source_folders:
+        restricts = [
+            {
+                "namespace": "source_folder",
+                "allow": source_folders,
+            }
+        ]
+        logger.info(f"Filtering by source_folders: {source_folders}")
+    else:
+        logger.info("No filtering - searching all folders")
+
    search_results = await app.vector_search.async_run_query(
        deployed_index_id=app.settings.deployed_index_id,
        query=embedding,
        limit=app.settings.search_limit,
+        restricts=restricts,
    )
    t_search = time.perf_counter()

+    # Log raw results from Vertex AI before similarity filtering
+    logger.info(f"Raw results from Vertex AI (before similarity filter): {len(search_results)} chunks")
+    logger.info(f"Raw chunk IDs: {[s['id'] for s in search_results]}")
+
    # Apply similarity filtering
    if search_results:
        max_sim = max(r["distance"] for r in search_results)