diff --git a/main.py b/main.py index ffac63f..e5b053b 100644 --- a/main.py +++ b/main.py @@ -654,51 +654,128 @@ async def knowledge_search( t0 = time.perf_counter() min_sim = 0.6 - response = await app.genai_client.aio.models.embed_content( - model=app.settings.embedding_model, - contents=query, - config=genai_types.EmbedContentConfig( - task_type="RETRIEVAL_QUERY", - - ), - ) - embedding = response.embeddings[0].values - t_embed = time.perf_counter() - - search_results = await app.vector_search.async_run_query( - deployed_index_id=app.settings.deployed_index_id, - query=embedding, - limit=app.settings.search_limit, - ) - t_search = time.perf_counter() - - # Apply similarity filtering - if search_results: - max_sim = max(r["distance"] for r in search_results) - cutoff = max_sim * 0.9 - search_results = [ - s - for s in search_results - if s["distance"] > cutoff and s["distance"] > min_sim - ] - log_structured_entry( - "knowledge_search timing", + "knowledge_search request received", "INFO", - { - "embedding": f"{round((t_embed - t0) * 1000, 1)}ms", - "vector_search": f"{round((t_search - t_embed) * 1000, 1)}ms", - "total": f"{round((t_search - t0) * 1000, 1)}ms", - "chunks": [s["id"] for s in search_results] - } + {"query": query[:100]} # Log first 100 chars of query ) - # Format results as XML-like documents - formatted_results = [ - f"\n{result['content']}\n" - for i, result in enumerate(search_results, start=1) - ] - return "\n".join(formatted_results) + try: + # Generate embedding for the query + log_structured_entry("Generating query embedding", "INFO") + try: + response = await app.genai_client.aio.models.embed_content( + model=app.settings.embedding_model, + contents=query, + config=genai_types.EmbedContentConfig( + task_type="RETRIEVAL_QUERY", + ), + ) + embedding = response.embeddings[0].values + t_embed = time.perf_counter() + log_structured_entry( + "Query embedding generated successfully", + "INFO", + {"time_ms": round((t_embed - t0) * 1000, 1)} + ) + except Exception as e: + error_type = type(e).__name__ + error_msg = str(e) + + # Check if it's a rate limit error + if "429" in error_msg or "RESOURCE_EXHAUSTED" in error_msg: + log_structured_entry( + "Rate limit exceeded while generating embedding", + "WARNING", + { + "error": error_msg, + "error_type": error_type, + "query": query[:100] + } + ) + return "Error: API rate limit exceeded. Please try again later." + else: + log_structured_entry( + "Failed to generate query embedding", + "ERROR", + { + "error": error_msg, + "error_type": error_type, + "query": query[:100] + } + ) + return f"Error generating embedding: {error_msg}" + + # Perform vector search + log_structured_entry("Performing vector search", "INFO") + try: + search_results = await app.vector_search.async_run_query( + deployed_index_id=app.settings.deployed_index_id, + query=embedding, + limit=app.settings.search_limit, + ) + t_search = time.perf_counter() + except Exception as e: + log_structured_entry( + "Vector search failed", + "ERROR", + { + "error": str(e), + "error_type": type(e).__name__, + "query": query[:100] + } + ) + return f"Error performing vector search: {str(e)}" + + # Apply similarity filtering + if search_results: + max_sim = max(r["distance"] for r in search_results) + cutoff = max_sim * 0.9 + search_results = [ + s + for s in search_results + if s["distance"] > cutoff and s["distance"] > min_sim + ] + + log_structured_entry( + "knowledge_search completed successfully", + "INFO", + { + "embedding_ms": f"{round((t_embed - t0) * 1000, 1)}ms", + "vector_search_ms": f"{round((t_search - t_embed) * 1000, 1)}ms", + "total_ms": f"{round((t_search - t0) * 1000, 1)}ms", + "results_count": len(search_results), + "chunks": [s["id"] for s in search_results] + } + ) + + # Format results as XML-like documents + if not search_results: + log_structured_entry( + "No results found for query", + "INFO", + {"query": query[:100]} + ) + return "No relevant documents found for your query." + + formatted_results = [ + f"\n{result['content']}\n" + for i, result in enumerate(search_results, start=1) + ] + return "\n".join(formatted_results) + + except Exception as e: + # Catch-all for any unexpected errors + log_structured_entry( + "Unexpected error in knowledge_search", + "ERROR", + { + "error": str(e), + "error_type": type(e).__name__, + "query": query[:100] + } + ) + return f"Unexpected error during search: {str(e)}" if __name__ == "__main__":