from unittest.mock import AsyncMock, MagicMock, patch

import pytest
from qdrant_client import models

from vector_search_mcp.engine import Backend, get_engine
from vector_search_mcp.models import Match, MatchAny, MatchExclude, SearchRow


class TestEngineIntegration:
    """Integration tests for the complete engine workflow"""

    @pytest.fixture
    def mock_complete_engine_setup(self):
        """Set up a complete mocked engine environment"""
        with (
            patch(
                "vector_search_mcp.engine.qdrant_engine.Settings"
            ) as mock_settings_class,
            patch(
                "vector_search_mcp.engine.qdrant_engine.AsyncQdrantClient"
            ) as mock_client_class,
        ):
            # Set up settings
            mock_settings = MagicMock()
            mock_settings.url = "http://localhost:6333"
            mock_settings.api_key = "test_api_key"
            mock_settings_class.return_value = mock_settings

            # Set up the client with a realistic response
            mock_client = AsyncMock()
            mock_client.search.return_value = [
                models.ScoredPoint(
                    id="doc_1",
                    score=0.95,
                    payload={
                        "text": "Advanced Python programming techniques for data science",
                        "category": "programming",
                        "language": "python",
                        "difficulty": "advanced",
                        "tags": ["python", "data-science", "machine-learning"],
                    },
                    version=1,
                ),
                models.ScoredPoint(
                    id="doc_2",
                    score=0.87,
                    payload={
                        "text": "Rust systems programming for performance-critical applications",
                        "category": "programming",
                        "language": "rust",
                        "difficulty": "intermediate",
                        "tags": ["rust", "systems", "performance"],
                    },
                    version=1,
                ),
                models.ScoredPoint(
                    id="doc_3",
                    score=0.78,
                    payload={
                        "text": "Introduction to machine learning with Python",
                        "category": "programming",
                        "language": "python",
                        "difficulty": "beginner",
                        "tags": ["python", "machine-learning", "tutorial"],
                    },
                    version=1,
                ),
            ]
            mock_client_class.return_value = mock_client

            yield {
                "settings": mock_settings,
                "client": mock_client,
                "settings_class": mock_settings_class,
                "client_class": mock_client_class,
            }

    @pytest.mark.asyncio
    async def test_complete_semantic_search_workflow(self, mock_complete_engine_setup):
        """Test the complete workflow from factory to results"""
        mocks = mock_complete_engine_setup

        # 1. Create the engine through the factory
        engine = get_engine(Backend.QDRANT)

        # 2. Prepare search parameters
        query_vector = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8]
        collection_name = "programming_docs"
        search_conditions = [
            Match(key="category", value="programming"),
            MatchAny(key="language", any=["python", "rust"]),
            MatchExclude(key="difficulty", exclude=["expert"]),
        ]

        # 3. Execute the semantic search
        results = await engine.semantic_search(
            embedding=query_vector,
            collection=collection_name,
            limit=5,
            conditions=search_conditions,
            threshold=0.7,
        )

        # 4. Verify the complete flow
        # Check that client.search was called with the correct parameters
        client_mock = mocks["client"]
        client_mock.search.assert_called_once()
        call_args = client_mock.search.call_args
        assert call_args[1]["collection_name"] == collection_name
        assert call_args[1]["query_vector"] == query_vector
        assert call_args[1]["limit"] == 5
        assert call_args[1]["score_threshold"] == 0.7
        assert call_args[1]["with_payload"] is True
        assert call_args[1]["with_vectors"] is False

        # Verify the conditions were transformed into a Qdrant filter
        qdrant_filter = call_args[1]["query_filter"]
        assert isinstance(qdrant_filter, models.Filter)
        assert len(qdrant_filter.must) == 3

        # Check individual conditions
        conditions = qdrant_filter.must

        # Match condition
        match_condition = next(c for c in conditions if c.key == "category")
        assert isinstance(match_condition.match, models.MatchValue)
        assert match_condition.match.value == "programming"

        # MatchAny condition
        match_any_condition = next(c for c in conditions if c.key == "language")
        assert isinstance(match_any_condition.match, models.MatchAny)
        assert match_any_condition.match.any == ["python", "rust"]

        # MatchExclude condition
        match_exclude_condition = next(c for c in conditions if c.key == "difficulty")
        assert isinstance(match_exclude_condition.match, models.MatchExcept)

        # 5. Verify the results transformation
        assert isinstance(results, list)
        assert len(results) == 3
        assert all(isinstance(result, SearchRow) for result in results)

        # Check the first result
        assert results[0].chunk_id == "doc_1"
        assert results[0].score == 0.95
        assert (
            results[0].payload["text"]
            == "Advanced Python programming techniques for data science"
        )
        assert results[0].payload["category"] == "programming"

        # Check the second result
        assert results[1].chunk_id == "doc_2"
        assert results[1].score == 0.87
        assert results[1].payload["language"] == "rust"

        # Check the third result
        assert results[2].chunk_id == "doc_3"
        assert results[2].score == 0.78
        assert results[2].payload["difficulty"] == "beginner"
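
    # The filter assertions above pin down the expected condition-to-Qdrant
    # mapping. The helper below is a minimal sketch of that transformation for
    # reference only: it is an assumption about the engine's internals, not
    # the engine's actual code, and `_to_qdrant_filter_sketch` is a
    # hypothetical name.
    @staticmethod
    def _to_qdrant_filter_sketch(conditions):
        must = []
        for cond in conditions:
            if isinstance(cond, Match):
                # Exact-value match on a payload field
                match = models.MatchValue(value=cond.value)
            elif isinstance(cond, MatchAny):
                # The field must equal one of the listed alternatives
                match = models.MatchAny(any=cond.any)
            elif isinstance(cond, MatchExclude):
                # The field must not equal any of the listed values; pydantic
                # fills MatchExcept's `except_` field via its "except" alias
                match = models.MatchExcept(**{"except": cond.exclude})
            else:
                raise TypeError(f"Unsupported condition: {cond!r}")
            must.append(models.FieldCondition(key=cond.key, match=match))
        return models.Filter(must=must) if must else None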

    @pytest.mark.asyncio
    async def test_search_with_no_conditions(self, mock_complete_engine_setup):
        """Test semantic search without any conditions"""
        engine = get_engine(Backend.QDRANT)

        results = await engine.semantic_search(
            embedding=[0.1, 0.2, 0.3], collection="test_collection"
        )

        # Verify no filter was applied
        client_mock = mock_complete_engine_setup["client"]
        call_args = client_mock.search.call_args
        assert call_args[1]["query_filter"] is None

        # Results should still be transformed
        assert len(results) == 3
        assert all(isinstance(result, SearchRow) for result in results)

    @pytest.mark.asyncio
    async def test_search_with_empty_conditions(self, mock_complete_engine_setup):
        """Test semantic search with an empty conditions list"""
        engine = get_engine(Backend.QDRANT)

        results = await engine.semantic_search(
            embedding=[0.1, 0.2, 0.3], collection="test_collection", conditions=[]
        )

        # Verify no filter was applied
        client_mock = mock_complete_engine_setup["client"]
        call_args = client_mock.search.call_args
        assert call_args[1]["query_filter"] is None

        assert len(results) == 3
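
    # A minimal sketch of the ScoredPoint-to-SearchRow transformation that the
    # null-payload test below exercises. This is an assumption about the
    # engine's internals (the field names come from the assertions in this
    # file), not the engine's actual code.
    @staticmethod
    def _to_search_rows_sketch(points):
        # Points without a payload carry no usable content, so drop them
        return [
            SearchRow(chunk_id=point.id, score=point.score, payload=point.payload)
            for point in points
            if point.payload is not None
        ]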

    @pytest.mark.asyncio
    async def test_search_filters_null_payloads(self, mock_complete_engine_setup):
        """Test that results with null payloads are filtered out"""
        # Override the mock response to include a null payload
        client_mock = mock_complete_engine_setup["client"]
        client_mock.search.return_value = [
            models.ScoredPoint(
                id="valid_1",
                score=0.95,
                payload={"text": "Valid document"},
                version=1,
            ),
            models.ScoredPoint(
                id="invalid",
                score=0.90,
                payload=None,  # This should be filtered out
                version=1,
            ),
            models.ScoredPoint(
                id="valid_2",
                score=0.85,
                payload={"text": "Another valid document"},
                version=1,
            ),
        ]

        engine = get_engine(Backend.QDRANT)
        results = await engine.semantic_search(
            embedding=[0.1, 0.2, 0.3], collection="test_collection"
        )

        # Should only have 2 results (the null payload is filtered out)
        assert len(results) == 2
        assert results[0].chunk_id == "valid_1"
        assert results[1].chunk_id == "valid_2"

    @pytest.mark.asyncio
    async def test_error_propagation_from_client(self, mock_complete_engine_setup):
        """Test that client errors are properly propagated"""
        # Make the client raise an exception
        client_mock = mock_complete_engine_setup["client"]
        client_mock.search.side_effect = Exception("Qdrant connection timeout")

        engine = get_engine(Backend.QDRANT)

        with pytest.raises(Exception, match="Qdrant connection timeout"):
            await engine.semantic_search(
                embedding=[0.1, 0.2, 0.3], collection="test_collection"
            )

    @pytest.mark.asyncio
    async def test_search_with_named_vector(self, mock_complete_engine_setup):
        """Test semantic search with a NamedVector instead of a regular vector"""
        engine = get_engine(Backend.QDRANT)

        named_vector = models.NamedVector(
            name="text_embedding", vector=[0.1, 0.2, 0.3, 0.4, 0.5]
        )

        results = await engine.semantic_search(
            embedding=named_vector,  # type: ignore - Testing duck typing
            collection="test_collection",
        )

        # Verify the named vector was passed through
        client_mock = mock_complete_engine_setup["client"]
        call_args = client_mock.search.call_args
        assert call_args[1]["query_vector"] == named_vector
        assert len(results) == 3

    @pytest.mark.asyncio
    async def test_search_parameter_defaults(self, mock_complete_engine_setup):
        """Test that default parameters are applied correctly"""
        engine = get_engine(Backend.QDRANT)

        await engine.semantic_search(
            embedding=[0.1, 0.2, 0.3], collection="test_collection"
        )

        client_mock = mock_complete_engine_setup["client"]
        call_args = client_mock.search.call_args

        # Check defaults
        assert call_args[1]["limit"] == 10  # default limit
        assert call_args[1]["score_threshold"] is None  # default threshold
        assert call_args[1]["query_filter"] is None  # default conditions
        assert call_args[1]["with_payload"] is True
        assert call_args[1]["with_vectors"] is False

    @pytest.mark.asyncio
    async def test_multiple_engine_instances_independence(
        self, mock_complete_engine_setup
    ):
        """Test that multiple engine instances work independently"""
        # Create two engines
        engine1 = get_engine(Backend.QDRANT)
        engine2 = get_engine(Backend.QDRANT)

        # Verify they are the same instance due to caching
        assert engine1 is engine2

        # Both should work with the same instance
        results1 = await engine1.semantic_search(
            embedding=[0.1, 0.2, 0.3], collection="collection1"
        )
        results2 = await engine2.semantic_search(
            embedding=[0.4, 0.5, 0.6], collection="collection2"
        )

        assert len(results1) == 3
        assert len(results2) == 3

        # Verify the client was called twice (same instance, multiple calls)
        client_mock = mock_complete_engine_setup["client"]
        assert client_mock.search.call_count == 2
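
    # The identity assertions above are consistent with get_engine memoizing
    # one engine instance per backend, e.g. via functools.lru_cache. A
    # hypothetical sketch of such a factory (an assumption, not the project's
    # actual code):
    #
    #   @functools.lru_cache(maxsize=None)
    #   def get_engine(backend: Backend) -> QdrantEngine:
    #       return _ENGINE_CLASSES[backend]()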

    @pytest.mark.asyncio
    async def test_large_result_set_handling(self, mock_complete_engine_setup):
        """Test handling of large result sets"""
        # Create a large mock response
        large_response = []
        for i in range(100):
            large_response.append(
                models.ScoredPoint(
                    id=f"doc_{i}",
                    score=0.9 - (i * 0.001),  # Decreasing scores
                    payload={"text": f"Document {i}", "index": i},
                    version=1,
                )
            )

        client_mock = mock_complete_engine_setup["client"]
        client_mock.search.return_value = large_response

        engine = get_engine(Backend.QDRANT)
        results = await engine.semantic_search(
            embedding=[0.1, 0.2, 0.3], collection="large_collection", limit=100
        )

        # Should handle all 100 results
        assert len(results) == 100
        assert results[0].chunk_id == "doc_0"
        assert results[0].score == 0.9
        assert results[99].chunk_id == "doc_99"
        # pytest.approx guards against floating-point rounding in the
        # 0.9 - (99 * 0.001) computation above
        assert results[99].score == pytest.approx(0.801)

    def test_engine_type_consistency(self):
        """Test that engine types are consistent across multiple calls"""
        with (
            patch("vector_search_mcp.engine.qdrant_engine.Settings"),
            patch("vector_search_mcp.engine.qdrant_engine.AsyncQdrantClient"),
        ):
            engines = [get_engine(Backend.QDRANT) for _ in range(5)]

            # All should be the same instance due to caching
            assert all(engine is engines[0] for engine in engines)

            # All should be QdrantEngine instances
            from vector_search_mcp.engine.qdrant_engine import QdrantEngine

            assert all(isinstance(engine, QdrantEngine) for engine in engines)
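

# A minimal usage sketch of the API these tests exercise. Illustrative only:
# the embedding values, collection name, and condition are placeholders, and
# running it requires a reachable Qdrant instance via the configured Settings.
if __name__ == "__main__":
    import asyncio

    async def _demo() -> None:
        engine = get_engine(Backend.QDRANT)
        rows = await engine.semantic_search(
            embedding=[0.1, 0.2, 0.3],
            collection="programming_docs",
            limit=5,
            conditions=[Match(key="category", value="programming")],
            threshold=0.7,
        )
        for row in rows:
            print(row.chunk_id, row.score)

    asyncio.run(_demo())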