forked from innovacion/searchbox
Add testing
361  tests/test_engine/test_integration.py  Normal file
@@ -0,0 +1,361 @@
from unittest.mock import AsyncMock, MagicMock, patch

import pytest
from qdrant_client import models

from vector_search_mcp.engine import Backend, get_engine
from vector_search_mcp.models import Match, MatchAny, MatchExclude, SearchRow


class TestEngineIntegration:
    """Integration tests for the complete engine workflow"""

    @pytest.fixture
    def mock_complete_engine_setup(self):
        """Setup complete mocked engine environment"""
        with (
            patch(
                "vector_search_mcp.engine.qdrant_engine.Settings"
            ) as mock_settings_class,
            patch(
                "vector_search_mcp.engine.qdrant_engine.AsyncQdrantClient"
            ) as mock_client_class,
        ):
            # Setup settings
            mock_settings = MagicMock()
            mock_settings.url = "http://localhost:6333"
            mock_settings.api_key = "test_api_key"
            mock_settings_class.return_value = mock_settings

            # Setup client with realistic response
            mock_client = AsyncMock()
            mock_client.search.return_value = [
                models.ScoredPoint(
                    id="doc_1",
                    score=0.95,
                    payload={
                        "text": "Advanced Python programming techniques for data science",
                        "category": "programming",
                        "language": "python",
                        "difficulty": "advanced",
                        "tags": ["python", "data-science", "machine-learning"],
                    },
                    version=1,
                ),
                models.ScoredPoint(
                    id="doc_2",
                    score=0.87,
                    payload={
                        "text": "Rust systems programming for performance-critical applications",
                        "category": "programming",
                        "language": "rust",
                        "difficulty": "intermediate",
                        "tags": ["rust", "systems", "performance"],
                    },
                    version=1,
                ),
                models.ScoredPoint(
                    id="doc_3",
                    score=0.78,
                    payload={
                        "text": "Introduction to machine learning with Python",
                        "category": "programming",
                        "language": "python",
                        "difficulty": "beginner",
                        "tags": ["python", "machine-learning", "tutorial"],
                    },
                    version=1,
                ),
            ]
            mock_client_class.return_value = mock_client

            yield {
                "settings": mock_settings,
                "client": mock_client,
                "settings_class": mock_settings_class,
                "client_class": mock_client_class,
            }

    @pytest.mark.asyncio
    async def test_complete_semantic_search_workflow(self, mock_complete_engine_setup):
        """Test the complete workflow from factory to results"""
        mocks = mock_complete_engine_setup

        # 1. Create engine through factory
        engine = get_engine(Backend.QDRANT)

        # 2. Prepare search parameters
        query_vector = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8]
        collection_name = "programming_docs"
        search_conditions = [
            Match(key="category", value="programming"),
            MatchAny(key="language", any=["python", "rust"]),
            MatchExclude(key="difficulty", exclude=["expert"]),
        ]

        # 3. Execute semantic search
        results = await engine.semantic_search(
            embedding=query_vector,
            collection=collection_name,
            limit=5,
            conditions=search_conditions,
            threshold=0.7,
        )

        # 4. Verify the complete flow

        # Check that client.search was called with correct parameters
        client_mock = mocks["client"]
        client_mock.search.assert_called_once()

        call_args = client_mock.search.call_args
        assert call_args[1]["collection_name"] == collection_name
        assert call_args[1]["query_vector"] == query_vector
        assert call_args[1]["limit"] == 5
        assert call_args[1]["score_threshold"] == 0.7
        assert call_args[1]["with_payload"] is True
        assert call_args[1]["with_vectors"] is False

        # Verify conditions were transformed to Qdrant filter
        qdrant_filter = call_args[1]["query_filter"]
        assert isinstance(qdrant_filter, models.Filter)
        assert len(qdrant_filter.must) == 3

        # Check individual conditions
        conditions = qdrant_filter.must

        # Match condition
        match_condition = next(c for c in conditions if c.key == "category")
        assert isinstance(match_condition.match, models.MatchValue)
        assert match_condition.match.value == "programming"

        # MatchAny condition
        match_any_condition = next(c for c in conditions if c.key == "language")
        assert isinstance(match_any_condition.match, models.MatchAny)
        assert match_any_condition.match.any == ["python", "rust"]

        # MatchExclude condition
        match_exclude_condition = next(c for c in conditions if c.key == "difficulty")
        assert isinstance(match_exclude_condition.match, models.MatchExcept)

        # 5. Verify results transformation
        assert isinstance(results, list)
        assert len(results) == 3
        assert all(isinstance(result, SearchRow) for result in results)

        # Check first result
        assert results[0].chunk_id == "doc_1"
        assert results[0].score == 0.95
        assert (
            results[0].payload["text"]
            == "Advanced Python programming techniques for data science"
        )
        assert results[0].payload["category"] == "programming"

        # Check second result
        assert results[1].chunk_id == "doc_2"
        assert results[1].score == 0.87
        assert results[1].payload["language"] == "rust"

        # Check third result
        assert results[2].chunk_id == "doc_3"
        assert results[2].score == 0.78
        assert results[2].payload["difficulty"] == "beginner"

    @pytest.mark.asyncio
    async def test_search_with_no_conditions(self, mock_complete_engine_setup):
        """Test semantic search without any conditions"""
        engine = get_engine(Backend.QDRANT)

        results = await engine.semantic_search(
            embedding=[0.1, 0.2, 0.3], collection="test_collection"
        )

        # Verify no filter was applied
        client_mock = mock_complete_engine_setup["client"]
        call_args = client_mock.search.call_args
        assert call_args[1]["query_filter"] is None

        # Results should still be transformed
        assert len(results) == 3
        assert all(isinstance(result, SearchRow) for result in results)

    @pytest.mark.asyncio
    async def test_search_with_empty_conditions(self, mock_complete_engine_setup):
        """Test semantic search with empty conditions list"""
        engine = get_engine(Backend.QDRANT)

        results = await engine.semantic_search(
            embedding=[0.1, 0.2, 0.3], collection="test_collection", conditions=[]
        )

        # Verify no filter was applied
        client_mock = mock_complete_engine_setup["client"]
        call_args = client_mock.search.call_args
        assert call_args[1]["query_filter"] is None

        assert len(results) == 3

    @pytest.mark.asyncio
    async def test_search_filters_null_payloads(self, mock_complete_engine_setup):
        """Test that results with null payloads are filtered out"""
        # Override the mock response to include null payload
        client_mock = mock_complete_engine_setup["client"]
        client_mock.search.return_value = [
            models.ScoredPoint(
                id="valid_1",
                score=0.95,
                payload={"text": "Valid document"},
                version=1,
            ),
            models.ScoredPoint(
                id="invalid",
                score=0.90,
                payload=None,  # This should be filtered out
                version=1,
            ),
            models.ScoredPoint(
                id="valid_2",
                score=0.85,
                payload={"text": "Another valid document"},
                version=1,
            ),
        ]

        engine = get_engine(Backend.QDRANT)
        results = await engine.semantic_search(
            embedding=[0.1, 0.2, 0.3], collection="test_collection"
        )

        # Should only have 2 results (null payload filtered out)
        assert len(results) == 2
        assert results[0].chunk_id == "valid_1"
        assert results[1].chunk_id == "valid_2"

    @pytest.mark.asyncio
    async def test_error_propagation_from_client(self, mock_complete_engine_setup):
        """Test that client errors are properly propagated"""
        # Make the client raise an exception
        client_mock = mock_complete_engine_setup["client"]
        client_mock.search.side_effect = Exception("Qdrant connection timeout")

        engine = get_engine(Backend.QDRANT)

        with pytest.raises(Exception, match="Qdrant connection timeout"):
            await engine.semantic_search(
                embedding=[0.1, 0.2, 0.3], collection="test_collection"
            )

    @pytest.mark.asyncio
    async def test_search_with_named_vector(self, mock_complete_engine_setup):
        """Test semantic search with NamedVector instead of regular vector"""
        engine = get_engine(Backend.QDRANT)

        named_vector = models.NamedVector(
            name="text_embedding", vector=[0.1, 0.2, 0.3, 0.4, 0.5]
        )

        results = await engine.semantic_search(
            embedding=named_vector,  # type: ignore  # Testing duck typing
            collection="test_collection",
        )

        # Verify named vector was passed through
        client_mock = mock_complete_engine_setup["client"]
        call_args = client_mock.search.call_args
        assert call_args[1]["query_vector"] == named_vector

        assert len(results) == 3

    @pytest.mark.asyncio
    async def test_search_parameter_defaults(self, mock_complete_engine_setup):
        """Test that default parameters are applied correctly"""
        engine = get_engine(Backend.QDRANT)

        await engine.semantic_search(
            embedding=[0.1, 0.2, 0.3], collection="test_collection"
        )

        client_mock = mock_complete_engine_setup["client"]
        call_args = client_mock.search.call_args

        # Check defaults
        assert call_args[1]["limit"] == 10  # default limit
        assert call_args[1]["score_threshold"] is None  # default threshold
        assert call_args[1]["query_filter"] is None  # default conditions
        assert call_args[1]["with_payload"] is True
        assert call_args[1]["with_vectors"] is False

    @pytest.mark.asyncio
    async def test_multiple_engine_instances_independence(
        self, mock_complete_engine_setup
    ):
"""Test that multiple engine instances work independently"""
        # Create two engines
        engine1 = get_engine(Backend.QDRANT)
        engine2 = get_engine(Backend.QDRANT)

        # Verify they are the same instance due to caching
        assert engine1 is engine2

        # Both should work with the same instance
        results1 = await engine1.semantic_search(
            embedding=[0.1, 0.2, 0.3], collection="collection1"
        )

        results2 = await engine2.semantic_search(
            embedding=[0.4, 0.5, 0.6], collection="collection2"
        )

        assert len(results1) == 3
        assert len(results2) == 3

        # Verify client was called twice (same instance, multiple calls)
        client_mock = mock_complete_engine_setup["client"]
        assert client_mock.search.call_count == 2

    @pytest.mark.asyncio
    async def test_large_result_set_handling(self, mock_complete_engine_setup):
        """Test handling of large result sets"""
        # Create a large mock response
        large_response = []
        for i in range(100):
            large_response.append(
                models.ScoredPoint(
                    id=f"doc_{i}",
                    score=0.9 - (i * 0.001),  # Decreasing scores
                    payload={"text": f"Document {i}", "index": i},
                    version=1,
                )
            )

        client_mock = mock_complete_engine_setup["client"]
        client_mock.search.return_value = large_response

        engine = get_engine(Backend.QDRANT)
        results = await engine.semantic_search(
            embedding=[0.1, 0.2, 0.3], collection="large_collection", limit=100
        )

        # Should handle all 100 results
        assert len(results) == 100
        assert results[0].chunk_id == "doc_0"
        assert results[0].score == 0.9
        assert results[99].chunk_id == "doc_99"
        # 0.9 - (99 * 0.001); approx avoids brittle exact float comparison
        assert results[99].score == pytest.approx(0.801)

    def test_engine_type_consistency(self):
        """Test that engine types are consistent across multiple calls"""
        with (
            patch("vector_search_mcp.engine.qdrant_engine.Settings"),
            patch("vector_search_mcp.engine.qdrant_engine.AsyncQdrantClient"),
        ):
            engines = [get_engine(Backend.QDRANT) for _ in range(5)]

            # All should be the same instance due to caching
            assert all(engine is engines[0] for engine in engines)

            # All should be QdrantEngine instances
            from vector_search_mcp.engine.qdrant_engine import QdrantEngine

            assert all(isinstance(engine, QdrantEngine) for engine in engines)