forked from innovacion/searchbox
Add testing
361  tests/test_engine/test_integration.py  Normal file
@@ -0,0 +1,361 @@
from unittest.mock import AsyncMock, MagicMock, patch

import pytest
from qdrant_client import models

from vector_search_mcp.engine import Backend, get_engine
from vector_search_mcp.models import Match, MatchAny, MatchExclude, SearchRow


class TestEngineIntegration:
    """Integration tests for the complete engine workflow"""

    @pytest.fixture
    def mock_complete_engine_setup(self):
        """Setup complete mocked engine environment"""
        with (
            patch(
                "vector_search_mcp.engine.qdrant_engine.Settings"
            ) as mock_settings_class,
            patch(
                "vector_search_mcp.engine.qdrant_engine.AsyncQdrantClient"
            ) as mock_client_class,
        ):
            # Setup settings
            mock_settings = MagicMock()
            mock_settings.url = "http://localhost:6333"
            mock_settings.api_key = "test_api_key"
            mock_settings_class.return_value = mock_settings

            # Setup client with realistic response
            mock_client = AsyncMock()
            mock_client.search.return_value = [
                models.ScoredPoint(
                    id="doc_1",
                    score=0.95,
                    payload={
                        "text": "Advanced Python programming techniques for data science",
                        "category": "programming",
                        "language": "python",
                        "difficulty": "advanced",
                        "tags": ["python", "data-science", "machine-learning"],
                    },
                    version=1,
                ),
                models.ScoredPoint(
                    id="doc_2",
                    score=0.87,
                    payload={
                        "text": "Rust systems programming for performance-critical applications",
                        "category": "programming",
                        "language": "rust",
                        "difficulty": "intermediate",
                        "tags": ["rust", "systems", "performance"],
                    },
                    version=1,
                ),
                models.ScoredPoint(
                    id="doc_3",
                    score=0.78,
                    payload={
                        "text": "Introduction to machine learning with Python",
                        "category": "programming",
                        "language": "python",
                        "difficulty": "beginner",
                        "tags": ["python", "machine-learning", "tutorial"],
                    },
                    version=1,
                ),
            ]
            mock_client_class.return_value = mock_client

            yield {
                "settings": mock_settings,
                "client": mock_client,
                "settings_class": mock_settings_class,
                "client_class": mock_client_class,
            }

    @pytest.mark.asyncio
    async def test_complete_semantic_search_workflow(self, mock_complete_engine_setup):
        """Test the complete workflow from factory to results"""
        mocks = mock_complete_engine_setup

        # 1. Create engine through factory
        engine = get_engine(Backend.QDRANT)

        # 2. Prepare search parameters
        query_vector = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8]
        collection_name = "programming_docs"
        search_conditions = [
            Match(key="category", value="programming"),
            MatchAny(key="language", any=["python", "rust"]),
            MatchExclude(key="difficulty", exclude=["expert"]),
        ]

        # 3. Execute semantic search
        results = await engine.semantic_search(
            embedding=query_vector,
            collection=collection_name,
            limit=5,
            conditions=search_conditions,
            threshold=0.7,
        )

        # 4. Verify the complete flow

        # Check that client.search was called with correct parameters
        client_mock = mocks["client"]
        client_mock.search.assert_called_once()

        call_args = client_mock.search.call_args
        assert call_args[1]["collection_name"] == collection_name
        assert call_args[1]["query_vector"] == query_vector
        assert call_args[1]["limit"] == 5
        assert call_args[1]["score_threshold"] == 0.7
        assert call_args[1]["with_payload"] is True
        assert call_args[1]["with_vectors"] is False

        # Verify conditions were transformed to Qdrant filter
        qdrant_filter = call_args[1]["query_filter"]
        assert isinstance(qdrant_filter, models.Filter)
        assert len(qdrant_filter.must) == 3

        # Check individual conditions
        conditions = qdrant_filter.must

        # Match condition
        match_condition = next(c for c in conditions if c.key == "category")
        assert isinstance(match_condition.match, models.MatchValue)
        assert match_condition.match.value == "programming"

        # MatchAny condition
        match_any_condition = next(c for c in conditions if c.key == "language")
        assert isinstance(match_any_condition.match, models.MatchAny)
        assert match_any_condition.match.any == ["python", "rust"]

        # MatchExclude condition
        match_exclude_condition = next(c for c in conditions if c.key == "difficulty")
        assert isinstance(match_exclude_condition.match, models.MatchExcept)

        # 5. Verify results transformation
        assert isinstance(results, list)
        assert len(results) == 3
        assert all(isinstance(result, SearchRow) for result in results)

        # Check first result
        assert results[0].chunk_id == "doc_1"
        assert results[0].score == 0.95
        assert (
            results[0].payload["text"]
            == "Advanced Python programming techniques for data science"
        )
        assert results[0].payload["category"] == "programming"

        # Check second result
        assert results[1].chunk_id == "doc_2"
        assert results[1].score == 0.87
        assert results[1].payload["language"] == "rust"

        # Check third result
        assert results[2].chunk_id == "doc_3"
        assert results[2].score == 0.78
        assert results[2].payload["difficulty"] == "beginner"

    @pytest.mark.asyncio
    async def test_search_with_no_conditions(self, mock_complete_engine_setup):
        """Test semantic search without any conditions"""
        engine = get_engine(Backend.QDRANT)

        results = await engine.semantic_search(
            embedding=[0.1, 0.2, 0.3], collection="test_collection"
        )

        # Verify no filter was applied
        client_mock = mock_complete_engine_setup["client"]
        call_args = client_mock.search.call_args
        assert call_args[1]["query_filter"] is None

        # Results should still be transformed
        assert len(results) == 3
        assert all(isinstance(result, SearchRow) for result in results)

    @pytest.mark.asyncio
    async def test_search_with_empty_conditions(self, mock_complete_engine_setup):
        """Test semantic search with empty conditions list"""
        engine = get_engine(Backend.QDRANT)

        results = await engine.semantic_search(
            embedding=[0.1, 0.2, 0.3], collection="test_collection", conditions=[]
        )

        # Verify no filter was applied
        client_mock = mock_complete_engine_setup["client"]
        call_args = client_mock.search.call_args
        assert call_args[1]["query_filter"] is None

        assert len(results) == 3

    @pytest.mark.asyncio
    async def test_search_filters_null_payloads(self, mock_complete_engine_setup):
        """Test that results with null payloads are filtered out"""
        # Override the mock response to include null payload
        client_mock = mock_complete_engine_setup["client"]
        client_mock.search.return_value = [
            models.ScoredPoint(
                id="valid_1",
                score=0.95,
                payload={"text": "Valid document"},
                version=1,
            ),
            models.ScoredPoint(
                id="invalid",
                score=0.90,
                payload=None,  # This should be filtered out
                version=1,
            ),
            models.ScoredPoint(
                id="valid_2",
                score=0.85,
                payload={"text": "Another valid document"},
                version=1,
            ),
        ]

        engine = get_engine(Backend.QDRANT)
        results = await engine.semantic_search(
            embedding=[0.1, 0.2, 0.3], collection="test_collection"
        )

        # Should only have 2 results (null payload filtered out)
        assert len(results) == 2
        assert results[0].chunk_id == "valid_1"
        assert results[1].chunk_id == "valid_2"

    @pytest.mark.asyncio
    async def test_error_propagation_from_client(self, mock_complete_engine_setup):
        """Test that client errors are properly propagated"""
        # Make the client raise an exception
        client_mock = mock_complete_engine_setup["client"]
        client_mock.search.side_effect = Exception("Qdrant connection timeout")

        engine = get_engine(Backend.QDRANT)

        with pytest.raises(Exception, match="Qdrant connection timeout"):
            await engine.semantic_search(
                embedding=[0.1, 0.2, 0.3], collection="test_collection"
            )

    @pytest.mark.asyncio
    async def test_search_with_named_vector(self, mock_complete_engine_setup):
        """Test semantic search with NamedVector instead of regular vector"""
        engine = get_engine(Backend.QDRANT)

        named_vector = models.NamedVector(
            name="text_embedding", vector=[0.1, 0.2, 0.3, 0.4, 0.5]
        )

        results = await engine.semantic_search(
            embedding=named_vector,  # type: ignore  # Testing duck typing
            collection="test_collection",
        )

        # Verify named vector was passed through
        client_mock = mock_complete_engine_setup["client"]
        call_args = client_mock.search.call_args
        assert call_args[1]["query_vector"] == named_vector

        assert len(results) == 3

    @pytest.mark.asyncio
    async def test_search_parameter_defaults(self, mock_complete_engine_setup):
        """Test that default parameters are applied correctly"""
        engine = get_engine(Backend.QDRANT)

        await engine.semantic_search(
            embedding=[0.1, 0.2, 0.3], collection="test_collection"
        )

        client_mock = mock_complete_engine_setup["client"]
        call_args = client_mock.search.call_args

        # Check defaults
        assert call_args[1]["limit"] == 10  # default limit
        assert call_args[1]["score_threshold"] is None  # default threshold
        assert call_args[1]["query_filter"] is None  # default conditions
        assert call_args[1]["with_payload"] is True
        assert call_args[1]["with_vectors"] is False

    @pytest.mark.asyncio
    async def test_multiple_engine_instances_independence(
        self, mock_complete_engine_setup
    ):
"""Test that multiple engine instances work independently"""
        # Create two engines
        engine1 = get_engine(Backend.QDRANT)
        engine2 = get_engine(Backend.QDRANT)

        # Verify they are the same instance due to caching
        assert engine1 is engine2

        # Both should work with the same instance
        results1 = await engine1.semantic_search(
            embedding=[0.1, 0.2, 0.3], collection="collection1"
        )

        results2 = await engine2.semantic_search(
            embedding=[0.4, 0.5, 0.6], collection="collection2"
        )

        assert len(results1) == 3
        assert len(results2) == 3

        # Verify client was called twice (same instance, multiple calls)
        client_mock = mock_complete_engine_setup["client"]
        assert client_mock.search.call_count == 2

    @pytest.mark.asyncio
    async def test_large_result_set_handling(self, mock_complete_engine_setup):
        """Test handling of large result sets"""
        # Create a large mock response
        large_response = []
        for i in range(100):
            large_response.append(
                models.ScoredPoint(
                    id=f"doc_{i}",
                    score=0.9 - (i * 0.001),  # Decreasing scores
                    payload={"text": f"Document {i}", "index": i},
                    version=1,
                )
            )

        client_mock = mock_complete_engine_setup["client"]
        client_mock.search.return_value = large_response

        engine = get_engine(Backend.QDRANT)
        results = await engine.semantic_search(
            embedding=[0.1, 0.2, 0.3], collection="large_collection", limit=100
        )

        # Should handle all 100 results
        assert len(results) == 100
        assert results[0].chunk_id == "doc_0"
        assert results[0].score == 0.9
        assert results[99].chunk_id == "doc_99"
        # 0.9 - (99 * 0.001); approx avoids brittle exact float comparison
        assert results[99].score == pytest.approx(0.801)

    def test_engine_type_consistency(self):
        """Test that engine types are consistent across multiple calls"""
        with (
            patch("vector_search_mcp.engine.qdrant_engine.Settings"),
            patch("vector_search_mcp.engine.qdrant_engine.AsyncQdrantClient"),
        ):
            engines = [get_engine(Backend.QDRANT) for _ in range(5)]

            # All should be the same instance due to caching
            assert all(engine is engines[0] for engine in engines)

            # All should be QdrantEngine instances
            from vector_search_mcp.engine.qdrant_engine import QdrantEngine

            assert all(isinstance(engine, QdrantEngine) for engine in engines)