90 lines
2.2 KiB
Python
90 lines
2.2 KiB
Python
"""Shared pytest fixtures for knowledge_pipeline tests."""
|
|
|
|
from unittest.mock import Mock
|
|
|
|
import pytest
|
|
|
|
from knowledge_pipeline.chunker.base_chunker import BaseChunker, Document
|
|
|
|
|
|
@pytest.fixture
def mock_gcs_client():
    """Provide a ``Mock`` standing in for a Google Cloud Storage client.

    Wiring of the returned object:
      * ``client.bucket(...)`` always returns the same bucket mock;
      * ``bucket.blob(...)`` always returns the same blob mock;
      * ``bucket.list_blobs(...)`` returns an empty list.
    """
    fake_blob = Mock()

    fake_bucket = Mock()
    fake_bucket.configure_mock(
        **{
            "blob.return_value": fake_blob,
            "list_blobs.return_value": [],
        }
    )

    storage_client = Mock()
    storage_client.configure_mock(**{"bucket.return_value": fake_bucket})
    return storage_client
|
|
|
|
|
|
@pytest.fixture
def mock_chunker():
    """Provide a ``Mock`` restricted to the ``BaseChunker`` interface.

    The mock exposes ``max_chunk_size == 1000`` and its ``process_text``
    returns a list holding one canned chunk dictionary.
    """
    canned_chunks = [
        {"page_content": "Test chunk content", "metadata": {"id": "test_chunk"}},
    ]

    fake_chunker = Mock(spec=BaseChunker)
    fake_chunker.max_chunk_size = 1000
    fake_chunker.process_text.return_value = canned_chunks
    return fake_chunker
|
|
|
|
|
|
@pytest.fixture
def mock_embedder():
    """Provide a ``Mock`` standing in for a pydantic_ai Embedder.

    ``embed_documents_sync(...)`` returns a result mock whose ``embeddings``
    attribute is a single three-element vector.
    """
    canned_result = Mock(embeddings=[[0.1, 0.2, 0.3]])

    fake_embedder = Mock()
    fake_embedder.embed_documents_sync.return_value = canned_result
    return fake_embedder
|
|
|
|
|
|
@pytest.fixture
def mock_converter():
    """Provide a ``Mock`` standing in for a MarkItDown converter.

    ``convert(...)`` returns a result mock whose ``text_content`` attribute
    holds a short Markdown string.
    """
    conversion = Mock(text_content="# Markdown Content\n\nTest content here.")

    fake_converter = Mock()
    fake_converter.convert.return_value = conversion
    return fake_converter
|
|
|
|
|
|
@pytest.fixture
def sample_chunks() -> list[Document]:
    """Return three chunk dictionaries with ids ``doc_1_0`` to ``doc_1_2``.

    Each entry has a ``page_content`` string and a ``metadata`` mapping
    carrying the chunk ``id``.
    """
    chunk_ids = ("doc_1_0", "doc_1_1", "doc_1_2")
    chunk_texts = (
        "First chunk content",
        "Second chunk content",
        "Third chunk content",
    )
    return [
        {"page_content": text, "metadata": {"id": chunk_id}}
        for chunk_id, text in zip(chunk_ids, chunk_texts)
    ]
|
|
|
|
|
|
@pytest.fixture
def sample_embeddings():
    """Return three five-element embedding vectors as a list of lists."""
    rows = (
        (0.1, 0.2, 0.3, 0.4, 0.5),
        (0.6, 0.7, 0.8, 0.9, 1.0),
        (0.2, 0.3, 0.4, 0.5, 0.6),
    )
    # Materialise fresh lists so each test receives its own mutable copies.
    return [list(row) for row in rows]
|
|
|
|
|
|
@pytest.fixture
def sample_vectors():
    """Return two vector records for ids ``doc_1_0`` and ``doc_1_1``.

    Every record carries an ``id``, a three-element ``embedding`` and a
    ``restricts`` list with one entry allowing ``documents`` in the
    ``source`` namespace.
    """
    embedding_by_id = {
        "doc_1_0": [0.1, 0.2, 0.3],
        "doc_1_1": [0.4, 0.5, 0.6],
    }
    return [
        {
            "id": vector_id,
            "embedding": embedding,
            # Built inside the loop so records do not share one list object.
            "restricts": [{"namespace": "source", "allow": ["documents"]}],
        }
        for vector_id, embedding in embedding_by_id.items()
    ]
|