"""Shared pytest fixtures for knowledge_pipeline tests.""" from unittest.mock import Mock import pytest from knowledge_pipeline.chunker.base_chunker import BaseChunker, Document @pytest.fixture def mock_gcs_client(): """Mock Google Cloud Storage client.""" client = Mock() bucket = Mock() blob = Mock() client.bucket.return_value = bucket bucket.blob.return_value = blob bucket.list_blobs.return_value = [] return client @pytest.fixture def mock_chunker(): """Mock BaseChunker implementation.""" chunker = Mock(spec=BaseChunker) chunker.max_chunk_size = 1000 chunker.process_text.return_value = [ {"page_content": "Test chunk content", "metadata": {"id": "test_chunk"}} ] return chunker @pytest.fixture def mock_embedder(): """Mock pydantic_ai Embedder.""" embedder = Mock() embeddings_result = Mock() embeddings_result.embeddings = [[0.1, 0.2, 0.3]] embedder.embed_documents_sync.return_value = embeddings_result return embedder @pytest.fixture def mock_converter(): """Mock MarkItDown converter.""" converter = Mock() result = Mock() result.text_content = "# Markdown Content\n\nTest content here." converter.convert.return_value = result return converter @pytest.fixture def sample_chunks() -> list[Document]: """Sample document chunks for testing.""" return [ {"page_content": "First chunk content", "metadata": {"id": "doc_1_0"}}, {"page_content": "Second chunk content", "metadata": {"id": "doc_1_1"}}, {"page_content": "Third chunk content", "metadata": {"id": "doc_1_2"}}, ] @pytest.fixture def sample_embeddings(): """Sample embeddings for testing.""" return [ [0.1, 0.2, 0.3, 0.4, 0.5], [0.6, 0.7, 0.8, 0.9, 1.0], [0.2, 0.3, 0.4, 0.5, 0.6], ] @pytest.fixture def sample_vectors(): """Sample vector records for testing.""" return [ { "id": "doc_1_0", "embedding": [0.1, 0.2, 0.3], "restricts": [{"namespace": "source", "allow": ["documents"]}], }, { "id": "doc_1_1", "embedding": [0.4, 0.5, 0.6], "restricts": [{"namespace": "source", "allow": ["documents"]}], }, ]