# Build backend used to produce distributions of the workspace root package.
[build-system]
requires = ["uv_build>=0.8.3,<0.9.0"]
build-backend = "uv_build"

# Core metadata for the workspace root package.
[project]
name = "rag-pipeline"
version = "0.1.0"
description = "RAG Pipeline for document chunking, embedding, and vector search"
readme = "README.md"
requires-python = ">=3.12"
authors = [
    { name = "Pipeline Team" },
]
# Runtime requirements, grouped by the area of the pipeline that needs them.
dependencies = [
    # Core dependencies
    "google-genai>=1.45.0",
    "google-cloud-aiplatform>=1.106.0",
    "google-cloud-storage>=2.19.0",
    "google-auth>=2.29.0",
    "pydantic>=2.11.7",
    "pydantic-settings[yaml]>=2.10.1",
    "python-dotenv>=1.0.0",

    # Chunking
    "chonkie>=1.1.2",
    "tiktoken>=0.7.0",
    "langchain>=0.3.0",
    "langchain-core>=0.3.0",

    # Document processing
    "markitdown[pdf]>=0.1.2",
    "pypdf>=6.1.2",
    "pdf2image>=1.17.0",

    # Storage & networking
    # aiohttp is capped below the next major version.
    "gcloud-aio-storage>=9.6.1",
    "gcloud-aio-auth>=5.3.0",
    "aiohttp>=3.10.11,<4",

    # Utils
    "tenacity>=9.1.2",
    "typer>=0.16.1",

    # Pipeline orchestration (optional)
    # NOTE(review): labelled optional, yet listed as a required dependency --
    # confirm whether this belongs under [project.optional-dependencies].
    "kfp>=2.15.2",
]

# Command-line entry points, each mapping a command name to a
# "module:attribute" target.
# NOTE(review): the targets import modules (chunker, document_converter,
# file_storage, vector_search, utils) that are not listed in `dependencies`
# above -- confirm they are installed alongside this package.
[project.scripts]
# Chunkers
llm-chunker = "chunker.llm_chunker:app"
recursive-chunker = "chunker.recursive_chunker:app"
contextual-chunker = "chunker.contextual_chunker:app"

# Converters
convert-md = "document_converter.markdown:app"

# Storage
file-storage = "file_storage.cli:app"

# Vector Search
vector-search = "vector_search.cli:app"

# Utils
normalize-filenames = "utils.normalize_filenames:app"

# Development-only tooling: test runner, type checker, linter.
[dependency-groups]
dev = [
    "pytest>=8.4.1",
    "mypy>=1.17.1",
    "ruff>=0.12.10",
]

# Packages listed here are resolved from workspace members rather than from
# a package index.
[tool.uv.sources]
chunker = { workspace = true }
document-converter = { workspace = true }
embedder = { workspace = true }
file-storage = { workspace = true }
llm = { workspace = true }
utils = { workspace = true }
vector-search = { workspace = true }
index-gen = { workspace = true }

# Every directory matching these globs is a workspace member.
[tool.uv.workspace]
members = [
    "apps/*",
    "packages/*",
]

# Rule sets enabled on top of Ruff's defaults: "I" (import sorting) and
# "F" (Pyflakes).
[tool.ruff.lint]
extend-select = ["I", "F"]