# Metadata for the root package of this repository (PEP 621 `[project]` table).
[project]
name = "rag-pipeline"
version = "0.1.0"
description = "RAG Pipeline for document chunking, embedding, and vector search"
readme = "README.md"
# Lower bound only: no upper cap is placed on the Python version.
requires-python = ">=3.12"
authors = [{ name = "Pipeline Team" }]

# Runtime requirements (PEP 508 strings), grouped by purpose. Every entry sets
# a minimum version; only aiohttp additionally carries an upper bound.
dependencies = [
    # Core dependencies
    "google-genai>=1.45.0",
    "google-cloud-aiplatform>=1.106.0",
    "google-cloud-storage>=2.19.0",
    "google-auth>=2.29.0",
    "pydantic>=2.11.7",
    "pydantic-settings[yaml]>=2.10.1",
    "python-dotenv>=1.0.0",

    # Chunking
    "chonkie>=1.1.2",
    "tiktoken>=0.7.0",
    "langchain>=0.3.0",
    "langchain-core>=0.3.0",

    # Document processing
    "markitdown[pdf]>=0.1.2",
    "pypdf>=6.1.2",
    "pdf2image>=1.17.0",

    # Storage & networking
    "gcloud-aio-storage>=9.6.1",
    "gcloud-aio-auth>=5.3.0",
    # Capped below the next major version (<4).
    "aiohttp>=3.10.11,<4",

    # Utils
    "tenacity>=9.1.2",
    "typer>=0.16.1",

    # Pipeline orchestration
    # NOTE(review): kfp sits in the required `dependencies` array, so it is
    # always installed. If it is meant to be opt-in, move it to
    # [project.optional-dependencies] instead.
    "kfp>=2.15.2",
]

# Console commands installed with this package. Each value is an entry point
# written as "module.path:object".
# NOTE(review): the target packages (chunker, document_converter, file_storage,
# vector_search, utils) are not listed in this file's `dependencies`; presumably
# they are provided by the workspace members. Confirm they are installed
# alongside this package, otherwise these commands cannot import their targets.
[project.scripts]
# Chunkers
llm-chunker = "chunker.llm_chunker:app"
recursive-chunker = "chunker.recursive_chunker:app"
contextual-chunker = "chunker.contextual_chunker:app"

# Converters
convert-md = "document_converter.markdown:app"

# Storage
file-storage = "file_storage.cli:app"

# Vector Search
vector-search = "vector_search.cli:app"

# Utils
normalize-filenames = "utils.normalize_filenames:app"

# Build backend used to produce distributions of this package. The backend
# requirement is restricted to >=0.8.3 and below 0.9.0.
[build-system]
requires = ["uv_build>=0.8.3,<0.9.0"]
build-backend = "uv_build"

# uv workspace definition: glob patterns selecting the member directories.
[tool.uv.workspace]
members = ["apps/*", "packages/*"]

# Tell uv to resolve each of these package names from the workspace
# (`workspace = true`) rather than from a package index.
# NOTE(review): none of these names appear in this file's
# `[project].dependencies`; presumably the entries are meant for workspace
# members that depend on one another — confirm, and confirm that each name
# matches an actual member package.
[tool.uv.sources]
chunker = { workspace = true }
document-converter = { workspace = true }
embedder = { workspace = true }
file-storage = { workspace = true }
llm = { workspace = true }
utils = { workspace = true }
vector-search = { workspace = true }
index-gen = { workspace = true }

# Development-only tooling (test runner, type checker, linter). Declared as a
# dependency group, separately from the runtime `dependencies` array.
[dependency-groups]
dev = [
    "pytest>=8.4.1",
    "mypy>=1.17.1",
    "ruff>=0.12.10",
]

# Ruff lint rules enabled in addition to the base selection:
# "I" = import sorting (isort), "F" = Pyflakes.
# NOTE(review): "F" appears to be part of Ruff's default rule set already, so
# listing it is likely redundant unless `select` is overridden elsewhere.
[tool.ruff.lint]
extend-select = ["I", "F"]