code-shredding

This commit is contained in:
2026-02-24 03:50:34 +00:00
parent 98a1b5939e
commit ba7581055c
71 changed files with 1026 additions and 2417 deletions

View File

@@ -1,91 +1,57 @@
[project]
# NOTE(review): `name` was defined twice ("rag-pipeline", then
# "knowledge-pipeline"). TOML rejects a redefined key, so a single value is
# kept. "knowledge-pipeline" matches the `knowledge_pipeline.cli:app` entry
# point declared under [project.scripts] — confirm this is the intended name.
name = "knowledge-pipeline"
version = "0.1.0"
description = "RAG Pipeline for document chunking, embedding, and vector search"
readme = "README.md"
requires-python = ">=3.12"
# Array elements must be comma-separated; the separator between the two
# author entries was missing.
authors = [
    { name = "Pipeline Team" },
    { name = "Anibal Angulo", email = "A8065384@banorte.com" },
]
dependencies = [
    # Core dependencies
    "google-genai>=1.45.0",
    "google-cloud-aiplatform>=1.106.0",
    "google-cloud-storage>=2.19.0",
    "google-auth>=2.29.0",
    "pydantic>=2.11.7",
    "pydantic-settings[yaml]>=2.10.1",
    "python-dotenv>=1.0.0",
    # Chunking
    "chonkie>=1.1.2",
    "tiktoken>=0.7.0",
    "langchain>=0.3.0",
    "langchain-core>=0.3.0",
    # Document processing
    "markitdown[pdf]>=0.1.2",
    "pypdf>=6.1.2",
    "pdf2image>=1.17.0",
    # Storage & networking
    "gcloud-aio-storage>=9.6.1",
    "gcloud-aio-auth>=5.3.0",
    "aiohttp>=3.10.11,<4",
    # Utils
    "tenacity>=9.1.2",
    "typer>=0.16.1",
    # Pipeline orchestration
    # NOTE(review): previously labelled "optional", but entries in
    # `dependencies` are unconditional requirements. Move them to
    # [project.optional-dependencies] if they are meant to be opt-in.
    "kfp>=2.15.2",
    "pydantic-ai>=0.0.5",
]
[project.scripts]
# Console commands, each mapped to a `module:object` entry point.

# Chunking commands (modules in the `chunker` package)
llm-chunker = "chunker.llm_chunker:app"
recursive-chunker = "chunker.recursive_chunker:app"
contextual-chunker = "chunker.contextual_chunker:app"

# Document conversion
convert-md = "document_converter.markdown:app"

# File storage
file-storage = "file_storage.cli:app"

# Vector search
vector-search = "vector_search.cli:app"

# Utilities
normalize-filenames = "utils.normalize_filenames:app"
knowledge-pipeline = "knowledge_pipeline.cli:app"
[build-system]
# Build backend is `uv_build`, constrained to >=0.8.3 and <0.9.0.
build-backend = "uv_build"
requires = [
    "uv_build>=0.8.3,<0.9.0",
]
[tool.uv.workspace]
# Glob patterns selecting the workspace member directories.
members = ["apps/*", "packages/*"]
[tool.uv.sources]
# Each package below is marked as coming from this workspace.
# NOTE(review): none of these names appear in the visible
# [project].dependencies list — verify they are still required.
chunker = { workspace = true }
document-converter = { workspace = true }
embedder = { workspace = true }
file-storage = { workspace = true }
index-gen = { workspace = true }
llm = { workspace = true }
utils = { workspace = true }
vector-search = { workspace = true }
[dependency-groups]
# Tooling for the `dev` group, kept apart from the runtime
# requirements in [project].dependencies.
dev = [
    "mypy>=1.17.1",
    "pytest>=8.4.1",
    "ruff>=0.12.10",
    "ty>=0.0.18",
]
[tool.ruff.lint]
# Enable exactly the isort (I) and Pyflakes (F) rule families. `select`
# replaces Ruff's default rule set. The former `extend-select` listed the same
# two codes, so it added nothing on top of `select` and has been dropped.
select = ["I", "F"]
[tool.pytest.ini_options]
# Options added to every pytest invocation.
addopts = ["--strict-markers", "--tb=short", "--disable-warnings"]
# Markers registered for this project, as "name: description".
markers = [
    "unit: Unit tests",
    "integration: Integration tests",
    "slow: Slow running tests",
]