code-shredding

This commit is contained in:
2026-02-24 03:50:34 +00:00
parent 98a1b5939e
commit 74b4735275
71 changed files with 1015 additions and 2418 deletions

View File

@@ -1,29 +1,15 @@
# NOTE(review): these lines appear to be the body of the diff hunk announced
# just above (`@@ -1,29 +1,15 @@`), with removed and added lines interleaved
# and no +/- markers. That would explain why `location` occurs twice and why
# the keys following `index:`, `embedder:`, `llm:` and `chunking:` are not
# indented beneath them. Confirm against the repository before loading this
# text as a YAML document.
# Google Cloud Platform configuration
# Google Cloud Platform
project_id: "tu-proyecto-gcp"
location: "us-central1" # or us-east1, europe-west1, etc.
bucket: "tu-bucket-nombre"
location: "us-central1"
# Vector index configuration
index:
name: "mi-indice-rag"
dimensions: 768 # For text-embedding-005 use 768
machine_type: "e2-standard-2" # Machine type for the endpoint
approximate_neighbors_count: 150
distance_measure_type: "DOT_PRODUCT_DISTANCE" # Or "COSINE_DISTANCE", "EUCLIDEAN_DISTANCE"
# Embedding model
agent_embedding_model: "text-embedding-005"
# Embeddings configuration
embedder:
model_name: "text-embedding-005"
task: "RETRIEVAL_DOCUMENT" # Or "RETRIEVAL_QUERY" for queries
# LLM configuration for chunking
llm:
model: "gemini-2.0-flash" # Or "gemini-1.5-pro", "gemini-1.5-flash"
# Chunking configuration
chunking:
strategy: "contextual" # "recursive", "contextual", "llm"
max_chunk_size: 800
chunk_overlap: 200 # Only for LLMChunker
merge_related: true # Only for LLMChunker
extract_images: true # Only for LLMChunker
# Vector index
index_name: "mi-indice-rag"
index_dimensions: 768
index_machine_type: "e2-standard-16"
index_origin: "gs://tu-bucket/input/"
index_destination: "gs://tu-bucket/output/"
index_chunk_limit: 800
index_distance_measure_type: "DOT_PRODUCT_DISTANCE"