First commit

This commit is contained in:
2026-02-22 15:25:27 +00:00
commit 35d5a65b17
70 changed files with 4298 additions and 0 deletions

29
config.example.yaml Normal file
View File

@@ -0,0 +1,29 @@
# Example configuration. The settings below are grouped into top-level GCP
# values plus four nested sections: index, embedder, llm and chunking.

# Google Cloud Platform settings
project_id: "tu-proyecto-gcp"
location: "us-central1"  # or us-east1, europe-west1, etc.
bucket: "tu-bucket-nombre"

# Vector index settings
index:
  name: "mi-indice-rag"
  dimensions: 768  # use 768 for text-embedding-005
  machine_type: "e2-standard-2"  # machine type for the endpoint
  approximate_neighbors_count: 150
  # Alternatives listed by the original author: "COSINE_DISTANCE",
  # "EUCLIDEAN_DISTANCE".
  # NOTE(review): Vertex AI Vector Search names its L2 option
  # "SQUARED_L2_DISTANCE"; confirm that "EUCLIDEAN_DISTANCE" is accepted by
  # the code that consumes this file.
  distance_measure_type: "DOT_PRODUCT_DISTANCE"

# Embedding settings
embedder:
  model_name: "text-embedding-005"
  task: "RETRIEVAL_DOCUMENT"  # or "RETRIEVAL_QUERY" for queries

# LLM used for chunking
llm:
  model: "gemini-2.0-flash"  # or "gemini-1.5-pro", "gemini-1.5-flash"

# Chunking settings
chunking:
  strategy: "contextual"  # one of "recursive", "contextual", "llm"
  max_chunk_size: 800
  chunk_overlap: 200  # LLMChunker only
  merge_related: true  # LLMChunker only
  extract_images: true  # LLMChunker only