30 lines
995 B
YAML
30 lines
995 B
YAML
# Google Cloud Platform settings.
# The values below are placeholders; replace them with your own project,
# region and bucket.
project_id: "tu-proyecto-gcp"
location: "us-central1"  # or us-east1, europe-west1, etc.
bucket: "tu-bucket-nombre"

# Vector index settings.
index:
  name: "mi-indice-rag"
  dimensions: 768  # for text-embedding-005 use 768
  machine_type: "e2-standard-2"  # machine type for the endpoint
  approximate_neighbors_count: 150
  # Alternatives: "COSINE_DISTANCE", "EUCLIDEAN_DISTANCE".
  # NOTE(review): the Vertex AI Vector Search API spells the Euclidean
  # option "SQUARED_L2_DISTANCE" -- confirm which spelling the loader accepts.
  distance_measure_type: "DOT_PRODUCT_DISTANCE"

# Embedding settings.
embedder:
  model_name: "text-embedding-005"
  task: "RETRIEVAL_DOCUMENT"  # or "RETRIEVAL_QUERY" for queries

# LLM used for chunking.
llm:
  model: "gemini-2.0-flash"  # or "gemini-1.5-pro", "gemini-1.5-flash"

# Chunking settings.
# NOTE(review): strategy is "contextual", while three keys below are marked
# "LLMChunker only" -- presumably they have no effect unless the LLM chunker
# is selected; confirm against the code that reads this file.
chunking:
  strategy: "contextual"  # options: "recursive", "contextual", "llm"
  max_chunk_size: 800
  chunk_overlap: 200  # LLMChunker only
  merge_related: true  # LLMChunker only
  extract_images: true  # LLMChunker only