code-shredding
This commit is contained in:
@@ -1,29 +1,15 @@
|
||||
# Google Cloud Platform configuration
|
||||
# Google Cloud Platform
|
||||
project_id: "tu-proyecto-gcp" # placeholder value ("your-gcp-project")
|
||||
location: "us-central1" # or us-east1, europe-west1, etc.
|
||||
bucket: "tu-bucket-nombre" # placeholder value ("your-bucket-name")
|
||||
location: "us-central1" # NOTE(review): second "location" entry; presumably the other side of the diff, not a duplicate key — confirm
|
||||
|
||||
# Vector index configuration
|
||||
index:
|
||||
name: "mi-indice-rag"
|
||||
dimensions: 768 # use 768 for text-embedding-005
|
||||
machine_type: "e2-standard-2" # machine type for the endpoint
|
||||
approximate_neighbors_count: 150
|
||||
distance_measure_type: "DOT_PRODUCT_DISTANCE" # or "COSINE_DISTANCE", "EUCLIDEAN_DISTANCE"; NOTE(review): Vertex AI Vector Search spells the Euclidean option "SQUARED_L2_DISTANCE" — confirm which values the consumer accepts
|
||||
# Embedding model
|
||||
agent_embedding_model: "text-embedding-005"
|
||||
|
||||
# Embeddings configuration
|
||||
embedder:
|
||||
model_name: "text-embedding-005"
|
||||
task: "RETRIEVAL_DOCUMENT" # or "RETRIEVAL_QUERY" for queries
|
||||
|
||||
# LLM configuration used for chunking
|
||||
llm:
|
||||
model: "gemini-2.0-flash" # or "gemini-1.5-pro", "gemini-1.5-flash"
|
||||
|
||||
# Chunking configuration
|
||||
chunking:
|
||||
strategy: "contextual" # one of "recursive", "contextual", "llm"
|
||||
max_chunk_size: 800
|
||||
chunk_overlap: 200 # only for LLMChunker
|
||||
merge_related: true # only for LLMChunker
|
||||
extract_images: true # only for LLMChunker
|
||||
# Vector index (flat index_* keys)
|
||||
index_name: "mi-indice-rag"
|
||||
index_dimensions: 768
|
||||
index_machine_type: "e2-standard-16"
|
||||
index_origin: "gs://tu-bucket/input/" # gs:// URI; "tu-bucket" is a placeholder ("your-bucket")
|
||||
index_destination: "gs://tu-bucket/output/" # gs:// URI; "tu-bucket" is a placeholder ("your-bucket")
|
||||
index_chunk_limit: 800
|
||||
index_distance_measure_type: "DOT_PRODUCT_DISTANCE"
|
||||
|
||||
Reference in New Issue
Block a user