Bug solucionado de Qdrant y subida de datos extraídos a Redis con referencia al documento

This commit is contained in:
Sebastian
2025-11-07 23:30:10 +00:00
parent c9a63e129d
commit 70f2a42502
15 changed files with 1392 additions and 29 deletions

View File

@@ -1,10 +1,12 @@
"""
Servicio de embeddings usando Azure OpenAI.
Genera embeddings para chunks de texto usando text-embedding-3-large (3072 dimensiones).
Incluye manejo de rate limits con retry exponencial y delays entre batches.
"""
import asyncio
import logging
from typing import List
from openai import AzureOpenAI
from openai import AzureOpenAI, RateLimitError
from ..core.config import settings
logger = logging.getLogger(__name__)
@@ -63,46 +65,89 @@ class EmbeddingService:
async def generate_embeddings_batch(
self,
texts: List[str],
batch_size: int = 100
batch_size: int | None = None,
delay_between_batches: float | None = None,
max_retries: int | None = None
) -> List[List[float]]:
"""
Genera embeddings para múltiples textos en lotes.
Genera embeddings para múltiples textos en lotes con manejo de rate limits.
Args:
texts: Lista de textos para generar embeddings
batch_size: Tamaño del lote para procesamiento (default: 100)
batch_size: Tamaño del lote (None = usar configuración de settings)
delay_between_batches: Segundos de espera entre batches (None = usar configuración)
max_retries: Número máximo de reintentos (None = usar configuración)
Returns:
Lista de vectores de embeddings
Raises:
Exception: Si hay error al generar los embeddings
Exception: Si hay error al generar los embeddings después de todos los reintentos
"""
# Usar configuración de settings si no se proporciona
batch_size = batch_size or settings.EMBEDDING_BATCH_SIZE
delay_between_batches = delay_between_batches or settings.EMBEDDING_DELAY_BETWEEN_BATCHES
max_retries = max_retries or settings.EMBEDDING_MAX_RETRIES
try:
embeddings = []
total_batches = (len(texts) - 1) // batch_size + 1
logger.info(f"Iniciando generación de embeddings: {len(texts)} textos en {total_batches} batches")
logger.info(f"Configuración: batch_size={batch_size}, delay={delay_between_batches}s, max_retries={max_retries}")
for i in range(0, len(texts), batch_size):
batch = texts[i:i + batch_size]
logger.info(f"Procesando lote {i//batch_size + 1}/{(len(texts)-1)//batch_size + 1}")
batch_num = i // batch_size + 1
response = self.client.embeddings.create(
input=batch,
model=self.model
)
logger.info(f"📊 Procesando batch {batch_num}/{total_batches} ({len(batch)} textos)...")
batch_embeddings = [item.embedding for item in response.data]
# Validar dimensiones
for idx, emb in enumerate(batch_embeddings):
if len(emb) != self.embedding_dimension:
raise ValueError(
f"Dimensión incorrecta en índice {i + idx}: "
f"esperada {self.embedding_dimension}, obtenida {len(emb)}"
# Retry con exponential backoff
retry_count = 0
while retry_count <= max_retries:
try:
response = self.client.embeddings.create(
input=batch,
model=self.model
)
embeddings.extend(batch_embeddings)
batch_embeddings = [item.embedding for item in response.data]
logger.info(f"Generados {len(embeddings)} embeddings exitosamente")
# Validar dimensiones
for idx, emb in enumerate(batch_embeddings):
if len(emb) != self.embedding_dimension:
raise ValueError(
f"Dimensión incorrecta en índice {i + idx}: "
f"esperada {self.embedding_dimension}, obtenida {len(emb)}"
)
embeddings.extend(batch_embeddings)
logger.info(f"✓ Batch {batch_num}/{total_batches} completado exitosamente")
break # Éxito, salir del retry loop
except RateLimitError as e:
retry_count += 1
if retry_count > max_retries:
logger.error(f"❌ Rate limit excedido después de {max_retries} reintentos")
raise
# Exponential backoff: 2^retry_count segundos
wait_time = 2 ** retry_count
logger.warning(
f"⚠️ Rate limit alcanzado en batch {batch_num}/{total_batches}. "
f"Reintento {retry_count}/{max_retries} en {wait_time}s..."
)
await asyncio.sleep(wait_time)
except Exception as e:
logger.error(f"❌ Error en batch {batch_num}/{total_batches}: {e}")
raise
# Delay entre batches para respetar rate limit (excepto en el último)
if i + batch_size < len(texts):
await asyncio.sleep(delay_between_batches)
logger.info(f"✅ Embeddings generados exitosamente: {len(embeddings)} vectores de {self.embedding_dimension}D")
return embeddings
except Exception as e: