forked from innovacion/Mayacontigo
ic
This commit is contained in:
0
apps/normativa/api/__init__.py
Normal file
0
apps/normativa/api/__init__.py
Normal file
3
apps/normativa/api/agent/__init__.py
Normal file
3
apps/normativa/api/agent/__init__.py
Normal file
@@ -0,0 +1,3 @@
|
||||
from .main import Agent
|
||||
|
||||
__all__ = ["Agent"]
|
||||
373
apps/normativa/api/agent/main.py
Normal file
373
apps/normativa/api/agent/main.py
Normal file
@@ -0,0 +1,373 @@
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import Annotated, List, Sequence, Literal, Any, AsyncGenerator
|
||||
from typing_extensions import TypedDict
|
||||
|
||||
from langchain_core.messages import BaseMessage, HumanMessage, AIMessage, AIMessageChunk
|
||||
from langchain_azure_ai.chat_models import AzureAIChatCompletionsModel
|
||||
from langchain_azure_ai.embeddings import AzureAIEmbeddingsModel
|
||||
from langgraph.graph.message import add_messages
|
||||
from langgraph.graph import StateGraph, START, END
|
||||
from langgraph.checkpoint.memory import MemorySaver
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from banortegpt.storage.azure_storage import AzureStorage
|
||||
from banortegpt.vector.qdrant import AsyncQdrant
|
||||
|
||||
import api.context as ctx
|
||||
from api.config import config
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
parent = Path(__file__).parent
|
||||
SYSTEM_PROMPT = (parent / "system_prompt.md").read_text()
|
||||
AZURE_AI_URI = "https://eastus2.api.cognitive.microsoft.com"
|
||||
|
||||
|
||||
# Tool schema handed to the chat model through ``bind_tools`` in MayaNormativa.
# NOTE(review): the docstring presumably doubles as the tool description that
# is sent to the model — confirm before rewording it.
class get_information(BaseModel):
    """Search a private repository for information."""

    # Single required argument of the tool.
    question: str = Field(..., description="The user question")
|
||||
|
||||
class MayaNormativaState(TypedDict):
    """State carried between the nodes of the retrieval graph."""

    # Conversation transcript; ``add_messages`` is the reducer for this key.
    messages: Annotated[Sequence[BaseMessage], add_messages]
    # Text used for retrieval; ``evaluate_node`` may rewrite it before a retry.
    query: str
    # Payloads returned by the latest vector search.
    search_results: List[dict]
    # Number of retrieval passes executed so far.
    iteration_count: int
    # Upper bound for ``iteration_count``.
    max_iterations: int
    # Text produced by ``generate_node``.
    final_response: str
    # Written by ``evaluate_node`` and read by ``_decide_next_step``. It must
    # be declared here: LangGraph only propagates updates for keys that belong
    # to the state schema, so an undeclared flag never reaches the router and
    # the retry branch is never taken.
    continue_search: bool
|
||||
|
||||
|
||||
class MayaNormativa:
    """Retrieval-augmented agent for questions about regulations.

    The vector search client, chat model, embedder and blob storage are built
    once as class attributes from ``config``. Each instance owns a
    ``MemorySaver`` checkpointer and a compiled LangGraph workflow:
    retrieve -> evaluate -> (retrieve again | generate).

    NOTE(review): ``last_search_results`` and ``last_metadatas`` live on the
    instance, so concurrent ``stream`` calls on one shared agent can overwrite
    each other's values — confirm whether callers need per-request results.
    """

    system_prompt = SYSTEM_PROMPT
    generation_config = {
        "temperature": config.model_temperature,
    }
    message_limit = config.message_limit
    index = config.vector_index
    limit = config.search_limit
    bucket = config.storage_bucket

    search = AsyncQdrant.from_config(config)
    llm = AzureAIChatCompletionsModel(
        endpoint=f"{AZURE_AI_URI}/openai/deployments/{config.model}",
        credential=config.openai_api_key,
    ).bind_tools([get_information])
    embedder = AzureAIEmbeddingsModel(
        endpoint=f"{AZURE_AI_URI}/openai/deployments/{config.embedding_model}",
        credential=config.openai_api_key,
    )
    storage = AzureStorage.from_config(config)

    def __init__(self) -> None:
        """Create the tool map, the checkpointer and the compiled graph."""
        self.tool_map = {"get_information": self.get_information}
        self.memory = MemorySaver()
        self.graph = self._build_complete_langgraph()
        # Filled in by ``stream``; defined here so readers never hit a missing
        # attribute before the first request.
        self.last_search_results: List[dict] = []
        self.last_metadatas: List[dict] = []

    @staticmethod
    def _describe_source(metadata) -> str:
        """Return the human-readable source label for one payload's metadata."""
        if not metadata:
            return "No disponible"

        file_name = metadata.get("file_name", "") or metadata.get("file", "")
        page = metadata.get("page", "")
        if file_name and page:
            return f"{file_name} - Página {page}"
        return file_name or "Documento interno"

    def build_response(self, payloads):
        """Render search payloads as numbered reference blocks.

        Args:
            payloads: Iterable of dicts with ``content`` or ``page_content``
                and an optional ``metadata`` dict.

        Returns:
            One string: a citation reminder followed by one block per payload,
            numbered from 1.
        """
        template = "------ REFERENCIA {index} ----- \n\n{content}\n\n**Fuente:** {source_info}"
        blocks = ["Recuerda citar las referencias en el formato: texto[1]."]

        for number, payload in enumerate(payloads, start=1):
            content = payload.get("content", "") or payload.get("page_content", "")
            source_info = self._describe_source(payload.get("metadata", {}))
            blocks.append(
                template.format(index=number, content=content, source_info=source_info)
            )

        return "\n".join(blocks)

    async def get_information(self, question: str):
        """Embed ``question`` and run a semantic search over the index.

        Returns:
            A ``(tool_response, results)`` tuple: the rendered reference text
            and the raw search payloads.
        """
        logger.info(f"Embedding question: {question} with model {self.embedder.model_name}")
        embedding = await self.embedder.aembed_query(question)

        results = await self.search.semantic_search(
            embedding=embedding, collection=self.index, limit=self.limit
        )

        return self.build_response(results), results

    async def get_shareable_urls(self, metadatas: list):
        """Request storage URLs for the files and images named in ``metadatas``.

        Returns:
            A ``(reference_urls, image_urls)`` tuple of lists.
        """
        reference_urls = []
        image_urls = []

        for metadata in metadatas:
            if file := metadata.get("file"):
                reference_urls.append(
                    await self.storage.get_file_url(
                        filename=file,
                        bucket=self.bucket,
                        minute_duration=20,
                        image=False,
                    )
                )
            if image_file := metadata.get("image"):
                image_urls.append(
                    await self.storage.get_file_url(
                        filename=image_file,
                        bucket=self.bucket,
                        minute_duration=20,
                        image=True,
                    )
                )

        return reference_urls, image_urls

    def _generation_config_overwrite(self, overwrites: dict | None) -> dict[str, Any]:
        """Return a copy of ``generation_config`` with ``overwrites`` applied."""
        return {**self.generation_config, **(overwrites or {})}

    async def retrieve_node(self, state: MayaNormativaState) -> dict:
        """Graph node: search for ``state["query"]`` and count the attempt.

        A failed search is logged and reported as an empty result list so the
        graph can still reach the generate step.
        """
        query = state["query"]
        logger.info(f"Retrieving information for: {query}")

        try:
            _, results = await self.get_information(query)
        except Exception as e:
            logger.exception(f"Error in retrieve_node: {e}")
            results = []
        else:
            logger.info(f"Retrieved {len(results)} results")

        return {
            "search_results": results,
            "iteration_count": state["iteration_count"] + 1,
        }

    @staticmethod
    def _needs_another_search(state) -> bool:
        """Tell whether the graph should loop back to the retrieve node."""
        has_sufficient_results = len(state["search_results"]) >= 2
        reached_max_iterations = state["iteration_count"] >= state["max_iterations"]
        return not (has_sufficient_results or reached_max_iterations)

    async def evaluate_node(self, state: MayaNormativaState) -> dict:
        """Graph node: decide whether to retry and, if so, rewrite the query."""
        if not self._needs_another_search(state):
            logger.info(
                f"Stopping search: {len(state['search_results'])} results, "
                f"iteration {state['iteration_count']}"
            )
            return {"continue_search": False}

        new_query = f"circular artículo {state['query']}"
        logger.info(f"Continuing search with modified query: {new_query}")
        return {"continue_search": True, "query": new_query}

    def _compose_llm_messages(self, messages, query: str, context_text: str) -> list:
        """Build the chat payload: system prompt, prior turns, current prompt."""
        history = [{"role": "system", "content": self.system_prompt}]

        # The last message is the question being answered; it is sent again
        # below together with the retrieved context.
        for msg in messages[:-1]:
            if isinstance(msg, HumanMessage):
                history.append({"role": "user", "content": msg.content})
            elif isinstance(msg, AIMessage):
                history.append({"role": "assistant", "content": msg.content})

        prompt_lines = [
            "",
            f"Consulta del usuario: {query}",
            "",
            "Información encontrada:",
            context_text,
            "",
            "INSTRUCCIONES:",
            "- Reproduce la información EXACTAMENTE como aparece en la documentación",
            "- NO parafrasees ni interpretes",
            "- Usa las palabras exactas del documento original",
            "- Mantén los tiempos verbales originales",
            "- Mejora el formato con emojis",
            "- Respuestas extensas y completas",
            "- Siempre has referencia al articulo, ley o seccion de la pagina donde encontraste la informacion",
            "- Pregunta por informacion relacionada con la respuesta que requiera al final",
            "- Considera el contexto de la infomacion anterior si existe",
            "",
        ]
        history.append({"role": "user", "content": "\n".join(prompt_lines)})
        return history

    async def generate_node(self, state: MayaNormativaState) -> dict:
        """Graph node: produce the final answer from the retrieved context.

        Returns:
            ``final_response`` plus the same text wrapped in an ``AIMessage``
            for the ``messages`` key.
        """
        results = state["search_results"]
        query = state["query"]
        messages = state.get("messages", [])

        logger.info(f"Generating response for query: {query}")
        logger.info(f"Using {len(results)} search results")
        logger.info(f"Message history length: {len(messages)}")

        if not results:
            final_response = "No encontré información sobre este tema en la documentación actual."
        else:
            context_text = self.build_response(results)
            try:
                history = self._compose_llm_messages(messages, query, context_text)
                generation_config = self._generation_config_overwrite(None)

                # NOTE(review): ``self.llm`` is bound to the get_information
                # tool; if the model answers with a tool call instead of text
                # the joined response is empty — confirm this cannot happen.
                response_chunks = []
                async for delta in self.llm.astream(input=history, **generation_config):
                    assert isinstance(delta, AIMessageChunk)
                    if delta.content:
                        response_chunks.append(delta.content)

                final_response = "".join(response_chunks)
                logger.info(f"Generated response length: {len(final_response)}")
            except Exception as e:
                logger.exception(f"ERROR generando respuesta: {e}")
                final_response = f"Error generando respuesta: {str(e)}"

        return {
            "final_response": final_response,
            "messages": [AIMessage(content=final_response)],
        }

    def _build_complete_langgraph(self) -> StateGraph:
        """Wire the three nodes together and compile with the checkpointer.

        The value returned is the result of ``workflow.compile(...)``.
        """
        workflow = StateGraph(MayaNormativaState)

        workflow.add_node("retrieve", self.retrieve_node)
        workflow.add_node("evaluate", self.evaluate_node)
        workflow.add_node("generate", self.generate_node)

        workflow.add_edge(START, "retrieve")
        workflow.add_edge("retrieve", "evaluate")
        workflow.add_conditional_edges(
            "evaluate",
            self._decide_next_step,
            {
                "continue": "retrieve",
                "finish": "generate",
            },
        )
        workflow.add_edge("generate", END)

        return workflow.compile(checkpointer=self.memory)

    def _decide_next_step(self, state: MayaNormativaState) -> Literal["continue", "finish"]:
        """Route after ``evaluate``: retry retrieval or move on to generation.

        The decision is derived from the same counters ``evaluate_node`` uses
        instead of relying on its ``continue_search`` update being carried
        between nodes: LangGraph only propagates keys that the state schema
        declares, so a flag outside the schema is always read back as missing.
        """
        return "continue" if self._needs_another_search(state) else "finish"

    @staticmethod
    def _collect_metadatas(search_results) -> list:
        """Return the non-empty ``metadata`` dicts found in ``search_results``."""
        try:
            metadatas = [
                metadata
                for result in search_results
                if (metadata := result.get("metadata", {}))
            ]
        except Exception as e:
            logger.exception(f"Error extrayendo metadatos: {e}")
            return []

        logger.info(f"Extracted {len(metadatas)} metadata objects")
        return metadatas

    async def stream(self, history, overwrites: dict | None = None, thread_id: str = "default"):
        """Run the graph for the last message in ``history`` and yield the answer.

        Args:
            history: List of message dicts; only the ``content`` of the last
                one is used as the query.
            overwrites: Accepted for interface compatibility; not used here.
            thread_id: Checkpointer thread that holds the conversation memory.

        Yields:
            The final response in slices of 50 characters, or a single error
            message when the query is empty or the graph fails.
        """
        # Start from a clean slate so an empty query or a failed run cannot
        # expose the references of a previous request.
        self.last_search_results = []
        self.last_metadatas = []

        last_message = history[-1] if history else {"content": ""}
        query = last_message.get("content", "")

        if not query:
            yield "Error: No se encontró pregunta en el historial"
            return

        logger.info(f"Processing query: {query}")
        logger.info(f"Thread ID: {thread_id}")

        try:
            config_with_thread = {"configurable": {"thread_id": thread_id}}
            initial_state = {
                "messages": [HumanMessage(content=query)],
                "query": query,
                "search_results": [],
                "iteration_count": 0,
                "max_iterations": 2,
                "final_response": "",
            }

            logger.info("Invoking LangGraph...")
            final_state = await self.graph.ainvoke(initial_state, config=config_with_thread)
            logger.info("LangGraph execution completed")

            self.last_search_results = final_state.get("search_results", [])
            if self.last_search_results:
                self.last_metadatas = self._collect_metadatas(self.last_search_results)

            final_response = final_state.get("final_response", "Error: No se pudo generar respuesta")

            chunk_size = 50
            for start in range(0, len(final_response), chunk_size):
                chunk = final_response[start:start + chunk_size]
                # Mirror every emitted slice into the request-scoped buffer.
                ctx.buffer.set(ctx.buffer.get() + chunk)
                yield chunk

        except Exception as e:
            error_msg = f"Error en stream: {str(e)}"
            logger.exception(error_msg)
            yield error_msg

    async def get_conversation_history(self, thread_id: str = "default") -> List[BaseMessage]:
        """Return the messages checkpointed for ``thread_id`` (empty on failure)."""
        try:
            config_with_thread = {"configurable": {"thread_id": thread_id}}
            checkpoint = await self.graph.aget_state(config=config_with_thread)

            if checkpoint and checkpoint.values:
                return checkpoint.values.get("messages", [])
            return []

        except Exception as e:
            logger.exception(f"Error obteniendo historial: {e}")
            return []

    async def debug_memory(self, thread_id: str = "default"):
        """Log a 50-character preview of every message stored for ``thread_id``."""
        try:
            history = await self.get_conversation_history(thread_id)
            logger.info(f"MEMORY DEBUG (thread: {thread_id}) ===")
            logger.info(f"Total messages: {len(history)}")

            for position, msg in enumerate(history, start=1):
                msg_type = "USER" if isinstance(msg, HumanMessage) else "ASSISTANT"
                content_preview = msg.content[:50] + "..." if len(msg.content) > 50 else msg.content
                logger.info(f"{position}. {msg_type}: {content_preview}")

        except Exception as e:
            logger.exception(f"Error in debug_memory: {e}")


# Name under which the package exports the agent class.
Agent = MayaNormativa
|
||||
23
apps/normativa/api/agent/system_prompt.md
Normal file
23
apps/normativa/api/agent/system_prompt.md
Normal file
@@ -0,0 +1,23 @@
|
||||
Eres MayaNormativa, asistente virtual especializada en normativa y procesos internos de Banorte.
|
||||
Reglas Fundamentales
|
||||
OBLIGATORIO:
|
||||
|
||||
Usar únicamente get_information para cada consulta
|
||||
Siempre haz referencia al artículo, ley o sección de la página donde encontraste la información
|
||||
Reproducir información EXACTAMENTE como aparece en documentación
|
||||
Usar Markdown para respuestas claras y concisas
|
||||
|
||||
PROHIBIDO:
|
||||
|
||||
Agregar, interpretar, suponer o extrapolar información
|
||||
Modificar términos técnicos, números, fechas o procedimientos
|
||||
Parafrasear si altera el significado original
|
||||
Inventar o completar información faltante
|
||||
|
||||
Respuestas Estándar
|
||||
Información no disponible: "No encontré información sobre [tema] en la documentación actual"
|
||||
Información parcial: "Esta es toda la información disponible en la documentación interna"
|
||||
Información contradictoria: Presenta ambas versiones indicando fuentes
|
||||
Pie de página: Información obtenida de: [referencia/documento específico]
|
||||
Objetivo
|
||||
Proporcionar información oficial precisa y confiable. Es mejor reconocer limitaciones que dar información inexacta.
|
||||
55
apps/normativa/api/config.py
Normal file
55
apps/normativa/api/config.py
Normal file
@@ -0,0 +1,55 @@
|
||||
from hvac import Client
|
||||
from pydantic import Field
|
||||
from pydantic_settings import BaseSettings
|
||||
|
||||
# Vault client used once, at import time, to load the application secrets.
client = Client(url="https://vault.ia-innovacion.work")

# Fail fast: every secret-backed setting below depends on this lookup.
if not client.is_authenticated():
    raise RuntimeError("Vault authentication failed")

# KV v2 responses nest the stored key/value pairs under data -> data.
secret_map = client.secrets.kv.v2.read_secret_version(
    path="banortegpt", mount_point="secret"
)["data"]["data"]
|
||||
|
||||
class Settings(BaseSettings):
    """Application settings: static defaults plus secrets read from Vault.

    Secret-backed fields take their default from ``secret_map``, which is
    loaded when this module is imported.
    """

    # Basic settings
    model: str = "gpt-4o"
    # Declared as float so fractional temperatures (e.g. 0.2) validate.
    model_temperature: float = 0.0
    message_limit: int = 10
    host: str = "0.0.0.0"
    port: int = 8000

    # Retrieval settings
    embedding_model: str = "text-embedding-3-large"
    storage_bucket: str = "normativa-bucket"  # TODO: adjust the bucket name
    vector_index: str = "MayaNormativaLLM"
    search_limit: int = 3

    # API keys and connection strings
    azure_endpoint: str = Field(default_factory=lambda: secret_map["azure_endpoint"])
    openai_api_key: str = Field(default_factory=lambda: secret_map["openai_api_key"])
    openai_api_version: str = Field(default_factory=lambda: secret_map["openai_api_version"])
    mongodb_url: str = Field(default_factory=lambda: secret_map["cosmosdb_connection_string"])

    # Blob storage and vector database
    azure_blob_connection_string: str = Field(
        default_factory=lambda: secret_map["azure_blob_connection_string"]
    )
    qdrant_url: str = Field(default_factory=lambda: secret_map["qdrant_api_url"])
    qdrant_api_key: str | None = Field(
        default_factory=lambda: secret_map["qdrant_api_key"]
    )

    async def init_mongo_db(self):
        """Connect to MongoDB and register the ``Conversation`` document model."""
        # Imported lazily so importing the settings does not pull in the
        # database stack.
        from banortegpt.database.mongo_memory.models import Conversation
        from beanie import init_beanie
        from motor.motor_asyncio import AsyncIOMotorClient

        # Named distinctly from the module-level Vault ``client``.
        mongo_client = AsyncIOMotorClient(self.mongodb_url)

        await init_beanie(
            database=mongo_client.banortegptdos,
            document_models=[Conversation],
        )


# Settings instance imported by the rest of the application.
config = Settings()
|
||||
6
apps/normativa/api/context.py
Normal file
6
apps/normativa/api/context.py
Normal file
@@ -0,0 +1,6 @@
|
||||
from contextvars import ContextVar
|
||||
|
||||
# Text of the assistant answer accumulated while it is streamed; appended to
# by the agent's ``stream`` and read back by the streaming service.
buffer: ContextVar[str] = ContextVar("buffer", default="")
# Raw JSON arguments of a pending tool call (parsed with ``json.loads`` by the
# streaming service).
tool_buffer: ContextVar[str] = ContextVar("tool_buffer", default="")
# Identifier of the pending tool call; ``None`` means no tool call is pending.
tool_id: ContextVar[str | None] = ContextVar("tool_id", default=None)
# Name of the pending tool, used as the key into the agent's ``tool_map``.
tool_name: ContextVar[str | None] = ContextVar("tool_name", default=None)
|
||||
300
apps/normativa/api/server.py
Normal file
300
apps/normativa/api/server.py
Normal file
@@ -0,0 +1,300 @@
|
||||
import uuid
|
||||
import os
|
||||
from contextlib import asynccontextmanager
|
||||
from pathlib import Path
|
||||
|
||||
from fastapi import FastAPI, HTTPException
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from fastapi.responses import StreamingResponse, FileResponse, RedirectResponse
|
||||
from pydantic import BaseModel
|
||||
from langfuse import Langfuse
|
||||
|
||||
from dotenv import load_dotenv # ← Agregar este import
|
||||
|
||||
from api import services
|
||||
from api.agent import Agent
|
||||
from api.config import config
|
||||
|
||||
# Read the .env file (if any) into the process environment first, because the
# Langfuse client below takes its credentials from there.
load_dotenv()

# Tracing client; any variable that is not set is passed as None.
langfuse = Langfuse(
    host=os.environ.get("LANGFUSE_HOST"),
    public_key=os.environ.get("LANGFUSE_PUBLIC_KEY"),
    secret_key=os.environ.get("LANGFUSE_SECRET_KEY"),
)
|
||||
|
||||
|
||||
# Mapeo completo de archivos a URLs públicas
|
||||
PDF_PUBLIC_URLS = {
|
||||
# Disposiciones de CNBV
|
||||
"Disposiciones de carácter general aplicables a las casas de bolsa.pdf": "https://www.cnbv.gob.mx/Normatividad/Disposiciones%20de%20car%C3%A1cter%20general%20aplicables%20a%20las%20casas%20de%20bolsa.pdf",
|
||||
"Disposiciones de carácter general aplicables a las instituciones de crédito.pdf": "https://www.cnbv.gob.mx/Normatividad/Disposiciones%20de%20car%C3%A1cter%20general%20aplicables%20a%20las%20instituciones%20de%20cr%C3%A9dito.pdf",
|
||||
"Disposiciones de carácter general aplicables a las sociedades controladoras de grupos financieros y subcontroladoras que regulan las materias que corresponden de manera conjunta a las Comisio.pdf": "https://www.cnbv.gob.mx/Normatividad/Disposiciones%20de%20car%C3%A1cter%20general%20aplicables%20a%20las%20sociedades%20controladoras%20de%20grupos%20financieros%20y%20subcontroladoras%20que%20regulan%20las%20materias%20que%20corresponden%20de%20manera%20conjunta%20a%20las%20Comisiones%20Nacionales%20Supervisoras.pdf",
|
||||
"Disposiciones de carácter general aplicables a los fondos de inversión y a las personas que les prestan servicios.pdf": "https://www.cnbv.gob.mx/Normatividad/Disposiciones%20de%20car%C3%A1cter%20general%20aplicables%20a%20los%20fondos%20de%20inversi%C3%B3n%20y%20a%20las%20personas%20que%20les%20prestan%20servicios.pdf",
|
||||
"Ley para la Transparencia y Ordenamiento de los Servicios Financieros.pdf": "https://www.cnbv.gob.mx/Normatividad/Ley%20para%20la%20Transparencia%20y%20Ordenamiento%20de%20los%20Servicios%20Financieros.pdf",
|
||||
|
||||
# Circulares CNBV adicionales
|
||||
"circular_servicios_de_inversion.pdf": "https://www.cnbv.gob.mx/Normatividad/Disposiciones%20de%20car%C3%A1cter%20general%20aplicables%20a%20las%20entidades%20financieras%20y%20dem%C3%A1s%20personas%20que%20proporcionen%20servicios%20de.pdf",
|
||||
"circular_unica_de_auditores_externos.pdf": "https://www.cnbv.gob.mx/Normatividad/Disposiciones%20de%20car%C3%A1cter%20general%20que%20establecen%20los%20requisitos%20que%20deber%C3%A1n%20cumplir%20los%20auditores%20y%20otros%20profesionales%20que.pdf",
|
||||
"ley_de_instituciones_de_Credito.pdf": "https://www.cnbv.gob.mx/Normatividad/Ley%20de%20Instituciones%20de%20Cr%C3%A9dito.pdf",
|
||||
|
||||
# Circulares de Banxico
|
||||
"circular_13_2007.pdf": "https://www.banxico.org.mx/marco-normativo/normativa-emitida-por-el-banco-de-mexico/circular-13-2007/cobro-intereses-por-adelantad.html",
|
||||
"circular_13_2011.pdf": "https://www.banxico.org.mx/marco-normativo/normativa-emitida-por-el-banco-de-mexico/circular-13-2011/%7BBA4CBC28-A468-16C9-6F17-9EA9D7B03318%7D.pdf",
|
||||
"circular_14_2007.pdf": "https://www.banxico.org.mx/marco-normativo/normativa-emitida-por-el-banco-de-mexico/circular-14-2007/%7BFB726B6B-D523-56F5-F9B1-BE5B3B95A504%7D.pdf",
|
||||
"circular_17_2014.pdf": "https://www.banxico.org.mx/marco-normativo/normativa-emitida-por-el-banco-de-mexico/circular-17-2014/%7BF36CEF03-9441-2DBE-082C-0DF274903782%7D.pdf",
|
||||
"circular_1_2005.pdf": "https://www.banxico.org.mx/marco-normativo/normativa-emitida-por-el-banco-de-mexico/circular-1-2005/%7B5CA4BA75-FEA8-199C-F129-E8E6A73E84F3%7D.pdf",
|
||||
"circular_21_2009.pdf": "https://www.banxico.org.mx/marco-normativo/normativa-emitida-por-el-banco-de-mexico/circular-21-2009/%7B29285862-EDE0-567A-BAFB-D261406641A3%7D.pdf",
|
||||
"circular_22_2008.pdf": "https://www.banxico.org.mx/marco-normativo/normativa-emitida-por-el-banco-de-mexico/circular-22-2008/%7BF15C8A26-C92E-BE2B-9344-51EDAA3C9B68%7D.pdf",
|
||||
"circular_22_2010.pdf": "https://www.banxico.org.mx/marco-normativo/normativa-emitida-por-el-banco-de-mexico/circular-22-2010/%7B0D531F59-1001-4D67-D7B4-D5854DD07A58%7D.pdf",
|
||||
"circular_27_2008.pdf": "https://www.banxico.org.mx/marco-normativo/normativa-emitida-por-el-banco-de-mexico/circular-27-2008/%7BBC4333FE-070F-E727-199E-CA6BCF2CBA66%7D.pdf",
|
||||
"circular_34_2010.pdf": "https://www.banxico.org.mx/marco-normativo/normativa-emitida-por-el-banco-de-mexico/circular-34-2010/%7B0C55B906-6DB4-6B88-FED0-67987E9FB3CC%7D.pdf",
|
||||
"circular_35_2010.pdf": "https://www.banxico.org.mx/marco-normativo/normativa-emitida-por-el-banco-de-mexico/circular-35-2010/%7B74C5641C-ED98-53C7-F08B-A3C7BAE0D480%7D.pdf",
|
||||
"circular_36_2010.pdf": "https://www.banxico.org.mx/marco-normativo/normativa-emitida-por-el-banco-de-mexico/circular-36-2010/%7B26C55DE6-CC3A-3368-34FC-1A6C50B11130%7D.pdf",
|
||||
"circular_3_2012.pdf": "https://www.banxico.org.mx/marco-normativo/normativa-emitida-por-el-banco-de-mexico/circular-3-2012/%7B4E0281A4-7AD8-1462-BC79-7F2925F3171D%7D.pdf",
|
||||
"circular_4_2012.pdf": "https://www.banxico.org.mx/marco-normativo/normativa-emitida-por-el-banco-de-mexico/circular-4-2012/%7B97C62974-1C94-19AE-AB5A-D0D949A36247%7D.pdf",
|
||||
|
||||
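# CONDUSEF
# NOTE(review): the two entries below map to the same URL; confirm the link for ley_para_regular_las_sociedades_de_informacion_crediticia.pdf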
|
||||
"circular_unica_de_condusef.pdf": "https://www.condusef.gob.mx/documentos/marco_legal/disposiciones-transparencia-if-sofom.pdf",
|
||||
"ley_para_regular_las_sociedades_de_informacion_crediticia.pdf": "https://www.condusef.gob.mx/documentos/marco_legal/disposiciones-transparencia-if-sofom.pdf",
|
||||
|
||||
# Leyes federales
|
||||
"ley_federal_de_proteccion_de_datos_personales_en_posesion_de_los_particulares.pdf": "https://www.diputados.gob.mx/LeyesBiblio/pdf/LFPDPPP.pdf",
|
||||
"reglamento_de_la_ley_federal_de_proteccion_de_datos_personales_en_posesion_de_los_particulares.pdf": "https://www.diputados.gob.mx/LeyesBiblio/regley/Reg_LFPDPPP.pdf",
|
||||
|
||||
# SharePoint Banorte
|
||||
"Modificaciones Recursos Procedencia Ilícita jul 25 PLD.pdf": "https://gfbanorte.sharepoint.com/:w:/r/sites/Formatosyplantillas/Documentos%20compartidos/Otros/Modificaciones%20Recursos%20Procedencia%20Il%C3%ADcita%20jul%2025%20PLD.docx?d=w6a941e9e2c26403ea41c12de35536516&csf=1&web=1&e=EHtc9b",
|
||||
}
|
||||
|
||||
|
||||
@asynccontextmanager
async def lifespan(_: FastAPI):
    """Application lifespan hook: initialise the MongoDB models before serving.

    Nothing runs after the ``yield``, so no shutdown cleanup is performed.
    """
    await config.init_mongo_db()
    yield
|
||||
|
||||
|
||||
app = FastAPI(lifespan=lifespan)

# NOTE(review): wildcard origins together with allow_credentials=True accept
# credentialed requests from any site — confirm this is intended outside of
# development.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
    expose_headers=["*"]
)

# Single agent instance shared by every request handler in this module.
agent = Agent()

# Folder holding the PDFs served locally; created at import time if missing.
PDF_FOLDER = Path(__file__).parent / "agent" / "pdf"
PDF_FOLDER.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
|
||||
@app.post("/api/v1/conversation")
async def create_conversation():
    """Create a conversation seeded with the agent's system prompt.

    Returns:
        A dict with the generated ``conversation_id`` (a random UUID).
    """
    conversation_id = uuid.uuid4()
    await services.create_conversation(conversation_id, agent.system_prompt)
    return {"conversation_id": conversation_id}
|
||||
|
||||
|
||||
class Message(BaseModel):
    """Request body of ``POST /api/v1/message``."""

    # Conversation the prompt belongs to (as returned by /api/v1/conversation).
    conversation_id: uuid.UUID
    # User text to send to the agent.
    prompt: str
|
||||
|
||||
|
||||
@app.post("/api/v1/message")
async def send(message: Message):
    """Stream the agent's answer to ``message`` as server-sent events.

    Every chunk produced by ``services.stream`` is serialised to JSON and
    emitted as one ``data:`` event. The prompt and the concatenated text
    chunks are recorded in a Langfuse trace.
    """
    # Basic tracking of the chat exchange.
    trace = langfuse.trace(
        name="rag_chat",
        session_id=str(message.conversation_id),
        input={"prompt": message.prompt}
    )

    async def sse_events():
        response_parts = []
        try:
            async for chunk in services.stream(agent, message.prompt, message.conversation_id):
                if chunk.type == "text" and chunk.content:
                    response_parts.append(str(chunk.content))

                yield f"data: {chunk.model_dump_json()}\n\n"
        finally:
            # Record whatever was produced even when the stream fails or the
            # client disconnects half-way; otherwise the trace has no output.
            trace.update(output={"response": "".join(response_parts)})

    return StreamingResponse(sse_events(), media_type="text/event-stream")
|
||||
|
||||
|
||||
@app.get("/api/pdf/{filename}")
async def get_pdf(filename: str):
    """Serve a PDF: redirect to its public URL or stream the local copy.

    Raises:
        HTTPException: 400 for a non-PDF or unsafe name, 404 when the file is
            neither mapped to a public URL nor present locally, 500 when the
            local file is empty.
    """
    print(f"🔍 Solicitud PDF para: {filename}")

    if not filename.lower().endswith('.pdf'):
        print(f"❌ Archivo no es PDF: {filename}")
        raise HTTPException(status_code=400, detail="El archivo debe ser un PDF")

    # Same rule as the /info endpoint: no path separators and no parent
    # references, whatever the name starts with.
    if '..' in filename or '/' in filename or '\\' in filename:
        print(f"❌ Nombre de archivo inválido: {filename}")
        raise HTTPException(status_code=400, detail="Nombre de archivo inválido")

    public_url = PDF_PUBLIC_URLS.get(filename)

    if public_url:
        print(f"✅ Redirigiendo a URL pública: {public_url}")
        return RedirectResponse(
            url=public_url,
            status_code=302,
            headers={
                "Cache-Control": "public, max-age=3600",
                "Access-Control-Allow-Origin": "*",
                "Access-Control-Allow-Methods": "GET, OPTIONS",
                "Access-Control-Allow-Headers": "*"
            }
        )

    pdf_path = PDF_FOLDER / filename

    if not pdf_path.exists():
        print(f"❌ PDF no encontrado: {pdf_path}")
        raise HTTPException(status_code=404, detail=f"PDF no encontrado. Archivo: {filename}")

    if not pdf_path.is_file():
        print(f"❌ No es un archivo: {pdf_path}")
        raise HTTPException(status_code=404, detail="El recurso no es un archivo")

    file_size = pdf_path.stat().st_size
    print(f"📄 Sirviendo archivo local: {filename} ({file_size} bytes)")

    if file_size == 0:
        print(f"❌ Archivo vacío: {pdf_path}")
        raise HTTPException(status_code=500, detail="El archivo PDF está vacío")

    # Let FileResponse build the Content-Disposition header: it quotes and
    # encodes the file name, which a hand-written ``filename=...`` does not
    # (many of the served names contain spaces and accented characters).
    # NOTE(review): "ALLOWALL" is not a standard X-Frame-Options value —
    # confirm what the embedding page actually needs.
    return FileResponse(
        path=str(pdf_path),
        media_type="application/pdf",
        filename=filename,
        content_disposition_type="inline",
        headers={
            "Cache-Control": "public, max-age=3600",
            "X-Frame-Options": "ALLOWALL",
            "X-Content-Type-Options": "nosniff",
            "Access-Control-Allow-Origin": "*"
        }
    )
|
||||
|
||||
|
||||
@app.get("/api/pdfs")
async def list_pdfs():
    """List every known PDF: publicly mapped ones first, then local files.

    Returns:
        A dict with ``pdfs``, ``debug`` and ``total_pdfs``; on failure a dict
        with ``error``, ``traceback`` and ``debug`` instead.
    """
    try:
        pdf_files = [
            {
                "filename": filename,
                "size": "N/A (Público)",
                "url": f"/api/pdf/{filename}",
                "public_url": url,
                "type": "public"
            }
            for filename, url in PDF_PUBLIC_URLS.items()
        ]

        # One pass with a case-insensitive suffix test: it accepts the same
        # names as the download endpoint (e.g. "x.Pdf"), cannot list a file
        # twice, and sorting keeps the order stable between calls.
        folder_entries = sorted(PDF_FOLDER.iterdir()) if PDF_FOLDER.is_dir() else []
        local_files = [
            {
                "filename": file_path.name,
                "size": file_path.stat().st_size,
                "url": f"/api/pdf/{file_path.name}",
                "type": "local"
            }
            for file_path in folder_entries
            if file_path.is_file()
            and file_path.name.lower().endswith(".pdf")
            and file_path.name not in PDF_PUBLIC_URLS
        ]

        pdf_files.extend(local_files)

        debug_info = {
            "current_working_directory": str(Path.cwd()),
            "pdf_folder_path": str(PDF_FOLDER.absolute()),
            "pdf_folder_exists": PDF_FOLDER.exists(),
            "public_urls_count": len(PDF_PUBLIC_URLS),
            "local_files_count": len(local_files),
            "public_files": list(PDF_PUBLIC_URLS.keys()),
        }

        return {
            "pdfs": pdf_files,
            "debug": debug_info,
            "total_pdfs": len(pdf_files)
        }
    except Exception as e:
        import traceback

        # NOTE(review): this returns the traceback and server paths to the
        # caller with a 200 status — confirm the endpoint is never exposed
        # outside trusted environments.
        return {
            "error": str(e),
            "traceback": traceback.format_exc(),
            "debug": {
                "current_working_directory": str(Path.cwd()),
                "script_file_path": __file__ if '__file__' in globals() else "unknown"
            }
        }
|
||||
|
||||
|
||||
@app.get("/api/pdf/{filename}/info")
async def get_pdf_info(filename: str):
    """Describe a PDF without sending its content.

    Publicly mapped files report ``"N/A"`` for size and modification time;
    local files report the values from the file system.

    Raises:
        HTTPException: 400 for a non-PDF or unsafe name, 404 when the file is
            unknown or not a regular file, 500 when it cannot be inspected.
    """
    if not filename.lower().endswith('.pdf'):
        raise HTTPException(status_code=400, detail="El archivo debe ser un PDF")

    unsafe_name = any(token in filename for token in ('..', '/', '\\'))
    if unsafe_name:
        raise HTTPException(status_code=400, detail="Nombre de archivo inválido")

    download_url = f"/api/pdf/{filename}"

    external_url = PDF_PUBLIC_URLS.get(filename)
    if external_url:
        return {
            "filename": filename,
            "size": "N/A",
            "size_mb": "N/A",
            "modified": "N/A",
            "url": download_url,
            "public_url": external_url,
            "type": "public"
        }

    local_path = PDF_FOLDER / filename

    if not local_path.exists():
        raise HTTPException(status_code=404, detail="PDF no encontrado")

    if not local_path.is_file():
        raise HTTPException(status_code=404, detail="El recurso no es un archivo")

    try:
        stats = local_path.stat()
        size_in_bytes = stats.st_size
        return {
            "filename": filename,
            "size": size_in_bytes,
            "size_mb": round(size_in_bytes / (1024 * 1024), 2),
            "modified": stats.st_mtime,
            "url": download_url,
            "type": "local"
        }
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error al obtener información del PDF: {str(e)}")
|
||||
|
||||
|
||||
@app.get("/api/health")
async def health_check():
    """Report a static ``healthy`` status plus basic PDF configuration facts.

    No dependency is probed here; the status string is always the same.
    """
    return {
        "status": "healthy",
        "pdf_folder": str(PDF_FOLDER),
        "pdf_folder_exists": PDF_FOLDER.exists(),
        "public_urls_configured": len(PDF_PUBLIC_URLS)
    }
|
||||
8
apps/normativa/api/services/__init__.py
Normal file
8
apps/normativa/api/services/__init__.py
Normal file
@@ -0,0 +1,8 @@
|
||||
from banortegpt.database.mongo_memory.crud import create_conversation
|
||||
|
||||
from .stream_response import stream
|
||||
|
||||
# Public names of the services package: the response-chunk generator defined
# in ``stream_response`` and the conversation factory re-exported from
# ``banortegpt.database.mongo_memory.crud``.
__all__ = [
    "stream",
    "create_conversation",
]
|
||||
89
apps/normativa/api/services/stream_response.py
Normal file
89
apps/normativa/api/services/stream_response.py
Normal file
@@ -0,0 +1,89 @@
|
||||
import json
|
||||
from enum import StrEnum
|
||||
from typing import TypeAlias, Any
|
||||
from uuid import UUID
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
import api.context as ctx
|
||||
from api.agent import Agent
|
||||
from banortegpt.database.mongo_memory import crud
|
||||
|
||||
|
||||
class ChunkType(StrEnum):
    """Tag carried by every ``ResponseChunk`` so consumers can tell them apart."""

    START = "start"  # first chunk of every response
    TEXT = "text"  # piece of the assistant's answer
    REFERENCE = "reference"  # metadata of the documents found by the search
    IMAGE = "image"  # not emitted by the code in this module
    TOOL = "tool"  # emitted right before a pending tool call is executed
    END = "end"  # last chunk, sent after the conversation is saved
    ERROR = "error"  # not emitted by the code in this module
|
||||
|
||||
|
||||
# Payload types a chunk may carry. Because ``Any`` is part of the union, this
# alias does not restrict the accepted values.
ContentType: TypeAlias = str | int | dict | Any


class ResponseChunk(BaseModel):
    """One unit of the streamed response: a type tag plus its payload."""

    type: ChunkType
    # No default is declared, so ``content`` must always be passed explicitly
    # (``None`` is an accepted value).
    content: ContentType | list[ContentType] | None
|
||||
|
||||
|
||||
async def stream(agent: Agent, prompt: str, conversation_id: UUID):
    """Run one chat turn and yield it as a sequence of ``ResponseChunk`` objects.

    Order of chunks: START, the TEXT chunks of the answer, an optional
    REFERENCE chunk, an optional TOOL chunk followed by more TEXT chunks when
    a tool call is pending, and finally END once the conversation has been
    saved.

    Args:
        agent: Agent that produces the answer.
        prompt: User message for this turn.
        conversation_id: Identifier of the stored conversation.

    Raises:
        ValueError: If no conversation exists for ``conversation_id``.
    """
    yield ResponseChunk(type=ChunkType.START, content="")

    conversation = await crud.get_conversation(conversation_id)

    if conversation is None:
        raise ValueError("Conversation not found")

    conversation.add(role="user", content=prompt)

    history = conversation.to_openai_format(agent.message_limit, langchain_compat=True)

    # Give each conversation its own checkpointer thread. Without it the agent
    # falls back to the thread "default" and every conversation shares (and
    # is answered with) the same accumulated message history.
    thread_id = str(conversation_id)

    async for content in agent.stream(history, thread_id=thread_id):
        yield ResponseChunk(type=ChunkType.TEXT, content=content)

    # NOTE(review): read from the shared agent instance; concurrent requests
    # may overwrite it — confirm whether per-request metadata is needed.
    metadatas = getattr(agent, 'last_metadatas', [])

    if metadatas:
        yield ResponseChunk(type=ChunkType.REFERENCE, content=metadatas)

    if (tool_id := ctx.tool_id.get()) is not None:
        tool_buffer = ctx.tool_buffer.get()
        assert tool_buffer is not None

        tool_name = ctx.tool_name.get()
        assert tool_name is not None

        yield ResponseChunk(type=ChunkType.TOOL, content=None)

        buffer_dict = json.loads(tool_buffer)

        result = await agent.tool_map[tool_name](**buffer_dict)
        # ``get_information`` returns (rendered_text, raw_results); only the
        # rendered text belongs in the tool message.
        tool_output = result[0] if isinstance(result, tuple) else result

        conversation.add(
            role="assistant",
            tool_calls=[
                {
                    "id": tool_id,
                    "type": "function",
                    "function": {
                        "name": tool_name,
                        "arguments": tool_buffer,
                    },
                }
            ],
        )
        conversation.add(role="tool", content=tool_output, tool_call_id=tool_id)

        history = conversation.to_openai_format(agent.message_limit, langchain_compat=True)
        async for content in agent.stream(history, {"tools": None}, thread_id=thread_id):
            yield ResponseChunk(type=ChunkType.TEXT, content=content)

    # The agent mirrors every text slice it yields into ``ctx.buffer``.
    conversation.add(role="assistant", content=ctx.buffer.get())

    await conversation.replace()

    yield ResponseChunk(type=ChunkType.END, content="")
|
||||
Reference in New Issue
Block a user