forked from innovacion/Mayacontigo
ic
This commit is contained in:
89
apps/riesgos/api/__init__.py
Normal file
89
apps/riesgos/api/__init__.py
Normal file
@@ -0,0 +1,89 @@
|
||||
from contextvars import ContextVar
|
||||
from types import SimpleNamespace
|
||||
|
||||
from hvac import Client
|
||||
from pydantic import Field
|
||||
from langfuse import Langfuse
|
||||
from langfuse.decorators import langfuse_context
|
||||
from pydantic_settings import BaseSettings
|
||||
|
||||
buffer: ContextVar[str] = ContextVar("buffer", default="")
|
||||
tool_buffer: ContextVar[str] = ContextVar("tool_buffer", default="")
|
||||
tool_id: ContextVar[str | None] = ContextVar("tool_id", default=None)
|
||||
tool_name: ContextVar[str | None] = ContextVar("tool_name", default=None)
|
||||
|
||||
context = SimpleNamespace(
|
||||
buffer=buffer,
|
||||
tool_buffer=tool_buffer,
|
||||
tool_id=tool_id,
|
||||
tool_name=tool_name,
|
||||
)
|
||||
|
||||
# Vault client created at import time. No token is passed explicitly.
# NOTE(review): presumably hvac picks the token up from its own defaults
# (e.g. the VAULT_TOKEN environment variable) -- confirm for each deployment.
client = Client(url="https://vault.ia-innovacion.work")

# Fail fast: the secret-backed settings defined later in this module cannot be
# resolved without an authenticated Vault session.
if not client.is_authenticated():
    # RuntimeError instead of a bare Exception; it is still an Exception
    # subclass, so existing ``except Exception`` handlers keep working.
    raise RuntimeError("Vault authentication failed")

# Read the "banortegpt" secret from the KV v2 engine mounted at "secret" and
# keep only the inner ["data"]["data"] mapping of the response.
secret_map = client.secrets.kv.v2.read_secret_version(
    path="banortegpt", mount_point="secret"
)["data"]["data"]
|
||||
|
||||
|
||||
class Settings(BaseSettings):
    """Runtime configuration for the riesgos API.

    Plain fields carry literal defaults. Fields declared with a
    ``default_factory`` take their default from the module-level
    ``secret_map`` read from Vault. The Langfuse fields default to ``None``.
    """

    model: str = "gpt-4o"
    # Typed as float (not int) so fractional temperatures validate; the
    # default value itself is unchanged.
    model_temperature: float = 0
    embedding_model: str = "text-embedding-3-large"
    message_limit: int = 10
    storage_bucket: str = "riesgosreferences2"
    vector_index: str = "MayaRiesgos2"
    search_limit: int = 5
    host: str = "0.0.0.0"
    port: int = 8000

    # Secret-backed values. The factories run lazily, only when no explicit
    # value was supplied for the field.
    azure_endpoint: str = Field(default_factory=lambda: secret_map["azure_endpoint"])
    openai_api_key: str = Field(default_factory=lambda: secret_map["openai_api_key"])
    openai_api_version: str = Field(
        default_factory=lambda: secret_map["openai_api_version"]
    )
    azure_blob_connection_string: str = Field(
        default_factory=lambda: secret_map["azure_blob_connection_string"]
    )
    # Note the differing key name: the field is ``qdrant_url`` but the secret
    # is stored under ``qdrant_api_url``.
    qdrant_url: str = Field(default_factory=lambda: secret_map["qdrant_api_url"])
    # The field is optional, so a missing secret yields None rather than a
    # KeyError at settings construction.
    qdrant_api_key: str | None = Field(
        default_factory=lambda: secret_map.get("qdrant_api_key")
    )
    # The MongoDB URL is stored in Vault under ``cosmosdb_connection_string``.
    mongodb_url: str = Field(
        default_factory=lambda: secret_map["cosmosdb_connection_string"]
    )
    langfuse_host: str | None = None
    langfuse_public_key: str | None = None
    langfuse_secret_key: str | None = None

    async def init_mongo_db(self):
        """Initialise Beanie with the ``Conversation`` document model.

        Connects to ``self.mongodb_url`` and uses its ``banortegptdos``
        database. The database dependencies are imported inside the method,
        so they are only loaded when it is called.
        """
        from banortegpt.database.mongo_memory.models import Conversation
        from beanie import init_beanie
        from motor.motor_asyncio import AsyncIOMotorClient

        # Named ``mongo_client`` to avoid shadowing the module-level Vault
        # ``client``.
        mongo_client = AsyncIOMotorClient(self.mongodb_url)

        await init_beanie(
            database=mongo_client.banortegptdos,
            document_models=[Conversation],
        )

    def init_langfuse(self):
        """Configure ``langfuse_context`` with this instance's host and keys."""
        langfuse_context.configure(
            host=self.langfuse_host,
            public_key=self.langfuse_public_key,
            secret_key=self.langfuse_secret_key,
        )

    def get_langfuse(self):
        """Return a new ``Langfuse`` client built from this instance's host and keys."""
        return Langfuse(
            host=self.langfuse_host,
            public_key=self.langfuse_public_key,
            secret_key=self.langfuse_secret_key,
        )
|
||||
|
||||
|
||||
# Settings instance built once at import time; other modules import it as
# ``config``.
config = Settings()
|
||||
131
apps/riesgos/api/agent.py
Normal file
131
apps/riesgos/api/agent.py
Normal file
@@ -0,0 +1,131 @@
|
||||
from pathlib import Path
|
||||
from typing import Any, Literal
|
||||
|
||||
from banortegpt.storage.azure_storage import AzureStorage
|
||||
from banortegpt.vector.qdrant import AsyncQdrant
|
||||
from langchain_core.messages import AIMessageChunk
|
||||
from qdrant_client import models
|
||||
from langchain_azure_ai.chat_models import AzureAIChatCompletionsModel
|
||||
from langchain_azure_ai.embeddings import AzureAIEmbeddingsModel
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from . import config, context
|
||||
|
||||
# Directory that contains this module; the prompt file is read from it.
parent = Path(__file__).parent
# Decode explicitly as UTF-8: the prompt contains accented Spanish characters
# and the platform's default encoding is not guaranteed to be UTF-8.
SYSTEM_PROMPT = (parent / "system_prompt.md").read_text(encoding="utf-8")

# Base URI from which the chat and embedding deployment endpoints are built.
AZURE_AI_URI = "https://eastus2.api.cognitive.microsoft.com"
|
||||
|
||||
class get_information(BaseModel):
    """Buscar informacion relevante de documentos de Banorte sobre un sistema en particular."""

    # Tool schema handed to ``bind_tools`` by the agent class in this module.
    # NOTE(review): the docstring above and the field descriptions below are
    # presumably sent to the model as the tool/parameter descriptions, so they
    # are runtime text and are kept verbatim in Spanish -- confirm before
    # rewording or translating them.

    question: str = Field(
        ...,
        description=(
            "La pregunta del usuario, reescrita para ser "
            "comprensible fuera de contexto."
        ),
    )
    system: str = Field(
        ...,
        description=(
            "El sistema del cual se buscara informacion. "
            "Puede ser uno de los siguientes: "
            "['ML','SACS','ED','CARATULA', 'SICRED']. "
            "'ML' es 'Master de lineas', 'ED' es 'Expediente Digital'"
        ),
    )
|
||||
|
||||
class MayaRiesgos:
    """Chat agent that answers with an LLM bound to the ``get_information`` tool.

    All clients (vector search, chat model, embedder, blob storage) are class
    attributes built once, at class-definition time, from ``config``.
    """

    system_prompt = SYSTEM_PROMPT
    generation_config = {"temperature": config.model_temperature}
    message_limit = config.message_limit
    index = config.vector_index
    limit = config.search_limit
    bucket = config.storage_bucket

    search = AsyncQdrant.from_config(config)
    llm = AzureAIChatCompletionsModel(
        endpoint=f"{AZURE_AI_URI}/openai/deployments/{config.model}",
        credential=config.openai_api_key,
    ).bind_tools([get_information])
    embedder = AzureAIEmbeddingsModel(
        endpoint=f"{AZURE_AI_URI}/openai/deployments/{config.embedding_model}",
        credential=config.openai_api_key,
    )
    storage = AzureStorage.from_config(config)

    def __init__(self) -> None:
        # Maps the tool name emitted by the model to the coroutine that runs it.
        self.tool_map = {"get_information": self.get_information}

    def build_response(self, payloads):
        """Render each payload's ``content`` as a numbered reference block.

        Blocks are numbered from 0 in iteration order and joined with a
        single newline.
        """
        return "\n".join(
            f"------ REFERENCIA {position} ----- \n\n{payload['content']}"
            for position, payload in enumerate(payloads)
        )

    # NOTE(review): the ``system`` Literal lists "7.1" while the tool
    # description offered to the model lists 'CARATULA' -- confirm which value
    # the index actually stores. The annotation is not enforced at runtime.
    async def get_information(
        self, question: str, system: Literal["ML", "SACS", "ED", "7.1", "SICRED"]
    ):
        """Embed ``question`` and search the index for the given ``system``.

        Results are restricted to points whose ``system`` payload field is
        either ``"ALL"`` or the requested system.

        Returns:
            A ``(tool_response, payloads)`` tuple: the formatted reference text
            and the payloads returned by the search.
        """
        query_vector = await self.embedder.aembed_query(question)

        system_match = models.FieldCondition(
            key="system",
            match=models.MatchAny(any=["ALL", system]),
        )
        system_filter = models.Filter(must=[system_match])

        payloads = await self.search.semantic_search(
            collection=self.index,
            embedding=query_vector,
            limit=self.limit,
            conditions=system_filter,
        )

        return self.build_response(payloads), payloads

    async def get_shareable_urls(self, metadatas: list):
        """Build image URLs for every metadata entry with a page and a file.

        Entries missing a truthy ``pagina`` or ``archivo`` are skipped.

        Returns:
            A ``(reference_urls, image_urls)`` tuple. ``reference_urls`` is
            never populated by this method and is always empty.
        """
        reference_urls = []
        image_urls = []

        for metadata in metadatas:
            pagina = metadata.get("pagina")
            if not pagina:
                continue
            archivo = metadata.get("archivo")
            if not archivo:
                continue

            url = await self.storage.get_file_url(
                filename=f"{pagina}_{archivo}.png",
                bucket=self.bucket,
                minute_duration=20,
                image=True,
            )
            image_urls.append(url)

        return reference_urls, image_urls

    def _generation_config_overwrite(self, overwrites: dict | None) -> dict[str, Any]:
        """Return a copy of ``generation_config`` with ``overwrites`` applied on top."""
        return {**self.generation_config, **(overwrites or {})}

    async def stream(self, history, overwrites: dict | None = None):
        """Stream the model's reply to ``history``, yielding text pieces.

        Tool-call chunks are not yielded: the first tool-call chunk's id, name
        and argument fragment are recorded in the shared context variables.
        Text content is appended to ``context.buffer`` and then yielded.
        """
        call_kwargs = self._generation_config_overwrite(overwrites)

        async for chunk in self.llm.astream(input=history, **call_kwargs):
            assert isinstance(chunk, AIMessageChunk)

            tool_chunks = chunk.tool_call_chunks
            if tool_chunks:
                # Only the first tool-call chunk of each message chunk is read.
                first = tool_chunks[0]
                if tool_id := first.get("id"):
                    context.tool_id.set(tool_id)
                if name := first.get("name"):
                    context.tool_name.set(name)
                if args := first.get("args"):
                    context.tool_buffer.set(context.tool_buffer.get() + args)
                continue

            text = chunk.content
            if not text:
                continue
            assert isinstance(text, str)
            context.buffer.set(context.buffer.get() + text)
            yield text

    async def generate(self, history, overwrites: dict | None = None):
        """Return the model's complete (non-streamed) reply to ``history``."""
        call_kwargs = self._generation_config_overwrite(overwrites)
        return await self.llm.ainvoke(input=history, **call_kwargs)
|
||||
23
apps/riesgos/api/server/__init__.py
Normal file
23
apps/riesgos/api/server/__init__.py
Normal file
@@ -0,0 +1,23 @@
|
||||
from contextlib import asynccontextmanager
|
||||
|
||||
from fastapi import FastAPI
|
||||
|
||||
from api import config
|
||||
|
||||
from .v1 import router
|
||||
|
||||
|
||||
@asynccontextmanager
async def lifespan(_: FastAPI):
    """Application lifespan: set up MongoDB and Langfuse before serving.

    Nothing runs after the ``yield``, so there is no shutdown step.
    """
    await config.init_mongo_db()
    config.init_langfuse()

    yield
|
||||
|
||||
|
||||
# ASGI application. The interactive docs and the OpenAPI document are served
# under /api so they share a prefix with the API routes.
app = FastAPI(
    docs_url="/api/docs",
    openapi_url="/api/openapi.json",
    lifespan=lifespan,
)

# Mount the v1 routes.
app.include_router(router)
|
||||
64
apps/riesgos/api/server/v1.py
Normal file
64
apps/riesgos/api/server/v1.py
Normal file
@@ -0,0 +1,64 @@
|
||||
import uuid
|
||||
from typing import Literal
|
||||
|
||||
from fastapi import APIRouter
|
||||
from fastapi.responses import StreamingResponse
|
||||
from pydantic import BaseModel
|
||||
|
||||
from api import config, services
|
||||
from api.agent import MayaRiesgos
|
||||
|
||||
# Router for the v1 API; every route registered on it is served under /api/v1.
router = APIRouter(prefix="/api/v1")
|
||||
# Single agent instance created at import time and passed to the service calls.
agent = MayaRiesgos()
|
||||
|
||||
|
||||
# Request body for POST /message: the target conversation and the user's text.
# (Documented with comments rather than a docstring so the generated OpenAPI
# schema stays unchanged.)
class Message(BaseModel):
    conversation_id: uuid.UUID
    prompt: str
|
||||
|
||||
|
||||
# Creates a conversation under a fresh random UUID, seeded with the agent's
# system prompt, and returns the id to the client.
@router.post("/conversation")
async def create_conversation():
    new_id = uuid.uuid4()
    await services.create_conversation(new_id, agent.system_prompt)
    return {"conversation_id": new_id}
|
||||
|
||||
|
||||
# Sends a user message to the agent.
#   stream=False (default): awaits the full reply and returns it.
#   stream=True: returns a text/event-stream response in which every chunk
#   produced by services.stream is serialised to JSON and framed as one
#   server-sent event ("data: <json>\n\n").
@router.post("/message")
async def send(message: Message, stream: bool = False):
    if stream is not True:
        return await services.generate(
            agent, message.prompt, message.conversation_id
        )

    async def sse_events():
        chunks = services.stream(agent, message.prompt, message.conversation_id)
        async for chunk in chunks:
            yield f"data: {chunk.model_dump_json()}\n\n"

    return StreamingResponse(sse_events(), media_type="text/event-stream")
|
||||
|
||||
|
||||
# Request body for POST /feedback. ``key`` is used by the handler as the
# Langfuse trace id; ``rating`` is one of the three literal strings below
# (note that "None" is the string, not the None object).
class Feedback(BaseModel):
    key: str
    rating: Literal["Good", "Bad", "None"]
|
||||
|
||||
|
||||
# Records the user's rating as a Langfuse score named "Rating" on the trace
# identified by data.key. The score id is derived from the key, so there is
# one score id per trace.
# NOTE(review): every allowed rating is a non-empty string (including "None"),
# so the truthiness guard below never skips -- confirm whether "None" was meant
# to be ignored.
@router.post("/feedback")
async def register_feedback(data: Feedback):
    if not data.rating:
        return

    client = config.get_langfuse()
    client.score(
        id=f"{data.key}-rating",
        trace_id=data.key,
        name="Rating",
        value=data.rating,
    )
|
||||
6
apps/riesgos/api/services/__init__.py
Normal file
6
apps/riesgos/api/services/__init__.py
Normal file
@@ -0,0 +1,6 @@
|
||||
from banortegpt.database.mongo_memory.crud import create_conversation
|
||||
|
||||
from .generate_response import generate
|
||||
from .stream_response import stream
|
||||
|
||||
# Public API of the services package: the two response entry points plus the
# conversation-creation helper re-exported from the database layer.
__all__ = ["stream", "generate", "create_conversation"]
|
||||
89
apps/riesgos/api/services/generate_response.py
Normal file
89
apps/riesgos/api/services/generate_response.py
Normal file
@@ -0,0 +1,89 @@
|
||||
import json
|
||||
from typing import Any
|
||||
from uuid import UUID
|
||||
|
||||
from banortegpt.database.mongo_memory import crud
|
||||
from langfuse.decorators import langfuse_context, observe
|
||||
from pydantic import BaseModel
|
||||
|
||||
from api import context as ctx
|
||||
from api.agent import MayaRiesgos
|
||||
|
||||
|
||||
# Result of a non-streaming generation: the reply text plus the URLs gathered
# for it (reference URLs followed by image URLs).
class Response(BaseModel):
    content: str
    urls: list[str]
|
||||
|
||||
|
||||
@observe(capture_input=False, capture_output=False)
async def generate(
    agent: MayaRiesgos,
    prompt: str,
    conversation_id: UUID,
) -> Response:
    """Answer ``prompt`` in one shot (non-streaming) and persist the exchange.

    Loads the conversation, appends the user prompt and asks the agent for a
    reply. If the reply requests a tool call, the tool is run, the call and
    its result are appended to the conversation, and the agent is asked again
    with tools disabled. The final text is appended as the assistant turn, the
    conversation is saved, and the Langfuse trace is updated.

    Args:
        agent: Agent used for generation and tool execution.
        prompt: The user's message.
        conversation_id: Id of an existing conversation.

    Returns:
        The reply text together with reference and image URLs (both empty
        when no tool was called).

    Raises:
        ValueError: If the conversation does not exist, or if no reply text
            is available after generation.
    """
    conversation = await crud.get_conversation(conversation_id)

    if conversation is None:
        raise ValueError(f"Conversation with id {conversation_id} not found")

    conversation.add(role="user", content=prompt)

    response = await agent.generate(conversation.to_openai_format(agent.message_limit))

    reference_urls, image_urls = [], []

    # NOTE(review): this reads tool calls attribute-style (``.id``,
    # ``.function.name``) and later uses ``agent.llm.build_tool_call``, while
    # the streaming path reads dict-style chunks and builds the tool-call
    # message by hand. Confirm both match what ``agent.llm`` actually returns.
    if call := response.tool_calls:
        # ``call_id`` rather than ``id`` so the builtin is not shadowed.
        if call_id := call[0].id:
            ctx.tool_id.set(call_id)
        if name := call[0].function.name:
            ctx.tool_name.set(name)
        ctx.tool_buffer.set(call[0].function.arguments)
    else:
        ctx.buffer.set(response.content)

    tool_buffer = ctx.tool_buffer.get()
    tool_id = ctx.tool_id.get()
    tool_name = ctx.tool_name.get()

    if tool_id is not None:
        # If tool_buffer is a JSON string, parse it into a dict; if it is not
        # valid JSON, treat the raw text as the question.
        if isinstance(tool_buffer, str):
            try:
                tool_args = json.loads(tool_buffer)
            except json.JSONDecodeError:
                tool_args = {"question": tool_buffer}
        else:
            tool_args = tool_buffer

        response, payloads = await agent.tool_map[tool_name](**tool_args)  # type: ignore

        tool_call: dict[str, Any] = agent.llm.build_tool_call(
            tool_id, tool_name, tool_buffer
        )
        tool_call_id: dict[str, Any] = agent.llm.build_tool_call_id(tool_id)

        conversation.add("assistant", **tool_call)
        conversation.add("tool", content=response, **tool_call_id)

        # Second pass with tools disabled so the model answers from the tool
        # output instead of requesting another call.
        response = await agent.generate(
            conversation.to_openai_format(agent.message_limit), {"tools": None}
        )
        ctx.buffer.set(response.content)

        reference_urls, image_urls = await agent.get_shareable_urls(payloads)  # type: ignore

    buffer = ctx.buffer.get()
    if buffer is None:
        raise ValueError("No buffer found")

    conversation.add(role="assistant", content=buffer)

    # Persist the updated conversation, as the streaming path does; without
    # this the turn is lost once the request ends.
    await conversation.save()

    langfuse_context.update_current_trace(
        name=agent.__class__.__name__,
        session_id=str(conversation_id),
        input=prompt,
        output=buffer,
    )

    return Response(content=buffer, urls=reference_urls + image_urls)
|
||||
102
apps/riesgos/api/services/stream_response.py
Normal file
102
apps/riesgos/api/services/stream_response.py
Normal file
@@ -0,0 +1,102 @@
|
||||
import json
|
||||
from enum import StrEnum
|
||||
from typing import TypeAlias
|
||||
from uuid import UUID
|
||||
|
||||
from banortegpt.database.mongo_memory import crud
|
||||
from langfuse.decorators import langfuse_context, observe
|
||||
from pydantic import BaseModel
|
||||
|
||||
from api import context as ctx
|
||||
from api.agent import MayaRiesgos
|
||||
|
||||
|
||||
class ChunkType(StrEnum):
|
||||
START = "start"
|
||||
TEXT = "text"
|
||||
REFERENCE = "reference"
|
||||
IMAGE = "image"
|
||||
TOOL = "tool"
|
||||
END = "end"
|
||||
ERROR = "error"
|
||||
|
||||
|
||||
# Scalar payload allowed in a response chunk: text or an integer.
ContentType: TypeAlias = str | int
|
||||
|
||||
|
||||
# One item of a streamed response: its kind plus an optional payload, which is
# a single scalar, a list of scalars, or None.
class ResponseChunk(BaseModel):
    type: ChunkType
    content: ContentType | list[ContentType] | None
|
||||
|
||||
|
||||
@observe(capture_input=False, capture_output=False)
async def stream(agent: MayaRiesgos, prompt: str, conversation_id: UUID):
    """Stream the agent's answer to ``prompt`` as ``ResponseChunk`` objects.

    Chunk order: START, then TEXT chunks from the first model pass. If that
    pass recorded a tool call: TOOL, TEXT chunks from a second pass made with
    tools disabled, then REFERENCE and IMAGE when their URL lists are
    non-empty. Finally END, carrying the current Langfuse trace id.

    The conversation is saved after the assistant turn has been appended.

    Raises:
        ValueError: If the conversation does not exist (raised after the START
            chunk has already been yielded).
    """
    yield ResponseChunk(type=ChunkType.START, content="")

    conversation = await crud.get_conversation(conversation_id)
    if conversation is None:
        raise ValueError(f"Conversation with id {conversation_id} not found")

    conversation.add(role="user", content=prompt)

    first_pass = conversation.to_openai_format(
        agent.message_limit, langchain_compat=True
    )
    async for piece in agent.stream(first_pass):
        yield ResponseChunk(type=ChunkType.TEXT, content=piece)

    # The agent records a requested tool call in the shared context variables.
    tool_id = ctx.tool_id.get()
    if tool_id is not None:
        raw_args = ctx.tool_buffer.get()
        assert raw_args is not None

        tool_name = ctx.tool_name.get()
        assert tool_name is not None

        yield ResponseChunk(type=ChunkType.TOOL, content=None)

        # The accumulated argument fragments form one JSON object.
        parsed_args = json.loads(raw_args)

        tool_output, payloads = await agent.tool_map[tool_name](**parsed_args)

        assistant_tool_call = {
            "id": tool_id,
            "function": {
                "name": tool_name,
                "arguments": raw_args,
            },
            "type": "function",
        }
        conversation.add(role="assistant", tool_calls=[assistant_tool_call])
        conversation.add(role="tool", content=tool_output, tool_call_id=tool_id)

        second_pass = conversation.to_openai_format(
            agent.message_limit, langchain_compat=True
        )
        # Tools are disabled for the follow-up pass.
        async for piece in agent.stream(second_pass, {"tools": None}):
            yield ResponseChunk(type=ChunkType.TEXT, content=piece)

        ref_urls, image_urls = await agent.get_shareable_urls(payloads)  # type: ignore

        if len(ref_urls) > 0:
            yield ResponseChunk(type=ChunkType.REFERENCE, content=ref_urls)

        if len(image_urls) > 0:
            yield ResponseChunk(type=ChunkType.IMAGE, content=image_urls)

    full_text = ctx.buffer.get()

    conversation.add(role="assistant", content=full_text)
    await conversation.save()

    langfuse_context.update_current_trace(
        name=agent.__class__.__name__,
        session_id=str(conversation_id),
        input=prompt,
        output=full_text,
    )

    trace_id = langfuse_context.get_current_trace_id()
    yield ResponseChunk(type=ChunkType.END, content=trace_id)
|
||||
19
apps/riesgos/api/system_prompt.md
Normal file
19
apps/riesgos/api/system_prompt.md
Normal file
@@ -0,0 +1,19 @@
|
||||
Eres MayaRiesgos, una asistente virtual amigable y profesional de Banorte, especializada en proporcionar información precisa a los usuarios.
|
||||
|
||||
Tu función principal:
|
||||
1. Responder preguntas de los usuarios de manera informativa, empática y concisa.
|
||||
2. Utilizar la herramienta get_information antes de responder cualquier pregunta.
|
||||
3. Antes de usar la herramienta get_information, si la pregunta no menciona explícitamente un sistema, primero aclara con el usuario sobre qué sistema está consultando.
|
||||
Los posibles sistemas son:
|
||||
- ML: "Máster de Líneas"
|
||||
- ED: "Expediente Digital"
|
||||
- CARATULA
|
||||
- SACS
|
||||
- SICRED
|
||||
4. Si el usuario realiza una pregunta de seguimiento, puedes asumir que se refiere al mismo sistema mencionado en la consulta anterior.
|
||||
|
||||
Pautas para interactuar:
|
||||
- Siempre basa tu respuesta únicamente en el resultado de get_information.
|
||||
- Si la información obtenida mediante get_information no es suficiente para responder, informa al usuario que no cuentas con datos suficientes para brindar una respuesta.
|
||||
|
||||
Tu objetivo es ofrecer una experiencia informativa y satisfactoria, reflejando los valores de confianza y excelencia de Banorte.
|
||||
23
apps/riesgos/api/tools.json
Normal file
23
apps/riesgos/api/tools.json
Normal file
@@ -0,0 +1,23 @@
|
||||
[
|
||||
{
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "get_information",
|
||||
"description": "Buscar informacion relevante de documentos de Banorte sobre un sistema en particular.",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"question": {
|
||||
"type": "string",
|
||||
"description": "La pregunta del usuario, reescrita para ser comprensible fuera de contexto."
|
||||
},
|
||||
"system": {
|
||||
"type": "string",
|
||||
"description": "El sistema del cual se buscara informacion. Puede ser uno de los siguientes: ['ML','SACS','ED','CARATULA', 'SICRED']. 'ML' es 'Master de lineas', 'ED' es 'Expediente Digital'"
|
||||
}
|
||||
},
|
||||
"required": ["question", "system"]
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
Reference in New Issue
Block a user