This commit is contained in:
Rogelio
2025-10-13 18:16:25 +00:00
parent 739f087cef
commit 325f1ef439
415 changed files with 46870 additions and 0 deletions

View File

View File

@@ -0,0 +1,3 @@
from .main import MayaBursatil
__all__ = ["MayaBursatil"]

View File

@@ -0,0 +1,130 @@
from pathlib import Path
from typing import Any
from langchain_core.messages import AIMessageChunk
from pydantic import BaseModel, Field
from langchain_azure_ai.chat_models import AzureAIChatCompletionsModel
from langchain_azure_ai.embeddings import AzureAIEmbeddingsModel
from banortegpt.storage.azure_storage import AzureStorage
from banortegpt.vector.qdrant import AsyncQdrant
from api import context
from api.config import config
parent = Path(__file__).parent
SYSTEM_PROMPT = (parent / "system_prompt.md").read_text()
AZURE_AI_URI = "https://eastus2.api.cognitive.microsoft.com"
class get_information(BaseModel):
"""Search a private repository for information."""
question: str = Field(..., description="The user question")
class MayaBursatil:
system_prompt = SYSTEM_PROMPT
generation_config = {
"temperature": config.model_temperature,
}
embedding_model = config.embedding_model
message_limit = config.message_limit
index = config.vector_index
limit = config.search_limit
bucket = config.storage_bucket
search = AsyncQdrant.from_config(config)
llm = AzureAIChatCompletionsModel(
endpoint=f"{AZURE_AI_URI}/openai/deployments/{config.model}",
credential=config.openai_api_key,
).bind_tools([get_information])
embedder = AzureAIEmbeddingsModel(
endpoint=f"{AZURE_AI_URI}/openai/deployments/{config.embedding_model}",
credential=config.openai_api_key,
)
storage = AzureStorage.from_config(config)
def __init__(self) -> None:
self.tool_map = {
"get_information": self.get_information
}
def build_response(self, payloads, fallback):
template = "<FAQ {index}>\n\n{content}\n\n</FAQ {index}>"
filled_templates = [
template.format(index=idx, content=payload["content"])
for idx, payload in enumerate(payloads)
]
filled_templates.append(f"<FALLBACK>\n{fallback}\n</FALLBACK>")
return "\n".join(filled_templates)
async def get_information(self, question: str):
embedding = await self.embedder.aembed_query(question)
payloads = await self.search.semantic_search(embedding=embedding, collection=self.index, limit=self.limit)
fallback_messages: dict[str, int] = {}
for payload in payloads:
fallback_message = payload.get("fallback_message", "None")
if fallback_message not in fallback_messages:
fallback_messages[fallback_message] = 1
else:
fallback_messages[fallback_message] += 1
fallback = max(fallback_messages, key=fallback_messages.get) # type: ignore
tool_response = self.build_response(payloads, fallback)
return tool_response, payloads
async def get_shareable_urls(self, payloads: list):
reference_urls = []
image_urls = []
for payload in payloads:
if imagen := payload.get("imagen"):
image_url = await self.storage.get_file_url(
filename=imagen,
bucket=self.bucket,
minute_duration=20,
image=True,
)
if image_url:
image_urls.append(image_url)
else:
print("Image not found")
return reference_urls, image_urls
def _generation_config_overwrite(self, overwrites: dict | None) -> dict[str, Any]:
generation_config_copy = self.generation_config.copy()
if overwrites:
for k, v in overwrites.items():
generation_config_copy[k] = v
return generation_config_copy
async def stream(self, history, overwrites: dict | None = None):
generation_config = self._generation_config_overwrite(overwrites)
async for delta in self.llm.astream(input=history, **generation_config):
assert isinstance(delta, AIMessageChunk)
if call := delta.tool_call_chunks:
if tool_id := call[0].get("id"):
context.tool_id.set(tool_id)
if name := call[0].get("name"):
context.tool_name.set(name)
if args := call[0].get("args"):
context.tool_buffer.set(context.tool_buffer.get() + args)
else:
if buffer := delta.content:
assert isinstance(buffer, str)
context.buffer.set(context.buffer.get() + buffer)
yield buffer
async def generate(self, history, overwrites: dict | None = None):
generation_config = self._generation_config_overwrite(overwrites)
return await self.llm.ainvoke(input=history, **generation_config)

View File

@@ -0,0 +1,6 @@
Eres MayaBursatil, una muy amigable y símpatica asistente virtual del departamento de contraloria bursatil de Banorte.
Tu objetivo es responder preguntas de usuarios de manera informativa y empatica.
Para cada pregunta, utiliza la herramienta 'get_information' para obtener informacion de nuestro FAQ.
Utiliza la informacion para responder la pregunta del usuario.
Utiliza emojis.
Si no puedes responder la pregunta basado en la informacion del FAQ, responde con el contenido en el FALLBACK.

View File

@@ -0,0 +1,55 @@
from hvac import Client
from pydantic import Field
from pydantic_settings import BaseSettings
client = Client(url="https://vault.ia-innovacion.work")
if not client.is_authenticated():
raise Exception("Vault authentication failed")
secret_map = client.secrets.kv.v2.read_secret_version(
path="banortegpt", mount_point="secret"
)["data"]["data"]
class Settings(BaseSettings):
# Config
model: str = "gpt-4o"
model_temperature: int = 0
embedding_model: str = "text-embedding-3-large"
message_limit: int = 10
storage_bucket: str = "bursatilreferences"
vector_index: str = "MayaBursatil"
search_limit: int = 3
host: str = "0.0.0.0"
port: int = 8000
# API Keys
azure_endpoint: str = Field(default_factory=lambda: secret_map["azure_endpoint"])
openai_api_key: str = Field(default_factory=lambda: secret_map["openai_api_key"])
openai_api_version: str = Field(
default_factory=lambda: secret_map["openai_api_version"]
)
azure_blob_connection_string: str = Field(
default_factory=lambda: secret_map["azure_blob_connection_string"]
)
qdrant_url: str = Field(default_factory=lambda: secret_map["qdrant_api_url"])
qdrant_api_key: str | None = Field(
default_factory=lambda: secret_map["qdrant_api_key"]
)
mongodb_url: str = Field(
default_factory=lambda: secret_map["cosmosdb_connection_string"]
)
async def init_mongo_db(self):
from beanie import init_beanie
from motor.motor_asyncio import AsyncIOMotorClient
from banortegpt.database.mongo_memory.models import Conversation
await init_beanie(
database=AsyncIOMotorClient(self.mongodb_url).banortegptdos,
document_models=[Conversation],
)
config = Settings()

View File

@@ -0,0 +1,7 @@
from contextvars import ContextVar
buffer: ContextVar[str] = ContextVar("buffer", default="")
tool_buffer: ContextVar[str] = ContextVar("tool_buffer", default="")
tool_id: ContextVar[str | None] = ContextVar("tool_id", default=None)
tool_name: ContextVar[str | None] = ContextVar("tool_name", default=None)

View File

@@ -0,0 +1,55 @@
import uuid
from contextlib import asynccontextmanager
from fastapi import FastAPI
from fastapi.responses import StreamingResponse
from pydantic import BaseModel
from api import services
from api.agent import MayaBursatil
from api.config import config
@asynccontextmanager
async def lifespan(_: FastAPI):
await config.init_mongo_db()
yield
app = FastAPI(lifespan=lifespan)
agent = MayaBursatil()
class Message(BaseModel):
conversation_id: uuid.UUID
prompt: str
@app.post("/api/v1/conversation")
async def create_conversation():
conversation_id = uuid.uuid4()
await services.create_conversation(conversation_id, agent.system_prompt)
return {"conversation_id": conversation_id}
@app.post("/api/v1/message")
async def send(message: Message, stream: bool = False):
if stream is True:
def b64_sse(func):
async def wrapper(*args, **kwargs):
async for chunk in func(*args, **kwargs):
content = chunk.model_dump_json()
data = f"data: {content}\n\n"
yield data
return wrapper
sse_stream = b64_sse(services.stream)
generator = sse_stream(agent, message.prompt, message.conversation_id)
return StreamingResponse(generator, media_type="text/event-stream")
else:
response = await services.generate(
agent, message.prompt, message.conversation_id
)
return response

View File

@@ -0,0 +1,10 @@
from banortegpt.database.mongo_memory.crud import create_conversation
from .generate_response import generate
from .stream_response import stream
__all__ = [
"stream",
"generate",
"create_conversation",
]

View File

@@ -0,0 +1,89 @@
import json
from typing import Any
from uuid import UUID
from langfuse.decorators import langfuse_context, observe
from pydantic import BaseModel
from api import context as ctx
from api.agent import MayaBursatil
from banortegpt.database.mongo_memory import crud
class Response(BaseModel):
content: str
urls: list[str]
@observe(capture_input=False, capture_output=False)
async def generate(
agent: MayaBursatil,
prompt: str,
conversation_id: UUID,
) -> Response:
conversation = await crud.get_conversation(conversation_id)
if conversation is None:
raise ValueError(f"Conversation with id {conversation_id} not found")
conversation.add(role="user", content=prompt)
response = await agent.generate(conversation.to_openai_format(agent.message_limit))
reference_urls, image_urls = [], []
if call := response.tool_calls:
if id := call[0].id:
ctx.tool_id.set(id)
if name := call[0].function.name:
ctx.tool_name.set(name)
ctx.tool_buffer.set(call[0].function.arguments)
else:
ctx.buffer.set(response.content)
buffer = ctx.buffer.get()
tool_buffer = ctx.tool_buffer.get()
tool_id = ctx.tool_id.get()
tool_name = ctx.tool_name.get()
if tool_id is not None:
# Si tool_buffer es un string JSON, lo convertimos a diccionario
if isinstance(tool_buffer, str):
try:
tool_args = json.loads(tool_buffer)
except json.JSONDecodeError:
tool_args = {"question": tool_buffer}
else:
tool_args = tool_buffer
response, payloads = await agent.tool_map[tool_name](**tool_args) # type: ignore
tool_call: dict[str, Any] = agent.llm.build_tool_call(
tool_id, tool_name, tool_buffer
)
tool_call_id: dict[str, Any] = agent.llm.build_tool_call_id(tool_id)
conversation.add("assistant", **tool_call)
conversation.add("tool", content=response, **tool_call_id)
response = await agent.generate(
conversation.to_openai_format(agent.message_limit), {"tools": None}
)
ctx.buffer.set(response.content)
reference_urls, image_urls = await agent.get_shareable_urls(payloads) # type: ignore
buffer = ctx.buffer.get()
if buffer is None:
raise ValueError("No buffer found")
conversation.add(role="assistant", content=buffer)
langfuse_context.update_current_trace(
name=str(conversation_id),
session_id=str(conversation_id),
input=prompt,
output=buffer,
)
return Response(content=buffer, urls=reference_urls + image_urls)

View File

@@ -0,0 +1,100 @@
import json
from enum import StrEnum
from typing import TypeAlias
from uuid import UUID
from langfuse.decorators import langfuse_context, observe
from pydantic import BaseModel
from api import context as ctx
from api.agent import MayaBursatil
from banortegpt.database.mongo_memory import crud
class ChunkType(StrEnum):
START = "start"
TEXT = "text"
REFERENCE = "reference"
IMAGE = "image"
TOOL = "tool"
END = "end"
ERROR = "error"
ContentType: TypeAlias = str | int
class ResponseChunk(BaseModel):
type: ChunkType
content: ContentType | list[ContentType] | None
@observe(capture_input=False, capture_output=False)
async def stream(agent: MayaBursatil, prompt: str, conversation_id: UUID):
yield ResponseChunk(type=ChunkType.START, content="")
conversation = await crud.get_conversation(conversation_id)
if conversation is None:
raise ValueError("Conversation not found")
conversation.add(role="user", content=prompt)
history = conversation.to_openai_format(agent.message_limit, langchain_compat=True)
async for content in agent.stream(history):
yield ResponseChunk(type=ChunkType.TEXT, content=content)
if (tool_id := ctx.tool_id.get()) is not None:
tool_buffer = ctx.tool_buffer.get()
assert tool_buffer is not None
tool_name = ctx.tool_name.get()
assert tool_name is not None
yield ResponseChunk(type=ChunkType.TOOL, content=None)
buffer_dict = json.loads(tool_buffer)
response, payloads = await agent.tool_map[tool_name](**buffer_dict)
conversation.add(
role="assistant",
tool_calls=[
{
"id": tool_id,
"function": {
"name": tool_name,
"arguments": tool_buffer,
},
"type": "function",
}
],
)
conversation.add(role="tool", content=response, tool_call_id=tool_id)
history = conversation.to_openai_format(agent.message_limit, langchain_compat=True)
async for content in agent.stream(history, {"tools": None}):
yield ResponseChunk(type=ChunkType.TEXT, content=content)
ref_urls, image_urls = await agent.get_shareable_urls(payloads) # type: ignore
if len(ref_urls) > 0:
yield ResponseChunk(type=ChunkType.REFERENCE, content=ref_urls)
if len(image_urls) > 0:
yield ResponseChunk(type=ChunkType.IMAGE, content=image_urls)
buffer = ctx.buffer.get()
conversation.add(role="assistant", content=buffer)
await conversation.replace()
yield ResponseChunk(type=ChunkType.END, content="")
langfuse_context.update_current_trace(
name=agent.__class__.__name__,
session_id=str(conversation_id),
input=prompt,
output=buffer,
)