# Forked from innovacion/Mayacontigo (131 lines, 4.6 KiB, Python).
import logging
|
|
from pathlib import Path
|
|
from typing import Any
|
|
|
|
from langchain_core.messages.ai import AIMessageChunk
|
|
from pydantic import BaseModel, Field
|
|
from banortegpt.storage.azure_storage import AzureStorage
|
|
from banortegpt.vector.qdrant import AsyncQdrant
|
|
from langchain_azure_ai.chat_models import AzureAIChatCompletionsModel
|
|
from langchain_azure_ai.embeddings import AzureAIEmbeddingsModel
|
|
|
|
import api.context as ctx
|
|
from api.config import config
|
|
|
|
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Directory that contains this file; the system prompt is stored next to it.
parent = Path(__file__).parent

# Read the prompt once at import time. The encoding is given explicitly so the
# decoded text does not depend on the platform's locale encoding.
SYSTEM_PROMPT = (parent / "system_prompt.md").read_text(encoding="utf-8")
|
|
|
|
|
|
# Tool schema handed to the chat model: MayaOCP passes this class to
# `bind_tools` and dispatches calls by the name "get_information".
# NOTE(review): the class name, docstring and field description are presumably
# surfaced to the model as the tool's name/description — keep them verbatim.
class get_information(BaseModel):
    """Search a private repository for information."""

    # Question text supplied with the tool call; required (no default value).
    question: str = Field(..., description="The user question")
|
|
|
|
|
|
# Base URL of the Azure AI endpoint; the per-deployment path
# (`/openai/deployments/<name>`) is appended where the clients are built.
AZURE_AI_URI = "https://eastus2.api.cognitive.microsoft.com"
|
|
|
|
|
|
class MayaOCP:
    """Chat agent that answers with help from a vector-search tool.

    All settings and clients below are class attributes created once, when
    the class body runs, from ``config``: a Qdrant search client, an Azure
    chat model with the ``get_information`` tool bound, an Azure embeddings
    model and an Azure storage client. Instances therefore share them.
    """

    system_prompt = SYSTEM_PROMPT
    generation_config = {"temperature": config.model_temperature}
    message_limit = config.message_limit
    index = config.vector_index
    limit = config.search_limit
    bucket = config.storage_bucket

    search = AsyncQdrant.from_config(config)
    llm = AzureAIChatCompletionsModel(
        endpoint=f"{AZURE_AI_URI}/openai/deployments/{config.model}",
        credential=config.openai_api_key,
    ).bind_tools([get_information])
    embedder = AzureAIEmbeddingsModel(
        endpoint=f"{AZURE_AI_URI}/openai/deployments/{config.embedding_model}",
        credential=config.openai_api_key,
    )
    storage = AzureStorage.from_config(config)

    def __init__(self) -> None:
        """Register the handlers that can be looked up by tool name."""
        self.tool_map = {"get_information": self.get_information}

    def build_response(self, payloads):
        """Render search payloads as the text returned for a tool call.

        The output starts with a citation reminder, followed by one numbered
        section per payload (numbering follows ``enumerate`` and so starts at
        0). A payload without a ``"content"`` key contributes an empty body.
        Sections are joined with single newlines.
        """
        template = "------ REFERENCIA {index} ----- \n\n{content}"
        sections = ["Recuerda citar las referencias en el formato: texto[1]."]
        for position, payload in enumerate(payloads):
            body = payload.get("content", "")
            sections.append(template.format(index=position, content=body))
        return "\n".join(sections)

    async def get_information(self, question: str):
        """Embed ``question``, run a semantic search and format the hits.

        Returns a ``(tool_response, results)`` tuple: the text produced by
        ``build_response`` and the raw search results it was built from.
        """
        logger.info(
            f"Embedding question: {question} with model {self.embedder.model_name}"
        )
        vector = await self.embedder.aembed_query(question)
        hits = await self.search.semantic_search(
            embedding=vector, collection=self.index, limit=self.limit
        )
        return self.build_response(hits), hits

    async def get_shareable_urls(self, metadatas: list):
        """Request storage URLs for the files and images named in ``metadatas``.

        For every metadata entry, a truthy ``"file"`` value yields a reference
        URL and a truthy ``"image"`` value yields an image URL. Each URL is
        requested with ``minute_duration=20``. Returns
        ``(reference_urls, image_urls)``.
        """
        reference_urls = []
        image_urls = []
        # (metadata key, value of the `image` flag, list collecting the URLs);
        # the file entry comes first so requests keep their original order.
        targets = (
            ("file", False, reference_urls),
            ("image", True, image_urls),
        )

        for metadata in metadatas:
            for key, is_image, collected in targets:
                filename = metadata.get(key)
                if not filename:
                    continue
                url = await self.storage.get_file_url(
                    filename=filename,
                    bucket=self.bucket,
                    minute_duration=20,
                    image=is_image,
                )
                collected.append(url)

        return reference_urls, image_urls

    def _generation_config_overwrite(self, overwrites: dict | None) -> dict[str, Any]:
        """Return a copy of ``generation_config`` with ``overwrites`` applied.

        The class-level configuration itself is never modified.
        """
        merged = self.generation_config.copy()
        if overwrites:
            merged.update(overwrites)
        return merged

    async def stream(self, history, overwrites: dict | None = None):
        """Stream the model's reply to ``history``, yielding text pieces.

        Chunks that carry tool-call data are not yielded; the id, name and
        argument fragment of their first tool-call chunk are stored in the
        matching ``ctx`` holders (arguments are appended to ``ctx.tool_buffer``).
        Text chunks are appended to ``ctx.buffer`` and then yielded.
        """
        params = self._generation_config_overwrite(overwrites)

        async for chunk in self.llm.astream(input=history, **params):
            assert isinstance(chunk, AIMessageChunk)

            tool_chunks = chunk.tool_call_chunks
            if tool_chunks:
                # Only the first tool-call chunk of each message chunk is read.
                first = tool_chunks[0]
                tool_id = first.get("id")
                if tool_id:
                    ctx.tool_id.set(tool_id)
                name = first.get("name")
                if name:
                    ctx.tool_name.set(name)
                args = first.get("args")
                if args:
                    ctx.tool_buffer.set(ctx.tool_buffer.get() + args)
                continue

            text = chunk.content
            if not text:
                continue
            assert isinstance(text, str)
            ctx.buffer.set(ctx.buffer.get() + text)
            yield text

    async def generate(self, history, overwrites: dict | None = None):
        """Return the model's complete (non-streamed) reply to ``history``."""
        params = self._generation_config_overwrite(overwrites)
        reply = await self.llm.ainvoke(input=history, **params)
        return reply
|