Mayacontigo/apps/egresos/api/server.py

import os
import time
import uuid
from contextlib import asynccontextmanager

from fastapi import FastAPI
from fastapi.responses import StreamingResponse
from langfuse import Langfuse
from pydantic import BaseModel

from api import services
from api.agent import Agent
from api.config import config

# Configure Langfuse (keys are read from the environment; hardcoding live API
# credentials in source code exposes them to anyone with repo access)
langfuse = Langfuse(
    public_key=os.environ["LANGFUSE_PUBLIC_KEY"],
    secret_key=os.environ["LANGFUSE_SECRET_KEY"],
    host="https://ailogger.azurewebsites.net",
)


@asynccontextmanager
async def lifespan(_: FastAPI):
    await config.init_mongo_db()
    yield
    # Drain any Langfuse events still queued in the background before shutdown
    langfuse.flush()


app = FastAPI(lifespan=lifespan)
agent = Agent()
@app.post("/api/v1/conversation")
async def create_conversation():
conversation_id = uuid.uuid4()
await services.create_conversation(conversation_id, agent.system_prompt)
return {"conversation_id": conversation_id}


class Message(BaseModel):
    conversation_id: uuid.UUID
    prompt: str
@app.post("/api/v1/message")
async def send(message: Message):
# Crear trace principal
trace = langfuse.trace(
name="chat_message",
session_id=str(message.conversation_id),
input={
"prompt": message.prompt,
"conversation_id": str(message.conversation_id),
},
)

    def traced_sse(func):
        # Wrap an async chunk generator: emit each chunk as an SSE event, then
        # record latency, token estimates, and cost on the trace once the
        # stream ends. (Renamed from b64_sse: nothing here is base64-encoded.)
        async def wrapper(*args, **kwargs):
            response_parts = []
            start_time = time.time()
            async for chunk in func(*args, **kwargs):
                if chunk.type == "text" and chunk.content:
                    response_parts.append(str(chunk.content))
                content = chunk.model_dump_json()
                yield f"data: {content}\n\n"
            latency_ms = round((time.time() - start_time) * 1000)
            full_response = "".join(response_parts)
            # Rough heuristic: ~1.3 tokens per whitespace-separated word
            input_tokens = len(message.prompt.split()) * 1.3
            output_tokens = len(full_response.split()) * 1.3
            total_tokens = int(input_tokens + output_tokens)
            cost_per_1k_input = 0.03
            cost_per_1k_output = 0.06
            total_cost = (input_tokens / 1000 * cost_per_1k_input) + (
                output_tokens / 1000 * cost_per_1k_output
            )
            # Token counts go in trace metadata; in Langfuse, usage is a
            # generation-level field, not a trace-level one
            trace.update(
                output={"response": full_response},
                metadata={
                    "usage": {
                        "input": int(input_tokens),
                        "output": int(output_tokens),
                        "total": total_tokens,
                        "unit": "TOKENS",
                    }
                },
            )
            langfuse.score(
                trace_id=trace.id,
                name="latency",
                value=latency_ms,
                comment=f"Response time: {latency_ms}ms",
            )
            langfuse.score(
                trace_id=trace.id,
                name="cost",
                value=round(total_cost, 4),
                comment=f"Estimated cost: ${round(total_cost, 4)}",
            )

        return wrapper

    sse_stream = traced_sse(services.stream)
    generator = sse_stream(agent, message.prompt, message.conversation_id)
    return StreamingResponse(generator, media_type="text/event-stream")
@app.get("/")
async def health():
return {"status": "ok"}