# Mayacontigo/apps/ChatEgresos/api/server.py
import time
import uuid
from contextlib import asynccontextmanager

from fastapi import FastAPI
from fastapi.responses import StreamingResponse
from pydantic import BaseModel
from langfuse import Langfuse

from api import services
from api.agent import Agent
from api.config import config

# Configure Langfuse
langfuse = Langfuse(
    public_key="pk-lf-49cb04b3-0c7d-475b-8105-ad8b8749ecdd",
    secret_key="sk-lf-e02fa322-c709-4d80-bef2-9cb279846a0c",
    host="https://ailogger.azurewebsites.net"
)
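
# A less exposed alternative (sketch, not what this repo does): the Langfuse
# SDK can also read its credentials from the LANGFUSE_PUBLIC_KEY,
# LANGFUSE_SECRET_KEY and LANGFUSE_HOST environment variables, so the keys
# above would not need to live in source control:
#
#   langfuse = Langfuse()  # assumes the three variables are exported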


@asynccontextmanager
async def lifespan(_: FastAPI):
    # Open the MongoDB connection once at startup; it stays available for
    # the lifetime of the application.
    await config.init_mongo_db()
    yield


app = FastAPI(lifespan=lifespan)
agent = Agent()


@app.post("/api/v1/conversation")
async def create_conversation():
    # Start a new conversation seeded with the agent's system prompt and
    # return its id so the client can send messages against it.
    conversation_id = uuid.uuid4()
    await services.create_conversation(conversation_id, agent.system_prompt)
    return {"conversation_id": conversation_id}


class Message(BaseModel):
    conversation_id: uuid.UUID
    prompt: str


@app.post("/api/v1/message")
async def send(message: Message):
    # Create the root trace for this message
    trace = langfuse.trace(
        name="chat_message",
        session_id=str(message.conversation_id),
        input={
            "prompt": message.prompt,
            "conversation_id": str(message.conversation_id)
        }
    )

    # Wrap the streaming service: each chunk is re-emitted as an SSE "data:"
    # event while the text content is accumulated, so the full response,
    # latency and an estimated cost can be logged to Langfuse once the
    # stream ends. (Despite the name, no base64 encoding is involved.)
    def b64_sse(func):
        async def wrapper(*args, **kwargs):
            response_parts = []
            start_time = time.time()
            async for chunk in func(*args, **kwargs):
                if chunk.type == "text" and chunk.content:
                    response_parts.append(str(chunk.content))
                content = chunk.model_dump_json()
                data = f"data: {content}\n\n"
                yield data
            end_time = time.time()
            latency_ms = round((end_time - start_time) * 1000)
            full_response = "".join(response_parts)
            # Rough token estimate: ~1.3 tokens per whitespace-separated word
            input_tokens = len(message.prompt.split()) * 1.3
            output_tokens = len(full_response.split()) * 1.3
            total_tokens = int(input_tokens + output_tokens)
            # Hardcoded per-1K-token prices used for the cost score
            cost_per_1k_input = 0.03
            cost_per_1k_output = 0.06
            total_cost = (input_tokens / 1000 * cost_per_1k_input
                          + output_tokens / 1000 * cost_per_1k_output)
            trace.update(
                output={"response": full_response},
                usage={
                    "input": int(input_tokens),
                    "output": int(output_tokens),
                    "total": total_tokens,
                    "unit": "TOKENS"
                }
            )
            langfuse.score(
                trace_id=trace.id,
                name="latency",
                value=latency_ms,
                comment=f"Response time: {latency_ms}ms"
            )
            langfuse.score(
                trace_id=trace.id,
                name="cost",
                value=round(total_cost, 4),
                comment=f"Estimated cost: ${round(total_cost, 4)}"
            )
        return wrapper

    sse_stream = b64_sse(services.stream)
    generator = sse_stream(agent, message.prompt, message.conversation_id)
    return StreamingResponse(generator, media_type="text/event-stream")
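

# Minimal local run (a sketch: it assumes uvicorn is installed and that this
# file is importable as api.server, neither of which the repo confirms):
#
#   uvicorn api.server:app --reload
#
# Example calls against the two endpoints; -N keeps curl from buffering the
# SSE stream:
#
#   curl -X POST http://localhost:8000/api/v1/conversation
#   curl -N -X POST http://localhost:8000/api/v1/message \
#        -H "Content-Type: application/json" \
#        -d '{"conversation_id": "<id from the first call>", "prompt": "Hola"}'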