import os
import time
import uuid
from contextlib import asynccontextmanager

from fastapi import FastAPI
from fastapi.responses import StreamingResponse
from langfuse import Langfuse
from pydantic import BaseModel

from api import services
from api.agent import Agent
from api.config import config

# Configure Langfuse. Credentials are read from the environment instead of
# being hardcoded, so the keys never end up in version control.
langfuse = Langfuse(
    public_key=os.environ["LANGFUSE_PUBLIC_KEY"],
    secret_key=os.environ["LANGFUSE_SECRET_KEY"],
    host="https://ailogger.azurewebsites.net",
)


@asynccontextmanager
async def lifespan(_: FastAPI):
    await config.init_mongo_db()
    yield


app = FastAPI(lifespan=lifespan)
agent = Agent()


@app.post("/api/v1/conversation")
async def create_conversation():
    conversation_id = uuid.uuid4()
    await services.create_conversation(conversation_id, agent.system_prompt)
    return {"conversation_id": conversation_id}


class Message(BaseModel):
    conversation_id: uuid.UUID
    prompt: str


@app.post("/api/v1/message")
async def send(message: Message):
    # Create the main trace for this request; the conversation id doubles as
    # the Langfuse session id so all messages of a conversation group together.
    trace = langfuse.trace(
        name="chat_message",
        session_id=str(message.conversation_id),
        input={
            "prompt": message.prompt,
            "conversation_id": str(message.conversation_id),
        },
    )

    def traced_sse(func):
        """Wrap an async chunk generator: emit each chunk as an SSE event and,
        once the stream completes, record latency, token usage, and estimated
        cost on the trace."""

        async def wrapper(*args, **kwargs):
            response_parts = []
            start_time = time.time()

            async for chunk in func(*args, **kwargs):
                if chunk.type == "text" and chunk.content:
                    response_parts.append(str(chunk.content))
                content = chunk.model_dump_json()
                yield f"data: {content}\n\n"

            latency_ms = round((time.time() - start_time) * 1000)
            full_response = "".join(response_parts)

            # Rough token estimate: ~1.3 tokens per whitespace-separated word.
            input_tokens = len(message.prompt.split()) * 1.3
            output_tokens = len(full_response.split()) * 1.3
            total_tokens = int(input_tokens + output_tokens)

            # Example pricing (USD per 1K tokens); adjust to the model in use.
            cost_per_1k_input = 0.03
            cost_per_1k_output = 0.06
            total_cost = (
                input_tokens / 1000 * cost_per_1k_input
                + output_tokens / 1000 * cost_per_1k_output
            )

            # Token usage is recorded as trace metadata: in the Langfuse SDK,
            # `usage` is a field of generations, not of trace updates.
            trace.update(
                output={"response": full_response},
                metadata={
                    "usage": {
                        "input": int(input_tokens),
                        "output": int(output_tokens),
                        "total": total_tokens,
                        "unit": "TOKENS",
                    }
                },
            )

            langfuse.score(
                trace_id=trace.id,
                name="latency",
                value=latency_ms,
                comment=f"Response time: {latency_ms}ms",
            )
            langfuse.score(
                trace_id=trace.id,
                name="cost",
                value=round(total_cost, 4),
                comment=f"Estimated cost: ${round(total_cost, 4)}",
            )

        return wrapper

    sse_stream = traced_sse(services.stream)
    generator = sse_stream(agent, message.prompt, message.conversation_id)
    return StreamingResponse(generator, media_type="text/event-stream")
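

# Usage sketch (assumptions: the app is served with uvicorn and the module
# path `api.main:app` is hypothetical; substitute the real one). Create a
# conversation first, then stream a message back as server-sent events:
#
#   uvicorn api.main:app --reload
#
#   curl -X POST http://localhost:8000/api/v1/conversation
#   # -> {"conversation_id": "..."}
#
#   curl -N -X POST http://localhost:8000/api/v1/message \
#        -H "Content-Type: application/json" \
#        -d '{"conversation_id": "<id from previous call>", "prompt": "Hello"}'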