This commit is contained in:
Rogelio
2025-10-13 18:16:25 +00:00
parent 739f087cef
commit 325f1ef439
415 changed files with 46870 additions and 0 deletions

View File

@@ -0,0 +1,112 @@
import os
import time
import uuid
from contextlib import asynccontextmanager

from fastapi import FastAPI
from fastapi.responses import StreamingResponse
from langfuse import Langfuse
from pydantic import BaseModel

from api import services
from api.agent import Agent
from api.config import config
# Configure the Langfuse client.
#
# Credentials are read from the environment (LANGFUSE_PUBLIC_KEY /
# LANGFUSE_SECRET_KEY) rather than being hard-coded: API keys committed to
# source control are readable by anyone with repository access and cannot be
# changed without a code change.
# NOTE(review): the key pair that was previously committed here should be
# treated as leaked and rotated in the Langfuse project settings.
#
# LANGFUSE_HOST may override the endpoint; when it is unset the previously
# configured self-hosted instance is used, so the target host is unchanged
# by default.
langfuse = Langfuse(
    public_key=os.environ.get("LANGFUSE_PUBLIC_KEY"),
    secret_key=os.environ.get("LANGFUSE_SECRET_KEY"),
    host=os.environ.get("LANGFUSE_HOST", "https://ailogger.azurewebsites.net"),
)
@asynccontextmanager
async def lifespan(_: FastAPI):
    """Application lifespan hook: initialise MongoDB, then hand control back.

    The FastAPI instance passed in is not used. Nothing runs after the
    ``yield``, i.e. this hook performs no teardown work.
    """
    # Startup phase: must finish before the context manager yields.
    await config.init_mongo_db()

    yield
# ASGI application. `lifespan` awaits config.init_mongo_db() before yielding,
# so the MongoDB initialisation runs as part of application startup.
app = FastAPI(lifespan=lifespan)
# Single module-level Agent instance shared by the route handlers below
# (its system_prompt seeds new conversations; it is also passed to
# services.stream when a message is sent).
agent = Agent()
# POST /api/v1/conversation
#
# Generates a random (version 4) UUID, asks the service layer to create a
# conversation under that id seeded with the agent's system prompt, and
# returns the id to the caller as {"conversation_id": <uuid>}.
@app.post("/api/v1/conversation")
async def create_conversation():
    new_id = uuid.uuid4()
    system_prompt = agent.system_prompt

    await services.create_conversation(new_id, system_prompt)

    return {"conversation_id": new_id}
# Request body accepted by POST /api/v1/message.
class Message(BaseModel):
    # Id of the conversation this prompt belongs to; the /api/v1/conversation
    # endpoint returns such an id.
    conversation_id: uuid.UUID

    # Text of the user's prompt.
    prompt: str
# Estimated USD price per 1,000 tokens. Used only for the "cost" score.
_COST_PER_1K_INPUT = 0.03
_COST_PER_1K_OUTPUT = 0.06

# Rough words-to-tokens multiplier; no real tokenizer is involved, so every
# token figure derived from it is an approximation.
_TOKENS_PER_WORD = 1.3


def _estimate_usage(prompt: str, response: str):
    """Return ``(input_tokens, output_tokens, cost)`` estimated from word counts.

    Token counts are whole numbers (truncated estimates). The cost is computed
    from the untruncated estimates and rounded to 4 decimal places.
    """
    raw_input = len(prompt.split()) * _TOKENS_PER_WORD
    raw_output = len(response.split()) * _TOKENS_PER_WORD
    cost = (raw_input / 1000 * _COST_PER_1K_INPUT) + (raw_output / 1000 * _COST_PER_1K_OUTPUT)
    return int(raw_input), int(raw_output), round(cost, 4)


def _record_telemetry(trace, prompt, response, latency_ms, completed):
    """Attach output, estimated usage, latency and cost to a Langfuse trace."""
    input_tokens, output_tokens, cost = _estimate_usage(prompt, response)
    # NOTE(review): confirm that this Langfuse SDK version stores `usage` on a
    # trace; usage details may only be honoured on generation observations.
    trace.update(
        output={"response": response},
        # Distinguishes a full answer from one cut short by an error or a
        # client disconnect.
        metadata={"completed": completed},
        usage={
            "input": input_tokens,
            "output": output_tokens,
            # Sum of the reported parts, so the three figures always agree.
            "total": input_tokens + output_tokens,
            "unit": "TOKENS",
        },
    )
    langfuse.score(
        trace_id=trace.id,
        name="latency",
        value=latency_ms,
        comment=f"Response time: {latency_ms}ms",
    )
    langfuse.score(
        trace_id=trace.id,
        name="cost",
        value=cost,
        comment=f"Estimated cost: ${cost}",
    )


@app.post("/api/v1/message")
async def send(message: Message):
    """Send a prompt to a conversation and stream the reply as server-sent events.

    Each event is one ``data: <json>`` line followed by a blank line, where
    ``<json>`` is the JSON serialisation of a chunk produced by the agent.
    """
    # Create the main trace for this request.
    trace = langfuse.trace(
        name="chat_message",
        session_id=str(message.conversation_id),
        input={
            "prompt": message.prompt,
            "conversation_id": str(message.conversation_id),
        },
    )

    async def event_stream():
        text_parts = []
        completed = False
        # perf_counter is monotonic, so the measured latency cannot be skewed
        # by wall-clock adjustments while the response is streaming.
        started = time.perf_counter()
        try:
            async for chunk in services.stream(agent, message.prompt, message.conversation_id):
                # Only non-empty text chunks contribute to the recorded reply;
                # every chunk is still forwarded to the client.
                if chunk.type == "text" and chunk.content:
                    text_parts.append(str(chunk.content))
                yield f"data: {chunk.model_dump_json()}\n\n"
            completed = True
        finally:
            # Runs on normal completion, on an upstream error and when the
            # client disconnects, so a trace is never left without output.
            latency_ms = round((time.perf_counter() - started) * 1000)
            _record_telemetry(
                trace,
                message.prompt,
                "".join(text_parts),
                latency_ms,
                completed,
            )

    return StreamingResponse(event_stream(), media_type="text/event-stream")