add healthcheck to remaining apps

This commit is contained in:
2025-11-25 07:05:14 +00:00
parent eccd53673c
commit 6d9686e373
87 changed files with 850 additions and 632 deletions

116
apps/egresos/api/server.py Normal file
View File

@@ -0,0 +1,116 @@
import time
import uuid
from contextlib import asynccontextmanager
from fastapi import FastAPI
from fastapi.responses import StreamingResponse
from langfuse import Langfuse
from pydantic import BaseModel
from api import services
from api.agent import Agent
from api.config import config
# Configurar Langfuse
langfuse = Langfuse(
public_key="pk-lf-49cb04b3-0c7d-475b-8105-ad8b8749ecdd",
secret_key="sk-lf-e02fa322-c709-4d80-bef2-9cb279846a0c",
host="https://ailogger.azurewebsites.net",
)
@asynccontextmanager
async def lifespan(_: FastAPI):
await config.init_mongo_db()
yield
app = FastAPI(lifespan=lifespan)
agent = Agent()
@app.post("/api/v1/conversation")
async def create_conversation():
conversation_id = uuid.uuid4()
await services.create_conversation(conversation_id, agent.system_prompt)
return {"conversation_id": conversation_id}
class Message(BaseModel):
conversation_id: uuid.UUID
prompt: str
@app.post("/api/v1/message")
async def send(message: Message):
# Crear trace principal
trace = langfuse.trace(
name="chat_message",
session_id=str(message.conversation_id),
input={
"prompt": message.prompt,
"conversation_id": str(message.conversation_id),
},
)
def b64_sse(func):
async def wrapper(*args, **kwargs):
response_parts = []
start_time = time.time()
async for chunk in func(*args, **kwargs):
if chunk.type == "text" and chunk.content:
response_parts.append(str(chunk.content))
content = chunk.model_dump_json()
data = f"data: {content}\n\n"
yield data
end_time = time.time()
latency_ms = round((end_time - start_time) * 1000)
full_response = "".join(response_parts)
input_tokens = len(message.prompt.split()) * 1.3
output_tokens = len(full_response.split()) * 1.3
total_tokens = int(input_tokens + output_tokens)
cost_per_1k_input = 0.03
cost_per_1k_output = 0.06
total_cost = (input_tokens / 1000 * cost_per_1k_input) + (
output_tokens / 1000 * cost_per_1k_output
)
trace.update(
output={"response": full_response},
usage={
"input": int(input_tokens),
"output": int(output_tokens),
"total": total_tokens,
"unit": "TOKENS",
},
)
langfuse.score(
trace_id=trace.id,
name="latency",
value=latency_ms,
comment=f"Response time: {latency_ms}ms",
)
langfuse.score(
trace_id=trace.id,
name="cost",
value=round(total_cost, 4),
comment=f"Estimated cost: ${round(total_cost, 4)}",
)
return wrapper
sse_stream = b64_sse(services.stream)
generator = sse_stream(agent, message.prompt, message.conversation_id)
return StreamingResponse(generator, media_type="text/event-stream")
@app.get("/")
async def health():
return {"status": "ok"}