forked from innovacion/Mayacontigo
add healthcheck to remaining apps
apps/egresos/api/server.py (new file, 116 lines)
@@ -0,0 +1,116 @@
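# FastAPI server for the egresos app: conversation/message endpoints
# instrumented with Langfuse tracing, plus the root healthcheck this
# commit adds.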
import time
import uuid
from contextlib import asynccontextmanager

from fastapi import FastAPI
from fastapi.responses import StreamingResponse
from langfuse import Langfuse
from pydantic import BaseModel

from api import services
from api.agent import Agent
from api.config import config

# Configure Langfuse
langfuse = Langfuse(
    public_key="pk-lf-49cb04b3-0c7d-475b-8105-ad8b8749ecdd",
    secret_key="sk-lf-e02fa322-c709-4d80-bef2-9cb279846a0c",
    host="https://ailogger.azurewebsites.net",
)


@asynccontextmanager
async def lifespan(_: FastAPI):
    # Open the MongoDB connection before the app starts serving requests
    await config.init_mongo_db()
    yield


app = FastAPI(lifespan=lifespan)
agent = Agent()


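# Issue a fresh conversation id and persist a conversation record seeded
# with the agent's system prompt.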
@app.post("/api/v1/conversation")
async def create_conversation():
    conversation_id = uuid.uuid4()
    await services.create_conversation(conversation_id, agent.system_prompt)
    return {"conversation_id": conversation_id}


class Message(BaseModel):
    conversation_id: uuid.UUID
    prompt: str


@app.post("/api/v1/message")
async def send(message: Message):
    # Create the main trace for this message exchange
    trace = langfuse.trace(
        name="chat_message",
        session_id=str(message.conversation_id),
        input={
            "prompt": message.prompt,
            "conversation_id": str(message.conversation_id),
        },
    )

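    # b64_sse wraps the streaming service: each chunk is forwarded to the
    # client as an SSE frame while text content is accumulated, so latency,
    # token, and cost metrics can be attached to the trace once the stream
    # ends.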
    def b64_sse(func):
        async def wrapper(*args, **kwargs):
            response_parts = []
            start_time = time.time()

            async for chunk in func(*args, **kwargs):
                if chunk.type == "text" and chunk.content:
                    response_parts.append(str(chunk.content))

                # Serialize the chunk and frame it for Server-Sent Events
                content = chunk.model_dump_json()
                data = f"data: {content}\n\n"
                yield data

            end_time = time.time()
            latency_ms = round((end_time - start_time) * 1000)
            full_response = "".join(response_parts)

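            # Rough token estimate: ~1.3 tokens per whitespace-separated
            # word, a common rule of thumb; an exact count would need the
            # model's tokenizer.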
            input_tokens = len(message.prompt.split()) * 1.3
            output_tokens = len(full_response.split()) * 1.3
            total_tokens = int(input_tokens + output_tokens)

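            # Cost is estimated from the fixed per-1K-token rates hardcoded
            # below rather than fetched from the provider.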
            cost_per_1k_input = 0.03
            cost_per_1k_output = 0.06
            total_cost = (input_tokens / 1000 * cost_per_1k_input) + (
                output_tokens / 1000 * cost_per_1k_output
            )

            # Attach the full response and estimated usage to the trace
            trace.update(
                output={"response": full_response},
                usage={
                    "input": int(input_tokens),
                    "output": int(output_tokens),
                    "total": total_tokens,
                    "unit": "TOKENS",
                },
            )

            langfuse.score(
                trace_id=trace.id,
                name="latency",
                value=latency_ms,
                comment=f"Response time: {latency_ms}ms",
            )

            langfuse.score(
                trace_id=trace.id,
                name="cost",
                value=round(total_cost, 4),
                comment=f"Estimated cost: ${round(total_cost, 4)}",
            )

        return wrapper

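    # Wrap the service-layer stream and hand it to the client as SSE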
    sse_stream = b64_sse(services.stream)
    generator = sse_stream(agent, message.prompt, message.conversation_id)
    return StreamingResponse(generator, media_type="text/event-stream")


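# Healthcheck added in this commit: the root path returns a simple status
# payload that deployment probes can poll.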
@app.get("/")
async def health():
    return {"status": "ok"}