# Forked from innovacion/Mayacontigo (source listing: 262 lines, 8.0 KiB, Python).
import uuid
|
|
import os
|
|
from contextlib import asynccontextmanager
|
|
from pathlib import Path
|
|
|
|
from fastapi import FastAPI, HTTPException
|
|
from fastapi.middleware.cors import CORSMiddleware
|
|
from fastapi.responses import StreamingResponse, FileResponse, RedirectResponse
|
|
from pydantic import BaseModel
|
|
from langfuse import Langfuse
|
|
|
|
from dotenv import load_dotenv # ← Agregar este import
|
|
|
|
from api import services
|
|
from api.agent import Agent
|
|
from api.config import config
|
|
|
|
# Load environment variables.
load_dotenv()

# Langfuse client; its credentials and host are read from the environment.
# A variable that is not set is passed through as None.
langfuse = Langfuse(
    public_key=os.environ.get("LANGFUSE_PUBLIC_KEY"),
    secret_key=os.environ.get("LANGFUSE_SECRET_KEY"),
    host=os.environ.get("LANGFUSE_HOST"),
)


# Mapping of PDF file names to their public URLs (file name -> URL).
# It is empty here, so every PDF request falls back to the local PDF folder.
PDF_PUBLIC_URLS = {}
|
|
|
|
|
|
@asynccontextmanager
async def lifespan(_: FastAPI):
    """Application lifespan hook: initialise MongoDB before serving requests.

    The FastAPI instance passed in is not used. Nothing runs after the
    ``yield``, i.e. there is no shutdown step here.
    """
    # Startup phase.
    await config.init_mongo_db()

    # Hand control back to the application for the duration of its lifetime.
    yield
|
|
|
|
|
|
# ASGI application; startup work is delegated to ``lifespan``.
app = FastAPI(lifespan=lifespan)

# CORS is fully open: any origin, method and header, with credentials allowed.
# NOTE(review): wildcard origins combined with allow_credentials=True is a
# very permissive setup; confirm this is intended outside of development.
_cors_options = {
    "allow_origins": ["*"],
    "allow_credentials": True,
    "allow_methods": ["*"],
    "allow_headers": ["*"],
    "expose_headers": ["*"],
}
app.add_middleware(CORSMiddleware, **_cors_options)

# Single agent instance shared by every request handler in this module.
agent = Agent()

# Folder holding locally served PDFs: <this file's directory>/agent/pdf.
# It is created at import time (with parents) if it does not exist yet.
PDF_FOLDER = Path(__file__).parent.joinpath("agent", "pdf")
PDF_FOLDER.mkdir(parents=True, exist_ok=True)
|
|
|
|
|
|
# POST /api/v1/conversation
# Generates a random (version 4) UUID, registers a conversation under it
# together with the agent's system prompt, and returns the id to the caller.
@app.post("/api/v1/conversation")
async def create_conversation():
    new_id = uuid.uuid4()
    system_prompt = agent.system_prompt
    await services.create_conversation(new_id, system_prompt)
    return dict(conversation_id=new_id)
|
|
|
|
|
|
# Request body accepted by POST /api/v1/message.
# (Documented with comments rather than a class docstring so the generated
# API schema stays exactly as it was.)
class Message(BaseModel):
    # Identifier of the conversation this prompt is sent to.
    conversation_id: uuid.UUID

    # The prompt text forwarded to the agent.
    prompt: str
|
|
|
|
|
|
# POST /api/v1/message
# Streams the agent's reply to the prompt as Server-Sent Events.
#
# A Langfuse trace named "rag_chat" is opened with the prompt as input and the
# conversation id as session id. Every chunk produced by ``services.stream``
# is serialised with ``model_dump_json()`` and sent as one ``data: ...`` frame.
# The content of the non-empty "text" chunks is collected and stored as the
# trace output once the stream ends.
@app.post("/api/v1/message")
async def send(message: Message):
    # Basic chat tracking: only the input and the output are recorded.
    trace = langfuse.trace(
        name="rag_chat",
        session_id=str(message.conversation_id),
        input={"prompt": message.prompt},
    )

    async def sse_events():
        # Text fragments seen so far; joined into the trace output at the end.
        text_parts = []
        try:
            async for chunk in services.stream(
                agent, message.prompt, message.conversation_id
            ):
                if chunk.type == "text" and chunk.content:
                    text_parts.append(str(chunk.content))

                # One SSE frame per chunk; the blank line terminates the event.
                yield f"data: {chunk.model_dump_json()}\n\n"
        finally:
            # Runs on normal completion and also when the stream raises or the
            # generator is closed early (e.g. the client disconnects), so the
            # trace always receives whatever part of the response was produced.
            trace.update(output={"response": "".join(text_parts)})

    return StreamingResponse(sse_events(), media_type="text/event-stream")
|
|
|
|
|
|
def _inline_content_disposition(filename: str) -> str:
    """Return an ``inline`` Content-Disposition value that is safe for *filename*.

    Names that percent-encoding leaves untouched are emitted as a quoted
    ``filename="..."`` parameter. Any other name (spaces, quotes, semicolons,
    non-ASCII characters, ...) is emitted in the ``filename*=utf-8''...`` form
    so the header stays well-formed and encodable.
    """
    from urllib.parse import quote  # local import keeps this block self-contained

    encoded = quote(filename)
    if encoded == filename:
        return f'inline; filename="{filename}"'
    return f"inline; filename*=utf-8''{encoded}"


# GET /api/pdf/{filename}
# Serves a PDF by name.
#
# Order of checks:
#   1. the name must end in ".pdf" (case-insensitive)            -> 400 otherwise
#   2. the name must not contain "..", "/" or "\"                -> 400 otherwise
#   3. a name present in PDF_PUBLIC_URLS is answered with a 302 redirect
#   4. otherwise the file is looked up in PDF_FOLDER: a missing path or a
#      non-file gives 404, an empty file gives 500
# On success the file is returned inline as application/pdf.
@app.get("/api/pdf/{filename}")
async def get_pdf(filename: str):
    print(f"🔍 Solicitud PDF para: {filename}")

    if not filename.lower().endswith('.pdf'):
        print(f"❌ Archivo no es PDF: {filename}")
        raise HTTPException(status_code=400, detail="El archivo debe ser un PDF")

    # Reject anything that could address a path outside PDF_FOLDER. This is the
    # same strict rule get_pdf_info applies; names starting with "http" are no
    # longer exempt from the "/" check.
    if '..' in filename or '/' in filename or '\\' in filename:
        print(f"❌ Nombre de archivo inválido: {filename}")
        raise HTTPException(status_code=400, detail="Nombre de archivo inválido")

    public_url = PDF_PUBLIC_URLS.get(filename)

    if public_url:
        print(f"✅ Redirigiendo a URL pública: {public_url}")
        return RedirectResponse(
            url=public_url,
            status_code=302,
            headers={
                "Cache-Control": "public, max-age=3600",
                "Access-Control-Allow-Origin": "*",
                "Access-Control-Allow-Methods": "GET, OPTIONS",
                "Access-Control-Allow-Headers": "*",
            },
        )

    pdf_path = PDF_FOLDER / filename

    if not pdf_path.exists():
        print(f"❌ PDF no encontrado: {pdf_path}")
        raise HTTPException(status_code=404, detail=f"PDF no encontrado. Archivo: {filename}")

    if not pdf_path.is_file():
        print(f"❌ No es un archivo: {pdf_path}")
        raise HTTPException(status_code=404, detail="El recurso no es un archivo")

    file_size = pdf_path.stat().st_size
    print(f"📄 Sirviendo archivo local: {filename} ({file_size} bytes)")

    if file_size == 0:
        print(f"❌ Archivo vacío: {pdf_path}")
        raise HTTPException(status_code=500, detail="El archivo PDF está vacío")

    return FileResponse(
        path=str(pdf_path),
        media_type="application/pdf",
        filename=filename,
        headers={
            # Explicit "inline" disposition so the PDF is displayed rather than
            # downloaded; the filename is quoted/encoded instead of being
            # interpolated raw into the header.
            "Content-Disposition": _inline_content_disposition(filename),
            "Content-Type": "application/pdf",
            "Cache-Control": "public, max-age=3600",
            # NOTE(review): "ALLOWALL" is not one of the values defined for
            # X-Frame-Options (DENY / SAMEORIGIN) - confirm the intent.
            "X-Frame-Options": "ALLOWALL",
            "X-Content-Type-Options": "nosniff",
            "Access-Control-Allow-Origin": "*",
        },
    )
|
|
|
|
|
|
# GET /api/pdfs
# Lists every known PDF: first the entries of PDF_PUBLIC_URLS (type "public"),
# then the PDF files found directly inside PDF_FOLDER that are not already
# listed as public (type "local"). The response also carries a "debug" section
# and the total count. If anything raises, a dict with the error text and
# traceback is returned instead of an HTTP error.
@app.get("/api/pdfs")
async def list_pdfs():
    try:
        pdf_files = [
            {
                "filename": filename,
                "size": "N/A (Público)",
                "url": f"/api/pdf/{filename}",
                "public_url": url,
                "type": "public",
            }
            for filename, url in PDF_PUBLIC_URLS.items()
        ]

        local_files = []
        # A single case-insensitive scan replaces globbing "*.pdf" and "*.PDF"
        # separately. That pair listed every file twice where globbing is
        # case-insensitive and skipped mixed-case names such as "x.Pdf"
        # elsewhere, even though get_pdf accepts any casing of the extension.
        # A missing folder yields no local files, as before.
        if PDF_FOLDER.is_dir():
            # Sorted so the listing order is stable between calls.
            for file_path in sorted(PDF_FOLDER.iterdir()):
                if not file_path.name.lower().endswith(".pdf"):
                    continue
                if not file_path.is_file():
                    continue
                if file_path.name in PDF_PUBLIC_URLS:
                    # Already reported above as a public entry.
                    continue
                local_files.append({
                    "filename": file_path.name,
                    "size": file_path.stat().st_size,
                    "url": f"/api/pdf/{file_path.name}",
                    "type": "local",
                })

        pdf_files.extend(local_files)

        debug_info = {
            "current_working_directory": str(Path.cwd()),
            "pdf_folder_path": str(PDF_FOLDER.absolute()),
            "pdf_folder_exists": PDF_FOLDER.exists(),
            "public_urls_count": len(PDF_PUBLIC_URLS),
            "local_files_count": len(local_files),
            "public_files": list(PDF_PUBLIC_URLS.keys()),
        }

        return {
            "pdfs": pdf_files,
            "debug": debug_info,
            "total_pdfs": len(pdf_files),
        }
    except Exception as e:
        # NOTE(review): this answers with HTTP 200 and exposes the traceback
        # and server paths to the client; kept because consumers may rely on
        # this payload, but consider a 500 response plus server-side logging.
        import traceback

        return {
            "error": str(e),
            "traceback": traceback.format_exc(),
            "debug": {
                "current_working_directory": str(Path.cwd()),
                "script_file_path": __file__ if '__file__' in globals() else "unknown",
            },
        }
|
|
|
|
|
|
# GET /api/pdf/{filename}/info
# Returns metadata about a PDF without sending the file itself.
#
# The name must end in ".pdf" (case-insensitive) and must not contain "..",
# "/" or "\" (400 otherwise). A name found in PDF_PUBLIC_URLS is described as
# "public" with "N/A" for size and modification time. Any other name is looked
# up in PDF_FOLDER: 404 if the path is missing or is not a file, 500 if
# reading its metadata fails.
@app.get("/api/pdf/{filename}/info")
async def get_pdf_info(filename: str):
    is_pdf_name = filename.lower().endswith('.pdf')
    if not is_pdf_name:
        raise HTTPException(status_code=400, detail="El archivo debe ser un PDF")

    forbidden_parts = ('..', '/', '\\')
    if any(part in filename for part in forbidden_parts):
        raise HTTPException(status_code=400, detail="Nombre de archivo inválido")

    remote_url = PDF_PUBLIC_URLS.get(filename)
    if remote_url:
        # Public entries have no local file to inspect.
        return {
            "filename": filename,
            "size": "N/A",
            "size_mb": "N/A",
            "modified": "N/A",
            "url": f"/api/pdf/{filename}",
            "public_url": remote_url,
            "type": "public",
        }

    local_path = PDF_FOLDER / filename

    if not local_path.exists():
        raise HTTPException(status_code=404, detail="PDF no encontrado")

    if not local_path.is_file():
        raise HTTPException(status_code=404, detail="El recurso no es un archivo")

    try:
        stats = local_path.stat()
        size_bytes = stats.st_size
        # Size in MiB, rounded to two decimals.
        size_in_mb = round(size_bytes / (1024 * 1024), 2)
        return {
            "filename": filename,
            "size": size_bytes,
            "size_mb": size_in_mb,
            "modified": stats.st_mtime,
            "url": f"/api/pdf/{filename}",
            "type": "local",
        }
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error al obtener información del PDF: {str(e)}")
|
|
|
|
|
|
# GET /api/health
# Always reports status "healthy", together with the PDF folder path, whether
# that folder currently exists, and how many public URLs are configured.
@app.get("/api/health")
async def health_check():
    folder_path = str(PDF_FOLDER)
    folder_present = PDF_FOLDER.exists()
    public_count = len(PDF_PUBLIC_URLS)
    return dict(
        status="healthy",
        pdf_folder=folder_path,
        pdf_folder_exists=folder_present,
        public_urls_configured=public_count,
    )