From af9b5fed013c4d48c8ceaada9fce75e4ea05c102 Mon Sep 17 00:00:00 2001 From: Anibal Angulo Date: Fri, 7 Nov 2025 09:41:18 -0600 Subject: [PATCH] wip chat --- backend/app/agents/form_auditor/__init__.py | 39 ++ backend/app/agents/form_auditor/agent.py | 155 +++++ backend/app/agents/form_auditor/checks.py | 282 +++++++++ backend/app/agents/form_auditor/cli.py | 38 ++ backend/app/agents/form_auditor/models.py | 573 ++++++++++++++++++ backend/app/example_data.json | 608 ++++++++++++++++++++ backend/app/routers/agent.py | 342 +++++++++++ backend/app/routers/chunking_landingai.py | 125 ++-- backend/app/routers/dataroom.py | 129 +++++ backend/pyproject.toml | 5 + backend/uv.lock | 34 ++ docker-compose.yml | 4 +- frontend/package-lock.json | 50 ++ frontend/package.json | 1 + frontend/src/components/ChatExample.tsx | 241 ++++++++ frontend/src/components/ChatTab.tsx | 260 +++++++-- frontend/src/components/DashboardTab.tsx | 267 ++++++++- frontend/src/components/DataroomView.tsx | 125 +--- frontend/src/components/FilesTab.tsx | 7 +- frontend/src/components/Sidebar.tsx | 13 +- frontend/src/services/api.ts | 33 ++ 21 files changed, 3065 insertions(+), 266 deletions(-) create mode 100644 backend/app/agents/form_auditor/__init__.py create mode 100644 backend/app/agents/form_auditor/agent.py create mode 100644 backend/app/agents/form_auditor/checks.py create mode 100644 backend/app/agents/form_auditor/cli.py create mode 100644 backend/app/agents/form_auditor/models.py create mode 100644 backend/app/example_data.json create mode 100644 frontend/src/components/ChatExample.tsx diff --git a/backend/app/agents/form_auditor/__init__.py b/backend/app/agents/form_auditor/__init__.py new file mode 100644 index 0000000..77167c4 --- /dev/null +++ b/backend/app/agents/form_auditor/__init__.py @@ -0,0 +1,39 @@ +from __future__ import annotations + +from typing import Any + +from .agent import agent, prepare_initial_findings +from .models import ( + AuditReport, + ExtractedIrsForm990PfDataSchema, + ValidatorState, +) + + +async def build_audit_report(payload: dict[str, Any]) -> AuditReport: + extraction_payload = payload.get("extraction") + if extraction_payload is None: + raise ValueError("Payload missing 'extraction' key.") + extraction = ExtractedIrsForm990PfDataSchema.model_validate(extraction_payload) + + initial_findings = prepare_initial_findings(extraction) + + metadata: dict[str, Any] = {} + metadata_raw = payload.get("metadata") + if isinstance(metadata_raw, dict): + metadata = {str(k): v for k, v in metadata_raw.items()} + + state = ValidatorState( + extraction=extraction, + initial_findings=initial_findings, + metadata=metadata, + ) + + prompt = ( + "Review the Form 990 extraction and deterministic checks. Validate or adjust " + "the findings, add any additional issues or mitigations, and craft narrative " + "section summaries that highlight the most material points. Focus on concrete " + "evidence; do not fabricate figures." 
+ ) + result = await agent.run(prompt, deps=state) + return result.output diff --git a/backend/app/agents/form_auditor/agent.py b/backend/app/agents/form_auditor/agent.py new file mode 100644 index 0000000..9992edb --- /dev/null +++ b/backend/app/agents/form_auditor/agent.py @@ -0,0 +1,155 @@ +from __future__ import annotations + +from collections.abc import Iterable + +from pydantic_ai import Agent, RunContext +from pydantic_ai.models.openai import OpenAIChatModel +from pydantic_ai.providers.azure import AzureProvider + +from app.core.config import settings + +from .checks import ( + aggregate_findings, + build_section_summaries, + check_balance_sheet_presence, + check_board_engagement, + check_expense_totals, + check_fundraising_alignment, + check_governance_policies, + check_missing_operational_details, + check_revenue_totals, + compose_overall_summary, + irs_ein_lookup, +) +from .models import ( + AuditFinding, + AuditReport, + ExtractedIrsForm990PfDataSchema, + Severity, + ValidatorState, +) + +provider = AzureProvider( + azure_endpoint=settings.AZURE_OPENAI_ENDPOINT, + api_version=settings.AZURE_OPENAI_API_VERSION, + api_key=settings.AZURE_OPENAI_API_KEY, +) +model = OpenAIChatModel(model_name="gpt-4o", provider=provider) +agent = Agent(model=model) + + +def prepare_initial_findings( + extraction: ExtractedIrsForm990PfDataSchema, +) -> list[AuditFinding]: + findings = [ + check_revenue_totals(extraction), + check_expense_totals(extraction), + check_fundraising_alignment(extraction), + check_balance_sheet_presence(extraction), + check_board_engagement(extraction), + check_missing_operational_details(extraction), + ] + findings.extend(check_governance_policies(extraction)) + return findings + + +def _merge_findings( + findings: Iterable[AuditFinding], + added: Iterable[AuditFinding], +) -> list[AuditFinding]: + existing = {finding.check_id: finding for finding in findings} + for finding in added: + existing[finding.check_id] = finding + return list(existing.values()) + + +agent = Agent( + model=model, + name="FormValidator", + deps_type=ValidatorState, + output_type=AuditReport, + system_prompt=( + "You are a Form 990 auditor. Review the extraction data and deterministic " + "checks provided in deps. Use tools to confirm calculations, add or adjust " + "findings, supply mitigation guidance, and craft concise section summaries. " + "The AuditReport must include severity (`Pass`, `Warning`, `Error`), " + "confidence scores, mitigation advice, section summaries, and an overall summary. " + "Ground every statement in supplied data; do not invent financial figures." 
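Because `prepare_initial_findings` and the deterministic checks are plain Python, the rule-based layer can be exercised without calling the model at all. A minimal offline sketch (the relative path to the bundled example payload and the presence of the Azure settings at import time are assumptions, not part of this patch):

```python
# Illustrative only: run the deterministic checks against the bundled example
# payload without invoking the LLM. Note that importing the agent module builds
# the AzureProvider at import time, so the Azure settings must be configured.
import json
from pathlib import Path

from app.agents.form_auditor.agent import prepare_initial_findings
from app.agents.form_auditor.models import ExtractedIrsForm990PfDataSchema

payload = json.loads(
    Path("backend/app/example_data.json").read_text(encoding="utf-8")  # assumed path
)
extraction = ExtractedIrsForm990PfDataSchema.model_validate(payload["extraction"])

for finding in prepare_initial_findings(extraction):
    print(f"{finding.severity.value:<7} {finding.check_id}: {finding.message}")
```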
+ ), +) + + +@agent.tool +def revenue_check(ctx: RunContext[ValidatorState]) -> AuditFinding: + return check_revenue_totals(ctx.deps.extraction) + + +@agent.tool +def expense_check(ctx: RunContext[ValidatorState]) -> AuditFinding: + return check_expense_totals(ctx.deps.extraction) + + +@agent.tool +def fundraising_alignment_check(ctx: RunContext[ValidatorState]) -> AuditFinding: + return check_fundraising_alignment(ctx.deps.extraction) + + +@agent.tool +async def verify_ein(ctx: RunContext[ValidatorState]) -> AuditFinding: + ein = ctx.deps.extraction.core_organization_metadata.ein + exists, confidence, note = await irs_ein_lookup(ein) + if exists: + return AuditFinding( + check_id="irs_ein_match", + category="Compliance", + severity=Severity.PASS, + message="EIN confirmed against IRS index.", + mitigation="Document verification in the filing workpapers.", + confidence=confidence, + ) + return AuditFinding( + check_id="irs_ein_match", + category="Compliance", + severity=Severity.WARNING, + message=f"EIN {ein} could not be confirmed. {note}", + mitigation="Verify the EIN against the IRS EO BMF or IRS determination letter.", + confidence=confidence, + ) + + +@agent.output_validator +def finalize_report( + ctx: RunContext[ValidatorState], + report: AuditReport, +) -> AuditReport: + merged_findings = _merge_findings(ctx.deps.initial_findings, report.findings) + overall = aggregate_findings(merged_findings) + sections = build_section_summaries(merged_findings) + overall_summary = compose_overall_summary(merged_findings) + metadata = ctx.deps.metadata + notes = report.notes + if notes is None and isinstance(metadata, dict) and metadata.get("source"): + notes = f"Reviewed data source: {metadata['source']}." + year: int | None = None + if isinstance(metadata, dict): + metadata_year = metadata.get("return_year") + if metadata_year is not None: + try: + year = int(metadata_year) + except (TypeError, ValueError): + pass + core = ctx.deps.extraction.core_organization_metadata + organisation_name = core.legal_name or report.organisation_name + organisation_ein = core.ein or report.organisation_ein + return report.model_copy( + update={ + "organisation_ein": organisation_ein, + "organisation_name": organisation_name, + "year": year, + "findings": merged_findings, + "overall_severity": overall, + "sections": sections, + "overall_summary": overall_summary, + "notes": notes, + } + ) diff --git a/backend/app/agents/form_auditor/checks.py b/backend/app/agents/form_auditor/checks.py new file mode 100644 index 0000000..e894e29 --- /dev/null +++ b/backend/app/agents/form_auditor/checks.py @@ -0,0 +1,282 @@ +from __future__ import annotations + +from collections import Counter, defaultdict + +from .models import ( + AuditFinding, + AuditSectionSummary, + ExtractedIrsForm990PfDataSchema, + Severity, +) + + +def aggregate_findings(findings: list[AuditFinding]) -> Severity: + order = {Severity.ERROR: 3, Severity.WARNING: 2, Severity.PASS: 1} + overall = Severity.PASS + for finding in findings: + if order[finding.severity] > order[overall]: + overall = finding.severity + return overall + + +def check_revenue_totals(data: ExtractedIrsForm990PfDataSchema) -> AuditFinding: + subtotal = sum( + value + for key, value in data.revenue_breakdown.model_dump().items() + if key != "total_revenue" + ) + if abs(subtotal - data.revenue_breakdown.total_revenue) <= 1: + return AuditFinding( + check_id="revenue_totals", + category="Revenue", + severity=Severity.PASS, + message=f"Revenue categories sum (${subtotal:,.2f}) matches total 
revenue.", + mitigation="Maintain detailed support for each revenue source to preserve reconciliation trail.", + confidence=0.95, + ) + return AuditFinding( + check_id="revenue_totals", + category="Revenue", + severity=Severity.ERROR, + message=( + f"Revenue categories sum (${subtotal:,.2f}) does not equal reported total " + f"(${data.revenue_breakdown.total_revenue:,.2f})." + ), + mitigation="Recalculate revenue totals and correct line items or Schedule A before filing.", + confidence=0.95, + ) + + +def check_expense_totals(data: ExtractedIrsForm990PfDataSchema) -> AuditFinding: + subtotal = ( + data.expenses_breakdown.program_services_expenses + + data.expenses_breakdown.management_general_expenses + + data.expenses_breakdown.fundraising_expenses + ) + if abs(subtotal - data.expenses_breakdown.total_expenses) <= 1: + return AuditFinding( + check_id="expense_totals", + category="Expenses", + severity=Severity.PASS, + message="Functional expenses match total expenses.", + mitigation="Keep functional allocation workpapers to support the reconciliation.", + confidence=0.95, + ) + return AuditFinding( + check_id="expense_totals", + category="Expenses", + severity=Severity.ERROR, + message=( + f"Functional expenses (${subtotal:,.2f}) do not reconcile to total expenses " + f"(${data.expenses_breakdown.total_expenses:,.2f})." + ), + mitigation="Review Part I, lines 23–27 and reclassify functional expenses to tie to Part II totals.", + confidence=0.95, + ) + + +def check_fundraising_alignment( + data: ExtractedIrsForm990PfDataSchema, +) -> AuditFinding: + reported_fundraising = data.expenses_breakdown.fundraising_expenses + event_expenses = data.fundraising_grantmaking.total_fundraising_event_expenses + difference = abs(reported_fundraising - event_expenses) + if difference <= 1: + return AuditFinding( + check_id="fundraising_alignment", + category="Fundraising", + severity=Severity.PASS, + message="Fundraising functional expenses align with reported event expenses.", + mitigation="Retain event ledgers and allocations to support matching totals.", + confidence=0.9, + ) + severity = ( + Severity.WARNING + if reported_fundraising and difference <= reported_fundraising * 0.1 + else Severity.ERROR + ) + return AuditFinding( + check_id="fundraising_alignment", + category="Fundraising", + severity=severity, + message=( + f"Fundraising functional expenses (${reported_fundraising:,.2f}) differ from " + f"reported event expenses (${event_expenses:,.2f}) by ${difference:,.2f}." 
+ ), + mitigation="Reconcile Schedule G and Part I allocations to eliminate the variance.", + confidence=0.85, + ) + + +def check_balance_sheet_presence( + data: ExtractedIrsForm990PfDataSchema, +) -> AuditFinding: + if data.balance_sheet: + return AuditFinding( + check_id="balance_sheet_present", + category="Balance Sheet", + severity=Severity.PASS, + message="Balance sheet data is present.", + mitigation="Ensure ending net assets tie to Part I, line 30.", + confidence=0.7, + ) + return AuditFinding( + check_id="balance_sheet_absent", + category="Balance Sheet", + severity=Severity.WARNING, + message="Balance sheet section is empty; confirm Part II filing requirements.", + mitigation="Populate assets, liabilities, and net assets or attach supporting schedules.", + confidence=0.6, + ) + + +def check_governance_policies( + data: ExtractedIrsForm990PfDataSchema, +) -> list[AuditFinding]: + gm = data.governance_management_disclosure + findings: list[AuditFinding] = [] + policy_fields = { + "conflict_of_interest_policy": "Document the policy in Part VI or adopt one prior to filing.", + "whistleblower_policy": "Document whistleblower protections for staff and volunteers.", + "document_retention_policy": "Adopt and document a record retention policy.", + } + affirmative_fields = { + "financial_statements_reviewed": "Capture whether the board reviewed or audited year-end financials.", + "form_990_provided_to_governing_body": "Provide Form 990 to the board before submission and note the date of review.", + } + + for field, mitigation in policy_fields.items(): + value = (getattr(gm, field) or "").strip() + if not value or value.lower() in {"no", "n", "false"}: + findings.append( + AuditFinding( + check_id=f"{field}_missing", + category="Governance", + severity=Severity.WARNING, + message=f"{field.replace('_', ' ').title()} not reported or marked 'No'.", + mitigation=mitigation, + confidence=0.55, + ) + ) + + for field, mitigation in affirmative_fields.items(): + value = (getattr(gm, field) or "").strip() + if not value: + findings.append( + AuditFinding( + check_id=f"{field}_blank", + category="Governance", + severity=Severity.WARNING, + message=f"{field.replace('_', ' ').title()} left blank.", + mitigation=mitigation, + confidence=0.5, + ) + ) + return findings + + +def check_board_engagement(data: ExtractedIrsForm990PfDataSchema) -> AuditFinding: + hours = [ + member.average_hours_per_week + for member in data.officers_directors_trustees_key_employees + if member.average_hours_per_week is not None + ] + total_hours = sum(hours) + if total_hours >= 5: + return AuditFinding( + check_id="board_hours", + category="Governance", + severity=Severity.PASS, + message="Officer and director time commitments appear reasonable.", + mitigation="Continue documenting board attendance and oversight responsibilities.", + confidence=0.7, + ) + return AuditFinding( + check_id="board_hours", + category="Governance", + severity=Severity.WARNING, + message=( + f"Aggregate reported board hours ({total_hours:.1f} per week) are low; " + "confirm entries reflect actual governance involvement." 
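The governance checks key off literal "No"/blank answers, so a quick way to see one fire is to flip a policy field on the bundled example extraction. Illustrative sketch only; the payload path is an assumption:

```python
# Sketch: a policy answered "No" (or left blank) surfaces a Governance warning.
import json
from pathlib import Path

from app.agents.form_auditor.checks import check_governance_policies
from app.agents.form_auditor.models import ExtractedIrsForm990PfDataSchema, Severity

extraction = ExtractedIrsForm990PfDataSchema.model_validate(
    json.loads(Path("backend/app/example_data.json").read_text(encoding="utf-8"))["extraction"]
)
gm = extraction.governance_management_disclosure.model_copy(
    update={"conflict_of_interest_policy": "No"}
)
extraction = extraction.model_copy(update={"governance_management_disclosure": gm})

assert any(
    f.check_id == "conflict_of_interest_policy_missing" and f.severity == Severity.WARNING
    for f in check_governance_policies(extraction)
)
```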
+ ), + mitigation="Verify hours in Part VII; update if officers volunteer significant time.", + confidence=0.6, + ) + + +def check_missing_operational_details( + data: ExtractedIrsForm990PfDataSchema, +) -> AuditFinding: + descriptors = ( + data.functional_operational_data.fundraising_method_descriptions or "" + ).strip() + if descriptors: + return AuditFinding( + check_id="fundraising_methods_documented", + category="Operations", + severity=Severity.PASS, + message="Fundraising method descriptions provided.", + mitigation="Update narratives annually to reflect any new campaigns or joint ventures.", + confidence=0.65, + ) + return AuditFinding( + check_id="fundraising_methods_missing", + category="Operations", + severity=Severity.WARNING, + message="Fundraising method descriptions are blank.", + mitigation="Add a brief Schedule G narrative describing major fundraising approaches.", + confidence=0.55, + ) + + +def build_section_summaries(findings: list[AuditFinding]) -> list[AuditSectionSummary]: + grouped: defaultdict[str, list[AuditFinding]] = defaultdict(list) + for finding in findings: + grouped[finding.category].append(finding) + + summaries: list[AuditSectionSummary] = [] + severity_order = {Severity.ERROR: 3, Severity.WARNING: 2, Severity.PASS: 1} + for category, category_findings in grouped.items(): + counter = Counter(f.severity for f in category_findings) + severity = aggregate_findings(category_findings) + summary = ", ".join( + f"{count} {label}" + for label, count in ( + ("passes", counter.get(Severity.PASS, 0)), + ("warnings", counter.get(Severity.WARNING, 0)), + ("errors", counter.get(Severity.ERROR, 0)), + ) + ) + summary_text = f"{category} review: {summary}." + confidence = sum(f.confidence for f in category_findings) / len( + category_findings + ) + summaries.append( + AuditSectionSummary( + section=category, + severity=severity, + summary=summary_text, + confidence=confidence, + ) + ) + summaries.sort(key=lambda s: (-severity_order[s.severity], s.section.lower())) + return summaries + + +def compose_overall_summary(findings: list[AuditFinding]) -> str: + if not findings: + return "No automated findings generated." + counter = Counter(f.severity for f in findings) + parts = [] + if counter.get(Severity.ERROR): + parts.append(f"{counter[Severity.ERROR]} error(s)") + if counter.get(Severity.WARNING): + parts.append(f"{counter[Severity.WARNING]} warning(s)") + if counter.get(Severity.PASS): + parts.append(f"{counter[Severity.PASS]} check(s) passed") + summary = "Overall results: " + ", ".join(parts) + "." + return summary + + +async def irs_ein_lookup(_ein: str) -> tuple[bool, float, str]: + return False, 0.2, "IRS verification unavailable in current environment." diff --git a/backend/app/agents/form_auditor/cli.py b/backend/app/agents/form_auditor/cli.py new file mode 100644 index 0000000..69b91b4 --- /dev/null +++ b/backend/app/agents/form_auditor/cli.py @@ -0,0 +1,38 @@ +from __future__ import annotations + +import argparse +import asyncio +import json +from pathlib import Path + +from . import build_audit_report + +__all__ = ["build_audit_report", "main"] + + +def _load_payload(path: Path) -> dict: + text = path.read_text(encoding="utf-8") + return json.loads(text) + + +def _print_report(report: dict) -> None: + print(json.dumps(report, indent=2)) + + +def main(argv: list[str] | None = None) -> None: + parser = argparse.ArgumentParser( + description="Validate a Form 990 extraction payload using the Form Auditor agent." 
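The CLI exposes `main(argv)`, but this patch does not show a `__main__` guard or a console-script entry for it, so the most direct way to run it today is to call `main` in-process. Illustrative only; the payload path is an assumption:

```python
# Hypothetical invocation of the Form Auditor CLI from a Python shell or script.
from app.agents.form_auditor.cli import main

# The positional argument defaults to "example_data.json"; pass an explicit path here.
main(["backend/app/example_data.json"])
```

If the module later gains an entry point, the same call would back a `python -m`-style invocation.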
+ ) + parser.add_argument( + "payload", + nargs="?", + default="example_data.json", + help="Path to a JSON file containing the extraction payload.", + ) + args = parser.parse_args(argv) + + payload_path = Path(args.payload).expanduser() + payload = _load_payload(payload_path) + + report = asyncio.run(build_audit_report(payload)) + _print_report(report.model_dump()) diff --git a/backend/app/agents/form_auditor/models.py b/backend/app/agents/form_auditor/models.py new file mode 100644 index 0000000..2e7d716 --- /dev/null +++ b/backend/app/agents/form_auditor/models.py @@ -0,0 +1,573 @@ +from __future__ import annotations + +from enum import Enum +from typing import Any + +from pydantic import BaseModel, Field + + +class Severity(str, Enum): + PASS = "Pass" + WARNING = "Warning" + ERROR = "Error" + + +class AuditFinding(BaseModel): + check_id: str + category: str + severity: Severity + message: str + mitigation: str | None = None + confidence: float = Field(ge=0.0, le=1.0) + + +class AuditSectionSummary(BaseModel): + section: str + severity: Severity + summary: str + confidence: float = Field(ge=0.0, le=1.0) + + +class AuditReport(BaseModel): + organisation_ein: str + organisation_name: str + year: int | None + overall_severity: Severity + findings: list[AuditFinding] + sections: list[AuditSectionSummary] = Field(default_factory=list) + overall_summary: str | None = None + notes: str | None = None + + +class CoreOrgMetadata(BaseModel): + ein: str + legal_name: str + return_type: str + accounting_method: str + incorporation_state: str | None = None + + +class CoreOrganizationMetadata(BaseModel): + ein: str = Field( + ..., + description="Unique IRS identifier for the organization.", + title="Employer Identification Number (EIN)", + ) + legal_name: str = Field( + ..., + description="Official registered name of the organization.", + title="Legal Name of Organization", + ) + phone_number: str = Field( + ..., description="Primary contact phone number.", title="Phone Number" + ) + website_url: str = Field( + ..., description="Organization's website address.", title="Website URL" + ) + return_type: str = Field( + ..., + description="Type of IRS return filed (e.g., 990, 990-EZ, 990-PF).", + title="Return Type", + ) + amended_return: str = Field( + ..., + description="Indicates if the return is amended.", + title="Amended Return Flag", + ) + group_exemption_number: str = Field( + ..., + description="IRS group exemption number, if applicable.", + title="Group Exemption Number", + ) + subsection_code: str = Field( + ..., + description="IRS subsection code (e.g., 501(c)(3)).", + title="Subsection Code", + ) + ruling_date: str = Field( + ..., + description="Date of IRS ruling or determination letter.", + title="Ruling/Determination Letter Date", + ) + accounting_method: str = Field( + ..., + description="Accounting method used (cash, accrual, other).", + title="Accounting Method", + ) + organization_type: str = Field( + ..., + description="Legal structure (corporation, trust, association, etc.).", + title="Organization Type", + ) + year_of_formation: str = Field( + ..., description="Year the organization was formed.", title="Year of Formation" + ) + incorporation_state: str = Field( + ..., description="State of incorporation.", title="Incorporation State" + ) + + +class RevenueBreakdown(BaseModel): + total_revenue: float = Field( + ..., description="Sum of all revenue sources.", title="Total Revenue" + ) + contributions_gifts_grants: float = Field( + ..., + description="Revenue from donations and grants.", 
+ title="Contributions, Gifts, and Grants", + ) + program_service_revenue: float = Field( + ..., + description="Revenue from program services.", + title="Program Service Revenue", + ) + membership_dues: float = Field( + ..., description="Revenue from membership dues.", title="Membership Dues" + ) + investment_income: float = Field( + ..., + description="Revenue from interest and dividends.", + title="Investment Income", + ) + gains_losses_sales_assets: float = Field( + ..., + description="Net gains or losses from asset sales.", + title="Gains/Losses from Sales of Assets", + ) + rental_income: float = Field( + ..., + description="Income from rental of real estate or equipment.", + title="Rental Income", + ) + related_organizations_revenue: float = Field( + ..., + description="Revenue from related organizations.", + title="Related Organizations Revenue", + ) + gaming_revenue: float = Field( + ..., description="Revenue from gaming activities.", title="Gaming Revenue" + ) + other_revenue: float = Field( + ..., description="Miscellaneous revenue sources.", title="Other Revenue" + ) + government_grants: float = Field( + ..., + description="Revenue from government grants.", + title="Revenue from Government Grants", + ) + foreign_contributions: float = Field( + ..., description="Revenue from foreign sources.", title="Foreign Contributions" + ) + + +class ExpensesBreakdown(BaseModel): + total_expenses: float = Field( + ..., description="Sum of all expenses.", title="Total Functional Expenses" + ) + program_services_expenses: float = Field( + ..., + description="Expenses for program services.", + title="Program Services Expenses", + ) + management_general_expenses: float = Field( + ..., + description="Administrative and management expenses.", + title="Management & General Expenses", + ) + fundraising_expenses: float = Field( + ..., + description="Expenses for fundraising activities.", + title="Fundraising Expenses", + ) + grants_us_organizations: float = Field( + ..., + description="Grants and assistance to U.S. organizations.", + title="Grants to U.S. Organizations", + ) + grants_us_individuals: float = Field( + ..., + description="Grants and assistance to U.S. individuals.", + title="Grants to U.S. 
Individuals", + ) + grants_foreign_organizations: float = Field( + ..., + description="Grants and assistance to foreign organizations.", + title="Grants to Foreign Organizations", + ) + grants_foreign_individuals: float = Field( + ..., + description="Grants and assistance to foreign individuals.", + title="Grants to Foreign Individuals", + ) + compensation_officers: float = Field( + ..., + description="Compensation paid to officers and key employees.", + title="Compensation of Officers/Key Employees", + ) + compensation_other_staff: float = Field( + ..., + description="Compensation paid to other staff.", + title="Compensation of Other Staff", + ) + payroll_taxes_benefits: float = Field( + ..., + description="Payroll taxes and employee benefits.", + title="Payroll Taxes and Benefits", + ) + professional_fees: float = Field( + ..., + description="Legal, accounting, and lobbying fees.", + title="Professional Fees", + ) + office_occupancy_costs: float = Field( + ..., + description="Office and occupancy expenses.", + title="Office and Occupancy Costs", + ) + information_technology_costs: float = Field( + ..., description="IT-related expenses.", title="Information Technology Costs" + ) + travel_conference_expenses: float = Field( + ..., + description="Travel and conference costs.", + title="Travel and Conference Expenses", + ) + depreciation_amortization: float = Field( + ..., + description="Depreciation and amortization expenses.", + title="Depreciation and Amortization", + ) + insurance: float = Field(..., description="Insurance expenses.", title="Insurance") + + +class OfficersDirectorsTrusteesKeyEmployee(BaseModel): + name: str = Field(..., description="Full name of the individual.", title="Name") + title_position: str = Field( + ..., description="Role or position held.", title="Title/Position" + ) + average_hours_per_week: float = Field( + ..., + description="Average weekly hours devoted to position.", + title="Average Hours Per Week", + ) + related_party_transactions: str = Field( + ..., + description="Indicates if related-party transactions occurred.", + title="Related-Party Transactions", + ) + former_officer: str = Field( + ..., + description="Indicates if the individual is a former officer.", + title="Former Officer Indicator", + ) + governance_role: str = Field( + ..., + description="Role in governance (voting, independent, etc.).", + title="Governance Role", + ) + + +class GovernanceManagementDisclosure(BaseModel): + governing_body_size: float = Field( + ..., + description="Number of voting members on the governing body.", + title="Governing Body Size", + ) + independent_members: float = Field( + ..., + description="Number of independent voting members.", + title="Number of Independent Members", + ) + financial_statements_reviewed: str = Field( + ..., + description="Indicates if financial statements were reviewed or audited.", + title="Financial Statements Reviewed/Audited", + ) + form_990_provided_to_governing_body: str = Field( + ..., + description="Indicates if Form 990 was provided to governing body before filing.", + title="Form 990 Provided to Governing Body", + ) + conflict_of_interest_policy: str = Field( + ..., + description="Indicates if a conflict-of-interest policy is in place.", + title="Conflict-of-Interest Policy", + ) + whistleblower_policy: str = Field( + ..., + description="Indicates if a whistleblower policy is in place.", + title="Whistleblower Policy", + ) + document_retention_policy: str = Field( + ..., + description="Indicates if a document 
retention/destruction policy is in place.", + title="Document Retention/Destruction Policy", + ) + ceo_compensation_review_process: str = Field( + ..., + description="Description of CEO compensation review process.", + title="CEO Compensation Review Process", + ) + public_disclosure_practices: str = Field( + ..., + description="Description of public disclosure practices.", + title="Public Disclosure Practices", + ) + + +class ProgramServiceAccomplishment(BaseModel): + program_name: str = Field( + ..., description="Name of the program.", title="Program Name" + ) + program_description: str = Field( + ..., description="Description of the program.", title="Program Description" + ) + expenses: float = Field( + ..., description="Expenses for the program.", title="Program Expenses" + ) + grants: float = Field( + ..., description="Grants made under the program.", title="Program Grants" + ) + revenue_generated: float = Field( + ..., description="Revenue generated by the program.", title="Revenue Generated" + ) + quantitative_outputs: str = Field( + ..., + description="Quantitative outputs (e.g., number served, events held).", + title="Quantitative Outputs", + ) + + +class FundraisingGrantmaking(BaseModel): + total_fundraising_event_revenue: float = Field( + ..., + description="Total revenue from fundraising events.", + title="Total Fundraising Event Revenue", + ) + total_fundraising_event_expenses: float = Field( + ..., + description="Total direct expenses for fundraising events.", + title="Total Fundraising Event Expenses", + ) + professional_fundraiser_fees: float = Field( + ..., + description="Fees paid to professional fundraisers.", + title="Professional Fundraiser Fees", + ) + + +class FunctionalOperationalData(BaseModel): + number_of_employees: float = Field( + ..., description="Total number of employees.", title="Number of Employees" + ) + number_of_volunteers: float = Field( + ..., description="Total number of volunteers.", title="Number of Volunteers" + ) + occupancy_costs: float = Field( + ..., description="Total occupancy costs.", title="Occupancy Costs" + ) + fundraising_method_descriptions: str = Field( + ..., + description="Descriptions of fundraising methods used.", + title="Fundraising Method Descriptions", + ) + joint_ventures_disregarded_entities: str = Field( + ..., + description="Details of joint ventures and disregarded entities.", + title="Joint Ventures and Disregarded Entities", + ) + + +class CompensationDetails(BaseModel): + base_compensation: float = Field( + ..., description="Base salary or wages.", title="Base Compensation" + ) + bonus: float = Field( + ..., description="Bonus or incentive compensation.", title="Bonus Compensation" + ) + incentive: float = Field( + ..., description="Incentive compensation.", title="Incentive Compensation" + ) + other: float = Field( + ..., description="Other forms of compensation.", title="Other Compensation" + ) + non_fixed_compensation: str = Field( + ..., + description="Indicates if compensation is non-fixed.", + title="Non-Fixed Compensation Flag", + ) + first_class_travel: str = Field( + ..., + description="Indicates if first-class travel was provided.", + title="First-Class Travel", + ) + housing_allowance: str = Field( + ..., + description="Indicates if housing allowance was provided.", + title="Housing Allowance", + ) + expense_account_usage: str = Field( + ..., + description="Indicates if expense account was used.", + title="Expense Account Usage", + ) + supplemental_retirement: str = Field( + ..., + description="Indicates if 
supplemental retirement or deferred comp was provided.", + title="Supplemental Retirement/Deferred Comp", + ) + + +class PoliticalLobbyingActivities(BaseModel): + lobbying_expenditures_direct: float = Field( + ..., + description="Direct lobbying expenditures.", + title="Direct Lobbying Expenditures", + ) + lobbying_expenditures_grassroots: float = Field( + ..., + description="Grassroots lobbying expenditures.", + title="Grassroots Lobbying Expenditures", + ) + election_501h_status: str = Field( + ..., + description="Indicates if 501(h) election was made.", + title="501(h) Election Status", + ) + political_campaign_expenditures: float = Field( + ..., + description="Expenditures for political campaigns.", + title="Political Campaign Expenditures", + ) + related_organizations_affiliates: str = Field( + ..., + description="Details of related organizations or affiliates involved.", + title="Related Organizations/Affiliates Involved", + ) + + +class InvestmentsEndowment(BaseModel): + investment_types: str = Field( + ..., + description="Types of investments held (securities, partnerships, real estate).", + title="Investment Types", + ) + donor_restricted_endowment_values: float = Field( + ..., + description="Value of donor-restricted endowments.", + title="Donor-Restricted Endowment Values", + ) + net_appreciation_depreciation: float = Field( + ..., + description="Net appreciation or depreciation of investments.", + title="Net Appreciation/Depreciation", + ) + related_organization_transactions: str = Field( + ..., + description="Details of transactions with related organizations.", + title="Related Organization Transactions", + ) + loans_to_from_related_parties: str = Field( + ..., + description="Details of loans to or from related parties.", + title="Loans to/from Related Parties", + ) + + +class TaxCompliancePenalties(BaseModel): + penalties_excise_taxes_reported: str = Field( + ..., + description="Reported penalties or excise taxes.", + title="Penalties or Excise Taxes Reported", + ) + unrelated_business_income_disclosure: str = Field( + ..., + description="Disclosure of unrelated business income (UBI).", + title="Unrelated Business Income Disclosure", + ) + foreign_bank_account_reporting: str = Field( + ..., + description="Disclosure of foreign bank accounts (FBAR equivalent).", + title="Foreign Bank Account Reporting", + ) + schedule_o_narrative_explanations: str = Field( + ..., + description="Narrative explanations from Schedule O.", + title="Schedule O Narrative Explanations", + ) + + +class ExtractedIrsForm990PfDataSchema(BaseModel): + core_organization_metadata: CoreOrganizationMetadata = Field( + ..., + description="Essential identifiers and attributes for normalizing entities across filings and years.", + title="Core Organization Metadata", + ) + revenue_breakdown: RevenueBreakdown = Field( + ..., + description="Detailed breakdown of revenue streams for the fiscal year.", + title="Revenue Breakdown", + ) + expenses_breakdown: ExpensesBreakdown = Field( + ..., + description="Detailed breakdown of expenses for the fiscal year.", + title="Expenses Breakdown", + ) + balance_sheet: dict[str, Any] = Field( + ..., + description="Assets, liabilities, and net assets at year end.", + title="Balance Sheet Data", + ) + officers_directors_trustees_key_employees: list[ + OfficersDirectorsTrusteesKeyEmployee + ] = Field( + ..., + description="List of key personnel and their compensation.", + title="Officers, Directors, Trustees, Key Employees", + ) + governance_management_disclosure: 
GovernanceManagementDisclosure = Field( + ..., + description="Governance and management practices, policies, and disclosures.", + title="Governance, Management, and Disclosure", + ) + program_service_accomplishments: list[ProgramServiceAccomplishment] = Field( + ..., + description="Major programs and their outputs for the fiscal year.", + title="Program Service Accomplishments", + ) + fundraising_grantmaking: FundraisingGrantmaking = Field( + ..., + description="Fundraising event details and grantmaking activities.", + title="Fundraising & Grantmaking", + ) + functional_operational_data: FunctionalOperationalData = Field( + ..., + description="Operational metrics and related-organization relationships.", + title="Functional & Operational Data", + ) + compensation_details: CompensationDetails = Field( + ..., + description="Detailed breakdown of officer compensation and benefits.", + title="Compensation Details", + ) + political_lobbying_activities: PoliticalLobbyingActivities = Field( + ..., + description="Details of political and lobbying expenditures and affiliations.", + title="Political & Lobbying Activities", + ) + investments_endowment: InvestmentsEndowment = Field( + ..., + description="Investment holdings, endowment values, and related transactions.", + title="Investments & Endowment", + ) + tax_compliance_penalties: TaxCompliancePenalties = Field( + ..., + description="Tax compliance indicators, penalties, and narrative explanations.", + title="Tax Compliance / Penalties", + ) + + +class ValidatorState(BaseModel): + extraction: ExtractedIrsForm990PfDataSchema + initial_findings: list[AuditFinding] = Field(default_factory=list) + metadata: dict[str, Any] = Field(default_factory=dict) diff --git a/backend/app/example_data.json b/backend/app/example_data.json new file mode 100644 index 0000000..61bfd50 --- /dev/null +++ b/backend/app/example_data.json @@ -0,0 +1,608 @@ +{ + "extraction": { + "core_organization_metadata": { + "ein": "84-2674654", + "legal_name": "07 IN HEAVEN MEMORIAL SCHOLARSHIP", + "phone_number": "(262) 215-0300", + "website_url": "", + "return_type": "990-PF", + "amended_return": "No", + "group_exemption_number": "", + "subsection_code": "501(c)(3)", + "ruling_date": "", + "accounting_method": "Cash", + "organization_type": "corporation", + "year_of_formation": "", + "incorporation_state": "WI" + }, + "revenue_breakdown": { + "total_revenue": 5227, + "contributions_gifts_grants": 5227, + "program_service_revenue": 0, + "membership_dues": 0, + "investment_income": 0, + "gains_losses_sales_assets": 0, + "rental_income": 0, + "related_organizations_revenue": 0, + "gaming_revenue": 0, + "other_revenue": 0, + "government_grants": 0, + "foreign_contributions": 0 + }, + "expenses_breakdown": { + "total_expenses": 2104, + "program_services_expenses": 0, + "management_general_expenses": 0, + "fundraising_expenses": 2104, + "grants_us_organizations": 0, + "grants_us_individuals": 0, + "grants_foreign_organizations": 0, + "grants_foreign_individuals": 0, + "compensation_officers": 0, + "compensation_other_staff": 0, + "payroll_taxes_benefits": 0, + "professional_fees": 0, + "office_occupancy_costs": 0, + "information_technology_costs": 0, + "travel_conference_expenses": 0, + "depreciation_amortization": 0, + "insurance": 0 + }, + "balance_sheet": {}, + "officers_directors_trustees_key_employees": [ + { + "name": "REBECCA TERPSTRA", + "title_position": "PRESIDENT", + "average_hours_per_week": 0.1, + "related_party_transactions": "", + "former_officer": "", + "governance_role": 
"" + }, + { + "name": "ROBERT GUZMAN", + "title_position": "VICE PRESDEINT", + "average_hours_per_week": 0.1, + "related_party_transactions": "", + "former_officer": "", + "governance_role": "" + }, + { + "name": "ANDREA VALENTI", + "title_position": "TREASURER", + "average_hours_per_week": 0.1, + "related_party_transactions": "", + "former_officer": "", + "governance_role": "" + }, + { + "name": "BETHANY WALSH", + "title_position": "SECRETARY", + "average_hours_per_week": 0.1, + "related_party_transactions": "", + "former_officer": "", + "governance_role": "" + } + ], + "governance_management_disclosure": { + "governing_body_size": 4, + "independent_members": 4, + "financial_statements_reviewed": "", + "form_990_provided_to_governing_body": "", + "conflict_of_interest_policy": "", + "whistleblower_policy": "", + "document_retention_policy": "", + "ceo_compensation_review_process": "", + "public_disclosure_practices": "Yes" + }, + "program_service_accomplishments": [], + "fundraising_grantmaking": { + "total_fundraising_event_revenue": 0, + "total_fundraising_event_expenses": 2104, + "professional_fundraiser_fees": 0 + }, + "functional_operational_data": { + "number_of_employees": 0, + "number_of_volunteers": 0, + "occupancy_costs": 0, + "fundraising_method_descriptions": "", + "joint_ventures_disregarded_entities": "" + }, + "compensation_details": { + "base_compensation": 0, + "bonus": 0, + "incentive": 0, + "other": 0, + "non_fixed_compensation": "", + "first_class_travel": "", + "housing_allowance": "", + "expense_account_usage": "", + "supplemental_retirement": "" + }, + "political_lobbying_activities": { + "lobbying_expenditures_direct": 0, + "lobbying_expenditures_grassroots": 0, + "election_501h_status": "", + "political_campaign_expenditures": 0, + "related_organizations_affiliates": "" + }, + "investments_endowment": { + "investment_types": "", + "donor_restricted_endowment_values": 0, + "net_appreciation_depreciation": 0, + "related_organization_transactions": "", + "loans_to_from_related_parties": "" + }, + "tax_compliance_penalties": { + "penalties_excise_taxes_reported": "No", + "unrelated_business_income_disclosure": "No", + "foreign_bank_account_reporting": "No", + "schedule_o_narrative_explanations": "" + } + }, + "extraction_metadata": { + "core_organization_metadata": { + "ein": { + "value": "84-2674654", + "references": ["0-7"] + }, + "legal_name": { + "value": "07 IN HEAVEN MEMORIAL SCHOLARSHIP", + "references": ["0-6"] + }, + "phone_number": { + "value": "(262) 215-0300", + "references": ["0-a"] + }, + "website_url": { + "value": "", + "references": [] + }, + "return_type": { + "value": "990-PF", + "references": ["4ade8ed0-bce7-4bd5-bd8d-190e3e4be95b"] + }, + "amended_return": { + "value": "No", + "references": ["4ac9edc4-e9bb-430f-b4c4-a42bf4c04b28"] + }, + "group_exemption_number": { + "value": "", + "references": [] + }, + "subsection_code": { + "value": "501(c)(3)", + "references": ["4ac9edc4-e9bb-430f-b4c4-a42bf4c04b28"] + }, + "ruling_date": { + "value": "", + "references": [] + }, + "accounting_method": { + "value": "Cash", + "references": ["0-d"] + }, + "organization_type": { + "value": "corporation", + "references": ["4ac9edc4-e9bb-430f-b4c4-a42bf4c04b28"] + }, + "year_of_formation": { + "value": "", + "references": [] + }, + "incorporation_state": { + "value": "WI", + "references": ["4ac9edc4-e9bb-430f-b4c4-a42bf4c04b28"] + } + }, + "revenue_breakdown": { + "total_revenue": { + "value": 5227, + "references": ["0-1z"] + }, + "contributions_gifts_grants": { + 
"value": 5227, + "references": ["0-m"] + }, + "program_service_revenue": { + "value": 0, + "references": [] + }, + "membership_dues": { + "value": 0, + "references": [] + }, + "investment_income": { + "value": 0, + "references": [] + }, + "gains_losses_sales_assets": { + "value": 0, + "references": [] + }, + "rental_income": { + "value": 0, + "references": [] + }, + "related_organizations_revenue": { + "value": 0, + "references": [] + }, + "gaming_revenue": { + "value": 0, + "references": [] + }, + "other_revenue": { + "value": 0, + "references": [] + }, + "government_grants": { + "value": 0, + "references": [] + }, + "foreign_contributions": { + "value": 0, + "references": [] + } + }, + "expenses_breakdown": { + "total_expenses": { + "value": 2104, + "references": ["0-2S"] + }, + "program_services_expenses": { + "value": 0, + "references": [] + }, + "management_general_expenses": { + "value": 0, + "references": [] + }, + "fundraising_expenses": { + "value": 2104, + "references": ["13-d"] + }, + "grants_us_organizations": { + "value": 0, + "references": [] + }, + "grants_us_individuals": { + "value": 0, + "references": [] + }, + "grants_foreign_organizations": { + "value": 0, + "references": [] + }, + "grants_foreign_individuals": { + "value": 0, + "references": [] + }, + "compensation_officers": { + "value": 0, + "references": ["5-1q", "5-1w", "5-1C", "5-1I"] + }, + "compensation_other_staff": { + "value": 0, + "references": [] + }, + "payroll_taxes_benefits": { + "value": 0, + "references": [] + }, + "professional_fees": { + "value": 0, + "references": [] + }, + "office_occupancy_costs": { + "value": 0, + "references": [] + }, + "information_technology_costs": { + "value": 0, + "references": [] + }, + "travel_conference_expenses": { + "value": 0, + "references": [] + }, + "depreciation_amortization": { + "value": 0, + "references": [] + }, + "insurance": { + "value": 0, + "references": [] + } + }, + "balance_sheet": {}, + "officers_directors_trustees_key_employees": [ + { + "name": { + "value": "REBECCA TERPSTRA", + "references": ["5-1o"] + }, + "title_position": { + "value": "PRESIDENT", + "references": ["5-1p"] + }, + "average_hours_per_week": { + "value": 0.1, + "references": ["5-1p"] + }, + "related_party_transactions": { + "value": "", + "references": [] + }, + "former_officer": { + "value": "", + "references": [] + }, + "governance_role": { + "value": "", + "references": [] + } + }, + { + "name": { + "value": "ROBERT GUZMAN", + "references": ["5-1u"] + }, + "title_position": { + "value": "VICE PRESDEINT", + "references": ["5-1v"] + }, + "average_hours_per_week": { + "value": 0.1, + "references": ["5-1v"] + }, + "related_party_transactions": { + "value": "", + "references": [] + }, + "former_officer": { + "value": "", + "references": [] + }, + "governance_role": { + "value": "", + "references": [] + } + }, + { + "name": { + "value": "ANDREA VALENTI", + "references": ["5-1A"] + }, + "title_position": { + "value": "TREASURER", + "references": ["5-1B"] + }, + "average_hours_per_week": { + "value": 0.1, + "references": ["5-1B"] + }, + "related_party_transactions": { + "value": "", + "references": [] + }, + "former_officer": { + "value": "", + "references": [] + }, + "governance_role": { + "value": "", + "references": [] + } + }, + { + "name": { + "value": "BETHANY WALSH", + "references": ["5-1G"] + }, + "title_position": { + "value": "SECRETARY", + "references": ["5-1H"] + }, + "average_hours_per_week": { + "value": 0.1, + "references": ["5-1H"] + }, + "related_party_transactions": { + 
"value": "", + "references": [] + }, + "former_officer": { + "value": "", + "references": [] + }, + "governance_role": { + "value": "", + "references": [] + } + } + ], + "governance_management_disclosure": { + "governing_body_size": { + "value": 4, + "references": ["5-1o", "5-1u", "5-1A", "5-1G"] + }, + "independent_members": { + "value": 4, + "references": ["5-1o", "5-1u", "5-1A", "5-1G"] + }, + "financial_statements_reviewed": { + "value": "", + "references": [] + }, + "form_990_provided_to_governing_body": { + "value": "", + "references": [] + }, + "conflict_of_interest_policy": { + "value": "", + "references": [] + }, + "whistleblower_policy": { + "value": "", + "references": [] + }, + "document_retention_policy": { + "value": "", + "references": [] + }, + "ceo_compensation_review_process": { + "value": "", + "references": [] + }, + "public_disclosure_practices": { + "value": "Yes", + "references": ["4-g"] + } + }, + "program_service_accomplishments": [], + "fundraising_grantmaking": { + "total_fundraising_event_revenue": { + "value": 0, + "references": [] + }, + "total_fundraising_event_expenses": { + "value": 2104, + "references": ["13-d"] + }, + "professional_fundraiser_fees": { + "value": 0, + "references": [] + } + }, + "functional_operational_data": { + "number_of_employees": { + "value": 0, + "references": [] + }, + "number_of_volunteers": { + "value": 0, + "references": [] + }, + "occupancy_costs": { + "value": 0, + "references": [] + }, + "fundraising_method_descriptions": { + "value": "", + "references": [] + }, + "joint_ventures_disregarded_entities": { + "value": "", + "references": [] + } + }, + "compensation_details": { + "base_compensation": { + "value": 0, + "references": ["5-1q", "5-1w"] + }, + "bonus": { + "value": 0, + "references": [] + }, + "incentive": { + "value": 0, + "references": [] + }, + "other": { + "value": 0, + "references": [] + }, + "non_fixed_compensation": { + "value": "", + "references": [] + }, + "first_class_travel": { + "value": "", + "references": [] + }, + "housing_allowance": { + "value": "", + "references": [] + }, + "expense_account_usage": { + "value": "", + "references": [] + }, + "supplemental_retirement": { + "value": "", + "references": [] + } + }, + "political_lobbying_activities": { + "lobbying_expenditures_direct": { + "value": 0, + "references": [] + }, + "lobbying_expenditures_grassroots": { + "value": 0, + "references": [] + }, + "election_501h_status": { + "value": "", + "references": [] + }, + "political_campaign_expenditures": { + "value": 0, + "references": [] + }, + "related_organizations_affiliates": { + "value": "", + "references": [] + } + }, + "investments_endowment": { + "investment_types": { + "value": "", + "references": [] + }, + "donor_restricted_endowment_values": { + "value": 0, + "references": [] + }, + "net_appreciation_depreciation": { + "value": 0, + "references": [] + }, + "related_organization_transactions": { + "value": "", + "references": [] + }, + "loans_to_from_related_parties": { + "value": "", + "references": [] + } + }, + "tax_compliance_penalties": { + "penalties_excise_taxes_reported": { + "value": "No", + "references": ["3-I"] + }, + "unrelated_business_income_disclosure": { + "value": "No", + "references": ["3-Y"] + }, + "foreign_bank_account_reporting": { + "value": "No", + "references": ["4-H"] + }, + "schedule_o_narrative_explanations": { + "value": "", + "references": [] + } + } + }, + "metadata": { + "filename": "markdown.md", + "org_id": null, + "duration_ms": 16656, + "credit_usage": 27.2, + 
"job_id": "nnmr8lcxtykk5ll5wodjtrnn6", + "version": "extract-20250930" + } +} diff --git a/backend/app/routers/agent.py b/backend/app/routers/agent.py index ccef116..d38b5ff 100644 --- a/backend/app/routers/agent.py +++ b/backend/app/routers/agent.py @@ -6,6 +6,7 @@ from pydantic_ai.ui.vercel_ai import VercelAIAdapter from starlette.requests import Request from starlette.responses import Response +from app.agents import form_auditor from app.core.config import settings provider = AzureProvider( @@ -19,6 +20,347 @@ agent = Agent(model=model) router = APIRouter(prefix="/api/v1/agent", tags=["Agent"]) +@agent.tool_plain() +async def build_audit_report(): + """Calls the audit subagent to get a full audit report of the organization""" + data = { + "extraction": { + "core_organization_metadata": { + "ein": "84-2674654", + "legal_name": "07 IN HEAVEN MEMORIAL SCHOLARSHIP", + "phone_number": "(262) 215-0300", + "website_url": "", + "return_type": "990-PF", + "amended_return": "No", + "group_exemption_number": "", + "subsection_code": "501(c)(3)", + "ruling_date": "", + "accounting_method": "Cash", + "organization_type": "corporation", + "year_of_formation": "", + "incorporation_state": "WI", + }, + "revenue_breakdown": { + "total_revenue": 5227, + "contributions_gifts_grants": 5227, + "program_service_revenue": 0, + "membership_dues": 0, + "investment_income": 0, + "gains_losses_sales_assets": 0, + "rental_income": 0, + "related_organizations_revenue": 0, + "gaming_revenue": 0, + "other_revenue": 0, + "government_grants": 0, + "foreign_contributions": 0, + }, + "expenses_breakdown": { + "total_expenses": 2104, + "program_services_expenses": 0, + "management_general_expenses": 0, + "fundraising_expenses": 2104, + "grants_us_organizations": 0, + "grants_us_individuals": 0, + "grants_foreign_organizations": 0, + "grants_foreign_individuals": 0, + "compensation_officers": 0, + "compensation_other_staff": 0, + "payroll_taxes_benefits": 0, + "professional_fees": 0, + "office_occupancy_costs": 0, + "information_technology_costs": 0, + "travel_conference_expenses": 0, + "depreciation_amortization": 0, + "insurance": 0, + }, + "balance_sheet": {}, + "officers_directors_trustees_key_employees": [ + { + "name": "REBECCA TERPSTRA", + "title_position": "PRESIDENT", + "average_hours_per_week": 0.1, + "related_party_transactions": "", + "former_officer": "", + "governance_role": "", + }, + { + "name": "ROBERT GUZMAN", + "title_position": "VICE PRESDEINT", + "average_hours_per_week": 0.1, + "related_party_transactions": "", + "former_officer": "", + "governance_role": "", + }, + { + "name": "ANDREA VALENTI", + "title_position": "TREASURER", + "average_hours_per_week": 0.1, + "related_party_transactions": "", + "former_officer": "", + "governance_role": "", + }, + { + "name": "BETHANY WALSH", + "title_position": "SECRETARY", + "average_hours_per_week": 0.1, + "related_party_transactions": "", + "former_officer": "", + "governance_role": "", + }, + ], + "governance_management_disclosure": { + "governing_body_size": 4, + "independent_members": 4, + "financial_statements_reviewed": "", + "form_990_provided_to_governing_body": "", + "conflict_of_interest_policy": "", + "whistleblower_policy": "", + "document_retention_policy": "", + "ceo_compensation_review_process": "", + "public_disclosure_practices": "Yes", + }, + "program_service_accomplishments": [], + "fundraising_grantmaking": { + "total_fundraising_event_revenue": 0, + "total_fundraising_event_expenses": 2104, + "professional_fundraiser_fees": 0, + }, + 
"functional_operational_data": { + "number_of_employees": 0, + "number_of_volunteers": 0, + "occupancy_costs": 0, + "fundraising_method_descriptions": "", + "joint_ventures_disregarded_entities": "", + }, + "compensation_details": { + "base_compensation": 0, + "bonus": 0, + "incentive": 0, + "other": 0, + "non_fixed_compensation": "", + "first_class_travel": "", + "housing_allowance": "", + "expense_account_usage": "", + "supplemental_retirement": "", + }, + "political_lobbying_activities": { + "lobbying_expenditures_direct": 0, + "lobbying_expenditures_grassroots": 0, + "election_501h_status": "", + "political_campaign_expenditures": 0, + "related_organizations_affiliates": "", + }, + "investments_endowment": { + "investment_types": "", + "donor_restricted_endowment_values": 0, + "net_appreciation_depreciation": 0, + "related_organization_transactions": "", + "loans_to_from_related_parties": "", + }, + "tax_compliance_penalties": { + "penalties_excise_taxes_reported": "No", + "unrelated_business_income_disclosure": "No", + "foreign_bank_account_reporting": "No", + "schedule_o_narrative_explanations": "", + }, + }, + "extraction_metadata": { + "core_organization_metadata": { + "ein": {"value": "84-2674654", "references": ["0-7"]}, + "legal_name": { + "value": "07 IN HEAVEN MEMORIAL SCHOLARSHIP", + "references": ["0-6"], + }, + "phone_number": {"value": "(262) 215-0300", "references": ["0-a"]}, + "website_url": {"value": "", "references": []}, + "return_type": { + "value": "990-PF", + "references": ["4ade8ed0-bce7-4bd5-bd8d-190e3e4be95b"], + }, + "amended_return": { + "value": "No", + "references": ["4ac9edc4-e9bb-430f-b4c4-a42bf4c04b28"], + }, + "group_exemption_number": {"value": "", "references": []}, + "subsection_code": { + "value": "501(c)(3)", + "references": ["4ac9edc4-e9bb-430f-b4c4-a42bf4c04b28"], + }, + "ruling_date": {"value": "", "references": []}, + "accounting_method": {"value": "Cash", "references": ["0-d"]}, + "organization_type": { + "value": "corporation", + "references": ["4ac9edc4-e9bb-430f-b4c4-a42bf4c04b28"], + }, + "year_of_formation": {"value": "", "references": []}, + "incorporation_state": { + "value": "WI", + "references": ["4ac9edc4-e9bb-430f-b4c4-a42bf4c04b28"], + }, + }, + "revenue_breakdown": { + "total_revenue": {"value": 5227, "references": ["0-1z"]}, + "contributions_gifts_grants": {"value": 5227, "references": ["0-m"]}, + "program_service_revenue": {"value": 0, "references": []}, + "membership_dues": {"value": 0, "references": []}, + "investment_income": {"value": 0, "references": []}, + "gains_losses_sales_assets": {"value": 0, "references": []}, + "rental_income": {"value": 0, "references": []}, + "related_organizations_revenue": {"value": 0, "references": []}, + "gaming_revenue": {"value": 0, "references": []}, + "other_revenue": {"value": 0, "references": []}, + "government_grants": {"value": 0, "references": []}, + "foreign_contributions": {"value": 0, "references": []}, + }, + "expenses_breakdown": { + "total_expenses": {"value": 2104, "references": ["0-2S"]}, + "program_services_expenses": {"value": 0, "references": []}, + "management_general_expenses": {"value": 0, "references": []}, + "fundraising_expenses": {"value": 2104, "references": ["13-d"]}, + "grants_us_organizations": {"value": 0, "references": []}, + "grants_us_individuals": {"value": 0, "references": []}, + "grants_foreign_organizations": {"value": 0, "references": []}, + "grants_foreign_individuals": {"value": 0, "references": []}, + "compensation_officers": { + "value": 0, + 
"references": ["5-1q", "5-1w", "5-1C", "5-1I"], + }, + "compensation_other_staff": {"value": 0, "references": []}, + "payroll_taxes_benefits": {"value": 0, "references": []}, + "professional_fees": {"value": 0, "references": []}, + "office_occupancy_costs": {"value": 0, "references": []}, + "information_technology_costs": {"value": 0, "references": []}, + "travel_conference_expenses": {"value": 0, "references": []}, + "depreciation_amortization": {"value": 0, "references": []}, + "insurance": {"value": 0, "references": []}, + }, + "balance_sheet": {}, + "officers_directors_trustees_key_employees": [ + { + "name": {"value": "REBECCA TERPSTRA", "references": ["5-1o"]}, + "title_position": {"value": "PRESIDENT", "references": ["5-1p"]}, + "average_hours_per_week": {"value": 0.1, "references": ["5-1p"]}, + "related_party_transactions": {"value": "", "references": []}, + "former_officer": {"value": "", "references": []}, + "governance_role": {"value": "", "references": []}, + }, + { + "name": {"value": "ROBERT GUZMAN", "references": ["5-1u"]}, + "title_position": { + "value": "VICE PRESDEINT", + "references": ["5-1v"], + }, + "average_hours_per_week": {"value": 0.1, "references": ["5-1v"]}, + "related_party_transactions": {"value": "", "references": []}, + "former_officer": {"value": "", "references": []}, + "governance_role": {"value": "", "references": []}, + }, + { + "name": {"value": "ANDREA VALENTI", "references": ["5-1A"]}, + "title_position": {"value": "TREASURER", "references": ["5-1B"]}, + "average_hours_per_week": {"value": 0.1, "references": ["5-1B"]}, + "related_party_transactions": {"value": "", "references": []}, + "former_officer": {"value": "", "references": []}, + "governance_role": {"value": "", "references": []}, + }, + { + "name": {"value": "BETHANY WALSH", "references": ["5-1G"]}, + "title_position": {"value": "SECRETARY", "references": ["5-1H"]}, + "average_hours_per_week": {"value": 0.1, "references": ["5-1H"]}, + "related_party_transactions": {"value": "", "references": []}, + "former_officer": {"value": "", "references": []}, + "governance_role": {"value": "", "references": []}, + }, + ], + "governance_management_disclosure": { + "governing_body_size": { + "value": 4, + "references": ["5-1o", "5-1u", "5-1A", "5-1G"], + }, + "independent_members": { + "value": 4, + "references": ["5-1o", "5-1u", "5-1A", "5-1G"], + }, + "financial_statements_reviewed": {"value": "", "references": []}, + "form_990_provided_to_governing_body": {"value": "", "references": []}, + "conflict_of_interest_policy": {"value": "", "references": []}, + "whistleblower_policy": {"value": "", "references": []}, + "document_retention_policy": {"value": "", "references": []}, + "ceo_compensation_review_process": {"value": "", "references": []}, + "public_disclosure_practices": {"value": "Yes", "references": ["4-g"]}, + }, + "program_service_accomplishments": [], + "fundraising_grantmaking": { + "total_fundraising_event_revenue": {"value": 0, "references": []}, + "total_fundraising_event_expenses": { + "value": 2104, + "references": ["13-d"], + }, + "professional_fundraiser_fees": {"value": 0, "references": []}, + }, + "functional_operational_data": { + "number_of_employees": {"value": 0, "references": []}, + "number_of_volunteers": {"value": 0, "references": []}, + "occupancy_costs": {"value": 0, "references": []}, + "fundraising_method_descriptions": {"value": "", "references": []}, + "joint_ventures_disregarded_entities": {"value": "", "references": []}, + }, + "compensation_details": { + 
"base_compensation": {"value": 0, "references": ["5-1q", "5-1w"]}, + "bonus": {"value": 0, "references": []}, + "incentive": {"value": 0, "references": []}, + "other": {"value": 0, "references": []}, + "non_fixed_compensation": {"value": "", "references": []}, + "first_class_travel": {"value": "", "references": []}, + "housing_allowance": {"value": "", "references": []}, + "expense_account_usage": {"value": "", "references": []}, + "supplemental_retirement": {"value": "", "references": []}, + }, + "political_lobbying_activities": { + "lobbying_expenditures_direct": {"value": 0, "references": []}, + "lobbying_expenditures_grassroots": {"value": 0, "references": []}, + "election_501h_status": {"value": "", "references": []}, + "political_campaign_expenditures": {"value": 0, "references": []}, + "related_organizations_affiliates": {"value": "", "references": []}, + }, + "investments_endowment": { + "investment_types": {"value": "", "references": []}, + "donor_restricted_endowment_values": {"value": 0, "references": []}, + "net_appreciation_depreciation": {"value": 0, "references": []}, + "related_organization_transactions": {"value": "", "references": []}, + "loans_to_from_related_parties": {"value": "", "references": []}, + }, + "tax_compliance_penalties": { + "penalties_excise_taxes_reported": { + "value": "No", + "references": ["3-I"], + }, + "unrelated_business_income_disclosure": { + "value": "No", + "references": ["3-Y"], + }, + "foreign_bank_account_reporting": { + "value": "No", + "references": ["4-H"], + }, + "schedule_o_narrative_explanations": {"value": "", "references": []}, + }, + }, + "metadata": { + "filename": "markdown.md", + "org_id": None, + "duration_ms": 16656, + "credit_usage": 27.2, + "job_id": "nnmr8lcxtykk5ll5wodjtrnn6", + "version": "extract-20250930", + }, + } + + result = await form_auditor.build_audit_report(data) + + return result.model_dump_json() + + @router.post("/chat") async def chat(request: Request) -> Response: return await VercelAIAdapter.dispatch_request(request, agent=agent) diff --git a/backend/app/routers/chunking_landingai.py b/backend/app/routers/chunking_landingai.py index d04e8f1..5f1d5d6 100644 --- a/backend/app/routers/chunking_landingai.py +++ b/backend/app/routers/chunking_landingai.py @@ -2,17 +2,18 @@ Router para procesamiento de PDFs con LandingAI. Soporta dos modos: rápido (solo parse) y extracción (parse + extract con schema). 
""" + import logging import time +from typing import List, Literal, Optional + from fastapi import APIRouter, HTTPException -from pydantic import BaseModel, Field -from typing import Optional, List, Literal - from langchain_core.documents import Document +from pydantic import BaseModel, Field -from ..services.landingai_service import get_landingai_service -from ..services.chunking_service import get_chunking_service from ..repositories.schema_repository import get_schema_repository +from ..services.chunking_service import get_chunking_service +from ..services.landingai_service import get_landingai_service from ..utils.chunking.token_manager import TokenManager logger = logging.getLogger(__name__) @@ -22,6 +23,7 @@ router = APIRouter(prefix="/api/v1/chunking-landingai", tags=["chunking-landinga class ProcessLandingAIRequest(BaseModel): """Request para procesar PDF con LandingAI""" + file_name: str = Field(..., description="Nombre del archivo PDF") tema: str = Field(..., description="Tema/carpeta del archivo") collection_name: str = Field(..., description="Colección de Qdrant") @@ -29,34 +31,33 @@ class ProcessLandingAIRequest(BaseModel): # Modo de procesamiento mode: Literal["quick", "extract"] = Field( default="quick", - description="Modo: 'quick' (solo parse) o 'extract' (parse + datos estructurados)" + description="Modo: 'quick' (solo parse) o 'extract' (parse + datos estructurados)", ) # Schema (obligatorio si mode='extract') schema_id: Optional[str] = Field( - None, - description="ID del schema a usar (requerido si mode='extract')" + None, description="ID del schema a usar (requerido si mode='extract')" ) # Configuración de chunks include_chunk_types: List[str] = Field( default=["text", "table"], - description="Tipos de chunks a incluir: text, table, figure, etc." 
+ description="Tipos de chunks a incluir: text, table, figure, etc.", ) max_tokens_per_chunk: int = Field( default=1500, ge=500, le=3000, - description="Tokens máximos por chunk (flexible para tablas/figuras)" + description="Tokens máximos por chunk (flexible para tablas/figuras)", ) merge_small_chunks: bool = Field( - default=True, - description="Unir chunks pequeños de la misma página y tipo" + default=True, description="Unir chunks pequeños de la misma página y tipo" ) class ProcessLandingAIResponse(BaseModel): """Response del procesamiento con LandingAI""" + success: bool mode: str processing_time_seconds: float @@ -97,9 +98,9 @@ async def process_with_landingai(request: ProcessLandingAIRequest): start_time = time.time() try: - logger.info(f"\n{'='*60}") - logger.info(f"INICIANDO PROCESAMIENTO CON LANDINGAI") - logger.info(f"{'='*60}") + logger.info(f"\n{'=' * 60}") + logger.info("INICIANDO PROCESAMIENTO CON LANDINGAI") + logger.info(f"{'=' * 60}") logger.info(f"Archivo: {request.file_name}") logger.info(f"Tema: {request.tema}") logger.info(f"Modo: {request.mode}") @@ -111,7 +112,7 @@ async def process_with_landingai(request: ProcessLandingAIRequest): if not request.schema_id: raise HTTPException( status_code=400, - detail="schema_id es requerido cuando mode='extract'" + detail="schema_id es requerido cuando mode='extract'", ) schema_repo = get_schema_repository() @@ -119,8 +120,7 @@ async def process_with_landingai(request: ProcessLandingAIRequest): if not custom_schema: raise HTTPException( - status_code=404, - detail=f"Schema no encontrado: {request.schema_id}" + status_code=404, detail=f"Schema no encontrado: {request.schema_id}" ) logger.info(f"Schema seleccionado: {custom_schema.schema_name}") @@ -131,14 +131,12 @@ async def process_with_landingai(request: ProcessLandingAIRequest): try: pdf_bytes = await chunking_service.download_pdf_from_blob( - request.file_name, - request.tema + request.file_name, request.tema ) except Exception as e: logger.error(f"Error descargando PDF: {e}") raise HTTPException( - status_code=404, - detail=f"No se pudo descargar el PDF: {str(e)}" + status_code=404, detail=f"No se pudo descargar el PDF: {str(e)}" ) # 3. Procesar con LandingAI @@ -150,13 +148,12 @@ async def process_with_landingai(request: ProcessLandingAIRequest): pdf_bytes=pdf_bytes, file_name=request.file_name, custom_schema=custom_schema, - include_chunk_types=request.include_chunk_types + include_chunk_types=request.include_chunk_types, ) except Exception as e: logger.error(f"Error en LandingAI: {e}") raise HTTPException( - status_code=500, - detail=f"Error procesando con LandingAI: {str(e)}" + status_code=500, detail=f"Error procesando con LandingAI: {str(e)}" ) documents = result["chunks"] @@ -164,7 +161,7 @@ async def process_with_landingai(request: ProcessLandingAIRequest): if not documents: raise HTTPException( status_code=400, - detail="No se generaron chunks después del procesamiento" + detail="No se generaron chunks después del procesamiento", ) # 4. Aplicar control flexible de tokens @@ -172,7 +169,7 @@ async def process_with_landingai(request: ProcessLandingAIRequest): documents = _apply_flexible_token_control( documents, max_tokens=request.max_tokens_per_chunk, - merge_small=request.merge_small_chunks + merge_small=request.merge_small_chunks, ) # 5. 
Generar embeddings @@ -180,13 +177,16 @@ async def process_with_landingai(request: ProcessLandingAIRequest): texts = [doc.page_content for doc in documents] try: - embeddings = await chunking_service.embedding_service.generate_embeddings_batch(texts) + embeddings = ( + await chunking_service.embedding_service.generate_embeddings_batch( + texts + ) + ) logger.info(f"Embeddings generados: {len(embeddings)} vectores") except Exception as e: logger.error(f"Error generando embeddings: {e}") raise HTTPException( - status_code=500, - detail=f"Error generando embeddings: {str(e)}" + status_code=500, detail=f"Error generando embeddings: {str(e)}" ) # 6. Preparar chunks para Qdrant con IDs determinísticos @@ -198,38 +198,38 @@ async def process_with_landingai(request: ProcessLandingAIRequest): chunk_id = chunking_service._generate_deterministic_id( file_name=request.file_name, page=doc.metadata.get("page", 1), - chunk_index=doc.metadata.get("chunk_id", str(idx)) + chunk_index=doc.metadata.get("chunk_id", str(idx)), ) - qdrant_chunks.append({ - "id": chunk_id, - "vector": embedding, - "payload": { - "page_content": doc.page_content, - "metadata": doc.metadata # Metadata rica de LandingAI + qdrant_chunks.append( + { + "id": chunk_id, + "vector": embedding, + "payload": { + "page_content": doc.page_content, + "metadata": doc.metadata, # Metadata rica de LandingAI + }, } - }) + ) # 7. Subir a Qdrant try: upload_result = await chunking_service.vector_db.add_chunks( - request.collection_name, - qdrant_chunks + request.collection_name, qdrant_chunks ) logger.info(f"Subida completada: {upload_result['chunks_added']} chunks") except Exception as e: logger.error(f"Error subiendo a Qdrant: {e}") raise HTTPException( - status_code=500, - detail=f"Error subiendo a Qdrant: {str(e)}" + status_code=500, detail=f"Error subiendo a Qdrant: {str(e)}" ) # Tiempo total processing_time = time.time() - start_time - logger.info(f"\n{'='*60}") + logger.info(f"\n{'=' * 60}") logger.info(f"PROCESAMIENTO COMPLETADO") - logger.info(f"{'='*60}") + logger.info(f"{'=' * 60}") logger.info(f"Tiempo: {processing_time:.2f}s") logger.info(f"Chunks procesados: {len(documents)}") logger.info(f"Chunks subidos: {upload_result['chunks_added']}") @@ -245,23 +245,18 @@ async def process_with_landingai(request: ProcessLandingAIRequest): schema_used=custom_schema.schema_id if custom_schema else None, extracted_data=result.get("extracted_data"), parse_metadata=result["parse_metadata"], - message=f"PDF procesado exitosamente en modo {request.mode}" + message=f"PDF procesado exitosamente en modo {request.mode}", ) except HTTPException: raise except Exception as e: logger.error(f"Error inesperado en procesamiento: {e}") - raise HTTPException( - status_code=500, - detail=f"Error inesperado: {str(e)}" - ) + raise HTTPException(status_code=500, detail=f"Error inesperado: {str(e)}") def _apply_flexible_token_control( - documents: List[Document], - max_tokens: int, - merge_small: bool + documents: List[Document], max_tokens: int, merge_small: bool ) -> List[Document]: """ Aplica control flexible de tokens (Opción C del diseño). 
@@ -306,14 +301,10 @@ def _apply_flexible_token_control( else: # Intentar merge si es pequeño - if ( - merge_small and - tokens < max_tokens * 0.5 and - i < len(documents) - 1 - ): + if merge_small and tokens < max_tokens * 0.5 and i < len(documents) - 1: next_doc = documents[i + 1] if _can_merge(doc, next_doc, max_tokens, token_manager): - logger.debug(f"Merging chunks {i} y {i+1}") + logger.debug(f"Merging chunks {i} y {i + 1}") doc = _merge_documents(doc, next_doc) i += 1 # Skip next @@ -326,9 +317,7 @@ def _apply_flexible_token_control( def _split_large_chunk( - doc: Document, - max_tokens: int, - token_manager: TokenManager + doc: Document, max_tokens: int, token_manager: TokenManager ) -> List[Document]: """Divide un chunk grande en sub-chunks""" content = doc.page_content @@ -343,8 +332,7 @@ def _split_large_chunk( # Guardar chunk actual sub_content = " ".join(current_chunk) sub_doc = Document( - page_content=sub_content, - metadata={**doc.metadata, "is_split": True} + page_content=sub_content, metadata={**doc.metadata, "is_split": True} ) sub_chunks.append(sub_doc) current_chunk = [word] @@ -357,8 +345,7 @@ def _split_large_chunk( if current_chunk: sub_content = " ".join(current_chunk) sub_doc = Document( - page_content=sub_content, - metadata={**doc.metadata, "is_split": True} + page_content=sub_content, metadata={**doc.metadata, "is_split": True} ) sub_chunks.append(sub_doc) @@ -366,10 +353,7 @@ def _split_large_chunk( def _can_merge( - doc1: Document, - doc2: Document, - max_tokens: int, - token_manager: TokenManager + doc1: Document, doc2: Document, max_tokens: int, token_manager: TokenManager ) -> bool: """Verifica si dos docs se pueden mergear""" # Misma página @@ -391,6 +375,5 @@ def _merge_documents(doc1: Document, doc2: Document) -> Document: """Mergea dos documentos""" merged_content = f"{doc1.page_content}\n\n{doc2.page_content}" return Document( - page_content=merged_content, - metadata={**doc1.metadata, "is_merged": True} + page_content=merged_content, metadata={**doc1.metadata, "is_merged": True} ) diff --git a/backend/app/routers/dataroom.py b/backend/app/routers/dataroom.py index 353f1ec..dff2e3d 100644 --- a/backend/app/routers/dataroom.py +++ b/backend/app/routers/dataroom.py @@ -1,10 +1,12 @@ import logging +from typing import Optional from fastapi import APIRouter, HTTPException from pydantic import BaseModel from ..models.dataroom import DataRoom from ..models.vector_models import CollectionCreateRequest +from ..services.azure_service import azure_service from ..services.vector_service import vector_service logger = logging.getLogger(__name__) @@ -16,9 +18,136 @@ class DataroomCreate(BaseModel): storage: str = "" +class DataroomInfo(BaseModel): + name: str + collection: str + storage: str + file_count: int + total_size_bytes: int + total_size_mb: float + collection_exists: bool + vector_count: Optional[int] + collection_info: Optional[dict] + file_types: dict + recent_files: list + + router = APIRouter(prefix="/dataroom", tags=["Dataroom"]) +@router.get("/{dataroom_name}/info") +async def dataroom_info(dataroom_name: str) -> DataroomInfo: + """ + Obtener información detallada de un dataroom específico + """ + try: + # Find the dataroom in Redis + datarooms = DataRoom.find().all() + dataroom = None + for room in datarooms: + if room.name == dataroom_name: + dataroom = room + break + + if not dataroom: + raise HTTPException( + status_code=404, detail=f"Dataroom '{dataroom_name}' not found" + ) + + # Get file information from Azure Storage + try: + files_data = 
await azure_service.list_files(dataroom_name) + except Exception as e: + logger.warning(f"Could not fetch files for dataroom '{dataroom_name}': {e}") + files_data = [] + + # Calculate file metrics + file_count = len(files_data) + total_size_bytes = sum(file_data.get("size", 0) for file_data in files_data) + total_size_mb = ( + round(total_size_bytes / (1024 * 1024), 2) if total_size_bytes > 0 else 0.0 + ) + + # Analyze file types + file_types = {} + recent_files = [] + + for file_data in files_data: + # Count file types by extension + filename = file_data.get("name", "") + if "." in filename: + ext = filename.split(".")[-1].lower() + file_types[ext] = file_types.get(ext, 0) + 1 + + # Collect recent files (up to 5) + if len(recent_files) < 5: + recent_files.append( + { + "name": filename, + "size_mb": round(file_data.get("size", 0) / (1024 * 1024), 2), + "last_modified": file_data.get("last_modified"), + } + ) + + # Sort recent files by last modified (newest first) + recent_files.sort(key=lambda x: x.get("last_modified", ""), reverse=True) + + # Get vector collection information + collection_exists = False + vector_count = None + collection_info = None + + try: + collection_exists_response = await vector_service.check_collection_exists( + dataroom_name + ) + collection_exists = collection_exists_response.exists + + if collection_exists: + collection_info_response = await vector_service.get_collection_info( + dataroom_name + ) + if collection_info_response: + collection_info = { + "vectors_count": collection_info_response.vectors_count, + "indexed_vectors_count": collection_info_response.indexed_vectors_count, + "points_count": collection_info_response.points_count, + "segments_count": collection_info_response.segments_count, + "status": collection_info_response.status, + } + vector_count = collection_info_response.vectors_count + except Exception as e: + logger.warning( + f"Could not fetch collection info for '{dataroom_name}': {e}" + ) + + logger.info( + f"Retrieved info for dataroom '{dataroom_name}': {file_count} files, {total_size_mb}MB" + ) + + return DataroomInfo( + name=dataroom.name, + collection=dataroom.collection, + storage=dataroom.storage, + file_count=file_count, + total_size_bytes=total_size_bytes, + total_size_mb=total_size_mb, + collection_exists=collection_exists, + vector_count=vector_count, + collection_info=collection_info, + file_types=file_types, + recent_files=recent_files, + ) + + except HTTPException: + raise + except Exception as e: + logger.error(f"Error getting dataroom info for '{dataroom_name}': {e}") + raise HTTPException( + status_code=500, detail=f"Error getting dataroom info: {str(e)}" + ) + + @router.get("/") async def list_datarooms(): """ diff --git a/backend/pyproject.toml b/backend/pyproject.toml index e41a601..c9f1f7a 100644 --- a/backend/pyproject.toml +++ b/backend/pyproject.toml @@ -33,3 +33,8 @@ dependencies = [ [project.scripts] dev = "uvicorn app.main:app --host 0.0.0.0 --port 8000 --reload" start = "uvicorn app.main:app --host 0.0.0.0 --port 8000" + +[dependency-groups] +dev = [ + "ruff>=0.14.4", +] diff --git a/backend/uv.lock b/backend/uv.lock index 376925a..5effb39 100644 --- a/backend/uv.lock +++ b/backend/uv.lock @@ -86,6 +86,11 @@ dependencies = [ { name = "websockets" }, ] +[package.dev-dependencies] +dev = [ + { name = "ruff" }, +] + [package.metadata] requires-dist = [ { name = "azure-storage-blob", specifier = ">=12.26.0" }, @@ -110,6 +115,9 @@ requires-dist = [ { name = "websockets", specifier = ">=14.1" }, ] 
+[package.metadata.requires-dev] +dev = [{ name = "ruff", specifier = ">=0.14.4" }] + [[package]] name = "cachetools" version = "6.2.1" @@ -1789,6 +1797,32 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/64/8d/0133e4eb4beed9e425d9a98ed6e081a55d195481b7632472be1af08d2f6b/rsa-4.9.1-py3-none-any.whl", hash = "sha256:68635866661c6836b8d39430f97a996acbd61bfa49406748ea243539fe239762", size = 34696, upload-time = "2025-04-16T09:51:17.142Z" }, ] +[[package]] +name = "ruff" +version = "0.14.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/df/55/cccfca45157a2031dcbb5a462a67f7cf27f8b37d4b3b1cd7438f0f5c1df6/ruff-0.14.4.tar.gz", hash = "sha256:f459a49fe1085a749f15414ca76f61595f1a2cc8778ed7c279b6ca2e1fd19df3", size = 5587844, upload-time = "2025-11-06T22:07:45.033Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/17/b9/67240254166ae1eaa38dec32265e9153ac53645a6c6670ed36ad00722af8/ruff-0.14.4-py3-none-linux_armv6l.whl", hash = "sha256:e6604613ffbcf2297cd5dcba0e0ac9bd0c11dc026442dfbb614504e87c349518", size = 12606781, upload-time = "2025-11-06T22:07:01.841Z" }, + { url = "https://files.pythonhosted.org/packages/46/c8/09b3ab245d8652eafe5256ab59718641429f68681ee713ff06c5c549f156/ruff-0.14.4-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:d99c0b52b6f0598acede45ee78288e5e9b4409d1ce7f661f0fa36d4cbeadf9a4", size = 12946765, upload-time = "2025-11-06T22:07:05.858Z" }, + { url = "https://files.pythonhosted.org/packages/14/bb/1564b000219144bf5eed2359edc94c3590dd49d510751dad26202c18a17d/ruff-0.14.4-py3-none-macosx_11_0_arm64.whl", hash = "sha256:9358d490ec030f1b51d048a7fd6ead418ed0826daf6149e95e30aa67c168af33", size = 11928120, upload-time = "2025-11-06T22:07:08.023Z" }, + { url = "https://files.pythonhosted.org/packages/a3/92/d5f1770e9988cc0742fefaa351e840d9aef04ec24ae1be36f333f96d5704/ruff-0.14.4-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:81b40d27924f1f02dfa827b9c0712a13c0e4b108421665322218fc38caf615c2", size = 12370877, upload-time = "2025-11-06T22:07:10.015Z" }, + { url = "https://files.pythonhosted.org/packages/e2/29/e9282efa55f1973d109faf839a63235575519c8ad278cc87a182a366810e/ruff-0.14.4-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f5e649052a294fe00818650712083cddc6cc02744afaf37202c65df9ea52efa5", size = 12408538, upload-time = "2025-11-06T22:07:13.085Z" }, + { url = "https://files.pythonhosted.org/packages/8e/01/930ed6ecfce130144b32d77d8d69f5c610e6d23e6857927150adf5d7379a/ruff-0.14.4-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:aa082a8f878deeba955531f975881828fd6afd90dfa757c2b0808aadb437136e", size = 13141942, upload-time = "2025-11-06T22:07:15.386Z" }, + { url = "https://files.pythonhosted.org/packages/6a/46/a9c89b42b231a9f487233f17a89cbef9d5acd538d9488687a02ad288fa6b/ruff-0.14.4-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:1043c6811c2419e39011890f14d0a30470f19d47d197c4858b2787dfa698f6c8", size = 14544306, upload-time = "2025-11-06T22:07:17.631Z" }, + { url = "https://files.pythonhosted.org/packages/78/96/9c6cf86491f2a6d52758b830b89b78c2ae61e8ca66b86bf5a20af73d20e6/ruff-0.14.4-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a9f3a936ac27fb7c2a93e4f4b943a662775879ac579a433291a6f69428722649", size = 14210427, upload-time = "2025-11-06T22:07:19.832Z" }, + { url = 
"https://files.pythonhosted.org/packages/71/f4/0666fe7769a54f63e66404e8ff698de1dcde733e12e2fd1c9c6efb689cb5/ruff-0.14.4-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:95643ffd209ce78bc113266b88fba3d39e0461f0cbc8b55fb92505030fb4a850", size = 13658488, upload-time = "2025-11-06T22:07:22.32Z" }, + { url = "https://files.pythonhosted.org/packages/ee/79/6ad4dda2cfd55e41ac9ed6d73ef9ab9475b1eef69f3a85957210c74ba12c/ruff-0.14.4-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:456daa2fa1021bc86ca857f43fe29d5d8b3f0e55e9f90c58c317c1dcc2afc7b5", size = 13354908, upload-time = "2025-11-06T22:07:24.347Z" }, + { url = "https://files.pythonhosted.org/packages/b5/60/f0b6990f740bb15c1588601d19d21bcc1bd5de4330a07222041678a8e04f/ruff-0.14.4-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:f911bba769e4a9f51af6e70037bb72b70b45a16db5ce73e1f72aefe6f6d62132", size = 13587803, upload-time = "2025-11-06T22:07:26.327Z" }, + { url = "https://files.pythonhosted.org/packages/c9/da/eaaada586f80068728338e0ef7f29ab3e4a08a692f92eb901a4f06bbff24/ruff-0.14.4-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:76158a7369b3979fa878612c623a7e5430c18b2fd1c73b214945c2d06337db67", size = 12279654, upload-time = "2025-11-06T22:07:28.46Z" }, + { url = "https://files.pythonhosted.org/packages/66/d4/b1d0e82cf9bf8aed10a6d45be47b3f402730aa2c438164424783ac88c0ed/ruff-0.14.4-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:f3b8f3b442d2b14c246e7aeca2e75915159e06a3540e2f4bed9f50d062d24469", size = 12357520, upload-time = "2025-11-06T22:07:31.468Z" }, + { url = "https://files.pythonhosted.org/packages/04/f4/53e2b42cc82804617e5c7950b7079d79996c27e99c4652131c6a1100657f/ruff-0.14.4-py3-none-musllinux_1_2_i686.whl", hash = "sha256:c62da9a06779deecf4d17ed04939ae8b31b517643b26370c3be1d26f3ef7dbde", size = 12719431, upload-time = "2025-11-06T22:07:33.831Z" }, + { url = "https://files.pythonhosted.org/packages/a2/94/80e3d74ed9a72d64e94a7b7706b1c1ebaa315ef2076fd33581f6a1cd2f95/ruff-0.14.4-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:5a443a83a1506c684e98acb8cb55abaf3ef725078be40237463dae4463366349", size = 13464394, upload-time = "2025-11-06T22:07:35.905Z" }, + { url = "https://files.pythonhosted.org/packages/54/1a/a49f071f04c42345c793d22f6cf5e0920095e286119ee53a64a3a3004825/ruff-0.14.4-py3-none-win32.whl", hash = "sha256:643b69cb63cd996f1fc7229da726d07ac307eae442dd8974dbc7cf22c1e18fff", size = 12493429, upload-time = "2025-11-06T22:07:38.43Z" }, + { url = "https://files.pythonhosted.org/packages/bc/22/e58c43e641145a2b670328fb98bc384e20679b5774258b1e540207580266/ruff-0.14.4-py3-none-win_amd64.whl", hash = "sha256:26673da283b96fe35fa0c939bf8411abec47111644aa9f7cfbd3c573fb125d2c", size = 13635380, upload-time = "2025-11-06T22:07:40.496Z" }, + { url = "https://files.pythonhosted.org/packages/30/bd/4168a751ddbbf43e86544b4de8b5c3b7be8d7167a2a5cb977d274e04f0a1/ruff-0.14.4-py3-none-win_arm64.whl", hash = "sha256:dd09c292479596b0e6fec8cd95c65c3a6dc68e9ad17b8f2382130f87ff6a75bb", size = 12663065, upload-time = "2025-11-06T22:07:42.603Z" }, +] + [[package]] name = "setuptools" version = "80.9.0" diff --git a/docker-compose.yml b/docker-compose.yml index f980425..ed2506e 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -26,11 +26,13 @@ services: - app-network db: - # docker run -p 6379:6379 -p 8001:8001 redis/redis-stack image: redis/redis-stack:latest ports: - 6379:6379 - 8001:8001 + volumes: + - redis_data:/data # Persistent Redis data + restart: unless-stopped networks: - app-network diff 
--git a/frontend/package-lock.json b/frontend/package-lock.json index 4891967..4e11d08 100644 --- a/frontend/package-lock.json +++ b/frontend/package-lock.json @@ -8,6 +8,7 @@ "name": "frontend", "version": "0.0.0", "dependencies": { + "@ai-sdk/react": "^2.0.89", "@radix-ui/react-checkbox": "^1.3.3", "@radix-ui/react-collapsible": "^1.1.12", "@radix-ui/react-dialog": "^1.1.15", @@ -110,6 +111,30 @@ "zod": "^3.25.76 || ^4.1.8" } }, + "node_modules/@ai-sdk/react": { + "version": "2.0.89", + "resolved": "https://registry.npmjs.org/@ai-sdk/react/-/react-2.0.89.tgz", + "integrity": "sha512-r2uCqx042JOjNrSlDrjh7ufSIfU2BM6Lo4qe47KHkYuJjPfssxhLpJUCFLB01iV7Foyn/xpbq06Zr6WI4qUDgw==", + "license": "Apache-2.0", + "dependencies": { + "@ai-sdk/provider-utils": "3.0.16", + "ai": "5.0.89", + "swr": "^2.2.5", + "throttleit": "2.1.0" + }, + "engines": { + "node": ">=18" + }, + "peerDependencies": { + "react": "^18 || ^19 || ^19.0.0-rc", + "zod": "^3.25.76 || ^4.1.8" + }, + "peerDependenciesMeta": { + "zod": { + "optional": true + } + } + }, "node_modules/@alloc/quick-lru": { "version": "5.2.0", "resolved": "https://registry.npmjs.org/@alloc/quick-lru/-/quick-lru-5.2.0.tgz", @@ -8818,6 +8843,19 @@ "node": ">=8" } }, + "node_modules/swr": { + "version": "2.3.6", + "resolved": "https://registry.npmjs.org/swr/-/swr-2.3.6.tgz", + "integrity": "sha512-wfHRmHWk/isGNMwlLGlZX5Gzz/uTgo0o2IRuTMcf4CPuPFJZlq0rDaKUx+ozB5nBOReNV1kiOyzMfj+MBMikLw==", + "license": "MIT", + "dependencies": { + "dequal": "^2.0.3", + "use-sync-external-store": "^1.4.0" + }, + "peerDependencies": { + "react": "^16.11.0 || ^17.0.0 || ^18.0.0 || ^19.0.0" + } + }, "node_modules/tailwind-merge": { "version": "3.3.1", "resolved": "https://registry.npmjs.org/tailwind-merge/-/tailwind-merge-3.3.1.tgz", @@ -8857,6 +8895,18 @@ "url": "https://opencollective.com/webpack" } }, + "node_modules/throttleit": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/throttleit/-/throttleit-2.1.0.tgz", + "integrity": "sha512-nt6AMGKW1p/70DF/hGBdJB57B8Tspmbp5gfJ8ilhLnt7kkr2ye7hzD6NVG8GGErk2HWF34igrL2CXmNIkzKqKw==", + "license": "MIT", + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, "node_modules/tiny-invariant": { "version": "1.3.3", "resolved": "https://registry.npmjs.org/tiny-invariant/-/tiny-invariant-1.3.3.tgz", diff --git a/frontend/package.json b/frontend/package.json index 9eee342..64c7fe1 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -10,6 +10,7 @@ "preview": "vite preview" }, "dependencies": { + "@ai-sdk/react": "^2.0.89", "@radix-ui/react-checkbox": "^1.3.3", "@radix-ui/react-collapsible": "^1.1.12", "@radix-ui/react-dialog": "^1.1.15", diff --git a/frontend/src/components/ChatExample.tsx b/frontend/src/components/ChatExample.tsx new file mode 100644 index 0000000..221fd82 --- /dev/null +++ b/frontend/src/components/ChatExample.tsx @@ -0,0 +1,241 @@ +import { + Conversation, + ConversationContent, + ConversationScrollButton, +} from "@/components/ai-elements/conversation"; +import { Message, MessageContent } from "@/components/ai-elements/message"; +import { + PromptInput, + PromptInputActionAddAttachments, + PromptInputActionMenu, + PromptInputActionMenuContent, + PromptInputActionMenuTrigger, + PromptInputAttachment, + PromptInputAttachments, + PromptInputBody, + PromptInputButton, + PromptInputHeader, + type PromptInputMessage, + PromptInputSelect, + PromptInputSelectContent, + PromptInputSelectItem, + PromptInputSelectTrigger, + PromptInputSelectValue, + 
PromptInputSubmit, + PromptInputTextarea, + PromptInputFooter, + PromptInputTools, +} from "@/components/ai-elements/prompt-input"; +import { Action, Actions } from "@/components/ai-elements/actions"; +import { Fragment, useState } from "react"; +import { useChat } from "@ai-sdk/react"; +import { Response } from "@/components/ai-elements/response"; +import { CopyIcon, GlobeIcon, RefreshCcwIcon } from "lucide-react"; +import { + Source, + Sources, + SourcesContent, + SourcesTrigger, +} from "@/components/ai-elements/sources"; +import { + Reasoning, + ReasoningContent, + ReasoningTrigger, +} from "@/components/ai-elements/reasoning"; +import { Loader } from "@/components/ai-elements/loader"; +import { DefaultChatTransport } from "ai"; + +const models = [ + { + name: "GPT 4o", + value: "openai/gpt-4o", + }, + { + name: "Deepseek R1", + value: "deepseek/deepseek-r1", + }, +]; + +const ChatBotDemo = () => { + const [input, setInput] = useState(""); + const [model, setModel] = useState(models[0].value); + const [webSearch, setWebSearch] = useState(false); + const { messages, sendMessage, status, regenerate } = useChat({ + transport: new DefaultChatTransport({ + api: "/api/v1/chat", + }), + }); + + const handleSubmit = (message: PromptInputMessage) => { + const hasText = Boolean(message.text); + const hasAttachments = Boolean(message.files?.length); + + if (!(hasText || hasAttachments)) { + return; + } + + sendMessage( + { + text: message.text || "Sent with attachments", + files: message.files, + }, + { + body: { + model: model, + webSearch: webSearch, + }, + }, + ); + setInput(""); + }; + + return ( +
+
+ + + {messages.map((message) => ( +
+ {message.role === "assistant" && + message.parts.filter((part) => part.type === "source-url") + .length > 0 && ( + + part.type === "source-url", + ).length + } + /> + {message.parts + .filter((part) => part.type === "source-url") + .map((part, i) => ( + + + + ))} + + )} + {message.parts.map((part, i) => { + switch (part.type) { + case "text": + return ( + + + + {part.text} + + + {message.role === "assistant" && + i === messages.length - 1 && ( + + regenerate()} + label="Retry" + > + + + + navigator.clipboard.writeText(part.text) + } + label="Copy" + > + + + + )} + + ); + case "reasoning": + return ( + + + {part.text} + + ); + default: + return null; + } + })} +
+            ))}
+            {status === "submitted" && <Loader />}
+          
+ +
+ + + + + {(attachment) => } + + + + setInput(e.target.value)} + value={input} + /> + + + + + + + + + + setWebSearch(!webSearch)} + > + + Search + + { + setModel(value); + }} + value={model} + > + + + + + {models.map((model) => ( + + {model.name} + + ))} + + + + + + +
+
+ ); +}; + +export default ChatBotDemo; diff --git a/frontend/src/components/ChatTab.tsx b/frontend/src/components/ChatTab.tsx index d670986..2c9d531 100644 --- a/frontend/src/components/ChatTab.tsx +++ b/frontend/src/components/ChatTab.tsx @@ -1,12 +1,94 @@ -import { MessageCircle, Send, Bot, User } from "lucide-react"; -import { Button } from "@/components/ui/button"; -import { Input } from "@/components/ui/input"; +import { + Conversation, + ConversationContent, + ConversationScrollButton, +} from "@/components/ai-elements/conversation"; +import { Message, MessageContent } from "@/components/ai-elements/message"; +import { + PromptInput, + PromptInputActionAddAttachments, + PromptInputActionMenu, + PromptInputActionMenuContent, + PromptInputActionMenuTrigger, + PromptInputAttachment, + PromptInputAttachments, + PromptInputBody, + PromptInputHeader, + type PromptInputMessage, + PromptInputSubmit, + PromptInputTextarea, + PromptInputFooter, + PromptInputTools, +} from "@/components/ai-elements/prompt-input"; +import { Action, Actions } from "@/components/ai-elements/actions"; +import { Fragment, useState, useEffect } from "react"; +import { useChat } from "@ai-sdk/react"; +import { Response } from "@/components/ai-elements/response"; +import { + CopyIcon, + RefreshCcwIcon, + MessageCircle, + Bot, + AlertCircle, + PaperclipIcon, +} from "lucide-react"; +import { Loader } from "@/components/ai-elements/loader"; +import { DefaultChatTransport } from "ai"; interface ChatTabProps { selectedTema: string | null; } export function ChatTab({ selectedTema }: ChatTabProps) { + const [input, setInput] = useState(""); + const [error, setError] = useState(null); + + const { + messages, + sendMessage, + status, + regenerate, + error: chatError, + } = useChat({ + transport: new DefaultChatTransport({ + api: "/api/v1/agent/chat", + }), + onError: (error) => { + setError(`Error en el chat: ${error.message}`); + }, + }); + + // Clear error when starting new conversation + useEffect(() => { + if (status === "streaming") { + setError(null); + } + }, [status]); + + const handleSubmit = (message: PromptInputMessage) => { + const hasText = Boolean(message.text?.trim()); + const hasAttachments = Boolean(message.files?.length); + + if (!(hasText || hasAttachments)) { + return; + } + + setError(null); + sendMessage( + { + text: message.text || "Enviado con archivos adjuntos", + files: message.files, + }, + { + body: { + dataroom: selectedTema, + context: `Usuario está consultando sobre el dataroom: ${selectedTema}`, + }, + }, + ); + setInput(""); + }; + if (!selectedTema) { return (
@@ -37,55 +119,137 @@ export function ChatTab({ selectedTema }: ChatTabProps) {
- {/* Chat Messages Area */} -
-
- {/* Welcome Message */} -
-
- -
-
-

- ¡Hola! Soy tu asistente de IA para el dataroom {selectedTema}. - Puedes hacerme preguntas sobre los documentos almacenados aquí. -

-
-
+ {/* Chat Content */} +
+ + +
+ {/* Welcome Message */} + {messages.length === 0 && ( +
+
+ +
+
+

+ ¡Hola! Soy tu asistente de IA para el dataroom{" "} + {selectedTema}. Puedes hacerme preguntas + sobre los documentos almacenados aquí. +

+
+
+ )} - {/* Placeholder for future messages */} -
- -

- Chat Inteligente -

-

- El chat estará disponible próximamente. Podrás hacer preguntas sobre los - documentos y obtener respuestas basadas en el contenido del dataroom. -

-
-
-
+ {/* Error Message */} + {error && ( +
+
+ +
+
+

{error}

+
+
+ )} - {/* Chat Input Area */} -
-
-
-
- ( +
+ {message.parts.map((part, i) => { + switch (part.type) { + case "text": + return ( + + + + {part.text} + + + {message.role === "assistant" && + i === message.parts.length - 1 && ( + + regenerate()} + label="Regenerar" + disabled={status === "streaming"} + > + + + + navigator.clipboard.writeText(part.text) + } + label="Copiar" + > + + + + )} + + ); + default: + return null; + } + })} +
+              ))}
+            {status === "streaming" && <Loader />}
+            {status === "submitted" && <Loader />}
+          
+ + + + + {/* Chat Input */} +
+          
+            
+              
+                
+                  {(attachment) => <PromptInputAttachment data={attachment} />}
+                
+              
+              
+                <PromptInputTextarea
+                  onChange={(e) => setInput(e.target.value)}
+                  value={input}
                   placeholder={`Pregunta algo sobre ${selectedTema}...`}
-                  disabled
-                  className="w-full"
+                  disabled={status === "streaming" || status === "submitted"}
+                  className="min-h-[60px] resize-none border-0 focus:ring-0 transition-all duration-200 text-base px-4 py-3 bg-white rounded-xl"
                 />
-                
- -
-

- Esta funcionalidad estará disponible próximamente -

+ + + + + + + + + + + + + + +
diff --git a/frontend/src/components/DashboardTab.tsx b/frontend/src/components/DashboardTab.tsx index 34e9a22..a8507d7 100644 --- a/frontend/src/components/DashboardTab.tsx +++ b/frontend/src/components/DashboardTab.tsx @@ -1,10 +1,91 @@ -import { FileText, Users, Database, Activity } from "lucide-react"; +import { useState, useEffect } from "react"; +import { + FileText, + Database, + Activity, + TrendingUp, + AlertCircle, + CheckCircle, + Loader2, +} from "lucide-react"; +import { api } from "@/services/api"; interface DashboardTabProps { selectedTema: string | null; } +interface DataroomInfo { + name: string; + collection: string; + storage: string; + file_count: number; + total_size_bytes: number; + total_size_mb: number; + collection_exists: boolean; + vector_count: number | null; + collection_info: { + vectors_count: number; + indexed_vectors_count: number; + points_count: number; + segments_count: number; + status: string; + } | null; + file_types: Record; + recent_files: Array<{ + name: string; + size_mb: number; + last_modified: string; + }>; +} + export function DashboardTab({ selectedTema }: DashboardTabProps) { + const [dataroomInfo, setDataroomInfo] = useState(null); + const [loading, setLoading] = useState(false); + const [error, setError] = useState(null); + + useEffect(() => { + if (selectedTema) { + fetchDataroomInfo(); + } + }, [selectedTema]); + + const fetchDataroomInfo = async () => { + if (!selectedTema) return; + + setLoading(true); + setError(null); + + try { + const info = await api.getDataroomInfo(selectedTema); + setDataroomInfo(info); + } catch (err) { + const errorMessage = + err instanceof Error ? err.message : "Error desconocido"; + setError(`Error cargando información: ${errorMessage}`); + console.error("Error fetching dataroom info:", err); + } finally { + setLoading(false); + } + }; + + const formatFileTypes = (fileTypes: Record) => { + const entries = Object.entries(fileTypes); + if (entries.length === 0) return "Sin archivos"; + + return entries + .sort(([, a], [, b]) => b - a) // Sort by count descending + .slice(0, 3) // Take top 3 + .map(([ext, count]) => `${ext.toUpperCase()}: ${count}`) + .join(", "); + }; + + const formatBytes = (bytes: number) => { + if (bytes === 0) return "0 MB"; + const mb = bytes / (1024 * 1024); + if (mb < 1) return `${(bytes / 1024).toFixed(1)} KB`; + return `${mb.toFixed(1)} MB`; + }; + if (!selectedTema) { return (
@@ -16,6 +97,40 @@ export function DashboardTab({ selectedTema }: DashboardTabProps) { ); } + if (loading) { + return ( +
+ +

Cargando métricas...

+
+ ); + } + + if (error) { + return ( +
+
+ +
+

Error

+

{error}

+
+
+
+ ); + } + + if (!dataroomInfo) { + return ( +
+ +

+ No se pudo cargar la información del dataroom +

+
+ ); + } + return (
@@ -36,7 +151,12 @@ export function DashboardTab({ selectedTema }: DashboardTabProps) {

Archivos

-

--

+

+ {dataroomInfo.file_count} +

+

+ {formatFileTypes(dataroomInfo.file_types)} +

@@ -48,8 +168,15 @@ export function DashboardTab({ selectedTema }: DashboardTabProps) {
-

Almacenamiento

-

--

+

+ Almacenamiento +

+

+ {dataroomInfo.total_size_mb.toFixed(1)} MB +

+

+ {formatBytes(dataroomInfo.total_size_bytes)} +

@@ -62,37 +189,139 @@ export function DashboardTab({ selectedTema }: DashboardTabProps) {

Vectores

-

--

+

+ {dataroomInfo.vector_count ?? 0} +

+

+ {dataroomInfo.collection_exists + ? "Vectores indexados" + : "Sin vectores"} +

- {/* Activity Card */} + {/* Collection Status Card */}
- +
-

Actividad

-

--

+

Estado

+
+

+ {dataroomInfo.collection_exists ? "Activo" : "Inactivo"} +

+ {dataroomInfo.collection_exists ? ( + + ) : ( + + )} +
+ {dataroomInfo.collection_info ? ( +

+ {dataroomInfo.collection_info.indexed_vectors_count}/ + {dataroomInfo.collection_info.vectors_count} vectores + indexados +

+ ) : ( +

+ {dataroomInfo.collection_exists + ? "Colección sin datos" + : "Sin colección"} +

+ )}
- {/* Coming Soon Message */} -
-
- -

- Panel de Métricas + {/* Recent Files Section */} + {dataroomInfo.recent_files.length > 0 && ( +
+

+ Archivos Recientes

-

- Este panel se llenará con métricas detalladas y gráficos interactivos próximamente. -

+
+
+ {dataroomInfo.recent_files.map((file, index) => ( +
+
+ +
+

+ {file.name} +

+

+ {new Date(file.last_modified).toLocaleDateString( + "es-ES", + { + year: "numeric", + month: "short", + day: "numeric", + hour: "2-digit", + minute: "2-digit", + }, + )} +

+
+
+
+

+ {file.size_mb.toFixed(2)} MB +

+
+
+ ))} +
+
-

+ )} + + {/* Collection Details */} + {dataroomInfo.collection_info && ( +
+

+ Detalles de la Colección +

+
+
+
+

+ Total Vectores +

+

+ {dataroomInfo.collection_info.vectors_count} +

+
+
+

+ Vectores Indexados +

+

+ {dataroomInfo.collection_info.indexed_vectors_count} +

+
+
+

Puntos

+

+ {dataroomInfo.collection_info.points_count} +

+
+
+

Segmentos

+

+ {dataroomInfo.collection_info.segments_count} +

+
+
+
+
+ )}
); } diff --git a/frontend/src/components/DataroomView.tsx b/frontend/src/components/DataroomView.tsx index f9efeb7..4792ddf 100644 --- a/frontend/src/components/DataroomView.tsx +++ b/frontend/src/components/DataroomView.tsx @@ -5,11 +5,6 @@ import { Tabs, TabsContent, TabsList, TabsTrigger } from "@/components/ui/tabs"; import { FilesTab } from "./FilesTab"; import { DashboardTab } from "./DashboardTab"; import { ChatTab } from "./ChatTab"; -import { - CheckCircle2, - AlertCircle, - Loader2, -} from "lucide-react"; interface DataroomViewProps { onProcessingChange?: (isProcessing: boolean) => void; @@ -18,133 +13,27 @@ interface DataroomViewProps { export function DataroomView({ onProcessingChange }: DataroomViewProps = {}) { const { selectedTema, files } = useFileStore(); - // Collection status states - const [isCheckingCollection, setIsCheckingCollection] = useState(false); - const [collectionExists, setCollectionExists] = useState( - null, - ); - const [collectionError, setCollectionError] = useState(null); const [processing, setProcessing] = useState(false); - // Check collection status when tema changes - useEffect(() => { - checkCollectionStatus(); - }, [selectedTema]); - - // Load files when tema changes - useEffect(() => { - loadFiles(); - }, [selectedTema]); - - const checkCollectionStatus = async () => { - if (!selectedTema) { - setCollectionExists(null); - return; - } - - setIsCheckingCollection(true); - setCollectionError(null); - - try { - const result = await api.checkCollectionExists(selectedTema); - setCollectionExists(result.exists); - } catch (err) { - console.error("Error checking collection:", err); - setCollectionError( - err instanceof Error ? err.message : "Error al verificar colección", - ); - setCollectionExists(null); - } finally { - setIsCheckingCollection(false); - } - }; - - const handleCreateCollection = async () => { - if (!selectedTema) return; - - setIsCheckingCollection(true); - setCollectionError(null); - - try { - const result = await api.createCollection(selectedTema); - if (result.success) { - setCollectionExists(true); - console.log(`Collection "${selectedTema}" created successfully`); - } - } catch (err) { - console.error("Error creating collection:", err); - setCollectionError( - err instanceof Error ? err.message : "Error al crear colección", - ); - } finally { - setIsCheckingCollection(false); - } - }; - - const loadFiles = async () => { - // This will be handled by FilesTab component - }; - const handleProcessingChange = (isProcessing: boolean) => { setProcessing(isProcessing); onProcessingChange?.(isProcessing); }; - const totalFiles = files.length; - return (
-
-

- {selectedTema - ? `Dataroom: ${selectedTema}` - : "Selecciona un dataroom"} -

- {/* Collection Status Indicator */} - {selectedTema && ( -
- {isCheckingCollection ? ( - <> - - - Verificando... - - - ) : collectionExists === true ? ( - <> - - - Colección disponible - - - ) : collectionExists === false ? ( - <> - - - - ) : collectionError ? ( - <> - - - Error de conexión - - - ) : null} -
- )} -
+

+ {selectedTema + ? `Dataroom: ${selectedTema}` + : "Selecciona un dataroom"} +

{selectedTema - ? `${totalFiles} archivo${totalFiles !== 1 ? "s" : ""}` - : "Selecciona un dataroom de la barra lateral para ver sus archivos"} + ? "Gestiona archivos, consulta métricas y chatea con IA sobre el contenido" + : "Selecciona un dataroom de la barra lateral para comenzar"}

diff --git a/frontend/src/components/FilesTab.tsx b/frontend/src/components/FilesTab.tsx index 9563808..8a2e9fd 100644 --- a/frontend/src/components/FilesTab.tsx +++ b/frontend/src/components/FilesTab.tsx @@ -1,4 +1,4 @@ -import { useState } from "react"; +import { useState, useEffect } from "react"; import { useFileStore } from "@/stores/fileStore"; import { api } from "@/services/api"; import { Button } from "@/components/ui/button"; @@ -80,6 +80,11 @@ export function FilesTab({ const [chunkingFileTema, setChunkingFileTema] = useState(""); const [chunkingCollectionName, setChunkingCollectionName] = useState(""); + // Load files when component mounts or selectedTema changes + useEffect(() => { + loadFiles(); + }, [selectedTema]); + const loadFiles = async () => { // Don't load files if no dataroom is selected if (!selectedTema) { diff --git a/frontend/src/components/Sidebar.tsx b/frontend/src/components/Sidebar.tsx index 12348eb..55b8742 100644 --- a/frontend/src/components/Sidebar.tsx +++ b/frontend/src/components/Sidebar.tsx @@ -273,14 +273,11 @@ export function Sidebar({ collapsed ? "justify-center" : "justify-between", )} > -

- {collapsed ? "Rooms" : "Datarooms"} -

+ {!collapsed && ( +

+ Datarooms +

+ )} {renderWithTooltip( "Crear dataroom",