wip chat
This commit is contained in:
39
backend/app/agents/form_auditor/__init__.py
Normal file
39
backend/app/agents/form_auditor/__init__.py
Normal file
@@ -0,0 +1,39 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
from .agent import agent, prepare_initial_findings
|
||||
from .models import (
|
||||
AuditReport,
|
||||
ExtractedIrsForm990PfDataSchema,
|
||||
ValidatorState,
|
||||
)
|
||||
|
||||
|
||||
async def build_audit_report(payload: dict[str, Any]) -> AuditReport:
    """Validate an extraction payload, run the checks, and ask the agent for a report.

    Args:
        payload: Mapping that must contain an ``"extraction"`` entry and may
            contain a ``"metadata"`` entry. Metadata is used only when it is a
            dict; its keys are converted to strings.

    Returns:
        The output of the agent run.

    Raises:
        ValueError: If ``payload`` has no ``"extraction"`` value (missing or None).
    """
    raw_extraction = payload.get("extraction")
    if raw_extraction is None:
        raise ValueError("Payload missing 'extraction' key.")
    extraction = ExtractedIrsForm990PfDataSchema.model_validate(raw_extraction)

    # Deterministic findings are computed up front and handed to the agent as deps.
    deterministic_findings = prepare_initial_findings(extraction)

    raw_metadata = payload.get("metadata")
    if isinstance(raw_metadata, dict):
        metadata: dict[str, Any] = {
            str(key): value for key, value in raw_metadata.items()
        }
    else:
        metadata = {}

    deps = ValidatorState(
        extraction=extraction,
        initial_findings=deterministic_findings,
        metadata=metadata,
    )

    instructions = (
        "Review the Form 990 extraction and deterministic checks. Validate or adjust "
        "the findings, add any additional issues or mitigations, and craft narrative "
        "section summaries that highlight the most material points. Focus on concrete "
        "evidence; do not fabricate figures."
    )
    run_result = await agent.run(instructions, deps=deps)
    return run_result.output
|
||||
155
backend/app/agents/form_auditor/agent.py
Normal file
155
backend/app/agents/form_auditor/agent.py
Normal file
@@ -0,0 +1,155 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import Iterable
|
||||
|
||||
from pydantic_ai import Agent, RunContext
|
||||
from pydantic_ai.models.openai import OpenAIChatModel
|
||||
from pydantic_ai.providers.azure import AzureProvider
|
||||
|
||||
from app.core.config import settings
|
||||
|
||||
from .checks import (
|
||||
aggregate_findings,
|
||||
build_section_summaries,
|
||||
check_balance_sheet_presence,
|
||||
check_board_engagement,
|
||||
check_expense_totals,
|
||||
check_fundraising_alignment,
|
||||
check_governance_policies,
|
||||
check_missing_operational_details,
|
||||
check_revenue_totals,
|
||||
compose_overall_summary,
|
||||
irs_ein_lookup,
|
||||
)
|
||||
from .models import (
|
||||
AuditFinding,
|
||||
AuditReport,
|
||||
ExtractedIrsForm990PfDataSchema,
|
||||
Severity,
|
||||
ValidatorState,
|
||||
)
|
||||
|
||||
# Azure OpenAI connection; endpoint, API version and key come from application settings.
provider = AzureProvider(
    azure_endpoint=settings.AZURE_OPENAI_ENDPOINT,
    api_version=settings.AZURE_OPENAI_API_VERSION,
    api_key=settings.AZURE_OPENAI_API_KEY,
)
# Chat model used by the agent configured further down in this module.
model = OpenAIChatModel(model_name="gpt-4o", provider=provider)
# NOTE: the bare ``agent = Agent(model=model)`` that used to follow was dropped:
# the name is rebound to the fully configured agent below before anything uses it.
|
||||
|
||||
|
||||
def prepare_initial_findings(
    extraction: ExtractedIrsForm990PfDataSchema,
) -> list[AuditFinding]:
    """Run every deterministic check against the extraction and collect the findings.

    The single-finding checks run first, in a fixed order, followed by the
    governance policy check, which may contribute several findings (or none).
    """
    single_finding_checks = (
        check_revenue_totals,
        check_expense_totals,
        check_fundraising_alignment,
        check_balance_sheet_presence,
        check_board_engagement,
        check_missing_operational_details,
    )
    collected = [run_check(extraction) for run_check in single_finding_checks]
    # The governance check returns a list, so it is appended element-wise.
    collected += check_governance_policies(extraction)
    return collected
|
||||
|
||||
|
||||
def _merge_findings(
    findings: Iterable[AuditFinding],
    added: Iterable[AuditFinding],
) -> list[AuditFinding]:
    """Combine two series of findings, keyed by ``check_id``.

    A later finding replaces an earlier one with the same ``check_id`` (so
    entries in ``added`` win over ``findings``) while keeping the position of
    the first occurrence; new ids are appended in the order encountered.
    """
    by_check_id: dict[str, AuditFinding] = {}
    for source in (findings, added):
        for item in source:
            by_check_id[item.check_id] = item
    return list(by_check_id.values())
|
||||
|
||||
|
||||
# The auditing agent: receives a ValidatorState as dependencies and must
# produce an AuditReport. Tools and the output validator are registered below.
agent = Agent(
    name="FormValidator",
    model=model,
    deps_type=ValidatorState,
    output_type=AuditReport,
    system_prompt=(
        "You are a Form 990 auditor. Review the extraction data and deterministic "
        "checks provided in deps. Use tools to confirm calculations, add or adjust "
        "findings, supply mitigation guidance, and craft concise section summaries. "
        "The AuditReport must include severity (`Pass`, `Warning`, `Error`), "
        "confidence scores, mitigation advice, section summaries, and an overall summary. "
        "Ground every statement in supplied data; do not invent financial figures."
    ),
)
|
||||
|
||||
|
||||
@agent.tool
def revenue_check(ctx: RunContext[ValidatorState]) -> AuditFinding:
    """Check whether the revenue line items add up to the reported total revenue."""
    # The docstring doubles as the tool description exposed to the model.
    return check_revenue_totals(ctx.deps.extraction)
|
||||
|
||||
|
||||
@agent.tool
def expense_check(ctx: RunContext[ValidatorState]) -> AuditFinding:
    """Check whether program, management and fundraising expenses add up to total expenses."""
    # The docstring doubles as the tool description exposed to the model.
    return check_expense_totals(ctx.deps.extraction)
|
||||
|
||||
|
||||
@agent.tool
def fundraising_alignment_check(ctx: RunContext[ValidatorState]) -> AuditFinding:
    """Compare functional fundraising expenses with the reported fundraising event expenses."""
    # The docstring doubles as the tool description exposed to the model.
    return check_fundraising_alignment(ctx.deps.extraction)
|
||||
|
||||
|
||||
@agent.tool
async def verify_ein(ctx: RunContext[ValidatorState]) -> AuditFinding:
    """Look up the organization's EIN and report whether it could be confirmed."""
    # The docstring doubles as the tool description exposed to the model.
    ein = ctx.deps.extraction.core_organization_metadata.ein
    exists, confidence, note = await irs_ein_lookup(ein)

    # Both outcomes share the same check id and category; only the verdict differs.
    if exists:
        severity = Severity.PASS
        message = "EIN confirmed against IRS index."
        mitigation = "Document verification in the filing workpapers."
    else:
        severity = Severity.WARNING
        message = f"EIN {ein} could not be confirmed. {note}"
        mitigation = (
            "Verify the EIN against the IRS EO BMF or IRS determination letter."
        )

    return AuditFinding(
        check_id="irs_ein_match",
        category="Compliance",
        severity=severity,
        message=message,
        mitigation=mitigation,
        confidence=confidence,
    )
|
||||
|
||||
|
||||
@agent.output_validator
def finalize_report(
    ctx: RunContext[ValidatorState],
    report: AuditReport,
) -> AuditReport:
    """Post-process the agent's report with deterministic data from the run deps.

    The agent's findings are merged over the initial findings (same ``check_id``
    means the agent's version wins); overall severity, section summaries and the
    overall summary are then recomputed from the merged list. Organisation name
    and EIN prefer the extraction values, falling back to the report. ``year`` is
    taken from ``metadata["return_year"]`` when it converts to ``int``, otherwise
    it is set to ``None``. ``notes`` is filled from ``metadata["source"]`` only
    when the report has no notes.
    """
    deps = ctx.deps
    combined = _merge_findings(deps.initial_findings, report.findings)

    # Metadata is only consulted when it is a dict; anything else counts as empty.
    meta = deps.metadata if isinstance(deps.metadata, dict) else {}

    notes = report.notes
    source = meta.get("source")
    if notes is None and source:
        notes = f"Reviewed data source: {source}."

    year: int | None = None
    raw_year = meta.get("return_year")
    if raw_year is not None:
        try:
            year = int(raw_year)
        except (TypeError, ValueError):
            # Best effort: an unparseable return year leaves the field unset.
            year = None

    core = deps.extraction.core_organization_metadata
    return report.model_copy(
        update={
            "organisation_ein": core.ein or report.organisation_ein,
            "organisation_name": core.legal_name or report.organisation_name,
            "year": year,
            "findings": combined,
            "overall_severity": aggregate_findings(combined),
            "sections": build_section_summaries(combined),
            "overall_summary": compose_overall_summary(combined),
            "notes": notes,
        }
    )
|
||||
282
backend/app/agents/form_auditor/checks.py
Normal file
282
backend/app/agents/form_auditor/checks.py
Normal file
@@ -0,0 +1,282 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from collections import Counter, defaultdict
|
||||
|
||||
from .models import (
|
||||
AuditFinding,
|
||||
AuditSectionSummary,
|
||||
ExtractedIrsForm990PfDataSchema,
|
||||
Severity,
|
||||
)
|
||||
|
||||
|
||||
def aggregate_findings(findings: list[AuditFinding]) -> Severity:
    """Return the most severe level among ``findings`` (``Severity.PASS`` if empty).

    Severity ranks as Error > Warning > Pass.
    """
    rank = {Severity.PASS: 1, Severity.WARNING: 2, Severity.ERROR: 3}
    # Seeding with PASS gives the empty-input default; max() keeps the first
    # element of the highest rank, like a strict ">" scan.
    candidates = [Severity.PASS, *(item.severity for item in findings)]
    return max(candidates, key=lambda level: rank[level])
|
||||
|
||||
|
||||
def check_revenue_totals(data: ExtractedIrsForm990PfDataSchema) -> AuditFinding:
    """Check that the revenue line items sum to the reported total revenue.

    Every field of the revenue breakdown except ``total_revenue`` is summed; a
    difference of at most 1 from the reported total counts as a match (Pass),
    anything else is an Error.
    """
    revenue = data.revenue_breakdown
    reported_total = revenue.total_revenue

    line_items = revenue.model_dump()
    line_items.pop("total_revenue", None)
    subtotal = sum(line_items.values())

    reconciles = abs(subtotal - reported_total) <= 1
    if not reconciles:
        return AuditFinding(
            check_id="revenue_totals",
            category="Revenue",
            severity=Severity.ERROR,
            message=(
                f"Revenue categories sum (${subtotal:,.2f}) does not equal reported total "
                f"(${reported_total:,.2f})."
            ),
            mitigation="Recalculate revenue totals and correct line items or Schedule A before filing.",
            confidence=0.95,
        )

    return AuditFinding(
        check_id="revenue_totals",
        category="Revenue",
        severity=Severity.PASS,
        message=f"Revenue categories sum (${subtotal:,.2f}) matches total revenue.",
        mitigation="Maintain detailed support for each revenue source to preserve reconciliation trail.",
        confidence=0.95,
    )
|
||||
|
||||
|
||||
def check_expense_totals(data: ExtractedIrsForm990PfDataSchema) -> AuditFinding:
    """Check that the three functional expense categories sum to total expenses.

    Program services, management & general, and fundraising expenses are added
    together; a difference of at most 1 from ``total_expenses`` is a Pass,
    anything else is an Error.
    """
    expenses = data.expenses_breakdown
    reported_total = expenses.total_expenses
    subtotal = (
        expenses.program_services_expenses
        + expenses.management_general_expenses
        + expenses.fundraising_expenses
    )

    reconciles = abs(subtotal - reported_total) <= 1
    if not reconciles:
        return AuditFinding(
            check_id="expense_totals",
            category="Expenses",
            severity=Severity.ERROR,
            message=(
                f"Functional expenses (${subtotal:,.2f}) do not reconcile to total expenses "
                f"(${reported_total:,.2f})."
            ),
            mitigation="Review Part I, lines 23–27 and reclassify functional expenses to tie to Part II totals.",
            confidence=0.95,
        )

    return AuditFinding(
        check_id="expense_totals",
        category="Expenses",
        severity=Severity.PASS,
        message="Functional expenses match total expenses.",
        mitigation="Keep functional allocation workpapers to support the reconciliation.",
        confidence=0.95,
    )
|
||||
|
||||
|
||||
def check_fundraising_alignment(
    data: ExtractedIrsForm990PfDataSchema,
) -> AuditFinding:
    """Compare functional fundraising expenses with reported event expenses.

    A gap of at most 1 is a Pass. A larger gap is a Warning when fundraising
    expenses are non-zero and the gap is within 10% of them; otherwise an Error.
    """
    reported_fundraising = data.expenses_breakdown.fundraising_expenses
    event_expenses = data.fundraising_grantmaking.total_fundraising_event_expenses
    gap = abs(reported_fundraising - event_expenses)

    if gap <= 1:
        return AuditFinding(
            check_id="fundraising_alignment",
            category="Fundraising",
            severity=Severity.PASS,
            message="Fundraising functional expenses align with reported event expenses.",
            mitigation="Retain event ledgers and allocations to support matching totals.",
            confidence=0.9,
        )

    # Small relative variances are downgraded to a warning.
    if reported_fundraising and gap <= reported_fundraising * 0.1:
        level = Severity.WARNING
    else:
        level = Severity.ERROR

    return AuditFinding(
        check_id="fundraising_alignment",
        category="Fundraising",
        severity=level,
        message=(
            f"Fundraising functional expenses (${reported_fundraising:,.2f}) differ from "
            f"reported event expenses (${event_expenses:,.2f}) by ${gap:,.2f}."
        ),
        mitigation="Reconcile Schedule G and Part I allocations to eliminate the variance.",
        confidence=0.85,
    )
|
||||
|
||||
|
||||
def check_balance_sheet_presence(
    data: ExtractedIrsForm990PfDataSchema,
) -> AuditFinding:
    """Report whether the extraction carries any balance sheet data.

    A falsy ``data.balance_sheet`` yields a Warning with id
    ``balance_sheet_absent``; otherwise a Pass with id ``balance_sheet_present``.
    """
    if not data.balance_sheet:
        return AuditFinding(
            check_id="balance_sheet_absent",
            category="Balance Sheet",
            severity=Severity.WARNING,
            message="Balance sheet section is empty; confirm Part II filing requirements.",
            mitigation="Populate assets, liabilities, and net assets or attach supporting schedules.",
            confidence=0.6,
        )

    return AuditFinding(
        check_id="balance_sheet_present",
        category="Balance Sheet",
        severity=Severity.PASS,
        message="Balance sheet data is present.",
        mitigation="Ensure ending net assets tie to Part I, line 30.",
        confidence=0.7,
    )
|
||||
|
||||
|
||||
def check_governance_policies(
    data: ExtractedIrsForm990PfDataSchema,
) -> list[AuditFinding]:
    """Flag governance disclosures that are missing, negative, or blank.

    Policy fields produce a ``<field>_missing`` Warning when the value is empty
    or one of "no" / "n" / "false" (case-insensitive). Affirmative fields
    produce a ``<field>_blank`` Warning only when the value is empty. Fields
    that pass produce no finding, so the result may be an empty list.
    """
    disclosure = data.governance_management_disclosure
    negative_answers = {"no", "n", "false"}

    # (field name, mitigation text) pairs, checked in this order.
    policy_checks = (
        (
            "conflict_of_interest_policy",
            "Document the policy in Part VI or adopt one prior to filing.",
        ),
        (
            "whistleblower_policy",
            "Document whistleblower protections for staff and volunteers.",
        ),
        (
            "document_retention_policy",
            "Adopt and document a record retention policy.",
        ),
    )
    affirmative_checks = (
        (
            "financial_statements_reviewed",
            "Capture whether the board reviewed or audited year-end financials.",
        ),
        (
            "form_990_provided_to_governing_body",
            "Provide Form 990 to the board before submission and note the date of review.",
        ),
    )

    results: list[AuditFinding] = []

    for field, mitigation in policy_checks:
        answer = (getattr(disclosure, field) or "").strip()
        if answer and answer.lower() not in negative_answers:
            continue
        results.append(
            AuditFinding(
                check_id=f"{field}_missing",
                category="Governance",
                severity=Severity.WARNING,
                message=f"{field.replace('_', ' ').title()} not reported or marked 'No'.",
                mitigation=mitigation,
                confidence=0.55,
            )
        )

    for field, mitigation in affirmative_checks:
        answer = (getattr(disclosure, field) or "").strip()
        if answer:
            continue
        results.append(
            AuditFinding(
                check_id=f"{field}_blank",
                category="Governance",
                severity=Severity.WARNING,
                message=f"{field.replace('_', ' ').title()} left blank.",
                mitigation=mitigation,
                confidence=0.5,
            )
        )

    return results
|
||||
|
||||
|
||||
def check_board_engagement(data: ExtractedIrsForm990PfDataSchema) -> AuditFinding:
    """Check that listed officers and directors report enough combined weekly hours.

    Hours are summed across all listed people (entries with ``None`` hours are
    ignored). A total of at least 5 is a Pass; anything lower is a Warning.
    """
    total_hours = sum(
        person.average_hours_per_week
        for person in data.officers_directors_trustees_key_employees
        if person.average_hours_per_week is not None
    )

    meets_threshold = total_hours >= 5
    if not meets_threshold:
        return AuditFinding(
            check_id="board_hours",
            category="Governance",
            severity=Severity.WARNING,
            message=(
                f"Aggregate reported board hours ({total_hours:.1f} per week) are low; "
                "confirm entries reflect actual governance involvement."
            ),
            mitigation="Verify hours in Part VII; update if officers volunteer significant time.",
            confidence=0.6,
        )

    return AuditFinding(
        check_id="board_hours",
        category="Governance",
        severity=Severity.PASS,
        message="Officer and director time commitments appear reasonable.",
        mitigation="Continue documenting board attendance and oversight responsibilities.",
        confidence=0.7,
    )
|
||||
|
||||
|
||||
def check_missing_operational_details(
    data: ExtractedIrsForm990PfDataSchema,
) -> AuditFinding:
    """Report whether fundraising method descriptions were provided.

    A value that is empty after stripping whitespace yields a Warning with id
    ``fundraising_methods_missing``; otherwise a Pass with id
    ``fundraising_methods_documented``.
    """
    raw_text = data.functional_operational_data.fundraising_method_descriptions
    description = (raw_text or "").strip()

    if not description:
        return AuditFinding(
            check_id="fundraising_methods_missing",
            category="Operations",
            severity=Severity.WARNING,
            message="Fundraising method descriptions are blank.",
            mitigation="Add a brief Schedule G narrative describing major fundraising approaches.",
            confidence=0.55,
        )

    return AuditFinding(
        check_id="fundraising_methods_documented",
        category="Operations",
        severity=Severity.PASS,
        message="Fundraising method descriptions provided.",
        mitigation="Update narratives annually to reflect any new campaigns or joint ventures.",
        confidence=0.65,
    )
|
||||
|
||||
|
||||
def build_section_summaries(findings: list[AuditFinding]) -> list[AuditSectionSummary]:
    """Build one summary per finding category.

    Each summary carries the category's worst severity, a count of passes,
    warnings and errors, and the mean confidence of its findings. Summaries are
    ordered most severe first, then alphabetically by section name
    (case-insensitive).
    """
    by_category: defaultdict[str, list[AuditFinding]] = defaultdict(list)
    for item in findings:
        by_category[item.category].append(item)

    rank = {Severity.ERROR: 3, Severity.WARNING: 2, Severity.PASS: 1}

    def _summarise(category: str, items: list[AuditFinding]) -> AuditSectionSummary:
        # Tally severities for the counts line and average the confidences.
        tally = Counter(item.severity for item in items)
        counts_text = (
            f"{tally.get(Severity.PASS, 0)} passes, "
            f"{tally.get(Severity.WARNING, 0)} warnings, "
            f"{tally.get(Severity.ERROR, 0)} errors"
        )
        mean_confidence = sum(item.confidence for item in items) / len(items)
        return AuditSectionSummary(
            section=category,
            severity=aggregate_findings(items),
            summary=f"{category} review: {counts_text}.",
            confidence=mean_confidence,
        )

    sections = [_summarise(category, items) for category, items in by_category.items()]
    return sorted(
        sections,
        key=lambda entry: (-rank[entry.severity], entry.section.lower()),
    )
|
||||
|
||||
|
||||
def compose_overall_summary(findings: list[AuditFinding]) -> str:
    """Return a one-sentence tally of errors, warnings and passed checks.

    Severities with a zero count are omitted. An empty ``findings`` list yields
    a fixed "no findings" sentence.
    """
    if not findings:
        return "No automated findings generated."

    tally = Counter(item.severity for item in findings)
    # Reported most severe first.
    labelled = (
        (Severity.ERROR, "error(s)"),
        (Severity.WARNING, "warning(s)"),
        (Severity.PASS, "check(s) passed"),
    )
    fragments = [
        f"{tally[level]} {label}" for level, label in labelled if tally.get(level)
    ]
    return "Overall results: " + ", ".join(fragments) + "."
|
||||
|
||||
|
||||
async def irs_ein_lookup(_ein: str) -> tuple[bool, float, str]:
    """Placeholder EIN lookup that performs no verification.

    The argument is ignored. Always returns ``(False, 0.2, note)``: not found,
    a confidence of 0.2, and a note saying verification is unavailable.
    """
    found = False
    confidence = 0.2
    note = "IRS verification unavailable in current environment."
    return found, confidence, note
|
||||
38
backend/app/agents/form_auditor/cli.py
Normal file
38
backend/app/agents/form_auditor/cli.py
Normal file
@@ -0,0 +1,38 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import asyncio
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
from . import build_audit_report
|
||||
|
||||
__all__ = ["build_audit_report", "main"]
|
||||
|
||||
|
||||
def _load_payload(path: Path) -> dict:
    """Read the UTF-8 file at ``path`` and return its parsed JSON content."""
    with path.open(encoding="utf-8") as handle:
        return json.loads(handle.read())
|
||||
|
||||
|
||||
def _print_report(report: dict) -> None:
    """Print ``report`` to standard output as JSON indented by two spaces."""
    rendered = json.dumps(report, indent=2)
    print(rendered)
|
||||
|
||||
|
||||
def main(argv: list[str] | None = None) -> None:
    """Command-line entry point: audit a JSON payload file and print the report.

    Args:
        argv: Arguments to parse. ``None`` makes argparse read ``sys.argv``.
            The single optional positional argument is the payload path and
            defaults to ``example_data.json``.
    """
    parser = argparse.ArgumentParser(
        description="Validate a Form 990 extraction payload using the Form Auditor agent."
    )
    parser.add_argument(
        "payload",
        nargs="?",
        default="example_data.json",
        help="Path to a JSON file containing the extraction payload.",
    )
    args = parser.parse_args(argv)

    payload_path = Path(args.payload).expanduser()
    payload = _load_payload(payload_path)

    # build_audit_report is a coroutine; run it to completion on a fresh event loop.
    report = asyncio.run(build_audit_report(payload))
    _print_report(report.model_dump())


# Without this guard, running the module as a script or with ``-m`` would only
# define the functions and exit without auditing anything.
if __name__ == "__main__":
    main()
|
||||
573
backend/app/agents/form_auditor/models.py
Normal file
573
backend/app/agents/form_auditor/models.py
Normal file
@@ -0,0 +1,573 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from enum import Enum
|
||||
from typing import Any
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
class Severity(str, Enum):
    # Outcome level of an audit finding. Members are also plain strings, so
    # they compare equal to their values.
    PASS = "Pass"
    WARNING = "Warning"
    ERROR = "Error"
|
||||
|
||||
|
||||
class AuditFinding(BaseModel):
    # Result of one audit check.
    check_id: str  # identifier of the check; findings are merged on this key
    category: str  # grouping key used when building section summaries
    severity: Severity
    message: str
    mitigation: str | None = None  # optional remediation advice
    confidence: float = Field(le=1.0, ge=0.0)  # constrained to the range [0, 1]
|
||||
|
||||
|
||||
class AuditSectionSummary(BaseModel):
    # Summary of the findings for one section (finding category).
    section: str
    severity: Severity
    summary: str
    confidence: float = Field(le=1.0, ge=0.0)  # constrained to the range [0, 1]
|
||||
|
||||
|
||||
class AuditReport(BaseModel):
    # Complete audit result for one organisation's return.
    organisation_ein: str
    organisation_name: str
    year: int | None  # required field, but may be None
    overall_severity: Severity
    findings: list[AuditFinding]
    sections: list[AuditSectionSummary] = Field(default_factory=list)
    overall_summary: str | None = None
    notes: str | None = None
|
||||
|
||||
|
||||
class CoreOrgMetadata(BaseModel):
    # Compact set of organisation identity fields; only the incorporation
    # state is optional.
    ein: str
    legal_name: str
    return_type: str
    accounting_method: str
    incorporation_state: str | None = None
|
||||
|
||||
|
||||
class CoreOrganizationMetadata(BaseModel):
    # Identity and filing-level details of the organisation. Every field is a
    # required string.
    ein: str = Field(
        ...,
        title="Employer Identification Number (EIN)",
        description="Unique IRS identifier for the organization.",
    )
    legal_name: str = Field(
        ...,
        title="Legal Name of Organization",
        description="Official registered name of the organization.",
    )
    phone_number: str = Field(
        ...,
        title="Phone Number",
        description="Primary contact phone number.",
    )
    website_url: str = Field(
        ...,
        title="Website URL",
        description="Organization's website address.",
    )
    return_type: str = Field(
        ...,
        title="Return Type",
        description="Type of IRS return filed (e.g., 990, 990-EZ, 990-PF).",
    )
    amended_return: str = Field(
        ...,
        title="Amended Return Flag",
        description="Indicates if the return is amended.",
    )
    group_exemption_number: str = Field(
        ...,
        title="Group Exemption Number",
        description="IRS group exemption number, if applicable.",
    )
    subsection_code: str = Field(
        ...,
        title="Subsection Code",
        description="IRS subsection code (e.g., 501(c)(3)).",
    )
    ruling_date: str = Field(
        ...,
        title="Ruling/Determination Letter Date",
        description="Date of IRS ruling or determination letter.",
    )
    accounting_method: str = Field(
        ...,
        title="Accounting Method",
        description="Accounting method used (cash, accrual, other).",
    )
    organization_type: str = Field(
        ...,
        title="Organization Type",
        description="Legal structure (corporation, trust, association, etc.).",
    )
    year_of_formation: str = Field(
        ...,
        title="Year of Formation",
        description="Year the organization was formed.",
    )
    incorporation_state: str = Field(
        ...,
        title="Incorporation State",
        description="State of incorporation.",
    )
|
||||
|
||||
|
||||
class RevenueBreakdown(BaseModel):
    # Reported total revenue followed by the individual revenue line items.
    # Every field is a required float.
    total_revenue: float = Field(
        ...,
        title="Total Revenue",
        description="Sum of all revenue sources.",
    )
    contributions_gifts_grants: float = Field(
        ...,
        title="Contributions, Gifts, and Grants",
        description="Revenue from donations and grants.",
    )
    program_service_revenue: float = Field(
        ...,
        title="Program Service Revenue",
        description="Revenue from program services.",
    )
    membership_dues: float = Field(
        ...,
        title="Membership Dues",
        description="Revenue from membership dues.",
    )
    investment_income: float = Field(
        ...,
        title="Investment Income",
        description="Revenue from interest and dividends.",
    )
    gains_losses_sales_assets: float = Field(
        ...,
        title="Gains/Losses from Sales of Assets",
        description="Net gains or losses from asset sales.",
    )
    rental_income: float = Field(
        ...,
        title="Rental Income",
        description="Income from rental of real estate or equipment.",
    )
    related_organizations_revenue: float = Field(
        ...,
        title="Related Organizations Revenue",
        description="Revenue from related organizations.",
    )
    gaming_revenue: float = Field(
        ...,
        title="Gaming Revenue",
        description="Revenue from gaming activities.",
    )
    other_revenue: float = Field(
        ...,
        title="Other Revenue",
        description="Miscellaneous revenue sources.",
    )
    government_grants: float = Field(
        ...,
        title="Revenue from Government Grants",
        description="Revenue from government grants.",
    )
    foreign_contributions: float = Field(
        ...,
        title="Foreign Contributions",
        description="Revenue from foreign sources.",
    )
|
||||
|
||||
|
||||
class ExpensesBreakdown(BaseModel):
    # Reported total expenses, the three functional categories, and the
    # detailed expense line items. Every field is a required float.
    total_expenses: float = Field(
        ...,
        title="Total Functional Expenses",
        description="Sum of all expenses.",
    )
    program_services_expenses: float = Field(
        ...,
        title="Program Services Expenses",
        description="Expenses for program services.",
    )
    management_general_expenses: float = Field(
        ...,
        title="Management & General Expenses",
        description="Administrative and management expenses.",
    )
    fundraising_expenses: float = Field(
        ...,
        title="Fundraising Expenses",
        description="Expenses for fundraising activities.",
    )
    grants_us_organizations: float = Field(
        ...,
        title="Grants to U.S. Organizations",
        description="Grants and assistance to U.S. organizations.",
    )
    grants_us_individuals: float = Field(
        ...,
        title="Grants to U.S. Individuals",
        description="Grants and assistance to U.S. individuals.",
    )
    grants_foreign_organizations: float = Field(
        ...,
        title="Grants to Foreign Organizations",
        description="Grants and assistance to foreign organizations.",
    )
    grants_foreign_individuals: float = Field(
        ...,
        title="Grants to Foreign Individuals",
        description="Grants and assistance to foreign individuals.",
    )
    compensation_officers: float = Field(
        ...,
        title="Compensation of Officers/Key Employees",
        description="Compensation paid to officers and key employees.",
    )
    compensation_other_staff: float = Field(
        ...,
        title="Compensation of Other Staff",
        description="Compensation paid to other staff.",
    )
    payroll_taxes_benefits: float = Field(
        ...,
        title="Payroll Taxes and Benefits",
        description="Payroll taxes and employee benefits.",
    )
    professional_fees: float = Field(
        ...,
        title="Professional Fees",
        description="Legal, accounting, and lobbying fees.",
    )
    office_occupancy_costs: float = Field(
        ...,
        title="Office and Occupancy Costs",
        description="Office and occupancy expenses.",
    )
    information_technology_costs: float = Field(
        ...,
        title="Information Technology Costs",
        description="IT-related expenses.",
    )
    travel_conference_expenses: float = Field(
        ...,
        title="Travel and Conference Expenses",
        description="Travel and conference costs.",
    )
    depreciation_amortization: float = Field(
        ...,
        title="Depreciation and Amortization",
        description="Depreciation and amortization expenses.",
    )
    insurance: float = Field(
        ...,
        title="Insurance",
        description="Insurance expenses.",
    )
|
||||
|
||||
|
||||
class OfficersDirectorsTrusteesKeyEmployee(BaseModel):
    # One listed officer, director, trustee or key employee. Every field is
    # required.
    name: str = Field(
        ...,
        title="Name",
        description="Full name of the individual.",
    )
    title_position: str = Field(
        ...,
        title="Title/Position",
        description="Role or position held.",
    )
    average_hours_per_week: float = Field(
        ...,
        title="Average Hours Per Week",
        description="Average weekly hours devoted to position.",
    )
    related_party_transactions: str = Field(
        ...,
        title="Related-Party Transactions",
        description="Indicates if related-party transactions occurred.",
    )
    former_officer: str = Field(
        ...,
        title="Former Officer Indicator",
        description="Indicates if the individual is a former officer.",
    )
    governance_role: str = Field(
        ...,
        title="Governance Role",
        description="Role in governance (voting, independent, etc.).",
    )
|
||||
|
||||
|
||||
class GovernanceManagementDisclosure(BaseModel):
    # Governance and management disclosures. The two member counts are floats;
    # all other answers are free-form strings. Every field is required.
    governing_body_size: float = Field(
        ...,
        title="Governing Body Size",
        description="Number of voting members on the governing body.",
    )
    independent_members: float = Field(
        ...,
        title="Number of Independent Members",
        description="Number of independent voting members.",
    )
    financial_statements_reviewed: str = Field(
        ...,
        title="Financial Statements Reviewed/Audited",
        description="Indicates if financial statements were reviewed or audited.",
    )
    form_990_provided_to_governing_body: str = Field(
        ...,
        title="Form 990 Provided to Governing Body",
        description="Indicates if Form 990 was provided to governing body before filing.",
    )
    conflict_of_interest_policy: str = Field(
        ...,
        title="Conflict-of-Interest Policy",
        description="Indicates if a conflict-of-interest policy is in place.",
    )
    whistleblower_policy: str = Field(
        ...,
        title="Whistleblower Policy",
        description="Indicates if a whistleblower policy is in place.",
    )
    document_retention_policy: str = Field(
        ...,
        title="Document Retention/Destruction Policy",
        description="Indicates if a document retention/destruction policy is in place.",
    )
    ceo_compensation_review_process: str = Field(
        ...,
        title="CEO Compensation Review Process",
        description="Description of CEO compensation review process.",
    )
    public_disclosure_practices: str = Field(
        ...,
        title="Public Disclosure Practices",
        description="Description of public disclosure practices.",
    )
|
||||
|
||||
|
||||
class ProgramServiceAccomplishment(BaseModel):
    # One program with its description, amounts and reported outputs. Every
    # field is required.
    program_name: str = Field(
        ...,
        title="Program Name",
        description="Name of the program.",
    )
    program_description: str = Field(
        ...,
        title="Program Description",
        description="Description of the program.",
    )
    expenses: float = Field(
        ...,
        title="Program Expenses",
        description="Expenses for the program.",
    )
    grants: float = Field(
        ...,
        title="Program Grants",
        description="Grants made under the program.",
    )
    revenue_generated: float = Field(
        ...,
        title="Revenue Generated",
        description="Revenue generated by the program.",
    )
    quantitative_outputs: str = Field(
        ...,
        title="Quantitative Outputs",
        description="Quantitative outputs (e.g., number served, events held).",
    )
|
||||
|
||||
|
||||
class FundraisingGrantmaking(BaseModel):
    # Fundraising event revenue and expenses plus professional fundraiser
    # fees. Every field is a required float.
    total_fundraising_event_revenue: float = Field(
        ...,
        title="Total Fundraising Event Revenue",
        description="Total revenue from fundraising events.",
    )
    total_fundraising_event_expenses: float = Field(
        ...,
        title="Total Fundraising Event Expenses",
        description="Total direct expenses for fundraising events.",
    )
    professional_fundraiser_fees: float = Field(
        ...,
        title="Professional Fundraiser Fees",
        description="Fees paid to professional fundraisers.",
    )
|
||||
|
||||
|
||||
class FunctionalOperationalData(BaseModel):
    # Operational metrics: head counts, occupancy costs and two free-text
    # descriptions. Every field is required.
    # NOTE(review): the employee/volunteer counts are typed `float`, not
    # `int`; left as declared -- confirm whether that is intentional.
    # (Kept as comments, not a docstring, so the generated schema
    # description is unchanged.)

    number_of_employees: float = Field(
        ...,
        title="Number of Employees",
        description="Total number of employees.",
    )
    number_of_volunteers: float = Field(
        ...,
        title="Number of Volunteers",
        description="Total number of volunteers.",
    )
    occupancy_costs: float = Field(
        ...,
        title="Occupancy Costs",
        description="Total occupancy costs.",
    )

    # Narrative fields.
    fundraising_method_descriptions: str = Field(
        ...,
        title="Fundraising Method Descriptions",
        description="Descriptions of fundraising methods used.",
    )
    joint_ventures_disregarded_entities: str = Field(
        ...,
        title="Joint Ventures and Disregarded Entities",
        description="Details of joint ventures and disregarded entities.",
    )
|
||||
|
||||
|
||||
class CompensationDetails(BaseModel):
    # Compensation breakdown: four numeric components followed by five
    # string-typed indicator fields. Every field is required.
    # (Kept as comments, not a docstring, so the generated schema
    # description is unchanged.)

    # --- Numeric components -------------------------------------------
    base_compensation: float = Field(
        ...,
        title="Base Compensation",
        description="Base salary or wages.",
    )
    bonus: float = Field(
        ...,
        title="Bonus Compensation",
        description="Bonus or incentive compensation.",
    )
    incentive: float = Field(
        ...,
        title="Incentive Compensation",
        description="Incentive compensation.",
    )
    other: float = Field(
        ...,
        title="Other Compensation",
        description="Other forms of compensation.",
    )

    # --- Indicators (declared as str, not bool) -----------------------
    non_fixed_compensation: str = Field(
        ...,
        title="Non-Fixed Compensation Flag",
        description="Indicates if compensation is non-fixed.",
    )
    first_class_travel: str = Field(
        ...,
        title="First-Class Travel",
        description="Indicates if first-class travel was provided.",
    )
    housing_allowance: str = Field(
        ...,
        title="Housing Allowance",
        description="Indicates if housing allowance was provided.",
    )
    expense_account_usage: str = Field(
        ...,
        title="Expense Account Usage",
        description="Indicates if expense account was used.",
    )
    supplemental_retirement: str = Field(
        ...,
        title="Supplemental Retirement/Deferred Comp",
        description="Indicates if supplemental retirement or deferred comp was provided.",
    )
|
||||
|
||||
|
||||
class PoliticalLobbyingActivities(BaseModel):
    # Lobbying and political-campaign expenditures, the 501(h) election
    # status and a free-text note on related organizations. Every field
    # is required.
    # (Kept as comments, not a docstring, so the generated schema
    # description is unchanged.)

    lobbying_expenditures_direct: float = Field(
        ...,
        title="Direct Lobbying Expenditures",
        description="Direct lobbying expenditures.",
    )
    lobbying_expenditures_grassroots: float = Field(
        ...,
        title="Grassroots Lobbying Expenditures",
        description="Grassroots lobbying expenditures.",
    )
    election_501h_status: str = Field(
        ...,
        title="501(h) Election Status",
        description="Indicates if 501(h) election was made.",
    )
    political_campaign_expenditures: float = Field(
        ...,
        title="Political Campaign Expenditures",
        description="Expenditures for political campaigns.",
    )
    related_organizations_affiliates: str = Field(
        ...,
        title="Related Organizations/Affiliates Involved",
        description="Details of related organizations or affiliates involved.",
    )
|
||||
|
||||
|
||||
class InvestmentsEndowment(BaseModel):
    # Investment holdings and endowment figures, plus free-text details
    # on related-organization transactions and related-party loans.
    # Every field is required.
    # (Kept as comments, not a docstring, so the generated schema
    # description is unchanged.)

    investment_types: str = Field(
        ...,
        title="Investment Types",
        description="Types of investments held (securities, partnerships, real estate).",
    )
    donor_restricted_endowment_values: float = Field(
        ...,
        title="Donor-Restricted Endowment Values",
        description="Value of donor-restricted endowments.",
    )
    net_appreciation_depreciation: float = Field(
        ...,
        title="Net Appreciation/Depreciation",
        description="Net appreciation or depreciation of investments.",
    )
    related_organization_transactions: str = Field(
        ...,
        title="Related Organization Transactions",
        description="Details of transactions with related organizations.",
    )
    loans_to_from_related_parties: str = Field(
        ...,
        title="Loans to/from Related Parties",
        description="Details of loans to or from related parties.",
    )
|
||||
|
||||
|
||||
class TaxCompliancePenalties(BaseModel):
    # Tax-compliance disclosures. All four fields are required strings.
    # (Kept as comments, not a docstring, so the generated schema
    # description is unchanged.)

    penalties_excise_taxes_reported: str = Field(
        ...,
        title="Penalties or Excise Taxes Reported",
        description="Reported penalties or excise taxes.",
    )
    unrelated_business_income_disclosure: str = Field(
        ...,
        title="Unrelated Business Income Disclosure",
        description="Disclosure of unrelated business income (UBI).",
    )
    foreign_bank_account_reporting: str = Field(
        ...,
        title="Foreign Bank Account Reporting",
        description="Disclosure of foreign bank accounts (FBAR equivalent).",
    )
    schedule_o_narrative_explanations: str = Field(
        ...,
        title="Schedule O Narrative Explanations",
        description="Narrative explanations from Schedule O.",
    )
|
||||
|
||||
|
||||
class ExtractedIrsForm990PfDataSchema(BaseModel):
    # Top-level extraction record: one required field per section model,
    # plus an untyped balance-sheet mapping and two list-valued sections
    # (officers and program accomplishments).
    # (Kept as comments, not a docstring, so the generated schema
    # description is unchanged.)

    # --- Identity and financial statements ----------------------------
    core_organization_metadata: CoreOrganizationMetadata = Field(
        ...,
        title="Core Organization Metadata",
        description="Essential identifiers and attributes for normalizing entities across filings and years.",
    )
    revenue_breakdown: RevenueBreakdown = Field(
        ...,
        title="Revenue Breakdown",
        description="Detailed breakdown of revenue streams for the fiscal year.",
    )
    expenses_breakdown: ExpensesBreakdown = Field(
        ...,
        title="Expenses Breakdown",
        description="Detailed breakdown of expenses for the fiscal year.",
    )
    # Free-form mapping: no dedicated balance-sheet model is used here.
    balance_sheet: dict[str, Any] = Field(
        ...,
        title="Balance Sheet Data",
        description="Assets, liabilities, and net assets at year end.",
    )

    # --- People and governance ----------------------------------------
    officers_directors_trustees_key_employees: list[
        OfficersDirectorsTrusteesKeyEmployee
    ] = Field(
        ...,
        title="Officers, Directors, Trustees, Key Employees",
        description="List of key personnel and their compensation.",
    )
    governance_management_disclosure: GovernanceManagementDisclosure = Field(
        ...,
        title="Governance, Management, and Disclosure",
        description="Governance and management practices, policies, and disclosures.",
    )

    # --- Programs, fundraising and operations -------------------------
    program_service_accomplishments: list[ProgramServiceAccomplishment] = Field(
        ...,
        title="Program Service Accomplishments",
        description="Major programs and their outputs for the fiscal year.",
    )
    fundraising_grantmaking: FundraisingGrantmaking = Field(
        ...,
        title="Fundraising & Grantmaking",
        description="Fundraising event details and grantmaking activities.",
    )
    functional_operational_data: FunctionalOperationalData = Field(
        ...,
        title="Functional & Operational Data",
        description="Operational metrics and related-organization relationships.",
    )

    # --- Compensation, political activity, investments, tax -----------
    compensation_details: CompensationDetails = Field(
        ...,
        title="Compensation Details",
        description="Detailed breakdown of officer compensation and benefits.",
    )
    political_lobbying_activities: PoliticalLobbyingActivities = Field(
        ...,
        title="Political & Lobbying Activities",
        description="Details of political and lobbying expenditures and affiliations.",
    )
    investments_endowment: InvestmentsEndowment = Field(
        ...,
        title="Investments & Endowment",
        description="Investment holdings, endowment values, and related transactions.",
    )
    tax_compliance_penalties: TaxCompliancePenalties = Field(
        ...,
        title="Tax Compliance / Penalties",
        description="Tax compliance indicators, penalties, and narrative explanations.",
    )
|
||||
|
||||
|
||||
class ValidatorState(BaseModel):
    # State bundle for one audit run: the parsed extraction, any findings
    # already produced for it, and free-form metadata.
    # (Kept as comments, not a docstring, so the generated schema
    # description is unchanged.)

    # The only required field.
    extraction: ExtractedIrsForm990PfDataSchema

    # default_factory gives each instance its own empty list / dict.
    initial_findings: list[AuditFinding] = Field(default_factory=list)
    metadata: dict[str, Any] = Field(default_factory=dict)
|
||||
608
backend/app/example_data.json
Normal file
608
backend/app/example_data.json
Normal file
@@ -0,0 +1,608 @@
|
||||
{
|
||||
"extraction": {
|
||||
"core_organization_metadata": {
|
||||
"ein": "84-2674654",
|
||||
"legal_name": "07 IN HEAVEN MEMORIAL SCHOLARSHIP",
|
||||
"phone_number": "(262) 215-0300",
|
||||
"website_url": "",
|
||||
"return_type": "990-PF",
|
||||
"amended_return": "No",
|
||||
"group_exemption_number": "",
|
||||
"subsection_code": "501(c)(3)",
|
||||
"ruling_date": "",
|
||||
"accounting_method": "Cash",
|
||||
"organization_type": "corporation",
|
||||
"year_of_formation": "",
|
||||
"incorporation_state": "WI"
|
||||
},
|
||||
"revenue_breakdown": {
|
||||
"total_revenue": 5227,
|
||||
"contributions_gifts_grants": 5227,
|
||||
"program_service_revenue": 0,
|
||||
"membership_dues": 0,
|
||||
"investment_income": 0,
|
||||
"gains_losses_sales_assets": 0,
|
||||
"rental_income": 0,
|
||||
"related_organizations_revenue": 0,
|
||||
"gaming_revenue": 0,
|
||||
"other_revenue": 0,
|
||||
"government_grants": 0,
|
||||
"foreign_contributions": 0
|
||||
},
|
||||
"expenses_breakdown": {
|
||||
"total_expenses": 2104,
|
||||
"program_services_expenses": 0,
|
||||
"management_general_expenses": 0,
|
||||
"fundraising_expenses": 2104,
|
||||
"grants_us_organizations": 0,
|
||||
"grants_us_individuals": 0,
|
||||
"grants_foreign_organizations": 0,
|
||||
"grants_foreign_individuals": 0,
|
||||
"compensation_officers": 0,
|
||||
"compensation_other_staff": 0,
|
||||
"payroll_taxes_benefits": 0,
|
||||
"professional_fees": 0,
|
||||
"office_occupancy_costs": 0,
|
||||
"information_technology_costs": 0,
|
||||
"travel_conference_expenses": 0,
|
||||
"depreciation_amortization": 0,
|
||||
"insurance": 0
|
||||
},
|
||||
"balance_sheet": {},
|
||||
"officers_directors_trustees_key_employees": [
|
||||
{
|
||||
"name": "REBECCA TERPSTRA",
|
||||
"title_position": "PRESIDENT",
|
||||
"average_hours_per_week": 0.1,
|
||||
"related_party_transactions": "",
|
||||
"former_officer": "",
|
||||
"governance_role": ""
|
||||
},
|
||||
{
|
||||
"name": "ROBERT GUZMAN",
|
||||
"title_position": "VICE PRESDEINT",
|
||||
"average_hours_per_week": 0.1,
|
||||
"related_party_transactions": "",
|
||||
"former_officer": "",
|
||||
"governance_role": ""
|
||||
},
|
||||
{
|
||||
"name": "ANDREA VALENTI",
|
||||
"title_position": "TREASURER",
|
||||
"average_hours_per_week": 0.1,
|
||||
"related_party_transactions": "",
|
||||
"former_officer": "",
|
||||
"governance_role": ""
|
||||
},
|
||||
{
|
||||
"name": "BETHANY WALSH",
|
||||
"title_position": "SECRETARY",
|
||||
"average_hours_per_week": 0.1,
|
||||
"related_party_transactions": "",
|
||||
"former_officer": "",
|
||||
"governance_role": ""
|
||||
}
|
||||
],
|
||||
"governance_management_disclosure": {
|
||||
"governing_body_size": 4,
|
||||
"independent_members": 4,
|
||||
"financial_statements_reviewed": "",
|
||||
"form_990_provided_to_governing_body": "",
|
||||
"conflict_of_interest_policy": "",
|
||||
"whistleblower_policy": "",
|
||||
"document_retention_policy": "",
|
||||
"ceo_compensation_review_process": "",
|
||||
"public_disclosure_practices": "Yes"
|
||||
},
|
||||
"program_service_accomplishments": [],
|
||||
"fundraising_grantmaking": {
|
||||
"total_fundraising_event_revenue": 0,
|
||||
"total_fundraising_event_expenses": 2104,
|
||||
"professional_fundraiser_fees": 0
|
||||
},
|
||||
"functional_operational_data": {
|
||||
"number_of_employees": 0,
|
||||
"number_of_volunteers": 0,
|
||||
"occupancy_costs": 0,
|
||||
"fundraising_method_descriptions": "",
|
||||
"joint_ventures_disregarded_entities": ""
|
||||
},
|
||||
"compensation_details": {
|
||||
"base_compensation": 0,
|
||||
"bonus": 0,
|
||||
"incentive": 0,
|
||||
"other": 0,
|
||||
"non_fixed_compensation": "",
|
||||
"first_class_travel": "",
|
||||
"housing_allowance": "",
|
||||
"expense_account_usage": "",
|
||||
"supplemental_retirement": ""
|
||||
},
|
||||
"political_lobbying_activities": {
|
||||
"lobbying_expenditures_direct": 0,
|
||||
"lobbying_expenditures_grassroots": 0,
|
||||
"election_501h_status": "",
|
||||
"political_campaign_expenditures": 0,
|
||||
"related_organizations_affiliates": ""
|
||||
},
|
||||
"investments_endowment": {
|
||||
"investment_types": "",
|
||||
"donor_restricted_endowment_values": 0,
|
||||
"net_appreciation_depreciation": 0,
|
||||
"related_organization_transactions": "",
|
||||
"loans_to_from_related_parties": ""
|
||||
},
|
||||
"tax_compliance_penalties": {
|
||||
"penalties_excise_taxes_reported": "No",
|
||||
"unrelated_business_income_disclosure": "No",
|
||||
"foreign_bank_account_reporting": "No",
|
||||
"schedule_o_narrative_explanations": ""
|
||||
}
|
||||
},
|
||||
"extraction_metadata": {
|
||||
"core_organization_metadata": {
|
||||
"ein": {
|
||||
"value": "84-2674654",
|
||||
"references": ["0-7"]
|
||||
},
|
||||
"legal_name": {
|
||||
"value": "07 IN HEAVEN MEMORIAL SCHOLARSHIP",
|
||||
"references": ["0-6"]
|
||||
},
|
||||
"phone_number": {
|
||||
"value": "(262) 215-0300",
|
||||
"references": ["0-a"]
|
||||
},
|
||||
"website_url": {
|
||||
"value": "",
|
||||
"references": []
|
||||
},
|
||||
"return_type": {
|
||||
"value": "990-PF",
|
||||
"references": ["4ade8ed0-bce7-4bd5-bd8d-190e3e4be95b"]
|
||||
},
|
||||
"amended_return": {
|
||||
"value": "No",
|
||||
"references": ["4ac9edc4-e9bb-430f-b4c4-a42bf4c04b28"]
|
||||
},
|
||||
"group_exemption_number": {
|
||||
"value": "",
|
||||
"references": []
|
||||
},
|
||||
"subsection_code": {
|
||||
"value": "501(c)(3)",
|
||||
"references": ["4ac9edc4-e9bb-430f-b4c4-a42bf4c04b28"]
|
||||
},
|
||||
"ruling_date": {
|
||||
"value": "",
|
||||
"references": []
|
||||
},
|
||||
"accounting_method": {
|
||||
"value": "Cash",
|
||||
"references": ["0-d"]
|
||||
},
|
||||
"organization_type": {
|
||||
"value": "corporation",
|
||||
"references": ["4ac9edc4-e9bb-430f-b4c4-a42bf4c04b28"]
|
||||
},
|
||||
"year_of_formation": {
|
||||
"value": "",
|
||||
"references": []
|
||||
},
|
||||
"incorporation_state": {
|
||||
"value": "WI",
|
||||
"references": ["4ac9edc4-e9bb-430f-b4c4-a42bf4c04b28"]
|
||||
}
|
||||
},
|
||||
"revenue_breakdown": {
|
||||
"total_revenue": {
|
||||
"value": 5227,
|
||||
"references": ["0-1z"]
|
||||
},
|
||||
"contributions_gifts_grants": {
|
||||
"value": 5227,
|
||||
"references": ["0-m"]
|
||||
},
|
||||
"program_service_revenue": {
|
||||
"value": 0,
|
||||
"references": []
|
||||
},
|
||||
"membership_dues": {
|
||||
"value": 0,
|
||||
"references": []
|
||||
},
|
||||
"investment_income": {
|
||||
"value": 0,
|
||||
"references": []
|
||||
},
|
||||
"gains_losses_sales_assets": {
|
||||
"value": 0,
|
||||
"references": []
|
||||
},
|
||||
"rental_income": {
|
||||
"value": 0,
|
||||
"references": []
|
||||
},
|
||||
"related_organizations_revenue": {
|
||||
"value": 0,
|
||||
"references": []
|
||||
},
|
||||
"gaming_revenue": {
|
||||
"value": 0,
|
||||
"references": []
|
||||
},
|
||||
"other_revenue": {
|
||||
"value": 0,
|
||||
"references": []
|
||||
},
|
||||
"government_grants": {
|
||||
"value": 0,
|
||||
"references": []
|
||||
},
|
||||
"foreign_contributions": {
|
||||
"value": 0,
|
||||
"references": []
|
||||
}
|
||||
},
|
||||
"expenses_breakdown": {
|
||||
"total_expenses": {
|
||||
"value": 2104,
|
||||
"references": ["0-2S"]
|
||||
},
|
||||
"program_services_expenses": {
|
||||
"value": 0,
|
||||
"references": []
|
||||
},
|
||||
"management_general_expenses": {
|
||||
"value": 0,
|
||||
"references": []
|
||||
},
|
||||
"fundraising_expenses": {
|
||||
"value": 2104,
|
||||
"references": ["13-d"]
|
||||
},
|
||||
"grants_us_organizations": {
|
||||
"value": 0,
|
||||
"references": []
|
||||
},
|
||||
"grants_us_individuals": {
|
||||
"value": 0,
|
||||
"references": []
|
||||
},
|
||||
"grants_foreign_organizations": {
|
||||
"value": 0,
|
||||
"references": []
|
||||
},
|
||||
"grants_foreign_individuals": {
|
||||
"value": 0,
|
||||
"references": []
|
||||
},
|
||||
"compensation_officers": {
|
||||
"value": 0,
|
||||
"references": ["5-1q", "5-1w", "5-1C", "5-1I"]
|
||||
},
|
||||
"compensation_other_staff": {
|
||||
"value": 0,
|
||||
"references": []
|
||||
},
|
||||
"payroll_taxes_benefits": {
|
||||
"value": 0,
|
||||
"references": []
|
||||
},
|
||||
"professional_fees": {
|
||||
"value": 0,
|
||||
"references": []
|
||||
},
|
||||
"office_occupancy_costs": {
|
||||
"value": 0,
|
||||
"references": []
|
||||
},
|
||||
"information_technology_costs": {
|
||||
"value": 0,
|
||||
"references": []
|
||||
},
|
||||
"travel_conference_expenses": {
|
||||
"value": 0,
|
||||
"references": []
|
||||
},
|
||||
"depreciation_amortization": {
|
||||
"value": 0,
|
||||
"references": []
|
||||
},
|
||||
"insurance": {
|
||||
"value": 0,
|
||||
"references": []
|
||||
}
|
||||
},
|
||||
"balance_sheet": {},
|
||||
"officers_directors_trustees_key_employees": [
|
||||
{
|
||||
"name": {
|
||||
"value": "REBECCA TERPSTRA",
|
||||
"references": ["5-1o"]
|
||||
},
|
||||
"title_position": {
|
||||
"value": "PRESIDENT",
|
||||
"references": ["5-1p"]
|
||||
},
|
||||
"average_hours_per_week": {
|
||||
"value": 0.1,
|
||||
"references": ["5-1p"]
|
||||
},
|
||||
"related_party_transactions": {
|
||||
"value": "",
|
||||
"references": []
|
||||
},
|
||||
"former_officer": {
|
||||
"value": "",
|
||||
"references": []
|
||||
},
|
||||
"governance_role": {
|
||||
"value": "",
|
||||
"references": []
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": {
|
||||
"value": "ROBERT GUZMAN",
|
||||
"references": ["5-1u"]
|
||||
},
|
||||
"title_position": {
|
||||
"value": "VICE PRESDEINT",
|
||||
"references": ["5-1v"]
|
||||
},
|
||||
"average_hours_per_week": {
|
||||
"value": 0.1,
|
||||
"references": ["5-1v"]
|
||||
},
|
||||
"related_party_transactions": {
|
||||
"value": "",
|
||||
"references": []
|
||||
},
|
||||
"former_officer": {
|
||||
"value": "",
|
||||
"references": []
|
||||
},
|
||||
"governance_role": {
|
||||
"value": "",
|
||||
"references": []
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": {
|
||||
"value": "ANDREA VALENTI",
|
||||
"references": ["5-1A"]
|
||||
},
|
||||
"title_position": {
|
||||
"value": "TREASURER",
|
||||
"references": ["5-1B"]
|
||||
},
|
||||
"average_hours_per_week": {
|
||||
"value": 0.1,
|
||||
"references": ["5-1B"]
|
||||
},
|
||||
"related_party_transactions": {
|
||||
"value": "",
|
||||
"references": []
|
||||
},
|
||||
"former_officer": {
|
||||
"value": "",
|
||||
"references": []
|
||||
},
|
||||
"governance_role": {
|
||||
"value": "",
|
||||
"references": []
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": {
|
||||
"value": "BETHANY WALSH",
|
||||
"references": ["5-1G"]
|
||||
},
|
||||
"title_position": {
|
||||
"value": "SECRETARY",
|
||||
"references": ["5-1H"]
|
||||
},
|
||||
"average_hours_per_week": {
|
||||
"value": 0.1,
|
||||
"references": ["5-1H"]
|
||||
},
|
||||
"related_party_transactions": {
|
||||
"value": "",
|
||||
"references": []
|
||||
},
|
||||
"former_officer": {
|
||||
"value": "",
|
||||
"references": []
|
||||
},
|
||||
"governance_role": {
|
||||
"value": "",
|
||||
"references": []
|
||||
}
|
||||
}
|
||||
],
|
||||
"governance_management_disclosure": {
|
||||
"governing_body_size": {
|
||||
"value": 4,
|
||||
"references": ["5-1o", "5-1u", "5-1A", "5-1G"]
|
||||
},
|
||||
"independent_members": {
|
||||
"value": 4,
|
||||
"references": ["5-1o", "5-1u", "5-1A", "5-1G"]
|
||||
},
|
||||
"financial_statements_reviewed": {
|
||||
"value": "",
|
||||
"references": []
|
||||
},
|
||||
"form_990_provided_to_governing_body": {
|
||||
"value": "",
|
||||
"references": []
|
||||
},
|
||||
"conflict_of_interest_policy": {
|
||||
"value": "",
|
||||
"references": []
|
||||
},
|
||||
"whistleblower_policy": {
|
||||
"value": "",
|
||||
"references": []
|
||||
},
|
||||
"document_retention_policy": {
|
||||
"value": "",
|
||||
"references": []
|
||||
},
|
||||
"ceo_compensation_review_process": {
|
||||
"value": "",
|
||||
"references": []
|
||||
},
|
||||
"public_disclosure_practices": {
|
||||
"value": "Yes",
|
||||
"references": ["4-g"]
|
||||
}
|
||||
},
|
||||
"program_service_accomplishments": [],
|
||||
"fundraising_grantmaking": {
|
||||
"total_fundraising_event_revenue": {
|
||||
"value": 0,
|
||||
"references": []
|
||||
},
|
||||
"total_fundraising_event_expenses": {
|
||||
"value": 2104,
|
||||
"references": ["13-d"]
|
||||
},
|
||||
"professional_fundraiser_fees": {
|
||||
"value": 0,
|
||||
"references": []
|
||||
}
|
||||
},
|
||||
"functional_operational_data": {
|
||||
"number_of_employees": {
|
||||
"value": 0,
|
||||
"references": []
|
||||
},
|
||||
"number_of_volunteers": {
|
||||
"value": 0,
|
||||
"references": []
|
||||
},
|
||||
"occupancy_costs": {
|
||||
"value": 0,
|
||||
"references": []
|
||||
},
|
||||
"fundraising_method_descriptions": {
|
||||
"value": "",
|
||||
"references": []
|
||||
},
|
||||
"joint_ventures_disregarded_entities": {
|
||||
"value": "",
|
||||
"references": []
|
||||
}
|
||||
},
|
||||
"compensation_details": {
|
||||
"base_compensation": {
|
||||
"value": 0,
|
||||
"references": ["5-1q", "5-1w"]
|
||||
},
|
||||
"bonus": {
|
||||
"value": 0,
|
||||
"references": []
|
||||
},
|
||||
"incentive": {
|
||||
"value": 0,
|
||||
"references": []
|
||||
},
|
||||
"other": {
|
||||
"value": 0,
|
||||
"references": []
|
||||
},
|
||||
"non_fixed_compensation": {
|
||||
"value": "",
|
||||
"references": []
|
||||
},
|
||||
"first_class_travel": {
|
||||
"value": "",
|
||||
"references": []
|
||||
},
|
||||
"housing_allowance": {
|
||||
"value": "",
|
||||
"references": []
|
||||
},
|
||||
"expense_account_usage": {
|
||||
"value": "",
|
||||
"references": []
|
||||
},
|
||||
"supplemental_retirement": {
|
||||
"value": "",
|
||||
"references": []
|
||||
}
|
||||
},
|
||||
"political_lobbying_activities": {
|
||||
"lobbying_expenditures_direct": {
|
||||
"value": 0,
|
||||
"references": []
|
||||
},
|
||||
"lobbying_expenditures_grassroots": {
|
||||
"value": 0,
|
||||
"references": []
|
||||
},
|
||||
"election_501h_status": {
|
||||
"value": "",
|
||||
"references": []
|
||||
},
|
||||
"political_campaign_expenditures": {
|
||||
"value": 0,
|
||||
"references": []
|
||||
},
|
||||
"related_organizations_affiliates": {
|
||||
"value": "",
|
||||
"references": []
|
||||
}
|
||||
},
|
||||
"investments_endowment": {
|
||||
"investment_types": {
|
||||
"value": "",
|
||||
"references": []
|
||||
},
|
||||
"donor_restricted_endowment_values": {
|
||||
"value": 0,
|
||||
"references": []
|
||||
},
|
||||
"net_appreciation_depreciation": {
|
||||
"value": 0,
|
||||
"references": []
|
||||
},
|
||||
"related_organization_transactions": {
|
||||
"value": "",
|
||||
"references": []
|
||||
},
|
||||
"loans_to_from_related_parties": {
|
||||
"value": "",
|
||||
"references": []
|
||||
}
|
||||
},
|
||||
"tax_compliance_penalties": {
|
||||
"penalties_excise_taxes_reported": {
|
||||
"value": "No",
|
||||
"references": ["3-I"]
|
||||
},
|
||||
"unrelated_business_income_disclosure": {
|
||||
"value": "No",
|
||||
"references": ["3-Y"]
|
||||
},
|
||||
"foreign_bank_account_reporting": {
|
||||
"value": "No",
|
||||
"references": ["4-H"]
|
||||
},
|
||||
"schedule_o_narrative_explanations": {
|
||||
"value": "",
|
||||
"references": []
|
||||
}
|
||||
}
|
||||
},
|
||||
"metadata": {
|
||||
"filename": "markdown.md",
|
||||
"org_id": null,
|
||||
"duration_ms": 16656,
|
||||
"credit_usage": 27.2,
|
||||
"job_id": "nnmr8lcxtykk5ll5wodjtrnn6",
|
||||
"version": "extract-20250930"
|
||||
}
|
||||
}
|
||||
@@ -6,6 +6,7 @@ from pydantic_ai.ui.vercel_ai import VercelAIAdapter
|
||||
from starlette.requests import Request
|
||||
from starlette.responses import Response
|
||||
|
||||
from app.agents import form_auditor
|
||||
from app.core.config import settings
|
||||
|
||||
provider = AzureProvider(
|
||||
@@ -19,6 +20,347 @@ agent = Agent(model=model)
|
||||
router = APIRouter(prefix="/api/v1/agent", tags=["Agent"])
|
||||
|
||||
|
||||
@agent.tool_plain()
|
||||
async def build_audit_report():
|
||||
"""Calls the audit subagent to get a full audit report of the organization"""
|
||||
data = {
|
||||
"extraction": {
|
||||
"core_organization_metadata": {
|
||||
"ein": "84-2674654",
|
||||
"legal_name": "07 IN HEAVEN MEMORIAL SCHOLARSHIP",
|
||||
"phone_number": "(262) 215-0300",
|
||||
"website_url": "",
|
||||
"return_type": "990-PF",
|
||||
"amended_return": "No",
|
||||
"group_exemption_number": "",
|
||||
"subsection_code": "501(c)(3)",
|
||||
"ruling_date": "",
|
||||
"accounting_method": "Cash",
|
||||
"organization_type": "corporation",
|
||||
"year_of_formation": "",
|
||||
"incorporation_state": "WI",
|
||||
},
|
||||
"revenue_breakdown": {
|
||||
"total_revenue": 5227,
|
||||
"contributions_gifts_grants": 5227,
|
||||
"program_service_revenue": 0,
|
||||
"membership_dues": 0,
|
||||
"investment_income": 0,
|
||||
"gains_losses_sales_assets": 0,
|
||||
"rental_income": 0,
|
||||
"related_organizations_revenue": 0,
|
||||
"gaming_revenue": 0,
|
||||
"other_revenue": 0,
|
||||
"government_grants": 0,
|
||||
"foreign_contributions": 0,
|
||||
},
|
||||
"expenses_breakdown": {
|
||||
"total_expenses": 2104,
|
||||
"program_services_expenses": 0,
|
||||
"management_general_expenses": 0,
|
||||
"fundraising_expenses": 2104,
|
||||
"grants_us_organizations": 0,
|
||||
"grants_us_individuals": 0,
|
||||
"grants_foreign_organizations": 0,
|
||||
"grants_foreign_individuals": 0,
|
||||
"compensation_officers": 0,
|
||||
"compensation_other_staff": 0,
|
||||
"payroll_taxes_benefits": 0,
|
||||
"professional_fees": 0,
|
||||
"office_occupancy_costs": 0,
|
||||
"information_technology_costs": 0,
|
||||
"travel_conference_expenses": 0,
|
||||
"depreciation_amortization": 0,
|
||||
"insurance": 0,
|
||||
},
|
||||
"balance_sheet": {},
|
||||
"officers_directors_trustees_key_employees": [
|
||||
{
|
||||
"name": "REBECCA TERPSTRA",
|
||||
"title_position": "PRESIDENT",
|
||||
"average_hours_per_week": 0.1,
|
||||
"related_party_transactions": "",
|
||||
"former_officer": "",
|
||||
"governance_role": "",
|
||||
},
|
||||
{
|
||||
"name": "ROBERT GUZMAN",
|
||||
"title_position": "VICE PRESDEINT",
|
||||
"average_hours_per_week": 0.1,
|
||||
"related_party_transactions": "",
|
||||
"former_officer": "",
|
||||
"governance_role": "",
|
||||
},
|
||||
{
|
||||
"name": "ANDREA VALENTI",
|
||||
"title_position": "TREASURER",
|
||||
"average_hours_per_week": 0.1,
|
||||
"related_party_transactions": "",
|
||||
"former_officer": "",
|
||||
"governance_role": "",
|
||||
},
|
||||
{
|
||||
"name": "BETHANY WALSH",
|
||||
"title_position": "SECRETARY",
|
||||
"average_hours_per_week": 0.1,
|
||||
"related_party_transactions": "",
|
||||
"former_officer": "",
|
||||
"governance_role": "",
|
||||
},
|
||||
],
|
||||
"governance_management_disclosure": {
|
||||
"governing_body_size": 4,
|
||||
"independent_members": 4,
|
||||
"financial_statements_reviewed": "",
|
||||
"form_990_provided_to_governing_body": "",
|
||||
"conflict_of_interest_policy": "",
|
||||
"whistleblower_policy": "",
|
||||
"document_retention_policy": "",
|
||||
"ceo_compensation_review_process": "",
|
||||
"public_disclosure_practices": "Yes",
|
||||
},
|
||||
"program_service_accomplishments": [],
|
||||
"fundraising_grantmaking": {
|
||||
"total_fundraising_event_revenue": 0,
|
||||
"total_fundraising_event_expenses": 2104,
|
||||
"professional_fundraiser_fees": 0,
|
||||
},
|
||||
"functional_operational_data": {
|
||||
"number_of_employees": 0,
|
||||
"number_of_volunteers": 0,
|
||||
"occupancy_costs": 0,
|
||||
"fundraising_method_descriptions": "",
|
||||
"joint_ventures_disregarded_entities": "",
|
||||
},
|
||||
"compensation_details": {
|
||||
"base_compensation": 0,
|
||||
"bonus": 0,
|
||||
"incentive": 0,
|
||||
"other": 0,
|
||||
"non_fixed_compensation": "",
|
||||
"first_class_travel": "",
|
||||
"housing_allowance": "",
|
||||
"expense_account_usage": "",
|
||||
"supplemental_retirement": "",
|
||||
},
|
||||
"political_lobbying_activities": {
|
||||
"lobbying_expenditures_direct": 0,
|
||||
"lobbying_expenditures_grassroots": 0,
|
||||
"election_501h_status": "",
|
||||
"political_campaign_expenditures": 0,
|
||||
"related_organizations_affiliates": "",
|
||||
},
|
||||
"investments_endowment": {
|
||||
"investment_types": "",
|
||||
"donor_restricted_endowment_values": 0,
|
||||
"net_appreciation_depreciation": 0,
|
||||
"related_organization_transactions": "",
|
||||
"loans_to_from_related_parties": "",
|
||||
},
|
||||
"tax_compliance_penalties": {
|
||||
"penalties_excise_taxes_reported": "No",
|
||||
"unrelated_business_income_disclosure": "No",
|
||||
"foreign_bank_account_reporting": "No",
|
||||
"schedule_o_narrative_explanations": "",
|
||||
},
|
||||
},
|
||||
"extraction_metadata": {
|
||||
"core_organization_metadata": {
|
||||
"ein": {"value": "84-2674654", "references": ["0-7"]},
|
||||
"legal_name": {
|
||||
"value": "07 IN HEAVEN MEMORIAL SCHOLARSHIP",
|
||||
"references": ["0-6"],
|
||||
},
|
||||
"phone_number": {"value": "(262) 215-0300", "references": ["0-a"]},
|
||||
"website_url": {"value": "", "references": []},
|
||||
"return_type": {
|
||||
"value": "990-PF",
|
||||
"references": ["4ade8ed0-bce7-4bd5-bd8d-190e3e4be95b"],
|
||||
},
|
||||
"amended_return": {
|
||||
"value": "No",
|
||||
"references": ["4ac9edc4-e9bb-430f-b4c4-a42bf4c04b28"],
|
||||
},
|
||||
"group_exemption_number": {"value": "", "references": []},
|
||||
"subsection_code": {
|
||||
"value": "501(c)(3)",
|
||||
"references": ["4ac9edc4-e9bb-430f-b4c4-a42bf4c04b28"],
|
||||
},
|
||||
"ruling_date": {"value": "", "references": []},
|
||||
"accounting_method": {"value": "Cash", "references": ["0-d"]},
|
||||
"organization_type": {
|
||||
"value": "corporation",
|
||||
"references": ["4ac9edc4-e9bb-430f-b4c4-a42bf4c04b28"],
|
||||
},
|
||||
"year_of_formation": {"value": "", "references": []},
|
||||
"incorporation_state": {
|
||||
"value": "WI",
|
||||
"references": ["4ac9edc4-e9bb-430f-b4c4-a42bf4c04b28"],
|
||||
},
|
||||
},
|
||||
"revenue_breakdown": {
|
||||
"total_revenue": {"value": 5227, "references": ["0-1z"]},
|
||||
"contributions_gifts_grants": {"value": 5227, "references": ["0-m"]},
|
||||
"program_service_revenue": {"value": 0, "references": []},
|
||||
"membership_dues": {"value": 0, "references": []},
|
||||
"investment_income": {"value": 0, "references": []},
|
||||
"gains_losses_sales_assets": {"value": 0, "references": []},
|
||||
"rental_income": {"value": 0, "references": []},
|
||||
"related_organizations_revenue": {"value": 0, "references": []},
|
||||
"gaming_revenue": {"value": 0, "references": []},
|
||||
"other_revenue": {"value": 0, "references": []},
|
||||
"government_grants": {"value": 0, "references": []},
|
||||
"foreign_contributions": {"value": 0, "references": []},
|
||||
},
|
||||
"expenses_breakdown": {
|
||||
"total_expenses": {"value": 2104, "references": ["0-2S"]},
|
||||
"program_services_expenses": {"value": 0, "references": []},
|
||||
"management_general_expenses": {"value": 0, "references": []},
|
||||
"fundraising_expenses": {"value": 2104, "references": ["13-d"]},
|
||||
"grants_us_organizations": {"value": 0, "references": []},
|
||||
"grants_us_individuals": {"value": 0, "references": []},
|
||||
"grants_foreign_organizations": {"value": 0, "references": []},
|
||||
"grants_foreign_individuals": {"value": 0, "references": []},
|
||||
"compensation_officers": {
|
||||
"value": 0,
|
||||
"references": ["5-1q", "5-1w", "5-1C", "5-1I"],
|
||||
},
|
||||
"compensation_other_staff": {"value": 0, "references": []},
|
||||
"payroll_taxes_benefits": {"value": 0, "references": []},
|
||||
"professional_fees": {"value": 0, "references": []},
|
||||
"office_occupancy_costs": {"value": 0, "references": []},
|
||||
"information_technology_costs": {"value": 0, "references": []},
|
||||
"travel_conference_expenses": {"value": 0, "references": []},
|
||||
"depreciation_amortization": {"value": 0, "references": []},
|
||||
"insurance": {"value": 0, "references": []},
|
||||
},
|
||||
"balance_sheet": {},
|
||||
"officers_directors_trustees_key_employees": [
|
||||
{
|
||||
"name": {"value": "REBECCA TERPSTRA", "references": ["5-1o"]},
|
||||
"title_position": {"value": "PRESIDENT", "references": ["5-1p"]},
|
||||
"average_hours_per_week": {"value": 0.1, "references": ["5-1p"]},
|
||||
"related_party_transactions": {"value": "", "references": []},
|
||||
"former_officer": {"value": "", "references": []},
|
||||
"governance_role": {"value": "", "references": []},
|
||||
},
|
||||
{
|
||||
"name": {"value": "ROBERT GUZMAN", "references": ["5-1u"]},
|
||||
"title_position": {
|
||||
"value": "VICE PRESDEINT",
|
||||
"references": ["5-1v"],
|
||||
},
|
||||
"average_hours_per_week": {"value": 0.1, "references": ["5-1v"]},
|
||||
"related_party_transactions": {"value": "", "references": []},
|
||||
"former_officer": {"value": "", "references": []},
|
||||
"governance_role": {"value": "", "references": []},
|
||||
},
|
||||
{
|
||||
"name": {"value": "ANDREA VALENTI", "references": ["5-1A"]},
|
||||
"title_position": {"value": "TREASURER", "references": ["5-1B"]},
|
||||
"average_hours_per_week": {"value": 0.1, "references": ["5-1B"]},
|
||||
"related_party_transactions": {"value": "", "references": []},
|
||||
"former_officer": {"value": "", "references": []},
|
||||
"governance_role": {"value": "", "references": []},
|
||||
},
|
||||
{
|
||||
"name": {"value": "BETHANY WALSH", "references": ["5-1G"]},
|
||||
"title_position": {"value": "SECRETARY", "references": ["5-1H"]},
|
||||
"average_hours_per_week": {"value": 0.1, "references": ["5-1H"]},
|
||||
"related_party_transactions": {"value": "", "references": []},
|
||||
"former_officer": {"value": "", "references": []},
|
||||
"governance_role": {"value": "", "references": []},
|
||||
},
|
||||
],
|
||||
"governance_management_disclosure": {
|
||||
"governing_body_size": {
|
||||
"value": 4,
|
||||
"references": ["5-1o", "5-1u", "5-1A", "5-1G"],
|
||||
},
|
||||
"independent_members": {
|
||||
"value": 4,
|
||||
"references": ["5-1o", "5-1u", "5-1A", "5-1G"],
|
||||
},
|
||||
"financial_statements_reviewed": {"value": "", "references": []},
|
||||
"form_990_provided_to_governing_body": {"value": "", "references": []},
|
||||
"conflict_of_interest_policy": {"value": "", "references": []},
|
||||
"whistleblower_policy": {"value": "", "references": []},
|
||||
"document_retention_policy": {"value": "", "references": []},
|
||||
"ceo_compensation_review_process": {"value": "", "references": []},
|
||||
"public_disclosure_practices": {"value": "Yes", "references": ["4-g"]},
|
||||
},
|
||||
"program_service_accomplishments": [],
|
||||
"fundraising_grantmaking": {
|
||||
"total_fundraising_event_revenue": {"value": 0, "references": []},
|
||||
"total_fundraising_event_expenses": {
|
||||
"value": 2104,
|
||||
"references": ["13-d"],
|
||||
},
|
||||
"professional_fundraiser_fees": {"value": 0, "references": []},
|
||||
},
|
||||
"functional_operational_data": {
|
||||
"number_of_employees": {"value": 0, "references": []},
|
||||
"number_of_volunteers": {"value": 0, "references": []},
|
||||
"occupancy_costs": {"value": 0, "references": []},
|
||||
"fundraising_method_descriptions": {"value": "", "references": []},
|
||||
"joint_ventures_disregarded_entities": {"value": "", "references": []},
|
||||
},
|
||||
"compensation_details": {
|
||||
"base_compensation": {"value": 0, "references": ["5-1q", "5-1w"]},
|
||||
"bonus": {"value": 0, "references": []},
|
||||
"incentive": {"value": 0, "references": []},
|
||||
"other": {"value": 0, "references": []},
|
||||
"non_fixed_compensation": {"value": "", "references": []},
|
||||
"first_class_travel": {"value": "", "references": []},
|
||||
"housing_allowance": {"value": "", "references": []},
|
||||
"expense_account_usage": {"value": "", "references": []},
|
||||
"supplemental_retirement": {"value": "", "references": []},
|
||||
},
|
||||
"political_lobbying_activities": {
|
||||
"lobbying_expenditures_direct": {"value": 0, "references": []},
|
||||
"lobbying_expenditures_grassroots": {"value": 0, "references": []},
|
||||
"election_501h_status": {"value": "", "references": []},
|
||||
"political_campaign_expenditures": {"value": 0, "references": []},
|
||||
"related_organizations_affiliates": {"value": "", "references": []},
|
||||
},
|
||||
"investments_endowment": {
|
||||
"investment_types": {"value": "", "references": []},
|
||||
"donor_restricted_endowment_values": {"value": 0, "references": []},
|
||||
"net_appreciation_depreciation": {"value": 0, "references": []},
|
||||
"related_organization_transactions": {"value": "", "references": []},
|
||||
"loans_to_from_related_parties": {"value": "", "references": []},
|
||||
},
|
||||
"tax_compliance_penalties": {
|
||||
"penalties_excise_taxes_reported": {
|
||||
"value": "No",
|
||||
"references": ["3-I"],
|
||||
},
|
||||
"unrelated_business_income_disclosure": {
|
||||
"value": "No",
|
||||
"references": ["3-Y"],
|
||||
},
|
||||
"foreign_bank_account_reporting": {
|
||||
"value": "No",
|
||||
"references": ["4-H"],
|
||||
},
|
||||
"schedule_o_narrative_explanations": {"value": "", "references": []},
|
||||
},
|
||||
},
|
||||
"metadata": {
|
||||
"filename": "markdown.md",
|
||||
"org_id": None,
|
||||
"duration_ms": 16656,
|
||||
"credit_usage": 27.2,
|
||||
"job_id": "nnmr8lcxtykk5ll5wodjtrnn6",
|
||||
"version": "extract-20250930",
|
||||
},
|
||||
}
|
||||
|
||||
result = await form_auditor.build_audit_report(data)
|
||||
|
||||
return result.model_dump_json()
|
||||
|
||||
|
||||
@router.post("/chat")
async def chat(request: Request) -> Response:
    """Handle a POST to ``/chat``.

    The raw request is handed, together with the module-level ``agent``, to
    ``VercelAIAdapter.dispatch_request`` and whatever that call produces is
    returned to the client unchanged.
    """
    adapter_response = await VercelAIAdapter.dispatch_request(request, agent=agent)
    return adapter_response
|
||||
|
||||
@@ -2,17 +2,18 @@
|
||||
Router para procesamiento de PDFs con LandingAI.
|
||||
Soporta dos modos: rápido (solo parse) y extracción (parse + extract con schema).
|
||||
"""
|
||||
|
||||
import logging
|
||||
import time
|
||||
from typing import List, Literal, Optional
|
||||
|
||||
from fastapi import APIRouter, HTTPException
|
||||
from pydantic import BaseModel, Field
|
||||
from typing import Optional, List, Literal
|
||||
|
||||
from langchain_core.documents import Document
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from ..services.landingai_service import get_landingai_service
|
||||
from ..services.chunking_service import get_chunking_service
|
||||
from ..repositories.schema_repository import get_schema_repository
|
||||
from ..services.chunking_service import get_chunking_service
|
||||
from ..services.landingai_service import get_landingai_service
|
||||
from ..utils.chunking.token_manager import TokenManager
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -22,6 +23,7 @@ router = APIRouter(prefix="/api/v1/chunking-landingai", tags=["chunking-landinga
|
||||
|
||||
class ProcessLandingAIRequest(BaseModel):
|
||||
"""Request para procesar PDF con LandingAI"""
|
||||
|
||||
file_name: str = Field(..., description="Nombre del archivo PDF")
|
||||
tema: str = Field(..., description="Tema/carpeta del archivo")
|
||||
collection_name: str = Field(..., description="Colección de Qdrant")
|
||||
@@ -29,34 +31,33 @@ class ProcessLandingAIRequest(BaseModel):
|
||||
# Modo de procesamiento
|
||||
mode: Literal["quick", "extract"] = Field(
|
||||
default="quick",
|
||||
description="Modo: 'quick' (solo parse) o 'extract' (parse + datos estructurados)"
|
||||
description="Modo: 'quick' (solo parse) o 'extract' (parse + datos estructurados)",
|
||||
)
|
||||
|
||||
# Schema (obligatorio si mode='extract')
|
||||
schema_id: Optional[str] = Field(
|
||||
None,
|
||||
description="ID del schema a usar (requerido si mode='extract')"
|
||||
None, description="ID del schema a usar (requerido si mode='extract')"
|
||||
)
|
||||
|
||||
# Configuración de chunks
|
||||
include_chunk_types: List[str] = Field(
|
||||
default=["text", "table"],
|
||||
description="Tipos de chunks a incluir: text, table, figure, etc."
|
||||
description="Tipos de chunks a incluir: text, table, figure, etc.",
|
||||
)
|
||||
max_tokens_per_chunk: int = Field(
|
||||
default=1500,
|
||||
ge=500,
|
||||
le=3000,
|
||||
description="Tokens máximos por chunk (flexible para tablas/figuras)"
|
||||
description="Tokens máximos por chunk (flexible para tablas/figuras)",
|
||||
)
|
||||
merge_small_chunks: bool = Field(
|
||||
default=True,
|
||||
description="Unir chunks pequeños de la misma página y tipo"
|
||||
default=True, description="Unir chunks pequeños de la misma página y tipo"
|
||||
)
|
||||
|
||||
|
||||
class ProcessLandingAIResponse(BaseModel):
|
||||
"""Response del procesamiento con LandingAI"""
|
||||
|
||||
success: bool
|
||||
mode: str
|
||||
processing_time_seconds: float
|
||||
@@ -97,9 +98,9 @@ async def process_with_landingai(request: ProcessLandingAIRequest):
|
||||
start_time = time.time()
|
||||
|
||||
try:
|
||||
logger.info(f"\n{'='*60}")
|
||||
logger.info(f"INICIANDO PROCESAMIENTO CON LANDINGAI")
|
||||
logger.info(f"{'='*60}")
|
||||
logger.info(f"\n{'=' * 60}")
|
||||
logger.info("INICIANDO PROCESAMIENTO CON LANDINGAI")
|
||||
logger.info(f"{'=' * 60}")
|
||||
logger.info(f"Archivo: {request.file_name}")
|
||||
logger.info(f"Tema: {request.tema}")
|
||||
logger.info(f"Modo: {request.mode}")
|
||||
@@ -111,7 +112,7 @@ async def process_with_landingai(request: ProcessLandingAIRequest):
|
||||
if not request.schema_id:
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail="schema_id es requerido cuando mode='extract'"
|
||||
detail="schema_id es requerido cuando mode='extract'",
|
||||
)
|
||||
|
||||
schema_repo = get_schema_repository()
|
||||
@@ -119,8 +120,7 @@ async def process_with_landingai(request: ProcessLandingAIRequest):
|
||||
|
||||
if not custom_schema:
|
||||
raise HTTPException(
|
||||
status_code=404,
|
||||
detail=f"Schema no encontrado: {request.schema_id}"
|
||||
status_code=404, detail=f"Schema no encontrado: {request.schema_id}"
|
||||
)
|
||||
|
||||
logger.info(f"Schema seleccionado: {custom_schema.schema_name}")
|
||||
@@ -131,14 +131,12 @@ async def process_with_landingai(request: ProcessLandingAIRequest):
|
||||
|
||||
try:
|
||||
pdf_bytes = await chunking_service.download_pdf_from_blob(
|
||||
request.file_name,
|
||||
request.tema
|
||||
request.file_name, request.tema
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"Error descargando PDF: {e}")
|
||||
raise HTTPException(
|
||||
status_code=404,
|
||||
detail=f"No se pudo descargar el PDF: {str(e)}"
|
||||
status_code=404, detail=f"No se pudo descargar el PDF: {str(e)}"
|
||||
)
|
||||
|
||||
# 3. Procesar con LandingAI
|
||||
@@ -150,13 +148,12 @@ async def process_with_landingai(request: ProcessLandingAIRequest):
|
||||
pdf_bytes=pdf_bytes,
|
||||
file_name=request.file_name,
|
||||
custom_schema=custom_schema,
|
||||
include_chunk_types=request.include_chunk_types
|
||||
include_chunk_types=request.include_chunk_types,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"Error en LandingAI: {e}")
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail=f"Error procesando con LandingAI: {str(e)}"
|
||||
status_code=500, detail=f"Error procesando con LandingAI: {str(e)}"
|
||||
)
|
||||
|
||||
documents = result["chunks"]
|
||||
@@ -164,7 +161,7 @@ async def process_with_landingai(request: ProcessLandingAIRequest):
|
||||
if not documents:
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail="No se generaron chunks después del procesamiento"
|
||||
detail="No se generaron chunks después del procesamiento",
|
||||
)
|
||||
|
||||
# 4. Aplicar control flexible de tokens
|
||||
@@ -172,7 +169,7 @@ async def process_with_landingai(request: ProcessLandingAIRequest):
|
||||
documents = _apply_flexible_token_control(
|
||||
documents,
|
||||
max_tokens=request.max_tokens_per_chunk,
|
||||
merge_small=request.merge_small_chunks
|
||||
merge_small=request.merge_small_chunks,
|
||||
)
|
||||
|
||||
# 5. Generar embeddings
|
||||
@@ -180,13 +177,16 @@ async def process_with_landingai(request: ProcessLandingAIRequest):
|
||||
texts = [doc.page_content for doc in documents]
|
||||
|
||||
try:
|
||||
embeddings = await chunking_service.embedding_service.generate_embeddings_batch(texts)
|
||||
embeddings = (
|
||||
await chunking_service.embedding_service.generate_embeddings_batch(
|
||||
texts
|
||||
)
|
||||
)
|
||||
logger.info(f"Embeddings generados: {len(embeddings)} vectores")
|
||||
except Exception as e:
|
||||
logger.error(f"Error generando embeddings: {e}")
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail=f"Error generando embeddings: {str(e)}"
|
||||
status_code=500, detail=f"Error generando embeddings: {str(e)}"
|
||||
)
|
||||
|
||||
# 6. Preparar chunks para Qdrant con IDs determinísticos
|
||||
@@ -198,38 +198,38 @@ async def process_with_landingai(request: ProcessLandingAIRequest):
|
||||
chunk_id = chunking_service._generate_deterministic_id(
|
||||
file_name=request.file_name,
|
||||
page=doc.metadata.get("page", 1),
|
||||
chunk_index=doc.metadata.get("chunk_id", str(idx))
|
||||
chunk_index=doc.metadata.get("chunk_id", str(idx)),
|
||||
)
|
||||
|
||||
qdrant_chunks.append({
|
||||
"id": chunk_id,
|
||||
"vector": embedding,
|
||||
"payload": {
|
||||
"page_content": doc.page_content,
|
||||
"metadata": doc.metadata # Metadata rica de LandingAI
|
||||
qdrant_chunks.append(
|
||||
{
|
||||
"id": chunk_id,
|
||||
"vector": embedding,
|
||||
"payload": {
|
||||
"page_content": doc.page_content,
|
||||
"metadata": doc.metadata, # Metadata rica de LandingAI
|
||||
},
|
||||
}
|
||||
})
|
||||
)
|
||||
|
||||
# 7. Subir a Qdrant
|
||||
try:
|
||||
upload_result = await chunking_service.vector_db.add_chunks(
|
||||
request.collection_name,
|
||||
qdrant_chunks
|
||||
request.collection_name, qdrant_chunks
|
||||
)
|
||||
logger.info(f"Subida completada: {upload_result['chunks_added']} chunks")
|
||||
except Exception as e:
|
||||
logger.error(f"Error subiendo a Qdrant: {e}")
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail=f"Error subiendo a Qdrant: {str(e)}"
|
||||
status_code=500, detail=f"Error subiendo a Qdrant: {str(e)}"
|
||||
)
|
||||
|
||||
# Tiempo total
|
||||
processing_time = time.time() - start_time
|
||||
|
||||
logger.info(f"\n{'='*60}")
|
||||
logger.info(f"\n{'=' * 60}")
|
||||
logger.info(f"PROCESAMIENTO COMPLETADO")
|
||||
logger.info(f"{'='*60}")
|
||||
logger.info(f"{'=' * 60}")
|
||||
logger.info(f"Tiempo: {processing_time:.2f}s")
|
||||
logger.info(f"Chunks procesados: {len(documents)}")
|
||||
logger.info(f"Chunks subidos: {upload_result['chunks_added']}")
|
||||
@@ -245,23 +245,18 @@ async def process_with_landingai(request: ProcessLandingAIRequest):
|
||||
schema_used=custom_schema.schema_id if custom_schema else None,
|
||||
extracted_data=result.get("extracted_data"),
|
||||
parse_metadata=result["parse_metadata"],
|
||||
message=f"PDF procesado exitosamente en modo {request.mode}"
|
||||
message=f"PDF procesado exitosamente en modo {request.mode}",
|
||||
)
|
||||
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"Error inesperado en procesamiento: {e}")
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail=f"Error inesperado: {str(e)}"
|
||||
)
|
||||
raise HTTPException(status_code=500, detail=f"Error inesperado: {str(e)}")
|
||||
|
||||
|
||||
def _apply_flexible_token_control(
|
||||
documents: List[Document],
|
||||
max_tokens: int,
|
||||
merge_small: bool
|
||||
documents: List[Document], max_tokens: int, merge_small: bool
|
||||
) -> List[Document]:
|
||||
"""
|
||||
Aplica control flexible de tokens (Opción C del diseño).
|
||||
@@ -306,14 +301,10 @@ def _apply_flexible_token_control(
|
||||
|
||||
else:
|
||||
# Intentar merge si es pequeño
|
||||
if (
|
||||
merge_small and
|
||||
tokens < max_tokens * 0.5 and
|
||||
i < len(documents) - 1
|
||||
):
|
||||
if merge_small and tokens < max_tokens * 0.5 and i < len(documents) - 1:
|
||||
next_doc = documents[i + 1]
|
||||
if _can_merge(doc, next_doc, max_tokens, token_manager):
|
||||
logger.debug(f"Merging chunks {i} y {i+1}")
|
||||
logger.debug(f"Merging chunks {i} y {i + 1}")
|
||||
doc = _merge_documents(doc, next_doc)
|
||||
i += 1 # Skip next
|
||||
|
||||
@@ -326,9 +317,7 @@ def _apply_flexible_token_control(
|
||||
|
||||
|
||||
def _split_large_chunk(
|
||||
doc: Document,
|
||||
max_tokens: int,
|
||||
token_manager: TokenManager
|
||||
doc: Document, max_tokens: int, token_manager: TokenManager
|
||||
) -> List[Document]:
|
||||
"""Divide un chunk grande en sub-chunks"""
|
||||
content = doc.page_content
|
||||
@@ -343,8 +332,7 @@ def _split_large_chunk(
|
||||
# Guardar chunk actual
|
||||
sub_content = " ".join(current_chunk)
|
||||
sub_doc = Document(
|
||||
page_content=sub_content,
|
||||
metadata={**doc.metadata, "is_split": True}
|
||||
page_content=sub_content, metadata={**doc.metadata, "is_split": True}
|
||||
)
|
||||
sub_chunks.append(sub_doc)
|
||||
current_chunk = [word]
|
||||
@@ -357,8 +345,7 @@ def _split_large_chunk(
|
||||
if current_chunk:
|
||||
sub_content = " ".join(current_chunk)
|
||||
sub_doc = Document(
|
||||
page_content=sub_content,
|
||||
metadata={**doc.metadata, "is_split": True}
|
||||
page_content=sub_content, metadata={**doc.metadata, "is_split": True}
|
||||
)
|
||||
sub_chunks.append(sub_doc)
|
||||
|
||||
@@ -366,10 +353,7 @@ def _split_large_chunk(
|
||||
|
||||
|
||||
def _can_merge(
|
||||
doc1: Document,
|
||||
doc2: Document,
|
||||
max_tokens: int,
|
||||
token_manager: TokenManager
|
||||
doc1: Document, doc2: Document, max_tokens: int, token_manager: TokenManager
|
||||
) -> bool:
|
||||
"""Verifica si dos docs se pueden mergear"""
|
||||
# Misma página
|
||||
@@ -391,6 +375,5 @@ def _merge_documents(doc1: Document, doc2: Document) -> Document:
|
||||
"""Mergea dos documentos"""
|
||||
merged_content = f"{doc1.page_content}\n\n{doc2.page_content}"
|
||||
return Document(
|
||||
page_content=merged_content,
|
||||
metadata={**doc1.metadata, "is_merged": True}
|
||||
page_content=merged_content, metadata={**doc1.metadata, "is_merged": True}
|
||||
)
|
||||
|
||||
@@ -1,10 +1,12 @@
|
||||
import logging
|
||||
from typing import Optional
|
||||
|
||||
from fastapi import APIRouter, HTTPException
|
||||
from pydantic import BaseModel
|
||||
|
||||
from ..models.dataroom import DataRoom
|
||||
from ..models.vector_models import CollectionCreateRequest
|
||||
from ..services.azure_service import azure_service
|
||||
from ..services.vector_service import vector_service
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -16,9 +18,136 @@ class DataroomCreate(BaseModel):
|
||||
storage: str = ""
|
||||
|
||||
|
||||
class DataroomInfo(BaseModel):
    """Response model returned by the ``/{dataroom_name}/info`` endpoint."""

    # Identity fields copied from the stored dataroom record.
    name: str
    collection: str
    storage: str
    # Number of files listed for the dataroom.
    file_count: int
    # Sum of the listed files' sizes, in bytes and in megabytes
    # (the endpoint rounds the megabyte figure to two decimals).
    total_size_bytes: int
    total_size_mb: float
    # Whether the vector collection lookup reported the collection as existing.
    collection_exists: bool
    # Vector count reported for the collection; None when the collection does
    # not exist or its details could not be fetched.
    vector_count: Optional[int]
    # Collection details (vector/point/segment counts and status) as a plain
    # dict; None under the same conditions as vector_count.
    collection_info: Optional[dict]
    # Lower-cased file extension -> number of files with that extension.
    file_types: dict
    # Up to five file summaries, each with "name", "size_mb" and
    # "last_modified" keys.
    recent_files: list
|
||||
|
||||
|
||||
router = APIRouter(prefix="/dataroom", tags=["Dataroom"])
|
||||
|
||||
|
||||
def _summarize_files(files_data: list) -> dict:
    """Aggregate size, extension and recency figures for a file listing.

    Args:
        files_data: Iterable of dicts describing files; the keys read are
            ``"name"``, ``"size"`` and ``"last_modified"``. Missing or
            ``None`` values are tolerated.

    Returns:
        A dict with the keys ``file_count``, ``total_size_bytes``,
        ``total_size_mb``, ``file_types`` and ``recent_files``.
    """
    bytes_per_mb = 1024 * 1024
    files = list(files_data or [])

    total_size_bytes = 0
    file_types: dict = {}
    summaries = []

    for file_data in files:
        filename = file_data.get("name") or ""
        # Treat a missing or None size as zero instead of failing the sum.
        size = file_data.get("size") or 0
        total_size_bytes += size

        # Count file types by (lower-cased) extension.
        if "." in filename:
            ext = filename.rsplit(".", 1)[-1].lower()
            file_types[ext] = file_types.get(ext, 0) + 1

        summaries.append(
            {
                "name": filename,
                "size_mb": round(size / bytes_per_mb, 2),
                "last_modified": file_data.get("last_modified"),
            }
        )

    # Newest first. The leading boolean keeps entries without a timestamp at
    # the end and prevents None from ever being compared with a real value.
    summaries.sort(
        key=lambda item: (
            item["last_modified"] is not None,
            item["last_modified"],
        ),
        reverse=True,
    )

    total_size_mb = (
        round(total_size_bytes / bytes_per_mb, 2) if total_size_bytes > 0 else 0.0
    )

    return {
        "file_count": len(files),
        "total_size_bytes": total_size_bytes,
        "total_size_mb": total_size_mb,
        "file_types": file_types,
        # Select the five most recent only AFTER ordering the full listing.
        "recent_files": summaries[:5],
    }


@router.get("/{dataroom_name}/info")
async def dataroom_info(dataroom_name: str) -> DataroomInfo:
    """Return detailed information about a single dataroom.

    Combines the stored dataroom record with file metrics from
    ``azure_service`` and collection details from ``vector_service``.
    Failures of either service are logged as warnings and result in empty
    file metrics / absent collection details rather than an error response.

    Args:
        dataroom_name: Name of the dataroom to describe.

    Returns:
        A populated ``DataroomInfo``.

    Raises:
        HTTPException: 404 when no dataroom has the given name; 500 on any
            other unexpected error.
    """
    try:
        # Look the dataroom up by name among all stored DataRoom records.
        dataroom = next(
            (room for room in DataRoom.find().all() if room.name == dataroom_name),
            None,
        )
        if dataroom is None:
            raise HTTPException(
                status_code=404, detail=f"Dataroom '{dataroom_name}' not found"
            )

        # File listing is best-effort: fall back to an empty listing.
        try:
            files_data = await azure_service.list_files(dataroom_name)
        except Exception as e:
            logger.warning(f"Could not fetch files for dataroom '{dataroom_name}': {e}")
            files_data = []

        file_stats = _summarize_files(files_data)

        # Vector collection details are best-effort as well.
        # NOTE(review): the lookup is keyed by dataroom_name, not by
        # dataroom.collection — confirm these are always the same value.
        collection_exists = False
        vector_count = None
        collection_info = None

        try:
            collection_exists_response = await vector_service.check_collection_exists(
                dataroom_name
            )
            collection_exists = collection_exists_response.exists

            if collection_exists:
                collection_info_response = await vector_service.get_collection_info(
                    dataroom_name
                )
                if collection_info_response:
                    collection_info = {
                        "vectors_count": collection_info_response.vectors_count,
                        "indexed_vectors_count": collection_info_response.indexed_vectors_count,
                        "points_count": collection_info_response.points_count,
                        "segments_count": collection_info_response.segments_count,
                        "status": collection_info_response.status,
                    }
                    vector_count = collection_info_response.vectors_count
        except Exception as e:
            logger.warning(
                f"Could not fetch collection info for '{dataroom_name}': {e}"
            )

        file_count = file_stats["file_count"]
        total_size_mb = file_stats["total_size_mb"]
        logger.info(
            f"Retrieved info for dataroom '{dataroom_name}': {file_count} files, {total_size_mb}MB"
        )

        return DataroomInfo(
            name=dataroom.name,
            collection=dataroom.collection,
            storage=dataroom.storage,
            collection_exists=collection_exists,
            vector_count=vector_count,
            collection_info=collection_info,
            **file_stats,
        )

    except HTTPException:
        # Preserve deliberate HTTP errors (e.g. the 404 above) untouched.
        raise
    except Exception as e:
        logger.error(f"Error getting dataroom info for '{dataroom_name}': {e}")
        raise HTTPException(
            status_code=500, detail=f"Error getting dataroom info: {str(e)}"
        ) from e
|
||||
|
||||
|
||||
@router.get("/")
|
||||
async def list_datarooms():
|
||||
"""
|
||||
|
||||
@@ -33,3 +33,8 @@ dependencies = [
|
||||
[project.scripts]
|
||||
dev = "uvicorn app.main:app --host 0.0.0.0 --port 8000 --reload"
|
||||
start = "uvicorn app.main:app --host 0.0.0.0 --port 8000"
|
||||
|
||||
[dependency-groups]
|
||||
dev = [
|
||||
"ruff>=0.14.4",
|
||||
]
|
||||
|
||||
34
backend/uv.lock
generated
34
backend/uv.lock
generated
@@ -86,6 +86,11 @@ dependencies = [
|
||||
{ name = "websockets" },
|
||||
]
|
||||
|
||||
[package.dev-dependencies]
|
||||
dev = [
|
||||
{ name = "ruff" },
|
||||
]
|
||||
|
||||
[package.metadata]
|
||||
requires-dist = [
|
||||
{ name = "azure-storage-blob", specifier = ">=12.26.0" },
|
||||
@@ -110,6 +115,9 @@ requires-dist = [
|
||||
{ name = "websockets", specifier = ">=14.1" },
|
||||
]
|
||||
|
||||
[package.metadata.requires-dev]
|
||||
dev = [{ name = "ruff", specifier = ">=0.14.4" }]
|
||||
|
||||
[[package]]
|
||||
name = "cachetools"
|
||||
version = "6.2.1"
|
||||
@@ -1789,6 +1797,32 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/64/8d/0133e4eb4beed9e425d9a98ed6e081a55d195481b7632472be1af08d2f6b/rsa-4.9.1-py3-none-any.whl", hash = "sha256:68635866661c6836b8d39430f97a996acbd61bfa49406748ea243539fe239762", size = 34696, upload-time = "2025-04-16T09:51:17.142Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ruff"
|
||||
version = "0.14.4"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/df/55/cccfca45157a2031dcbb5a462a67f7cf27f8b37d4b3b1cd7438f0f5c1df6/ruff-0.14.4.tar.gz", hash = "sha256:f459a49fe1085a749f15414ca76f61595f1a2cc8778ed7c279b6ca2e1fd19df3", size = 5587844, upload-time = "2025-11-06T22:07:45.033Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/17/b9/67240254166ae1eaa38dec32265e9153ac53645a6c6670ed36ad00722af8/ruff-0.14.4-py3-none-linux_armv6l.whl", hash = "sha256:e6604613ffbcf2297cd5dcba0e0ac9bd0c11dc026442dfbb614504e87c349518", size = 12606781, upload-time = "2025-11-06T22:07:01.841Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/46/c8/09b3ab245d8652eafe5256ab59718641429f68681ee713ff06c5c549f156/ruff-0.14.4-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:d99c0b52b6f0598acede45ee78288e5e9b4409d1ce7f661f0fa36d4cbeadf9a4", size = 12946765, upload-time = "2025-11-06T22:07:05.858Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/14/bb/1564b000219144bf5eed2359edc94c3590dd49d510751dad26202c18a17d/ruff-0.14.4-py3-none-macosx_11_0_arm64.whl", hash = "sha256:9358d490ec030f1b51d048a7fd6ead418ed0826daf6149e95e30aa67c168af33", size = 11928120, upload-time = "2025-11-06T22:07:08.023Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/a3/92/d5f1770e9988cc0742fefaa351e840d9aef04ec24ae1be36f333f96d5704/ruff-0.14.4-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:81b40d27924f1f02dfa827b9c0712a13c0e4b108421665322218fc38caf615c2", size = 12370877, upload-time = "2025-11-06T22:07:10.015Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/e2/29/e9282efa55f1973d109faf839a63235575519c8ad278cc87a182a366810e/ruff-0.14.4-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f5e649052a294fe00818650712083cddc6cc02744afaf37202c65df9ea52efa5", size = 12408538, upload-time = "2025-11-06T22:07:13.085Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/8e/01/930ed6ecfce130144b32d77d8d69f5c610e6d23e6857927150adf5d7379a/ruff-0.14.4-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:aa082a8f878deeba955531f975881828fd6afd90dfa757c2b0808aadb437136e", size = 13141942, upload-time = "2025-11-06T22:07:15.386Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/6a/46/a9c89b42b231a9f487233f17a89cbef9d5acd538d9488687a02ad288fa6b/ruff-0.14.4-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:1043c6811c2419e39011890f14d0a30470f19d47d197c4858b2787dfa698f6c8", size = 14544306, upload-time = "2025-11-06T22:07:17.631Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/78/96/9c6cf86491f2a6d52758b830b89b78c2ae61e8ca66b86bf5a20af73d20e6/ruff-0.14.4-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a9f3a936ac27fb7c2a93e4f4b943a662775879ac579a433291a6f69428722649", size = 14210427, upload-time = "2025-11-06T22:07:19.832Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/71/f4/0666fe7769a54f63e66404e8ff698de1dcde733e12e2fd1c9c6efb689cb5/ruff-0.14.4-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:95643ffd209ce78bc113266b88fba3d39e0461f0cbc8b55fb92505030fb4a850", size = 13658488, upload-time = "2025-11-06T22:07:22.32Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/ee/79/6ad4dda2cfd55e41ac9ed6d73ef9ab9475b1eef69f3a85957210c74ba12c/ruff-0.14.4-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:456daa2fa1021bc86ca857f43fe29d5d8b3f0e55e9f90c58c317c1dcc2afc7b5", size = 13354908, upload-time = "2025-11-06T22:07:24.347Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/b5/60/f0b6990f740bb15c1588601d19d21bcc1bd5de4330a07222041678a8e04f/ruff-0.14.4-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:f911bba769e4a9f51af6e70037bb72b70b45a16db5ce73e1f72aefe6f6d62132", size = 13587803, upload-time = "2025-11-06T22:07:26.327Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/c9/da/eaaada586f80068728338e0ef7f29ab3e4a08a692f92eb901a4f06bbff24/ruff-0.14.4-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:76158a7369b3979fa878612c623a7e5430c18b2fd1c73b214945c2d06337db67", size = 12279654, upload-time = "2025-11-06T22:07:28.46Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/66/d4/b1d0e82cf9bf8aed10a6d45be47b3f402730aa2c438164424783ac88c0ed/ruff-0.14.4-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:f3b8f3b442d2b14c246e7aeca2e75915159e06a3540e2f4bed9f50d062d24469", size = 12357520, upload-time = "2025-11-06T22:07:31.468Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/04/f4/53e2b42cc82804617e5c7950b7079d79996c27e99c4652131c6a1100657f/ruff-0.14.4-py3-none-musllinux_1_2_i686.whl", hash = "sha256:c62da9a06779deecf4d17ed04939ae8b31b517643b26370c3be1d26f3ef7dbde", size = 12719431, upload-time = "2025-11-06T22:07:33.831Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/a2/94/80e3d74ed9a72d64e94a7b7706b1c1ebaa315ef2076fd33581f6a1cd2f95/ruff-0.14.4-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:5a443a83a1506c684e98acb8cb55abaf3ef725078be40237463dae4463366349", size = 13464394, upload-time = "2025-11-06T22:07:35.905Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/54/1a/a49f071f04c42345c793d22f6cf5e0920095e286119ee53a64a3a3004825/ruff-0.14.4-py3-none-win32.whl", hash = "sha256:643b69cb63cd996f1fc7229da726d07ac307eae442dd8974dbc7cf22c1e18fff", size = 12493429, upload-time = "2025-11-06T22:07:38.43Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/bc/22/e58c43e641145a2b670328fb98bc384e20679b5774258b1e540207580266/ruff-0.14.4-py3-none-win_amd64.whl", hash = "sha256:26673da283b96fe35fa0c939bf8411abec47111644aa9f7cfbd3c573fb125d2c", size = 13635380, upload-time = "2025-11-06T22:07:40.496Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/30/bd/4168a751ddbbf43e86544b4de8b5c3b7be8d7167a2a5cb977d274e04f0a1/ruff-0.14.4-py3-none-win_arm64.whl", hash = "sha256:dd09c292479596b0e6fec8cd95c65c3a6dc68e9ad17b8f2382130f87ff6a75bb", size = 12663065, upload-time = "2025-11-06T22:07:42.603Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "setuptools"
|
||||
version = "80.9.0"
|
||||
|
||||
Reference in New Issue
Block a user