wip chat

2025-11-07 09:41:18 -06:00
parent cafe0bf5f3
commit af9b5fed01
21 changed files with 3065 additions and 266 deletions
--- a/backend/app/agents/form_auditor/__init__.py
+++ b/backend/app/agents/form_auditor/__init__.py
@@ -0,0 +1,39 @@
+from __future__ import annotations
+
+from typing import Any
+
+from .agent import agent, prepare_initial_findings
+from .models import (
+    AuditReport,
+    ExtractedIrsForm990PfDataSchema,
+    ValidatorState,
+)
+
+
+async def build_audit_report(payload: dict[str, Any]) -> AuditReport:
+    """Validate an extraction payload and run the auditor agent over it.
+
+    Args:
+        payload: Mapping with a required ``"extraction"`` entry, validated
+            against ``ExtractedIrsForm990PfDataSchema``, and an optional
+            ``"metadata"`` entry that is ignored unless it is a dict.
+
+    Returns:
+        The ``output`` attribute of the agent run result.
+
+    Raises:
+        ValueError: If ``payload`` has no ``"extraction"`` entry (or it is None).
+    """
+    raw_extraction = payload.get("extraction")
+    if raw_extraction is None:
+        raise ValueError("Payload missing 'extraction' key.")
+    extraction = ExtractedIrsForm990PfDataSchema.model_validate(raw_extraction)
+
+    # Deterministic checks run before the model is consulted; they travel with
+    # the run as part of the dependency state.
+    deterministic_findings = prepare_initial_findings(extraction)
+
+    # Metadata keys are normalised to strings; anything that is not a dict is dropped.
+    raw_metadata = payload.get("metadata")
+    metadata: dict[str, Any] = (
+        {str(key): value for key, value in raw_metadata.items()}
+        if isinstance(raw_metadata, dict)
+        else {}
+    )
+
+    instructions = (
+        "Review the Form 990 extraction and deterministic checks. "
+        "Validate or adjust the findings, add any additional issues or mitigations, "
+        "and craft narrative section summaries that highlight the most material points. "
+        "Focus on concrete evidence; do not fabricate figures."
+    )
+    run_result = await agent.run(
+        instructions,
+        deps=ValidatorState(
+            extraction=extraction,
+            initial_findings=deterministic_findings,
+            metadata=metadata,
+        ),
+    )
+    return run_result.output
--- a/backend/app/agents/form_auditor/agent.py
+++ b/backend/app/agents/form_auditor/agent.py
@@ -0,0 +1,155 @@
+from __future__ import annotations
+
+from collections.abc import Iterable
+
+from pydantic_ai import Agent, RunContext
+from pydantic_ai.models.openai import OpenAIChatModel
+from pydantic_ai.providers.azure import AzureProvider
+
+from app.core.config import settings
+
+from .checks import (
+    aggregate_findings,
+    build_section_summaries,
+    check_balance_sheet_presence,
+    check_board_engagement,
+    check_expense_totals,
+    check_fundraising_alignment,
+    check_governance_policies,
+    check_missing_operational_details,
+    check_revenue_totals,
+    compose_overall_summary,
+    irs_ein_lookup,
+)
+from .models import (
+    AuditFinding,
+    AuditReport,
+    ExtractedIrsForm990PfDataSchema,
+    Severity,
+    ValidatorState,
+)
+
+# Azure OpenAI connection used by the auditor agent. Endpoint, API version and
+# key are read from application settings when this module is imported.
+provider = AzureProvider(
+    azure_endpoint=settings.AZURE_OPENAI_ENDPOINT,
+    api_version=settings.AZURE_OPENAI_API_VERSION,
+    api_key=settings.AZURE_OPENAI_API_KEY,
+)
+# NOTE(review): "gpt-4o" is hard-coded; with Azure this presumably has to match
+# the deployment name — confirm, or move it into settings.
+model = OpenAIChatModel(model_name="gpt-4o", provider=provider)
+# The module's single `agent` is constructed further down, together with its
+# deps/output types and system prompt; no bare placeholder agent is created here.
+
+
+def prepare_initial_findings(
+    extraction: ExtractedIrsForm990PfDataSchema,
+) -> list[AuditFinding]:
+    """Run every deterministic check against ``extraction``.
+
+    The six single-result checks run first, in a fixed order, followed by the
+    governance policy check, which may contribute zero or more findings.
+    """
+    single_result_checks = (
+        check_revenue_totals,
+        check_expense_totals,
+        check_fundraising_alignment,
+        check_balance_sheet_presence,
+        check_board_engagement,
+        check_missing_operational_details,
+    )
+    collected = [run_check(extraction) for run_check in single_result_checks]
+    collected += check_governance_policies(extraction)
+    return collected
+
+
+def _merge_findings(
+    findings: Iterable[AuditFinding],
+    added: Iterable[AuditFinding],
+) -> list[AuditFinding]:
+    """Combine two finding sequences, keyed by ``check_id``.
+
+    A later entry with the same ``check_id`` replaces the earlier one but keeps
+    the earlier one's position; entries from ``added`` therefore override
+    entries from ``findings``.
+    """
+    by_check_id: dict[str, AuditFinding] = {}
+    for source in (findings, added):
+        for item in source:
+            by_check_id[item.check_id] = item
+    return [*by_check_id.values()]
+
+
+# The auditor agent. Runs receive a ValidatorState as `deps` and must produce an
+# AuditReport. The tool functions and the output validator defined below in
+# this module are registered on this instance through its decorators.
+agent = Agent(
+    model=model,
+    name="FormValidator",
+    deps_type=ValidatorState,
+    output_type=AuditReport,
+    # The severity labels quoted in the prompt are the Severity enum values.
+    system_prompt=(
+        "You are a Form 990 auditor. Review the extraction data and deterministic "
+        "checks provided in deps. Use tools to confirm calculations, add or adjust "
+        "findings, supply mitigation guidance, and craft concise section summaries. "
+        "The AuditReport must include severity (`Pass`, `Warning`, `Error`), "
+        "confidence scores, mitigation advice, section summaries, and an overall summary. "
+        "Ground every statement in supplied data; do not invent financial figures."
+    ),
+)
+
+
+@agent.tool
+def revenue_check(ctx: RunContext[ValidatorState]) -> AuditFinding:
+    """Re-run the deterministic revenue reconciliation on the extraction.
+
+    Sums every revenue line item other than total revenue and compares the
+    result with the reported total revenue (tolerance of 1). Returns a
+    `revenue_totals` finding that is `Pass` when they agree and `Error` otherwise.
+    """
+    return check_revenue_totals(ctx.deps.extraction)
+
+
+@agent.tool
+def expense_check(ctx: RunContext[ValidatorState]) -> AuditFinding:
+    """Re-run the deterministic functional-expense reconciliation.
+
+    Adds program services, management & general, and fundraising expenses and
+    compares the sum with reported total expenses (tolerance of 1). Returns an
+    `expense_totals` finding that is `Pass` when they agree and `Error` otherwise.
+    """
+    return check_expense_totals(ctx.deps.extraction)
+
+
+@agent.tool
+def fundraising_alignment_check(ctx: RunContext[ValidatorState]) -> AuditFinding:
+    """Compare fundraising functional expenses with fundraising event expenses.
+
+    Returns a `fundraising_alignment` finding: `Pass` when the two amounts are
+    within 1 of each other, `Warning` when the gap is at most 10% of the
+    reported fundraising expenses, and `Error` otherwise.
+    """
+    return check_fundraising_alignment(ctx.deps.extraction)
+
+
+@agent.tool
+async def verify_ein(ctx: RunContext[ValidatorState]) -> AuditFinding:
+    """Look up the organisation's EIN and report whether it could be confirmed.
+
+    Returns a Compliance finding with check id `irs_ein_match`: `Pass` when the
+    lookup confirms the EIN, otherwise `Warning` with the lookup's note appended
+    to the message. The finding's confidence is the value the lookup reports.
+    """
+    ein = ctx.deps.extraction.core_organization_metadata.ein
+    exists, confidence, note = await irs_ein_lookup(ein)
+
+    # Only severity, message and mitigation depend on the lookup outcome; the
+    # check id, category and confidence are shared by both results.
+    if exists:
+        severity = Severity.PASS
+        message = "EIN confirmed against IRS index."
+        mitigation = "Document verification in the filing workpapers."
+    else:
+        severity = Severity.WARNING
+        message = f"EIN {ein} could not be confirmed. {note}"
+        mitigation = (
+            "Verify the EIN against the IRS EO BMF or IRS determination letter."
+        )
+    return AuditFinding(
+        check_id="irs_ein_match",
+        category="Compliance",
+        severity=severity,
+        message=message,
+        mitigation=mitigation,
+        confidence=confidence,
+    )
+
+
+@agent.output_validator
+def finalize_report(
+    ctx: RunContext[ValidatorState],
+    report: AuditReport,
+) -> AuditReport:
+    """Rebuild the derived parts of the model's report from the merged findings.
+
+    The model's findings are merged over the deterministic ones (same
+    ``check_id`` means the model's entry wins). Overall severity, section
+    summaries and the overall summary are then recomputed from that merged list.
+    Name and EIN prefer the extraction's values, ``year`` comes only from
+    ``metadata["return_year"]``, and ``notes`` falls back to a line naming
+    ``metadata["source"]`` when the model supplied none.
+    """
+    deps = ctx.deps
+    combined = _merge_findings(deps.initial_findings, report.findings)
+    worst_severity = aggregate_findings(combined)
+    section_summaries = build_section_summaries(combined)
+    headline = compose_overall_summary(combined)
+
+    meta = deps.metadata
+    meta_is_dict = isinstance(meta, dict)
+
+    notes = report.notes
+    if notes is None and meta_is_dict and meta.get("source"):
+        notes = f"Reviewed data source: {meta['source']}."
+
+    # The year is taken from metadata only; a value that cannot be converted
+    # to int leaves it unset rather than failing the run.
+    year: int | None = None
+    raw_year = meta.get("return_year") if meta_is_dict else None
+    if raw_year is not None:
+        try:
+            year = int(raw_year)
+        except (TypeError, ValueError):
+            year = None
+
+    org = deps.extraction.core_organization_metadata
+    organisation_name = org.legal_name or report.organisation_name
+    organisation_ein = org.ein or report.organisation_ein
+
+    replacements = {
+        "organisation_ein": organisation_ein,
+        "organisation_name": organisation_name,
+        "year": year,
+        "findings": combined,
+        "overall_severity": worst_severity,
+        "sections": section_summaries,
+        "overall_summary": headline,
+        "notes": notes,
+    }
+    return report.model_copy(update=replacements)
--- a/backend/app/agents/form_auditor/checks.py
+++ b/backend/app/agents/form_auditor/checks.py
@@ -0,0 +1,282 @@
+from __future__ import annotations
+
+from collections import Counter, defaultdict
+
+from .models import (
+    AuditFinding,
+    AuditSectionSummary,
+    ExtractedIrsForm990PfDataSchema,
+    Severity,
+)
+
+
+def aggregate_findings(findings: list[AuditFinding]) -> Severity:
+    """Return the most severe severity among ``findings``.
+
+    Ranking is Error > Warning > Pass. An empty list yields ``Severity.PASS``.
+    """
+    rank = {Severity.PASS: 1, Severity.WARNING: 2, Severity.ERROR: 3}
+    # Seeding the candidates with PASS covers the empty case; max() keeps the
+    # first of equally ranked candidates.
+    candidates = [Severity.PASS, *(item.severity for item in findings)]
+    return max(candidates, key=rank.__getitem__)
+
+
+def check_revenue_totals(data: ExtractedIrsForm990PfDataSchema) -> AuditFinding:
+    """Check that the revenue line items add up to the reported total revenue.
+
+    Every field of the revenue breakdown except ``total_revenue`` is summed; the
+    check passes when that sum is within 1 of the reported total and is an
+    error otherwise.
+    """
+    breakdown = data.revenue_breakdown
+    line_items = breakdown.model_dump()
+    line_items.pop("total_revenue", None)
+    subtotal = sum(line_items.values())
+    reported_total = breakdown.total_revenue
+
+    reconciles = abs(subtotal - reported_total) <= 1
+    if not reconciles:
+        return AuditFinding(
+            check_id="revenue_totals",
+            category="Revenue",
+            severity=Severity.ERROR,
+            message=(
+                f"Revenue categories sum (${subtotal:,.2f}) does not equal reported total "
+                f"(${reported_total:,.2f})."
+            ),
+            mitigation="Recalculate revenue totals and correct line items or Schedule A before filing.",
+            confidence=0.95,
+        )
+    return AuditFinding(
+        check_id="revenue_totals",
+        category="Revenue",
+        severity=Severity.PASS,
+        message=f"Revenue categories sum (${subtotal:,.2f}) matches total revenue.",
+        mitigation="Maintain detailed support for each revenue source to preserve reconciliation trail.",
+        confidence=0.95,
+    )
+
+
+def check_expense_totals(data: ExtractedIrsForm990PfDataSchema) -> AuditFinding:
+    """Check that the three functional expense amounts add up to total expenses.
+
+    Program services, management & general, and fundraising expenses are
+    summed; the check passes when that sum is within 1 of ``total_expenses``
+    and is an error otherwise.
+    """
+    expenses = data.expenses_breakdown
+    program = expenses.program_services_expenses
+    management = expenses.management_general_expenses
+    fundraising = expenses.fundraising_expenses
+    subtotal = program + management + fundraising
+    reported_total = expenses.total_expenses
+
+    reconciles = abs(subtotal - reported_total) <= 1
+    if not reconciles:
+        return AuditFinding(
+            check_id="expense_totals",
+            category="Expenses",
+            severity=Severity.ERROR,
+            message=(
+                f"Functional expenses (${subtotal:,.2f}) do not reconcile to total expenses "
+                f"(${reported_total:,.2f})."
+            ),
+            mitigation="Review Part I, lines 23–27 and reclassify functional expenses to tie to Part II totals.",
+            confidence=0.95,
+        )
+    return AuditFinding(
+        check_id="expense_totals",
+        category="Expenses",
+        severity=Severity.PASS,
+        message="Functional expenses match total expenses.",
+        mitigation="Keep functional allocation workpapers to support the reconciliation.",
+        confidence=0.95,
+    )
+
+
+def check_fundraising_alignment(
+    data: ExtractedIrsForm990PfDataSchema,
+) -> AuditFinding:
+    """Compare fundraising functional expenses with fundraising event expenses.
+
+    Pass when the two differ by at most 1. Otherwise Warning when the reported
+    fundraising amount is non-zero and the gap is at most 10% of it, and Error
+    in every other case.
+    """
+    functional_total = data.expenses_breakdown.fundraising_expenses
+    event_total = data.fundraising_grantmaking.total_fundraising_event_expenses
+    gap = abs(functional_total - event_total)
+
+    if gap <= 1:
+        return AuditFinding(
+            check_id="fundraising_alignment",
+            category="Fundraising",
+            severity=Severity.PASS,
+            message="Fundraising functional expenses align with reported event expenses.",
+            mitigation="Retain event ledgers and allocations to support matching totals.",
+            confidence=0.9,
+        )
+
+    # A zero (falsy) functional total can never qualify for the softer Warning.
+    if functional_total and gap <= functional_total * 0.1:
+        severity = Severity.WARNING
+    else:
+        severity = Severity.ERROR
+    return AuditFinding(
+        check_id="fundraising_alignment",
+        category="Fundraising",
+        severity=severity,
+        message=(
+            f"Fundraising functional expenses (${functional_total:,.2f}) differ from "
+            f"reported event expenses (${event_total:,.2f}) by ${gap:,.2f}."
+        ),
+        mitigation="Reconcile Schedule G and Part I allocations to eliminate the variance.",
+        confidence=0.85,
+    )
+
+
+def check_balance_sheet_presence(
+    data: ExtractedIrsForm990PfDataSchema,
+) -> AuditFinding:
+    """Report whether the extraction carries any balance sheet data.
+
+    Only the truthiness of ``data.balance_sheet`` is inspected; its contents
+    are not validated here.
+    """
+    if not data.balance_sheet:
+        return AuditFinding(
+            check_id="balance_sheet_absent",
+            category="Balance Sheet",
+            severity=Severity.WARNING,
+            message="Balance sheet section is empty; confirm Part II filing requirements.",
+            mitigation="Populate assets, liabilities, and net assets or attach supporting schedules.",
+            confidence=0.6,
+        )
+    return AuditFinding(
+        check_id="balance_sheet_present",
+        category="Balance Sheet",
+        severity=Severity.PASS,
+        message="Balance sheet data is present.",
+        mitigation="Ensure ending net assets tie to Part I, line 30.",
+        confidence=0.7,
+    )
+
+
+def check_governance_policies(
+    data: ExtractedIrsForm990PfDataSchema,
+) -> list[AuditFinding]:
+    """Flag governance disclosures that are missing or answered negatively.
+
+    Policy fields produce a warning when blank or answered "no"/"n"/"false"
+    (case-insensitive). Affirmative fields produce a warning only when blank.
+    Fields that pass produce no finding, so the result may be empty.
+    """
+    disclosure = data.governance_management_disclosure
+    negative_answers = {"no", "n", "false"}
+
+    # (field name, mitigation text) pairs, checked in this order.
+    policy_checks = (
+        (
+            "conflict_of_interest_policy",
+            "Document the policy in Part VI or adopt one prior to filing.",
+        ),
+        (
+            "whistleblower_policy",
+            "Document whistleblower protections for staff and volunteers.",
+        ),
+        (
+            "document_retention_policy",
+            "Adopt and document a record retention policy.",
+        ),
+    )
+    affirmative_checks = (
+        (
+            "financial_statements_reviewed",
+            "Capture whether the board reviewed or audited year-end financials.",
+        ),
+        (
+            "form_990_provided_to_governing_body",
+            "Provide Form 990 to the board before submission and note the date of review.",
+        ),
+    )
+
+    results: list[AuditFinding] = []
+
+    for name, advice in policy_checks:
+        answer = (getattr(disclosure, name) or "").strip()
+        if answer and answer.lower() not in negative_answers:
+            continue
+        label = name.replace("_", " ").title()
+        results.append(
+            AuditFinding(
+                check_id=f"{name}_missing",
+                category="Governance",
+                severity=Severity.WARNING,
+                message=f"{label} not reported or marked 'No'.",
+                mitigation=advice,
+                confidence=0.55,
+            )
+        )
+
+    for name, advice in affirmative_checks:
+        answer = (getattr(disclosure, name) or "").strip()
+        if answer:
+            continue
+        label = name.replace("_", " ").title()
+        results.append(
+            AuditFinding(
+                check_id=f"{name}_blank",
+                category="Governance",
+                severity=Severity.WARNING,
+                message=f"{label} left blank.",
+                mitigation=advice,
+                confidence=0.5,
+            )
+        )
+    return results
+
+
+def check_board_engagement(data: ExtractedIrsForm990PfDataSchema) -> AuditFinding:
+    """Check the combined weekly hours reported for officers and directors.
+
+    Hours are summed across all listed individuals, skipping entries whose
+    hours are None. A total of at least 5 passes; anything lower (including an
+    empty list, which sums to 0) yields a warning.
+    """
+    total_hours = sum(
+        person.average_hours_per_week
+        for person in data.officers_directors_trustees_key_employees
+        if person.average_hours_per_week is not None
+    )
+    meets_threshold = total_hours >= 5
+    if not meets_threshold:
+        return AuditFinding(
+            check_id="board_hours",
+            category="Governance",
+            severity=Severity.WARNING,
+            message=(
+                f"Aggregate reported board hours ({total_hours:.1f} per week) are low; "
+                "confirm entries reflect actual governance involvement."
+            ),
+            mitigation="Verify hours in Part VII; update if officers volunteer significant time.",
+            confidence=0.6,
+        )
+    return AuditFinding(
+        check_id="board_hours",
+        category="Governance",
+        severity=Severity.PASS,
+        message="Officer and director time commitments appear reasonable.",
+        mitigation="Continue documenting board attendance and oversight responsibilities.",
+        confidence=0.7,
+    )
+
+
+def check_missing_operational_details(
+    data: ExtractedIrsForm990PfDataSchema,
+) -> AuditFinding:
+    """Report whether fundraising method descriptions were provided.
+
+    A value that is None, empty, or whitespace-only counts as blank.
+    """
+    raw_text = data.functional_operational_data.fundraising_method_descriptions
+    description = (raw_text or "").strip()
+    if not description:
+        return AuditFinding(
+            check_id="fundraising_methods_missing",
+            category="Operations",
+            severity=Severity.WARNING,
+            message="Fundraising method descriptions are blank.",
+            mitigation="Add a brief Schedule G narrative describing major fundraising approaches.",
+            confidence=0.55,
+        )
+    return AuditFinding(
+        check_id="fundraising_methods_documented",
+        category="Operations",
+        severity=Severity.PASS,
+        message="Fundraising method descriptions provided.",
+        mitigation="Update narratives annually to reflect any new campaigns or joint ventures.",
+        confidence=0.65,
+    )
+
+
+def build_section_summaries(findings: list[AuditFinding]) -> list[AuditSectionSummary]:
+    """Summarise findings per category.
+
+    Each category gets one summary holding its worst severity, a
+    "N passes, N warnings, N errors" sentence, and the mean confidence of its
+    findings. Summaries are ordered most severe first, then by lower-cased
+    category name.
+    """
+    by_category: defaultdict[str, list[AuditFinding]] = defaultdict(list)
+    for item in findings:
+        by_category[item.category].append(item)
+
+    rank = {Severity.ERROR: 3, Severity.WARNING: 2, Severity.PASS: 1}
+    summaries: list[AuditSectionSummary] = []
+    for category, members in by_category.items():
+        # Counter returns 0 for severities that did not occur in this category.
+        tally = Counter(member.severity for member in members)
+        counts_text = (
+            f"{tally[Severity.PASS]} passes, "
+            f"{tally[Severity.WARNING]} warnings, "
+            f"{tally[Severity.ERROR]} errors"
+        )
+        mean_confidence = sum(member.confidence for member in members) / len(members)
+        summaries.append(
+            AuditSectionSummary(
+                section=category,
+                severity=aggregate_findings(members),
+                summary=f"{category} review: {counts_text}.",
+                confidence=mean_confidence,
+            )
+        )
+    return sorted(
+        summaries,
+        key=lambda entry: (-rank[entry.severity], entry.section.lower()),
+    )
+
+
+def compose_overall_summary(findings: list[AuditFinding]) -> str:
+    """Build a one-line tally of findings by severity.
+
+    Severities with a zero count are left out; errors are listed first, then
+    warnings, then passes. An empty list yields a fixed "no findings" sentence.
+    """
+    if not findings:
+        return "No automated findings generated."
+    tally = Counter(item.severity for item in findings)
+    labelled = (
+        (Severity.ERROR, "error(s)"),
+        (Severity.WARNING, "warning(s)"),
+        (Severity.PASS, "check(s) passed"),
+    )
+    fragments = [
+        f"{tally[severity]} {label}" for severity, label in labelled if tally[severity]
+    ]
+    return "Overall results: " + ", ".join(fragments) + "."
+
+
+async def irs_ein_lookup(_ein: str) -> tuple[bool, float, str]:
+    """Placeholder EIN lookup that performs no external call.
+
+    The argument is ignored. Always returns ``(exists, confidence, note)``
+    reporting the EIN as unconfirmed with low confidence.
+    """
+    exists = False
+    confidence = 0.2
+    note = "IRS verification unavailable in current environment."
+    return exists, confidence, note
--- a/backend/app/agents/form_auditor/cli.py
+++ b/backend/app/agents/form_auditor/cli.py
@@ -0,0 +1,38 @@
+from __future__ import annotations
+
+import argparse
+import asyncio
+import json
+from pathlib import Path
+
+from . import build_audit_report
+
+# Public names of this module; build_audit_report is re-exported from the package.
+__all__ = ["build_audit_report", "main"]
+
+
+def _load_payload(path: Path) -> dict:
+    """Read ``path`` as UTF-8 text and return the parsed JSON document."""
+    with path.open(encoding="utf-8") as handle:
+        return json.load(handle)
+
+
+def _print_report(report: dict) -> None:
+    """Write ``report`` to standard output as JSON indented by two spaces."""
+    rendered = json.dumps(report, indent=2)
+    print(rendered)
+
+
+def main(argv: list[str] | None = None) -> None:
+    """Command-line entry point: load a payload file, audit it, print the report.
+
+    Args:
+        argv: Argument list to parse; ``None`` lets argparse read ``sys.argv``.
+            The single optional positional argument is the payload path and
+            defaults to ``example_data.json``.
+    """
+    cli = argparse.ArgumentParser(
+        description="Validate a Form 990 extraction payload using the Form Auditor agent."
+    )
+    cli.add_argument(
+        "payload",
+        nargs="?",
+        default="example_data.json",
+        help="Path to a JSON file containing the extraction payload.",
+    )
+    options = cli.parse_args(argv)
+
+    # expanduser() lets callers pass paths such as ~/payload.json.
+    payload = _load_payload(Path(options.payload).expanduser())
+    audit = asyncio.run(build_audit_report(payload))
+    _print_report(audit.model_dump())
--- a/backend/app/agents/form_auditor/models.py
+++ b/backend/app/agents/form_auditor/models.py
@@ -0,0 +1,573 @@
+from __future__ import annotations
+
+from enum import Enum
+from typing import Any
+
+from pydantic import BaseModel, Field
+
+
+class Severity(str, Enum):
+    # Outcome level of a finding or section. The string values are the labels
+    # quoted in the agent's system prompt. Ranking (Error > Warning > Pass) is
+    # defined by the check helpers, not by this enum.
+    PASS = "Pass"
+    WARNING = "Warning"
+    ERROR = "Error"
+
+
+class AuditFinding(BaseModel):
+    # Result of one audit check.
+    # check_id is the key used when findings are merged: a later finding with
+    # the same check_id replaces the earlier one.
+    check_id: str
+    # Grouping key for the per-section summaries.
+    category: str
+    severity: Severity
+    message: str
+    mitigation: str | None = None
+    # Must lie within [0.0, 1.0].
+    confidence: float = Field(ge=0.0, le=1.0)
+
+
+class AuditSectionSummary(BaseModel):
+    # Roll-up of the findings that share one category.
+    section: str
+    severity: Severity
+    summary: str
+    # Must lie within [0.0, 1.0].
+    confidence: float = Field(ge=0.0, le=1.0)
+
+
+class AuditReport(BaseModel):
+    # Output type of the auditor agent.
+    organisation_ein: str
+    organisation_name: str
+    # Required field with no default, but None is an accepted value.
+    year: int | None
+    overall_severity: Severity
+    findings: list[AuditFinding]
+    sections: list[AuditSectionSummary] = Field(default_factory=list)
+    overall_summary: str | None = None
+    notes: str | None = None
+
+
+class CoreOrgMetadata(BaseModel):
+    # Reduced set of organisation identity fields.
+    # NOTE(review): not referenced in the visible part of this change;
+    # presumably a slimmer variant of CoreOrganizationMetadata — confirm it is
+    # still needed.
+    ein: str
+    legal_name: str
+    return_type: str
+    accounting_method: str
+    incorporation_state: str | None = None
+
+
+class CoreOrganizationMetadata(BaseModel):
+    # Organisation identity and filing attributes from the extraction.
+    # Every field is a required string (`...`), including dates, years and flags.
+    ein: str = Field(
+        ...,
+        description="Unique IRS identifier for the organization.",
+        title="Employer Identification Number (EIN)",
+    )
+    legal_name: str = Field(
+        ...,
+        description="Official registered name of the organization.",
+        title="Legal Name of Organization",
+    )
+    phone_number: str = Field(
+        ..., description="Primary contact phone number.", title="Phone Number"
+    )
+    website_url: str = Field(
+        ..., description="Organization's website address.", title="Website URL"
+    )
+    return_type: str = Field(
+        ...,
+        description="Type of IRS return filed (e.g., 990, 990-EZ, 990-PF).",
+        title="Return Type",
+    )
+    amended_return: str = Field(
+        ...,
+        description="Indicates if the return is amended.",
+        title="Amended Return Flag",
+    )
+    group_exemption_number: str = Field(
+        ...,
+        description="IRS group exemption number, if applicable.",
+        title="Group Exemption Number",
+    )
+    subsection_code: str = Field(
+        ...,
+        description="IRS subsection code (e.g., 501(c)(3)).",
+        title="Subsection Code",
+    )
+    ruling_date: str = Field(
+        ...,
+        description="Date of IRS ruling or determination letter.",
+        title="Ruling/Determination Letter Date",
+    )
+    accounting_method: str = Field(
+        ...,
+        description="Accounting method used (cash, accrual, other).",
+        title="Accounting Method",
+    )
+    organization_type: str = Field(
+        ...,
+        description="Legal structure (corporation, trust, association, etc.).",
+        title="Organization Type",
+    )
+    year_of_formation: str = Field(
+        ..., description="Year the organization was formed.", title="Year of Formation"
+    )
+    incorporation_state: str = Field(
+        ..., description="State of incorporation.", title="Incorporation State"
+    )
+
+
+class RevenueBreakdown(BaseModel):
+    # Revenue amounts by source; every field is a required float.
+    # check_revenue_totals sums every field here other than total_revenue, so
+    # adding or removing a field changes that reconciliation.
+    total_revenue: float = Field(
+        ..., description="Sum of all revenue sources.", title="Total Revenue"
+    )
+    contributions_gifts_grants: float = Field(
+        ...,
+        description="Revenue from donations and grants.",
+        title="Contributions, Gifts, and Grants",
+    )
+    program_service_revenue: float = Field(
+        ...,
+        description="Revenue from program services.",
+        title="Program Service Revenue",
+    )
+    membership_dues: float = Field(
+        ..., description="Revenue from membership dues.", title="Membership Dues"
+    )
+    investment_income: float = Field(
+        ...,
+        description="Revenue from interest and dividends.",
+        title="Investment Income",
+    )
+    gains_losses_sales_assets: float = Field(
+        ...,
+        description="Net gains or losses from asset sales.",
+        title="Gains/Losses from Sales of Assets",
+    )
+    rental_income: float = Field(
+        ...,
+        description="Income from rental of real estate or equipment.",
+        title="Rental Income",
+    )
+    related_organizations_revenue: float = Field(
+        ...,
+        description="Revenue from related organizations.",
+        title="Related Organizations Revenue",
+    )
+    gaming_revenue: float = Field(
+        ..., description="Revenue from gaming activities.", title="Gaming Revenue"
+    )
+    other_revenue: float = Field(
+        ..., description="Miscellaneous revenue sources.", title="Other Revenue"
+    )
+    # NOTE(review): the next two may overlap with contributions_gifts_grants;
+    # if so, the summed subtotal double-counts them — confirm against the form.
+    government_grants: float = Field(
+        ...,
+        description="Revenue from government grants.",
+        title="Revenue from Government Grants",
+    )
+    foreign_contributions: float = Field(
+        ..., description="Revenue from foreign sources.", title="Foreign Contributions"
+    )
+
+
+class ExpensesBreakdown(BaseModel):
+    # Expense amounts; every field is a required float.
+    # check_expense_totals reconciles only the three functional amounts
+    # (program services, management & general, fundraising) against
+    # total_expenses; the remaining fields are not part of that check.
+    total_expenses: float = Field(
+        ..., description="Sum of all expenses.", title="Total Functional Expenses"
+    )
+    program_services_expenses: float = Field(
+        ...,
+        description="Expenses for program services.",
+        title="Program Services Expenses",
+    )
+    management_general_expenses: float = Field(
+        ...,
+        description="Administrative and management expenses.",
+        title="Management & General Expenses",
+    )
+    fundraising_expenses: float = Field(
+        ...,
+        description="Expenses for fundraising activities.",
+        title="Fundraising Expenses",
+    )
+    grants_us_organizations: float = Field(
+        ...,
+        description="Grants and assistance to U.S. organizations.",
+        title="Grants to U.S. Organizations",
+    )
+    grants_us_individuals: float = Field(
+        ...,
+        description="Grants and assistance to U.S. individuals.",
+        title="Grants to U.S. Individuals",
+    )
+    grants_foreign_organizations: float = Field(
+        ...,
+        description="Grants and assistance to foreign organizations.",
+        title="Grants to Foreign Organizations",
+    )
+    grants_foreign_individuals: float = Field(
+        ...,
+        description="Grants and assistance to foreign individuals.",
+        title="Grants to Foreign Individuals",
+    )
+    compensation_officers: float = Field(
+        ...,
+        description="Compensation paid to officers and key employees.",
+        title="Compensation of Officers/Key Employees",
+    )
+    compensation_other_staff: float = Field(
+        ...,
+        description="Compensation paid to other staff.",
+        title="Compensation of Other Staff",
+    )
+    payroll_taxes_benefits: float = Field(
+        ...,
+        description="Payroll taxes and employee benefits.",
+        title="Payroll Taxes and Benefits",
+    )
+    professional_fees: float = Field(
+        ...,
+        description="Legal, accounting, and lobbying fees.",
+        title="Professional Fees",
+    )
+    office_occupancy_costs: float = Field(
+        ...,
+        description="Office and occupancy expenses.",
+        title="Office and Occupancy Costs",
+    )
+    information_technology_costs: float = Field(
+        ..., description="IT-related expenses.", title="Information Technology Costs"
+    )
+    travel_conference_expenses: float = Field(
+        ...,
+        description="Travel and conference costs.",
+        title="Travel and Conference Expenses",
+    )
+    depreciation_amortization: float = Field(
+        ...,
+        description="Depreciation and amortization expenses.",
+        title="Depreciation and Amortization",
+    )
+    insurance: float = Field(..., description="Insurance expenses.", title="Insurance")
+
+
+class OfficersDirectorsTrusteesKeyEmployee(BaseModel):
+    # One listed officer, director, trustee or key employee.
+    # All fields are required; the indicator fields are free-form strings.
+    name: str = Field(..., description="Full name of the individual.", title="Name")
+    title_position: str = Field(
+        ..., description="Role or position held.", title="Title/Position"
+    )
+    # Summed across all individuals by check_board_engagement.
+    average_hours_per_week: float = Field(
+        ...,
+        description="Average weekly hours devoted to position.",
+        title="Average Hours Per Week",
+    )
+    related_party_transactions: str = Field(
+        ...,
+        description="Indicates if related-party transactions occurred.",
+        title="Related-Party Transactions",
+    )
+    former_officer: str = Field(
+        ...,
+        description="Indicates if the individual is a former officer.",
+        title="Former Officer Indicator",
+    )
+    governance_role: str = Field(
+        ...,
+        description="Role in governance (voting, independent, etc.).",
+        title="Governance Role",
+    )
+
+
+class GovernanceManagementDisclosure(BaseModel):
+    # Governance and management answers; all fields are required.
+    # The yes/no style answers are plain strings. check_governance_policies
+    # treats a blank value, or "no"/"n"/"false" for the three policy fields, as
+    # a negative answer.
+    # The two member counts are typed as float rather than int.
+    governing_body_size: float = Field(
+        ...,
+        description="Number of voting members on the governing body.",
+        title="Governing Body Size",
+    )
+    independent_members: float = Field(
+        ...,
+        description="Number of independent voting members.",
+        title="Number of Independent Members",
+    )
+    financial_statements_reviewed: str = Field(
+        ...,
+        description="Indicates if financial statements were reviewed or audited.",
+        title="Financial Statements Reviewed/Audited",
+    )
+    form_990_provided_to_governing_body: str = Field(
+        ...,
+        description="Indicates if Form 990 was provided to governing body before filing.",
+        title="Form 990 Provided to Governing Body",
+    )
+    conflict_of_interest_policy: str = Field(
+        ...,
+        description="Indicates if a conflict-of-interest policy is in place.",
+        title="Conflict-of-Interest Policy",
+    )
+    whistleblower_policy: str = Field(
+        ...,
+        description="Indicates if a whistleblower policy is in place.",
+        title="Whistleblower Policy",
+    )
+    document_retention_policy: str = Field(
+        ...,
+        description="Indicates if a document retention/destruction policy is in place.",
+        title="Document Retention/Destruction Policy",
+    )
+    # The remaining two fields are not read by any check in the visible code.
+    ceo_compensation_review_process: str = Field(
+        ...,
+        description="Description of CEO compensation review process.",
+        title="CEO Compensation Review Process",
+    )
+    public_disclosure_practices: str = Field(
+        ...,
+        description="Description of public disclosure practices.",
+        title="Public Disclosure Practices",
+    )
+
+
+class ProgramServiceAccomplishment(BaseModel):
+    # One program entry: name, description and amounts. All fields are required.
+    program_name: str = Field(
+        ..., description="Name of the program.", title="Program Name"
+    )
+    program_description: str = Field(
+        ..., description="Description of the program.", title="Program Description"
+    )
+    expenses: float = Field(
+        ..., description="Expenses for the program.", title="Program Expenses"
+    )
+    grants: float = Field(
+        ..., description="Grants made under the program.", title="Program Grants"
+    )
+    revenue_generated: float = Field(
+        ..., description="Revenue generated by the program.", title="Revenue Generated"
+    )
+    # Free-form text, not a number.
+    quantitative_outputs: str = Field(
+        ...,
+        description="Quantitative outputs (e.g., number served, events held).",
+        title="Quantitative Outputs",
+    )
+
+
+class FundraisingGrantmaking(BaseModel):
+    # Fundraising event totals and professional fundraiser fees; all required floats.
+    total_fundraising_event_revenue: float = Field(
+        ...,
+        description="Total revenue from fundraising events.",
+        title="Total Fundraising Event Revenue",
+    )
+    # Compared with ExpensesBreakdown.fundraising_expenses by
+    # check_fundraising_alignment.
+    total_fundraising_event_expenses: float = Field(
+        ...,
+        description="Total direct expenses for fundraising events.",
+        title="Total Fundraising Event Expenses",
+    )
+    professional_fundraiser_fees: float = Field(
+        ...,
+        description="Fees paid to professional fundraisers.",
+        title="Professional Fundraiser Fees",
+    )
+
+
+class FunctionalOperationalData(BaseModel):
+    # Operational headcounts, occupancy cost and narrative fields; all required.
+    # The two headcounts are typed as float rather than int.
+    number_of_employees: float = Field(
+        ..., description="Total number of employees.", title="Number of Employees"
+    )
+    number_of_volunteers: float = Field(
+        ..., description="Total number of volunteers.", title="Number of Volunteers"
+    )
+    occupancy_costs: float = Field(
+        ..., description="Total occupancy costs.", title="Occupancy Costs"
+    )
+    # A blank or whitespace-only value triggers the
+    # check_missing_operational_details warning.
+    fundraising_method_descriptions: str = Field(
+        ...,
+        description="Descriptions of fundraising methods used.",
+        title="Fundraising Method Descriptions",
+    )
+    joint_ventures_disregarded_entities: str = Field(
+        ...,
+        description="Details of joint ventures and disregarded entities.",
+        title="Joint Ventures and Disregarded Entities",
+    )
+
+
+class CompensationDetails(BaseModel):  # Four float amounts plus five str-typed indicators.
+    base_compensation: float = Field(  # required (`...` default)
+        ..., description="Base salary or wages.", title="Base Compensation"
+    )
+    bonus: float = Field(  # NOTE(review): description also mentions incentive pay; see `incentive`
+        ..., description="Bonus or incentive compensation.", title="Bonus Compensation"
+    )
+    incentive: float = Field(  # separate amount; overlaps with the `bonus` description
+        ..., description="Incentive compensation.", title="Incentive Compensation"
+    )
+    other: float = Field(  # required
+        ..., description="Other forms of compensation.", title="Other Compensation"
+    )
+    non_fixed_compensation: str = Field(  # indicator typed str; accepted values not defined here
+        ...,
+        description="Indicates if compensation is non-fixed.",
+        title="Non-Fixed Compensation Flag",
+    )
+    first_class_travel: str = Field(  # indicator typed str, not bool
+        ...,
+        description="Indicates if first-class travel was provided.",
+        title="First-Class Travel",
+    )
+    housing_allowance: str = Field(  # indicator typed str, not bool
+        ...,
+        description="Indicates if housing allowance was provided.",
+        title="Housing Allowance",
+    )
+    expense_account_usage: str = Field(  # indicator typed str, not bool
+        ...,
+        description="Indicates if expense account was used.",
+        title="Expense Account Usage",
+    )
+    supplemental_retirement: str = Field(  # indicator typed str, not bool
+        ...,
+        description="Indicates if supplemental retirement or deferred comp was provided.",
+        title="Supplemental Retirement/Deferred Comp",
+    )
+
+
+class PoliticalLobbyingActivities(BaseModel):  # Lobbying/campaign amounts and related text.
+    lobbying_expenditures_direct: float = Field(  # required (`...` default)
+        ...,
+        description="Direct lobbying expenditures.",
+        title="Direct Lobbying Expenditures",
+    )
+    lobbying_expenditures_grassroots: float = Field(  # required
+        ...,
+        description="Grassroots lobbying expenditures.",
+        title="Grassroots Lobbying Expenditures",
+    )
+    election_501h_status: str = Field(  # indicator typed str; accepted values not defined here
+        ...,
+        description="Indicates if 501(h) election was made.",
+        title="501(h) Election Status",
+    )
+    political_campaign_expenditures: float = Field(  # required
+        ...,
+        description="Expenditures for political campaigns.",
+        title="Political Campaign Expenditures",
+    )
+    related_organizations_affiliates: str = Field(  # required free-text string
+        ...,
+        description="Details of related organizations or affiliates involved.",
+        title="Related Organizations/Affiliates Involved",
+    )
+
+
+class InvestmentsEndowment(BaseModel):  # Investment/endowment figures and related-party text.
+    investment_types: str = Field(  # one string, not a list of types
+        ...,
+        description="Types of investments held (securities, partnerships, real estate).",
+        title="Investment Types",
+    )
+    donor_restricted_endowment_values: float = Field(  # required (`...` default)
+        ...,
+        description="Value of donor-restricted endowments.",
+        title="Donor-Restricted Endowment Values",
+    )
+    net_appreciation_depreciation: float = Field(  # presumably signed (+/-) - confirm convention
+        ...,
+        description="Net appreciation or depreciation of investments.",
+        title="Net Appreciation/Depreciation",
+    )
+    related_organization_transactions: str = Field(  # required free-text string
+        ...,
+        description="Details of transactions with related organizations.",
+        title="Related Organization Transactions",
+    )
+    loans_to_from_related_parties: str = Field(  # required free-text string
+        ...,
+        description="Details of loans to or from related parties.",
+        title="Loans to/from Related Parties",
+    )
+
+
+class TaxCompliancePenalties(BaseModel):  # Four required str fields on tax-compliance items.
+    penalties_excise_taxes_reported: str = Field(  # typed str, not a numeric amount
+        ...,
+        description="Reported penalties or excise taxes.",
+        title="Penalties or Excise Taxes Reported",
+    )
+    unrelated_business_income_disclosure: str = Field(  # required (`...` default)
+        ...,
+        description="Disclosure of unrelated business income (UBI).",
+        title="Unrelated Business Income Disclosure",
+    )
+    foreign_bank_account_reporting: str = Field(  # required
+        ...,
+        description="Disclosure of foreign bank accounts (FBAR equivalent).",
+        title="Foreign Bank Account Reporting",
+    )
+    schedule_o_narrative_explanations: str = Field(  # required free-text string
+        ...,
+        description="Narrative explanations from Schedule O.",
+        title="Schedule O Narrative Explanations",
+    )
+
+
+class ExtractedIrsForm990PfDataSchema(BaseModel):  # Top-level extraction; every section required.
+    core_organization_metadata: CoreOrganizationMetadata = Field(  # model defined outside this chunk
+        ...,
+        description="Essential identifiers and attributes for normalizing entities across filings and years.",
+        title="Core Organization Metadata",
+    )
+    revenue_breakdown: RevenueBreakdown = Field(  # model defined outside this chunk
+        ...,
+        description="Detailed breakdown of revenue streams for the fiscal year.",
+        title="Revenue Breakdown",
+    )
+    expenses_breakdown: ExpensesBreakdown = Field(  # model defined outside this chunk
+        ...,
+        description="Detailed breakdown of expenses for the fiscal year.",
+        title="Expenses Breakdown",
+    )
+    balance_sheet: dict[str, Any] = Field(  # NOTE(review): only untyped section; keys not validated
+        ...,
+        description="Assets, liabilities, and net assets at year end.",
+        title="Balance Sheet Data",
+    )
+    officers_directors_trustees_key_employees: list[  # required, but an empty list still validates
+        OfficersDirectorsTrusteesKeyEmployee
+    ] = Field(
+        ...,
+        description="List of key personnel and their compensation.",
+        title="Officers, Directors, Trustees, Key Employees",
+    )
+    governance_management_disclosure: GovernanceManagementDisclosure = Field(  # defined elsewhere
+        ...,
+        description="Governance and management practices, policies, and disclosures.",
+        title="Governance, Management, and Disclosure",
+    )
+    program_service_accomplishments: list[ProgramServiceAccomplishment] = Field(  # list may be empty
+        ...,
+        description="Major programs and their outputs for the fiscal year.",
+        title="Program Service Accomplishments",
+    )
+    fundraising_grantmaking: FundraisingGrantmaking = Field(  # model holds fundraising fields only
+        ...,
+        description="Fundraising event details and grantmaking activities.",
+        title="Fundraising & Grantmaking",
+    )
+    functional_operational_data: FunctionalOperationalData = Field(  # required section
+        ...,
+        description="Operational metrics and related-organization relationships.",
+        title="Functional & Operational Data",
+    )
+    compensation_details: CompensationDetails = Field(  # single object, not one per person
+        ...,
+        description="Detailed breakdown of officer compensation and benefits.",
+        title="Compensation Details",
+    )
+    political_lobbying_activities: PoliticalLobbyingActivities = Field(  # required section
+        ...,
+        description="Details of political and lobbying expenditures and affiliations.",
+        title="Political & Lobbying Activities",
+    )
+    investments_endowment: InvestmentsEndowment = Field(  # required section
+        ...,
+        description="Investment holdings, endowment values, and related transactions.",
+        title="Investments & Endowment",
+    )
+    tax_compliance_penalties: TaxCompliancePenalties = Field(  # required section
+        ...,
+        description="Tax compliance indicators, penalties, and narrative explanations.",
+        title="Tax Compliance / Penalties",
+    )  # NOTE(review): class name says 990-PF, the agent prompt says "Form 990" - confirm which.
+
+
+class ValidatorState(BaseModel):  # Bundle of extraction, initial findings and metadata.
+    extraction: ExtractedIrsForm990PfDataSchema  # required; no default
+    initial_findings: list[AuditFinding] = Field(default_factory=list)  # fresh list per instance
+    metadata: dict[str, Any] = Field(default_factory=dict)  # fresh dict per instance; free-form