Files
Anibal Angulo af9b5fed01 wip chat
2025-11-07 09:41:18 -06:00

156 lines
5.0 KiB
Python

from __future__ import annotations
from collections.abc import Iterable
from pydantic_ai import Agent, RunContext
from pydantic_ai.models.openai import OpenAIChatModel
from pydantic_ai.providers.azure import AzureProvider
from app.core.config import settings
from .checks import (
aggregate_findings,
build_section_summaries,
check_balance_sheet_presence,
check_board_engagement,
check_expense_totals,
check_fundraising_alignment,
check_governance_policies,
check_missing_operational_details,
check_revenue_totals,
compose_overall_summary,
irs_ein_lookup,
)
from .models import (
AuditFinding,
AuditReport,
ExtractedIrsForm990PfDataSchema,
Severity,
ValidatorState,
)
provider = AzureProvider(
azure_endpoint=settings.AZURE_OPENAI_ENDPOINT,
api_version=settings.AZURE_OPENAI_API_VERSION,
api_key=settings.AZURE_OPENAI_API_KEY,
)
model = OpenAIChatModel(model_name="gpt-4o", provider=provider)
agent = Agent(model=model)
def prepare_initial_findings(
extraction: ExtractedIrsForm990PfDataSchema,
) -> list[AuditFinding]:
findings = [
check_revenue_totals(extraction),
check_expense_totals(extraction),
check_fundraising_alignment(extraction),
check_balance_sheet_presence(extraction),
check_board_engagement(extraction),
check_missing_operational_details(extraction),
]
findings.extend(check_governance_policies(extraction))
return findings
def _merge_findings(
findings: Iterable[AuditFinding],
added: Iterable[AuditFinding],
) -> list[AuditFinding]:
existing = {finding.check_id: finding for finding in findings}
for finding in added:
existing[finding.check_id] = finding
return list(existing.values())
agent = Agent(
model=model,
name="FormValidator",
deps_type=ValidatorState,
output_type=AuditReport,
system_prompt=(
"You are a Form 990 auditor. Review the extraction data and deterministic "
"checks provided in deps. Use tools to confirm calculations, add or adjust "
"findings, supply mitigation guidance, and craft concise section summaries. "
"The AuditReport must include severity (`Pass`, `Warning`, `Error`), "
"confidence scores, mitigation advice, section summaries, and an overall summary. "
"Ground every statement in supplied data; do not invent financial figures."
),
)
@agent.tool
def revenue_check(ctx: RunContext[ValidatorState]) -> AuditFinding:
return check_revenue_totals(ctx.deps.extraction)
@agent.tool
def expense_check(ctx: RunContext[ValidatorState]) -> AuditFinding:
return check_expense_totals(ctx.deps.extraction)
@agent.tool
def fundraising_alignment_check(ctx: RunContext[ValidatorState]) -> AuditFinding:
return check_fundraising_alignment(ctx.deps.extraction)
@agent.tool
async def verify_ein(ctx: RunContext[ValidatorState]) -> AuditFinding:
ein = ctx.deps.extraction.core_organization_metadata.ein
exists, confidence, note = await irs_ein_lookup(ein)
if exists:
return AuditFinding(
check_id="irs_ein_match",
category="Compliance",
severity=Severity.PASS,
message="EIN confirmed against IRS index.",
mitigation="Document verification in the filing workpapers.",
confidence=confidence,
)
return AuditFinding(
check_id="irs_ein_match",
category="Compliance",
severity=Severity.WARNING,
message=f"EIN {ein} could not be confirmed. {note}",
mitigation="Verify the EIN against the IRS EO BMF or IRS determination letter.",
confidence=confidence,
)
@agent.output_validator
def finalize_report(
ctx: RunContext[ValidatorState],
report: AuditReport,
) -> AuditReport:
merged_findings = _merge_findings(ctx.deps.initial_findings, report.findings)
overall = aggregate_findings(merged_findings)
sections = build_section_summaries(merged_findings)
overall_summary = compose_overall_summary(merged_findings)
metadata = ctx.deps.metadata
notes = report.notes
if notes is None and isinstance(metadata, dict) and metadata.get("source"):
notes = f"Reviewed data source: {metadata['source']}."
year: int | None = None
if isinstance(metadata, dict):
metadata_year = metadata.get("return_year")
if metadata_year is not None:
try:
year = int(metadata_year)
except (TypeError, ValueError):
pass
core = ctx.deps.extraction.core_organization_metadata
organisation_name = core.legal_name or report.organisation_name
organisation_ein = core.ein or report.organisation_ein
return report.model_copy(
update={
"organisation_ein": organisation_ein,
"organisation_name": organisation_name,
"year": year,
"findings": merged_findings,
"overall_severity": overall,
"sections": sections,
"overall_summary": overall_summary,
"notes": notes,
}
)