add agent context

This commit is contained in:
Anibal Angulo
2025-11-09 08:35:01 -06:00
parent a23f45ca6d
commit 77a11ef32e
16 changed files with 1227 additions and 946 deletions

View File

@@ -11,15 +11,23 @@ from .models import (
async def build_audit_report(payload: dict[str, Any]) -> AuditReport:
extraction_payload = payload.get("extraction")
metadata_raw: Any = None
extraction_payload: Any = None
if isinstance(payload, dict) and "extraction" in payload:
extraction_payload = payload.get("extraction")
metadata_raw = payload.get("metadata")
else:
extraction_payload = payload
if extraction_payload is None:
raise ValueError("Payload missing 'extraction' key.")
raise ValueError("Payload missing extraction data.")
extraction = ExtractedIrsForm990PfDataSchema.model_validate(extraction_payload)
initial_findings = prepare_initial_findings(extraction)
metadata: dict[str, Any] = {}
metadata_raw = payload.get("metadata")
if isinstance(metadata_raw, dict):
metadata = {str(k): v for k, v in metadata_raw.items()}

View File

@@ -1,9 +1,10 @@
from __future__ import annotations
import re
from enum import Enum
from typing import Any
from pydantic import BaseModel, Field
from pydantic import BaseModel, Field, model_validator
class Severity(str, Enum):
@@ -497,6 +498,214 @@ class TaxCompliancePenalties(BaseModel):
)
_OFFICER_HOURS_PATTERN = re.compile(r"([\d.]+)\s*hrs?/wk", re.IGNORECASE)


def _parse_officer_list(entries: list[str] | None) -> list[dict[str, Any]]:
    """Turn flat, comma-separated officer strings into officer records.

    Each string entry is split on commas and the pieces are read by position:
    piece 0 is the name, piece 1 the title and piece 3 the governance role
    (piece 2 is never read positionally). Weekly hours come from the first
    ``<number> hrs/wk`` / ``hr/wk`` occurrence anywhere in the entry.

    Args:
        entries: Raw officer strings; ``None`` or an empty list is accepted.

    Returns:
        One dict per string entry, in input order. Non-string entries are
        skipped. Missing pieces become ``""`` and missing or unparsable hours
        become ``0.0``.
    """
    officers: list[dict[str, Any]] = []
    for entry in entries or []:
        if not isinstance(entry, str):
            continue

        pieces = [piece.strip() for piece in entry.split(",")]

        def piece_at(index: int) -> str:
            # Positional lookup that tolerates entries with fewer pieces.
            return pieces[index] if len(pieces) > index else ""

        weekly_hours = 0.0
        found = _OFFICER_HOURS_PATTERN.search(entry)
        if found is not None:
            try:
                weekly_hours = float(found.group(1))
            except ValueError:
                # The pattern also accepts text such as "1.2.3"; keep 0.0.
                pass

        officers.append(
            {
                "name": piece_at(0),
                "title_position": piece_at(1),
                "average_hours_per_week": weekly_hours,
                "related_party_transactions": "",
                "former_officer": "",
                "governance_role": piece_at(3),
            }
        )
    return officers
def _build_program_accomplishments(
    descriptions: list[str] | None,
) -> list[dict[str, Any]]:
    """Wrap free-text program descriptions in program-accomplishment records.

    Args:
        descriptions: Program description strings; ``None`` or an empty list
            is accepted.

    Returns:
        One dict per string description, named ``"Program <n>"`` where ``n``
        is the 1-based position in the input. Non-string entries are skipped
        but still consume a position. Numeric fields are set to ``0.0`` and
        ``quantitative_outputs`` to ``""``.
    """
    if not descriptions:
        return []
    return [
        {
            "program_name": f"Program {position}",
            "program_description": text.strip(),
            "expenses": 0.0,
            "grants": 0.0,
            "revenue_generated": 0.0,
            "quantitative_outputs": "",
        }
        for position, text in enumerate(descriptions, start=1)
        if isinstance(text, str)
    ]
def _transform_flat_payload(data: dict[str, Any]) -> dict[str, Any]:
    """Regroup a flat extraction dict into the nested per-section layout.

    Every output field is read from ``data`` under a key of the same name,
    with these exceptions: ``compensation_details.other`` is read from
    ``other_compensation`` (falling back to ``other``), officers are parsed
    from ``officers_list`` and program accomplishments are built from
    ``program_accomplishments_list``.

    Args:
        data: Flat mapping of field name to extracted value.

    Returns:
        A new dict keyed by section name. Text fields default to ``""``,
        numeric fields to ``0`` and ``balance_sheet`` to ``{}``.
    """

    def get_str(key: str) -> str:
        # Missing keys and explicit nulls both become an empty string.
        value = data.get(key)
        if value is None:
            return ""
        return str(value)

    def get_value(key: str, default: Any = 0) -> Any:
        # Treat an explicit null like a missing key, consistent with get_str
        # and the balance_sheet lookup below; otherwise None leaks into fields
        # that are presumably numeric in the schema (TODO confirm).
        value = data.get(key)
        return default if value is None else value

    transformed: dict[str, Any] = {
        "core_organization_metadata": {
            "ein": get_str("ein"),
            "legal_name": get_str("legal_name"),
            "phone_number": get_str("phone_number"),
            "website_url": get_str("website_url"),
            "return_type": get_str("return_type"),
            "amended_return": get_str("amended_return"),
            "group_exemption_number": get_str("group_exemption_number"),
            "subsection_code": get_str("subsection_code"),
            "ruling_date": get_str("ruling_date"),
            "accounting_method": get_str("accounting_method"),
            "organization_type": get_str("organization_type"),
            "year_of_formation": get_str("year_of_formation"),
            "incorporation_state": get_str("incorporation_state"),
        },
        "revenue_breakdown": {
            "total_revenue": get_value("total_revenue"),
            "contributions_gifts_grants": get_value("contributions_gifts_grants"),
            "program_service_revenue": get_value("program_service_revenue"),
            "membership_dues": get_value("membership_dues"),
            "investment_income": get_value("investment_income"),
            "gains_losses_sales_assets": get_value("gains_losses_sales_assets"),
            "rental_income": get_value("rental_income"),
            "related_organizations_revenue": get_value("related_organizations_revenue"),
            "gaming_revenue": get_value("gaming_revenue"),
            "other_revenue": get_value("other_revenue"),
            "government_grants": get_value("government_grants"),
            "foreign_contributions": get_value("foreign_contributions"),
        },
        "expenses_breakdown": {
            "total_expenses": get_value("total_expenses"),
            "program_services_expenses": get_value("program_services_expenses"),
            "management_general_expenses": get_value("management_general_expenses"),
            "fundraising_expenses": get_value("fundraising_expenses"),
            "grants_us_organizations": get_value("grants_us_organizations"),
            "grants_us_individuals": get_value("grants_us_individuals"),
            "grants_foreign_organizations": get_value("grants_foreign_organizations"),
            "grants_foreign_individuals": get_value("grants_foreign_individuals"),
            "compensation_officers": get_value("compensation_officers"),
            "compensation_other_staff": get_value("compensation_other_staff"),
            "payroll_taxes_benefits": get_value("payroll_taxes_benefits"),
            "professional_fees": get_value("professional_fees"),
            "office_occupancy_costs": get_value("office_occupancy_costs"),
            "information_technology_costs": get_value("information_technology_costs"),
            "travel_conference_expenses": get_value("travel_conference_expenses"),
            "depreciation_amortization": get_value("depreciation_amortization"),
            "insurance": get_value("insurance"),
        },
        # A missing or null balance sheet becomes an empty dict.
        "balance_sheet": data.get("balance_sheet") or {},
        "officers_directors_trustees_key_employees": _parse_officer_list(
            data.get("officers_list")
        ),
        "governance_management_disclosure": {
            "governing_body_size": get_value("governing_body_size"),
            "independent_members": get_value("independent_members"),
            "financial_statements_reviewed": get_str("financial_statements_reviewed"),
            "form_990_provided_to_governing_body": get_str(
                "form_990_provided_to_governing_body"
            ),
            "conflict_of_interest_policy": get_str("conflict_of_interest_policy"),
            "whistleblower_policy": get_str("whistleblower_policy"),
            "document_retention_policy": get_str("document_retention_policy"),
            "ceo_compensation_review_process": get_str(
                "ceo_compensation_review_process"
            ),
            "public_disclosure_practices": get_str("public_disclosure_practices"),
        },
        "program_service_accomplishments": _build_program_accomplishments(
            data.get("program_accomplishments_list")
        ),
        "fundraising_grantmaking": {
            "total_fundraising_event_revenue": get_value(
                "total_fundraising_event_revenue"
            ),
            "total_fundraising_event_expenses": get_value(
                "total_fundraising_event_expenses"
            ),
            "professional_fundraiser_fees": get_value("professional_fundraiser_fees"),
        },
        "functional_operational_data": {
            "number_of_employees": get_value("number_of_employees"),
            "number_of_volunteers": get_value("number_of_volunteers"),
            "occupancy_costs": get_value("occupancy_costs"),
            "fundraising_method_descriptions": get_str(
                "fundraising_method_descriptions"
            ),
            "joint_ventures_disregarded_entities": get_str(
                "joint_ventures_disregarded_entities"
            ),
        },
        "compensation_details": {
            "base_compensation": get_value("base_compensation"),
            "bonus": get_value("bonus"),
            "incentive": get_value("incentive"),
            # Prefer "other_compensation"; fall back to "other", then to 0.
            "other": get_value("other_compensation", get_value("other", 0)),
            "non_fixed_compensation": get_str("non_fixed_compensation"),
            "first_class_travel": get_str("first_class_travel"),
            "housing_allowance": get_str("housing_allowance"),
            "expense_account_usage": get_str("expense_account_usage"),
            "supplemental_retirement": get_str("supplemental_retirement"),
        },
        "political_lobbying_activities": {
            "lobbying_expenditures_direct": get_value("lobbying_expenditures_direct"),
            "lobbying_expenditures_grassroots": get_value(
                "lobbying_expenditures_grassroots"
            ),
            "election_501h_status": get_str("election_501h_status"),
            "political_campaign_expenditures": get_value(
                "political_campaign_expenditures"
            ),
            "related_organizations_affiliates": get_str(
                "related_organizations_affiliates"
            ),
        },
        "investments_endowment": {
            "investment_types": get_str("investment_types"),
            "donor_restricted_endowment_values": get_value(
                "donor_restricted_endowment_values"
            ),
            "net_appreciation_depreciation": get_value("net_appreciation_depreciation"),
            "related_organization_transactions": get_str(
                "related_organization_transactions"
            ),
            "loans_to_from_related_parties": get_str("loans_to_from_related_parties"),
        },
        "tax_compliance_penalties": {
            "penalties_excise_taxes_reported": get_str(
                "penalties_excise_taxes_reported"
            ),
            "unrelated_business_income_disclosure": get_str(
                "unrelated_business_income_disclosure"
            ),
            "foreign_bank_account_reporting": get_str("foreign_bank_account_reporting"),
            "schedule_o_narrative_explanations": get_str(
                "schedule_o_narrative_explanations"
            ),
        },
    }
    return transformed
class ExtractedIrsForm990PfDataSchema(BaseModel):
core_organization_metadata: CoreOrganizationMetadata = Field(
...,
@@ -514,7 +723,7 @@ class ExtractedIrsForm990PfDataSchema(BaseModel):
title="Expenses Breakdown",
)
balance_sheet: dict[str, Any] = Field(
...,
default_factory=dict,
description="Assets, liabilities, and net assets at year end.",
title="Balance Sheet Data",
)
@@ -566,6 +775,15 @@ class ExtractedIrsForm990PfDataSchema(BaseModel):
title="Tax Compliance / Penalties",
)
@model_validator(mode="before")
@classmethod
def _ensure_structure(cls, value: Any) -> Any:
    """Accept flat payloads by regrouping them before field validation.

    Dict input that lacks a ``core_organization_metadata`` key is passed
    through ``_transform_flat_payload``. Anything else (non-dict input, or a
    dict that already has that key) is returned unchanged.
    """
    is_flat_payload = (
        isinstance(value, dict) and "core_organization_metadata" not in value
    )
    if is_flat_payload:
        return _transform_flat_payload(value)
    return value
class ValidatorState(BaseModel):
extraction: ExtractedIrsForm990PfDataSchema

View File

@@ -1,5 +1,9 @@
from fastapi import APIRouter
from pydantic_ai import Agent
import json
from dataclasses import dataclass
from typing import Annotated, Any
from fastapi import APIRouter, Header
from pydantic_ai import Agent, RunContext
from pydantic_ai.models.openai import OpenAIChatModel
from pydantic_ai.providers.azure import AzureProvider
from pydantic_ai.ui.vercel_ai import VercelAIAdapter
@@ -8,6 +12,7 @@ from starlette.responses import Response
from app.agents import form_auditor, web_search
from app.core.config import settings
from app.services.extracted_data_service import get_extracted_data_service
provider = AzureProvider(
azure_endpoint=settings.AZURE_OPENAI_ENDPOINT,
@@ -15,347 +20,26 @@ provider = AzureProvider(
api_key=settings.AZURE_OPENAI_API_KEY,
)
model = OpenAIChatModel(model_name="gpt-4o", provider=provider)
agent = Agent(model=model)
@dataclass
class Deps:
    """Dependencies handed to agent tools; registered as the agent's ``deps_type``."""

    # Extracted form data for the current request. The chat endpoint fills it
    # and the audit tool reads it back through ``ctx.deps.extracted_data``.
    extracted_data: dict[str, Any]
agent = Agent(model=model, deps_type=Deps)
router = APIRouter(prefix="/api/v1/agent", tags=["Agent"])
data = {
"extraction": {
"core_organization_metadata": {
"ein": "84-2674654",
"legal_name": "07 IN HEAVEN MEMORIAL SCHOLARSHIP",
"phone_number": "(262) 215-0300",
"website_url": "",
"return_type": "990-PF",
"amended_return": "No",
"group_exemption_number": "",
"subsection_code": "501(c)(3)",
"ruling_date": "",
"accounting_method": "Cash",
"organization_type": "corporation",
"year_of_formation": "",
"incorporation_state": "WI",
},
"revenue_breakdown": {
"total_revenue": 5227,
"contributions_gifts_grants": 5227,
"program_service_revenue": 0,
"membership_dues": 0,
"investment_income": 0,
"gains_losses_sales_assets": 0,
"rental_income": 0,
"related_organizations_revenue": 0,
"gaming_revenue": 0,
"other_revenue": 0,
"government_grants": 0,
"foreign_contributions": 0,
},
"expenses_breakdown": {
"total_expenses": 2104,
"program_services_expenses": 0,
"management_general_expenses": 0,
"fundraising_expenses": 2104,
"grants_us_organizations": 0,
"grants_us_individuals": 0,
"grants_foreign_organizations": 0,
"grants_foreign_individuals": 0,
"compensation_officers": 0,
"compensation_other_staff": 0,
"payroll_taxes_benefits": 0,
"professional_fees": 0,
"office_occupancy_costs": 0,
"information_technology_costs": 0,
"travel_conference_expenses": 0,
"depreciation_amortization": 0,
"insurance": 0,
},
"balance_sheet": {},
"officers_directors_trustees_key_employees": [
{
"name": "REBECCA TERPSTRA",
"title_position": "PRESIDENT",
"average_hours_per_week": 0.1,
"related_party_transactions": "",
"former_officer": "",
"governance_role": "",
},
{
"name": "ROBERT GUZMAN",
"title_position": "VICE PRESDEINT",
"average_hours_per_week": 0.1,
"related_party_transactions": "",
"former_officer": "",
"governance_role": "",
},
{
"name": "ANDREA VALENTI",
"title_position": "TREASURER",
"average_hours_per_week": 0.1,
"related_party_transactions": "",
"former_officer": "",
"governance_role": "",
},
{
"name": "BETHANY WALSH",
"title_position": "SECRETARY",
"average_hours_per_week": 0.1,
"related_party_transactions": "",
"former_officer": "",
"governance_role": "",
},
],
"governance_management_disclosure": {
"governing_body_size": 4,
"independent_members": 4,
"financial_statements_reviewed": "",
"form_990_provided_to_governing_body": "",
"conflict_of_interest_policy": "",
"whistleblower_policy": "",
"document_retention_policy": "",
"ceo_compensation_review_process": "",
"public_disclosure_practices": "Yes",
},
"program_service_accomplishments": [],
"fundraising_grantmaking": {
"total_fundraising_event_revenue": 0,
"total_fundraising_event_expenses": 2104,
"professional_fundraiser_fees": 0,
},
"functional_operational_data": {
"number_of_employees": 0,
"number_of_volunteers": 0,
"occupancy_costs": 0,
"fundraising_method_descriptions": "",
"joint_ventures_disregarded_entities": "",
},
"compensation_details": {
"base_compensation": 0,
"bonus": 0,
"incentive": 0,
"other": 0,
"non_fixed_compensation": "",
"first_class_travel": "",
"housing_allowance": "",
"expense_account_usage": "",
"supplemental_retirement": "",
},
"political_lobbying_activities": {
"lobbying_expenditures_direct": 0,
"lobbying_expenditures_grassroots": 0,
"election_501h_status": "",
"political_campaign_expenditures": 0,
"related_organizations_affiliates": "",
},
"investments_endowment": {
"investment_types": "",
"donor_restricted_endowment_values": 0,
"net_appreciation_depreciation": 0,
"related_organization_transactions": "",
"loans_to_from_related_parties": "",
},
"tax_compliance_penalties": {
"penalties_excise_taxes_reported": "No",
"unrelated_business_income_disclosure": "No",
"foreign_bank_account_reporting": "No",
"schedule_o_narrative_explanations": "",
},
},
"extraction_metadata": {
"core_organization_metadata": {
"ein": {"value": "84-2674654", "references": ["0-7"]},
"legal_name": {
"value": "07 IN HEAVEN MEMORIAL SCHOLARSHIP",
"references": ["0-6"],
},
"phone_number": {"value": "(262) 215-0300", "references": ["0-a"]},
"website_url": {"value": "", "references": []},
"return_type": {
"value": "990-PF",
"references": ["4ade8ed0-bce7-4bd5-bd8d-190e3e4be95b"],
},
"amended_return": {
"value": "No",
"references": ["4ac9edc4-e9bb-430f-b4c4-a42bf4c04b28"],
},
"group_exemption_number": {"value": "", "references": []},
"subsection_code": {
"value": "501(c)(3)",
"references": ["4ac9edc4-e9bb-430f-b4c4-a42bf4c04b28"],
},
"ruling_date": {"value": "", "references": []},
"accounting_method": {"value": "Cash", "references": ["0-d"]},
"organization_type": {
"value": "corporation",
"references": ["4ac9edc4-e9bb-430f-b4c4-a42bf4c04b28"],
},
"year_of_formation": {"value": "", "references": []},
"incorporation_state": {
"value": "WI",
"references": ["4ac9edc4-e9bb-430f-b4c4-a42bf4c04b28"],
},
},
"revenue_breakdown": {
"total_revenue": {"value": 5227, "references": ["0-1z"]},
"contributions_gifts_grants": {"value": 5227, "references": ["0-m"]},
"program_service_revenue": {"value": 0, "references": []},
"membership_dues": {"value": 0, "references": []},
"investment_income": {"value": 0, "references": []},
"gains_losses_sales_assets": {"value": 0, "references": []},
"rental_income": {"value": 0, "references": []},
"related_organizations_revenue": {"value": 0, "references": []},
"gaming_revenue": {"value": 0, "references": []},
"other_revenue": {"value": 0, "references": []},
"government_grants": {"value": 0, "references": []},
"foreign_contributions": {"value": 0, "references": []},
},
"expenses_breakdown": {
"total_expenses": {"value": 2104, "references": ["0-2S"]},
"program_services_expenses": {"value": 0, "references": []},
"management_general_expenses": {"value": 0, "references": []},
"fundraising_expenses": {"value": 2104, "references": ["13-d"]},
"grants_us_organizations": {"value": 0, "references": []},
"grants_us_individuals": {"value": 0, "references": []},
"grants_foreign_organizations": {"value": 0, "references": []},
"grants_foreign_individuals": {"value": 0, "references": []},
"compensation_officers": {
"value": 0,
"references": ["5-1q", "5-1w", "5-1C", "5-1I"],
},
"compensation_other_staff": {"value": 0, "references": []},
"payroll_taxes_benefits": {"value": 0, "references": []},
"professional_fees": {"value": 0, "references": []},
"office_occupancy_costs": {"value": 0, "references": []},
"information_technology_costs": {"value": 0, "references": []},
"travel_conference_expenses": {"value": 0, "references": []},
"depreciation_amortization": {"value": 0, "references": []},
"insurance": {"value": 0, "references": []},
},
"balance_sheet": {},
"officers_directors_trustees_key_employees": [
{
"name": {"value": "REBECCA TERPSTRA", "references": ["5-1o"]},
"title_position": {"value": "PRESIDENT", "references": ["5-1p"]},
"average_hours_per_week": {"value": 0.1, "references": ["5-1p"]},
"related_party_transactions": {"value": "", "references": []},
"former_officer": {"value": "", "references": []},
"governance_role": {"value": "", "references": []},
},
{
"name": {"value": "ROBERT GUZMAN", "references": ["5-1u"]},
"title_position": {
"value": "VICE PRESDEINT",
"references": ["5-1v"],
},
"average_hours_per_week": {"value": 0.1, "references": ["5-1v"]},
"related_party_transactions": {"value": "", "references": []},
"former_officer": {"value": "", "references": []},
"governance_role": {"value": "", "references": []},
},
{
"name": {"value": "ANDREA VALENTI", "references": ["5-1A"]},
"title_position": {"value": "TREASURER", "references": ["5-1B"]},
"average_hours_per_week": {"value": 0.1, "references": ["5-1B"]},
"related_party_transactions": {"value": "", "references": []},
"former_officer": {"value": "", "references": []},
"governance_role": {"value": "", "references": []},
},
{
"name": {"value": "BETHANY WALSH", "references": ["5-1G"]},
"title_position": {"value": "SECRETARY", "references": ["5-1H"]},
"average_hours_per_week": {"value": 0.1, "references": ["5-1H"]},
"related_party_transactions": {"value": "", "references": []},
"former_officer": {"value": "", "references": []},
"governance_role": {"value": "", "references": []},
},
],
"governance_management_disclosure": {
"governing_body_size": {
"value": 4,
"references": ["5-1o", "5-1u", "5-1A", "5-1G"],
},
"independent_members": {
"value": 4,
"references": ["5-1o", "5-1u", "5-1A", "5-1G"],
},
"financial_statements_reviewed": {"value": "", "references": []},
"form_990_provided_to_governing_body": {"value": "", "references": []},
"conflict_of_interest_policy": {"value": "", "references": []},
"whistleblower_policy": {"value": "", "references": []},
"document_retention_policy": {"value": "", "references": []},
"ceo_compensation_review_process": {"value": "", "references": []},
"public_disclosure_practices": {"value": "Yes", "references": ["4-g"]},
},
"program_service_accomplishments": [],
"fundraising_grantmaking": {
"total_fundraising_event_revenue": {"value": 0, "references": []},
"total_fundraising_event_expenses": {
"value": 2104,
"references": ["13-d"],
},
"professional_fundraiser_fees": {"value": 0, "references": []},
},
"functional_operational_data": {
"number_of_employees": {"value": 0, "references": []},
"number_of_volunteers": {"value": 0, "references": []},
"occupancy_costs": {"value": 0, "references": []},
"fundraising_method_descriptions": {"value": "", "references": []},
"joint_ventures_disregarded_entities": {"value": "", "references": []},
},
"compensation_details": {
"base_compensation": {"value": 0, "references": ["5-1q", "5-1w"]},
"bonus": {"value": 0, "references": []},
"incentive": {"value": 0, "references": []},
"other": {"value": 0, "references": []},
"non_fixed_compensation": {"value": "", "references": []},
"first_class_travel": {"value": "", "references": []},
"housing_allowance": {"value": "", "references": []},
"expense_account_usage": {"value": "", "references": []},
"supplemental_retirement": {"value": "", "references": []},
},
"political_lobbying_activities": {
"lobbying_expenditures_direct": {"value": 0, "references": []},
"lobbying_expenditures_grassroots": {"value": 0, "references": []},
"election_501h_status": {"value": "", "references": []},
"political_campaign_expenditures": {"value": 0, "references": []},
"related_organizations_affiliates": {"value": "", "references": []},
},
"investments_endowment": {
"investment_types": {"value": "", "references": []},
"donor_restricted_endowment_values": {"value": 0, "references": []},
"net_appreciation_depreciation": {"value": 0, "references": []},
"related_organization_transactions": {"value": "", "references": []},
"loans_to_from_related_parties": {"value": "", "references": []},
},
"tax_compliance_penalties": {
"penalties_excise_taxes_reported": {
"value": "No",
"references": ["3-I"],
},
"unrelated_business_income_disclosure": {
"value": "No",
"references": ["3-Y"],
},
"foreign_bank_account_reporting": {
"value": "No",
"references": ["4-H"],
},
"schedule_o_narrative_explanations": {"value": "", "references": []},
},
},
"metadata": {
"filename": "markdown.md",
"org_id": None,
"duration_ms": 16656,
"credit_usage": 27.2,
"job_id": "nnmr8lcxtykk5ll5wodjtrnn6",
"version": "extract-20250930",
},
}
@agent.tool_plain
async def build_audit_report():
@agent.tool
async def build_audit_report(ctx: RunContext[Deps]):
    """Calls the audit subagent to get a full audit report of the organization"""
    # NOTE: the docstring above is left verbatim; the agent framework presumably
    # exposes it to the model as the tool description (confirm before editing).
    import logging
    from pathlib import Path

    data = ctx.deps.extracted_data

    # Best-effort debug snapshot of the payload handed to the auditor. The
    # path is relative to the working directory, so create the folder first,
    # and serialize before writing so a failure cannot leave a truncated file.
    # A failed snapshot must not fail the tool call itself.
    snapshot_path = Path("data/audit_report.json")
    try:
        serialized = json.dumps(data)
        snapshot_path.parent.mkdir(parents=True, exist_ok=True)
        snapshot_path.write_text(serialized, encoding="utf-8")
    except (OSError, TypeError, ValueError):
        logging.getLogger(__name__).warning(
            "Could not write debug snapshot to %s", snapshot_path, exc_info=True
        )

    result = await form_auditor.build_audit_report(data)
    return result.model_dump()
@@ -370,5 +54,13 @@ async def search_web_information(query: str, max_results: int = 5):
@router.post("/chat")
async def chat(request: Request) -> Response:
return await VercelAIAdapter.dispatch_request(request, agent=agent)
async def chat(request: Request, tema: Annotated[str, Header()]) -> Response:
    """Run the chat agent with the extracted data stored for ``tema``.

    Args:
        request: Incoming chat request, forwarded to the Vercel AI adapter.
        tema: Value of the ``tema`` request header, used to look up documents.

    Returns:
        The response produced by ``VercelAIAdapter.dispatch_request``.

    Raises:
        HTTPException: 404 when no document is stored for ``tema``.
    """
    from fastapi import HTTPException

    extracted_data_service = get_extracted_data_service()
    documents = await extracted_data_service.get_by_tema(tema)

    # Only the first document is used as agent context. An empty result used
    # to surface as an IndexError (HTTP 500); report it as "not found".
    first_document = next(iter(documents or ()), None)
    if first_document is None:
        raise HTTPException(
            status_code=404,
            detail=f"No extracted data found for tema '{tema}'.",
        )

    deps = Deps(extracted_data=first_document.get_extracted_data())
    return await VercelAIAdapter.dispatch_request(request, agent=agent, deps=deps)

View File

@@ -14,8 +14,14 @@ logger = logging.getLogger(__name__)
class DataroomCreate(BaseModel):
name: str
collection: str = ""
storage: str = ""
@property
def collection(self) -> str:
    """Collection identifier derived from ``name``: lowercased, each space replaced by ``_``."""
    lowered = self.name.lower()
    return "_".join(lowered.split(" "))
@property
def storage(self) -> str:
    """Storage identifier derived from ``name``: lowercased, each space replaced by ``_``."""
    lowered = self.name.lower()
    return "_".join(lowered.split(" "))
class DataroomInfo(BaseModel):
@@ -110,9 +116,9 @@ async def dataroom_info(dataroom_name: str) -> DataroomInfo:
if collection_info_response:
collection_info = {
"vectors_count": collection_info_response.vectors_count,
"indexed_vectors_count": collection_info_response.indexed_vectors_count,
"points_count": collection_info_response.points_count,
"segments_count": collection_info_response.segments_count,
"indexed_vectors_count": collection_info_response.vectors_count,
"points_count": collection_info_response.vectors_count,
"segments_count": collection_info_response.vectors_count,
"status": collection_info_response.status,
}
vector_count = collection_info_response.vectors_count