add analysis component
This commit is contained in:
112
backend/app/agents/analyst/__init__.py
Normal file
112
backend/app/agents/analyst/__init__.py
Normal file
@@ -0,0 +1,112 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Iterable, List
|
||||
|
||||
from app.agents.form_auditor.models import ExtractedIrsForm990PfDataSchema
|
||||
|
||||
from .agent import agent
|
||||
from .metrics import SnapshotBundle, build_key_metrics, build_snapshots
|
||||
from .models import AnalystReport, AnalystState
|
||||
|
||||
__all__ = ["build_performance_report"]
|
||||
|
||||
|
||||
def _resolve_year(
|
||||
entry: dict[str, Any], extraction: ExtractedIrsForm990PfDataSchema
|
||||
) -> int:
|
||||
candidates: Iterable[Any] = (
|
||||
entry.get("calendar_year"),
|
||||
entry.get("year"),
|
||||
entry.get("tax_year"),
|
||||
entry.get("return_year"),
|
||||
entry.get("metadata", {}).get("return_year")
|
||||
if isinstance(entry.get("metadata"), dict)
|
||||
else None,
|
||||
entry.get("metadata", {}).get("tax_year")
|
||||
if isinstance(entry.get("metadata"), dict)
|
||||
else None,
|
||||
extraction.core_organization_metadata.calendar_year,
|
||||
)
|
||||
for candidate in candidates:
|
||||
if candidate in (None, ""):
|
||||
continue
|
||||
try:
|
||||
return int(candidate)
|
||||
except (TypeError, ValueError):
|
||||
continue
|
||||
raise ValueError("Unable to determine filing year for one of the payload entries.")
|
||||
|
||||
|
||||
async def build_performance_report(payloads: List[dict[str, Any]]) -> AnalystReport:
    """Build a multi-year performance report for a single organisation.

    Each payload entry is validated into an ``ExtractedIrsForm990PfDataSchema``
    (taken from its ``"extraction"`` key when present, otherwise from the entry
    itself), tagged with a year, and turned into yearly snapshots and trend
    metrics. Those deterministic figures are passed to the analyst agent as
    dependencies; the agent's output is returned with the identity fields,
    years and metrics overwritten by the locally computed values.

    Args:
        payloads: One dict per filing year. All entries must share one EIN.

    Returns:
        The agent's ``AnalystReport`` with ``organisation_name``,
        ``organisation_ein``, ``years_analyzed`` and ``key_metrics`` replaced
        by the values computed here.

    Raises:
        ValueError: If ``payloads`` is empty, a year cannot be resolved, or
            the entries do not all carry the same EIN.
        TypeError: If an entry is not a dict.
    """
    if not payloads:
        raise ValueError("At least one payload is required for performance analysis.")

    bundles: List[SnapshotBundle] = []
    organisation_name = ""
    organisation_ein = ""

    for index, entry in enumerate(payloads):
        if not isinstance(entry, dict):
            raise TypeError("Each payload entry must be a dict.")

        extraction_payload = entry.get("extraction") if "extraction" in entry else entry
        extraction = ExtractedIrsForm990PfDataSchema.model_validate(extraction_payload)
        year = _resolve_year(entry, extraction)

        organisation = extraction.core_organization_metadata
        if index == 0:
            # The first entry defines the organisation. Keying on the index
            # (not on a falsy EIN) keeps the check below active even when the
            # first filing's EIN is empty.
            organisation_ein = organisation.ein
            organisation_name = organisation.legal_name
        elif organisation.ein != organisation_ein:
            raise ValueError(
                "All payload entries must belong to the same organization."
            )

        bundles.append(SnapshotBundle(year=year, extraction=extraction))

    # Snapshots carry growth relative to the preceding bundle, so order matters.
    bundles.sort(key=lambda bundle: bundle.year)
    snapshots = build_snapshots(bundles)
    metrics = build_key_metrics(snapshots)

    # Look metrics up by name so the notes do not depend on list order.
    metrics_by_name = {metric.name: metric for metric in metrics}
    notes: List[str] = []

    revenue_metric = metrics_by_name.get("Total Revenue")
    if revenue_metric is not None and revenue_metric.cagr is not None:
        notes.append(f"Revenue CAGR: {revenue_metric.cagr:.2%}")

    expense_metric = metrics_by_name.get("Total Expenses")
    if expense_metric is not None and expense_metric.cagr is not None:
        notes.append(f"Expense CAGR: {expense_metric.cagr:.2%}")

    surplus_metric = metrics_by_name.get("Operating Surplus")
    if surplus_metric is not None:
        last_value = surplus_metric.points[-1].value if surplus_metric.points else 0
        notes.append(f"Latest operating surplus: {last_value:,.0f}")

    state = AnalystState(
        organisation_name=organisation_name,
        organisation_ein=organisation_ein,
        series=snapshots,
        key_metrics=metrics,
        notes=notes,
    )

    prompt = (
        "Analyze the provided multi-year financial context. Quantify notable trends, "
        "call out risks or strengths, and supply actionable recommendations. "
        "Capture both positive momentum and areas requiring attention."
    )
    result = await agent.run(prompt, deps=state)
    report = result.output

    years = [snapshot.year for snapshot in snapshots]

    # Identity fields, years and metrics always come from the deterministic
    # pipeline above, whatever the agent wrote into them.
    return report.model_copy(
        update={
            "organisation_name": organisation_name,
            "organisation_ein": organisation_ein,
            "years_analyzed": years,
            "key_metrics": metrics,
        }
    )
|
||||
33
backend/app/agents/analyst/agent.py
Normal file
33
backend/app/agents/analyst/agent.py
Normal file
@@ -0,0 +1,33 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from pydantic_ai import Agent
|
||||
from pydantic_ai.models.openai import OpenAIChatModel
|
||||
from pydantic_ai.providers.azure import AzureProvider
|
||||
|
||||
from app.core.config import settings
|
||||
|
||||
from .models import AnalystReport, AnalystState
|
||||
|
||||
# Standing instructions for the analyst agent: what it receives, what to focus
# on, and the constraint that it must not invent figures.
_SYSTEM_PROMPT = (
    "You are a nonprofit financial analyst. You receive multi-year Form 990 extractions "
    "summarized into deterministic metrics (series, ratios, surplus, CAGR). Use the context "
    "to highlight performance trends, governance implications, and forward-looking risks. "
    "Focus on numeric trends: revenue growth, expense discipline, surplus stability, "
    "program-vs-admin mix, and fundraising efficiency. Provide concise bullet insights, "
    "clear recommendations tied to the data, and a balanced outlook (strengths vs watch items). "
    "Only cite facts available in the provided series—do not invent figures."
)

# Azure OpenAI connection; endpoint, API version and key are read from the
# application settings at import time.
provider = AzureProvider(
    azure_endpoint=settings.AZURE_OPENAI_ENDPOINT,
    api_version=settings.AZURE_OPENAI_API_VERSION,
    api_key=settings.AZURE_OPENAI_API_KEY,
)

# NOTE(review): with the Azure provider "gpt-4o" is presumably the deployment
# name rather than the base model id — confirm against the Azure resource.
model = OpenAIChatModel(model_name="gpt-4o", provider=provider)

# Module-level agent shared by callers: takes an AnalystState as dependencies
# and is configured to return an AnalystReport.
agent = Agent(
    model=model,
    name="MultiYearAnalyst",
    deps_type=AnalystState,
    output_type=AnalystReport,
    system_prompt=_SYSTEM_PROMPT,
)
|
||||
197
backend/app/agents/analyst/metrics.py
Normal file
197
backend/app/agents/analyst/metrics.py
Normal file
@@ -0,0 +1,197 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from typing import Iterable, List, Sequence, Tuple
|
||||
|
||||
from app.agents.form_auditor.models import ExtractedIrsForm990PfDataSchema
|
||||
|
||||
from .models import TrendDirection, TrendMetric, TrendMetricPoint, YearlySnapshot
|
||||
|
||||
|
||||
@dataclass
class SnapshotBundle:
    """Pair one validated Form 990-PF extraction with the year it is filed under."""

    # Year used for ordering bundles and labelling the resulting snapshot.
    year: int
    # Validated extraction that the yearly figures are read from.
    extraction: ExtractedIrsForm990PfDataSchema
|
||||
|
||||
|
||||
def _safe_ratio(numerator: float, denominator: float) -> float | None:
|
||||
if denominator in (0, None):
|
||||
return None
|
||||
try:
|
||||
return numerator / denominator
|
||||
except ZeroDivisionError:
|
||||
return None
|
||||
|
||||
|
||||
def _growth(current: float, previous: float | None) -> float | None:
|
||||
if previous in (None, 0):
|
||||
return None
|
||||
try:
|
||||
return (current - previous) / previous
|
||||
except ZeroDivisionError:
|
||||
return None
|
||||
|
||||
|
||||
def _direction_from_points(values: Sequence[float | None]) -> TrendDirection:
    """Classify a series as improving, declining, stable or volatile.

    ``None`` entries are ignored. The remaining values are judged as follows:

    * fewer than two values -> ``STABLE``
    * last value within 2% of the first (or within 0.01 when the first value
      is zero) -> ``STABLE``
    * more than two values and the number of direction reversals between
      consecutive steps is at least ``len // 2`` -> ``VOLATILE``
    * otherwise ``IMPROVING`` when the last value is above the first,
      ``DECLINING`` when it is below.

    Args:
        values: Series in chronological order; gaps may be ``None``.

    Returns:
        The ``TrendDirection`` label for the series.
    """
    observed = [value for value in values if value is not None]
    if len(observed) < 2:
        return TrendDirection.STABLE

    first, last = observed[0], observed[-1]
    net_change = last - first

    # Relative band around the starting value; absolute fallback for zero.
    band = abs(first) * 0.02 if first else 0.01
    if abs(net_change) <= band:
        return TrendDirection.STABLE

    if len(observed) > 2:
        steps = [later - earlier for earlier, later in zip(observed, observed[1:])]
        # A reversal is two consecutive steps with opposite signs.
        reversals = sum(
            1 for before, after in zip(steps, steps[1:]) if before * after < 0
        )
        if reversals >= len(observed) // 2:
            return TrendDirection.VOLATILE

    if net_change > 0:
        return TrendDirection.IMPROVING
    return TrendDirection.DECLINING
|
||||
|
||||
|
||||
def _cagr(start: float | None, end: float | None, periods: int) -> float | None:
|
||||
if start is None or end is None or start <= 0 or end <= 0 or periods <= 0:
|
||||
return None
|
||||
return (end / start) ** (1 / periods) - 1
|
||||
|
||||
|
||||
def build_snapshots(bundles: Sequence[SnapshotBundle]) -> List[YearlySnapshot]:
    """Turn extraction bundles into per-year financial snapshots.

    Bundles are processed in the order given; growth figures compare each
    bundle with the one immediately before it, so callers should pass them in
    chronological order.

    Args:
        bundles: Year/extraction pairs to summarise.

    Returns:
        One ``YearlySnapshot`` per bundle, in the same order.
    """
    snapshots: List[YearlySnapshot] = []
    prior_revenue = None
    prior_expenses = None

    for bundle in bundles:
        revenue = bundle.extraction.revenue_breakdown.total_revenue
        expense_lines = bundle.extraction.expenses_breakdown
        expenses = expense_lines.total_expenses
        program = expense_lines.program_services_expenses
        admin = expense_lines.management_general_expenses
        fundraising = expense_lines.fundraising_expenses

        revenue_growth = _growth(revenue, prior_revenue)
        expense_growth = _growth(expenses, prior_expenses)
        surplus = revenue - expenses

        snapshot = YearlySnapshot(
            year=bundle.year,
            total_revenue=revenue,
            total_expenses=expenses,
            revenue_growth=revenue_growth,
            expense_growth=expense_growth,
            surplus=surplus,
            # Expense mix is expressed as a share of total expenses.
            program_ratio=_safe_ratio(program, expenses),
            admin_ratio=_safe_ratio(admin, expenses),
            fundraising_ratio=_safe_ratio(fundraising, expenses),
            # Net margin is surplus as a share of revenue.
            net_margin=_safe_ratio(revenue - expenses, revenue),
        )
        snapshots.append(snapshot)

        prior_revenue, prior_expenses = revenue, expenses

    return snapshots
|
||||
|
||||
|
||||
def _metric_from_series(
    name: str,
    unit: str,
    description: str,
    values: Iterable[Tuple[int, float | None]],
) -> TrendMetric:
    """Assemble a ``TrendMetric`` from ``(year, value)`` pairs.

    Missing values (``None``) are stored as ``0.0`` in the points because
    ``TrendMetricPoint.value`` is a required float, but they are treated as
    gaps when deriving growth and direction so that a missing year does not
    read as a collapse to zero.

    Args:
        name: Display name of the metric.
        unit: Unit label for the values.
        description: Short explanation of what the metric measures.
        values: ``(year, value)`` pairs, expected in ascending year order.

    Returns:
        The metric with per-point growth, overall direction and CAGR filled in.
        CAGR is ``None`` when there are fewer than two points, when either
        endpoint is non-positive, or when the years do not increase from the
        first point to the last.
    """
    # Materialise once: ``values`` may be a one-shot iterable, and the raw
    # (possibly ``None``) values are needed again after the points are built.
    series = list(values)
    is_gap = [value is None for _, value in series]

    points = [
        TrendMetricPoint(year=year, value=value or 0.0, growth=None)
        for year, value in series
    ]

    for idx in range(1, len(points)):
        # Growth across a gap is undefined; leave it as None.
        if is_gap[idx] or is_gap[idx - 1]:
            continue
        points[idx].growth = _growth(points[idx].value, points[idx - 1].value)

    # Hand gaps over as None so ``_direction_from_points`` can drop them
    # instead of seeing fabricated zeros.
    direction = _direction_from_points(
        [None if gap else point.value for gap, point in zip(is_gap, points)]
    )

    cagr = None
    if len(points) >= 2:
        # Annualise over elapsed years, not the number of points, so that
        # non-consecutive filing years do not overstate the rate.
        elapsed_years = points[-1].year - points[0].year
        cagr = _cagr(points[0].value, points[-1].value, elapsed_years)

    return TrendMetric(
        name=name,
        unit=unit,
        description=description,
        points=points,
        cagr=cagr,
        direction=direction,
    )
|
||||
|
||||
|
||||
def build_key_metrics(snapshots: Sequence[YearlySnapshot]) -> List[TrendMetric]:
    """Derive the headline trend metrics from yearly snapshots.

    Produces, in this order: total revenue, total expenses, operating surplus,
    program service ratio, administrative ratio and fundraising ratio. Ratios
    that are missing for a year are reported as ``0.0``.

    Args:
        snapshots: Yearly snapshots, expected in chronological order.

    Returns:
        The six metrics, or an empty list when there are no snapshots.
    """
    if not snapshots:
        return []

    # (metric name, unit, description, YearlySnapshot attribute holding the value)
    specs = (
        ("Total Revenue", "USD", "Reported total revenue in Part I.", "total_revenue"),
        ("Total Expenses", "USD", "Reported total expenses in Part I.", "total_expenses"),
        (
            "Operating Surplus",
            "USD",
            "Difference between total revenue and total expenses.",
            "surplus",
        ),
        (
            "Program Service Ratio",
            "Ratio",
            "Program service expenses divided by total expenses.",
            "program_ratio",
        ),
        (
            "Administrative Ratio",
            "Ratio",
            "Management & general expenses divided by total expenses.",
            "admin_ratio",
        ),
        (
            "Fundraising Ratio",
            "Ratio",
            "Fundraising expenses divided by total expenses.",
            "fundraising_ratio",
        ),
    )

    metrics: List[TrendMetric] = []
    for name, unit, description, attribute in specs:
        is_ratio = unit == "Ratio"
        series = []
        for snap in snapshots:
            value = getattr(snap, attribute)
            # Only the ratio series substitute 0.0 for a missing value here.
            if is_ratio and value is None:
                value = 0.0
            series.append((snap.year, value))

        metric = _metric_from_series(name, unit, description, series)

        if metric.name.endswith("Ratio"):
            metric.notes = "Higher values indicate greater spending share."
        elif metric.name == "Operating Surplus":
            metric.notes = "Positive surplus implies revenues exceeded expenses."
        metrics.append(metric)

    return metrics
|
||||
74
backend/app/agents/analyst/models.py
Normal file
74
backend/app/agents/analyst/models.py
Normal file
@@ -0,0 +1,74 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from enum import Enum
|
||||
from typing import List
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
# Qualitative label for how a series moved over the analysed period.
# Subclasses ``str`` so members compare equal to their string values.
# ``_direction_from_points`` in metrics.py assigns these to computed metrics.
class TrendDirection(str, Enum):
    # Last value is above the first by more than the tolerance band.
    IMPROVING = "Improving"
    # Last value is below the first by more than the tolerance band.
    DECLINING = "Declining"
    # Fewer than two values, or the net change is within the tolerance band.
    STABLE = "Stable"
    # The series reverses direction often relative to its length.
    VOLATILE = "Volatile"
|
||||
|
||||
|
||||
# One observation of a metric for a single year.
class TrendMetricPoint(BaseModel):
    # Year the observation belongs to.
    year: int
    # Metric value for that year; ``_metric_from_series`` in metrics.py stores
    # 0.0 when the source value is missing.
    value: float
    # Change relative to the preceding point; stays None for the first point
    # and whenever the change cannot be computed.
    growth: float | None = Field(
        default=None, description="Year-over-year growth expressed as a decimal."
    )
|
||||
|
||||
|
||||
# A named time series together with its summary statistics.
class TrendMetric(BaseModel):
    # Display name, e.g. "Total Revenue"; also used to look the metric up.
    name: str
    # Unit label for the values ("USD" or "Ratio" for the built-in metrics).
    unit: str
    # Short explanation of what the metric measures.
    description: str
    # Observations, one per year.
    points: List[TrendMetricPoint]
    # None when the growth rate cannot be computed for the series.
    cagr: float | None = Field(
        default=None,
        description="Compound annual growth rate across the analyzed period.",
    )
    direction: TrendDirection = Field(
        default=TrendDirection.STABLE, description="Overall direction of the metric."
    )
    # Optional interpretation hint attached after the metric is built.
    notes: str | None = None
|
||||
|
||||
|
||||
# A single qualitative finding in an analyst report.
# NOTE(review): nothing in the deterministic pipeline builds these, so they are
# presumably produced by the agent as part of its AnalystReport — confirm.
class TrendInsight(BaseModel):
    # Topic the finding relates to.
    category: str
    # Direction label associated with the finding.
    direction: TrendDirection
    # Narrative description of the finding.
    summary: str
    # Confidence in the finding, constrained to the range 0.0-1.0.
    confidence: float = Field(default=0.7, ge=0.0, le=1.0)
|
||||
|
||||
|
||||
# Final report returned by ``build_performance_report`` and the structured
# output type of the analyst agent (``output_type=AnalystReport``).
class AnalystReport(BaseModel):
    # The four fields below are overwritten with locally computed values by
    # ``build_performance_report`` before the report is returned.
    organisation_name: str
    organisation_ein: str
    years_analyzed: List[int] = Field(default_factory=list)
    key_metrics: List[TrendMetric] = Field(default_factory=list)
    # Narrative content; not modified by ``build_performance_report``.
    insights: List[TrendInsight] = Field(default_factory=list)
    recommendations: List[str] = Field(default_factory=list)
    # Placeholder text until an outlook is supplied.
    outlook: str = "Pending analysis"
|
||||
|
||||
|
||||
# Financial summary for one year, as assembled by ``build_snapshots`` in
# metrics.py.
class YearlySnapshot(BaseModel):
    year: int
    total_revenue: float
    total_expenses: float
    # Relative change versus the preceding snapshot; None for the first one
    # or when the preceding value is zero.
    revenue_growth: float | None = None
    expense_growth: float | None = None
    # Total revenue minus total expenses.
    surplus: float | None = None
    # Expense categories as a share of total expenses; None when undefined.
    program_ratio: float | None = None
    admin_ratio: float | None = None
    fundraising_ratio: float | None = None
    # Surplus as a share of total revenue; None when undefined.
    net_margin: float | None = None
|
||||
|
||||
|
||||
# Dependency object handed to the analyst agent (``deps_type=AnalystState``);
# it carries the deterministic figures computed before the agent runs.
class AnalystState(BaseModel):
    organisation_name: str
    organisation_ein: str
    # Per-year snapshots.
    series: List[YearlySnapshot]
    # Trend metrics derived from ``series``.
    key_metrics: List[TrendMetric]
    # Pre-formatted headline lines such as "Revenue CAGR: ...".
    notes: List[str] = Field(default_factory=list)
|
||||
Reference in New Issue
Block a user