1 changed files with 62 additions and 25 deletions
--- a/src/va_agent/governance.py
+++ b/src/va_agent/governance.py
@@ -1,5 +1,6 @@
 # ruff: noqa: E501
 """GovernancePlugin: Guardrails for VAia, the virtual assistant for VA."""
+
 import json
 import logging
 import re
@@ -22,10 +23,56 @@ logger = logging.getLogger(__name__)


 FORBIDDEN_EMOJIS = [
-    "🥵","🔪","🎰","🎲","🃏","😤","🤬","😡","😠","🩸","🧨","🪓","☠️","💀",
-    "💣","🔫","👗","💦","🍑","🍆","👄","👅","🫦","💩","⚖️","⚔️","✝️","🕍",
-    "🕌","⛪","🍻","🍸","🥃","🍷","🍺","🚬","👹","👺","👿","😈","🤡","🧙",
-    "🧙‍♀️", "🧙‍♂️", "🧛", "🧛‍♀️", "🧛‍♂️", "🔞","🧿","💊"
+    "🥵",
+    "🔪",
+    "🎰",
+    "🎲",
+    "🃏",
+    "😤",
+    "🤬",
+    "😡",
+    "😠",
+    "🩸",
+    "🧨",
+    "🪓",
+    "☠️",
+    "💀",
+    "💣",
+    "🔫",
+    "👗",
+    "💦",
+    "🍑",
+    "🍆",
+    "👄",
+    "👅",
+    "🫦",
+    "💩",
+    "⚖️",
+    "⚔️",
+    "✝️",
+    "🕍",
+    "🕌",
+    "⛪",
+    "🍻",
+    "🍸",
+    "🥃",
+    "🍷",
+    "🍺",
+    "🚬",
+    "👹",
+    "👺",
+    "👿",
+    "😈",
+    "🤡",
+    "🧙",
+    "🧙‍♀️",
+    "🧙‍♂️",
+    "🧛",
+    "🧛‍♀️",
+    "🧛‍♂️",
+    "🔞",
+    "🧿",
+    "💊",
 ]


@@ -37,12 +84,11 @@ class GuardrailOutput(BaseModel):
        description="Decision for the user prompt",
    )
    reasoning: str | None = Field(
-        default=None,
-        description="Optional reasoning for the decision"
+        default=None, description="Optional reasoning for the decision"
    )
    blocking_response: str | None = Field(
        default=None,
-        description="Optional custom blocking response to return to the user if unsafe"
+        description="Optional custom blocking response to return to the user if unsafe",
    )


@@ -54,7 +100,7 @@ class GovernancePlugin:
        self.guardrail_llm = Client(
            vertexai=True,
            project=settings.google_cloud_project,
-            location=settings.google_cloud_location
+            location=settings.google_cloud_location,
        )
        _guardrail_instruction = """
 Eres una capa de seguridad y protección de marca para VAia, el asistente virtual de VA en WhatsApp.
@@ -85,9 +131,9 @@ Devuelve un JSON con la siguiente estructura:
        _schema = GuardrailOutput.model_json_schema()
        # Force strict JSON output from the guardrail LLM
        self._guardrail_gen_config = GenerateContentConfig(
-            system_instruction = _guardrail_instruction,
-            response_mime_type = "application/json",
-            response_schema = _schema,
+            system_instruction=_guardrail_instruction,
+            response_mime_type="application/json",
+            response_schema=_schema,
            max_output_tokens=1000,
            temperature=0.1,
        )
@@ -106,7 +152,6 @@ Devuelve un JSON con la siguiente estructura:
            rf"|🖕{tone_pattern}"  # middle finger with all skin tone variations
        )

-
    def _remove_emojis(self, text: str) -> tuple[str, list[str]]:
        removed = self._combined_pattern.findall(text)
        text = self._combined_pattern.sub("", text)
@@ -139,8 +184,7 @@ Devuelve un JSON con la siguiente estructura:
            decision = data.get("decision", "safe").lower()
            reasoning = data.get("reasoning", "")
            blocking_response = data.get(
-                "blocking_response",
-                "Lo siento, no puedo ayudarte con esa solicitud 😅"
+                "blocking_response", "Lo siento, no puedo ayudarte con esa solicitud 😅"
            )

            if decision == "unsafe":
@@ -148,13 +192,8 @@ Devuelve un JSON con la siguiente estructura:
                callback_context.state["guardrail_message"] = "[GUARDRAIL_BLOCKED]"
                callback_context.state["guardrail_reasoning"] = reasoning
                return LlmResponse(
-                    content=Content(
-                        role="model",
-                        parts=[
-                            Part(text=blocking_response)
-                        ]
-                    ),
-                    usage_metadata=resp.usage_metadata or None
+                    content=Content(role="model", parts=[Part(text=blocking_response)]),
+                    usage_metadata=resp.usage_metadata or None,
                )
            callback_context.state["guardrail_blocked"] = False
            callback_context.state["guardrail_message"] = "[GUARDRAIL_PASSED]"
@@ -168,9 +207,7 @@ Devuelve un JSON con la siguiente estructura:
                content=Content(
                    role="model",
                    parts=[
-                        Part(
-                            text="Lo siento, no puedo ayudarte con esa solicitud 😅"
-                        )
+                        Part(text="Lo siento, no puedo ayudarte con esa solicitud 😅")
                    ],
                ),
                interrupted=True,