fix(guardrails): censor user and model events when blocked
Some checks failed
CI / ci (pull_request) Failing after 12s
Some checks failed
CI / ci (pull_request) Failing after 12s
This commit is contained in:
@@ -233,5 +233,9 @@ class GovernancePlugin:
|
||||
deleted,
|
||||
)
|
||||
|
||||
# Reset censorship flag for next interaction
|
||||
if callback_context:
|
||||
callback_context.state["guardrail_censored"] = False
|
||||
|
||||
except Exception:
|
||||
logger.exception("Error in after_model_callback")
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import copy
|
||||
import logging
|
||||
import time
|
||||
import uuid
|
||||
@@ -378,8 +379,55 @@ class FirestoreSessionService(BaseSessionService):
|
||||
event = await super().append_event(session=session, event=event)
|
||||
session.last_update_time = event.timestamp
|
||||
|
||||
# Persist event document
|
||||
# Determine if we need to censor this event (model response when guardrail blocked)
|
||||
should_censor_model = (
|
||||
session.state.get("guardrail_blocked", False)
|
||||
and event.author == app_name
|
||||
and hasattr(event, "content")
|
||||
and event.content
|
||||
and event.content.parts
|
||||
and not session.state.get("guardrail_censored", False)
|
||||
)
|
||||
|
||||
# Prepare event data for Firestore
|
||||
if should_censor_model:
|
||||
# Mark as censored to avoid double-censoring
|
||||
session.state["guardrail_censored"] = True
|
||||
|
||||
# Create a censored version of the model response
|
||||
event_to_save = copy.deepcopy(event)
|
||||
event_to_save.content.parts[0].text = "[respuesta de adversidad]"
|
||||
event_data = event_to_save.model_dump(mode="json", exclude_none=True)
|
||||
|
||||
# Also censor the previous user message in Firestore
|
||||
# Find the last user event in the session
|
||||
for i in range(len(session.events) - 1, -1, -1):
|
||||
prev_event = session.events[i]
|
||||
if (
|
||||
prev_event.author == "user"
|
||||
and prev_event.content
|
||||
and prev_event.content.parts
|
||||
):
|
||||
# Update this event in Firestore with censored content
|
||||
censored_user_content = Content(
|
||||
role="user", parts=[Part(text="[pregunta mala]")]
|
||||
)
|
||||
await (
|
||||
self._events_col(app_name, user_id, session_id)
|
||||
.document(prev_event.id)
|
||||
.update(
|
||||
{
|
||||
"content": censored_user_content.model_dump(
|
||||
mode="json", exclude_none=True
|
||||
)
|
||||
}
|
||||
)
|
||||
)
|
||||
break
|
||||
else:
|
||||
event_data = event.model_dump(mode="json", exclude_none=True)
|
||||
|
||||
# Persist event document
|
||||
await (
|
||||
self._events_col(app_name, user_id, session_id)
|
||||
.document(event.id)
|
||||
|
||||
Reference in New Issue
Block a user