This commit is contained in:
2026-02-20 04:38:32 +00:00
parent 14ed21a1f9
commit bcdc41ecd5
20 changed files with 309 additions and 283 deletions

View File

@@ -1,5 +1,4 @@
"""
Copyright 2025 Google. This software is provided as-is, without warranty or
"""Copyright 2025 Google. This software is provided as-is, without warranty or
representation for any use or purpose. Your use of it is subject to your
agreement with Google.
@@ -8,29 +7,28 @@ Data Loss Prevention service for obfuscating sensitive information.
import logging
import re
from google.cloud import dlp_v2
from google.cloud.dlp_v2 import types
from ..config import Settings
from capa_de_integracion.config import Settings
logger = logging.getLogger(__name__)
class DLPService:
"""
Service for detecting and obfuscating sensitive data using Google Cloud DLP.
"""Service for detecting and obfuscating sensitive data using Google Cloud DLP.
Integrates with the DLP API to scan text for PII and other sensitive information,
then obfuscates findings based on their info type.
"""
def __init__(self, settings: Settings):
"""
Initialize DLP service.
def __init__(self, settings: Settings) -> None:
"""Initialize DLP service.
Args:
settings: Application settings
"""
self.settings = settings
self.project_id = settings.gcp_project_id
@@ -40,8 +38,7 @@ class DLPService:
logger.info("DLP Service initialized")
async def get_obfuscated_string(self, text: str, template_id: str) -> str:
"""
Inspect text for sensitive data and obfuscate findings.
"""Inspect text for sensitive data and obfuscate findings.
Args:
text: Text to inspect and obfuscate
@@ -52,6 +49,7 @@ class DLPService:
Note:
Exceptions from the DLP API call are caught and logged; the original text is returned instead of raising.
"""
try:
# Build content item
@@ -63,7 +61,7 @@ class DLPService:
# Build inspect config
finding_limits = types.InspectConfig.FindingLimits(
max_findings_per_item=0 # No limit
max_findings_per_item=0, # No limit
)
inspect_config = types.InspectConfig(
@@ -91,8 +89,7 @@ class DLPService:
if findings_count > 0:
return self._obfuscate_text(response, text)
else:
return text
return text
except Exception as e:
logger.error(
@@ -102,8 +99,7 @@ class DLPService:
return text
def _obfuscate_text(self, response: types.InspectContentResponse, text: str) -> str:
"""
Obfuscate sensitive findings in text.
"""Obfuscate sensitive findings in text.
Args:
response: DLP inspect content response with findings
@@ -111,6 +107,7 @@ class DLPService:
Returns:
Text with sensitive data obfuscated
"""
# Filter findings by likelihood (> POSSIBLE, which is value 3)
findings = [
@@ -127,7 +124,7 @@ class DLPService:
info_type = finding.info_type.name
logger.info(
f"InfoType: {info_type} | Likelihood: {finding.likelihood.value}"
f"InfoType: {info_type} | Likelihood: {finding.likelihood.value}",
)
# Obfuscate based on info type
@@ -136,13 +133,11 @@ class DLPService:
text = text.replace(quote, replacement)
# Clean up consecutive DIRECCION tags
text = self._clean_direccion(text)
return self._clean_direccion(text)
return text
def _get_replacement(self, info_type: str, quote: str) -> str | None:
"""
Get replacement text for a given info type.
"""Get replacement text for a given info type.
Args:
info_type: DLP info type name
@@ -150,6 +145,7 @@ class DLPService:
Returns:
Replacement text or None to skip
"""
replacements = {
"CREDIT_CARD_NUMBER": f"**** **** **** {self._get_last4(quote)}",
@@ -190,7 +186,7 @@ class DLPService:
pattern = r"\[DIRECCION\](?:(?:,\s*|\s+)\[DIRECCION\])*"
return re.sub(pattern, "[DIRECCION]", text).strip()
async def close(self):
async def close(self) -> None:
    """Shut down the DLP client's transport.

    Awaits ``close()`` on the client's transport and then emits an
    info-level log message confirming the shutdown.
    """
    transport = self.dlp_client.transport
    await transport.close()
    logger.info("DLP client closed")