civicconnect / debug_profanity.py
Mohan Rao Boddu
Fix HuggingFace folder structure
4a87503
#!/usr/bin/env python3
"""
Debug script to test profanity detection
"""
import sys
import os
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
# Test import
print("=" * 60)
print("Testing Profanity-Check Import")
print("=" * 60)
try:
from profanity_check import predict
print("βœ… Successfully imported profanity_check.predict")
# Test with clean text
print("\nTesting with clean text: 'This is a clean sentence'")
result_clean = predict(["This is a clean sentence"])
print(f"Result: {result_clean}")
print(f"Type: {type(result_clean)}")
if hasattr(result_clean, '__getitem__'):
print(f"Value: {result_clean[0]}")
print(f"Is profane: {bool(int(result_clean[0]) == 1)}")
# Test with profane text
print("\nTesting with profane text: 'This is a fucking pothole'")
result_profane = predict(["This is a fucking pothole"])
print(f"Result: {result_profane}")
print(f"Type: {type(result_profane)}")
if hasattr(result_profane, '__getitem__'):
print(f"Value: {result_profane[0]}")
print(f"Is profane: {bool(int(result_profane[0]) == 1)}")
# Test with more profane words
test_cases = [
"fuck",
"shit",
"bitch",
"damn",
"hell",
"This is a clean description of a pothole",
"fuck this pothole",
"what the hell is wrong",
]
print("\n" + "=" * 60)
print("Testing Multiple Cases")
print("=" * 60)
for test_text in test_cases:
result = predict([test_text])
is_profane = bool(int(result[0]) == 1) if hasattr(result, '__getitem__') else bool(int(result) == 1)
status = "PROFANE" if is_profane else "CLEAN"
print(f"{status:8} | '{test_text}'")
except ImportError as e:
print(f"❌ Failed to import profanity_check: {e}")
except Exception as e:
print(f"❌ Error: {e}")
import traceback
traceback.print_exc()
# Test the actual pipeline
print("\n" + "=" * 60)
print("Testing Pipeline Function")
print("=" * 60)
try:
from app.pipeline import classify_report, PROFANITY_AVAILABLE, _profanity_predict
print(f"PROFANITY_AVAILABLE: {PROFANITY_AVAILABLE}")
print(f"_profanity_predict is None: {_profanity_predict is None}")
# Test cases
test_reports = [
{
"report_id": "test1",
"description": "This is a fucking pothole",
"category": "Road & Traffic",
"user_id": "test"
},
{
"report_id": "test2",
"description": "There is a large pothole on Main Street",
"category": "Road & Traffic",
"user_id": "test"
},
{
"report_id": "test3",
"description": "What the hell is wrong with this road",
"category": "Road & Traffic",
"user_id": "test"
},
]
for report in test_reports:
result = classify_report(report)
status = result.get("status")
reason = result.get("reason", "")
print(f"\nDescription: '{report['description']}'")
print(f"Status: {status}")
if reason:
print(f"Reason: {reason}")
if status == "rejected" and "abusive" in reason.lower():
print("βœ… Profanity detected correctly!")
elif status == "accepted" and any(word in report['description'].lower() for word in ["fuck", "hell"]):
print("❌ Profanity NOT detected!")
except Exception as e:
print(f"❌ Error testing pipeline: {e}")
import traceback
traceback.print_exc()