# OFPBadWord / app.py
# (HuggingFace Spaces file header — commit "Comprehensive UI redesign with
# professional styling", 90f2d18 — kept as a comment; the bare text was not
# valid Python.)
"""
OFP Bad Word Sentinel - Gradio Dashboard
Real-time monitoring interface for content moderation
"""
import gradio as gr
import os
import logging
from datetime import datetime, timezone
from apscheduler.schedulers.background import BackgroundScheduler
import yaml
from fastapi import FastAPI
from fastapi.responses import JSONResponse
import uvicorn
from threading import Thread
# Import sentinel components
from src.profanity_detector import ProfanityDetector
from src.sentinel import BadWordSentinel
from src.models import Envelope, DialogEvent
# Configure logging
# Single root configuration at import time; modules pick it up via
# logging.getLogger(__name__).
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)
# Load configuration
CONFIG_FILE = 'config/config.yaml'
try:
    # Explicit encoding: wordlists/config may contain non-ASCII text and the
    # platform default encoding is not guaranteed to be UTF-8.
    with open(CONFIG_FILE, 'r', encoding='utf-8') as f:
        config = yaml.safe_load(f)
    logger.info("Configuration loaded successfully")
except FileNotFoundError:
    # Fall back to built-in defaults so the demo runs without a config file.
    logger.warning("Config file not found, using defaults")
    config = {
        'sentinel': {
            'speaker_uri': 'tag:sentinel.service,2025:badword-01',
            'service_url': 'https://sentinel-service.com/ofp',
            'convener_uri': 'tag:convener.service,2025:default',
            'convener_url': 'https://convener-service.com/ofp'
        },
        'profanity': {
            'use_default': True,
            'whitelist': ['scunthorpe', 'arsenal']
        },
        'monitoring': {
            'check_interval': 30,
            'auto_start': True
        },
        'dashboard': {
            'refresh_interval': 5,
            'show_test_panel': True
        }
    }
# --- Profanity detector setup -------------------------------------------
profanity_cfg = config['profanity']

# Optional custom word list loaded from disk (skipped if path is missing).
custom_words = None
wordlist_path = profanity_cfg.get('custom_wordlist')
if wordlist_path and os.path.exists(wordlist_path):
    custom_words = ProfanityDetector.load_wordlist_from_file(wordlist_path)
    if custom_words:
        logger.info(f"Loaded {len(custom_words)} custom words")

# Kaggle dataset path/languages only apply when the dataset is enabled.
kaggle_enabled = profanity_cfg.get('use_kaggle_dataset', False)
detector = ProfanityDetector(
    custom_words=custom_words,
    whitelist=profanity_cfg.get('whitelist', []),
    kaggle_dataset_path=profanity_cfg.get('kaggle_dataset_path') if kaggle_enabled else None,
    languages=profanity_cfg.get('languages', ['en']) if kaggle_enabled else None,
)
# Initialize sentinel
# URIs/URLs come from config; the detector instance is shared with the
# dashboard test functions below.
sentinel = BadWordSentinel(
    speaker_uri=config['sentinel']['speaker_uri'],
    service_url=config['sentinel']['service_url'],
    profanity_detector=detector,
    convener_uri=config['sentinel']['convener_uri'],
    convener_url=config['sentinel']['convener_url']
)
# Start monitoring if auto-start enabled
if config['monitoring'].get('auto_start', True):
    sentinel.start_monitoring()
# Background monitoring simulation
def simulate_monitoring():
    """Simulate OFP event processing (in production, replace with actual OFP listener)"""
    try:
        if not sentinel.is_monitoring:
            return
        # Demo-only heartbeat; a real deployment would consume the OFP
        # event stream here instead.
        sentinel._log_activity("Monitoring check completed")
        logger.debug("Monitoring check completed")
    except Exception as err:
        logger.error(f"Monitoring error: {err}")
        sentinel._log_activity(f"ERROR: {str(err)}")
# Setup scheduler for background tasks
# APScheduler runs simulate_monitoring on a fixed interval in a daemon thread.
scheduler = BackgroundScheduler()
check_interval = config['monitoring'].get('check_interval', 30)
scheduler.add_job(func=simulate_monitoring, trigger="interval", seconds=check_interval)
scheduler.start()
logger.info(f"Background scheduler started (interval: {check_interval}s)")
# Create FastAPI app
# Gradio is mounted onto this app at startup (see __main__ block).
app = FastAPI()

@app.get("/manifest")
async def get_manifest():
    """Serve the assistant manifest (simple JSON response)"""
    manifest = sentinel.get_manifest()
    return JSONResponse(content=manifest)
@app.post("/ofp")
async def receive_ofp_envelope(envelope: dict):
    """Receive OFP envelopes from convener

    Handles all OFP events including:
    - getManifests: Returns publishManifest event with servicingManifests
    - utterance: Processes messages for profanity detection
    - Other events: Logged and acknowledged

    NOTE(review): only the FIRST getManifests/utterance event in an envelope
    produces a response; any events after it are not processed. Single-event
    envelopes appear to be the expected case — confirm against the convener.
    """
    import uuid  # used for generated dialog-event ids

    def _reply(conversation, events):
        """Build an OFP response envelope with this sentinel as the sender."""
        return JSONResponse(content={
            "openFloor": {
                "schema": {"version": "1.0.0"},
                "conversation": conversation,
                "sender": {
                    "speakerUri": config['sentinel']['speaker_uri'],
                    "serviceUrl": config['sentinel']['service_url']
                },
                "events": events
            }
        })

    try:
        # Reject anything that is not an OFP envelope.
        if "openFloor" not in envelope:
            return JSONResponse(
                content={"status": "error", "message": "Invalid OFP envelope"},
                status_code=400
            )
        openfloor_data = envelope["openFloor"]
        conversation = openfloor_data.get("conversation", {})

        # Process each event (first recognized event short-circuits).
        for event in openfloor_data.get("events", []):
            event_type = event.get("eventType")

            # Handle getManifests event
            if event_type == "getManifests":
                manifest = sentinel.get_manifest()
                return _reply(conversation, [{
                    "eventType": "publishManifest",
                    "to": event.get("to", {}),
                    "parameters": {
                        "servicingManifests": [manifest]
                    }
                }])

            # Handle utterance events (main monitoring function)
            elif event_type == "utterance":
                # Process for monitoring/statistics via the sentinel pipeline.
                ofp_envelope = Envelope.from_dict(envelope)
                sentinel.process_envelope(ofp_envelope)

                # Extract plain text from the incoming dialog-event tokens.
                tokens = (
                    event.get('parameters', {})
                    .get('dialogEvent', {})
                    .get('features', {})
                    .get('text', {})
                    .get('tokens', [])
                )
                text = ' '.join(token.get('value', '') for token in tokens)

                # Check for profanity
                violation = detector.detect_violations(text)
                if not violation:
                    # Clean message - acknowledge silently with no events.
                    return _reply(conversation, [])

                # Profanity detected - send warning utterance back.
                warning_message = (
                    f"⚠️ Content moderation alert: {violation['violation_count']} "
                    f"violation(s) detected. Censored: {violation['censored_text']}"
                )
                return _reply(conversation, [{
                    "eventType": "utterance",
                    "to": event.get("to", {}),
                    "parameters": {
                        "dialogEvent": {
                            "id": f"de:{uuid.uuid4()}",
                            "speakerUri": config['sentinel']['speaker_uri'],
                            "span": {
                                "startTime": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z')
                            },
                            "features": {
                                "text": {
                                    "mimeType": "text/plain",
                                    "tokens": [{
                                        "value": warning_message,
                                        "confidence": 1
                                    }]
                                }
                            }
                        }
                    }
                }])

        # Return acknowledgment for other event types (or empty event lists).
        return _reply(conversation, [])
    except Exception as e:
        logger.error(f"Error processing OFP envelope: {e}")
        return JSONResponse(
            content={"status": "error", "message": str(e)},
            status_code=400
        )
# Gradio Interface Functions
def update_dashboard():
    """Update dashboard with current status"""
    status = sentinel.get_status()
    logs = status['recent_logs']
    log_text = '\n'.join(logs) if logs else "No recent activity"
    now_str = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    # Tuple order matches the dashboard output components.
    return (
        status['connection_status'],
        now_str,
        status['violations_detected'],
        status['alerts_sent'],
        status['messages_processed'],
        log_text,
    )
def test_detection(text: str):
    """Test profanity detection on input text"""
    # Guard clause: empty/missing input gets an explicit error payload.
    if not text:
        return {"error": "No text provided"}
    result = detector.detect_violations(text)
    if not result:
        return {
            "profane": False,
            "message": "No profanity detected"
        }
    return {
        "profane": True,
        "severity": result['severity'],
        "violations_found": result['violations'],
        "censored": result['censored_text'],
        "count": result['violation_count'],
    }
def simulate_test_violation():
    """Simulate a test violation for demonstration"""
    # Build the mock dialog event separately for readability.
    now_iso = datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z')
    dialog_event = {
        "id": "de:test-456",
        "speakerUri": "tag:test.user,2025:demo",
        "span": {"startTime": now_iso},
        "features": {
            "text": {
                "mimeType": "text/plain",
                "tokens": [{"value": "This is a test with sh1t and damn"}],
            }
        },
    }
    # Create mock envelope with profane content and feed it through
    # the sentinel's normal processing pipeline.
    mock_envelope = Envelope(
        schema={"version": "1.0.0"},
        conversation={"id": "conv:test-123"},
        sender={"speakerUri": "tag:test.user,2025:demo"},
        events=[{
            "eventType": "utterance",
            "parameters": {"dialogEvent": dialog_event},
        }],
    )
    sentinel.process_envelope(mock_envelope)
    return update_dashboard()
def toggle_monitoring(current_status: str):
    """Toggle monitoring on/off"""
    # "Active" in the displayed status means monitoring is running.
    action = sentinel.stop_monitoring if "Active" in current_status else sentinel.start_monitoring
    action()
    return update_dashboard()
def reset_stats():
    """Reset statistics and return refreshed dashboard values."""
    sentinel.reset_statistics()
    return update_dashboard()
# Build Gradio Interface with Custom CSS
# NOTE: this string is passed verbatim to gr.Blocks(css=...). It styles the
# hero header, stat cards, section wrappers, buttons, inputs, the attribution
# box, and adds responsive tweaks below 768px. Do not add Python comments
# inside the string — they would become part of the CSS.
custom_css = """
/* Main container styling */
.gradio-container {
font-family: 'Inter', system-ui, -apple-system, sans-serif !important;
max-width: 1200px !important;
margin: 0 auto !important;
}
/* Hero header */
.hero-header {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
padding: 3rem 2rem;
border-radius: 16px;
margin-bottom: 2rem;
box-shadow: 0 10px 40px rgba(102, 126, 234, 0.3);
text-align: center;
}
.hero-header h1 {
color: white !important;
font-size: 2.75rem !important;
margin: 0 0 1rem 0 !important;
font-weight: 700 !important;
letter-spacing: -0.02em;
}
.hero-subtitle {
color: rgba(255, 255, 255, 0.95) !important;
font-size: 1.15rem !important;
margin: 0.5rem 0 1rem 0 !important;
line-height: 1.6;
}
.hero-stats {
color: rgba(255, 255, 255, 0.9) !important;
font-weight: 600 !important;
font-size: 1.05rem !important;
margin-top: 1rem !important;
}
/* Status dashboard styling */
.status-dashboard {
background: white;
border-radius: 16px;
padding: 2rem;
margin: 2rem 0;
box-shadow: 0 2px 8px rgba(0, 0, 0, 0.08);
border: 1px solid #e2e8f0;
}
.status-title {
font-size: 1.5rem !important;
font-weight: 700 !important;
color: #2d3748 !important;
margin: 0 0 1.5rem 0 !important;
display: flex;
align-items: center;
gap: 0.5rem;
}
.stat-card {
background: linear-gradient(135deg, #f7fafc 0%, #edf2f7 100%);
border-radius: 12px;
padding: 1.5rem;
text-align: center;
border: 2px solid #e2e8f0;
transition: all 0.3s ease;
height: 100%;
}
.stat-card:hover {
transform: translateY(-2px);
box-shadow: 0 4px 12px rgba(0, 0, 0, 0.1);
border-color: #667eea;
}
.stat-label {
font-size: 0.875rem !important;
font-weight: 600 !important;
color: #718096 !important;
text-transform: uppercase;
letter-spacing: 0.05em;
margin-bottom: 0.5rem !important;
}
.stat-value {
font-size: 2rem !important;
font-weight: 700 !important;
color: #2d3748 !important;
margin-top: 0.5rem !important;
}
.stat-icon {
font-size: 1.5rem;
margin-bottom: 0.5rem;
}
/* Section headers */
.section-wrapper {
background: white;
border-radius: 16px;
padding: 2rem;
margin: 2rem 0;
box-shadow: 0 2px 8px rgba(0, 0, 0, 0.08);
border: 1px solid #e2e8f0;
}
.section-header {
color: #2d3748 !important;
font-size: 1.5rem !important;
font-weight: 700 !important;
margin: 0 0 0.5rem 0 !important;
}
.section-subtitle {
color: #718096 !important;
font-size: 0.95rem !important;
margin: 0 0 1.5rem 0 !important;
}
/* Buttons */
.primary-button {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
border: none !important;
color: white !important;
font-weight: 600 !important;
padding: 0.75rem 2rem !important;
border-radius: 8px !important;
font-size: 1rem !important;
transition: all 0.2s ease !important;
}
.primary-button:hover {
transform: translateY(-2px) !important;
box-shadow: 0 8px 20px rgba(102, 126, 234, 0.4) !important;
}
/* Input fields */
textarea, input {
border: 2px solid #e2e8f0 !important;
border-radius: 8px !important;
padding: 0.75rem !important;
font-size: 1rem !important;
transition: all 0.2s ease !important;
}
textarea:focus, input:focus {
border-color: #667eea !important;
box-shadow: 0 0 0 3px rgba(102, 126, 234, 0.1) !important;
outline: none !important;
}
/* JSON output styling */
.json-output {
background: #f7fafc !important;
border: 2px solid #e2e8f0 !important;
border-radius: 8px !important;
padding: 1rem !important;
font-family: 'Monaco', 'Courier New', monospace !important;
}
/* Attribution section */
.attribution {
background: linear-gradient(135deg, #f7fafc 0%, #edf2f7 100%);
padding: 2.5rem;
border-radius: 16px;
border-left: 6px solid #667eea;
margin-top: 3rem;
box-shadow: 0 2px 8px rgba(0, 0, 0, 0.05);
}
.attribution h2 {
color: #2d3748 !important;
font-size: 1.5rem !important;
font-weight: 700 !important;
margin-top: 0 !important;
}
.attribution h3 {
color: #4a5568 !important;
font-size: 1.1rem !important;
font-weight: 600 !important;
margin-top: 1.5rem !important;
}
.attribution ul {
color: #4a5568 !important;
line-height: 1.8 !important;
}
.attribution a {
color: #667eea !important;
text-decoration: none !important;
font-weight: 500 !important;
transition: color 0.2s ease !important;
}
.attribution a:hover {
color: #764ba2 !important;
text-decoration: underline !important;
}
/* Accordion styling */
.accordion-item {
border: 2px solid #e2e8f0 !important;
border-radius: 8px !important;
margin-top: 1rem !important;
}
/* Responsive adjustments */
@media (max-width: 768px) {
.hero-header h1 {
font-size: 2rem !important;
}
.stat-value {
font-size: 1.5rem !important;
}
.status-dashboard, .section-wrapper {
padding: 1.5rem !important;
}
}
"""
# Dashboard UI. NOTE: the stat cards are static HTML rendered once at page
# build time; they are not live-updating components.
with gr.Blocks(title="OFP Bad Word Sentinel", theme=gr.themes.Soft(), css=custom_css) as demo:
    # Hero Header
    gr.HTML("""
    <div class="hero-header">
    <h1>🔥 OFP Bad Word Sentinel</h1>
    <p class="hero-subtitle">Real-time profanity detection for Open Floor Protocol conversations</p>
    <p class="hero-stats">58 languages · 6,936+ profanity patterns</p>
    </div>
    """)
    # Status Dashboard with Cards
    gr.HTML("""
    <div class="status-dashboard">
    <div class="status-title">📊 Live Status</div>
    </div>
    """)
    with gr.Row(equal_height=True):
        with gr.Column(scale=1):
            gr.HTML("""
            <div class="stat-card">
            <div class="stat-icon">✅</div>
            <div class="stat-label">Status</div>
            <div class="stat-value">Active</div>
            </div>
            """)
        with gr.Column(scale=1):
            # Snapshot of build time, not a live clock.
            current_time = datetime.now().strftime('%H:%M:%S')
            gr.HTML(f"""
            <div class="stat-card">
            <div class="stat-icon">🕐</div>
            <div class="stat-label">Last Check</div>
            <div class="stat-value">{current_time}</div>
            </div>
            """)
        with gr.Column(scale=1):
            gr.HTML(f"""
            <div class="stat-card">
            <div class="stat-icon">⚠️</div>
            <div class="stat-label">Violations</div>
            <div class="stat-value">{sentinel.violations_detected}</div>
            </div>
            """)
        with gr.Column(scale=1):
            gr.HTML(f"""
            <div class="stat-card">
            <div class="stat-icon">📊</div>
            <div class="stat-label">Processed</div>
            <div class="stat-value">{sentinel.messages_processed}</div>
            </div>
            """)
    # Test Playground Section
    gr.HTML("""
    <div class="section-wrapper">
    <h2 class="section-header">🧪 Test Playground</h2>
    <p class="section-subtitle">Test profanity detection across 58 languages</p>
    </div>
    """)
    test_input = gr.Textbox(
        label="Enter text to test",
        placeholder="Type any message to check for profanity...",
        lines=4,
        show_label=True
    )
    with gr.Row():
        test_detect_btn = gr.Button("🔍 Detect Profanity", variant="primary", scale=3, elem_classes="primary-button")
        clear_btn = gr.Button("Clear", variant="secondary", scale=1)
    test_output = gr.JSON(label="Detection Result", elem_classes="json-output")
    # OFP Testing Section (Optional)
    with gr.Accordion("📡 OFP Protocol Testing", open=False, elem_classes="accordion-item"):
        gr.Markdown("Send a test message with profanity through the OFP pipeline to simulate real monitoring.")
        simulate_btn = gr.Button("Send Test Profanity Message", variant="secondary")
    # Attribution Section
    gr.HTML("""
    <div class="attribution">
    <h2>📚 Attribution & Resources</h2>
    <h3>Data Sources</h3>
    <ul>
    <li><a href="https://github.com/LDNOOBW/List-of-Dirty-Naughty-Obscene-and-Otherwise-Bad-Words" target="_blank">LDNOOBW</a> - CC-BY 4.0</li>
    <li><a href="https://github.com/thisandagain/washyourmouthoutwithsoap" target="_blank">Wash Your Mouth Out With Soap</a> - MIT</li>
    </ul>
    <h3>Protocol</h3>
    <ul>
    <li><a href="https://openfloor.dev" target="_blank">Open Floor Protocol</a> - Real-time conversation coordination</li>
    <li>Implements OFP v1.0.0 (Dialog Event, Inter-agent Message, Assistant Manifest)</li>
    </ul>
    <h3>Technology</h3>
    <ul>
    <li>Detection: <a href="https://github.com/snguyenthanh/better_profanity" target="_blank">better-profanity</a></li>
    <li>Interface: Gradio 5.x</li>
    </ul>
    </div>
    """)
    # Event handlers
    test_detect_btn.click(
        fn=test_detection,
        inputs=test_input,
        outputs=test_output
    )
    clear_btn.click(
        fn=lambda: ("", None),
        inputs=None,
        outputs=[test_input, test_output]
    )

    def _run_simulation():
        """Fire the simulated profanity envelope for its side effects only.

        Returns None because the stat cards are static HTML — there are no
        output components to update from here.
        """
        simulate_test_violation()

    # FIX: this button previously had no click handler and did nothing.
    simulate_btn.click(fn=_run_simulation, inputs=None, outputs=None)
# Launch configuration
if __name__ == "__main__":
    import argparse

    # Check if running on HuggingFace Spaces (SPACE_ID is set by the platform).
    is_hf_space = os.getenv("SPACE_ID") is not None

    parser = argparse.ArgumentParser(description='OFP Bad Word Sentinel')
    parser.add_argument('--share', action='store_true', help='Create public share link (for OpenFloor Playground)')
    parser.add_argument('--port', type=int, default=7860, help='Port to run on')
    args = parser.parse_args()

    # Print the mode-specific banner. All three modes ultimately serve the
    # same combined app (Gradio mounted at "/" on the FastAPI app), so only
    # the banner differs; the previous version duplicated the mount/run code
    # in every branch.
    if is_hf_space:
        print("\n" + "="*60)
        print("🔥 OFP Bad Word Sentinel - HuggingFace Spaces")
        print("="*60)
        print("\nDashboard: https://<url>/")
        print("Manifest endpoint: https://<url>/manifest")
        print("OFP endpoint: https://<url>/ofp")
        print("="*60 + "\n")
    elif args.share:
        # NOTE(review): true Gradio share links (demo.launch(share=True)) do
        # not combine with custom FastAPI routes; this mode currently serves
        # the same local combined app.
        print("\n" + "="*60)
        print("🔥 OFP Bad Word Sentinel - Share Mode")
        print("="*60)
        print("\nGenerating public share URL for Gradio...")
        print("This URL can be used in OpenFloor Playground!")
        print("\nDashboard: https://<url>/")
        print("Manifest endpoint: https://<url>/manifest")
        print("OFP endpoint: https://<url>/ofp")
        print("="*60 + "\n")
    else:
        print("\n" + "="*60)
        print("🔥 OFP Bad Word Sentinel - Local Mode")
        print("="*60)
        print(f"\nStarting server on http://localhost:{args.port}")
        print(f"Dashboard: http://localhost:{args.port}/")
        print(f"Manifest: http://localhost:{args.port}/manifest")
        print(f"OFP endpoint: http://localhost:{args.port}/ofp")
        print(f"API docs: http://localhost:{args.port}/docs")
        print("="*60 + "\n")

    # Mount Gradio at root path and serve with uvicorn (imported at top of file).
    app_with_gradio = gr.mount_gradio_app(app, demo, path="/")
    uvicorn.run(
        app_with_gradio,
        host="0.0.0.0",
        port=args.port,
        log_level="info"
    )