sbicy commited on
Commit
deff797
·
verified ·
1 Parent(s): b0ed454

Upload 17 files

Browse files
src/api/__pycache__/main.cpython-311.pyc ADDED
Binary file (8.92 kB). View file
 
src/api/__pycache__/routes.cpython-311.pyc ADDED
Binary file (5.32 kB). View file
 
src/api/enhanced_ui.html ADDED
@@ -0,0 +1,511 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html>
3
+ <head>
4
+ <title>Context-Aware Profanity Handler - Interactive Demo</title>
5
+ <style>
6
+ * { box-sizing: border-box; }
7
+ body {
8
+ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
9
+ margin: 0;
10
+ padding: 20px;
11
+ background: #f5f5f7;
12
+ }
13
+ .header {
14
+ max-width: 1400px;
15
+ margin: 0 auto 30px;
16
+ background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
17
+ padding: 40px;
18
+ border-radius: 12px;
19
+ color: white;
20
+ box-shadow: 0 10px 30px rgba(0,0,0,0.1);
21
+ }
22
+ .header h1 { margin: 0 0 10px; font-size: 36px; }
23
+ .header p { margin: 0; opacity: 0.95; font-size: 18px; }
24
+ .container {
25
+ max-width: 1400px;
26
+ margin: 0 auto;
27
+ display: grid;
28
+ grid-template-columns: 1fr 1fr;
29
+ gap: 20px;
30
+ }
31
+ .panel {
32
+ background: white;
33
+ padding: 30px;
34
+ border-radius: 12px;
35
+ box-shadow: 0 2px 8px rgba(0,0,0,0.08);
36
+ }
37
+ .panel.full { grid-column: 1 / -1; }
38
+ h2 {
39
+ margin-top: 0;
40
+ color: #1d1d1f;
41
+ font-size: 24px;
42
+ border-bottom: 2px solid #667eea;
43
+ padding-bottom: 10px;
44
+ }
45
+ .input-group {
46
+ margin: 20px 0;
47
+ }
48
+ label {
49
+ display: block;
50
+ margin-bottom: 8px;
51
+ font-weight: 600;
52
+ color: #1d1d1f;
53
+ }
54
+ textarea, select {
55
+ width: 100%;
56
+ padding: 12px;
57
+ border: 2px solid #d2d2d7;
58
+ border-radius: 8px;
59
+ font-size: 15px;
60
+ font-family: inherit;
61
+ transition: border-color 0.2s;
62
+ }
63
+ textarea:focus, select:focus {
64
+ outline: none;
65
+ border-color: #667eea;
66
+ }
67
+ textarea { min-height: 120px; resize: vertical; }
68
+ .checkbox-group {
69
+ display: flex;
70
+ gap: 20px;
71
+ margin: 20px 0;
72
+ }
73
+ .checkbox-group label {
74
+ display: flex;
75
+ align-items: center;
76
+ gap: 8px;
77
+ font-weight: 500;
78
+ }
79
+ input[type="checkbox"] {
80
+ width: 18px;
81
+ height: 18px;
82
+ cursor: pointer;
83
+ }
84
+ button {
85
+ background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
86
+ color: white;
87
+ padding: 14px 32px;
88
+ border: none;
89
+ border-radius: 8px;
90
+ cursor: pointer;
91
+ font-size: 16px;
92
+ font-weight: 600;
93
+ transition: transform 0.2s, box-shadow 0.2s;
94
+ box-shadow: 0 4px 12px rgba(102, 126, 234, 0.4);
95
+ }
96
+ button:hover {
97
+ transform: translateY(-2px);
98
+ box-shadow: 0 6px 16px rgba(102, 126, 234, 0.5);
99
+ }
100
+ button:active {
101
+ transform: translateY(0);
102
+ }
103
+ .badge {
104
+ display: inline-block;
105
+ padding: 6px 12px;
106
+ border-radius: 6px;
107
+ font-size: 13px;
108
+ font-weight: 600;
109
+ margin: 4px;
110
+ }
111
+ .badge.safe { background: #d1f4e0; color: #0f5132; }
112
+ .badge.mild { background: #fff3cd; color: #997404; }
113
+ .badge.explicit { background: #f8d7da; color: #842029; }
114
+ .badge.slur { background: #f5c2c7; color: #58151c; }
115
+ .badge.threat { background: #ea868f; color: #58151c; }
116
+ .comparison {
117
+ display: grid;
118
+ grid-template-columns: 1fr 1fr;
119
+ gap: 20px;
120
+ margin-top: 20px;
121
+ }
122
+ .comparison-item {
123
+ padding: 15px;
124
+ background: #f5f5f7;
125
+ border-radius: 8px;
126
+ border-left: 4px solid #667eea;
127
+ }
128
+ .comparison-item h4 {
129
+ margin-top: 0;
130
+ color: #667eea;
131
+ font-size: 14px;
132
+ text-transform: uppercase;
133
+ letter-spacing: 0.5px;
134
+ }
135
+ .comparison-item pre {
136
+ margin: 0;
137
+ white-space: pre-wrap;
138
+ word-wrap: break-word;
139
+ font-size: 14px;
140
+ line-height: 1.6;
141
+ }
142
+ .metrics {
143
+ display: grid;
144
+ grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
145
+ gap: 15px;
146
+ margin: 20px 0;
147
+ }
148
+ .metric {
149
+ background: #f5f5f7;
150
+ padding: 20px;
151
+ border-radius: 8px;
152
+ text-align: center;
153
+ }
154
+ .metric-value {
155
+ font-size: 32px;
156
+ font-weight: 700;
157
+ color: #667eea;
158
+ margin: 10px 0;
159
+ }
160
+ .metric-label {
161
+ font-size: 13px;
162
+ color: #86868b;
163
+ text-transform: uppercase;
164
+ letter-spacing: 0.5px;
165
+ }
166
+ .log-viewer {
167
+ background: #1d1d1f;
168
+ color: #f5f5f7;
169
+ padding: 20px;
170
+ border-radius: 8px;
171
+ font-family: 'Courier New', monospace;
172
+ font-size: 13px;
173
+ max-height: 400px;
174
+ overflow-y: auto;
175
+ }
176
+ .log-entry {
177
+ margin: 10px 0;
178
+ padding: 10px;
179
+ background: rgba(255,255,255,0.05);
180
+ border-radius: 4px;
181
+ }
182
+ .log-redacted {
183
+ color: #ff9f0a;
184
+ }
185
+ .log-verbatim {
186
+ color: #30d158;
187
+ }
188
+ .example-box {
189
+ background: linear-gradient(135deg, #e0e7ff 0%, #f0e7ff 100%);
190
+ padding: 20px;
191
+ border-radius: 8px;
192
+ margin: 20px 0;
193
+ border-left: 4px solid #667eea;
194
+ }
195
+ .example-box h3 {
196
+ margin-top: 0;
197
+ color: #1d1d1f;
198
+ }
199
+ .tab-container {
200
+ margin-top: 20px;
201
+ }
202
+ .tabs {
203
+ display: flex;
204
+ gap: 10px;
205
+ border-bottom: 2px solid #d2d2d7;
206
+ margin-bottom: 20px;
207
+ }
208
+ .tab {
209
+ padding: 10px 20px;
210
+ cursor: pointer;
211
+ border: none;
212
+ background: none;
213
+ font-size: 15px;
214
+ font-weight: 600;
215
+ color: #86868b;
216
+ border-bottom: 3px solid transparent;
217
+ transition: all 0.2s;
218
+ }
219
+ .tab.active {
220
+ color: #667eea;
221
+ border-bottom-color: #667eea;
222
+ }
223
+ .tab-content {
224
+ display: none;
225
+ }
226
+ .tab-content.active {
227
+ display: block;
228
+ }
229
+ .hidden { display: none !important; }
230
+ .ai-scores {
231
+ display: grid;
232
+ grid-template-columns: repeat(auto-fill, minmax(150px, 1fr));
233
+ gap: 10px;
234
+ margin: 15px 0;
235
+ }
236
+ .ai-score {
237
+ background: #f5f5f7;
238
+ padding: 12px;
239
+ border-radius: 6px;
240
+ text-align: center;
241
+ }
242
+ .ai-score-label {
243
+ font-size: 11px;
244
+ color: #86868b;
245
+ text-transform: uppercase;
246
+ }
247
+ .ai-score-value {
248
+ font-size: 20px;
249
+ font-weight: 700;
250
+ color: #1d1d1f;
251
+ margin-top: 5px;
252
+ }
253
+ </style>
254
+ </head>
255
+ <body>
256
+ <div class="header">
257
+ <h1>🧩 Context-Aware Profanity Handler</h1>
258
+ <p>Interactive demonstration of context-aware profanity detection, delexicalization, and audit logging</p>
259
+ </div>
260
+
261
+ <div class="container">
262
+ <!-- Input Panel -->
263
+ <div class="panel">
264
+ <h2>📝 Input</h2>
265
+
266
+ <div class="example-box">
267
+ <h3>💡 Example Use Case</h3>
268
+ <p><strong>Scenario:</strong> A user wants to generate a report about an asset with an explicit song title.</p>
269
+ <p><strong>Input:</strong> "Report on asset: <em>Do You Want to Fuck Me Tonight</em>"</p>
270
+ <p><strong>Context:</strong> Song Title (Entity Name)</p>
271
+ </div>
272
+
273
+ <div class="input-group">
274
+ <label for="text">Text to Analyze:</label>
275
+ <textarea id="text" placeholder="Enter text here...">Report on asset: Do You Want to Fuck Me Tonight</textarea>
276
+ </div>
277
+
278
+ <div class="input-group">
279
+ <label for="context">Content Category:</label>
280
+ <select id="context">
281
+ <option value="song_title">Song Title</option>
282
+ <option value="entity_name">Entity Name</option>
283
+ <option value="brand_name">Brand Name</option>
284
+ <option value="user_input">User Input</option>
285
+ </select>
286
+ </div>
287
+
288
+ <div class="checkbox-group">
289
+ <label>
290
+ <input type="checkbox" id="strict_mode"> Strict Mode
291
+ </label>
292
+ <label>
293
+ <input type="checkbox" id="use_ai" checked> Use AI Classifier
294
+ </label>
295
+ <label>
296
+ <input type="checkbox" id="include_explicit"> Include in Export
297
+ </label>
298
+ </div>
299
+
300
+ <button onclick="analyzeText()">🔍 Analyze Text</button>
301
+ </div>
302
+
303
+ <!-- Results Panel -->
304
+ <div class="panel">
305
+ <h2>📊 Analysis Results</h2>
306
+ <div id="results" style="color: #86868b; text-align: center; padding: 40px;">
307
+ Run an analysis to see results here
308
+ </div>
309
+ </div>
310
+
311
+ <!-- Comparison Panel -->
312
+ <div class="panel full hidden" id="comparisonPanel">
313
+ <h2>🔄 Text Comparison</h2>
314
+ <div class="comparison">
315
+ <div class="comparison-item">
316
+ <h4>📄 Original Text (Verbatim)</h4>
317
+ <pre id="originalText"></pre>
318
+ </div>
319
+ <div class="comparison-item">
320
+ <h4>✨ Delexicalized Text (Safe for AI)</h4>
321
+ <pre id="safeText"></pre>
322
+ </div>
323
+ <div class="comparison-item">
324
+ <h4>📤 Export Text (Based on Preference)</h4>
325
+ <pre id="exportText"></pre>
326
+ </div>
327
+ <div class="comparison-item">
328
+ <h4>🔍 Detected Words</h4>
329
+ <pre id="detectedWords"></pre>
330
+ </div>
331
+ </div>
332
+ </div>
333
+
334
+ <!-- Logs Panel -->
335
+ <div class="panel full hidden" id="logsPanel">
336
+ <h2>📋 Audit Logs Visualization</h2>
337
+
338
+ <div class="tab-container">
339
+ <div class="tabs">
340
+ <button class="tab active" onclick="switchTab('redacted')">📊 Redacted Logs (Analytics)</button>
341
+ <button class="tab" onclick="switchTab('verbatim')">🔐 Verbatim Logs (Compliance)</button>
342
+ </div>
343
+
344
+ <div class="tab-content active" id="redacted-tab">
345
+ <p style="color: #86868b; margin-bottom: 15px;">
346
+ <strong>Purpose:</strong> Safe for analytics and monitoring. Contains metadata without sensitive content.
347
+ </p>
348
+ <div class="log-viewer" id="redactedLogs">
349
+ <div class="log-redacted">Waiting for analysis...</div>
350
+ </div>
351
+ </div>
352
+
353
+ <div class="tab-content" id="verbatim-tab">
354
+ <p style="color: #86868b; margin-bottom: 15px;">
355
+ <strong>Purpose:</strong> Full audit trail for compliance. Access should be restricted (RBAC).
356
+ </p>
357
+ <div class="log-viewer" id="verbatimLogs">
358
+ <div class="log-verbatim">Waiting for analysis...</div>
359
+ </div>
360
+ </div>
361
+ </div>
362
+ </div>
363
+ </div>
364
+
365
+ <script>
366
+ let currentRequestId = null;
367
+
368
+ function switchTab(tabName) {
369
+ document.querySelectorAll('.tab').forEach(t => t.classList.remove('active'));
370
+ document.querySelectorAll('.tab-content').forEach(c => c.classList.remove('active'));
371
+
372
+ event.target.classList.add('active');
373
+ document.getElementById(tabName + '-tab').classList.add('active');
374
+ }
375
+
376
+ async function analyzeText() {
377
+ const text = document.getElementById('text').value;
378
+ const context = document.getElementById('context').value;
379
+ const strict_mode = document.getElementById('strict_mode').checked;
380
+ const use_ai = document.getElementById('use_ai').checked;
381
+ const include_explicit_in_export = document.getElementById('include_explicit').checked;
382
+
383
+ const resultsDiv = document.getElementById('results');
384
+ resultsDiv.innerHTML = '<div style="text-align: center; padding: 20px;">⏳ Analyzing...</div>';
385
+
386
+ try {
387
+ const response = await fetch('/analyze', {
388
+ method: 'POST',
389
+ headers: { 'Content-Type': 'application/json' },
390
+ body: JSON.stringify({ text, context, strict_mode, use_ai, include_explicit_in_export })
391
+ });
392
+
393
+ const data = await response.json();
394
+ currentRequestId = data.request_id;
395
+
396
+ // Display results
397
+ displayResults(data, text);
398
+
399
+ // Show comparison panel
400
+ displayComparison(text, data);
401
+
402
+ // Load and display logs
403
+ await displayLogs(data.request_id);
404
+
405
+ } catch (error) {
406
+ resultsDiv.innerHTML = `<div style="color: #d1180b;">Error: ${error.message}</div>`;
407
+ }
408
+ }
409
+
410
+ function displayResults(data, originalText) {
411
+ const resultsDiv = document.getElementById('results');
412
+
413
+ let html = '<div class="metrics">';
414
+ html += `<div class="metric">
415
+ <div class="metric-label">Profanity</div>
416
+ <div class="metric-value">${data.contains_profanity ? '⚠️' : '✅'}</div>
417
+ </div>`;
418
+ html += `<div class="metric">
419
+ <div class="metric-label">Toxicity</div>
420
+ <div class="metric-value"><span class="badge ${data.toxicity_level}">${data.toxicity_level.toUpperCase()}</span></div>
421
+ </div>`;
422
+ html += '</div>';
423
+
424
+ html += `<div style="margin: 20px 0;">
425
+ <strong>Message:</strong>
426
+ <div style="padding: 15px; background: #f5f5f7; border-radius: 8px; margin-top: 10px;">
427
+ ${data.message}
428
+ </div>
429
+ </div>`;
430
+
431
+ if (data.ai_confidence) {
432
+ html += '<div style="margin: 20px 0;"><strong>AI Confidence Scores:</strong><div class="ai-scores">';
433
+ for (const [label, score] of Object.entries(data.ai_confidence)) {
434
+ html += `<div class="ai-score">
435
+ <div class="ai-score-label">${label}</div>
436
+ <div class="ai-score-value">${(score * 100).toFixed(1)}%</div>
437
+ </div>`;
438
+ }
439
+ html += '</div></div>';
440
+ }
441
+
442
+ html += `<div style="margin-top: 20px; font-size: 13px; color: #86868b;">
443
+ Request ID: <code>${data.request_id}</code>
444
+ </div>`;
445
+
446
+ resultsDiv.innerHTML = html;
447
+ }
448
+
449
+ function displayComparison(originalText, data) {
450
+ document.getElementById('comparisonPanel').classList.remove('hidden');
451
+ document.getElementById('originalText').textContent = originalText;
452
+ document.getElementById('safeText').textContent = data.safe_text;
453
+ document.getElementById('exportText').textContent = data.export_text;
454
+ document.getElementById('detectedWords').textContent =
455
+ data.detected_words && data.detected_words.length > 0
456
+ ? data.detected_words.join(', ')
457
+ : 'None detected';
458
+ }
459
+
460
+ async function displayLogs(requestId) {
461
+ document.getElementById('logsPanel').classList.remove('hidden');
462
+
463
+ try {
464
+ // Load redacted logs
465
+ const redactedResponse = await fetch('/logs/redacted');
466
+ const redactedData = await redactedResponse.json();
467
+
468
+ const redactedLogs = document.getElementById('redactedLogs');
469
+ if (redactedData.logs && redactedData.logs.length > 0) {
470
+ redactedLogs.innerHTML = redactedData.logs.slice(-5).reverse().map(log => `
471
+ <div class="log-entry log-redacted">
472
+ <strong>Request ID:</strong> ${log.request_id}<br>
473
+ <strong>Timestamp:</strong> ${new Date(log.timestamp).toLocaleString()}<br>
474
+ <strong>Context:</strong> ${log.context}<br>
475
+ <strong>Profanity:</strong> ${log.contains_profanity ? 'Yes' : 'No'}<br>
476
+ <strong>Toxicity:</strong> ${log.toxicity_level}<br>
477
+ <strong>Text Hash:</strong> ${log.text_hash}<br>
478
+ <strong>Text Length:</strong> ${log.text_length} chars
479
+ </div>
480
+ `).join('');
481
+ }
482
+
483
+ // Load verbatim log for current request
484
+ const verbatimResponse = await fetch(`/logs/verbatim/${requestId}`);
485
+ const verbatimData = await verbatimResponse.json();
486
+
487
+ const verbatimLogs = document.getElementById('verbatimLogs');
488
+ if (verbatimData.request_id) {
489
+ verbatimLogs.innerHTML = `
490
+ <div class="log-entry log-verbatim">
491
+ <strong>⚠️ COMPLIANCE ACCESS ONLY ⚠️</strong><br><br>
492
+ <strong>Request ID:</strong> ${verbatimData.request_id}<br>
493
+ <strong>Timestamp:</strong> ${new Date(verbatimData.timestamp).toLocaleString()}<br>
494
+ <strong>Context:</strong> ${verbatimData.context}<br>
495
+ <strong>Original Text:</strong> ${verbatimData.original_text}<br>
496
+ <strong>Safe Text:</strong> ${verbatimData.safe_text}<br>
497
+ <strong>Profanity:</strong> ${verbatimData.contains_profanity ? 'Yes' : 'No'}<br>
498
+ <strong>Toxicity:</strong> ${verbatimData.toxicity_level}
499
+ </div>
500
+ `;
501
+ } else {
502
+ verbatimLogs.innerHTML = '<div class="log-verbatim">Verbatim logs not available or disabled</div>';
503
+ }
504
+
505
+ } catch (error) {
506
+ console.error('Error loading logs:', error);
507
+ }
508
+ }
509
+ </script>
510
+ </body>
511
+ </html>
src/api/main.py ADDED
@@ -0,0 +1,210 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI
2
+ from fastapi.responses import HTMLResponse
3
+ from pathlib import Path
4
+ from .routes import app
5
+
6
@app.get("/", response_class=HTMLResponse)
async def root():
    """Serve the interactive demo page with visualizations.

    Reads ``enhanced_ui.html`` (located next to this module) on every
    request, so edits to the page take effect without a restart.
    """
    ui_path = Path(__file__).parent / "enhanced_ui.html"
    # Explicit UTF-8: the default locale encoding is not guaranteed to be
    # UTF-8 on every host, and the page contains emoji characters.
    return ui_path.read_text(encoding="utf-8")
12
+
13
@app.get("/simple", response_class=HTMLResponse)
async def simple_demo():
    """Serve the original simple demo page.

    Returns a self-contained HTML document (inline CSS and JS) that POSTs
    the form contents to ``/analyze`` and renders the JSON response; no
    template engine or static-file mount is involved.
    """
    return """
    <!DOCTYPE html>
    <html>
    <head>
        <title>Context-Aware Profanity Handler - Interactive Demo</title>
        <style>
            body {
                font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
                max-width: 900px;
                margin: 40px auto;
                padding: 20px;
                background: #f5f5f5;
            }
            .container {
                background: white;
                padding: 30px;
                border-radius: 8px;
                box-shadow: 0 2px 4px rgba(0,0,0,0.1);
            }
            h1 {
                color: #2c3e50;
                border-bottom: 3px solid #3498db;
                padding-bottom: 10px;
            }
            .input-group {
                margin: 20px 0;
            }
            label {
                display: block;
                margin-bottom: 5px;
                font-weight: 600;
                color: #34495e;
            }
            textarea, select {
                width: 100%;
                padding: 10px;
                border: 1px solid #ddd;
                border-radius: 4px;
                font-size: 14px;
                box-sizing: border-box;
            }
            textarea {
                min-height: 100px;
                resize: vertical;
            }
            button {
                background: #3498db;
                color: white;
                padding: 12px 30px;
                border: none;
                border-radius: 4px;
                cursor: pointer;
                font-size: 16px;
                font-weight: 600;
            }
            button:hover {
                background: #2980b9;
            }
            .result {
                margin-top: 20px;
                padding: 20px;
                border-radius: 4px;
                display: none;
            }
            .result.safe {
                background: #d4edda;
                border: 1px solid #c3e6cb;
                color: #155724;
            }
            .result.warning {
                background: #fff3cd;
                border: 1px solid #ffeeba;
                color: #856404;
            }
            .result.error {
                background: #f8d7da;
                border: 1px solid #f5c6cb;
                color: #721c24;
            }
            .example {
                background: #e8f4f8;
                padding: 15px;
                border-radius: 4px;
                margin: 20px 0;
                border-left: 4px solid #3498db;
            }
            .example h3 {
                margin-top: 0;
                color: #2c3e50;
            }
            .badge {
                display: inline-block;
                padding: 4px 8px;
                border-radius: 3px;
                font-size: 12px;
                font-weight: 600;
                margin-right: 5px;
            }
            .badge.safe { background: #d4edda; color: #155724; }
            .badge.mild { background: #fff3cd; color: #856404; }
            .badge.explicit { background: #f8d7da; color: #721c24; }
            .badge.slur { background: #f5c6cb; color: #721c24; }
            .badge.threat { background: #f5c6cb; color: #721c24; }
        </style>
    </head>
    <body>
        <div class="container">
            <h1>🧩 Context-Aware Profanity Handler</h1>
            <p>A demonstration of context-aware profanity detection and handling for AI-assisted reporting.</p>

            <div class="example">
                <h3>💡 Example Use Case</h3>
                <p><strong>Input:</strong> "Report on asset: <em>Do You Want to F*** Me Tonight</em>"</p>
                <p><strong>Context:</strong> Song Title (Entity Name)</p>
                <p><strong>Expected Result:</strong> Detected but allowed, with transparent feedback about safe rendering.</p>
            </div>

            <div class="input-group">
                <label for="text">Text to Analyze:</label>
                <textarea id="text" placeholder="Enter text here, e.g., a song title, brand name, or user input...">Report on asset: Do You Want to Fuck Me Tonight</textarea>
            </div>

            <div class="input-group">
                <label for="context">Content Category:</label>
                <select id="context">
                    <option value="song_title">Song Title</option>
                    <option value="entity_name">Entity Name</option>
                    <option value="brand_name">Brand Name</option>
                    <option value="user_input">User Input</option>
                </select>
            </div>

            <div class="input-group">
                <label>
                    <input type="checkbox" id="strict_mode"> Strict Mode
                </label>
            </div>

            <button onclick="analyzeText()">Analyze Text</button>

            <div id="result" class="result"></div>
        </div>

        <script>
            async function analyzeText() {
                const text = document.getElementById('text').value;
                const context = document.getElementById('context').value;
                const strict_mode = document.getElementById('strict_mode').checked;

                const resultDiv = document.getElementById('result');
                resultDiv.style.display = 'none';

                try {
                    const response = await fetch('/analyze', {
                        method: 'POST',
                        headers: {
                            'Content-Type': 'application/json',
                        },
                        body: JSON.stringify({ text, context, strict_mode })
                    });

                    const data = await response.json();

                    let className = 'safe';
                    if (data.toxicity_level === 'explicit' || data.toxicity_level === 'slur' || data.toxicity_level === 'threat') {
                        className = 'error';
                    } else if (data.toxicity_level === 'mild') {
                        className = 'warning';
                    }

                    resultDiv.className = 'result ' + className;
                    resultDiv.innerHTML = `
                        <h3>Analysis Results</h3>
                        <p><strong>Profanity Detected:</strong> ${data.contains_profanity ? 'Yes' : 'No'}</p>
                        <p><strong>Toxicity Level:</strong> <span class="badge ${data.toxicity_level}">${data.toxicity_level.toUpperCase()}</span></p>
                        <p><strong>Message:</strong> ${data.message}</p>
                        <hr>
                        <p><strong>Original Text:</strong><br><code>${text}</code></p>
                        <p><strong>Safe Text:</strong><br><code>${data.safe_text}</code></p>
                    `;
                    resultDiv.style.display = 'block';
                } catch (error) {
                    resultDiv.className = 'result error';
                    resultDiv.innerHTML = `<p>Error: ${error.message}</p>`;
                    resultDiv.style.display = 'block';
                }
            }
        </script>
    </body>
    </html>
    """
207
+
208
if __name__ == "__main__":
    # Development entry point: serve the app on all interfaces, port 8000.
    # In production, run via an external process manager / uvicorn CLI instead.
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)
src/api/routes.py ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, HTTPException
2
+ from pydantic import BaseModel
3
+ from typing import Optional, Dict, Any
4
+ from datetime import datetime
5
+
6
+ from ..core.detector import ContentCategory, ProfanityDetector
7
+ from ..core.delexicalizer import Delexicalizer
8
+ from ..core.classifier import ContextClassifier, ToxicityLevel
9
+ from ..core.ai_classifier import AIClassifier
10
+ from ..utils.logger import audit_logger
11
+
12
# FastAPI application; main.py imports this and mounts the demo pages on it.
app = FastAPI(
    title="Profanity Handler API",
    description="Context-aware profanity handling system for AI-assisted reporting",
    version="0.1.0"
)

# Module-level singletons shared across all requests.
# NOTE(review): delexicalizer and detector hold per-request state
# (placeholder maps, last detected words). The /analyze handler uses them
# without awaiting in between, so this is safe on a single event loop —
# confirm before adding any `await` between those calls.
detector = ProfanityDetector()
delexicalizer = Delexicalizer()
classifier = ContextClassifier()
ai_classifier = AIClassifier()
22
+
23
class TextRequest(BaseModel):
    """Request body for POST /analyze."""
    text: str                      # raw text to analyze
    context: ContentCategory       # content category (song_title, entity_name, ...)
    strict_mode: Optional[bool] = False    # stricter detection when True
    use_ai: Optional[bool] = True          # prefer the AI classifier over the rule-based one
    include_explicit_in_export: Optional[bool] = False  # export verbatim text instead of safe text
29
+
30
class TextResponse(BaseModel):
    """Response body for POST /analyze."""
    request_id: str                # audit-log id for this analysis
    contains_profanity: bool       # detector verdict for the given context
    toxicity_level: ToxicityLevel  # coarse severity bucket
    safe_text: Optional[str] = None      # delexicalized (placeholder) rendering
    export_text: Optional[str] = None    # verbatim or safe text, per request flag
    message: str                         # human-readable summary for the UI
    ai_confidence: Optional[Dict[str, float]] = None  # per-label model scores (AI path only)
    detected_words: Optional[list] = None             # words the detector flagged
39
+
40
@app.post("/analyze", response_model=TextResponse)
async def analyze_text(request: TextRequest) -> TextResponse:
    """Analyze text for profanity with context awareness."""
    # Drop placeholder state left over from the previous request.
    delexicalizer.reset()

    # Context-aware detection pass.
    profane = detector.detect_profanity(
        request.text,
        request.context,
        request.strict_mode
    )

    # Toxicity: AI model when requested, rule-based classifier otherwise.
    scores = None
    if request.use_ai:
        level, scores = ai_classifier.classify(request.text)
    else:
        level = classifier.classify_context(request.text)

    # Words the detector flagged (only meaningful when something was found).
    flagged = list(detector.get_detected_words()) if profane else []

    if profane:
        rendered = delexicalizer.delexicalize(request.text)
        note = (
            "Heads up: explicit language detected in record names. "
            "Proceeding with safe rendering."
        )
    else:
        rendered = request.text
        note = "No issues detected."

    # The export keeps the verbatim text only when the caller opted in.
    exported = request.text if request.include_explicit_in_export else rendered

    # Record both redacted and verbatim audit entries.
    request_id = audit_logger.log_request(
        text=request.text,
        context=request.context.value,
        contains_profanity=profane,
        toxicity_level=level.value,
        safe_text=rendered,
        metadata={
            "strict_mode": request.strict_mode,
            "use_ai": request.use_ai,
            "include_explicit_in_export": request.include_explicit_in_export
        }
    )

    return TextResponse(
        request_id=request_id,
        contains_profanity=profane,
        toxicity_level=level,
        safe_text=rendered,
        export_text=exported,
        message=note,
        ai_confidence=scores,
        detected_words=flagged
    )
102
+
103
@app.get("/logs/redacted")
async def get_redacted_logs(date: Optional[str] = None):
    """Get redacted logs for analytics (safe to expose)."""
    entries = audit_logger.get_redacted_logs(date)
    return {"logs": entries, "count": len(entries)}
108
+
109
@app.get("/logs/verbatim/{request_id}")
async def get_verbatim_log(request_id: str, date: Optional[str] = None):
    """
    Get verbatim log for compliance (should be access-controlled in production).
    This endpoint demonstrates RBAC - in production, this would require special permissions.

    Raises:
        HTTPException: 404 when no verbatim log exists for ``request_id``.
    """
    log = audit_logger.get_verbatim_log(request_id, date)
    # Previously a missing log was returned as an empty/None body; use the
    # already-imported HTTPException to report a proper 404 instead.
    if not log:
        raise HTTPException(status_code=404, detail="Verbatim log not found")
    return log
src/core/__pycache__/ai_classifier.cpython-311.pyc ADDED
Binary file (3.72 kB). View file
 
src/core/__pycache__/classifier.cpython-311.pyc ADDED
Binary file (2.72 kB). View file
 
src/core/__pycache__/delexicalizer.cpython-311.pyc ADDED
Binary file (3.47 kB). View file
 
src/core/__pycache__/detector.cpython-311.pyc ADDED
Binary file (3.88 kB). View file
 
src/core/ai_classifier.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Optional
2
+ from .classifier import ToxicityLevel
3
+ from ..utils.config import config
4
+
5
class AIClassifier:
    """AI-powered toxicity classifier using Hugging Face models.

    The transformers pipeline is loaded lazily on first use. If loading
    fails (missing package, no network, bad token, ...), the classifier
    degrades gracefully: ``classify`` returns ``ToxicityLevel.SAFE`` with
    an empty score dict.
    """

    def __init__(self):
        self.model = None           # transformers pipeline, once loaded
        self.tokenizer = None       # kept for interface compatibility; unused here
        self._initialized = False   # True only after a successful model load
        self._init_attempted = False  # guards against re-trying a failed (slow) load on every call

    def _initialize(self):
        """Lazy load the model to avoid startup delays.

        Fix: the load is attempted at most once — previously a failed load
        left ``_initialized`` False, so every ``classify`` call re-ran the
        expensive (and doomed) pipeline construction.
        """
        if self._init_attempted:
            return
        self._init_attempted = True

        try:
            from transformers import pipeline

            # Use a toxicity detection model.
            # This model works without authentication.
            self.model = pipeline(
                "text-classification",
                model="unitary/toxic-bert",
                top_k=None,
                token=config.HUGGINGFACE_TOKEN
            )
            self._initialized = True
            print("✓ AI Classifier initialized with toxic-bert model")
        except Exception as e:
            # Broad catch is deliberate: any load failure falls back to rules.
            print(f"⚠ Could not initialize AI model: {e}")
            print(" Falling back to rule-based classification")
            self._initialized = False

    def classify(self, text: str) -> tuple[ToxicityLevel, dict]:
        """
        Classify text using AI model.

        Returns:
            Tuple of (ToxicityLevel, confidence_scores); the score dict is
            empty when running in fallback mode or on classification errors.
        """
        self._initialize()

        if not self._initialized or self.model is None:
            # Fallback to basic classification
            return ToxicityLevel.SAFE, {}

        try:
            results = self.model(text)[0]

            # toxic-bert returns labels like 'toxic', 'severe_toxic', 'obscene', etc.
            scores = {item['label']: item['score'] for item in results}

            # Map model scores onto the coarse levels; thresholds are heuristic.
            if scores.get('severe_toxic', 0) > 0.5:
                return ToxicityLevel.THREAT, scores
            elif scores.get('obscene', 0) > 0.5:
                return ToxicityLevel.EXPLICIT, scores
            elif scores.get('insult', 0) > 0.4:
                return ToxicityLevel.SLUR, scores
            elif scores.get('toxic', 0) > 0.3:
                return ToxicityLevel.MILD, scores
            else:
                return ToxicityLevel.SAFE, scores

        except Exception as e:
            print(f"Error during AI classification: {e}")
            return ToxicityLevel.SAFE, {}
src/core/classifier.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from enum import Enum
2
+ from typing import List
3
+ import re
4
+
5
class ToxicityLevel(Enum):
    """Coarse severity buckets, ordered informally from benign to worst."""
    SAFE = "safe"
    MILD = "mild"
    EXPLICIT = "explicit"
    SLUR = "slur"
    THREAT = "threat"


class ContextClassifier:
    """Rule-based toxicity classifier driven by small keyword sets."""

    def __init__(self):
        # Basic categorization of profanity by severity
        self._mild = {'damn', 'crap', 'hell', 'ass'}
        self._explicit = {'fuck', 'shit', 'bitch', 'piss', 'dick', 'cock', 'pussy'}
        self._slurs = {'bastard'}  # Simplified - real implementation would be more comprehensive
        # Stored as a set of whole words. Fix: these were previously matched
        # as substrings, so "skill" (kill), "diet" (die) and "audience" were
        # misclassified as threats.
        self._threat_keywords = {'kill', 'die', 'death', 'hurt', 'harm'}

    def classify_context(self, text: str) -> ToxicityLevel:
        """
        Classify the toxicity level of text with context awareness.

        Args:
            text: Input text to classify

        Returns:
            ToxicityLevel: The classified toxicity level; the highest-severity
            category containing any whole word of the input wins.
        """
        text_lower = text.lower()
        words = set(re.findall(r'\b\w+\b', text_lower))

        # Check for threats first (highest priority) — whole-word match.
        if words & self._threat_keywords:
            return ToxicityLevel.THREAT

        # Check for slurs
        if words & self._slurs:
            return ToxicityLevel.SLUR

        # Check for explicit language
        if words & self._explicit:
            return ToxicityLevel.EXPLICIT

        # Check for mild profanity
        if words & self._mild:
            return ToxicityLevel.MILD

        return ToxicityLevel.SAFE
src/core/delexicalizer.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from typing import Dict, List, Optional
import re

class Delexicalizer:
    """Replaces profane words with placeholders and restores them later.

    Instances are stateful: the placeholder <-> word mapping lives on the
    instance so that :meth:`relexicalize` can undo :meth:`delexicalize`.
    Call :meth:`reset` between independent documents to avoid unbounded
    growth of the mapping.
    """

    def __init__(self):
        # placeholder -> original word (consumed by relexicalize)
        self._placeholder_map: Dict[str, str] = {}
        # lowercased word -> its most recently issued placeholder
        self._reverse_map: Dict[str, str] = {}
        self._counter = 0
        # Basic profanity list for detection
        self._profanity_patterns = [
            'fuck', 'shit', 'damn', 'ass', 'bitch', 'bastard',
            'crap', 'hell', 'piss', 'dick', 'cock', 'pussy'
        ]
        # Improvement: compile each term once here instead of rebuilding
        # the regex on every delexicalize() call, and re.escape the term
        # so the list stays safe to extend with entries containing regex
        # metacharacters.  Whole-word (\b) and case-insensitive, exactly
        # as before.
        self._compiled_patterns = [
            re.compile(r'\b' + re.escape(term) + r'\b', re.IGNORECASE)
            for term in self._profanity_patterns
        ]

    def delexicalize(self, text: str) -> str:
        """
        Replace profane words with placeholders.

        Args:
            text: Input text containing potential profanity

        Returns:
            str: Text with profanity replaced by placeholders
        """
        result = text
        for pattern in self._compiled_patterns:
            # Find all occurrences (case-insensitive)
            matches = list(pattern.finditer(result))
            for match in reversed(matches):  # Reverse to maintain positions
                original_word = match.group()
                placeholder = self._create_placeholder(original_word)
                result = result[:match.start()] + placeholder + result[match.end():]
        return result

    def relexicalize(self, text: str) -> str:
        """
        Restore original words from placeholders.

        Args:
            text: Text with placeholders

        Returns:
            str: Original text with placeholders replaced
        """
        for placeholder, original in self._placeholder_map.items():
            text = text.replace(placeholder, original)
        return text

    def _create_placeholder(self, word: str) -> str:
        """Create a unique placeholder for a word."""
        self._counter += 1
        placeholder = f"<PROFANITY_{self._counter}>"
        self._placeholder_map[placeholder] = word
        self._reverse_map[word.lower()] = placeholder
        return placeholder

    def reset(self):
        """Reset the delexicalizer state."""
        self._placeholder_map.clear()
        self._reverse_map.clear()
        self._counter = 0
src/core/detector.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from enum import Enum
from typing import List, Optional
import re

class ContentCategory(Enum):
    """Origin/category of the text being checked."""
    ENTITY_NAME = "entity_name"
    SONG_TITLE = "song_title"
    BRAND_NAME = "brand_name"
    USER_INPUT = "user_input"

class ProfanityDetector:
    """Word-list profanity detector with context-aware policy.

    After a call to :meth:`detect_profanity`, the words that matched are
    available via :meth:`get_detected_words` (for that call only).
    """

    # Categories where profanity is detected but treated permissively
    # (legitimate entity/song/brand names may contain such words).
    _PERMISSIVE_CONTEXTS = (
        ContentCategory.ENTITY_NAME,
        ContentCategory.SONG_TITLE,
        ContentCategory.BRAND_NAME,
    )

    def __init__(self):
        # Initialize with basic profanity list
        # In production, this would be loaded from a curated database
        self._profanity_list = {
            'fuck', 'shit', 'damn', 'ass', 'bitch', 'bastard',
            'crap', 'hell', 'piss', 'dick', 'cock', 'pussy'
        }
        self._profanity_words = set()

    def detect_profanity(
        self,
        text: str,
        context: ContentCategory,
        strict_mode: bool = False
    ) -> bool:
        """
        Detect profanity in text with context awareness.

        Args:
            text: Input text to check
            context: Category of the content (entity name, user input, etc.)
            strict_mode: Whether to apply stricter rules

        Returns:
            bool: True if profanity detected, False otherwise
        """
        # Bug fix: clear results from any previous call so that
        # get_detected_words() never reports stale matches after a
        # clean-text check.
        self._profanity_words = set()

        # If it's an entity name and not in strict mode, we're more permissive
        if context in self._PERMISSIVE_CONTEXTS and not strict_mode:
            return self._check_with_context(text, context)

        return self._check_standard(text)

    def _scan(self, text: str) -> bool:
        """Whole-word scan against the profanity list; records matches."""
        words = re.findall(r'\b\w+\b', text.lower())
        found = {word for word in words if word in self._profanity_list}
        if found:
            self._profanity_words = found
            return True
        return False

    def _check_with_context(self, text: str, context: ContentCategory) -> bool:
        """Context-aware checking - more permissive for entity names.

        For entity names, we detect but don't block; the scan itself is
        shared with the standard path (previously duplicated code).
        """
        return self._scan(text)

    def _check_standard(self, text: str) -> bool:
        """Standard profanity checking - stricter."""
        return self._scan(text)

    def get_detected_words(self) -> set:
        """Return the profane words detected by the last detect_profanity call."""
        return self._profanity_words
src/utils/__pycache__/config.cpython-311.pyc ADDED
Binary file (1.89 kB). View file
 
src/utils/__pycache__/logger.cpython-311.pyc ADDED
Binary file (6.67 kB). View file
 
src/utils/config.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
from pathlib import Path
from typing import Optional

class Config:
    """Application configuration.

    All values are read from environment variables once, at class-definition
    (i.e. import) time; later changes to the environment are not picked up.
    """

    # Hugging Face
    # API token for Hugging Face access; None when the env var is unset.
    HUGGINGFACE_TOKEN: Optional[str] = os.getenv("HUGGINGFACE_TOKEN")

    # Logging
    LOG_LEVEL: str = os.getenv("LOG_LEVEL", "INFO")
    # Verbatim (unredacted) logging is ON by default; any value other than
    # "true" (case-insensitive) disables it.
    ENABLE_VERBATIM_LOGS: bool = os.getenv("ENABLE_VERBATIM_LOGS", "true").lower() == "true"

    # Paths
    # Project root: three levels up from this file (src/utils/config.py -> repo root).
    BASE_DIR = Path(__file__).parent.parent.parent
    # Env overrides are interpreted relative to BASE_DIR (absolute env
    # values would override BASE_DIR entirely via Path's `/` semantics).
    VERBATIM_LOG_PATH = BASE_DIR / os.getenv("VERBATIM_LOG_PATH", "logs/verbatim")
    REDACTED_LOG_PATH = BASE_DIR / os.getenv("REDACTED_LOG_PATH", "logs/redacted")

    @classmethod
    def ensure_log_dirs(cls):
        """Create log directories if they don't exist."""
        cls.VERBATIM_LOG_PATH.mkdir(parents=True, exist_ok=True)
        cls.REDACTED_LOG_PATH.mkdir(parents=True, exist_ok=True)

# Module-level singleton.
# NOTE(review): importing this module creates the log directories as a
# side effect — confirm this is intended for all entry points (tests, CLIs).
config = Config()
config.ensure_log_dirs()
src/utils/logger.py ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import json
import hashlib
from datetime import datetime
from pathlib import Path
from typing import Any, Dict
from .config import config

class AuditLogger:
    """Dual logging system: redacted for analytics, verbatim for compliance.

    Two parallel JSONL streams are written, one file per UTC day:
      * redacted  — text replaced by hashes/lengths; safe for analytics.
      * verbatim  — original and transformed text; compliance/audit only,
        and written only when ``config.ENABLE_VERBATIM_LOGS`` is true.
    """

    def __init__(self):
        # Directories come from config, which creates them at import time.
        self.verbatim_path = config.VERBATIM_LOG_PATH
        self.redacted_path = config.REDACTED_LOG_PATH

    def _generate_request_id(self, text: str) -> str:
        """Generate a unique request ID.

        SHA-256 of (timestamp + text), truncated to 16 hex chars — a
        correlation ID, not a security token.
        """
        # NOTE(review): datetime.utcnow() is deprecated since Python 3.12;
        # consider datetime.now(timezone.utc) — but note that changes the
        # ISO string format (adds a +00:00 offset).
        timestamp = datetime.utcnow().isoformat()
        return hashlib.sha256(f"{timestamp}{text}".encode()).hexdigest()[:16]

    def _redact_text(self, text: str) -> str:
        """Redact sensitive content for analytics logs."""
        # Replace with hash to preserve uniqueness while hiding content.
        # MD5 is used only as a non-cryptographic fingerprint here.
        return f"REDACTED_{hashlib.md5(text.encode()).hexdigest()[:8]}"

    # NOTE(review): metadata defaults to None, so its annotation should
    # read Optional[Dict[str, Any]]; left as-is pending an import change.
    def log_request(
        self,
        text: str,
        context: str,
        contains_profanity: bool,
        toxicity_level: str,
        safe_text: str,
        metadata: Dict[str, Any] = None
    ) -> str:
        """
        Log a profanity check request to both redacted and verbatim logs.

        Args:
            text: Original input text (only hashed into the redacted log).
            context: Content category label (e.g. "user_input").
            contains_profanity: Detector verdict for this request.
            toxicity_level: Classifier verdict label.
            safe_text: Sanitized output text (verbatim log only).
            metadata: Optional extra fields copied into both entries.

        Returns:
            request_id: Unique identifier for this request
        """
        request_id = self._generate_request_id(text)
        timestamp = datetime.utcnow().isoformat()

        # Redacted log (for analytics) — content is represented only by
        # an MD5 fingerprint and the text length.
        redacted_entry = {
            "request_id": request_id,
            "timestamp": timestamp,
            "context": context,
            "contains_profanity": contains_profanity,
            "toxicity_level": toxicity_level,
            "text_hash": hashlib.md5(text.encode()).hexdigest(),
            "text_length": len(text),
            "metadata": metadata or {}
        }

        # Verbatim log (for compliance/audit) — stores raw input/output.
        verbatim_entry = {
            "request_id": request_id,
            "timestamp": timestamp,
            "context": context,
            "original_text": text,
            "safe_text": safe_text,
            "contains_profanity": contains_profanity,
            "toxicity_level": toxicity_level,
            "metadata": metadata or {}
        }

        # Write redacted log — one JSONL file per UTC day, append mode.
        redacted_file = self.redacted_path / f"{datetime.utcnow().strftime('%Y-%m-%d')}.jsonl"
        with open(redacted_file, 'a') as f:
            f.write(json.dumps(redacted_entry) + '\n')

        # Write verbatim log (if enabled)
        if config.ENABLE_VERBATIM_LOGS:
            verbatim_file = self.verbatim_path / f"{datetime.utcnow().strftime('%Y-%m-%d')}.jsonl"
            with open(verbatim_file, 'a') as f:
                f.write(json.dumps(verbatim_entry) + '\n')

        return request_id

    def get_redacted_logs(self, date: str = None) -> list:
        """Retrieve redacted logs for a specific date.

        Args:
            date: Day to read, formatted '%Y-%m-%d'; defaults to today (UTC).

        Returns:
            List of log entry dicts; empty list when no file exists.
        """
        if date is None:
            date = datetime.utcnow().strftime('%Y-%m-%d')

        log_file = self.redacted_path / f"{date}.jsonl"
        if not log_file.exists():
            return []

        logs = []
        with open(log_file, 'r') as f:
            for line in f:
                logs.append(json.loads(line))
        return logs

    def get_verbatim_log(self, request_id: str, date: str = None) -> dict:
        """
        Retrieve verbatim log for a specific request (compliance only).
        This should be access-controlled in production.

        Args:
            request_id: ID returned by log_request.
            date: Day to search, '%Y-%m-%d'; defaults to today (UTC).

        Returns:
            The matching entry, or an {"error": ...} dict when disabled,
            the file is missing, or the ID is not found.
        """
        if not config.ENABLE_VERBATIM_LOGS:
            return {"error": "Verbatim logs are disabled"}

        if date is None:
            date = datetime.utcnow().strftime('%Y-%m-%d')

        log_file = self.verbatim_path / f"{date}.jsonl"
        if not log_file.exists():
            return {"error": "Log file not found"}

        # Linear scan of the day's file; fine at audit-log volumes.
        with open(log_file, 'r') as f:
            for line in f:
                entry = json.loads(line)
                if entry['request_id'] == request_id:
                    return entry

        return {"error": "Request ID not found"}

# Singleton instance
audit_logger = AuditLogger()