Upload 17 files
Browse files- src/api/__pycache__/main.cpython-311.pyc +0 -0
- src/api/__pycache__/routes.cpython-311.pyc +0 -0
- src/api/enhanced_ui.html +511 -0
- src/api/main.py +210 -0
- src/api/routes.py +116 -0
- src/core/__pycache__/ai_classifier.cpython-311.pyc +0 -0
- src/core/__pycache__/classifier.cpython-311.pyc +0 -0
- src/core/__pycache__/delexicalizer.cpython-311.pyc +0 -0
- src/core/__pycache__/detector.cpython-311.pyc +0 -0
- src/core/ai_classifier.py +69 -0
- src/core/classifier.py +49 -0
- src/core/delexicalizer.py +61 -0
- src/core/detector.py +65 -0
- src/utils/__pycache__/config.cpython-311.pyc +0 -0
- src/utils/__pycache__/logger.cpython-311.pyc +0 -0
- src/utils/config.py +27 -0
- src/utils/logger.py +119 -0
src/api/__pycache__/main.cpython-311.pyc
ADDED
|
Binary file (8.92 kB). View file
|
|
|
src/api/__pycache__/routes.cpython-311.pyc
ADDED
|
Binary file (5.32 kB). View file
|
|
|
src/api/enhanced_ui.html
ADDED
|
@@ -0,0 +1,511 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
|
| 2 |
+
<html>
|
| 3 |
+
<head>
|
| 4 |
+
<title>Context-Aware Profanity Handler - Interactive Demo</title>
|
| 5 |
+
<style>
|
| 6 |
+
* { box-sizing: border-box; }
|
| 7 |
+
body {
|
| 8 |
+
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
|
| 9 |
+
margin: 0;
|
| 10 |
+
padding: 20px;
|
| 11 |
+
background: #f5f5f7;
|
| 12 |
+
}
|
| 13 |
+
.header {
|
| 14 |
+
max-width: 1400px;
|
| 15 |
+
margin: 0 auto 30px;
|
| 16 |
+
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
|
| 17 |
+
padding: 40px;
|
| 18 |
+
border-radius: 12px;
|
| 19 |
+
color: white;
|
| 20 |
+
box-shadow: 0 10px 30px rgba(0,0,0,0.1);
|
| 21 |
+
}
|
| 22 |
+
.header h1 { margin: 0 0 10px; font-size: 36px; }
|
| 23 |
+
.header p { margin: 0; opacity: 0.95; font-size: 18px; }
|
| 24 |
+
.container {
|
| 25 |
+
max-width: 1400px;
|
| 26 |
+
margin: 0 auto;
|
| 27 |
+
display: grid;
|
| 28 |
+
grid-template-columns: 1fr 1fr;
|
| 29 |
+
gap: 20px;
|
| 30 |
+
}
|
| 31 |
+
.panel {
|
| 32 |
+
background: white;
|
| 33 |
+
padding: 30px;
|
| 34 |
+
border-radius: 12px;
|
| 35 |
+
box-shadow: 0 2px 8px rgba(0,0,0,0.08);
|
| 36 |
+
}
|
| 37 |
+
.panel.full { grid-column: 1 / -1; }
|
| 38 |
+
h2 {
|
| 39 |
+
margin-top: 0;
|
| 40 |
+
color: #1d1d1f;
|
| 41 |
+
font-size: 24px;
|
| 42 |
+
border-bottom: 2px solid #667eea;
|
| 43 |
+
padding-bottom: 10px;
|
| 44 |
+
}
|
| 45 |
+
.input-group {
|
| 46 |
+
margin: 20px 0;
|
| 47 |
+
}
|
| 48 |
+
label {
|
| 49 |
+
display: block;
|
| 50 |
+
margin-bottom: 8px;
|
| 51 |
+
font-weight: 600;
|
| 52 |
+
color: #1d1d1f;
|
| 53 |
+
}
|
| 54 |
+
textarea, select {
|
| 55 |
+
width: 100%;
|
| 56 |
+
padding: 12px;
|
| 57 |
+
border: 2px solid #d2d2d7;
|
| 58 |
+
border-radius: 8px;
|
| 59 |
+
font-size: 15px;
|
| 60 |
+
font-family: inherit;
|
| 61 |
+
transition: border-color 0.2s;
|
| 62 |
+
}
|
| 63 |
+
textarea:focus, select:focus {
|
| 64 |
+
outline: none;
|
| 65 |
+
border-color: #667eea;
|
| 66 |
+
}
|
| 67 |
+
textarea { min-height: 120px; resize: vertical; }
|
| 68 |
+
.checkbox-group {
|
| 69 |
+
display: flex;
|
| 70 |
+
gap: 20px;
|
| 71 |
+
margin: 20px 0;
|
| 72 |
+
}
|
| 73 |
+
.checkbox-group label {
|
| 74 |
+
display: flex;
|
| 75 |
+
align-items: center;
|
| 76 |
+
gap: 8px;
|
| 77 |
+
font-weight: 500;
|
| 78 |
+
}
|
| 79 |
+
input[type="checkbox"] {
|
| 80 |
+
width: 18px;
|
| 81 |
+
height: 18px;
|
| 82 |
+
cursor: pointer;
|
| 83 |
+
}
|
| 84 |
+
button {
|
| 85 |
+
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
|
| 86 |
+
color: white;
|
| 87 |
+
padding: 14px 32px;
|
| 88 |
+
border: none;
|
| 89 |
+
border-radius: 8px;
|
| 90 |
+
cursor: pointer;
|
| 91 |
+
font-size: 16px;
|
| 92 |
+
font-weight: 600;
|
| 93 |
+
transition: transform 0.2s, box-shadow 0.2s;
|
| 94 |
+
box-shadow: 0 4px 12px rgba(102, 126, 234, 0.4);
|
| 95 |
+
}
|
| 96 |
+
button:hover {
|
| 97 |
+
transform: translateY(-2px);
|
| 98 |
+
box-shadow: 0 6px 16px rgba(102, 126, 234, 0.5);
|
| 99 |
+
}
|
| 100 |
+
button:active {
|
| 101 |
+
transform: translateY(0);
|
| 102 |
+
}
|
| 103 |
+
.badge {
|
| 104 |
+
display: inline-block;
|
| 105 |
+
padding: 6px 12px;
|
| 106 |
+
border-radius: 6px;
|
| 107 |
+
font-size: 13px;
|
| 108 |
+
font-weight: 600;
|
| 109 |
+
margin: 4px;
|
| 110 |
+
}
|
| 111 |
+
.badge.safe { background: #d1f4e0; color: #0f5132; }
|
| 112 |
+
.badge.mild { background: #fff3cd; color: #997404; }
|
| 113 |
+
.badge.explicit { background: #f8d7da; color: #842029; }
|
| 114 |
+
.badge.slur { background: #f5c2c7; color: #58151c; }
|
| 115 |
+
.badge.threat { background: #ea868f; color: #58151c; }
|
| 116 |
+
.comparison {
|
| 117 |
+
display: grid;
|
| 118 |
+
grid-template-columns: 1fr 1fr;
|
| 119 |
+
gap: 20px;
|
| 120 |
+
margin-top: 20px;
|
| 121 |
+
}
|
| 122 |
+
.comparison-item {
|
| 123 |
+
padding: 15px;
|
| 124 |
+
background: #f5f5f7;
|
| 125 |
+
border-radius: 8px;
|
| 126 |
+
border-left: 4px solid #667eea;
|
| 127 |
+
}
|
| 128 |
+
.comparison-item h4 {
|
| 129 |
+
margin-top: 0;
|
| 130 |
+
color: #667eea;
|
| 131 |
+
font-size: 14px;
|
| 132 |
+
text-transform: uppercase;
|
| 133 |
+
letter-spacing: 0.5px;
|
| 134 |
+
}
|
| 135 |
+
.comparison-item pre {
|
| 136 |
+
margin: 0;
|
| 137 |
+
white-space: pre-wrap;
|
| 138 |
+
word-wrap: break-word;
|
| 139 |
+
font-size: 14px;
|
| 140 |
+
line-height: 1.6;
|
| 141 |
+
}
|
| 142 |
+
.metrics {
|
| 143 |
+
display: grid;
|
| 144 |
+
grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
|
| 145 |
+
gap: 15px;
|
| 146 |
+
margin: 20px 0;
|
| 147 |
+
}
|
| 148 |
+
.metric {
|
| 149 |
+
background: #f5f5f7;
|
| 150 |
+
padding: 20px;
|
| 151 |
+
border-radius: 8px;
|
| 152 |
+
text-align: center;
|
| 153 |
+
}
|
| 154 |
+
.metric-value {
|
| 155 |
+
font-size: 32px;
|
| 156 |
+
font-weight: 700;
|
| 157 |
+
color: #667eea;
|
| 158 |
+
margin: 10px 0;
|
| 159 |
+
}
|
| 160 |
+
.metric-label {
|
| 161 |
+
font-size: 13px;
|
| 162 |
+
color: #86868b;
|
| 163 |
+
text-transform: uppercase;
|
| 164 |
+
letter-spacing: 0.5px;
|
| 165 |
+
}
|
| 166 |
+
.log-viewer {
|
| 167 |
+
background: #1d1d1f;
|
| 168 |
+
color: #f5f5f7;
|
| 169 |
+
padding: 20px;
|
| 170 |
+
border-radius: 8px;
|
| 171 |
+
font-family: 'Courier New', monospace;
|
| 172 |
+
font-size: 13px;
|
| 173 |
+
max-height: 400px;
|
| 174 |
+
overflow-y: auto;
|
| 175 |
+
}
|
| 176 |
+
.log-entry {
|
| 177 |
+
margin: 10px 0;
|
| 178 |
+
padding: 10px;
|
| 179 |
+
background: rgba(255,255,255,0.05);
|
| 180 |
+
border-radius: 4px;
|
| 181 |
+
}
|
| 182 |
+
.log-redacted {
|
| 183 |
+
color: #ff9f0a;
|
| 184 |
+
}
|
| 185 |
+
.log-verbatim {
|
| 186 |
+
color: #30d158;
|
| 187 |
+
}
|
| 188 |
+
.example-box {
|
| 189 |
+
background: linear-gradient(135deg, #e0e7ff 0%, #f0e7ff 100%);
|
| 190 |
+
padding: 20px;
|
| 191 |
+
border-radius: 8px;
|
| 192 |
+
margin: 20px 0;
|
| 193 |
+
border-left: 4px solid #667eea;
|
| 194 |
+
}
|
| 195 |
+
.example-box h3 {
|
| 196 |
+
margin-top: 0;
|
| 197 |
+
color: #1d1d1f;
|
| 198 |
+
}
|
| 199 |
+
.tab-container {
|
| 200 |
+
margin-top: 20px;
|
| 201 |
+
}
|
| 202 |
+
.tabs {
|
| 203 |
+
display: flex;
|
| 204 |
+
gap: 10px;
|
| 205 |
+
border-bottom: 2px solid #d2d2d7;
|
| 206 |
+
margin-bottom: 20px;
|
| 207 |
+
}
|
| 208 |
+
.tab {
|
| 209 |
+
padding: 10px 20px;
|
| 210 |
+
cursor: pointer;
|
| 211 |
+
border: none;
|
| 212 |
+
background: none;
|
| 213 |
+
font-size: 15px;
|
| 214 |
+
font-weight: 600;
|
| 215 |
+
color: #86868b;
|
| 216 |
+
border-bottom: 3px solid transparent;
|
| 217 |
+
transition: all 0.2s;
|
| 218 |
+
}
|
| 219 |
+
.tab.active {
|
| 220 |
+
color: #667eea;
|
| 221 |
+
border-bottom-color: #667eea;
|
| 222 |
+
}
|
| 223 |
+
.tab-content {
|
| 224 |
+
display: none;
|
| 225 |
+
}
|
| 226 |
+
.tab-content.active {
|
| 227 |
+
display: block;
|
| 228 |
+
}
|
| 229 |
+
.hidden { display: none !important; }
|
| 230 |
+
.ai-scores {
|
| 231 |
+
display: grid;
|
| 232 |
+
grid-template-columns: repeat(auto-fill, minmax(150px, 1fr));
|
| 233 |
+
gap: 10px;
|
| 234 |
+
margin: 15px 0;
|
| 235 |
+
}
|
| 236 |
+
.ai-score {
|
| 237 |
+
background: #f5f5f7;
|
| 238 |
+
padding: 12px;
|
| 239 |
+
border-radius: 6px;
|
| 240 |
+
text-align: center;
|
| 241 |
+
}
|
| 242 |
+
.ai-score-label {
|
| 243 |
+
font-size: 11px;
|
| 244 |
+
color: #86868b;
|
| 245 |
+
text-transform: uppercase;
|
| 246 |
+
}
|
| 247 |
+
.ai-score-value {
|
| 248 |
+
font-size: 20px;
|
| 249 |
+
font-weight: 700;
|
| 250 |
+
color: #1d1d1f;
|
| 251 |
+
margin-top: 5px;
|
| 252 |
+
}
|
| 253 |
+
</style>
|
| 254 |
+
</head>
|
| 255 |
+
<body>
|
| 256 |
+
<div class="header">
|
| 257 |
+
<h1>🧩 Context-Aware Profanity Handler</h1>
|
| 258 |
+
<p>Interactive demonstration of context-aware profanity detection, delexicalization, and audit logging</p>
|
| 259 |
+
</div>
|
| 260 |
+
|
| 261 |
+
<div class="container">
|
| 262 |
+
<!-- Input Panel -->
|
| 263 |
+
<div class="panel">
|
| 264 |
+
<h2>📝 Input</h2>
|
| 265 |
+
|
| 266 |
+
<div class="example-box">
|
| 267 |
+
<h3>💡 Example Use Case</h3>
|
| 268 |
+
<p><strong>Scenario:</strong> A user wants to generate a report about an asset with an explicit song title.</p>
|
| 269 |
+
<p><strong>Input:</strong> "Report on asset: <em>Do You Want to Fuck Me Tonight</em>"</p>
|
| 270 |
+
<p><strong>Context:</strong> Song Title (Entity Name)</p>
|
| 271 |
+
</div>
|
| 272 |
+
|
| 273 |
+
<div class="input-group">
|
| 274 |
+
<label for="text">Text to Analyze:</label>
|
| 275 |
+
<textarea id="text" placeholder="Enter text here...">Report on asset: Do You Want to Fuck Me Tonight</textarea>
|
| 276 |
+
</div>
|
| 277 |
+
|
| 278 |
+
<div class="input-group">
|
| 279 |
+
<label for="context">Content Category:</label>
|
| 280 |
+
<select id="context">
|
| 281 |
+
<option value="song_title">Song Title</option>
|
| 282 |
+
<option value="entity_name">Entity Name</option>
|
| 283 |
+
<option value="brand_name">Brand Name</option>
|
| 284 |
+
<option value="user_input">User Input</option>
|
| 285 |
+
</select>
|
| 286 |
+
</div>
|
| 287 |
+
|
| 288 |
+
<div class="checkbox-group">
|
| 289 |
+
<label>
|
| 290 |
+
<input type="checkbox" id="strict_mode"> Strict Mode
|
| 291 |
+
</label>
|
| 292 |
+
<label>
|
| 293 |
+
<input type="checkbox" id="use_ai" checked> Use AI Classifier
|
| 294 |
+
</label>
|
| 295 |
+
<label>
|
| 296 |
+
<input type="checkbox" id="include_explicit"> Include in Export
|
| 297 |
+
</label>
|
| 298 |
+
</div>
|
| 299 |
+
|
| 300 |
+
<button onclick="analyzeText()">🔍 Analyze Text</button>
|
| 301 |
+
</div>
|
| 302 |
+
|
| 303 |
+
<!-- Results Panel -->
|
| 304 |
+
<div class="panel">
|
| 305 |
+
<h2>📊 Analysis Results</h2>
|
| 306 |
+
<div id="results" style="color: #86868b; text-align: center; padding: 40px;">
|
| 307 |
+
Run an analysis to see results here
|
| 308 |
+
</div>
|
| 309 |
+
</div>
|
| 310 |
+
|
| 311 |
+
<!-- Comparison Panel -->
|
| 312 |
+
<div class="panel full hidden" id="comparisonPanel">
|
| 313 |
+
<h2>🔄 Text Comparison</h2>
|
| 314 |
+
<div class="comparison">
|
| 315 |
+
<div class="comparison-item">
|
| 316 |
+
<h4>📄 Original Text (Verbatim)</h4>
|
| 317 |
+
<pre id="originalText"></pre>
|
| 318 |
+
</div>
|
| 319 |
+
<div class="comparison-item">
|
| 320 |
+
<h4>✨ Delexicalized Text (Safe for AI)</h4>
|
| 321 |
+
<pre id="safeText"></pre>
|
| 322 |
+
</div>
|
| 323 |
+
<div class="comparison-item">
|
| 324 |
+
<h4>📤 Export Text (Based on Preference)</h4>
|
| 325 |
+
<pre id="exportText"></pre>
|
| 326 |
+
</div>
|
| 327 |
+
<div class="comparison-item">
|
| 328 |
+
<h4>🔍 Detected Words</h4>
|
| 329 |
+
<pre id="detectedWords"></pre>
|
| 330 |
+
</div>
|
| 331 |
+
</div>
|
| 332 |
+
</div>
|
| 333 |
+
|
| 334 |
+
<!-- Logs Panel -->
|
| 335 |
+
<div class="panel full hidden" id="logsPanel">
|
| 336 |
+
<h2>📋 Audit Logs Visualization</h2>
|
| 337 |
+
|
| 338 |
+
<div class="tab-container">
|
| 339 |
+
<div class="tabs">
|
| 340 |
+
<button class="tab active" onclick="switchTab('redacted')">📊 Redacted Logs (Analytics)</button>
|
| 341 |
+
<button class="tab" onclick="switchTab('verbatim')">🔐 Verbatim Logs (Compliance)</button>
|
| 342 |
+
</div>
|
| 343 |
+
|
| 344 |
+
<div class="tab-content active" id="redacted-tab">
|
| 345 |
+
<p style="color: #86868b; margin-bottom: 15px;">
|
| 346 |
+
<strong>Purpose:</strong> Safe for analytics and monitoring. Contains metadata without sensitive content.
|
| 347 |
+
</p>
|
| 348 |
+
<div class="log-viewer" id="redactedLogs">
|
| 349 |
+
<div class="log-redacted">Waiting for analysis...</div>
|
| 350 |
+
</div>
|
| 351 |
+
</div>
|
| 352 |
+
|
| 353 |
+
<div class="tab-content" id="verbatim-tab">
|
| 354 |
+
<p style="color: #86868b; margin-bottom: 15px;">
|
| 355 |
+
<strong>Purpose:</strong> Full audit trail for compliance. Access should be restricted (RBAC).
|
| 356 |
+
</p>
|
| 357 |
+
<div class="log-viewer" id="verbatimLogs">
|
| 358 |
+
<div class="log-verbatim">Waiting for analysis...</div>
|
| 359 |
+
</div>
|
| 360 |
+
</div>
|
| 361 |
+
</div>
|
| 362 |
+
</div>
|
| 363 |
+
</div>
|
| 364 |
+
|
| 365 |
+
<script>
|
| 366 |
+
let currentRequestId = null;
|
| 367 |
+
|
| 368 |
+
// Activate the clicked tab button and reveal the matching "<tabName>-tab"
// content panel, deactivating all others.
// NOTE(review): this relies on the implicit global `event` to identify the
// clicked button — non-standard (works in current browsers via
// window.event) — confirm, or pass the event explicitly from the
// onclick="switchTab(...)" handlers in the markup.
function switchTab(tabName) {
    document.querySelectorAll('.tab').forEach(t => t.classList.remove('active'));
    document.querySelectorAll('.tab-content').forEach(c => c.classList.remove('active'));

    event.target.classList.add('active');
    document.getElementById(tabName + '-tab').classList.add('active');
}
|
| 375 |
+
|
| 376 |
+
// Read the form inputs, POST them to /analyze, and render the results,
// the text-comparison panel, and the audit-log panels.
async function analyzeText() {
    const text = document.getElementById('text').value;
    const context = document.getElementById('context').value;
    const strict_mode = document.getElementById('strict_mode').checked;
    const use_ai = document.getElementById('use_ai').checked;
    const include_explicit_in_export = document.getElementById('include_explicit').checked;

    const resultsDiv = document.getElementById('results');
    resultsDiv.innerHTML = '<div style="text-align: center; padding: 20px;">⏳ Analyzing...</div>';

    try {
        const response = await fetch('/analyze', {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({ text, context, strict_mode, use_ai, include_explicit_in_export })
        });

        // fetch() only rejects on network failure; surface HTTP errors
        // (validation failures, server errors) explicitly instead of
        // letting response.json() render an error body as results.
        if (!response.ok) {
            throw new Error(`Request failed with status ${response.status}`);
        }

        const data = await response.json();
        currentRequestId = data.request_id;

        // Display results
        displayResults(data, text);

        // Show comparison panel
        displayComparison(text, data);

        // Load and display logs
        await displayLogs(data.request_id);

    } catch (error) {
        resultsDiv.innerHTML = `<div style="color: #d1180b;">Error: ${error.message}</div>`;
    }
}
|
| 409 |
+
|
| 410 |
+
// Render the analysis summary into the results panel: metric tiles
// (profanity flag, toxicity badge), the server message, optional AI
// confidence scores, and the request id.
// NOTE(review): values are interpolated into innerHTML unescaped. They come
// from our own API, but data.message may echo user-supplied text — confirm
// the server escapes it, or HTML-escape before injecting.
function displayResults(data, originalText) {
    const resultsDiv = document.getElementById('results');

    let html = '<div class="metrics">';
    html += `<div class="metric">
        <div class="metric-label">Profanity</div>
        <div class="metric-value">${data.contains_profanity ? '⚠️' : '✅'}</div>
    </div>`;
    html += `<div class="metric">
        <div class="metric-label">Toxicity</div>
        <div class="metric-value"><span class="badge ${data.toxicity_level}">${data.toxicity_level.toUpperCase()}</span></div>
    </div>`;
    html += '</div>';

    html += `<div style="margin: 20px 0;">
        <strong>Message:</strong>
        <div style="padding: 15px; background: #f5f5f7; border-radius: 8px; margin-top: 10px;">
            ${data.message}
        </div>
    </div>`;

    // AI confidence scores are optional — present only when the AI
    // classifier ran for this request.
    if (data.ai_confidence) {
        html += '<div style="margin: 20px 0;"><strong>AI Confidence Scores:</strong><div class="ai-scores">';
        for (const [label, score] of Object.entries(data.ai_confidence)) {
            html += `<div class="ai-score">
                <div class="ai-score-label">${label}</div>
                <div class="ai-score-value">${(score * 100).toFixed(1)}%</div>
            </div>`;
        }
        html += '</div></div>';
    }

    html += `<div style="margin-top: 20px; font-size: 13px; color: #86868b;">
        Request ID: <code>${data.request_id}</code>
    </div>`;

    resultsDiv.innerHTML = html;
}
|
| 448 |
+
|
| 449 |
+
// Reveal the comparison panel and populate its four views: the verbatim
// input, the delexicalized text, the export rendering, and the list of
// detected words. Uses textContent, so no HTML interpretation occurs.
function displayComparison(originalText, data) {
    const setText = (id, value) => {
        document.getElementById(id).textContent = value;
    };

    document.getElementById('comparisonPanel').classList.remove('hidden');

    setText('originalText', originalText);
    setText('safeText', data.safe_text);
    setText('exportText', data.export_text);

    const words = data.detected_words;
    const summary = (words && words.length > 0) ? words.join(', ') : 'None detected';
    setText('detectedWords', summary);
}
|
| 459 |
+
|
| 460 |
+
// Fetch and render the two audit-log views for the given request:
// the redacted logs (safe for analytics) and the verbatim log
// (compliance access only, contains the raw original text).
async function displayLogs(requestId) {
    document.getElementById('logsPanel').classList.remove('hidden');

    // Escape user-influenced values before interpolating into innerHTML —
    // the verbatim log contains the raw original text, which could
    // otherwise inject markup/script into the page (stored XSS).
    const esc = (v) => String(v).replace(/[&<>"']/g, (c) => ({
        '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;'
    }[c]));

    try {
        // Load redacted logs
        const redactedResponse = await fetch('/logs/redacted');
        const redactedData = await redactedResponse.json();

        const redactedLogs = document.getElementById('redactedLogs');
        if (redactedData.logs && redactedData.logs.length > 0) {
            // Show the five most recent entries, newest first.
            redactedLogs.innerHTML = redactedData.logs.slice(-5).reverse().map(log => `
                <div class="log-entry log-redacted">
                    <strong>Request ID:</strong> ${esc(log.request_id)}<br>
                    <strong>Timestamp:</strong> ${new Date(log.timestamp).toLocaleString()}<br>
                    <strong>Context:</strong> ${esc(log.context)}<br>
                    <strong>Profanity:</strong> ${log.contains_profanity ? 'Yes' : 'No'}<br>
                    <strong>Toxicity:</strong> ${esc(log.toxicity_level)}<br>
                    <strong>Text Hash:</strong> ${esc(log.text_hash)}<br>
                    <strong>Text Length:</strong> ${log.text_length} chars
                </div>
            `).join('');
        }

        // Load verbatim log for current request
        const verbatimResponse = await fetch(`/logs/verbatim/${encodeURIComponent(requestId)}`);
        const verbatimData = await verbatimResponse.json();

        const verbatimLogs = document.getElementById('verbatimLogs');
        if (verbatimData.request_id) {
            verbatimLogs.innerHTML = `
                <div class="log-entry log-verbatim">
                    <strong>⚠️ COMPLIANCE ACCESS ONLY ⚠️</strong><br><br>
                    <strong>Request ID:</strong> ${esc(verbatimData.request_id)}<br>
                    <strong>Timestamp:</strong> ${new Date(verbatimData.timestamp).toLocaleString()}<br>
                    <strong>Context:</strong> ${esc(verbatimData.context)}<br>
                    <strong>Original Text:</strong> ${esc(verbatimData.original_text)}<br>
                    <strong>Safe Text:</strong> ${esc(verbatimData.safe_text)}<br>
                    <strong>Profanity:</strong> ${verbatimData.contains_profanity ? 'Yes' : 'No'}<br>
                    <strong>Toxicity:</strong> ${esc(verbatimData.toxicity_level)}
                </div>
            `;
        } else {
            verbatimLogs.innerHTML = '<div class="log-verbatim">Verbatim logs not available or disabled</div>';
        }

    } catch (error) {
        console.error('Error loading logs:', error);
    }
}
|
| 509 |
+
</script>
|
| 510 |
+
</body>
|
| 511 |
+
</html>
|
src/api/main.py
ADDED
|
@@ -0,0 +1,210 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import FastAPI
|
| 2 |
+
from fastapi.responses import HTMLResponse
|
| 3 |
+
from pathlib import Path
|
| 4 |
+
from .routes import app
|
| 5 |
+
|
| 6 |
+
@app.get("/", response_class=HTMLResponse)
async def root() -> str:
    """Serve the interactive demo page with visualizations.

    Reads ``enhanced_ui.html`` (located next to this module) and returns
    its contents as the response body.
    """
    ui_path = Path(__file__).parent / "enhanced_ui.html"
    # Explicit UTF-8: the page contains emoji, so relying on the platform
    # default encoding would raise UnicodeDecodeError on e.g. Windows (cp1252).
    return ui_path.read_text(encoding="utf-8")
|
| 12 |
+
|
| 13 |
+
@app.get("/simple", response_class=HTMLResponse)
async def simple_demo() -> str:
    """Serve the original simple demo page.

    Returns a self-contained HTML document (inline CSS and JS) that posts
    the form to ``/analyze`` and renders the JSON response; no template
    engine is involved.
    """
    return """
<!DOCTYPE html>
<html>
<head>
    <title>Context-Aware Profanity Handler - Interactive Demo</title>
    <style>
        body {
            font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
            max-width: 900px;
            margin: 40px auto;
            padding: 20px;
            background: #f5f5f5;
        }
        .container {
            background: white;
            padding: 30px;
            border-radius: 8px;
            box-shadow: 0 2px 4px rgba(0,0,0,0.1);
        }
        h1 {
            color: #2c3e50;
            border-bottom: 3px solid #3498db;
            padding-bottom: 10px;
        }
        .input-group {
            margin: 20px 0;
        }
        label {
            display: block;
            margin-bottom: 5px;
            font-weight: 600;
            color: #34495e;
        }
        textarea, select {
            width: 100%;
            padding: 10px;
            border: 1px solid #ddd;
            border-radius: 4px;
            font-size: 14px;
            box-sizing: border-box;
        }
        textarea {
            min-height: 100px;
            resize: vertical;
        }
        button {
            background: #3498db;
            color: white;
            padding: 12px 30px;
            border: none;
            border-radius: 4px;
            cursor: pointer;
            font-size: 16px;
            font-weight: 600;
        }
        button:hover {
            background: #2980b9;
        }
        .result {
            margin-top: 20px;
            padding: 20px;
            border-radius: 4px;
            display: none;
        }
        .result.safe {
            background: #d4edda;
            border: 1px solid #c3e6cb;
            color: #155724;
        }
        .result.warning {
            background: #fff3cd;
            border: 1px solid #ffeeba;
            color: #856404;
        }
        .result.error {
            background: #f8d7da;
            border: 1px solid #f5c6cb;
            color: #721c24;
        }
        .example {
            background: #e8f4f8;
            padding: 15px;
            border-radius: 4px;
            margin: 20px 0;
            border-left: 4px solid #3498db;
        }
        .example h3 {
            margin-top: 0;
            color: #2c3e50;
        }
        .badge {
            display: inline-block;
            padding: 4px 8px;
            border-radius: 3px;
            font-size: 12px;
            font-weight: 600;
            margin-right: 5px;
        }
        .badge.safe { background: #d4edda; color: #155724; }
        .badge.mild { background: #fff3cd; color: #856404; }
        .badge.explicit { background: #f8d7da; color: #721c24; }
        .badge.slur { background: #f5c6cb; color: #721c24; }
        .badge.threat { background: #f5c6cb; color: #721c24; }
    </style>
</head>
<body>
    <div class="container">
        <h1>🧩 Context-Aware Profanity Handler</h1>
        <p>A demonstration of context-aware profanity detection and handling for AI-assisted reporting.</p>

        <div class="example">
            <h3>💡 Example Use Case</h3>
            <p><strong>Input:</strong> "Report on asset: <em>Do You Want to F*** Me Tonight</em>"</p>
            <p><strong>Context:</strong> Song Title (Entity Name)</p>
            <p><strong>Expected Result:</strong> Detected but allowed, with transparent feedback about safe rendering.</p>
        </div>

        <div class="input-group">
            <label for="text">Text to Analyze:</label>
            <textarea id="text" placeholder="Enter text here, e.g., a song title, brand name, or user input...">Report on asset: Do You Want to Fuck Me Tonight</textarea>
        </div>

        <div class="input-group">
            <label for="context">Content Category:</label>
            <select id="context">
                <option value="song_title">Song Title</option>
                <option value="entity_name">Entity Name</option>
                <option value="brand_name">Brand Name</option>
                <option value="user_input">User Input</option>
            </select>
        </div>

        <div class="input-group">
            <label>
                <input type="checkbox" id="strict_mode"> Strict Mode
            </label>
        </div>

        <button onclick="analyzeText()">Analyze Text</button>

        <div id="result" class="result"></div>
    </div>

    <script>
        async function analyzeText() {
            const text = document.getElementById('text').value;
            const context = document.getElementById('context').value;
            const strict_mode = document.getElementById('strict_mode').checked;

            const resultDiv = document.getElementById('result');
            resultDiv.style.display = 'none';

            try {
                const response = await fetch('/analyze', {
                    method: 'POST',
                    headers: {
                        'Content-Type': 'application/json',
                    },
                    body: JSON.stringify({ text, context, strict_mode })
                });

                const data = await response.json();

                let className = 'safe';
                if (data.toxicity_level === 'explicit' || data.toxicity_level === 'slur' || data.toxicity_level === 'threat') {
                    className = 'error';
                } else if (data.toxicity_level === 'mild') {
                    className = 'warning';
                }

                resultDiv.className = 'result ' + className;
                resultDiv.innerHTML = `
                    <h3>Analysis Results</h3>
                    <p><strong>Profanity Detected:</strong> ${data.contains_profanity ? 'Yes' : 'No'}</p>
                    <p><strong>Toxicity Level:</strong> <span class="badge ${data.toxicity_level}">${data.toxicity_level.toUpperCase()}</span></p>
                    <p><strong>Message:</strong> ${data.message}</p>
                    <hr>
                    <p><strong>Original Text:</strong><br><code>${text}</code></p>
                    <p><strong>Safe Text:</strong><br><code>${data.safe_text}</code></p>
                `;
                resultDiv.style.display = 'block';
            } catch (error) {
                resultDiv.className = 'result error';
                resultDiv.innerHTML = `<p>Error: ${error.message}</p>`;
                resultDiv.style.display = 'block';
            }
        }
    </script>
</body>
</html>
"""
|
| 207 |
+
|
| 208 |
+
if __name__ == "__main__":
    # Allow running the API directly (e.g. `python -m src.api.main`)
    # without a separate uvicorn invocation.
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)
|
src/api/routes.py
ADDED
|
@@ -0,0 +1,116 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import FastAPI, HTTPException
|
| 2 |
+
from pydantic import BaseModel
|
| 3 |
+
from typing import Optional, Dict, Any
|
| 4 |
+
from datetime import datetime
|
| 5 |
+
|
| 6 |
+
from ..core.detector import ContentCategory, ProfanityDetector
|
| 7 |
+
from ..core.delexicalizer import Delexicalizer
|
| 8 |
+
from ..core.classifier import ContextClassifier, ToxicityLevel
|
| 9 |
+
from ..core.ai_classifier import AIClassifier
|
| 10 |
+
from ..utils.logger import audit_logger
|
| 11 |
+
|
| 12 |
+
# FastAPI application instance; the route handlers below attach to it.
app = FastAPI(
    title="Profanity Handler API",
    description="Context-aware profanity handling system for AI-assisted reporting",
    version="0.1.0"
)

# Module-level singletons shared by every request handler.
# NOTE(review): ProfanityDetector and Delexicalizer keep per-request mutable
# state (detected words, placeholder maps); sharing one instance across
# concurrent async requests can race — confirm, or create per-request copies.
detector = ProfanityDetector()
delexicalizer = Delexicalizer()
classifier = ContextClassifier()
ai_classifier = AIClassifier()
|
| 22 |
+
|
| 23 |
+
class TextRequest(BaseModel):
    """Request payload for the /analyze endpoint."""
    # Raw text to screen for profanity.
    text: str
    # What kind of content this is (entity name, song title, brand, user
    # input); entity-like categories are treated more leniently by the detector.
    context: ContentCategory
    # When True, disables the entity-name leniency in the detector.
    strict_mode: Optional[bool] = False
    # When True, toxicity is classified by the AI model instead of the rules.
    use_ai: Optional[bool] = True
    # When True, export_text in the response keeps the original wording.
    include_explicit_in_export: Optional[bool] = False
|
| 29 |
+
|
| 30 |
+
class TextResponse(BaseModel):
    """Response payload for the /analyze endpoint."""
    # Audit-log identifier returned by audit_logger.log_request.
    request_id: str
    contains_profanity: bool
    toxicity_level: ToxicityLevel
    # Text with profanity replaced by placeholders (original text when clean).
    safe_text: Optional[str] = None
    # What an export would contain: the original text when the caller opted
    # in via include_explicit_in_export, otherwise the same as safe_text.
    export_text: Optional[str] = None
    # Human-readable summary of the outcome.
    message: str
    # Per-label confidence scores from the AI classifier; None when the
    # rule-based classifier was used.
    ai_confidence: Optional[Dict[str, float]] = None
    # Profane words found by the detector; empty list when the text is clean.
    detected_words: Optional[list] = None
|
| 39 |
+
|
| 40 |
+
@app.post("/analyze", response_model=TextResponse)
async def analyze_text(request: TextRequest) -> TextResponse:
    """Analyze text for profanity with context awareness.

    Pipeline: detect profanity -> classify toxicity (AI or rules) ->
    delexicalize when needed -> pick export rendering -> audit-log -> respond.
    """

    # Reset delexicalizer for new request.
    # NOTE(review): detector and delexicalizer are module-level singletons with
    # mutable state; concurrent requests to this async endpoint can interleave
    # and cross-contaminate results — confirm, consider per-request instances.
    delexicalizer.reset()

    # Check for profanity (leniency depends on request.context / strict_mode)
    has_profanity = detector.detect_profanity(
        request.text,
        request.context,
        request.strict_mode
    )

    # Classify toxicity (use AI if requested and available; the AI classifier
    # internally falls back to SAFE with empty scores when the model is down)
    ai_confidence = None
    if request.use_ai:
        toxicity, ai_confidence = ai_classifier.classify(request.text)
    else:
        toxicity = classifier.classify_context(request.text)

    # Words the detector flagged (empty when the text is clean)
    detected_words = list(detector.get_detected_words()) if has_profanity else []

    # Handle based on context and toxicity
    if has_profanity:
        safe_text = delexicalizer.delexicalize(request.text)
        # NOTE(review): this message says "record names" regardless of the
        # actual request.context — confirm whether it should vary by category.
        message = (
            "Heads up: explicit language detected in record names. "
            "Proceeding with safe rendering."
        )
    else:
        safe_text = request.text
        message = "No issues detected."

    # Export keeps the original text only when the caller explicitly opted in
    export_text = request.text if request.include_explicit_in_export else safe_text

    # Log the request to both the redacted and verbatim audit streams
    request_id = audit_logger.log_request(
        text=request.text,
        context=request.context.value,
        contains_profanity=has_profanity,
        toxicity_level=toxicity.value,
        safe_text=safe_text,
        metadata={
            "strict_mode": request.strict_mode,
            "use_ai": request.use_ai,
            "include_explicit_in_export": request.include_explicit_in_export
        }
    )

    return TextResponse(
        request_id=request_id,
        contains_profanity=has_profanity,
        toxicity_level=toxicity,
        safe_text=safe_text,
        export_text=export_text,
        message=message,
        ai_confidence=ai_confidence,
        detected_words=detected_words
    )
|
| 102 |
+
|
| 103 |
+
@app.get("/logs/redacted")
async def get_redacted_logs(date: Optional[str] = None):
    """Return redacted (content-free) log entries for analytics.

    Entries carry only hashes and metadata, so this endpoint is safe to
    expose. When no date is given the logger defaults to today (UTC).
    """
    entries = audit_logger.get_redacted_logs(date)
    return {
        "logs": entries,
        "count": len(entries),
    }
|
| 108 |
+
|
| 109 |
+
@app.get("/logs/verbatim/{request_id}")
async def get_verbatim_log(request_id: str, date: Optional[str] = None):
    """Look up the verbatim (unredacted) log entry for one request.

    Should be access-controlled in production: this endpoint demonstrates
    RBAC and would require special permissions there.
    """
    return audit_logger.get_verbatim_log(request_id, date)
|
src/core/__pycache__/ai_classifier.cpython-311.pyc
ADDED
|
Binary file (3.72 kB). View file
|
|
|
src/core/__pycache__/classifier.cpython-311.pyc
ADDED
|
Binary file (2.72 kB). View file
|
|
|
src/core/__pycache__/delexicalizer.cpython-311.pyc
ADDED
|
Binary file (3.47 kB). View file
|
|
|
src/core/__pycache__/detector.cpython-311.pyc
ADDED
|
Binary file (3.88 kB). View file
|
|
|
src/core/ai_classifier.py
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import Optional
|
| 2 |
+
from .classifier import ToxicityLevel
|
| 3 |
+
from ..utils.config import config
|
| 4 |
+
|
| 5 |
+
class AIClassifier:
    """AI-powered toxicity classifier using Hugging Face models.

    The transformers pipeline is loaded lazily on first use; when loading
    or inference fails, classification degrades to a SAFE verdict with an
    empty score dict.
    """

    def __init__(self):
        self.model = None
        self.tokenizer = None
        self._initialized = False

    def _initialize(self):
        """Load the Hugging Face pipeline once; later calls are no-ops."""
        if self._initialized:
            return

        try:
            from transformers import pipeline

            # toxic-bert is a public toxicity model; any configured token is
            # simply passed through.
            self.model = pipeline(
                "text-classification",
                model="unitary/toxic-bert",
                top_k=None,
                token=config.HUGGINGFACE_TOKEN
            )
        except Exception as e:
            print(f"⚠ Could not initialize AI model: {e}")
            print("  Falling back to rule-based classification")
            self._initialized = False
        else:
            self._initialized = True
            print("✓ AI Classifier initialized with toxic-bert model")

    def classify(self, text: str) -> tuple[ToxicityLevel, dict]:
        """
        Classify text using AI model.

        Returns:
            Tuple of (ToxicityLevel, confidence_scores)
        """
        self._initialize()

        if not self._initialized or self.model is None:
            # Model unavailable: degrade gracefully.
            return ToxicityLevel.SAFE, {}

        try:
            predictions = self.model(text)[0]
            # toxic-bert returns labels like 'toxic', 'severe_toxic',
            # 'obscene', etc., each with a score.
            label_scores = {entry['label']: entry['score'] for entry in predictions}
        except Exception as e:
            print(f"Error during AI classification: {e}")
            return ToxicityLevel.SAFE, {}

        # Threshold rules checked in decreasing order of severity; the first
        # label whose score clears its threshold decides the level.
        severity_rules = (
            ('severe_toxic', 0.5, ToxicityLevel.THREAT),
            ('obscene', 0.5, ToxicityLevel.EXPLICIT),
            ('insult', 0.4, ToxicityLevel.SLUR),
            ('toxic', 0.3, ToxicityLevel.MILD),
        )
        for label, threshold, level in severity_rules:
            if label_scores.get(label, 0) > threshold:
                return level, label_scores
        return ToxicityLevel.SAFE, label_scores
|
src/core/classifier.py
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from enum import Enum
|
| 2 |
+
from typing import List
|
| 3 |
+
import re
|
| 4 |
+
|
| 5 |
+
class ToxicityLevel(Enum):
    """Severity tiers for detected language, from harmless to threatening."""
    SAFE = "safe"
    MILD = "mild"
    EXPLICIT = "explicit"
    SLUR = "slur"
    THREAT = "threat"


class ContextClassifier:
    """Rule-based toxicity classifier backed by small word lists.

    Serves as the fallback when the AI classifier is disabled or unavailable.
    """

    def __init__(self):
        # Basic categorization of profanity by severity
        self._mild = {'damn', 'crap', 'hell', 'ass'}
        self._explicit = {'fuck', 'shit', 'bitch', 'piss', 'dick', 'cock', 'pussy'}
        self._slurs = {'bastard'}  # Simplified - real implementation would be more comprehensive
        # Threat vocabulary as whole words, with common inflections listed
        # explicitly so past/progressive forms are still caught.
        self._threat_keywords = {
            'kill', 'kills', 'killed', 'killing',
            'die', 'dies', 'died', 'dying',
            'death', 'deaths',
            'hurt', 'hurts', 'hurting',
            'harm', 'harms', 'harmed', 'harming',
        }

    def classify_context(self, text: str) -> ToxicityLevel:
        """
        Classify the toxicity level of text with context awareness.

        Args:
            text: Input text to classify

        Returns:
            ToxicityLevel: The classified toxicity level
        """
        text_lower = text.lower()
        words = set(re.findall(r'\b\w+\b', text_lower))

        # Check for threats first (highest priority).
        # Fixed: match whole words rather than substrings — the previous
        # `keyword in text_lower` check flagged innocent words such as
        # "skilled" (contains "kill") or "diet" (contains "die") as THREAT.
        if words & self._threat_keywords:
            return ToxicityLevel.THREAT

        # Check for slurs
        if words & self._slurs:
            return ToxicityLevel.SLUR

        # Check for explicit language
        if words & self._explicit:
            return ToxicityLevel.EXPLICIT

        # Check for mild profanity
        if words & self._mild:
            return ToxicityLevel.MILD

        return ToxicityLevel.SAFE
|
src/core/delexicalizer.py
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import Dict, List, Optional
|
| 2 |
+
import re
|
| 3 |
+
|
| 4 |
+
class Delexicalizer:
    """Swaps profane words for numbered placeholders and back again.

    Every detected word receives a fresh ``<PROFANITY_n>`` token; the
    mapping is retained so relexicalize() can restore the original wording.
    """

    def __init__(self):
        self._placeholder_map: Dict[str, str] = {}
        self._reverse_map: Dict[str, str] = {}
        self._counter = 0
        # Basic profanity list for detection
        self._profanity_patterns = [
            'fuck', 'shit', 'damn', 'ass', 'bitch', 'bastard',
            'crap', 'hell', 'piss', 'dick', 'cock', 'pussy'
        ]

    def delexicalize(self, text: str) -> str:
        """Replace profane words with placeholders.

        Args:
            text: Input text containing potential profanity

        Returns:
            str: Text with profanity replaced by placeholders
        """
        sanitized = text
        for term in self._profanity_patterns:
            hits = list(re.finditer(r'\b' + term + r'\b', sanitized, re.IGNORECASE))
            # Substitute right-to-left so earlier match offsets stay valid.
            for hit in reversed(hits):
                token = self._create_placeholder(hit.group())
                sanitized = sanitized[:hit.start()] + token + sanitized[hit.end():]
        return sanitized

    def relexicalize(self, text: str) -> str:
        """Restore original words from placeholders.

        Args:
            text: Text with placeholders

        Returns:
            str: Original text with placeholders replaced
        """
        restored = text
        for token, word in self._placeholder_map.items():
            restored = restored.replace(token, word)
        return restored

    def _create_placeholder(self, word: str) -> str:
        """Mint a unique placeholder token and record both mappings."""
        self._counter += 1
        token = f"<PROFANITY_{self._counter}>"
        self._placeholder_map[token] = word
        self._reverse_map[word.lower()] = token
        return token

    def reset(self):
        """Clear all placeholder state, e.g. between requests."""
        self._placeholder_map.clear()
        self._reverse_map.clear()
        self._counter = 0
|
src/core/detector.py
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from enum import Enum
|
| 2 |
+
from typing import List, Optional
|
| 3 |
+
import re
|
| 4 |
+
|
| 5 |
+
class ContentCategory(Enum):
    """Kinds of content being screened; entity-like categories are handled
    more permissively than free-form user input."""
    ENTITY_NAME = "entity_name"
    SONG_TITLE = "song_title"
    BRAND_NAME = "brand_name"
    USER_INPUT = "user_input"


class ProfanityDetector:
    """Word-list profanity detector with category-aware leniency."""

    def __init__(self):
        # Initialize with basic profanity list
        # In production, this would be loaded from a curated database
        self._profanity_list = {
            'fuck', 'shit', 'damn', 'ass', 'bitch', 'bastard',
            'crap', 'hell', 'piss', 'dick', 'cock', 'pussy'
        }
        # Words found by the most recent detect_profanity() call.
        self._profanity_words = set()

    def detect_profanity(
        self,
        text: str,
        context: ContentCategory,
        strict_mode: bool = False
    ) -> bool:
        """
        Detect profanity in text with context awareness.

        Args:
            text: Input text to check
            context: Category of the content (entity name, user input, etc.)
            strict_mode: Whether to apply stricter rules

        Returns:
            bool: True if profanity detected, False otherwise
        """
        # If it's an entity name and not in strict mode, we're more permissive
        lenient = {ContentCategory.ENTITY_NAME, ContentCategory.SONG_TITLE,
                   ContentCategory.BRAND_NAME}
        if context in lenient and not strict_mode:
            return self._check_with_context(text, context)

        return self._check_standard(text)

    def _scan(self, text: str) -> bool:
        """Tokenize text and record any profane words found.

        Fixed: the detected-word set is now replaced on every scan, so a
        clean input no longer leaves get_detected_words() reporting stale
        words from a previous call.
        """
        words = re.findall(r'\b\w+\b', text.lower())
        self._profanity_words = {word for word in words if word in self._profanity_list}
        return bool(self._profanity_words)

    def _check_with_context(self, text: str, context: ContentCategory) -> bool:
        """Context-aware checking - more permissive for entity names.

        For entity names we detect but don't block; the word scan itself is
        shared with the standard check (the two were previously duplicated).
        """
        return self._scan(text)

    def _check_standard(self, text: str) -> bool:
        """Standard profanity checking - stricter."""
        return self._scan(text)

    def get_detected_words(self) -> set:
        """Return the profane words found by the most recent check."""
        return self._profanity_words
|
src/utils/__pycache__/config.cpython-311.pyc
ADDED
|
Binary file (1.89 kB). View file
|
|
|
src/utils/__pycache__/logger.cpython-311.pyc
ADDED
|
Binary file (6.67 kB). View file
|
|
|
src/utils/config.py
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from pathlib import Path
|
| 3 |
+
from typing import Optional
|
| 4 |
+
|
| 5 |
+
class Config:
    """Application configuration, read from environment variables at import time."""

    # Hugging Face
    # Optional API token forwarded to the transformers pipeline; None when unset.
    HUGGINGFACE_TOKEN: Optional[str] = os.getenv("HUGGINGFACE_TOKEN")

    # Logging
    LOG_LEVEL: str = os.getenv("LOG_LEVEL", "INFO")
    # Verbatim (unredacted) logging defaults to ON. NOTE(review): only the
    # exact string "true" (case-insensitive) enables it — values like "1"
    # or "yes" silently disable verbatim logs; confirm that's intended.
    ENABLE_VERBATIM_LOGS: bool = os.getenv("ENABLE_VERBATIM_LOGS", "true").lower() == "true"

    # Paths
    # Project root: three levels up from this file (src/utils/config.py).
    BASE_DIR = Path(__file__).parent.parent.parent
    VERBATIM_LOG_PATH = BASE_DIR / os.getenv("VERBATIM_LOG_PATH", "logs/verbatim")
    REDACTED_LOG_PATH = BASE_DIR / os.getenv("REDACTED_LOG_PATH", "logs/redacted")

    @classmethod
    def ensure_log_dirs(cls):
        """Create log directories if they don't exist."""
        cls.VERBATIM_LOG_PATH.mkdir(parents=True, exist_ok=True)
        cls.REDACTED_LOG_PATH.mkdir(parents=True, exist_ok=True)
|
| 25 |
+
|
| 26 |
+
# Module-level singleton. Importing this module also creates the log
# directories, i.e. the import has filesystem side effects.
config = Config()
config.ensure_log_dirs()
|
src/utils/logger.py
ADDED
|
@@ -0,0 +1,119 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
import hashlib
|
| 3 |
+
from datetime import datetime
|
| 4 |
+
from pathlib import Path
|
| 5 |
+
from typing import Any, Dict
|
| 6 |
+
from .config import config
|
| 7 |
+
|
| 8 |
+
class AuditLogger:
    """Dual logging system: redacted for analytics, verbatim for compliance.

    Each request is appended to a per-day JSONL file in each stream: the
    redacted stream stores only hashes and metadata, while the verbatim
    stream (when enabled in config) stores the original text.
    """

    def __init__(self):
        self.verbatim_path = config.VERBATIM_LOG_PATH
        self.redacted_path = config.REDACTED_LOG_PATH

    def _generate_request_id(self, text: str) -> str:
        """Generate a unique request ID: 16 hex chars of a timestamped SHA-256."""
        timestamp = datetime.utcnow().isoformat()
        return hashlib.sha256(f"{timestamp}{text}".encode()).hexdigest()[:16]

    def _redact_text(self, text: str) -> str:
        """Redact sensitive content for analytics logs.

        MD5 here is a fingerprint, not a security measure: it preserves
        uniqueness while hiding the content.
        """
        return f"REDACTED_{hashlib.md5(text.encode()).hexdigest()[:8]}"

    def log_request(
        self,
        text: str,
        context: str,
        contains_profanity: bool,
        toxicity_level: str,
        safe_text: str,
        metadata: Dict[str, Any] = None  # optional; None means no extra metadata
    ) -> str:
        """
        Log a profanity check request to both redacted and verbatim logs.

        Args:
            text: Original (possibly profane) input text.
            context: Content category value.
            contains_profanity: Detector verdict.
            toxicity_level: Classifier verdict (enum value string).
            safe_text: Delexicalized rendering of the text.
            metadata: Optional extra key/value pairs stored with both entries.

        Returns:
            request_id: Unique identifier for this request
        """
        request_id = self._generate_request_id(text)
        timestamp = datetime.utcnow().isoformat()
        day = datetime.utcnow().strftime('%Y-%m-%d')

        # Redacted log (for analytics): content replaced by a hash + length.
        redacted_entry = {
            "request_id": request_id,
            "timestamp": timestamp,
            "context": context,
            "contains_profanity": contains_profanity,
            "toxicity_level": toxicity_level,
            "text_hash": hashlib.md5(text.encode()).hexdigest(),
            "text_length": len(text),
            "metadata": metadata or {}
        }

        # Verbatim log (for compliance/audit): keeps the original text.
        verbatim_entry = {
            "request_id": request_id,
            "timestamp": timestamp,
            "context": context,
            "original_text": text,
            "safe_text": safe_text,
            "contains_profanity": contains_profanity,
            "toxicity_level": toxicity_level,
            "metadata": metadata or {}
        }

        # Fixed: open log files with an explicit UTF-8 encoding. The logs
        # contain arbitrary user text, and the platform-default encoding
        # (e.g. cp1252 on Windows) can raise or corrupt on non-ASCII input.
        redacted_file = self.redacted_path / f"{day}.jsonl"
        with open(redacted_file, 'a', encoding='utf-8') as f:
            f.write(json.dumps(redacted_entry) + '\n')

        # Write verbatim log (if enabled)
        if config.ENABLE_VERBATIM_LOGS:
            verbatim_file = self.verbatim_path / f"{day}.jsonl"
            with open(verbatim_file, 'a', encoding='utf-8') as f:
                f.write(json.dumps(verbatim_entry) + '\n')

        return request_id

    def get_redacted_logs(self, date: str = None) -> list:
        """Retrieve redacted log entries for a date (defaults to today, UTC)."""
        if date is None:
            date = datetime.utcnow().strftime('%Y-%m-%d')

        log_file = self.redacted_path / f"{date}.jsonl"
        if not log_file.exists():
            return []

        with open(log_file, 'r', encoding='utf-8') as f:
            return [json.loads(line) for line in f]

    def get_verbatim_log(self, request_id: str, date: str = None) -> dict:
        """
        Retrieve verbatim log for a specific request (compliance only).
        This should be access-controlled in production.
        """
        if not config.ENABLE_VERBATIM_LOGS:
            return {"error": "Verbatim logs are disabled"}

        if date is None:
            date = datetime.utcnow().strftime('%Y-%m-%d')

        log_file = self.verbatim_path / f"{date}.jsonl"
        if not log_file.exists():
            return {"error": "Log file not found"}

        with open(log_file, 'r', encoding='utf-8') as f:
            for line in f:
                entry = json.loads(line)
                if entry['request_id'] == request_id:
                    return entry

        return {"error": "Request ID not found"}
|
| 117 |
+
|
| 118 |
+
# Singleton instance shared by the API layer (routes.py imports it as
# `from ..utils.logger import audit_logger`).
audit_logger = AuditLogger()
|