nixaut-codelabs commited on
Commit
b3e6062
·
verified ·
1 Parent(s): 9d8e365

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +468 -194
app.py CHANGED
@@ -37,7 +37,6 @@ def ensure_local_model(repo_id: str, local_dir: str, tries: int = 3, sleep_s: fl
37
  return local_dir
38
 
39
  model_path = ensure_local_model(MODEL_REPO, LOCAL_DIR)
40
-
41
  tokenizer = AutoTokenizer.from_pretrained(model_path, local_files_only=True)
42
 
43
  gemma_chat_template_simplified = (
@@ -64,17 +63,185 @@ model = AutoModelForCausalLM.from_pretrained(
64
  )
65
  model.eval()
66
 
67
- MODERATION_SYSTEM_PROMPT = (
68
- "You are a multilingual content moderation classifier. "
69
- "You MUST respond with exactly one lowercase letter: 's' for safe, 'u' for unsafe. "
70
- "No explanations, no punctuation, no extra words. "
71
- "If the message contains hate speech, harassment, sexual content involving minors, "
72
- "extreme violence, self-harm encouragement, or other unsafe material, respond 'u'. "
73
- "Otherwise respond 's'."
74
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
 
76
- def build_prompt(message, max_ctx_tokens=128):
77
- full_user_message = f"{MODERATION_SYSTEM_PROMPT}\n\nUser input: '{message}'"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
  messages = [{"role": "user", "content": full_user_message}]
79
 
80
  text = tokenizer.apply_chat_template(
@@ -83,8 +250,8 @@ def build_prompt(message, max_ctx_tokens=128):
83
  add_generation_prompt=True
84
  )
85
 
86
- while len(tokenizer(text, add_special_tokens=False).input_ids) > max_ctx_tokens and len(full_user_message) > 100:
87
- full_user_message = full_user_message[:-50]
88
  messages[0]['content'] = full_user_message
89
  text = tokenizer.apply_chat_template(
90
  messages,
@@ -97,49 +264,19 @@ def enforce_s_u(text: str) -> str:
97
  text_lower = text.strip().lower()
98
  if "u" in text_lower and "s" not in text_lower:
99
  return "u"
100
- if "unsafe" in text_lower:
101
  return "u"
102
  return "s"
103
 
104
- def format_classification_result(classification, tokens_per_second, processing_time):
105
- if classification == "s":
106
- status_emoji = "✅"
107
- status_text = "SAFE"
108
- status_color = "#22c55e"
109
- description = "Content appears to be safe and appropriate."
110
- else:
111
- status_emoji = "🚫"
112
- status_text = "UNSAFE"
113
- status_color = "#ef4444"
114
- description = "Content may contain inappropriate or harmful material."
115
-
116
- result_html = f"""
117
- <div style="text-align: center; padding: 20px; border-radius: 12px;
118
- background: linear-gradient(135deg, #f8fafc 0%, #e2e8f0 100%);
119
- border: 2px solid {status_color}; margin: 10px 0;">
120
- <div style="font-size: 48px; margin-bottom: 10px;">{status_emoji}</div>
121
- <div style="font-size: 24px; font-weight: bold; color: {status_color}; margin-bottom: 8px;">
122
- {status_text}
123
- </div>
124
- <div style="font-size: 16px; color: #64748b; margin-bottom: 15px;">
125
- {description}
126
- </div>
127
- <div style="display: flex; justify-content: center; gap: 20px; font-size: 14px; color: #475569;">
128
- <span>⚡ {tokens_per_second:.1f} tok/s</span>
129
- <span>⏱️ {processing_time:.2f}s</span>
130
- </div>
131
- </div>
132
- """
133
- return result_html
134
-
135
- def classify_text_stream(message, max_tokens, temperature, top_p, progress=gr.Progress()):
136
  if not message.strip():
137
- return format_classification_result("s", 0, 0)
138
 
139
- progress(0, desc="Preparing classification...")
140
- text = build_prompt(message)
141
  inputs = tokenizer([text], return_tensors="pt").to(model.device)
142
  do_sample = bool(temperature and temperature > 0.0)
 
143
  gen_kwargs = dict(
144
  max_new_tokens=max_tokens,
145
  do_sample=do_sample,
@@ -162,185 +299,322 @@ def classify_text_stream(message, max_tokens, temperature, top_p, progress=gr.Pr
162
 
163
  partial_text = ""
164
  token_count = 0
165
- start_time = None
166
-
167
- progress(0.3, desc="Processing content...")
168
 
169
  with torch.inference_mode():
170
  thread.start()
171
  try:
172
  for chunk in streamer:
173
- if start_time is None:
174
- start_time = time.time()
175
  partial_text += chunk
176
  token_count += 1
177
- progress(0.3 + (token_count / max_tokens) * 0.6, desc="Analyzing...")
178
  finally:
179
  thread.join()
180
 
181
- final_label = enforce_s_u(partial_text)
182
- end_time = time.time() if start_time else time.time()
183
- duration = max(1e-6, end_time - start_time)
184
- tps = token_count / duration if duration > 0 else 0.0
185
 
186
- progress(1.0, desc="Complete!")
187
 
188
- return format_classification_result(final_label, tps, duration)
189
-
190
- custom_css = """
191
- .main-container {
192
- max-width: 1200px !important;
193
- margin: 0 auto !important;
194
- }
195
-
196
- .header-section {
197
- background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
198
- padding: 2rem;
199
- border-radius: 16px;
200
- margin-bottom: 2rem;
201
- color: white;
202
- text-align: center;
203
- }
204
-
205
- .classification-panel {
206
- background: white;
207
- border-radius: 16px;
208
- padding: 2rem;
209
- box-shadow: 0 4px 20px rgba(0, 0, 0, 0.1);
210
- border: 1px solid #e2e8f0;
211
- }
212
 
213
- .example-card {
214
- transition: transform 0.2s ease;
215
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
216
 
217
- .example-card:hover {
218
- transform: translateY(-2px);
219
- }
220
 
221
- .gradio-container {
222
- font-family: 'Inter', -apple-system, BlinkMacSystemFont, sans-serif;
223
- }
 
 
 
224
 
225
- .input-section {
226
- background: #f8fafc;
227
- border-radius: 12px;
228
- padding: 1.5rem;
229
- border: 1px solid #e2e8f0;
230
- }
231
- """
232
-
233
- with gr.Blocks(css=custom_css, title="AI Content Moderator", theme=gr.themes.Soft()) as demo:
234
- with gr.Column(elem_classes="main-container"):
235
- gr.HTML("""
236
- <div class="header-section">
237
- <h1 style="font-size: 2.5rem; margin-bottom: 0.5rem; font-weight: 700;">
238
- 🛡️ AI Content Moderator
239
- </h1>
240
- <p style="font-size: 1.2rem; opacity: 0.9; margin: 0;">
241
- Advanced multilingual content classification powered by AI
242
- </p>
243
- </div>
244
- """)
245
-
246
- with gr.Row():
247
- with gr.Column(scale=3):
248
- with gr.Group(elem_classes="input-section"):
249
- gr.Markdown("### 📝 Content Analysis")
250
- text_input = gr.Textbox(
251
- label="Text to Analyze",
252
- placeholder="Enter any text in any language for content moderation analysis...",
253
- lines=6,
254
- max_lines=10,
255
- show_label=False
256
- )
257
-
258
- with gr.Row():
259
- classify_btn = gr.Button(
260
- "🔍 Analyze Content",
261
- variant="primary",
262
- size="lg",
263
- scale=2
264
- )
265
- clear_btn = gr.Button(
266
- "🗑️ Clear",
267
- variant="secondary",
268
- size="lg",
269
- scale=1
270
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
271
 
272
- with gr.Column(scale=2):
273
- with gr.Group(elem_classes="classification-panel"):
274
- gr.Markdown("### 📊 Classification Result")
275
- result_display = gr.HTML(
276
- value=format_classification_result("s", 0, 0),
277
- label="Result"
278
- )
279
-
280
- with gr.Accordion("⚙️ Advanced Configuration", open=False):
281
- with gr.Row():
282
- max_tokens_slider = gr.Slider(
283
- minimum=1, maximum=10, value=3, step=1,
284
- label="Max Tokens",
285
- info="Maximum number of tokens to generate"
286
- )
287
- temp_slider = gr.Slider(
288
- minimum=0.0, maximum=1.0, value=0.1, step=0.1,
289
- label="Temperature",
290
- info="Controls randomness in generation"
291
  )
292
- top_p_slider = gr.Slider(
293
- minimum=0.1, maximum=1.0, value=0.95, step=0.05,
294
- label="Top-p",
295
- info="Nucleus sampling parameter"
 
 
296
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
297
 
298
- gr.Markdown("### 💡 Try These Examples")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
299
 
300
- example_data = [
301
- ["Hello, how are you today? I hope you're having a wonderful time!"],
302
- ["I hate you and I will find you and hurt you badly."],
303
- ["C'est une belle journée pour apprendre la programmation et l'intelligence artificielle."],
304
- ["I can't take this anymore. I want to end everything and disappear forever."],
305
- ["¡Hola! Me encanta aprender nuevos idiomas y conocer diferentes culturas."],
306
- ["You're absolutely worthless and nobody will ever love someone like you."]
307
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
308
 
309
- examples = gr.Examples(
310
- examples=example_data,
311
- inputs=text_input,
312
- examples_per_page=6
 
313
  )
314
-
315
- gr.Markdown("""
316
  ---
317
- <div style="text-align: center; padding: 1rem; color: #64748b; font-size: 0.9rem;">
318
- <p><strong>🌍 Multilingual Support:</strong> English, Spanish, French, German, and many more languages</p>
319
- <p><strong>🚀 Real-time Analysis:</strong> Fast content classification with detailed feedback</p>
320
- <p><strong>🔒 Privacy First:</strong> All processing happens locally on your machine</p>
321
- </div>
322
- """)
323
-
324
- classify_btn.click(
325
- fn=classify_text_stream,
326
- inputs=[text_input, max_tokens_slider, temp_slider, top_p_slider],
327
- outputs=result_display,
328
- show_progress=True
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
329
  )
330
 
331
  clear_btn.click(
332
- fn=lambda: ("", format_classification_result("s", 0, 0)),
333
- outputs=[text_input, result_display]
334
  )
335
 
336
  if __name__ == "__main__":
337
  with torch.inference_mode():
338
  _ = model.generate(
339
- **tokenizer(["Hi"], return_tensors="pt").to(model.device),
340
  max_new_tokens=1, do_sample=False, use_cache=True
341
  )
342
- print("🚀 Starting AI Content Moderator...")
343
- demo.queue(max_size=64).launch(
344
  server_name="0.0.0.0",
345
  server_port=7860,
346
  share=False,
 
37
  return local_dir
38
 
39
  model_path = ensure_local_model(MODEL_REPO, LOCAL_DIR)
 
40
  tokenizer = AutoTokenizer.from_pretrained(model_path, local_files_only=True)
41
 
42
  gemma_chat_template_simplified = (
 
63
  )
64
  model.eval()
65
 
66
+ LANGUAGE_CONFIGS = {
67
+ "All Languages": {
68
+ "code": "all",
69
+ "prompt": """You are an advanced multilingual content moderation classifier with expertise in detecting harmful content across ALL languages including but not limited to: English, Turkish, Spanish, French, German, Italian, Portuguese, Russian, Chinese, Japanese, Korean, Arabic, Hindi, Dutch, Polish, Swedish, Norwegian, Finnish, Danish, Greek, Hebrew, Thai, Vietnamese, Indonesian, Malay, and many others.
70
+
71
+ CRITICAL INSTRUCTIONS:
72
+ - Analyze content in ANY language with extreme precision
73
+ - Be culturally aware and context-sensitive for each language
74
+ - Detect subtle harmful patterns, cultural slurs, and implicit threats
75
+ - Consider regional variations, dialects, and code-switching
76
+ - Pay special attention to transliterated harmful content
77
+
78
+ You MUST respond with exactly one lowercase letter: 's' for safe, 'u' for unsafe.
79
+ NO explanations, NO punctuation, NO extra words.
80
+
81
+ Content is UNSAFE ('u') if it contains:
82
+ - Hate speech, discrimination, or prejudice against any group
83
+ - Threats, violence, or harassment in any form
84
+ - Sexual content involving minors or explicit sexual material
85
+ - Self-harm encouragement or suicide promotion
86
+ - Extremist ideologies or radicalization content
87
+ - Doxxing, privacy violations, or personal attacks
88
+ - Spam, scams, or fraudulent content
89
+ - Drug trafficking or illegal activities promotion
90
+ - Cultural slurs or offensive language in any language
91
+ - Implicit threats disguised as jokes or metaphors
92
+
93
+ Otherwise respond 's' for safe content."""
94
+ },
95
+ "English": {
96
+ "code": "en",
97
+ "prompt": """You are a specialized English content moderation classifier with deep understanding of English language nuances, slang, cultural references, and implicit meanings.
98
+
99
+ CRITICAL INSTRUCTIONS FOR ENGLISH CONTENT:
100
+ - Detect sophisticated threats, sarcasm used maliciously, and implied harm
101
+ - Recognize English slang, internet terminology, and cultural references
102
+ - Identify cyberbullying patterns and passive-aggressive language
103
+ - Be aware of English-specific hate speech and discrimination patterns
104
+ - Detect coded language and dog whistles commonly used in English
105
+
106
+ You MUST respond with exactly one lowercase letter: 's' for safe, 'u' for unsafe.
107
+ NO explanations, NO punctuation, NO extra words.
108
+
109
+ Content is UNSAFE ('u') if it contains hate speech, threats, harassment, sexual content involving minors, self-harm encouragement, extremist content, or other harmful material in English.
110
+ Otherwise respond 's' for safe content."""
111
+ },
112
+ "Turkish": {
113
+ "code": "tr",
114
+ "prompt": """Sen Türkçe içerik moderasyonu konusunda uzmanlaşmış, Türk kültürü ve dilinin inceliklerini derinlemesine bilen bir sınıflandırıcısın.
115
+
116
+ TÜRKÇE İÇERİK İÇİN KRİTİK TALİMATLAR:
117
+ - Türkçe'ye özgü küfür, hakaret ve tehdit kalıplarını tespit et
118
+ - Kültürel referansları, argo ve yerel deyişleri anla
119
+ - Örtülü tehditleri, ima edilen şiddeti ve pasif-agresif dili yakala
120
+ - Türkçe klavye hatalarını ve kasıtlı çarpıtmaları dikkate al
121
+ - Nefret söylemi, ayrımcılık ve toplumsal önyargıları tespit et
122
+ - Türkçe internet slangı ve sosyal medya dilini analiz et
123
+
124
+ Kesinlikle tek küçük harf ile yanıtlamalısın: güvenli için 's', güvensiz için 'u'.
125
+ AÇIKLAMA YOK, NOKTALAMA YOK, FAZLA KELİME YOK.
126
+
127
+ İçerik şu durumda GÜVENSİZ ('u'): nefret söylemi, tehdit, taciz, küçükleri içeren cinsel içerik, kendine zarar vermeyi teşvik, aşırılık içeriği veya diğer zararlı materyaller içeriyorsa.
128
+ Aksi halde güvenli içerik için 's' yanıtla."""
129
+ },
130
+ "Spanish": {
131
+ "code": "es",
132
+ "prompt": """Eres un clasificador especializado de moderación de contenido en español con profundo conocimiento de las variaciones culturales del español en diferentes países y regiones.
133
+
134
+ INSTRUCCIONES CRÍTICAS PARA CONTENIDO EN ESPAÑOL:
135
+ - Detecta insultos, amenazas y patrones de odio específicos del español
136
+ - Reconoce variaciones regionales (España, México, Argentina, Colombia, etc.)
137
+ - Identifica lenguaje implícito, sarcasmo malicioso y amenazas veladas
138
+ - Comprende jerga de internet, modismos y referencias culturales hispanas
139
+ - Detecta discriminación, xenofobia y discurso de odio en español
140
+ - Analiza contenido que mezcle español con otros idiomas
141
+
142
+ Debes responder con exactamente una letra minúscula: 's' para seguro, 'u' para inseguro.
143
+ SIN explicaciones, SIN puntuación, SIN palabras extra.
144
+
145
+ El contenido es INSEGURO ('u') si contiene: discurso de odio, amenazas, acoso, contenido sexual con menores, promoción de autolesiones, contenido extremista u otro material dañino en español.
146
+ De lo contrario responde 's' para contenido seguro."""
147
+ },
148
+ "French": {
149
+ "code": "fr",
150
+ "prompt": """Vous êtes un classificateur spécialisé de modération de contenu français avec une compréhension approfondie des nuances culturelles et linguistiques du français dans différentes régions francophones.
151
+
152
+ INSTRUCTIONS CRITIQUES POUR LE CONTENU FRANÇAIS:
153
+ - Détecter les insultes, menaces et discours haineux spécifiques au français
154
+ - Reconnaître les variations régionales (France, Québec, Belgique, Suisse, Afrique francophone)
155
+ - Identifier le langage implicite, le sarcasme malveillant et les menaces voilées
156
+ - Comprendre l'argot internet, les expressions idiomatiques et références culturelles françaises
157
+ - Détecter la discrimination, la xenophobie et les propos haineux en français
158
+ - Analyser le contenu mélant français et autres langues
159
+
160
+ Vous DEVEZ répondre avec exactement une lettre minuscule: 's' pour sûr, 'u' pour dangereux.
161
+ AUCUNE explication, AUCUNE ponctuation, AUCUN mot supplémentaire.
162
+
163
+ Le contenu est DANGEREUX ('u') s'il contient: discours de haine, menaces, harcèlement, contenu sexuel impliquant des mineurs, encouragement à l'automutilation, contenu extrémiste ou autre matériel nuisible en français.
164
+ Sinon répondez 's' pour un contenu sûr."""
165
+ },
166
+ "German": {
167
+ "code": "de",
168
+ "prompt": """Sie sind ein spezialisierter deutscher Content-Moderations-Klassifikator mit tiefem Verständnis für deutsche Sprachnuancen, kulturelle Besonderheiten und regionale Variationen.
169
+
170
+ KRITISCHE ANWEISUNGEN FÜR DEUTSCHEN INHALT:
171
+ - Erkennen Sie spezifisch deutsche Beleidigungen, Drohungen und Hassmuster
172
+ - Verstehen Sie regionale Variationen (Deutschland, Österreich, Schweiz)
173
+ - Identifizieren Sie implizite Sprache, bösartigen Sarkasmus und versteckte Drohungen
174
+ - Erkennen Sie deutschen Internet-Slang, Redewendungen und kulturelle Referenzen
175
+ - Detektieren Sie Diskriminierung, Fremdenfeindlichkeit und Hassrede auf Deutsch
176
+ - Analysieren Sie Inhalte, die Deutsch mit anderen Sprachen vermischen
177
+
178
+ Sie MÜSSEN mit genau einem Kleinbuchstaben antworten: 's' für sicher, 'u' für unsicher.
179
+ KEINE Erklärungen, KEINE Satzzeichen, KEINE zusätzlichen Wörter.
180
+
181
+ Inhalt ist UNSICHER ('u') wenn er enthält: Hassrede, Drohungen, Belästigung, sexuelle Inhalte mit Minderjährigen, Selbstverletzungsförderung, extremistische Inhalte oder anderes schädliches Material auf Deutsch.
182
+ Andernfalls antworten Sie 's' für sicheren Inhalt."""
183
+ },
184
+ "Russian": {
185
+ "code": "ru",
186
+ "prompt": """Вы специализированный классификатор модерации русскоязычного контента с глубоким пониманием русской культуры, сленга и языковых особенностей всех русскоговорящих регионов.
187
+
188
+ КРИТИЧЕСКИЕ ИНСТРУКЦИИ ДЛЯ РУССКОГО КОНТЕНТА:
189
+ - Выявляйте специфические для русского языка оскорбления, угрозы и паттерны ненависти
190
+ - Понимайте региональные вариации (Россия, Украина, Беларусь, Казахстан и др.)
191
+ - Идентифицируйте скрытые угрозы, злонамеренный сарказм и имплицитную агрессию
192
+ - Распознавайте русский интернет-сленг, мемы и культурные отсылки
193
+ - Детектируйте дискриминацию, ксенофобию и язык ненависти на русском
194
+ - Анализируйте контент, смешивающий русский с другими языками
195
+
196
+ Вы ДОЛЖНЫ отвечать точно одной строчной буквой: 's' для безопасного, 'u' для опасного.
197
+ НИКАКИХ объяснений, НИКАКИХ знаков препинания, НИКАКИХ лишних слов.
198
+
199
+ Контент ОПАСЕН ('u') если содержит: язык ненависти, угрозы, домогательства, сексуальный контент с участием несовершеннолетних, поощрение самоповреждения, экстремистский контент или другие вредные материалы на русском языке.
200
+ В противном случае отвечайте 's' для безопасного контента."""
201
+ },
202
+ "Chinese": {
203
+ "code": "zh",
204
+ "prompt": """您是专业的中文内容审核分类器,对中文语言的细微差别、文化内涵以及简体中文和繁体中文的区域变化有深刻理解。
205
 
206
+ 中文内容的关键指令:
207
+ - 识别中文特有的侮辱、威胁和仇恨言论模式
208
+ - 理解地域变化(大陆、台湾、香港、新加坡等)
209
+ - 识别暗示性语言、恶意讽刺和隐藏威胁
210
+ - 识别中文网络俚语、表情包和文化梗
211
+ - 检测歧视、排外和中文仇恨言论
212
+ - 分析中英文混合或其他语言混合内容
213
+ - 注意拼音、数字代码和谐音等规避审查的表达方式
214
+
215
+ 您必须只用一个小写字母回复:安全内容用's',不安全内容用'u'。
216
+ 不要解释,不要标点,不要多余的词。
217
+
218
+ 内容不安全('u')的条件:包含仇恨言论、威胁、骚扰、涉及未成年人的性内容、鼓励自残、极端主义内容或其他中文有害材料。
219
+ 否则回复's'表示内容安全。"""
220
+ },
221
+ "Arabic": {
222
+ "code": "ar",
223
+ "prompt": """أنت مصنف متخصص في إشراف المحتوى العربي مع فهم عميق للفروق اللغوية العربية والاختلافات الثقافية عبر المناطق الناطقة بالعربية.
224
+
225
+ تعليمات حاسمة للمحتوى العربي:
226
+ - اكتشف الإهانات والتهديدات وأنماط الكراهية الخاصة باللغة العربية
227
+ - افهم الاختلافات الإقليمية (الخليج، المغرب العربي، المشرق، مصر)
228
+ - تعرف على اللغة الضمنية والسخرية الخبيثة والتهديدات المبطنة
229
+ - اكتشف العامية العربية وأسلوب الإنترنت والمراجع الثقافية
230
+ - رصد التمييز وكراهية الأجانب وخطاب الكراهية بالعربية
231
+ - حلل المحتوى الذي يخلط العربية مع لغات أخرى
232
+ - انتبه للكتابة بالأرقام والحروف اللاتينية (عربيزي)
233
+
234
+ يجب أن ترد بحرف صغير واحد بالضبط: 's' للآمن، 'u' للغير آمن.
235
+ بدون تفسيرات، بدون علامات ترقيم، بدون كلمات إضافية.
236
+
237
+ المحتوى غير آمن ('u') إذا كان يحتوي على: خطاب كراهية، تهديدات، مضايقة، محتوى جنسي يشمل قاصرين، تشجيع إيذاء النفس، محتوى متطرف أو مواد ضارة أخرى بالعربية.
238
+ وإلا أجب 's' للمحتوى الآمن."""
239
+ }
240
+ }
241
+
242
+ def build_prompt(message, language, max_ctx_tokens=256):
243
+ system_prompt = LANGUAGE_CONFIGS[language]["prompt"]
244
+ full_user_message = f"{system_prompt}\n\nUser input to analyze: '{message}'"
245
  messages = [{"role": "user", "content": full_user_message}]
246
 
247
  text = tokenizer.apply_chat_template(
 
250
  add_generation_prompt=True
251
  )
252
 
253
+ while len(tokenizer(text, add_special_tokens=False).input_ids) > max_ctx_tokens and len(full_user_message) > 200:
254
+ full_user_message = full_user_message[:-100]
255
  messages[0]['content'] = full_user_message
256
  text = tokenizer.apply_chat_template(
257
  messages,
 
264
  text_lower = text.strip().lower()
265
  if "u" in text_lower and "s" not in text_lower:
266
  return "u"
267
+ if "unsafe" in text_lower or "dangerous" in text_lower or "harmful" in text_lower:
268
  return "u"
269
  return "s"
270
 
271
+ def classify_content(message, language, max_tokens, temperature, top_p):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
272
  if not message.strip():
273
+ return "⚠️ Please enter text to analyze", "", ""
274
 
275
+ start_time = time.time()
276
+ text = build_prompt(message, language)
277
  inputs = tokenizer([text], return_tensors="pt").to(model.device)
278
  do_sample = bool(temperature and temperature > 0.0)
279
+
280
  gen_kwargs = dict(
281
  max_new_tokens=max_tokens,
282
  do_sample=do_sample,
 
299
 
300
  partial_text = ""
301
  token_count = 0
 
 
 
302
 
303
  with torch.inference_mode():
304
  thread.start()
305
  try:
306
  for chunk in streamer:
 
 
307
  partial_text += chunk
308
  token_count += 1
 
309
  finally:
310
  thread.join()
311
 
312
+ end_time = time.time()
313
+ processing_time = end_time - start_time
314
+ tokens_per_second = token_count / processing_time if processing_time > 0 else 0
 
315
 
316
+ final_classification = enforce_s_u(partial_text)
317
 
318
+ if final_classification == "s":
319
+ result = "✅ SAFE CONTENT"
320
+ description = "This content appears to be appropriate and does not violate community guidelines."
321
+ color = "green"
322
+ else:
323
+ result = "🚫 UNSAFE CONTENT"
324
+ description = "This content may contain harmful, inappropriate, or policy-violating material."
325
+ color = "red"
326
+
327
+ performance = f"⚡ Processing: {processing_time:.3f}s | 🚀 Speed: {tokens_per_second:.1f} tok/s | 🔤 Tokens: {token_count}"
328
+
329
+ return result, description, performance
 
 
 
 
 
 
 
 
 
 
 
 
330
 
331
+ def get_example_texts():
332
+ return {
333
+ "All Languages": [
334
+ "Hello! How are you today?",
335
+ "I hate you and will find you.",
336
+ "Hola, ¿cómo estás? Espero que tengas un buen día.",
337
+ "Je vais te tuer si tu continues.",
338
+ "Ich liebe es, neue Sprachen zu lernen!",
339
+ ],
340
+ "English": [
341
+ "Have a wonderful day everyone!",
342
+ "I'm going to hunt you down and make you pay.",
343
+ "This is an amazing community project.",
344
+ "You should just end it all, nobody cares about you.",
345
+ ],
346
+ "Turkish": [
347
+ "Bugün hava çok güzel, keyifli bir gün geçiriyorum.",
348
+ "Seni bulacağım ve öldüreceğim.",
349
+ "Türkiye'de yaşamak çok güzel.",
350
+ "Sen çok değersizsin, kimse seni sevmez.",
351
+ ],
352
+ "Spanish": [
353
+ "¡Qué hermoso día para aprender algo nuevo!",
354
+ "Te voy a encontrar y te voy a lastimar.",
355
+ "Me encanta la cultura española.",
356
+ "Deberías matarte, nadie te va a extrañar.",
357
+ ],
358
+ "French": [
359
+ "J'adore apprendre de nouvelles choses chaque jour.",
360
+ "Je vais te faire du mal si tu continues.",
361
+ "Paris est une ville magnifique.",
362
+ "Tu devrais te suicider, personne ne t'aime.",
363
+ ],
364
+ "German": [
365
+ "Ich freue mich auf das Wochenende!",
366
+ "Ich werde dich finden und dir wehtun.",
367
+ "Deutschland hat eine reiche Kultur.",
368
+ "Du solltest dich umbringen, du bist wertlos.",
369
+ ],
370
+ "Russian": [
371
+ "Сегодня прекрасный день для изучения нового!",
372
+ "Я найду тебя и причиню боль.",
373
+ "Россия - прекрасная страна с богатой историей.",
374
+ "Ты должен покончить с собой, ты никому не нужен.",
375
+ ],
376
+ "Chinese": [
377
+ "今天是学习新知识的美好一天!",
378
+ "我会找到你并伤害你。",
379
+ "中国有着悠久的历史和灿烂的文化。",
380
+ "你应该去死,没人会想念你。",
381
+ ],
382
+ "Arabic": [
383
+ "أحب تعلم أشياء جديدة كل يوم.",
384
+ "سأجدك وأؤذيك إذا واصلت هذا.",
385
+ "العالم العربي له تاريخ عريق وثقافة غنية.",
386
+ "يجب أن تقتل نفسك، لا أحد يهتم بك.",
387
+ ]
388
+ }
389
 
390
+ def update_examples(language):
391
+ examples = get_example_texts()
392
+ return gr.Dataset(samples=[[ex] for ex in examples.get(language, [])])
393
 
394
+ theme = gr.themes.Soft(
395
+ primary_hue="blue",
396
+ secondary_hue="gray",
397
+ neutral_hue="gray",
398
+ font=gr.themes.GoogleFont("Inter")
399
+ )
400
 
401
+ with gr.Blocks(
402
+ theme=theme,
403
+ title="🛡️ AI Content Moderator Pro",
404
+ css="""
405
+ .main-header {
406
+ background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
407
+ color: white;
408
+ padding: 2rem;
409
+ border-radius: 16px;
410
+ margin-bottom: 2rem;
411
+ text-align: center;
412
+ }
413
+ .result-safe {
414
+ background: linear-gradient(135deg, #d4edda 0%, #c3e6cb 100%);
415
+ border: 2px solid #28a745;
416
+ color: #155724;
417
+ padding: 1.5rem;
418
+ border-radius: 12px;
419
+ margin: 1rem 0;
420
+ }
421
+ .result-unsafe {
422
+ background: linear-gradient(135deg, #f8d7da 0%, #f5c6cb 100%);
423
+ border: 2px solid #dc3545;
424
+ color: #721c24;
425
+ padding: 1.5rem;
426
+ border-radius: 12px;
427
+ margin: 1rem 0;
428
+ }
429
+ .performance-info {
430
+ background: #f8f9fa;
431
+ padding: 1rem;
432
+ border-radius: 8px;
433
+ margin-top: 1rem;
434
+ font-family: monospace;
435
+ font-size: 0.9rem;
436
+ }
437
+ .language-selector {
438
+ background: white;
439
+ border: 2px solid #007bff;
440
+ border-radius: 8px;
441
+ padding: 0.5rem;
442
+ }
443
+ .analysis-panel {
444
+ background: #ffffff;
445
+ border: 1px solid #e9ecef;
446
+ border-radius: 12px;
447
+ padding: 2rem;
448
+ box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
449
+ }
450
+ .examples-section {
451
+ background: #f8f9fa;
452
+ border-radius: 12px;
453
+ padding: 1.5rem;
454
+ margin-top: 2rem;
455
+ }
456
+ """
457
+ ) as app:
458
+
459
+ gr.HTML("""
460
+ <div class="main-header">
461
+ <h1 style="font-size: 2.5rem; margin-bottom: 0.5rem; font-weight: 700;">
462
+ 🛡️ AI Content Moderator Pro
463
+ </h1>
464
+ <p style="font-size: 1.2rem; opacity: 0.9; margin: 0;">
465
+ Advanced Multilingual Content Safety Classification System
466
+ </p>
467
+ </div>
468
+ """)
469
+
470
+ with gr.Row():
471
+ with gr.Column(scale=2):
472
+ gr.Markdown("## 🔍 Content Analysis")
473
 
474
+ with gr.Group(elem_classes="analysis-panel"):
475
+ language_dropdown = gr.Dropdown(
476
+ choices=list(LANGUAGE_CONFIGS.keys()),
477
+ value="All Languages",
478
+ label="🌍 Analysis Language Mode",
479
+ info="Select the primary language or use 'All Languages' for multilingual detection",
480
+ elem_classes="language-selector"
 
 
 
 
 
 
 
 
 
 
 
 
481
  )
482
+
483
+ text_input = gr.Textbox(
484
+ label="📝 Content to Analyze",
485
+ placeholder="Enter any text content here for safety analysis...\n\nSupports multiple languages and cultural contexts.",
486
+ lines=8,
487
+ max_lines=15
488
  )
489
+
490
+ with gr.Row():
491
+ analyze_btn = gr.Button(
492
+ "🔍 Analyze Content",
493
+ variant="primary",
494
+ size="lg",
495
+ scale=3
496
+ )
497
+ clear_btn = gr.Button(
498
+ "🗑️ Clear All",
499
+ variant="secondary",
500
+ size="lg",
501
+ scale=1
502
+ )
503
 
504
+ with gr.Column(scale=2):
505
+ gr.Markdown("## 📊 Analysis Results")
506
+
507
+ result_output = gr.Textbox(
508
+ label="🎯 Classification Result",
509
+ interactive=False,
510
+ lines=2
511
+ )
512
+
513
+ description_output = gr.Textbox(
514
+ label="📋 Detailed Analysis",
515
+ interactive=False,
516
+ lines=3
517
+ )
518
+
519
+ performance_output = gr.Textbox(
520
+ label="⚡ Performance Metrics",
521
+ interactive=False,
522
+ lines=1,
523
+ elem_classes="performance-info"
524
+ )
525
+
526
+ with gr.Accordion("⚙️ Advanced Model Configuration", open=False):
527
+ gr.Markdown("### Fine-tune the analysis parameters for optimal results")
528
 
529
+ with gr.Row():
530
+ max_tokens_slider = gr.Slider(
531
+ minimum=1,
532
+ maximum=10,
533
+ value=3,
534
+ step=1,
535
+ label="🔢 Max Tokens",
536
+ info="Maximum tokens to generate (higher = more detailed analysis)"
537
+ )
538
+
539
+ temperature_slider = gr.Slider(
540
+ minimum=0.0,
541
+ maximum=1.0,
542
+ value=0.1,
543
+ step=0.1,
544
+ label="🌡️ Temperature",
545
+ info="Randomness in generation (0 = deterministic, 1 = creative)"
546
+ )
547
+
548
+ top_p_slider = gr.Slider(
549
+ minimum=0.1,
550
+ maximum=1.0,
551
+ value=0.95,
552
+ step=0.05,
553
+ label="🎯 Top-p (Nucleus Sampling)",
554
+ info="Diversity of token selection (lower = more focused)"
555
+ )
556
+
557
+ with gr.Group(elem_classes="examples-section"):
558
+ gr.Markdown("## 💡 Interactive Examples")
559
+ gr.Markdown("*Examples automatically update based on your selected language mode*")
560
 
561
+ examples_dataset = gr.Dataset(
562
+ components=[text_input],
563
+ samples=[[ex] for ex in get_example_texts()["All Languages"]],
564
+ type="index",
565
+ label="Click any example to test it:"
566
  )
567
+
568
+ gr.Markdown("""
569
  ---
570
+ ### 🌟 Features & Capabilities
571
+
572
+ **🌍 Multilingual Support:** Advanced detection across 20+ languages with cultural awareness
573
+ **🎯 High Precision:** Specialized models for different language families and regions
574
+ **🚀 Real-time Analysis:** Fast processing with detailed performance metrics
575
+ **🔒 Privacy Focused:** All processing happens locally on your infrastructure
576
+ **🛡️ Comprehensive Detection:** Hate speech, threats, harassment, explicit content, and more
577
+ **🎨 Cultural Awareness:** Understanding of regional variations, slang, and cultural contexts
578
+ """)
579
+
580
+ def on_language_change(language):
581
+ return update_examples(language)
582
+
583
+ def on_example_select(evt: gr.SelectData):
584
+ examples = get_example_texts()
585
+ current_language = "All Languages" # Default fallback
586
+ return examples[current_language][evt.index]
587
+
588
+ language_dropdown.change(
589
+ fn=on_language_change,
590
+ inputs=language_dropdown,
591
+ outputs=examples_dataset
592
+ )
593
+
594
+ examples_dataset.select(
595
+ fn=on_example_select,
596
+ outputs=text_input
597
+ )
598
+
599
+ analyze_btn.click(
600
+ fn=classify_content,
601
+ inputs=[text_input, language_dropdown, max_tokens_slider, temperature_slider, top_p_slider],
602
+ outputs=[result_output, description_output, performance_output]
603
  )
604
 
605
  clear_btn.click(
606
+ fn=lambda: ("", "Ready for analysis...", "Select content and language to begin", ""),
607
+ outputs=[text_input, result_output, description_output, performance_output]
608
  )
609
 
610
  if __name__ == "__main__":
611
  with torch.inference_mode():
612
  _ = model.generate(
613
+ **tokenizer(["Test"], return_tensors="pt").to(model.device),
614
  max_new_tokens=1, do_sample=False, use_cache=True
615
  )
616
+ print("🚀 Starting AI Content Moderator Pro...")
617
+ app.queue(max_size=64).launch(
618
  server_name="0.0.0.0",
619
  server_port=7860,
620
  share=False,