nixaut-codelabs committed on
Commit
f2361fc
·
verified ·
1 Parent(s): 5a39cfb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +43 -60
app.py CHANGED
@@ -6,12 +6,11 @@ import gradio as gr
6
  from huggingface_hub import snapshot_download
7
  from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
8
 
9
- # --- Model ve Ortam Ayarları (Değişiklik yok) ---
10
  MODEL_REPO = "daniel-dona/gemma-3-270m-it"
11
  LOCAL_DIR = os.path.join(os.getcwd(), "local_model")
12
 
13
  os.environ.setdefault("HF_HUB_ENABLE_HF_TRANSFER", "1")
14
- os.environ.setdefault("OMP_NUM_THREADS", str(os.cpu_count() or 2))
15
  os.environ.setdefault("MKL_NUM_THREADS", os.environ["OMP_NUM_THREADS"])
16
  os.environ.setdefault("OMP_PROC_BIND", "TRUE")
17
 
@@ -74,20 +73,23 @@ MODERATION_SYSTEM_PROMPT = (
74
  "Otherwise respond 's'."
75
  )
76
 
77
- # --- Yardımcı Fonksiyonlar (Değişiklik yok) ---
78
  def build_prompt(message, max_ctx_tokens=128):
79
  full_user_message = f"{MODERATION_SYSTEM_PROMPT}\n\nUser input: '{message}'"
80
  messages = [{"role": "user", "content": full_user_message}]
81
 
82
  text = tokenizer.apply_chat_template(
83
- messages, tokenize=False, add_generation_prompt=True
 
 
84
  )
85
 
86
  while len(tokenizer(text, add_special_tokens=False).input_ids) > max_ctx_tokens and len(full_user_message) > 100:
87
  full_user_message = full_user_message[:-50]
88
  messages[0]['content'] = full_user_message
89
  text = tokenizer.apply_chat_template(
90
- messages, tokenize=False, add_generation_prompt=True
 
 
91
  )
92
  return text
93
 
@@ -99,31 +101,24 @@ def enforce_s_u(text: str) -> str:
99
  return "u"
100
  return "s"
101
 
102
- # --- YENİ: Sonuçları Gradio bileşenlerine formatlayan fonksiyon ---
103
  def format_classification_result(classification, tokens_per_second, processing_time):
104
- """
105
- Sınıflandırma sonucunu birden fazla Gradio bileşenine uygun bir demet (tuple) olarak döndürür.
106
- """
107
  if classification == "s":
108
- label = {"label": "GÜVENLİ", "confidences": [{"label": "GÜVENLİ", "confidence": 1.0}]}
109
- description = "İçeriğin güvenli ve uygun olduğu tespit edildi."
110
  else:
111
- label = {"label": "🚫 GÜVENLİ DEĞİL", "confidences": [{"label": "GÜVENLİ DEĞİL", "confidence": 1.0}]}
112
- description = "İçerik, uygunsuz veya zararlı materyal barındırıyor olabilir."
113
 
114
  tps_str = f"{tokens_per_second:.1f} tok/s"
115
  time_str = f"{processing_time:.2f} s"
116
 
117
  return label, description, tps_str, time_str
118
 
119
-
120
- # --- GÜNCELLENDİ: Ana Sınıflandırma Fonksiyonu ---
121
  def classify_text_stream(message, max_tokens, temperature, top_p, progress=gr.Progress()):
122
  if not message.strip():
123
- # HTML yerine yeni formatta varsayılan değerleri döndür
124
  return format_classification_result("s", 0, 0)
125
 
126
- progress(0, desc="Sınıflandırma hazırlanıyor...")
127
  text = build_prompt(message)
128
  inputs = tokenizer([text], return_tensors="pt").to(model.device)
129
  do_sample = bool(temperature and temperature > 0.0)
@@ -151,7 +146,7 @@ def classify_text_stream(message, max_tokens, temperature, top_p, progress=gr.Pr
151
  token_count = 0
152
  start_time = None
153
 
154
- progress(0.3, desc="İçerik işleniyor...")
155
 
156
  with torch.inference_mode():
157
  thread.start()
@@ -161,7 +156,7 @@ def classify_text_stream(message, max_tokens, temperature, top_p, progress=gr.Pr
161
  start_time = time.time()
162
  partial_text += chunk
163
  token_count += 1
164
- progress(0.3 + (token_count / max_tokens) * 0.6, desc="Analiz ediliyor...")
165
  finally:
166
  thread.join()
167
 
@@ -170,18 +165,15 @@ def classify_text_stream(message, max_tokens, temperature, top_p, progress=gr.Pr
170
  duration = max(1e-6, end_time - start_time)
171
  tps = token_count / duration if duration > 0 else 0.0
172
 
173
- progress(1.0, desc="Tamamlandı!")
174
 
175
- # HTML dizesi yerine, birden çok bileşen için bir demet (tuple) döndür
176
  return format_classification_result(final_label, tps, duration)
177
 
178
-
179
- # --- YENİ: Gradio Arayüzü (HTML/CSS olmadan) ---
180
- with gr.Blocks(title="AI İçerik Moderatörü", theme=gr.themes.Soft()) as demo:
181
  gr.Markdown(
182
  """
183
- # 🛡️ AI İçerik Moderatörü
184
- Yapay zeka ile güçlendirilmiş, çok dilli ve gelişmiş içerik sınıflandırma aracı.
185
  """
186
  )
187
 
@@ -189,90 +181,81 @@ with gr.Blocks(title="AI İçerik Moderatörü", theme=gr.themes.Soft()) as demo
189
  with gr.Column(scale=3):
190
  with gr.Group():
191
  text_input = gr.Textbox(
192
- label="Analiz Edilecek Metin",
193
- placeholder="İçerik denetimi analizi için herhangi bir dilde metin girin...",
194
  lines=8,
195
  max_lines=15
196
  )
197
  with gr.Row():
198
- clear_btn = gr.Button("🗑️ Temizle", variant="secondary")
199
- classify_btn = gr.Button("🔍 Analiz Et", variant="primary", scale=2)
200
 
201
  with gr.Column(scale=2):
202
- gr.Markdown("### 📊 Sınıflandırma Sonucu")
203
- result_label = gr.Label(label="Durum", num_top_classes=1)
204
- result_description = gr.Markdown(value="*Analiz için bir metin girip 'Analiz Et' butonuna tıklayın.*")
205
  with gr.Row():
206
- tps_output = gr.Textbox(label="Performans (Token/sn)", interactive=False)
207
- time_output = gr.Textbox(label="İşlem Süresi (sn)", interactive=False)
208
 
209
- with gr.Accordion("⚙️ Gelişmiş Ayarlar", open=False):
210
  with gr.Row():
211
  max_tokens_slider = gr.Slider(
212
  minimum=1, maximum=10, value=3, step=1,
213
- label="Maksimum Token",
214
- info="Modelin üreteceği maksimum token sayısı (genellikle 1-3 arası yeterlidir)"
215
  )
216
  temp_slider = gr.Slider(
217
  minimum=0.0, maximum=1.0, value=0.1, step=0.1,
218
- label="Sıcaklık (Temperature)",
219
- info="Üretimdeki rastgeleliği kontrol eder"
220
  )
221
  top_p_slider = gr.Slider(
222
  minimum=0.1, maximum=1.0, value=0.95, step=0.05,
223
  label="Top-p",
224
- info="Çekirdek örnekleme parametresi"
225
  )
226
 
227
- gr.Markdown("### 💡 Örnekleri Deneyin")
228
  gr.Examples(
229
  examples=[
230
- ["Merhaba, bugün nasılsın? Umarım harika bir gün geçiriyorsundur!"],
231
- ["Senden nefret ediyorum, seni bulup canını çok yakacağım."],
232
  ["C'est une belle journée pour apprendre la programmation et l'intelligence artificielle."],
233
- ["Artık dayanamıyorum. Her şeyi bitirip sonsuza dek yok olmak istiyorum."],
234
  ["¡Hola! Me encanta aprender nuevos idiomas y conocer diferentes culturas."],
235
- ["Sen kesinlikle değersiz birisin ve kimse senin gibi birini sevmeyecek."]
236
  ],
237
  inputs=text_input,
238
- label="Örnek Metinler"
239
  )
240
 
241
- # --- Butonların Olay Yöneticileri ---
242
-
243
- # Varsayılan durumu döndüren bir yardımcı fonksiyon
244
  def get_default_state():
245
- default_label, default_desc, default_tps, default_time = format_classification_result("s", 0, 0)
246
- # Etiketin boş görünmesi için başlangıçta None olarak ayarlıyoruz
247
- return "", None, "*Analiz için bir metin girin.*", "0.0 tok/s", "0.00 s"
248
 
249
- # classify_btn tıklandığında, birden fazla çıkışı günceller
250
  classify_btn.click(
251
  fn=classify_text_stream,
252
  inputs=[text_input, max_tokens_slider, temp_slider, top_p_slider],
253
  outputs=[result_label, result_description, tps_output, time_output]
254
  )
255
 
256
- # clear_btn tıklandığında, hem girişi hem de tüm çıkışları temizler
257
  clear_btn.click(
258
  fn=get_default_state,
259
  outputs=[text_input, result_label, result_description, tps_output, time_output]
260
  )
261
 
262
- # Arayüz ilk yüklendiğinde varsayılan durumu ayarla
263
  demo.load(
264
- fn=lambda: (None, "*Analiz için bir metin girin.*", "0.0 tok/s", "0.00 s"),
265
  outputs=[result_label, result_description, tps_output, time_output]
266
  )
267
 
268
  if __name__ == "__main__":
269
- # Modelin ilk çıkarım için ısınmasını sağla
270
  with torch.inference_mode():
271
  _ = model.generate(
272
- **tokenizer(["Merhaba"], return_tensors="pt").to(model.device),
273
  max_new_tokens=1, do_sample=False, use_cache=True
274
  )
275
- print("🚀 AI İçerik Moderatörü Başlatılıyor...")
276
  demo.queue(max_size=64).launch(
277
  server_name="0.0.0.0",
278
  server_port=7860,
 
6
  from huggingface_hub import snapshot_download
7
  from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
8
 
 
9
  MODEL_REPO = "daniel-dona/gemma-3-270m-it"
10
  LOCAL_DIR = os.path.join(os.getcwd(), "local_model")
11
 
12
  os.environ.setdefault("HF_HUB_ENABLE_HF_TRANSFER", "1")
13
+ os.environ.setdefault("OMP_NUM_THREADS", str(os.cpu_count() or 1))
14
  os.environ.setdefault("MKL_NUM_THREADS", os.environ["OMP_NUM_THREADS"])
15
  os.environ.setdefault("OMP_PROC_BIND", "TRUE")
16
 
 
73
  "Otherwise respond 's'."
74
  )
75
 
 
76
  def build_prompt(message, max_ctx_tokens=128):
77
  full_user_message = f"{MODERATION_SYSTEM_PROMPT}\n\nUser input: '{message}'"
78
  messages = [{"role": "user", "content": full_user_message}]
79
 
80
  text = tokenizer.apply_chat_template(
81
+ messages,
82
+ tokenize=False,
83
+ add_generation_prompt=True
84
  )
85
 
86
  while len(tokenizer(text, add_special_tokens=False).input_ids) > max_ctx_tokens and len(full_user_message) > 100:
87
  full_user_message = full_user_message[:-50]
88
  messages[0]['content'] = full_user_message
89
  text = tokenizer.apply_chat_template(
90
+ messages,
91
+ tokenize=False,
92
+ add_generation_prompt=True
93
  )
94
  return text
95
 
 
101
  return "u"
102
  return "s"
103
 
 
104
  def format_classification_result(classification, tokens_per_second, processing_time):
 
 
 
105
  if classification == "s":
106
+ label = "✅ SAFE"
107
+ description = "Content appears to be safe and appropriate."
108
  else:
109
+ label = "🚫 UNSAFE"
110
+ description = "Content may contain inappropriate or harmful material."
111
 
112
  tps_str = f"{tokens_per_second:.1f} tok/s"
113
  time_str = f"{processing_time:.2f} s"
114
 
115
  return label, description, tps_str, time_str
116
 
 
 
117
  def classify_text_stream(message, max_tokens, temperature, top_p, progress=gr.Progress()):
118
  if not message.strip():
 
119
  return format_classification_result("s", 0, 0)
120
 
121
+ progress(0, desc="Preparing classification...")
122
  text = build_prompt(message)
123
  inputs = tokenizer([text], return_tensors="pt").to(model.device)
124
  do_sample = bool(temperature and temperature > 0.0)
 
146
  token_count = 0
147
  start_time = None
148
 
149
+ progress(0.3, desc="Processing content...")
150
 
151
  with torch.inference_mode():
152
  thread.start()
 
156
  start_time = time.time()
157
  partial_text += chunk
158
  token_count += 1
159
+ progress(0.3 + (token_count / max_tokens) * 0.6, desc="Analyzing...")
160
  finally:
161
  thread.join()
162
 
 
165
  duration = max(1e-6, end_time - start_time)
166
  tps = token_count / duration if duration > 0 else 0.0
167
 
168
+ progress(1.0, desc="Complete!")
169
 
 
170
  return format_classification_result(final_label, tps, duration)
171
 
172
+ with gr.Blocks(title="AI Content Moderator", theme=gr.themes.Soft()) as demo:
 
 
173
  gr.Markdown(
174
  """
175
+ # 🛡️ AI Content Moderator
176
+ An advanced, multilingual content classification tool powered by AI.
177
  """
178
  )
179
 
 
181
  with gr.Column(scale=3):
182
  with gr.Group():
183
  text_input = gr.Textbox(
184
+ label="Text to Analyze",
185
+ placeholder="Enter any text in any language for content moderation analysis...",
186
  lines=8,
187
  max_lines=15
188
  )
189
  with gr.Row():
190
+ clear_btn = gr.Button("🗑️ Clear", variant="secondary")
191
+ classify_btn = gr.Button("🔍 Analyze Content", variant="primary", scale=2)
192
 
193
  with gr.Column(scale=2):
194
+ gr.Markdown("### 📊 Classification Result")
195
+ result_label = gr.Label(label="Status")
196
+ result_description = gr.Markdown(value="*Enter text and click 'Analyze Content' to see the result.*")
197
  with gr.Row():
198
+ tps_output = gr.Textbox(label="Performance (tok/s)", interactive=False)
199
+ time_output = gr.Textbox(label="Processing Time (s)", interactive=False)
200
 
201
+ with gr.Accordion("⚙️ Advanced Configuration", open=False):
202
  with gr.Row():
203
  max_tokens_slider = gr.Slider(
204
  minimum=1, maximum=10, value=3, step=1,
205
+ label="Max New Tokens",
206
+ info="Maximum number of tokens for the model to generate (1-3 is usually sufficient)."
207
  )
208
  temp_slider = gr.Slider(
209
  minimum=0.0, maximum=1.0, value=0.1, step=0.1,
210
+ label="Temperature",
211
+ info="Controls the randomness of the generation."
212
  )
213
  top_p_slider = gr.Slider(
214
  minimum=0.1, maximum=1.0, value=0.95, step=0.05,
215
  label="Top-p",
216
+ info="Nucleus sampling parameter."
217
  )
218
 
219
+ gr.Markdown("### 💡 Try These Examples")
220
  gr.Examples(
221
  examples=[
222
+ ["Hello, how are you today? I hope you're having a wonderful time!"],
223
+ ["I hate you and I will find you and hurt you badly."],
224
  ["C'est une belle journée pour apprendre la programmation et l'intelligence artificielle."],
225
+ ["I can't take this anymore. I want to end everything and disappear forever."],
226
  ["¡Hola! Me encanta aprender nuevos idiomas y conocer diferentes culturas."],
227
+ ["You're absolutely worthless and nobody will ever love someone like you."]
228
  ],
229
  inputs=text_input,
230
+ label="Example Prompts"
231
  )
232
 
 
 
 
233
  def get_default_state():
234
+ return "", None, "*Enter text and click 'Analyze Content' to see the result.*", "0.0 tok/s", "0.00 s"
 
 
235
 
 
236
  classify_btn.click(
237
  fn=classify_text_stream,
238
  inputs=[text_input, max_tokens_slider, temp_slider, top_p_slider],
239
  outputs=[result_label, result_description, tps_output, time_output]
240
  )
241
 
 
242
  clear_btn.click(
243
  fn=get_default_state,
244
  outputs=[text_input, result_label, result_description, tps_output, time_output]
245
  )
246
 
 
247
  demo.load(
248
+ fn=lambda: (None, "*Enter text and click 'Analyze Content' to see the result.*", "0.0 tok/s", "0.00 s"),
249
  outputs=[result_label, result_description, tps_output, time_output]
250
  )
251
 
252
  if __name__ == "__main__":
 
253
  with torch.inference_mode():
254
  _ = model.generate(
255
+ **tokenizer(["Hello"], return_tensors="pt").to(model.device),
256
  max_new_tokens=1, do_sample=False, use_cache=True
257
  )
258
+ print("🚀 Starting AI Content Moderator...")
259
  demo.queue(max_size=64).launch(
260
  server_name="0.0.0.0",
261
  server_port=7860,