zazaman committed on
Commit
1ff012c
·
1 Parent(s): 1af1f14

Add comprehensive logging with flush for translation debugging

Browse files
Files changed (3) hide show
  1. app.py +15 -4
  2. backend.py +13 -4
  3. llm_clients/qwen_translator.py +17 -4
app.py CHANGED
@@ -77,22 +77,30 @@ class DetailedBackend(Backend):
77
  try:
78
  # Translate if non-English
79
  if not is_english_by_ascii_letters_only(prompt):
80
- print("🌍 Detected non-English input (web). Translating to English...")
 
81
  try:
82
  translator_client = self._get_translator_client()
83
  translation_start = time.time()
84
  translated_prompt = translator_client.generate_content(prompt)
85
  translation_time = (time.time() - translation_start) * 1000
86
  was_translated = True
87
- print(f" ✅ Translated to English ({translation_time:.1f}ms): '{translated_prompt[:100]}...'")
 
88
  except Exception as e:
89
  error_msg = str(e)
90
- print(f"⚠️ Translation failed: {error_msg}")
91
- print(f" Proceeding with original text (may cause classification issues).")
92
  # Continue with original - classifier may still work
93
  translated_prompt = prompt
 
 
 
 
94
 
95
  # Classify with ModernBERT (always on English/translated text)
 
 
96
  ai_response = self.attack_detector.generate_content(translated_prompt)
97
  json_response = self._extract_json_from_response(ai_response)
98
  ai_result = json.loads(json_response)
@@ -101,6 +109,9 @@ class DetailedBackend(Backend):
101
 
102
  safety_status = ai_result.get("safety_status", "unsafe")
103
  is_safe = safety_status.lower() == "safe"
 
 
 
104
 
105
  result["ai_detection"] = {
106
  "is_safe": is_safe,
 
77
  try:
78
  # Translate if non-English
79
  if not is_english_by_ascii_letters_only(prompt):
80
+ print("🌍 Detected non-English input (web). Translating to English...", flush=True)
81
+ print(f" Original text: '{prompt[:100]}...'", flush=True)
82
  try:
83
  translator_client = self._get_translator_client()
84
  translation_start = time.time()
85
  translated_prompt = translator_client.generate_content(prompt)
86
  translation_time = (time.time() - translation_start) * 1000
87
  was_translated = True
88
+ print(f" ✅ Translated to English ({translation_time:.1f}ms): '{translated_prompt[:200]}...'", flush=True)
89
+ print(f" 🔍 Will classify translated text (length: {len(translated_prompt)} chars)", flush=True)
90
  except Exception as e:
91
  error_msg = str(e)
92
+ print(f"⚠️ Translation failed: {error_msg}", flush=True)
93
+ print(f" Proceeding with original text (may cause classification issues).", flush=True)
94
  # Continue with original - classifier may still work
95
  translated_prompt = prompt
96
+ was_translated = False
97
+ else:
98
+ print(f" ✅ Text is English, no translation needed", flush=True)
99
+ translated_prompt = prompt
100
 
101
  # Classify with ModernBERT (always on English/translated text)
102
+ print(f" 🔍 Classifying text: '{translated_prompt[:100]}...'", flush=True)
103
+ print(f" Text length: {len(translated_prompt)} chars, was_translated: {was_translated}", flush=True)
104
  ai_response = self.attack_detector.generate_content(translated_prompt)
105
  json_response = self._extract_json_from_response(ai_response)
106
  ai_result = json.loads(json_response)
 
109
 
110
  safety_status = ai_result.get("safety_status", "unsafe")
111
  is_safe = safety_status.lower() == "safe"
112
+ confidence = ai_result.get("confidence", 0.0)
113
+
114
+ print(f" 📊 Classification result: safety_status='{safety_status}', is_safe={is_safe}, confidence={confidence:.2f}", flush=True)
115
 
116
  result["ai_detection"] = {
117
  "is_safe": is_safe,
backend.py CHANGED
@@ -161,21 +161,28 @@ class Backend:
161
  # Check if prompt is non-English and translate if needed
162
  if not is_english_by_ascii_letters_only(prompt):
163
  try:
164
- print("🌍 Detected non-English input. Translating to English...")
 
165
  translator_client = self._get_translator_client()
166
  translation_start = time.time()
167
  translated_prompt = translator_client.generate_content(prompt)
168
  translation_time = (time.time() - translation_start) * 1000
169
- print(f" ✅ Translated to English ({translation_time:.1f}ms): '{translated_prompt[:100]}...'")
 
170
  except Exception as e:
171
  error_msg = str(e)
172
- print(f"⚠️ Translation failed: {error_msg}")
173
- print(f" Proceeding with original text (may cause classification issues).")
174
  # Continue with original prompt - the classifier might still work or fail gracefully
175
  translated_prompt = prompt
 
 
 
176
 
177
  try:
178
  # Measure classification latency (always use ModernBERT on translated/English text)
 
 
179
  start_time = time.time()
180
  response = self.attack_detector.generate_content(translated_prompt)
181
  end_time = time.time()
@@ -193,6 +200,8 @@ class Backend:
193
 
194
  is_safe = safety_status.lower() == "safe"
195
 
 
 
196
  if not is_safe:
197
  block_reason = f"🤖 AI Security Scanner: Detected {attack_type} attack (confidence: {confidence:.2f}, latency: {latency_ms:.1f}ms). Reason: {reason}"
198
  if original_prompt != translated_prompt:
 
161
  # Check if prompt is non-English and translate if needed
162
  if not is_english_by_ascii_letters_only(prompt):
163
  try:
164
+ print("🌍 Detected non-English input. Translating to English...", flush=True)
165
+ print(f" Original text: '{prompt[:100]}...'", flush=True)
166
  translator_client = self._get_translator_client()
167
  translation_start = time.time()
168
  translated_prompt = translator_client.generate_content(prompt)
169
  translation_time = (time.time() - translation_start) * 1000
170
+ print(f" ✅ Translated to English ({translation_time:.1f}ms): '{translated_prompt[:200]}...'", flush=True)
171
+ print(f" 🔍 Will classify translated text (length: {len(translated_prompt)} chars)", flush=True)
172
  except Exception as e:
173
  error_msg = str(e)
174
+ print(f"⚠️ Translation failed: {error_msg}", flush=True)
175
+ print(f" Proceeding with original text (may cause classification issues).", flush=True)
176
  # Continue with original prompt - the classifier might still work or fail gracefully
177
  translated_prompt = prompt
178
+ else:
179
+ print(f" ✅ Text is English, no translation needed", flush=True)
180
+ translated_prompt = prompt
181
 
182
  try:
183
  # Measure classification latency (always use ModernBERT on translated/English text)
184
+ print(f" 🔍 Classifying text: '{translated_prompt[:100]}...'", flush=True)
185
+ print(f" Text length: {len(translated_prompt)} chars", flush=True)
186
  start_time = time.time()
187
  response = self.attack_detector.generate_content(translated_prompt)
188
  end_time = time.time()
 
200
 
201
  is_safe = safety_status.lower() == "safe"
202
 
203
+ print(f" 📊 Classification result: safety_status='{safety_status}', is_safe={is_safe}, confidence={confidence:.2f}", flush=True)
204
+
205
  if not is_safe:
206
  block_reason = f"🤖 AI Security Scanner: Detected {attack_type} attack (confidence: {confidence:.2f}, latency: {latency_ms:.1f}ms). Reason: {reason}"
207
  if original_prompt != translated_prompt:
llm_clients/qwen_translator.py CHANGED
@@ -274,8 +274,9 @@ class QwenTranslatorClient(LlmClient):
274
 
275
  try:
276
  # Run the binary and capture output
277
- print(f" 🔄 Running translation with llama.cpp binary...")
278
- print(f" Command: {' '.join(cmd[:3])}... (model: {os.path.basename(model_path)})")
 
279
 
280
  result = subprocess.run(
281
  cmd,
@@ -285,35 +286,47 @@ class QwenTranslatorClient(LlmClient):
285
  check=False # Don't raise on non-zero exit, we'll check manually
286
  )
287
 
 
 
 
 
288
  # Check if command succeeded
289
  if result.returncode != 0:
290
  error_msg = f"llama.cpp binary exited with code {result.returncode}"
291
  if result.stderr:
292
  error_msg += f"\nStderr: {result.stderr[:500]}"
 
293
  if result.stdout:
294
  error_msg += f"\nStdout: {result.stdout[:500]}"
295
- print(f" ❌ {error_msg}")
 
296
  raise RuntimeError(error_msg)
297
 
298
  # Parse the output
299
  output = result.stdout.strip()
300
 
301
  if not output:
 
302
  raise RuntimeError("llama.cpp binary returned empty output")
303
 
 
 
304
  # The output might include the prompt, so we need to extract just the generated part
305
  # Look for the assistant response after the prompt
306
  if "<|im_start|>assistant" in output:
307
  # Extract everything after the assistant tag
308
  output = output.split("<|im_start|>assistant")[-1].strip()
 
309
 
310
  # Remove any remaining chat format tokens
311
  translated_text = output.replace("<|im_start|>", "").replace("<|im_end|>", "").strip()
312
 
313
  if not translated_text:
 
 
314
  raise RuntimeError("Translation output is empty after parsing")
315
 
316
- print(f" ✅ Translation completed: '{translated_text[:100]}...'")
317
 
318
  except subprocess.TimeoutExpired:
319
  error_msg = "Translation timed out after 60 seconds"
 
274
 
275
  try:
276
  # Run the binary and capture output
277
+ print(f" 🔄 Running translation with llama.cpp binary...", flush=True)
278
+ print(f" Command: {' '.join(cmd[:3])}... (model: {os.path.basename(model_path)})", flush=True)
279
+ print(f" Input prompt length: {len(translation_prompt)} chars", flush=True)
280
 
281
  result = subprocess.run(
282
  cmd,
 
286
  check=False # Don't raise on non-zero exit, we'll check manually
287
  )
288
 
289
+ print(f" Binary exit code: {result.returncode}", flush=True)
290
+ print(f" Stdout length: {len(result.stdout)} chars", flush=True)
291
+ print(f" Stderr length: {len(result.stderr)} chars", flush=True)
292
+
293
  # Check if command succeeded
294
  if result.returncode != 0:
295
  error_msg = f"llama.cpp binary exited with code {result.returncode}"
296
  if result.stderr:
297
  error_msg += f"\nStderr: {result.stderr[:500]}"
298
+ print(f" Stderr: {result.stderr[:500]}", flush=True)
299
  if result.stdout:
300
  error_msg += f"\nStdout: {result.stdout[:500]}"
301
+ print(f" Stdout: {result.stdout[:500]}", flush=True)
302
+ print(f" ❌ {error_msg}", flush=True)
303
  raise RuntimeError(error_msg)
304
 
305
  # Parse the output
306
  output = result.stdout.strip()
307
 
308
  if not output:
309
+ print(f" ❌ Empty output from binary", flush=True)
310
  raise RuntimeError("llama.cpp binary returned empty output")
311
 
312
+ print(f" Raw output (first 200 chars): {output[:200]}", flush=True)
313
+
314
  # The output might include the prompt, so we need to extract just the generated part
315
  # Look for the assistant response after the prompt
316
  if "<|im_start|>assistant" in output:
317
  # Extract everything after the assistant tag
318
  output = output.split("<|im_start|>assistant")[-1].strip()
319
+ print(f" Extracted after assistant tag: {output[:200]}", flush=True)
320
 
321
  # Remove any remaining chat format tokens
322
  translated_text = output.replace("<|im_start|>", "").replace("<|im_end|>", "").strip()
323
 
324
  if not translated_text:
325
+ print(f" ❌ Translation output is empty after parsing", flush=True)
326
+ print(f" Original output was: {result.stdout[:500]}", flush=True)
327
  raise RuntimeError("Translation output is empty after parsing")
328
 
329
+ print(f" ✅ Translation completed ({len(translated_text)} chars): '{translated_text[:200]}...'", flush=True)
330
 
331
  except subprocess.TimeoutExpired:
332
  error_msg = "Translation timed out after 60 seconds"