Spaces:

zazaman
/

guardrails-final

Sleeping

App Files Community

zazaman committed on Nov 9

Commit

1ff012c

1 Parent(s): 1af1f14

Add comprehensive logging with flush for translation debugging

Browse files

Files changed (3) hide show

app.py +15 -4
backend.py +13 -4
llm_clients/qwen_translator.py +17 -4

app.py CHANGED Viewed

@@ -77,22 +77,30 @@ class DetailedBackend(Backend):
             try:
                 # Translate if non-English
                 if not is_english_by_ascii_letters_only(prompt):
-                    print("🌍 Detected non-English input (web). Translating to English...")
                     try:
                         translator_client = self._get_translator_client()
                         translation_start = time.time()
                         translated_prompt = translator_client.generate_content(prompt)
                         translation_time = (time.time() - translation_start) * 1000
                         was_translated = True
-                        print(f"   ✅ Translated to English ({translation_time:.1f}ms): '{translated_prompt[:100]}...'")
                     except Exception as e:
                         error_msg = str(e)
-                        print(f"⚠️  Translation failed: {error_msg}")
-                        print(f"   Proceeding with original text (may cause classification issues).")
                         # Continue with original - classifier may still work
                         translated_prompt = prompt
                 # Classify with ModernBERT (always on English/translated text)
                 ai_response = self.attack_detector.generate_content(translated_prompt)
                 json_response = self._extract_json_from_response(ai_response)
                 ai_result = json.loads(json_response)
@@ -101,6 +109,9 @@ class DetailedBackend(Backend):
                 safety_status = ai_result.get("safety_status", "unsafe")
                 is_safe = safety_status.lower() == "safe"
                 result["ai_detection"] = {
                     "is_safe": is_safe,

             try:
                 # Translate if non-English
                 if not is_english_by_ascii_letters_only(prompt):
+                    print("🌍 Detected non-English input (web). Translating to English...", flush=True)
+                    print(f"   Original text: '{prompt[:100]}...'", flush=True)
                     try:
                         translator_client = self._get_translator_client()
                         translation_start = time.time()
                         translated_prompt = translator_client.generate_content(prompt)
                         translation_time = (time.time() - translation_start) * 1000
                         was_translated = True
+                        print(f"   ✅ Translated to English ({translation_time:.1f}ms): '{translated_prompt[:200]}...'", flush=True)
+                        print(f"   🔍 Will classify translated text (length: {len(translated_prompt)} chars)", flush=True)
                     except Exception as e:
                         error_msg = str(e)
+                        print(f"⚠️  Translation failed: {error_msg}", flush=True)
+                        print(f"   Proceeding with original text (may cause classification issues).", flush=True)
                         # Continue with original - classifier may still work
                         translated_prompt = prompt
+                        was_translated = False
+                else:
+                    print(f"   ✅ Text is English, no translation needed", flush=True)
+                    translated_prompt = prompt
                 # Classify with ModernBERT (always on English/translated text)
+                print(f"   🔍 Classifying text: '{translated_prompt[:100]}...'", flush=True)
+                print(f"   Text length: {len(translated_prompt)} chars, was_translated: {was_translated}", flush=True)
                 ai_response = self.attack_detector.generate_content(translated_prompt)
                 json_response = self._extract_json_from_response(ai_response)
                 ai_result = json.loads(json_response)
                 safety_status = ai_result.get("safety_status", "unsafe")
                 is_safe = safety_status.lower() == "safe"
+                confidence = ai_result.get("confidence", 0.0)
+                print(f"   📊 Classification result: safety_status='{safety_status}', is_safe={is_safe}, confidence={confidence:.2f}", flush=True)
                 result["ai_detection"] = {
                     "is_safe": is_safe,

backend.py CHANGED Viewed

@@ -161,21 +161,28 @@ class Backend:
         # Check if prompt is non-English and translate if needed
         if not is_english_by_ascii_letters_only(prompt):
             try:
-                print("🌍 Detected non-English input. Translating to English...")
                 translator_client = self._get_translator_client()
                 translation_start = time.time()
                 translated_prompt = translator_client.generate_content(prompt)
                 translation_time = (time.time() - translation_start) * 1000
-                print(f"   ✅ Translated to English ({translation_time:.1f}ms): '{translated_prompt[:100]}...'")
             except Exception as e:
                 error_msg = str(e)
-                print(f"⚠️  Translation failed: {error_msg}")
-                print(f"   Proceeding with original text (may cause classification issues).")
                 # Continue with original prompt - the classifier might still work or fail gracefully
                 translated_prompt = prompt
         try:
             # Measure classification latency (always use ModernBERT on translated/English text)
             start_time = time.time()
             response = self.attack_detector.generate_content(translated_prompt)
             end_time = time.time()
@@ -193,6 +200,8 @@ class Backend:
                 is_safe = safety_status.lower() == "safe"
                 if not is_safe:
                     block_reason = f"🤖 AI Security Scanner: Detected {attack_type} attack (confidence: {confidence:.2f}, latency: {latency_ms:.1f}ms). Reason: {reason}"
                     if original_prompt != translated_prompt:

         # Check if prompt is non-English and translate if needed
         if not is_english_by_ascii_letters_only(prompt):
             try:
+                print("🌍 Detected non-English input. Translating to English...", flush=True)
+                print(f"   Original text: '{prompt[:100]}...'", flush=True)
                 translator_client = self._get_translator_client()
                 translation_start = time.time()
                 translated_prompt = translator_client.generate_content(prompt)
                 translation_time = (time.time() - translation_start) * 1000
+                print(f"   ✅ Translated to English ({translation_time:.1f}ms): '{translated_prompt[:200]}...'", flush=True)
+                print(f"   🔍 Will classify translated text (length: {len(translated_prompt)} chars)", flush=True)
             except Exception as e:
                 error_msg = str(e)
+                print(f"⚠️  Translation failed: {error_msg}", flush=True)
+                print(f"   Proceeding with original text (may cause classification issues).", flush=True)
                 # Continue with original prompt - the classifier might still work or fail gracefully
                 translated_prompt = prompt
+        else:
+            print(f"   ✅ Text is English, no translation needed", flush=True)
+            translated_prompt = prompt
         try:
             # Measure classification latency (always use ModernBERT on translated/English text)
+            print(f"   🔍 Classifying text: '{translated_prompt[:100]}...'", flush=True)
+            print(f"   Text length: {len(translated_prompt)} chars", flush=True)
             start_time = time.time()
             response = self.attack_detector.generate_content(translated_prompt)
             end_time = time.time()
                 is_safe = safety_status.lower() == "safe"
+                print(f"   📊 Classification result: safety_status='{safety_status}', is_safe={is_safe}, confidence={confidence:.2f}", flush=True)
                 if not is_safe:
                     block_reason = f"🤖 AI Security Scanner: Detected {attack_type} attack (confidence: {confidence:.2f}, latency: {latency_ms:.1f}ms). Reason: {reason}"
                     if original_prompt != translated_prompt:

llm_clients/qwen_translator.py CHANGED Viewed

@@ -274,8 +274,9 @@ class QwenTranslatorClient(LlmClient):
         try:
             # Run the binary and capture output
-            print(f"   🔄 Running translation with llama.cpp binary...")
-            print(f"   Command: {' '.join(cmd[:3])}... (model: {os.path.basename(model_path)})")
             result = subprocess.run(
                 cmd,
@@ -285,35 +286,47 @@ class QwenTranslatorClient(LlmClient):
                 check=False  # Don't raise on non-zero exit, we'll check manually
             )
             # Check if command succeeded
             if result.returncode != 0:
                 error_msg = f"llama.cpp binary exited with code {result.returncode}"
                 if result.stderr:
                     error_msg += f"\nStderr: {result.stderr[:500]}"
                 if result.stdout:
                     error_msg += f"\nStdout: {result.stdout[:500]}"
-                print(f"   ❌ {error_msg}")
                 raise RuntimeError(error_msg)
             # Parse the output
             output = result.stdout.strip()
             if not output:
                 raise RuntimeError("llama.cpp binary returned empty output")
             # The output might include the prompt, so we need to extract just the generated part
             # Look for the assistant response after the prompt
             if "<|im_start|>assistant" in output:
                 # Extract everything after the assistant tag
                 output = output.split("<|im_start|>assistant")[-1].strip()
             # Remove any remaining chat format tokens
             translated_text = output.replace("<|im_start|>", "").replace("<|im_end|>", "").strip()
             if not translated_text:
                 raise RuntimeError("Translation output is empty after parsing")
-            print(f"   ✅ Translation completed: '{translated_text[:100]}...'")
         except subprocess.TimeoutExpired:
             error_msg = "Translation timed out after 60 seconds"

         try:
             # Run the binary and capture output
+            print(f"   🔄 Running translation with llama.cpp binary...", flush=True)
+            print(f"   Command: {' '.join(cmd[:3])}... (model: {os.path.basename(model_path)})", flush=True)
+            print(f"   Input prompt length: {len(translation_prompt)} chars", flush=True)
             result = subprocess.run(
                 cmd,
                 check=False  # Don't raise on non-zero exit, we'll check manually
             )
+            print(f"   Binary exit code: {result.returncode}", flush=True)
+            print(f"   Stdout length: {len(result.stdout)} chars", flush=True)
+            print(f"   Stderr length: {len(result.stderr)} chars", flush=True)
             # Check if command succeeded
             if result.returncode != 0:
                 error_msg = f"llama.cpp binary exited with code {result.returncode}"
                 if result.stderr:
                     error_msg += f"\nStderr: {result.stderr[:500]}"
+                    print(f"   Stderr: {result.stderr[:500]}", flush=True)
                 if result.stdout:
                     error_msg += f"\nStdout: {result.stdout[:500]}"
+                    print(f"   Stdout: {result.stdout[:500]}", flush=True)
+                print(f"   ❌ {error_msg}", flush=True)
                 raise RuntimeError(error_msg)
             # Parse the output
             output = result.stdout.strip()
             if not output:
+                print(f"   ❌ Empty output from binary", flush=True)
                 raise RuntimeError("llama.cpp binary returned empty output")
+            print(f"   Raw output (first 200 chars): {output[:200]}", flush=True)
             # The output might include the prompt, so we need to extract just the generated part
             # Look for the assistant response after the prompt
             if "<|im_start|>assistant" in output:
                 # Extract everything after the assistant tag
                 output = output.split("<|im_start|>assistant")[-1].strip()
+                print(f"   Extracted after assistant tag: {output[:200]}", flush=True)
             # Remove any remaining chat format tokens
             translated_text = output.replace("<|im_start|>", "").replace("<|im_end|>", "").strip()
             if not translated_text:
+                print(f"   ❌ Translation output is empty after parsing", flush=True)
+                print(f"   Original output was: {result.stdout[:500]}", flush=True)
                 raise RuntimeError("Translation output is empty after parsing")
+            print(f"   ✅ Translation completed ({len(translated_text)} chars): '{translated_text[:200]}...'", flush=True)
         except subprocess.TimeoutExpired:
             error_msg = "Translation timed out after 60 seconds"