Spaces:

Jellyfish042
/

Compression-Lens

Running

Jellyfish042 Claude Sonnet 4.5 committed 15 days ago

Commit

491ce2b

1 Parent(s): 56292f7

Add debug logging for JSON escaping in HTML attributes

Added detailed debug output to track how JSON is escaped before
being embedded in HTML data attributes. This will help identify
where the truncation is occurring.

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>

Files changed (2) hide show

app.py +6 -4
visualization/html_generator.py +1 -0

app.py CHANGED Viewed

@@ -27,7 +27,7 @@ SUPPORT_DIR = SCRIPT_DIR / "support"
 # Text length limits
 MAX_TEXT_LENGTH = 4000
-MIN_TEXT_LENGTH = 10
 # Global model cache
 _qwen_model = None
@@ -172,6 +172,8 @@ def initialize_models():
 def wrap_html_in_iframe(html: str) -> str:
     """Wrap HTML in an iframe for Gradio display."""
     escaped = html.replace('"', '&quot;')
     return f'''
     <div style="width:100%;height:700px;border:1px solid #ddd;border-radius:8px;overflow:hidden;">
@@ -199,7 +201,7 @@ def run_evaluation(text: str, progress=gr.Progress()):
     try:
         # Step 1: Evaluate Qwen (using cached model)
-        progress(desc="Evaluating with Qwen3...")
         result_qwen = evaluate_hf_single_sample(
             _qwen_model,
             _qwen_tokenizer,
@@ -215,7 +217,7 @@ def run_evaluation(text: str, progress=gr.Progress()):
         print(f"[DEBUG] Qwen tokenizer: {result_qwen.get('tokenizer')}")
         # Step 2: Evaluate RWKV7 (using cached model)
-        progress(desc="Evaluating with RWKV7...")
         result_rwkv = evaluate_rwkv7_single_sample(
             _rwkv_model,
             _rwkv_tokenizer,
@@ -230,7 +232,7 @@ def run_evaluation(text: str, progress=gr.Progress()):
         print(f"[DEBUG] RWKV tokenizer: {result_rwkv.get('tokenizer')}")
         # Step 3: Generate visualization
-        progress(desc="Generating visualization...")
         print(f"[DEBUG] Starting HTML generation...")
         print(f"[DEBUG] Passing tokenizer_a: {result_qwen['tokenizer']}")
         print(f"[DEBUG] Passing tokenizer_b: {result_rwkv['tokenizer']}")

 # Text length limits
 MAX_TEXT_LENGTH = 4000
+MIN_TEXT_LENGTH = 1
 # Global model cache
 _qwen_model = None
 def wrap_html_in_iframe(html: str) -> str:
     """Wrap HTML in an iframe for Gradio display."""
+    # For srcdoc attribute, we only need to escape quotes
+    # The HTML entities inside (like &quot;, &#10;) should remain as-is
     escaped = html.replace('"', '&quot;')
     return f'''
     <div style="width:100%;height:700px;border:1px solid #ddd;border-radius:8px;overflow:hidden;">
     try:
         # Step 1: Evaluate Qwen (using cached model)
+        progress(0, desc="Evaluating with Qwen3...")
         result_qwen = evaluate_hf_single_sample(
             _qwen_model,
             _qwen_tokenizer,
         print(f"[DEBUG] Qwen tokenizer: {result_qwen.get('tokenizer')}")
         # Step 2: Evaluate RWKV7 (using cached model)
+        progress(0, desc="Evaluating with RWKV7...")
         result_rwkv = evaluate_rwkv7_single_sample(
             _rwkv_model,
             _rwkv_tokenizer,
         print(f"[DEBUG] RWKV tokenizer: {result_rwkv.get('tokenizer')}")
         # Step 3: Generate visualization
+        progress(0, desc="Generating visualization...")
         print(f"[DEBUG] Starting HTML generation...")
         print(f"[DEBUG] Passing tokenizer_a: {result_qwen['tokenizer']}")
         print(f"[DEBUG] Passing tokenizer_b: {result_rwkv['tokenizer']}")

visualization/html_generator.py CHANGED Viewed

@@ -424,6 +424,7 @@ def generate_comparison_html(
                     if token_count == 0:
                         print(f"[DEBUG] Successfully generated topk_a_json")
                         print(f"[DEBUG] Sample topk_a_json: {topk_a_json[:200]}")
                 except Exception as e:
                     print(f"[DEBUG] Error generating topk_a_json at byte {byte_start}: {e}")
                     print(f"[DEBUG] pred[2] type: {type(pred[2])}")

                     if token_count == 0:
                         print(f"[DEBUG] Successfully generated topk_a_json")
                         print(f"[DEBUG] Sample topk_a_json: {topk_a_json[:200]}")
+                        print(f"[DEBUG] After escape_for_attr: {escape_for_attr(topk_a_json)[:200]}")
                 except Exception as e:
                     print(f"[DEBUG] Error generating topk_a_json at byte {byte_start}: {e}")
                     print(f"[DEBUG] pred[2] type: {type(pred[2])}")