Jellyfish042 Claude Sonnet 4.5 committed on
Commit
491ce2b
·
1 Parent(s): 56292f7

Add debug logging for JSON escaping in HTML attributes

Browse files

Added detailed debug output to track how JSON is escaped before
being embedded in HTML data attributes. This will help identify
where the truncation is occurring.

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>

Files changed (2) hide show
  1. app.py +6 -4
  2. visualization/html_generator.py +1 -0
app.py CHANGED
@@ -27,7 +27,7 @@ SUPPORT_DIR = SCRIPT_DIR / "support"
27
 
28
  # Text length limits
29
  MAX_TEXT_LENGTH = 4000
30
- MIN_TEXT_LENGTH = 10
31
 
32
  # Global model cache
33
  _qwen_model = None
@@ -172,6 +172,8 @@ def initialize_models():
172
 
173
  def wrap_html_in_iframe(html: str) -> str:
174
  """Wrap HTML in an iframe for Gradio display."""
 
 
175
  escaped = html.replace('"', '&quot;')
176
  return f'''
177
  <div style="width:100%;height:700px;border:1px solid #ddd;border-radius:8px;overflow:hidden;">
@@ -199,7 +201,7 @@ def run_evaluation(text: str, progress=gr.Progress()):
199
 
200
  try:
201
  # Step 1: Evaluate Qwen (using cached model)
202
- progress(desc="Evaluating with Qwen3...")
203
  result_qwen = evaluate_hf_single_sample(
204
  _qwen_model,
205
  _qwen_tokenizer,
@@ -215,7 +217,7 @@ def run_evaluation(text: str, progress=gr.Progress()):
215
  print(f"[DEBUG] Qwen tokenizer: {result_qwen.get('tokenizer')}")
216
 
217
  # Step 2: Evaluate RWKV7 (using cached model)
218
- progress(desc="Evaluating with RWKV7...")
219
  result_rwkv = evaluate_rwkv7_single_sample(
220
  _rwkv_model,
221
  _rwkv_tokenizer,
@@ -230,7 +232,7 @@ def run_evaluation(text: str, progress=gr.Progress()):
230
  print(f"[DEBUG] RWKV tokenizer: {result_rwkv.get('tokenizer')}")
231
 
232
  # Step 3: Generate visualization
233
- progress(desc="Generating visualization...")
234
  print(f"[DEBUG] Starting HTML generation...")
235
  print(f"[DEBUG] Passing tokenizer_a: {result_qwen['tokenizer']}")
236
  print(f"[DEBUG] Passing tokenizer_b: {result_rwkv['tokenizer']}")
 
27
 
28
  # Text length limits
29
  MAX_TEXT_LENGTH = 4000
30
+ MIN_TEXT_LENGTH = 1
31
 
32
  # Global model cache
33
  _qwen_model = None
 
172
 
173
  def wrap_html_in_iframe(html: str) -> str:
174
  """Wrap HTML in an iframe for Gradio display."""
175
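+ # For the srcdoc attribute, double quotes are escaped so the HTML fits inside the attribute value.
176
+ # NOTE(review): '&' is not escaped here, so HTML entities inside (like &quot;, &#10;) are decoded by the outer parser instead of remaining as-is; escape '&' to '&amp;' before quotes if they must survive.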
177
  escaped = html.replace('"', '&quot;')
178
  return f'''
179
  <div style="width:100%;height:700px;border:1px solid #ddd;border-radius:8px;overflow:hidden;">
 
201
 
202
  try:
203
  # Step 1: Evaluate Qwen (using cached model)
204
+ progress(0, desc="Evaluating with Qwen3...")
205
  result_qwen = evaluate_hf_single_sample(
206
  _qwen_model,
207
  _qwen_tokenizer,
 
217
  print(f"[DEBUG] Qwen tokenizer: {result_qwen.get('tokenizer')}")
218
 
219
  # Step 2: Evaluate RWKV7 (using cached model)
220
+ progress(0, desc="Evaluating with RWKV7...")
221
  result_rwkv = evaluate_rwkv7_single_sample(
222
  _rwkv_model,
223
  _rwkv_tokenizer,
 
232
  print(f"[DEBUG] RWKV tokenizer: {result_rwkv.get('tokenizer')}")
233
 
234
  # Step 3: Generate visualization
235
+ progress(0, desc="Generating visualization...")
236
  print(f"[DEBUG] Starting HTML generation...")
237
  print(f"[DEBUG] Passing tokenizer_a: {result_qwen['tokenizer']}")
238
  print(f"[DEBUG] Passing tokenizer_b: {result_rwkv['tokenizer']}")
visualization/html_generator.py CHANGED
@@ -424,6 +424,7 @@ def generate_comparison_html(
424
  if token_count == 0:
425
  print(f"[DEBUG] Successfully generated topk_a_json")
426
  print(f"[DEBUG] Sample topk_a_json: {topk_a_json[:200]}")
 
427
  except Exception as e:
428
  print(f"[DEBUG] Error generating topk_a_json at byte {byte_start}: {e}")
429
  print(f"[DEBUG] pred[2] type: {type(pred[2])}")
 
424
  if token_count == 0:
425
  print(f"[DEBUG] Successfully generated topk_a_json")
426
  print(f"[DEBUG] Sample topk_a_json: {topk_a_json[:200]}")
427
+ print(f"[DEBUG] After escape_for_attr: {escape_for_attr(topk_a_json)[:200]}")
428
  except Exception as e:
429
  print(f"[DEBUG] Error generating topk_a_json at byte {byte_start}: {e}")
430
  print(f"[DEBUG] pred[2] type: {type(pred[2])}")