Jellyfish042 Claude Sonnet 4.5 committed on
Commit
f59198d
·
1 Parent(s): 491ce2b

Fix JSON truncation by using base64 encoding

Browse files

Replaced direct JSON embedding in HTML attributes with base64 encoding
to avoid issues with special characters breaking HTML attribute boundaries.
This completely eliminates the escaping problems that were causing the
"Unexpected end of JSON input" error in Top 10 predictions.

Changes:
- Encode topk JSON data as base64 before embedding in data attributes
- Decode base64 in JavaScript before parsing JSON
- Update debug logging to show base64 encoding status

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>

Files changed (1) hide show
  1. visualization/html_generator.py +17 -10
visualization/html_generator.py CHANGED
@@ -420,11 +420,13 @@ def generate_comparison_html(
420
  pred[1],
421
  [[tid, prob, decode_token(tid, tokenizer_a, model_type_a)] for tid, prob in pred[2]],
422
  ]
423
- topk_a_json = json.dumps(decoded_pred, ensure_ascii=False)
 
 
424
  if token_count == 0:
425
- print(f"[DEBUG] Successfully generated topk_a_json")
426
- print(f"[DEBUG] Sample topk_a_json: {topk_a_json[:200]}")
427
- print(f"[DEBUG] After escape_for_attr: {escape_for_attr(topk_a_json)[:200]}")
428
  except Exception as e:
429
  print(f"[DEBUG] Error generating topk_a_json at byte {byte_start}: {e}")
430
  print(f"[DEBUG] pred[2] type: {type(pred[2])}")
@@ -439,10 +441,13 @@ def generate_comparison_html(
439
  print(f"[DEBUG] pred structure: {pred}")
440
  try:
441
  decoded_pred = [pred[0], pred[1], [[tid, prob, decode_token(tid, tokenizer_b, model_type_b)] for tid, prob in pred[2]]]
442
- topk_b_json = json.dumps(decoded_pred, ensure_ascii=False)
 
 
443
  if token_count == 0:
444
- print(f"[DEBUG] Successfully generated topk_b_json")
445
- print(f"[DEBUG] Sample topk_b_json: {topk_b_json[:200]}")
 
446
  except Exception as e:
447
  print(f"[DEBUG] Error generating topk_b_json at byte {byte_start}: {e}")
448
  print(f"[DEBUG] pred[2] type: {type(pred[2])}")
@@ -823,9 +828,11 @@ def generate_comparison_html(
823
  const top5A = token.getAttribute('data-topk-a') || '';
824
  const top5B = token.getAttribute('data-topk-b') || '';
825
 
826
- function formatTopkColumn(topkJson, modelName, titleClass) {{
827
- if (!topkJson) return '<div class="topk-column"><div class="topk-title ' + titleClass + '">' + modelName + '</div><div class="topk-list">N/A</div></div>';
828
  try {{
 
 
829
  const data = JSON.parse(topkJson);
830
  const [actualId, rank, topkList] = data;
831
  let html = '<div class="topk-column">';
@@ -851,7 +858,7 @@ def generate_comparison_html(
851
  return html;
852
  }} catch (e) {{
853
  console.error('Error in formatTopkColumn for ' + modelName + ':', e);
854
- console.error('topkJson:', topkJson);
855
  return '<div class="topk-column"><div class="topk-title ' + titleClass + '">' + modelName + '</div><div class="topk-list">Error: ' + e.message + '</div></div>';
856
  }}
857
  }}
 
420
  pred[1],
421
  [[tid, prob, decode_token(tid, tokenizer_a, model_type_a)] for tid, prob in pred[2]],
422
  ]
423
+ # Use base64 encoding to avoid escaping issues
424
+ import base64
425
+ topk_a_json = base64.b64encode(json.dumps(decoded_pred, ensure_ascii=False).encode('utf-8')).decode('ascii')
426
  if token_count == 0:
427
+ print(f"[DEBUG] Successfully generated topk_a_json (base64)")
428
+ print(f"[DEBUG] Original JSON length: {len(json.dumps(decoded_pred, ensure_ascii=False))}")
429
+ print(f"[DEBUG] Base64 length: {len(topk_a_json)}")
430
  except Exception as e:
431
  print(f"[DEBUG] Error generating topk_a_json at byte {byte_start}: {e}")
432
  print(f"[DEBUG] pred[2] type: {type(pred[2])}")
 
441
  print(f"[DEBUG] pred structure: {pred}")
442
  try:
443
  decoded_pred = [pred[0], pred[1], [[tid, prob, decode_token(tid, tokenizer_b, model_type_b)] for tid, prob in pred[2]]]
444
+ # Use base64 encoding to avoid escaping issues
445
+ import base64
446
+ topk_b_json = base64.b64encode(json.dumps(decoded_pred, ensure_ascii=False).encode('utf-8')).decode('ascii')
447
  if token_count == 0:
448
+ print(f"[DEBUG] Successfully generated topk_b_json (base64)")
449
+ print(f"[DEBUG] Original JSON length: {len(json.dumps(decoded_pred, ensure_ascii=False))}")
450
+ print(f"[DEBUG] Base64 length: {len(topk_b_json)}")
451
  except Exception as e:
452
  print(f"[DEBUG] Error generating topk_b_json at byte {byte_start}: {e}")
453
  print(f"[DEBUG] pred[2] type: {type(pred[2])}")
 
828
  const top5A = token.getAttribute('data-topk-a') || '';
829
  const top5B = token.getAttribute('data-topk-b') || '';
830
 
831
+ function formatTopkColumn(topkBase64, modelName, titleClass) {{
832
+ if (!topkBase64) return '<div class="topk-column"><div class="topk-title ' + titleClass + '">' + modelName + '</div><div class="topk-list">N/A</div></div>';
833
  try {{
834
+ // Decode base64 to JSON string
835
+ const topkJson = atob(topkBase64);
836
  const data = JSON.parse(topkJson);
837
  const [actualId, rank, topkList] = data;
838
  let html = '<div class="topk-column">';
 
858
  return html;
859
  }} catch (e) {{
860
  console.error('Error in formatTopkColumn for ' + modelName + ':', e);
861
+ console.error('topkBase64:', topkBase64);
862
  return '<div class="topk-column"><div class="topk-title ' + titleClass + '">' + modelName + '</div><div class="topk-list">Error: ' + e.message + '</div></div>';
863
  }}
864
  }}