Spaces:
Running
Running
Commit
·
f59198d
1
Parent(s):
491ce2b
Fix JSON truncation by using base64 encoding
Browse files
Replaced direct JSON embedding in HTML attributes with base64 encoding
to avoid issues with special characters breaking HTML attribute boundaries.
This completely eliminates the escaping problems that were causing the
"Unexpected end of JSON input" error in Top 10 predictions.
Changes:
- Encode topk JSON data as base64 before embedding in data attributes
- Decode base64 in JavaScript before parsing JSON
- Update debug logging to show base64 encoding status
Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
- visualization/html_generator.py +17 -10
visualization/html_generator.py
CHANGED
|
@@ -420,11 +420,13 @@ def generate_comparison_html(
|
|
| 420 |
pred[1],
|
| 421 |
[[tid, prob, decode_token(tid, tokenizer_a, model_type_a)] for tid, prob in pred[2]],
|
| 422 |
]
|
| 423 |
-
|
|
|
|
|
|
|
| 424 |
if token_count == 0:
|
| 425 |
-
print(f"[DEBUG] Successfully generated topk_a_json")
|
| 426 |
-
print(f"[DEBUG]
|
| 427 |
-
print(f"[DEBUG]
|
| 428 |
except Exception as e:
|
| 429 |
print(f"[DEBUG] Error generating topk_a_json at byte {byte_start}: {e}")
|
| 430 |
print(f"[DEBUG] pred[2] type: {type(pred[2])}")
|
|
@@ -439,10 +441,13 @@ def generate_comparison_html(
|
|
| 439 |
print(f"[DEBUG] pred structure: {pred}")
|
| 440 |
try:
|
| 441 |
decoded_pred = [pred[0], pred[1], [[tid, prob, decode_token(tid, tokenizer_b, model_type_b)] for tid, prob in pred[2]]]
|
| 442 |
-
|
|
|
|
|
|
|
| 443 |
if token_count == 0:
|
| 444 |
-
print(f"[DEBUG] Successfully generated topk_b_json")
|
| 445 |
-
print(f"[DEBUG]
|
|
|
|
| 446 |
except Exception as e:
|
| 447 |
print(f"[DEBUG] Error generating topk_b_json at byte {byte_start}: {e}")
|
| 448 |
print(f"[DEBUG] pred[2] type: {type(pred[2])}")
|
|
@@ -823,9 +828,11 @@ def generate_comparison_html(
|
|
| 823 |
const top5A = token.getAttribute('data-topk-a') || '';
|
| 824 |
const top5B = token.getAttribute('data-topk-b') || '';
|
| 825 |
|
| 826 |
-
function formatTopkColumn(
|
| 827 |
-
if (!
|
| 828 |
try {{
|
|
|
|
|
|
|
| 829 |
const data = JSON.parse(topkJson);
|
| 830 |
const [actualId, rank, topkList] = data;
|
| 831 |
let html = '<div class="topk-column">';
|
|
@@ -851,7 +858,7 @@ def generate_comparison_html(
|
|
| 851 |
return html;
|
| 852 |
}} catch (e) {{
|
| 853 |
console.error('Error in formatTopkColumn for ' + modelName + ':', e);
|
| 854 |
-
console.error('
|
| 855 |
return '<div class="topk-column"><div class="topk-title ' + titleClass + '">' + modelName + '</div><div class="topk-list">Error: ' + e.message + '</div></div>';
|
| 856 |
}}
|
| 857 |
}}
|
|
|
|
| 420 |
pred[1],
|
| 421 |
[[tid, prob, decode_token(tid, tokenizer_a, model_type_a)] for tid, prob in pred[2]],
|
| 422 |
]
|
| 423 |
+
# Use base64 encoding to avoid escaping issues
|
| 424 |
+
import base64
|
| 425 |
+
topk_a_json = base64.b64encode(json.dumps(decoded_pred, ensure_ascii=False).encode('utf-8')).decode('ascii')
|
| 426 |
if token_count == 0:
|
| 427 |
+
print(f"[DEBUG] Successfully generated topk_a_json (base64)")
|
| 428 |
+
print(f"[DEBUG] Original JSON length: {len(json.dumps(decoded_pred, ensure_ascii=False))}")
|
| 429 |
+
print(f"[DEBUG] Base64 length: {len(topk_a_json)}")
|
| 430 |
except Exception as e:
|
| 431 |
print(f"[DEBUG] Error generating topk_a_json at byte {byte_start}: {e}")
|
| 432 |
print(f"[DEBUG] pred[2] type: {type(pred[2])}")
|
|
|
|
| 441 |
print(f"[DEBUG] pred structure: {pred}")
|
| 442 |
try:
|
| 443 |
decoded_pred = [pred[0], pred[1], [[tid, prob, decode_token(tid, tokenizer_b, model_type_b)] for tid, prob in pred[2]]]
|
| 444 |
+
# Use base64 encoding to avoid escaping issues
|
| 445 |
+
import base64
|
| 446 |
+
topk_b_json = base64.b64encode(json.dumps(decoded_pred, ensure_ascii=False).encode('utf-8')).decode('ascii')
|
| 447 |
if token_count == 0:
|
| 448 |
+
print(f"[DEBUG] Successfully generated topk_b_json (base64)")
|
| 449 |
+
print(f"[DEBUG] Original JSON length: {len(json.dumps(decoded_pred, ensure_ascii=False))}")
|
| 450 |
+
print(f"[DEBUG] Base64 length: {len(topk_b_json)}")
|
| 451 |
except Exception as e:
|
| 452 |
print(f"[DEBUG] Error generating topk_b_json at byte {byte_start}: {e}")
|
| 453 |
print(f"[DEBUG] pred[2] type: {type(pred[2])}")
|
|
|
|
| 828 |
const top5A = token.getAttribute('data-topk-a') || '';
|
| 829 |
const top5B = token.getAttribute('data-topk-b') || '';
|
| 830 |
|
| 831 |
+
function formatTopkColumn(topkBase64, modelName, titleClass) {{
|
| 832 |
+
if (!topkBase64) return '<div class="topk-column"><div class="topk-title ' + titleClass + '">' + modelName + '</div><div class="topk-list">N/A</div></div>';
|
| 833 |
try {{
|
| 834 |
+
// Decode base64 to JSON string
|
| 835 |
+
const topkJson = atob(topkBase64);
|
| 836 |
const data = JSON.parse(topkJson);
|
| 837 |
const [actualId, rank, topkList] = data;
|
| 838 |
let html = '<div class="topk-column">';
|
|
|
|
| 858 |
return html;
|
| 859 |
}} catch (e) {{
|
| 860 |
console.error('Error in formatTopkColumn for ' + modelName + ':', e);
|
| 861 |
+
console.error('topkBase64:', topkBase64);
|
| 862 |
return '<div class="topk-column"><div class="topk-title ' + titleClass + '">' + modelName + '</div><div class="topk-list">Error: ' + e.message + '</div></div>';
|
| 863 |
}}
|
| 864 |
}}
|