Spaces:
Running
Running
Commit
·
491ce2b
1
Parent(s):
56292f7
Add debug logging for JSON escaping in HTML attributes
Browse files
Added detailed debug output to track how JSON is escaped before
being embedded in HTML data attributes. This will help identify
where the truncation is occurring.
Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
- app.py +6 -4
- visualization/html_generator.py +1 -0
app.py
CHANGED
|
@@ -27,7 +27,7 @@ SUPPORT_DIR = SCRIPT_DIR / "support"
|
|
| 27 |
|
| 28 |
# Text length limits
|
| 29 |
MAX_TEXT_LENGTH = 4000
|
| 30 |
-
MIN_TEXT_LENGTH =
|
| 31 |
|
| 32 |
# Global model cache
|
| 33 |
_qwen_model = None
|
|
@@ -172,6 +172,8 @@ def initialize_models():
|
|
| 172 |
|
| 173 |
def wrap_html_in_iframe(html: str) -> str:
|
| 174 |
"""Wrap HTML in an iframe for Gradio display."""
|
|
|
|
|
|
|
| 175 |
escaped = html.replace('"', '&quot;')
|
| 176 |
return f'''
|
| 177 |
<div style="width:100%;height:700px;border:1px solid #ddd;border-radius:8px;overflow:hidden;">
|
|
@@ -199,7 +201,7 @@ def run_evaluation(text: str, progress=gr.Progress()):
|
|
| 199 |
|
| 200 |
try:
|
| 201 |
# Step 1: Evaluate Qwen (using cached model)
|
| 202 |
-
progress(desc="Evaluating with Qwen3...")
|
| 203 |
result_qwen = evaluate_hf_single_sample(
|
| 204 |
_qwen_model,
|
| 205 |
_qwen_tokenizer,
|
|
@@ -215,7 +217,7 @@ def run_evaluation(text: str, progress=gr.Progress()):
|
|
| 215 |
print(f"[DEBUG] Qwen tokenizer: {result_qwen.get('tokenizer')}")
|
| 216 |
|
| 217 |
# Step 2: Evaluate RWKV7 (using cached model)
|
| 218 |
-
progress(desc="Evaluating with RWKV7...")
|
| 219 |
result_rwkv = evaluate_rwkv7_single_sample(
|
| 220 |
_rwkv_model,
|
| 221 |
_rwkv_tokenizer,
|
|
@@ -230,7 +232,7 @@ def run_evaluation(text: str, progress=gr.Progress()):
|
|
| 230 |
print(f"[DEBUG] RWKV tokenizer: {result_rwkv.get('tokenizer')}")
|
| 231 |
|
| 232 |
# Step 3: Generate visualization
|
| 233 |
-
progress(desc="Generating visualization...")
|
| 234 |
print(f"[DEBUG] Starting HTML generation...")
|
| 235 |
print(f"[DEBUG] Passing tokenizer_a: {result_qwen['tokenizer']}")
|
| 236 |
print(f"[DEBUG] Passing tokenizer_b: {result_rwkv['tokenizer']}")
|
|
|
|
| 27 |
|
| 28 |
# Text length limits
|
| 29 |
MAX_TEXT_LENGTH = 4000
|
| 30 |
+
MIN_TEXT_LENGTH = 1
|
| 31 |
|
| 32 |
# Global model cache
|
| 33 |
_qwen_model = None
|
|
|
|
| 172 |
|
| 173 |
def wrap_html_in_iframe(html: str) -> str:
|
| 174 |
"""Wrap HTML in an iframe for Gradio display."""
|
| 175 |
+
# For srcdoc attribute, we only need to escape quotes
|
| 176 |
+
# The HTML entities inside (like &quot;) should remain as-is
|
| 177 |
escaped = html.replace('"', '&quot;')
|
| 178 |
return f'''
|
| 179 |
<div style="width:100%;height:700px;border:1px solid #ddd;border-radius:8px;overflow:hidden;">
|
|
|
|
| 201 |
|
| 202 |
try:
|
| 203 |
# Step 1: Evaluate Qwen (using cached model)
|
| 204 |
+
progress(0, desc="Evaluating with Qwen3...")
|
| 205 |
result_qwen = evaluate_hf_single_sample(
|
| 206 |
_qwen_model,
|
| 207 |
_qwen_tokenizer,
|
|
|
|
| 217 |
print(f"[DEBUG] Qwen tokenizer: {result_qwen.get('tokenizer')}")
|
| 218 |
|
| 219 |
# Step 2: Evaluate RWKV7 (using cached model)
|
| 220 |
+
progress(0, desc="Evaluating with RWKV7...")
|
| 221 |
result_rwkv = evaluate_rwkv7_single_sample(
|
| 222 |
_rwkv_model,
|
| 223 |
_rwkv_tokenizer,
|
|
|
|
| 232 |
print(f"[DEBUG] RWKV tokenizer: {result_rwkv.get('tokenizer')}")
|
| 233 |
|
| 234 |
# Step 3: Generate visualization
|
| 235 |
+
progress(0, desc="Generating visualization...")
|
| 236 |
print(f"[DEBUG] Starting HTML generation...")
|
| 237 |
print(f"[DEBUG] Passing tokenizer_a: {result_qwen['tokenizer']}")
|
| 238 |
print(f"[DEBUG] Passing tokenizer_b: {result_rwkv['tokenizer']}")
|
visualization/html_generator.py
CHANGED
|
@@ -424,6 +424,7 @@ def generate_comparison_html(
|
|
| 424 |
if token_count == 0:
|
| 425 |
print(f"[DEBUG] Successfully generated topk_a_json")
|
| 426 |
print(f"[DEBUG] Sample topk_a_json: {topk_a_json[:200]}")
|
|
|
|
| 427 |
except Exception as e:
|
| 428 |
print(f"[DEBUG] Error generating topk_a_json at byte {byte_start}: {e}")
|
| 429 |
print(f"[DEBUG] pred[2] type: {type(pred[2])}")
|
|
|
|
| 424 |
if token_count == 0:
|
| 425 |
print(f"[DEBUG] Successfully generated topk_a_json")
|
| 426 |
print(f"[DEBUG] Sample topk_a_json: {topk_a_json[:200]}")
|
| 427 |
+
print(f"[DEBUG] After escape_for_attr: {escape_for_attr(topk_a_json)[:200]}")
|
| 428 |
except Exception as e:
|
| 429 |
print(f"[DEBUG] Error generating topk_a_json at byte {byte_start}: {e}")
|
| 430 |
print(f"[DEBUG] pred[2] type: {type(pred[2])}")
|