Spaces:
Running
Running
Commit · 24f1b39
1
Parent(s): 951e8dd
Optimize debug logging to reduce output volume
Browse files
- Only print detailed debug info for first token
- Add token_count variable to track processing
- Print sample JSON output for verification
- Reduce console spam while maintaining debugging capability
Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
- visualization/html_generator.py +19 -10
visualization/html_generator.py
CHANGED
|
@@ -302,6 +302,7 @@ def generate_comparison_html(
|
|
| 302 |
|
| 303 |
# Build tokens based on common boundaries
|
| 304 |
tokens = []
|
|
|
|
| 305 |
for i in range(len(common_boundaries) - 1):
|
| 306 |
start_byte = common_boundaries[i]
|
| 307 |
end_byte = common_boundaries[i + 1]
|
|
@@ -375,8 +376,9 @@ def generate_comparison_html(
|
|
| 375 |
model_a_token_idx = find_token_for_byte(byte_start, model_a_token_ranges)
|
| 376 |
if model_a_token_idx is not None and model_a_token_idx < len(topk_predictions_a):
|
| 377 |
pred = topk_predictions_a[model_a_token_idx]
|
| 378 |
-
|
| 379 |
-
|
|
|
|
| 380 |
try:
|
| 381 |
decoded_pred = [
|
| 382 |
pred[0],
|
|
@@ -384,27 +386,34 @@ def generate_comparison_html(
|
|
| 384 |
[[tid, prob, decode_token(tid, tokenizer_a, model_type_a)] for tid, prob in pred[2]],
|
| 385 |
]
|
| 386 |
topk_a_json = json.dumps(decoded_pred, ensure_ascii=False)
|
| 387 |
-
|
|
|
|
|
|
|
| 388 |
except Exception as e:
|
| 389 |
-
print(f"[DEBUG] Error generating topk_a_json: {e}")
|
| 390 |
print(f"[DEBUG] pred[2] type: {type(pred[2])}")
|
| 391 |
if len(pred) > 2:
|
| 392 |
-
print(f"[DEBUG] pred[2] content: {pred[2][:3]}")
|
| 393 |
if topk_predictions_b is not None and model_b_token_ranges:
|
| 394 |
model_b_token_idx = find_token_for_byte(byte_start, model_b_token_ranges)
|
| 395 |
if model_b_token_idx is not None and model_b_token_idx < len(topk_predictions_b):
|
| 396 |
pred = topk_predictions_b[model_b_token_idx]
|
| 397 |
-
|
| 398 |
-
|
|
|
|
| 399 |
try:
|
| 400 |
decoded_pred = [pred[0], pred[1], [[tid, prob, decode_token(tid, tokenizer_b, model_type_b)] for tid, prob in pred[2]]]
|
| 401 |
topk_b_json = json.dumps(decoded_pred, ensure_ascii=False)
|
| 402 |
-
|
|
|
|
|
|
|
| 403 |
except Exception as e:
|
| 404 |
-
print(f"[DEBUG] Error generating topk_b_json: {e}")
|
| 405 |
print(f"[DEBUG] pred[2] type: {type(pred[2])}")
|
| 406 |
if len(pred) > 2:
|
| 407 |
-
print(f"[DEBUG] pred[2] content: {pred[2][:3]}")
|
|
|
|
|
|
|
| 408 |
|
| 409 |
token_deltas = deltas[byte_start:byte_end]
|
| 410 |
avg_token_delta = sum(token_deltas) / len(token_deltas) if token_deltas else 0
|
|
|
|
| 302 |
|
| 303 |
# Build tokens based on common boundaries
|
| 304 |
tokens = []
|
| 305 |
+
token_count = 0
|
| 306 |
for i in range(len(common_boundaries) - 1):
|
| 307 |
start_byte = common_boundaries[i]
|
| 308 |
end_byte = common_boundaries[i + 1]
|
|
|
|
| 376 |
model_a_token_idx = find_token_for_byte(byte_start, model_a_token_ranges)
|
| 377 |
if model_a_token_idx is not None and model_a_token_idx < len(topk_predictions_a):
|
| 378 |
pred = topk_predictions_a[model_a_token_idx]
|
| 379 |
+
if token_count == 0: # Only print for first token
|
| 380 |
+
print(f"[DEBUG] Processing token at byte {byte_start}, model_a_token_idx={model_a_token_idx}")
|
| 381 |
+
print(f"[DEBUG] pred structure: {pred}")
|
| 382 |
try:
|
| 383 |
decoded_pred = [
|
| 384 |
pred[0],
|
|
|
|
| 386 |
[[tid, prob, decode_token(tid, tokenizer_a, model_type_a)] for tid, prob in pred[2]],
|
| 387 |
]
|
| 388 |
topk_a_json = json.dumps(decoded_pred, ensure_ascii=False)
|
| 389 |
+
if token_count == 0:
|
| 390 |
+
print(f"[DEBUG] Successfully generated topk_a_json")
|
| 391 |
+
print(f"[DEBUG] Sample topk_a_json: {topk_a_json[:200]}")
|
| 392 |
except Exception as e:
|
| 393 |
+
print(f"[DEBUG] Error generating topk_a_json at byte {byte_start}: {e}")
|
| 394 |
print(f"[DEBUG] pred[2] type: {type(pred[2])}")
|
| 395 |
if len(pred) > 2:
|
| 396 |
+
print(f"[DEBUG] pred[2] content: {pred[2][:3]}")
|
| 397 |
if topk_predictions_b is not None and model_b_token_ranges:
|
| 398 |
model_b_token_idx = find_token_for_byte(byte_start, model_b_token_ranges)
|
| 399 |
if model_b_token_idx is not None and model_b_token_idx < len(topk_predictions_b):
|
| 400 |
pred = topk_predictions_b[model_b_token_idx]
|
| 401 |
+
if token_count == 0: # Only print for first token
|
| 402 |
+
print(f"[DEBUG] Processing token at byte {byte_start}, model_b_token_idx={model_b_token_idx}")
|
| 403 |
+
print(f"[DEBUG] pred structure: {pred}")
|
| 404 |
try:
|
| 405 |
decoded_pred = [pred[0], pred[1], [[tid, prob, decode_token(tid, tokenizer_b, model_type_b)] for tid, prob in pred[2]]]
|
| 406 |
topk_b_json = json.dumps(decoded_pred, ensure_ascii=False)
|
| 407 |
+
if token_count == 0:
|
| 408 |
+
print(f"[DEBUG] Successfully generated topk_b_json")
|
| 409 |
+
print(f"[DEBUG] Sample topk_b_json: {topk_b_json[:200]}")
|
| 410 |
except Exception as e:
|
| 411 |
+
print(f"[DEBUG] Error generating topk_b_json at byte {byte_start}: {e}")
|
| 412 |
print(f"[DEBUG] pred[2] type: {type(pred[2])}")
|
| 413 |
if len(pred) > 2:
|
| 414 |
+
print(f"[DEBUG] pred[2] content: {pred[2][:3]}")
|
| 415 |
+
|
| 416 |
+
token_count += 1
|
| 417 |
|
| 418 |
token_deltas = deltas[byte_start:byte_end]
|
| 419 |
avg_token_delta = sum(token_deltas) / len(token_deltas) if token_deltas else 0
|