Jellyfish042 Claude Sonnet 4.5 committed on
Commit
24f1b39
·
1 Parent(s): 951e8dd

Optimize debug logging to reduce output volume

Browse files

- Only print detailed debug info for first token
- Add token_count variable to track processing
- Print sample JSON output for verification
- Reduce console spam while maintaining debugging capability

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>

Files changed (1) hide show
  1. visualization/html_generator.py +19 -10
visualization/html_generator.py CHANGED
@@ -302,6 +302,7 @@ def generate_comparison_html(
302
 
303
  # Build tokens based on common boundaries
304
  tokens = []
 
305
  for i in range(len(common_boundaries) - 1):
306
  start_byte = common_boundaries[i]
307
  end_byte = common_boundaries[i + 1]
@@ -375,8 +376,9 @@ def generate_comparison_html(
375
  model_a_token_idx = find_token_for_byte(byte_start, model_a_token_ranges)
376
  if model_a_token_idx is not None and model_a_token_idx < len(topk_predictions_a):
377
  pred = topk_predictions_a[model_a_token_idx]
378
- print(f"[DEBUG] Processing token at byte {byte_start}, model_a_token_idx={model_a_token_idx}")
379
- print(f"[DEBUG] pred structure: {pred}")
 
380
  try:
381
  decoded_pred = [
382
  pred[0],
@@ -384,27 +386,34 @@ def generate_comparison_html(
384
  [[tid, prob, decode_token(tid, tokenizer_a, model_type_a)] for tid, prob in pred[2]],
385
  ]
386
  topk_a_json = json.dumps(decoded_pred, ensure_ascii=False)
387
- print(f"[DEBUG] Successfully generated topk_a_json")
 
 
388
  except Exception as e:
389
- print(f"[DEBUG] Error generating topk_a_json: {e}")
390
  print(f"[DEBUG] pred[2] type: {type(pred[2])}")
391
  if len(pred) > 2:
392
- print(f"[DEBUG] pred[2] content: {pred[2][:3]}") # First 3 items
393
  if topk_predictions_b is not None and model_b_token_ranges:
394
  model_b_token_idx = find_token_for_byte(byte_start, model_b_token_ranges)
395
  if model_b_token_idx is not None and model_b_token_idx < len(topk_predictions_b):
396
  pred = topk_predictions_b[model_b_token_idx]
397
- print(f"[DEBUG] Processing token at byte {byte_start}, model_b_token_idx={model_b_token_idx}")
398
- print(f"[DEBUG] pred structure: {pred}")
 
399
  try:
400
  decoded_pred = [pred[0], pred[1], [[tid, prob, decode_token(tid, tokenizer_b, model_type_b)] for tid, prob in pred[2]]]
401
  topk_b_json = json.dumps(decoded_pred, ensure_ascii=False)
402
- print(f"[DEBUG] Successfully generated topk_b_json")
 
 
403
  except Exception as e:
404
- print(f"[DEBUG] Error generating topk_b_json: {e}")
405
  print(f"[DEBUG] pred[2] type: {type(pred[2])}")
406
  if len(pred) > 2:
407
- print(f"[DEBUG] pred[2] content: {pred[2][:3]}") # First 3 items
 
 
408
 
409
  token_deltas = deltas[byte_start:byte_end]
410
  avg_token_delta = sum(token_deltas) / len(token_deltas) if token_deltas else 0
 
302
 
303
  # Build tokens based on common boundaries
304
  tokens = []
305
+ token_count = 0
306
  for i in range(len(common_boundaries) - 1):
307
  start_byte = common_boundaries[i]
308
  end_byte = common_boundaries[i + 1]
 
376
  model_a_token_idx = find_token_for_byte(byte_start, model_a_token_ranges)
377
  if model_a_token_idx is not None and model_a_token_idx < len(topk_predictions_a):
378
  pred = topk_predictions_a[model_a_token_idx]
379
+ if token_count == 0: # Only print for first token
380
+ print(f"[DEBUG] Processing token at byte {byte_start}, model_a_token_idx={model_a_token_idx}")
381
+ print(f"[DEBUG] pred structure: {pred}")
382
  try:
383
  decoded_pred = [
384
  pred[0],
 
386
  [[tid, prob, decode_token(tid, tokenizer_a, model_type_a)] for tid, prob in pred[2]],
387
  ]
388
  topk_a_json = json.dumps(decoded_pred, ensure_ascii=False)
389
+ if token_count == 0:
390
+ print(f"[DEBUG] Successfully generated topk_a_json")
391
+ print(f"[DEBUG] Sample topk_a_json: {topk_a_json[:200]}")
392
  except Exception as e:
393
+ print(f"[DEBUG] Error generating topk_a_json at byte {byte_start}: {e}")
394
  print(f"[DEBUG] pred[2] type: {type(pred[2])}")
395
  if len(pred) > 2:
396
+ print(f"[DEBUG] pred[2] content: {pred[2][:3]}")
397
  if topk_predictions_b is not None and model_b_token_ranges:
398
  model_b_token_idx = find_token_for_byte(byte_start, model_b_token_ranges)
399
  if model_b_token_idx is not None and model_b_token_idx < len(topk_predictions_b):
400
  pred = topk_predictions_b[model_b_token_idx]
401
+ if token_count == 0: # Only print for first token
402
+ print(f"[DEBUG] Processing token at byte {byte_start}, model_b_token_idx={model_b_token_idx}")
403
+ print(f"[DEBUG] pred structure: {pred}")
404
  try:
405
  decoded_pred = [pred[0], pred[1], [[tid, prob, decode_token(tid, tokenizer_b, model_type_b)] for tid, prob in pred[2]]]
406
  topk_b_json = json.dumps(decoded_pred, ensure_ascii=False)
407
+ if token_count == 0:
408
+ print(f"[DEBUG] Successfully generated topk_b_json")
409
+ print(f"[DEBUG] Sample topk_b_json: {topk_b_json[:200]}")
410
  except Exception as e:
411
+ print(f"[DEBUG] Error generating topk_b_json at byte {byte_start}: {e}")
412
  print(f"[DEBUG] pred[2] type: {type(pred[2])}")
413
  if len(pred) > 2:
414
+ print(f"[DEBUG] pred[2] content: {pred[2][:3]}")
415
+
416
+ token_count += 1
417
 
418
  token_deltas = deltas[byte_start:byte_end]
419
  avg_token_delta = sum(token_deltas) / len(token_deltas) if token_deltas else 0