Spaces:
Running
Running
Commit · 24f1b39
1
Parent(s): 951e8dd
Optimize debug logging to reduce output volume
Browse files
- Only print detailed debug info for first token
- Add token_count variable to track processing
- Print sample JSON output for verification
- Reduce console spam while maintaining debugging capability
Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
- visualization/html_generator.py +19 -10
visualization/html_generator.py
CHANGED
|
@@ -302,6 +302,7 @@ def generate_comparison_html(
|
|
| 302 |
|
| 303 |
# Build tokens based on common boundaries
|
| 304 |
tokens = []
|
|
|
|
| 305 |
for i in range(len(common_boundaries) - 1):
|
| 306 |
start_byte = common_boundaries[i]
|
| 307 |
end_byte = common_boundaries[i + 1]
|
|
@@ -375,8 +376,9 @@ def generate_comparison_html(
|
|
| 375 |
model_a_token_idx = find_token_for_byte(byte_start, model_a_token_ranges)
|
| 376 |
if model_a_token_idx is not None and model_a_token_idx < len(topk_predictions_a):
|
| 377 |
pred = topk_predictions_a[model_a_token_idx]
|
| 378 |
-
|
| 379 |
-
|
|
|
|
| 380 |
try:
|
| 381 |
decoded_pred = [
|
| 382 |
pred[0],
|
|
@@ -384,27 +386,34 @@ def generate_comparison_html(
|
|
| 384 |
[[tid, prob, decode_token(tid, tokenizer_a, model_type_a)] for tid, prob in pred[2]],
|
| 385 |
]
|
| 386 |
topk_a_json = json.dumps(decoded_pred, ensure_ascii=False)
|
| 387 |
-
|
|
|
|
|
|
|
| 388 |
except Exception as e:
|
| 389 |
-
print(f"[DEBUG] Error generating topk_a_json: {e}")
|
| 390 |
print(f"[DEBUG] pred[2] type: {type(pred[2])}")
|
| 391 |
if len(pred) > 2:
|
| 392 |
-
print(f"[DEBUG] pred[2] content: {pred[2][:3]}")
|
| 393 |
if topk_predictions_b is not None and model_b_token_ranges:
|
| 394 |
model_b_token_idx = find_token_for_byte(byte_start, model_b_token_ranges)
|
| 395 |
if model_b_token_idx is not None and model_b_token_idx < len(topk_predictions_b):
|
| 396 |
pred = topk_predictions_b[model_b_token_idx]
|
| 397 |
-
|
| 398 |
-
|
|
|
|
| 399 |
try:
|
| 400 |
decoded_pred = [pred[0], pred[1], [[tid, prob, decode_token(tid, tokenizer_b, model_type_b)] for tid, prob in pred[2]]]
|
| 401 |
topk_b_json = json.dumps(decoded_pred, ensure_ascii=False)
|
| 402 |
-
|
|
|
|
|
|
|
| 403 |
except Exception as e:
|
| 404 |
-
print(f"[DEBUG] Error generating topk_b_json: {e}")
|
| 405 |
print(f"[DEBUG] pred[2] type: {type(pred[2])}")
|
| 406 |
if len(pred) > 2:
|
| 407 |
-
print(f"[DEBUG] pred[2] content: {pred[2][:3]}")
|
|
|
|
|
|
|
| 408 |
|
| 409 |
token_deltas = deltas[byte_start:byte_end]
|
| 410 |
avg_token_delta = sum(token_deltas) / len(token_deltas) if token_deltas else 0
|
|
|
|
| 302 |
|
| 303 |
# Build tokens based on common boundaries
|
| 304 |
tokens = []
|
| 305 |
+
token_count = 0
|
| 306 |
for i in range(len(common_boundaries) - 1):
|
| 307 |
start_byte = common_boundaries[i]
|
| 308 |
end_byte = common_boundaries[i + 1]
|
|
|
|
| 376 |
model_a_token_idx = find_token_for_byte(byte_start, model_a_token_ranges)
|
| 377 |
if model_a_token_idx is not None and model_a_token_idx < len(topk_predictions_a):
|
| 378 |
pred = topk_predictions_a[model_a_token_idx]
|
| 379 |
+
if token_count == 0: # Only print for first token
|
| 380 |
+
print(f"[DEBUG] Processing token at byte {byte_start}, model_a_token_idx={model_a_token_idx}")
|
| 381 |
+
print(f"[DEBUG] pred structure: {pred}")
|
| 382 |
try:
|
| 383 |
decoded_pred = [
|
| 384 |
pred[0],
|
|
|
|
| 386 |
[[tid, prob, decode_token(tid, tokenizer_a, model_type_a)] for tid, prob in pred[2]],
|
| 387 |
]
|
| 388 |
topk_a_json = json.dumps(decoded_pred, ensure_ascii=False)
|
| 389 |
+
if token_count == 0:
|
| 390 |
+
print(f"[DEBUG] Successfully generated topk_a_json")
|
| 391 |
+
print(f"[DEBUG] Sample topk_a_json: {topk_a_json[:200]}")
|
| 392 |
except Exception as e:
|
| 393 |
+
print(f"[DEBUG] Error generating topk_a_json at byte {byte_start}: {e}")
|
| 394 |
print(f"[DEBUG] pred[2] type: {type(pred[2])}")
|
| 395 |
if len(pred) > 2:
|
| 396 |
+
print(f"[DEBUG] pred[2] content: {pred[2][:3]}")
|
| 397 |
if topk_predictions_b is not None and model_b_token_ranges:
|
| 398 |
model_b_token_idx = find_token_for_byte(byte_start, model_b_token_ranges)
|
| 399 |
if model_b_token_idx is not None and model_b_token_idx < len(topk_predictions_b):
|
| 400 |
pred = topk_predictions_b[model_b_token_idx]
|
| 401 |
+
if token_count == 0: # Only print for first token
|
| 402 |
+
print(f"[DEBUG] Processing token at byte {byte_start}, model_b_token_idx={model_b_token_idx}")
|
| 403 |
+
print(f"[DEBUG] pred structure: {pred}")
|
| 404 |
try:
|
| 405 |
decoded_pred = [pred[0], pred[1], [[tid, prob, decode_token(tid, tokenizer_b, model_type_b)] for tid, prob in pred[2]]]
|
| 406 |
topk_b_json = json.dumps(decoded_pred, ensure_ascii=False)
|
| 407 |
+
if token_count == 0:
|
| 408 |
+
print(f"[DEBUG] Successfully generated topk_b_json")
|
| 409 |
+
print(f"[DEBUG] Sample topk_b_json: {topk_b_json[:200]}")
|
| 410 |
except Exception as e:
|
| 411 |
+
print(f"[DEBUG] Error generating topk_b_json at byte {byte_start}: {e}")
|
| 412 |
print(f"[DEBUG] pred[2] type: {type(pred[2])}")
|
| 413 |
if len(pred) > 2:
|
| 414 |
+
print(f"[DEBUG] pred[2] content: {pred[2][:3]}")
|
| 415 |
+
|
| 416 |
+
token_count += 1
|
| 417 |
|
| 418 |
token_deltas = deltas[byte_start:byte_end]
|
| 419 |
avg_token_delta = sum(token_deltas) / len(token_deltas) if token_deltas else 0
|