Spaces:
Running
Commit
· 951e8dd
1 Parent(s): 52ba00f
Add comprehensive debug logging for Top 10 predictions
Browse files
- Add debug prints in app.py to track evaluation results
- Log tokenizer types and top5_predictions data structure
- Add detailed logging in html_generator.py for token mapping
- Track topk JSON generation process with error details
- Print pred structure and decoding attempts for debugging
Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
- app.py +21 -3
- visualization/html_generator.py +45 -8
app.py
CHANGED
|
@@ -199,24 +199,41 @@ def run_evaluation(text: str, progress=gr.Progress()):
|
|
| 199 |
|
| 200 |
try:
|
| 201 |
# Step 1: Evaluate Qwen (using cached model)
|
| 202 |
-
progress(
|
| 203 |
result_qwen = evaluate_hf_single_sample(
|
| 204 |
_qwen_model,
|
| 205 |
_qwen_tokenizer,
|
| 206 |
text,
|
| 207 |
bos_mode="add_newline_token"
|
| 208 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 209 |
|
| 210 |
# Step 2: Evaluate RWKV7 (using cached model)
|
| 211 |
-
progress(
|
| 212 |
result_rwkv = evaluate_rwkv7_single_sample(
|
| 213 |
_rwkv_model,
|
| 214 |
_rwkv_tokenizer,
|
| 215 |
text
|
| 216 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 217 |
|
| 218 |
# Step 3: Generate visualization
|
| 219 |
-
progress(
|
|
|
|
|
|
|
|
|
|
| 220 |
html = generate_comparison_html(
|
| 221 |
text=text,
|
| 222 |
byte_losses_a=result_qwen["byte_wise_losses"],
|
|
@@ -230,6 +247,7 @@ def run_evaluation(text: str, progress=gr.Progress()):
|
|
| 230 |
model_type_a="hf",
|
| 231 |
model_type_b="rwkv7"
|
| 232 |
)
|
|
|
|
| 233 |
|
| 234 |
# Wrap HTML for iframe display
|
| 235 |
wrapped_html = wrap_html_in_iframe(html)
|
|
|
|
| 199 |
|
| 200 |
try:
|
| 201 |
# Step 1: Evaluate Qwen (using cached model)
|
| 202 |
+
progress(desc="Evaluating with Qwen3...")
|
| 203 |
result_qwen = evaluate_hf_single_sample(
|
| 204 |
_qwen_model,
|
| 205 |
_qwen_tokenizer,
|
| 206 |
text,
|
| 207 |
bos_mode="add_newline_token"
|
| 208 |
)
|
| 209 |
+
print(f"[DEBUG] Qwen evaluation complete")
|
| 210 |
+
print(f"[DEBUG] Qwen top5_predictions type: {type(result_qwen.get('top5_predictions'))}")
|
| 211 |
+
print(f"[DEBUG] Qwen top5_predictions length: {len(result_qwen.get('top5_predictions', []))}")
|
| 212 |
+
if result_qwen.get('top5_predictions'):
|
| 213 |
+
print(f"[DEBUG] Qwen first prediction sample: {result_qwen['top5_predictions'][0]}")
|
| 214 |
+
print(f"[DEBUG] Qwen tokenizer type: {type(result_qwen.get('tokenizer'))}")
|
| 215 |
+
print(f"[DEBUG] Qwen tokenizer: {result_qwen.get('tokenizer')}")
|
| 216 |
|
| 217 |
# Step 2: Evaluate RWKV7 (using cached model)
|
| 218 |
+
progress(desc="Evaluating with RWKV7...")
|
| 219 |
result_rwkv = evaluate_rwkv7_single_sample(
|
| 220 |
_rwkv_model,
|
| 221 |
_rwkv_tokenizer,
|
| 222 |
text
|
| 223 |
)
|
| 224 |
+
print(f"[DEBUG] RWKV evaluation complete")
|
| 225 |
+
print(f"[DEBUG] RWKV top5_predictions type: {type(result_rwkv.get('top5_predictions'))}")
|
| 226 |
+
print(f"[DEBUG] RWKV top5_predictions length: {len(result_rwkv.get('top5_predictions', []))}")
|
| 227 |
+
if result_rwkv.get('top5_predictions'):
|
| 228 |
+
print(f"[DEBUG] RWKV first prediction sample: {result_rwkv['top5_predictions'][0]}")
|
| 229 |
+
print(f"[DEBUG] RWKV tokenizer type: {type(result_rwkv.get('tokenizer'))}")
|
| 230 |
+
print(f"[DEBUG] RWKV tokenizer: {result_rwkv.get('tokenizer')}")
|
| 231 |
|
| 232 |
# Step 3: Generate visualization
|
| 233 |
+
progress(desc="Generating visualization...")
|
| 234 |
+
print(f"[DEBUG] Starting HTML generation...")
|
| 235 |
+
print(f"[DEBUG] Passing tokenizer_a: {result_qwen['tokenizer']}")
|
| 236 |
+
print(f"[DEBUG] Passing tokenizer_b: {result_rwkv['tokenizer']}")
|
| 237 |
html = generate_comparison_html(
|
| 238 |
text=text,
|
| 239 |
byte_losses_a=result_qwen["byte_wise_losses"],
|
|
|
|
| 247 |
model_type_a="hf",
|
| 248 |
model_type_b="rwkv7"
|
| 249 |
)
|
| 250 |
+
print(f"[DEBUG] HTML generation complete")
|
| 251 |
|
| 252 |
# Wrap HTML for iframe display
|
| 253 |
wrapped_html = wrap_html_in_iframe(html)
|
visualization/html_generator.py
CHANGED
|
@@ -274,6 +274,25 @@ def generate_comparison_html(
|
|
| 274 |
model_a_token_ranges = build_byte_to_token_map(text, tokenizer_a, model_type_a)
|
| 275 |
model_b_token_ranges = build_byte_to_token_map(text, tokenizer_b, model_type_b)
|
| 276 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 277 |
def get_tokens_for_range(byte_start, byte_end, token_list):
|
| 278 |
result = []
|
| 279 |
for idx, (t_start, t_end, t_str) in enumerate(token_list):
|
|
@@ -356,18 +375,36 @@ def generate_comparison_html(
|
|
| 356 |
model_a_token_idx = find_token_for_byte(byte_start, model_a_token_ranges)
|
| 357 |
if model_a_token_idx is not None and model_a_token_idx < len(topk_predictions_a):
|
| 358 |
pred = topk_predictions_a[model_a_token_idx]
|
| 359 |
-
|
| 360 |
-
|
| 361 |
-
|
| 362 |
-
|
| 363 |
-
|
| 364 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 365 |
if topk_predictions_b is not None and model_b_token_ranges:
|
| 366 |
model_b_token_idx = find_token_for_byte(byte_start, model_b_token_ranges)
|
| 367 |
if model_b_token_idx is not None and model_b_token_idx < len(topk_predictions_b):
|
| 368 |
pred = topk_predictions_b[model_b_token_idx]
|
| 369 |
-
|
| 370 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 371 |
|
| 372 |
token_deltas = deltas[byte_start:byte_end]
|
| 373 |
avg_token_delta = sum(token_deltas) / len(token_deltas) if token_deltas else 0
|
|
|
|
| 274 |
model_a_token_ranges = build_byte_to_token_map(text, tokenizer_a, model_type_a)
|
| 275 |
model_b_token_ranges = build_byte_to_token_map(text, tokenizer_b, model_type_b)
|
| 276 |
|
| 277 |
+
print(f"[DEBUG HTML] tokenizer_a: {tokenizer_a}")
|
| 278 |
+
print(f"[DEBUG HTML] tokenizer_b: {tokenizer_b}")
|
| 279 |
+
print(f"[DEBUG HTML] model_type_a: {model_type_a}")
|
| 280 |
+
print(f"[DEBUG HTML] model_type_b: {model_type_b}")
|
| 281 |
+
print(f"[DEBUG HTML] model_a_token_ranges length: {len(model_a_token_ranges)}")
|
| 282 |
+
print(f"[DEBUG HTML] model_b_token_ranges length: {len(model_b_token_ranges)}")
|
| 283 |
+
if model_a_token_ranges:
|
| 284 |
+
print(f"[DEBUG HTML] model_a first token range: {model_a_token_ranges[0]}")
|
| 285 |
+
if model_b_token_ranges:
|
| 286 |
+
print(f"[DEBUG HTML] model_b first token range: {model_b_token_ranges[0]}")
|
| 287 |
+
print(f"[DEBUG HTML] topk_predictions_a type: {type(topk_predictions_a)}")
|
| 288 |
+
print(f"[DEBUG HTML] topk_predictions_b type: {type(topk_predictions_b)}")
|
| 289 |
+
if topk_predictions_a:
|
| 290 |
+
print(f"[DEBUG HTML] topk_predictions_a length: {len(topk_predictions_a)}")
|
| 291 |
+
print(f"[DEBUG HTML] topk_predictions_a[0]: {topk_predictions_a[0]}")
|
| 292 |
+
if topk_predictions_b:
|
| 293 |
+
print(f"[DEBUG HTML] topk_predictions_b length: {len(topk_predictions_b)}")
|
| 294 |
+
print(f"[DEBUG HTML] topk_predictions_b[0]: {topk_predictions_b[0]}")
|
| 295 |
+
|
| 296 |
def get_tokens_for_range(byte_start, byte_end, token_list):
|
| 297 |
result = []
|
| 298 |
for idx, (t_start, t_end, t_str) in enumerate(token_list):
|
|
|
|
| 375 |
model_a_token_idx = find_token_for_byte(byte_start, model_a_token_ranges)
|
| 376 |
if model_a_token_idx is not None and model_a_token_idx < len(topk_predictions_a):
|
| 377 |
pred = topk_predictions_a[model_a_token_idx]
|
| 378 |
+
print(f"[DEBUG] Processing token at byte {byte_start}, model_a_token_idx={model_a_token_idx}")
|
| 379 |
+
print(f"[DEBUG] pred structure: {pred}")
|
| 380 |
+
try:
|
| 381 |
+
decoded_pred = [
|
| 382 |
+
pred[0],
|
| 383 |
+
pred[1],
|
| 384 |
+
[[tid, prob, decode_token(tid, tokenizer_a, model_type_a)] for tid, prob in pred[2]],
|
| 385 |
+
]
|
| 386 |
+
topk_a_json = json.dumps(decoded_pred, ensure_ascii=False)
|
| 387 |
+
print(f"[DEBUG] Successfully generated topk_a_json")
|
| 388 |
+
except Exception as e:
|
| 389 |
+
print(f"[DEBUG] Error generating topk_a_json: {e}")
|
| 390 |
+
print(f"[DEBUG] pred[2] type: {type(pred[2])}")
|
| 391 |
+
if len(pred) > 2:
|
| 392 |
+
print(f"[DEBUG] pred[2] content: {pred[2][:3]}") # First 3 items
|
| 393 |
if topk_predictions_b is not None and model_b_token_ranges:
|
| 394 |
model_b_token_idx = find_token_for_byte(byte_start, model_b_token_ranges)
|
| 395 |
if model_b_token_idx is not None and model_b_token_idx < len(topk_predictions_b):
|
| 396 |
pred = topk_predictions_b[model_b_token_idx]
|
| 397 |
+
print(f"[DEBUG] Processing token at byte {byte_start}, model_b_token_idx={model_b_token_idx}")
|
| 398 |
+
print(f"[DEBUG] pred structure: {pred}")
|
| 399 |
+
try:
|
| 400 |
+
decoded_pred = [pred[0], pred[1], [[tid, prob, decode_token(tid, tokenizer_b, model_type_b)] for tid, prob in pred[2]]]
|
| 401 |
+
topk_b_json = json.dumps(decoded_pred, ensure_ascii=False)
|
| 402 |
+
print(f"[DEBUG] Successfully generated topk_b_json")
|
| 403 |
+
except Exception as e:
|
| 404 |
+
print(f"[DEBUG] Error generating topk_b_json: {e}")
|
| 405 |
+
print(f"[DEBUG] pred[2] type: {type(pred[2])}")
|
| 406 |
+
if len(pred) > 2:
|
| 407 |
+
print(f"[DEBUG] pred[2] content: {pred[2][:3]}") # First 3 items
|
| 408 |
|
| 409 |
token_deltas = deltas[byte_start:byte_end]
|
| 410 |
avg_token_delta = sum(token_deltas) / len(token_deltas) if token_deltas else 0
|