Jellyfish042 Claude Sonnet 4.5 committed on
Commit
fa6172d
·
1 Parent(s): f59198d

Remove debug logging and swap Model A/B positions

Browse files

Changes:
- Removed all debug print statements from app.py and html_generator.py
- Swapped model positions: RWKV7 is now Model A, Qwen3 is now Model B
- Green now indicates RWKV7 performs better, Red indicates Qwen3 performs better

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>

Files changed (2) hide show
  1. app.py +10 -30
  2. visualization/html_generator.py +2 -41
app.py CHANGED
@@ -208,13 +208,6 @@ def run_evaluation(text: str, progress=gr.Progress()):
208
  text,
209
  bos_mode="add_newline_token"
210
  )
211
- print(f"[DEBUG] Qwen evaluation complete")
212
- print(f"[DEBUG] Qwen top5_predictions type: {type(result_qwen.get('top5_predictions'))}")
213
- print(f"[DEBUG] Qwen top5_predictions length: {len(result_qwen.get('top5_predictions', []))}")
214
- if result_qwen.get('top5_predictions'):
215
- print(f"[DEBUG] Qwen first prediction sample: {result_qwen['top5_predictions'][0]}")
216
- print(f"[DEBUG] Qwen tokenizer type: {type(result_qwen.get('tokenizer'))}")
217
- print(f"[DEBUG] Qwen tokenizer: {result_qwen.get('tokenizer')}")
218
 
219
  # Step 2: Evaluate RWKV7 (using cached model)
220
  progress(0, desc="Evaluating with RWKV7...")
@@ -223,39 +216,26 @@ def run_evaluation(text: str, progress=gr.Progress()):
223
  _rwkv_tokenizer,
224
  text
225
  )
226
- print(f"[DEBUG] RWKV evaluation complete")
227
- print(f"[DEBUG] RWKV top5_predictions type: {type(result_rwkv.get('top5_predictions'))}")
228
- print(f"[DEBUG] RWKV top5_predictions length: {len(result_rwkv.get('top5_predictions', []))}")
229
- if result_rwkv.get('top5_predictions'):
230
- print(f"[DEBUG] RWKV first prediction sample: {result_rwkv['top5_predictions'][0]}")
231
- print(f"[DEBUG] RWKV tokenizer type: {type(result_rwkv.get('tokenizer'))}")
232
- print(f"[DEBUG] RWKV tokenizer: {result_rwkv.get('tokenizer')}")
233
 
234
  # Step 3: Generate visualization
235
  progress(0, desc="Generating visualization...")
236
- print(f"[DEBUG] Starting HTML generation...")
237
- print(f"[DEBUG] Passing tokenizer_a: {result_qwen['tokenizer']}")
238
- print(f"[DEBUG] Passing tokenizer_b: {result_rwkv['tokenizer']}")
239
  html = generate_comparison_html(
240
  text=text,
241
- byte_losses_a=result_qwen["byte_wise_losses"],
242
- byte_losses_b=result_rwkv["byte_wise_losses"],
243
- model_a_name="Qwen3-1.7B-Base",
244
- model_b_name="RWKV7-G1C-1.5B",
245
- topk_predictions_a=result_qwen["top5_predictions"],
246
- topk_predictions_b=result_rwkv["top5_predictions"],
247
- tokenizer_a=result_qwen["tokenizer"],
248
- tokenizer_b=result_rwkv["tokenizer"],
249
- model_type_a="hf",
250
- model_type_b="rwkv7"
251
  )
252
- print(f"[DEBUG] HTML generation complete")
253
 
254
  # Wrap HTML for iframe display
255
  wrapped_html = wrap_html_in_iframe(html)
256
 
257
- print("Done!")
258
-
259
  return wrapped_html
260
 
261
  except torch.cuda.OutOfMemoryError:
 
208
  text,
209
  bos_mode="add_newline_token"
210
  )
 
 
 
 
 
 
 
211
 
212
  # Step 2: Evaluate RWKV7 (using cached model)
213
  progress(0, desc="Evaluating with RWKV7...")
 
216
  _rwkv_tokenizer,
217
  text
218
  )
 
 
 
 
 
 
 
219
 
220
  # Step 3: Generate visualization
221
  progress(0, desc="Generating visualization...")
 
 
 
222
  html = generate_comparison_html(
223
  text=text,
224
+ byte_losses_a=result_rwkv["byte_wise_losses"],
225
+ byte_losses_b=result_qwen["byte_wise_losses"],
226
+ model_a_name="RWKV7-G1C-1.5B",
227
+ model_b_name="Qwen3-1.7B-Base",
228
+ topk_predictions_a=result_rwkv["top5_predictions"],
229
+ topk_predictions_b=result_qwen["top5_predictions"],
230
+ tokenizer_a=result_rwkv["tokenizer"],
231
+ tokenizer_b=result_qwen["tokenizer"],
232
+ model_type_a="rwkv7",
233
+ model_type_b="hf"
234
  )
 
235
 
236
  # Wrap HTML for iframe display
237
  wrapped_html = wrap_html_in_iframe(html)
238
 
 
 
239
  return wrapped_html
240
 
241
  except torch.cuda.OutOfMemoryError:
visualization/html_generator.py CHANGED
@@ -274,25 +274,6 @@ def generate_comparison_html(
274
  model_a_token_ranges = build_byte_to_token_map(text, tokenizer_a, model_type_a)
275
  model_b_token_ranges = build_byte_to_token_map(text, tokenizer_b, model_type_b)
276
 
277
- print(f"[DEBUG HTML] tokenizer_a: {tokenizer_a}")
278
- print(f"[DEBUG HTML] tokenizer_b: {tokenizer_b}")
279
- print(f"[DEBUG HTML] model_type_a: {model_type_a}")
280
- print(f"[DEBUG HTML] model_type_b: {model_type_b}")
281
- print(f"[DEBUG HTML] model_a_token_ranges length: {len(model_a_token_ranges)}")
282
- print(f"[DEBUG HTML] model_b_token_ranges length: {len(model_b_token_ranges)}")
283
- if model_a_token_ranges:
284
- print(f"[DEBUG HTML] model_a first token range: {model_a_token_ranges[0]}")
285
- if model_b_token_ranges:
286
- print(f"[DEBUG HTML] model_b first token range: {model_b_token_ranges[0]}")
287
- print(f"[DEBUG HTML] topk_predictions_a type: {type(topk_predictions_a)}")
288
- print(f"[DEBUG HTML] topk_predictions_b type: {type(topk_predictions_b)}")
289
- if topk_predictions_a:
290
- print(f"[DEBUG HTML] topk_predictions_a length: {len(topk_predictions_a)}")
291
- print(f"[DEBUG HTML] topk_predictions_a[0]: {topk_predictions_a[0]}")
292
- if topk_predictions_b:
293
- print(f"[DEBUG HTML] topk_predictions_b length: {len(topk_predictions_b)}")
294
- print(f"[DEBUG HTML] topk_predictions_b[0]: {topk_predictions_b[0]}")
295
-
296
  def get_tokens_for_range(byte_start, byte_end, token_list):
297
  result = []
298
  for idx, (t_start, t_end, t_str) in enumerate(token_list):
@@ -411,9 +392,6 @@ def generate_comparison_html(
411
  model_a_token_idx = find_token_for_byte(byte_start, model_a_token_ranges)
412
  if model_a_token_idx is not None and model_a_token_idx < len(topk_predictions_a):
413
  pred = topk_predictions_a[model_a_token_idx]
414
- if token_count == 0: # Only print for first token
415
- print(f"[DEBUG] Processing token at byte {byte_start}, model_a_token_idx={model_a_token_idx}")
416
- print(f"[DEBUG] pred structure: {pred}")
417
  try:
418
  decoded_pred = [
419
  pred[0],
@@ -423,36 +401,19 @@ def generate_comparison_html(
423
  # Use base64 encoding to avoid escaping issues
424
  import base64
425
  topk_a_json = base64.b64encode(json.dumps(decoded_pred, ensure_ascii=False).encode('utf-8')).decode('ascii')
426
- if token_count == 0:
427
- print(f"[DEBUG] Successfully generated topk_a_json (base64)")
428
- print(f"[DEBUG] Original JSON length: {len(json.dumps(decoded_pred, ensure_ascii=False))}")
429
- print(f"[DEBUG] Base64 length: {len(topk_a_json)}")
430
  except Exception as e:
431
- print(f"[DEBUG] Error generating topk_a_json at byte {byte_start}: {e}")
432
- print(f"[DEBUG] pred[2] type: {type(pred[2])}")
433
- if len(pred) > 2:
434
- print(f"[DEBUG] pred[2] content: {pred[2][:3]}")
435
  if topk_predictions_b is not None and model_b_token_ranges:
436
  model_b_token_idx = find_token_for_byte(byte_start, model_b_token_ranges)
437
  if model_b_token_idx is not None and model_b_token_idx < len(topk_predictions_b):
438
  pred = topk_predictions_b[model_b_token_idx]
439
- if token_count == 0: # Only print for first token
440
- print(f"[DEBUG] Processing token at byte {byte_start}, model_b_token_idx={model_b_token_idx}")
441
- print(f"[DEBUG] pred structure: {pred}")
442
  try:
443
  decoded_pred = [pred[0], pred[1], [[tid, prob, decode_token(tid, tokenizer_b, model_type_b)] for tid, prob in pred[2]]]
444
  # Use base64 encoding to avoid escaping issues
445
  import base64
446
  topk_b_json = base64.b64encode(json.dumps(decoded_pred, ensure_ascii=False).encode('utf-8')).decode('ascii')
447
- if token_count == 0:
448
- print(f"[DEBUG] Successfully generated topk_b_json (base64)")
449
- print(f"[DEBUG] Original JSON length: {len(json.dumps(decoded_pred, ensure_ascii=False))}")
450
- print(f"[DEBUG] Base64 length: {len(topk_b_json)}")
451
  except Exception as e:
452
- print(f"[DEBUG] Error generating topk_b_json at byte {byte_start}: {e}")
453
- print(f"[DEBUG] pred[2] type: {type(pred[2])}")
454
- if len(pred) > 2:
455
- print(f"[DEBUG] pred[2] content: {pred[2][:3]}")
456
 
457
  token_count += 1
458
 
 
274
  model_a_token_ranges = build_byte_to_token_map(text, tokenizer_a, model_type_a)
275
  model_b_token_ranges = build_byte_to_token_map(text, tokenizer_b, model_type_b)
276
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
277
  def get_tokens_for_range(byte_start, byte_end, token_list):
278
  result = []
279
  for idx, (t_start, t_end, t_str) in enumerate(token_list):
 
392
  model_a_token_idx = find_token_for_byte(byte_start, model_a_token_ranges)
393
  if model_a_token_idx is not None and model_a_token_idx < len(topk_predictions_a):
394
  pred = topk_predictions_a[model_a_token_idx]
 
 
 
395
  try:
396
  decoded_pred = [
397
  pred[0],
 
401
  # Use base64 encoding to avoid escaping issues
402
  import base64
403
  topk_a_json = base64.b64encode(json.dumps(decoded_pred, ensure_ascii=False).encode('utf-8')).decode('ascii')
 
 
 
 
404
  except Exception as e:
405
+ pass
 
 
 
406
  if topk_predictions_b is not None and model_b_token_ranges:
407
  model_b_token_idx = find_token_for_byte(byte_start, model_b_token_ranges)
408
  if model_b_token_idx is not None and model_b_token_idx < len(topk_predictions_b):
409
  pred = topk_predictions_b[model_b_token_idx]
 
 
 
410
  try:
411
  decoded_pred = [pred[0], pred[1], [[tid, prob, decode_token(tid, tokenizer_b, model_type_b)] for tid, prob in pred[2]]]
412
  # Use base64 encoding to avoid escaping issues
413
  import base64
414
  topk_b_json = base64.b64encode(json.dumps(decoded_pred, ensure_ascii=False).encode('utf-8')).decode('ascii')
 
 
 
 
415
  except Exception as e:
416
+ pass
 
 
 
417
 
418
  token_count += 1
419