Commit fa6172d · Parent: f59198d
Remove debug logging and swap Model A/B positions
Changes:
- Removed all debug print statements from app.py and html_generator.py
- Swapped model positions: RWKV7 is now Model A, Qwen3 is now Model B
- Green now indicates RWKV7 performs better, Red indicates Qwen3 performs better
Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
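
The color convention in the last change bullet boils down to coloring each byte by the sign of the per-byte loss difference between the two models. A minimal sketch of that mapping, assuming the viewer works from the byte_wise_losses arrays passed to generate_comparison_html below (the function name and tie handling are illustrative, not taken from the Space's code):

    def color_for_byte(loss_a: float, loss_b: float) -> str:
        # After this commit: Model A = RWKV7, Model B = Qwen3.
        diff = loss_b - loss_a  # positive when Model A assigns this byte lower loss
        if diff > 0:
            return "green"  # RWKV7 performs better on this byte
        if diff < 0:
            return "red"    # Qwen3 performs better on this byte
        return "neutral"    # equal loss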
- app.py +10 -30
- visualization/html_generator.py +2 -41
app.py
CHANGED

@@ -208,13 +208,6 @@ def run_evaluation(text: str, progress=gr.Progress()):
             text,
             bos_mode="add_newline_token"
         )
-        print(f"[DEBUG] Qwen evaluation complete")
-        print(f"[DEBUG] Qwen top5_predictions type: {type(result_qwen.get('top5_predictions'))}")
-        print(f"[DEBUG] Qwen top5_predictions length: {len(result_qwen.get('top5_predictions', []))}")
-        if result_qwen.get('top5_predictions'):
-            print(f"[DEBUG] Qwen first prediction sample: {result_qwen['top5_predictions'][0]}")
-        print(f"[DEBUG] Qwen tokenizer type: {type(result_qwen.get('tokenizer'))}")
-        print(f"[DEBUG] Qwen tokenizer: {result_qwen.get('tokenizer')}")

         # Step 2: Evaluate RWKV7 (using cached model)
         progress(0, desc="Evaluating with RWKV7...")
@@ -223,39 +216,26 @@ def run_evaluation(text: str, progress=gr.Progress()):
             _rwkv_tokenizer,
             text
         )
-        print(f"[DEBUG] RWKV evaluation complete")
-        print(f"[DEBUG] RWKV top5_predictions type: {type(result_rwkv.get('top5_predictions'))}")
-        print(f"[DEBUG] RWKV top5_predictions length: {len(result_rwkv.get('top5_predictions', []))}")
-        if result_rwkv.get('top5_predictions'):
-            print(f"[DEBUG] RWKV first prediction sample: {result_rwkv['top5_predictions'][0]}")
-        print(f"[DEBUG] RWKV tokenizer type: {type(result_rwkv.get('tokenizer'))}")
-        print(f"[DEBUG] RWKV tokenizer: {result_rwkv.get('tokenizer')}")

         # Step 3: Generate visualization
         progress(0, desc="Generating visualization...")
-        print(f"[DEBUG] Starting HTML generation...")
-        print(f"[DEBUG] Passing tokenizer_a: {result_qwen['tokenizer']}")
-        print(f"[DEBUG] Passing tokenizer_b: {result_rwkv['tokenizer']}")
         html = generate_comparison_html(
             text=text,
-            byte_losses_a=result_qwen["byte_wise_losses"],
-            byte_losses_b=result_rwkv["byte_wise_losses"],
-            model_a_name="Qwen3-1.7B-Base",
-            model_b_name="RWKV7-G1C-1.5B",
-            topk_predictions_a=result_qwen["top5_predictions"],
-            topk_predictions_b=result_rwkv["top5_predictions"],
-            tokenizer_a=result_qwen["tokenizer"],
-            tokenizer_b=result_rwkv["tokenizer"],
-            model_type_a="hf",
-            model_type_b="rwkv7"
+            byte_losses_a=result_rwkv["byte_wise_losses"],
+            byte_losses_b=result_qwen["byte_wise_losses"],
+            model_a_name="RWKV7-G1C-1.5B",
+            model_b_name="Qwen3-1.7B-Base",
+            topk_predictions_a=result_rwkv["top5_predictions"],
+            topk_predictions_b=result_qwen["top5_predictions"],
+            tokenizer_a=result_rwkv["tokenizer"],
+            tokenizer_b=result_qwen["tokenizer"],
+            model_type_a="rwkv7",
+            model_type_b="hf"
         )
-        print(f"[DEBUG] HTML generation complete")

         # Wrap HTML for iframe display
         wrapped_html = wrap_html_in_iframe(html)

-        print("Done!")
-
         return wrapped_html

     except torch.cuda.OutOfMemoryError:
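The hunk above also keeps the wrap_html_in_iframe call. Its implementation is not part of this commit; the sketch below shows one common way such a wrapper works, assuming it embeds the generated document through an iframe srcdoc attribute so the visualization's styles and scripts stay isolated from the surrounding Gradio page (the signature and the height default are assumptions, not code from the Space):

    import html

    def wrap_html_in_iframe(inner_html: str, height: int = 800) -> str:
        # srcdoc takes a full HTML document as an escaped attribute value;
        # html.escape with quote=True also escapes the double quotes inside it.
        escaped = html.escape(inner_html, quote=True)
        return (
            f'<iframe srcdoc="{escaped}" '
            f'style="width:100%; height:{height}px; border:none;"></iframe>'
        )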
visualization/html_generator.py
CHANGED

@@ -274,25 +274,6 @@ def generate_comparison_html(
     model_a_token_ranges = build_byte_to_token_map(text, tokenizer_a, model_type_a)
     model_b_token_ranges = build_byte_to_token_map(text, tokenizer_b, model_type_b)

-    print(f"[DEBUG HTML] tokenizer_a: {tokenizer_a}")
-    print(f"[DEBUG HTML] tokenizer_b: {tokenizer_b}")
-    print(f"[DEBUG HTML] model_type_a: {model_type_a}")
-    print(f"[DEBUG HTML] model_type_b: {model_type_b}")
-    print(f"[DEBUG HTML] model_a_token_ranges length: {len(model_a_token_ranges)}")
-    print(f"[DEBUG HTML] model_b_token_ranges length: {len(model_b_token_ranges)}")
-    if model_a_token_ranges:
-        print(f"[DEBUG HTML] model_a first token range: {model_a_token_ranges[0]}")
-    if model_b_token_ranges:
-        print(f"[DEBUG HTML] model_b first token range: {model_b_token_ranges[0]}")
-    print(f"[DEBUG HTML] topk_predictions_a type: {type(topk_predictions_a)}")
-    print(f"[DEBUG HTML] topk_predictions_b type: {type(topk_predictions_b)}")
-    if topk_predictions_a:
-        print(f"[DEBUG HTML] topk_predictions_a length: {len(topk_predictions_a)}")
-        print(f"[DEBUG HTML] topk_predictions_a[0]: {topk_predictions_a[0]}")
-    if topk_predictions_b:
-        print(f"[DEBUG HTML] topk_predictions_b length: {len(topk_predictions_b)}")
-        print(f"[DEBUG HTML] topk_predictions_b[0]: {topk_predictions_b[0]}")
-
     def get_tokens_for_range(byte_start, byte_end, token_list):
         result = []
         for idx, (t_start, t_end, t_str) in enumerate(token_list):
@@ -411,9 +392,6 @@ def generate_comparison_html(
             model_a_token_idx = find_token_for_byte(byte_start, model_a_token_ranges)
             if model_a_token_idx is not None and model_a_token_idx < len(topk_predictions_a):
                 pred = topk_predictions_a[model_a_token_idx]
-                if token_count == 0:  # Only print for first token
-                    print(f"[DEBUG] Processing token at byte {byte_start}, model_a_token_idx={model_a_token_idx}")
-                    print(f"[DEBUG] pred structure: {pred}")
                 try:
                     decoded_pred = [
                         pred[0],
@@ -423,36 +401,19 @@ def generate_comparison_html(
                     # Use base64 encoding to avoid escaping issues
                     import base64
                     topk_a_json = base64.b64encode(json.dumps(decoded_pred, ensure_ascii=False).encode('utf-8')).decode('ascii')
-                    if token_count == 0:
-                        print(f"[DEBUG] Successfully generated topk_a_json (base64)")
-                        print(f"[DEBUG] Original JSON length: {len(json.dumps(decoded_pred, ensure_ascii=False))}")
-                        print(f"[DEBUG] Base64 length: {len(topk_a_json)}")
                 except Exception as e:
-
-                    print(f"[DEBUG] pred[2] type: {type(pred[2])}")
-                    if len(pred) > 2:
-                        print(f"[DEBUG] pred[2] content: {pred[2][:3]}")
+                    pass
             if topk_predictions_b is not None and model_b_token_ranges:
                 model_b_token_idx = find_token_for_byte(byte_start, model_b_token_ranges)
                 if model_b_token_idx is not None and model_b_token_idx < len(topk_predictions_b):
                     pred = topk_predictions_b[model_b_token_idx]
-                    if token_count == 0:  # Only print for first token
-                        print(f"[DEBUG] Processing token at byte {byte_start}, model_b_token_idx={model_b_token_idx}")
-                        print(f"[DEBUG] pred structure: {pred}")
                     try:
                         decoded_pred = [pred[0], pred[1], [[tid, prob, decode_token(tid, tokenizer_b, model_type_b)] for tid, prob in pred[2]]]
                         # Use base64 encoding to avoid escaping issues
                         import base64
                         topk_b_json = base64.b64encode(json.dumps(decoded_pred, ensure_ascii=False).encode('utf-8')).decode('ascii')
-                        if token_count == 0:
-                            print(f"[DEBUG] Successfully generated topk_b_json (base64)")
-                            print(f"[DEBUG] Original JSON length: {len(json.dumps(decoded_pred, ensure_ascii=False))}")
-                            print(f"[DEBUG] Base64 length: {len(topk_b_json)}")
                     except Exception as e:
-
-                        print(f"[DEBUG] pred[2] type: {type(pred[2])}")
-                        if len(pred) > 2:
-                            print(f"[DEBUG] pred[2] content: {pred[2][:3]}")
+                        pass

                 token_count += 1

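
Because the two models tokenize the same text differently, generate_comparison_html aligns them at the byte level: build_byte_to_token_map turns each tokenizer's output into (byte_start, byte_end, token_str) ranges, and find_token_for_byte looks up which token covers a given byte. Neither helper is shown in this commit; below is a self-contained sketch of that range format, inferred from the loop "for idx, (t_start, t_end, t_str) in enumerate(token_list)" above (names carry a _sketch suffix to mark them as illustrations):

    def build_byte_to_token_map_sketch(token_strs: list[str]) -> list[tuple[int, int, str]]:
        # Assumes the decoded token strings concatenate back to the original text.
        ranges, pos = [], 0
        for tok in token_strs:
            n = len(tok.encode("utf-8"))  # ranges count UTF-8 bytes, not characters
            ranges.append((pos, pos + n, tok))
            pos += n
        return ranges

    def find_token_for_byte_sketch(byte_pos: int, ranges: list[tuple[int, int, str]]) -> int | None:
        for idx, (start, end, _) in enumerate(ranges):
            if start <= byte_pos < end:
                return idx
        return None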
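
The base64 step retained in the hunks above sidesteps HTML escaping: each token's top-k prediction list is serialized to JSON and then base64-encoded, so the payload contains no quotes or angle brackets when placed in an HTML attribute. A round-trip of that encoding in isolation (the prediction shape is inferred from the decoded_pred construction above, and the browser-side decode, presumably atob plus JSON.parse, is not part of this commit):

    import base64
    import json

    # Inferred shape: [field0, field1, [[token_id, prob, decoded_str], ...]]
    decoded_pred = ["example", 1.23, [[101, 0.42, "the"], [102, 0.17, " a"]]]

    payload = base64.b64encode(
        json.dumps(decoded_pred, ensure_ascii=False).encode("utf-8")
    ).decode("ascii")

    # What a client recovers after base64-decoding the attribute value:
    assert json.loads(base64.b64decode(payload).decode("utf-8")) == decoded_pred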