nathanael-fijalkow committed on
Commit
aee63fa
·
1 Parent(s): c8419c3

removed interactive demo and improved username finding

Browse files
Files changed (3) hide show
  1. app.py +103 -60
  2. requirements.txt +1 -1
  3. src/evaluate.py +5 -0
app.py CHANGED
@@ -307,6 +307,42 @@ def play_move(
307
  )
308
 
309
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
310
  def evaluate_legal_moves(
311
  model_id: str,
312
  n_positions: int,
@@ -332,8 +368,16 @@ def evaluate_legal_moves(
332
  progress(0.2, desc=f"Testing {n_positions} positions...")
333
  results = evaluator.evaluate_legal_moves(n_positions=n_positions, verbose=False)
334
 
335
- # Extract user_id from model_id (format: user_id/model_name)
336
- user_id = model_id.split('/')[0] if '/' in model_id else 'unknown'
 
 
 
 
 
 
 
 
337
 
338
  # Update leaderboard - only if improved
339
  leaderboard = load_leaderboard()
@@ -579,62 +623,62 @@ with gr.Blocks(
579
  - Try weight tying to save parameters
580
  """)
581
 
582
- # Interactive Demo Tab
583
- with gr.TabItem("🎮 Interactive Demo"):
584
- gr.Markdown("### Test a Model")
585
-
586
- with gr.Row():
587
- with gr.Column(scale=1):
588
- with gr.Row():
589
- model_dropdown = gr.Dropdown(
590
- choices=get_available_models(),
591
- label="Select Model",
592
- value=None,
593
- scale=4,
594
- )
595
- refresh_models_btn = gr.Button("🔄", scale=1)
596
- temperature_slider = gr.Slider(
597
- minimum=0.1,
598
- maximum=2.0,
599
- value=0.7,
600
- step=0.1,
601
- label="Temperature",
602
- )
603
-
604
- with gr.Row():
605
- play_btn = gr.Button("Model Move", variant="primary")
606
- reset_btn = gr.Button("Reset")
607
-
608
- status_text = gr.Textbox(label="Status", interactive=False)
609
-
610
- with gr.Column(scale=1):
611
- board_display = gr.HTML(value=render_board_svg())
612
-
613
- # Hidden state
614
- current_fen = gr.State("startpos")
615
- move_history = gr.State("")
616
-
617
- def refresh_models():
618
- return gr.update(choices=get_available_models())
619
-
620
- refresh_models_btn.click(
621
- refresh_models,
622
- outputs=[model_dropdown],
623
- )
624
-
625
- play_btn.click(
626
- play_move,
627
- inputs=[model_dropdown, current_fen, move_history, temperature_slider],
628
- outputs=[board_display, current_fen, move_history, status_text],
629
- )
630
-
631
- def reset_game():
632
- return render_board_svg(), "startpos", "", "Game reset!"
633
-
634
- reset_btn.click(
635
- reset_game,
636
- outputs=[board_display, current_fen, move_history, status_text],
637
- )
638
 
639
  # Legal Move Evaluation Tab
640
  with gr.TabItem("Legal Move Eval"):
@@ -651,9 +695,8 @@ with gr.Blocks(
651
  legal_model = gr.Dropdown(
652
  choices=get_available_models(),
653
  label="Model to Evaluate",
654
- scale=4,
655
  )
656
- refresh_legal_models_btn = gr.Button("🔄", scale=1)
657
  legal_positions = gr.Slider(
658
  minimum=100,
659
  maximum=1000,
 
307
  )
308
 
309
 
310
def get_model_submitter(model_id: str) -> Optional[str]:
    """Extract the submitter's username from the model's README on HuggingFace.

    The README is searched for the marker
    ``**Submitted by**: [username](https://huggingface.co/username)``.
    If the README is available but contains no such marker, the ``author``
    field returned by ``model_info`` is used instead.

    Args:
        model_id: Repository id of the model on the Hub.

    Returns:
        The submitter's username, or None if the submitter cannot be
        determined (any error during the lookup is printed and treated
        as "unknown" rather than propagated).
    """
    try:
        from huggingface_hub import hf_hub_download
        import re

        # Download the README.md from the model repo
        readme_path = hf_hub_download(
            repo_id=model_id,
            filename="README.md",
            token=HF_TOKEN,
        )

        # Read explicitly as UTF-8: without an encoding, open() falls back to
        # the platform locale and can fail on READMEs containing non-ASCII
        # text. Undecodable bytes are replaced instead of aborting, since the
        # marker we look for is plain ASCII.
        with open(readme_path, "r", encoding="utf-8", errors="replace") as f:
            readme_content = f.read()

        # Look for the pattern: **Submitted by**: [username](https://huggingface.co/username)
        match = re.search(r'\*\*Submitted by\*\*:\s*\[([^\]]+)\]', readme_content)
        if match:
            return match.group(1)

        # Fallback: try to get from model info
        from huggingface_hub import model_info
        info = model_info(model_id, token=HF_TOKEN)
        if info.author:
            return info.author

    except Exception as e:
        # Best-effort lookup: report the problem and let the caller handle None.
        print(f"Could not extract submitter from model: {e}")

    return None
344
+
345
+
346
  def evaluate_legal_moves(
347
  model_id: str,
348
  n_positions: int,
 
368
  progress(0.2, desc=f"Testing {n_positions} positions...")
369
  results = evaluator.evaluate_legal_moves(n_positions=n_positions, verbose=False)
370
 
371
+ # Extract user_id from model's README (submitted by field)
372
+ user_id = get_model_submitter(model_id)
373
+ if user_id is None:
374
+ return f"""## Evaluation Failed
375
+
376
+ Could not determine the submitter for model `{model_id}`.
377
+
378
+ Please ensure your model was submitted using the official submission script (`submit.py`),
379
+ which adds the required metadata to the README.md file.
380
+ """
381
 
382
  # Update leaderboard - only if improved
383
  leaderboard = load_leaderboard()
 
623
  - Try weight tying to save parameters
624
  """)
625
 
626
+ # Interactive Demo Tab (commented out for now)
627
+ # with gr.TabItem("🎮 Interactive Demo"):
628
+ # gr.Markdown("### Test a Model")
629
+ #
630
+ # with gr.Row():
631
+ # with gr.Column(scale=1):
632
+ # with gr.Row():
633
+ # model_dropdown = gr.Dropdown(
634
+ # choices=get_available_models(),
635
+ # label="Select Model",
636
+ # value=None,
637
+ # scale=4,
638
+ # )
639
+ # refresh_models_btn = gr.Button("🔄", scale=1)
640
+ # temperature_slider = gr.Slider(
641
+ # minimum=0.1,
642
+ # maximum=2.0,
643
+ # value=0.7,
644
+ # step=0.1,
645
+ # label="Temperature",
646
+ # )
647
+ #
648
+ # with gr.Row():
649
+ # play_btn = gr.Button("Model Move", variant="primary")
650
+ # reset_btn = gr.Button("Reset")
651
+ #
652
+ # status_text = gr.Textbox(label="Status", interactive=False)
653
+ #
654
+ # with gr.Column(scale=1):
655
+ # board_display = gr.HTML(value=render_board_svg())
656
+ #
657
+ # # Hidden state
658
+ # current_fen = gr.State("startpos")
659
+ # move_history = gr.State("")
660
+ #
661
+ # def refresh_models():
662
+ # return gr.update(choices=get_available_models())
663
+ #
664
+ # refresh_models_btn.click(
665
+ # refresh_models,
666
+ # outputs=[model_dropdown],
667
+ # )
668
+ #
669
+ # play_btn.click(
670
+ # play_move,
671
+ # inputs=[model_dropdown, current_fen, move_history, temperature_slider],
672
+ # outputs=[board_display, current_fen, move_history, status_text],
673
+ # )
674
+ #
675
+ # def reset_game():
676
+ # return render_board_svg(), "startpos", "", "Game reset!"
677
+ #
678
+ # reset_btn.click(
679
+ # reset_game,
680
+ # outputs=[board_display, current_fen, move_history, status_text],
681
+ # )
682
 
683
  # Legal Move Evaluation Tab
684
  with gr.TabItem("Legal Move Eval"):
 
695
  legal_model = gr.Dropdown(
696
  choices=get_available_models(),
697
  label="Model to Evaluate",
 
698
  )
699
+ refresh_legal_models_btn = gr.Button("🔄", scale=0, min_width=40)
700
  legal_positions = gr.Slider(
701
  minimum=100,
702
  maximum=1000,
requirements.txt CHANGED
@@ -1,4 +1,4 @@
1
- gradio>=4.44.0
2
  transformers>=4.40.0
3
  torch>=2.0.0
4
  python-chess>=1.999
 
1
+ gradio>=4.44.1
2
  transformers>=4.40.0
3
  torch>=2.0.0
4
  python-chess>=1.999
src/evaluate.py CHANGED
@@ -300,6 +300,7 @@ class ChessEvaluator:
300
  n_positions: int = 1000,
301
  temperature: float = 0.7,
302
  verbose: bool = True,
 
303
  ) -> dict:
304
  """
305
  Evaluate the model's ability to generate legal moves.
@@ -311,10 +312,14 @@ class ChessEvaluator:
311
  n_positions: Number of positions to test.
312
  temperature: Sampling temperature.
313
  verbose: Whether to print progress.
 
314
 
315
  Returns:
316
  Dictionary with legal move statistics.
317
  """
 
 
 
318
  results = {
319
  "total_positions": 0,
320
  "legal_first_try": 0,
 
300
  n_positions: int = 1000,
301
  temperature: float = 0.7,
302
  verbose: bool = True,
303
+ seed: int = 42,
304
  ) -> dict:
305
  """
306
  Evaluate the model's ability to generate legal moves.
 
312
  n_positions: Number of positions to test.
313
  temperature: Sampling temperature.
314
  verbose: Whether to print progress.
315
+ seed: Random seed for reproducibility.
316
 
317
  Returns:
318
  Dictionary with legal move statistics.
319
  """
320
+ # Set seed for deterministic evaluation
321
+ random.seed(seed)
322
+
323
  results = {
324
  "total_positions": 0,
325
  "legal_first_try": 0,