Spaces:
Running
Running
Commit
·
551785b
1
Parent(s):
36fc570
minor fix + link to GitHub
Browse files
README.md
CHANGED
|
@@ -14,6 +14,8 @@ license: mit
|
|
| 14 |
|
| 15 |
This Space hosts the evaluation arena for the LLM Chess Challenge.
|
| 16 |
|
|
|
|
|
|
|
| 17 |
## Features
|
| 18 |
|
| 19 |
- **Interactive Demo**: Test any submitted model against Stockfish
|
|
|
|
| 14 |
|
| 15 |
This Space hosts the evaluation arena for the LLM Chess Challenge.
|
| 16 |
|
| 17 |
+
**Chess Challenge Template**: https://github.com/nathanael-fijalkow/ChessChallengeTemplate
|
| 18 |
+
|
| 19 |
## Features
|
| 20 |
|
| 21 |
- **Interactive Demo**: Test any submitted model against Stockfish
|
app.py
CHANGED
|
@@ -495,6 +495,42 @@ with gr.Blocks(
|
|
| 495 |
""")
|
| 496 |
|
| 497 |
with gr.Tabs():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 498 |
# Leaderboard Tab
|
| 499 |
with gr.TabItem("🏆 Leaderboard"):
|
| 500 |
gr.Markdown("### Current Rankings")
|
|
@@ -622,42 +658,6 @@ with gr.Blocks(
|
|
| 622 |
inputs=[eval_model, eval_level, eval_games],
|
| 623 |
outputs=eval_results,
|
| 624 |
)
|
| 625 |
-
|
| 626 |
-
# Submission Guide Tab
|
| 627 |
-
with gr.TabItem("How to Submit"):
|
| 628 |
-
gr.Markdown(f"""
|
| 629 |
-
### Submitting Your Model
|
| 630 |
-
|
| 631 |
-
1. **Train your model** using the Chess Challenge template
|
| 632 |
-
|
| 633 |
-
2. **Push to Hugging Face Hub**:
|
| 634 |
-
```python
|
| 635 |
-
from chess_challenge import ChessForCausalLM, ChessTokenizer
|
| 636 |
-
|
| 637 |
-
# After training
|
| 638 |
-
model.push_to_hub("your-model-name", organization="{ORGANIZATION}")
|
| 639 |
-
tokenizer.push_to_hub("your-model-name", organization="{ORGANIZATION}")
|
| 640 |
-
```
|
| 641 |
-
|
| 642 |
-
3. **Verify your submission** by checking the model page on Hugging Face
|
| 643 |
-
|
| 644 |
-
4. **Run evaluations**:
|
| 645 |
-
- First: **Legal Move Eval** (quick sanity check)
|
| 646 |
-
- Then: **Win Rate Eval** (full ELO computation)
|
| 647 |
-
|
| 648 |
-
### Requirements
|
| 649 |
-
|
| 650 |
-
- Model must be under **1M parameters**
|
| 651 |
-
- Model must use the `ChessConfig` and `ChessForCausalLM` classes
|
| 652 |
-
- Include the tokenizer with your submission
|
| 653 |
-
|
| 654 |
-
### Tips for Better Performance
|
| 655 |
-
|
| 656 |
-
- Experiment with different architectures (layers, heads, dimensions)
|
| 657 |
-
- Try weight tying to save parameters
|
| 658 |
-
- Fine-tune on high-quality games only
|
| 659 |
-
- Use RL fine-tuning with Stockfish rewards
|
| 660 |
-
""")
|
| 661 |
|
| 662 |
|
| 663 |
if __name__ == "__main__":
|
|
|
|
| 495 |
""")
|
| 496 |
|
| 497 |
with gr.Tabs():
|
| 498 |
+
# Submission Guide Tab
|
| 499 |
+
with gr.TabItem("How to Submit"):
|
| 500 |
+
gr.Markdown(f"""
|
| 501 |
+
### Submitting Your Model
|
| 502 |
+
|
| 503 |
+
1. **Train your model** using the [Chess Challenge Template](https://github.com/nathanael-fijalkow/ChessChallengeTemplate)
|
| 504 |
+
|
| 505 |
+
2. **Push to Hugging Face Hub**:
|
| 506 |
+
```python
|
| 507 |
+
from chess_challenge import ChessForCausalLM, ChessTokenizer
|
| 508 |
+
|
| 509 |
+
# After training
|
| 510 |
+
model.push_to_hub("your-model-name", organization="{ORGANIZATION}")
|
| 511 |
+
tokenizer.push_to_hub("your-model-name", organization="{ORGANIZATION}")
|
| 512 |
+
```
|
| 513 |
+
|
| 514 |
+
3. **Verify your submission** by checking the model page on Hugging Face
|
| 515 |
+
|
| 516 |
+
4. **Run evaluations**:
|
| 517 |
+
- First: **Legal Move Eval** (quick sanity check)
|
| 518 |
+
- Then: **Win Rate Eval** (full ELO computation)
|
| 519 |
+
|
| 520 |
+
### Requirements
|
| 521 |
+
|
| 522 |
+
- Model must be under **1M parameters**
|
| 523 |
+
- Model must use the `ChessConfig` and `ChessForCausalLM` classes
|
| 524 |
+
- Include the tokenizer with your submission
|
| 525 |
+
|
| 526 |
+
### Tips for Better Performance
|
| 527 |
+
|
| 528 |
+
- Experiment with different architectures (layers, heads, dimensions)
|
| 529 |
+
- Try weight tying to save parameters
|
| 530 |
+
- Fine-tune on high-quality games only
|
| 531 |
+
- Use RL fine-tuning with Stockfish rewards
|
| 532 |
+
""")
|
| 533 |
+
|
| 534 |
# Leaderboard Tab
|
| 535 |
with gr.TabItem("🏆 Leaderboard"):
|
| 536 |
gr.Markdown("### Current Rankings")
|
|
|
|
| 658 |
inputs=[eval_model, eval_level, eval_games],
|
| 659 |
outputs=eval_results,
|
| 660 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 661 |
|
| 662 |
|
| 663 |
if __name__ == "__main__":
|