update model reference and improve output format in README and app.py
Browse files
README.md
CHANGED
|
@@ -13,7 +13,7 @@ short_description: Play against chess-playing reasoning LLM
|
|
| 13 |
|
| 14 |
# ♔ Chess Reasoner
|
| 15 |
|
| 16 |
-
Play chess against a reasoning LLM! This demo showcases **[nuriyev/chess-reasoner](https://huggingface.co/nuriyev/chess-reasoner)**, a Qwen3-4B model
|
| 17 |
|
| 18 |
## 🎮 How to Play
|
| 19 |
|
|
@@ -28,20 +28,20 @@ Play chess against a reasoning LLM! This demo showcases **[nuriyev/chess-reasone
|
|
| 28 |
|-----------|-------|
|
| 29 |
| Base Model | [Qwen/Qwen3-4B-Instruct-2507](https://huggingface.co/Qwen/Qwen3-4B-Instruct-2507) |
|
| 30 |
| Training | SFT with LoRA (r=32) on reasoning traces |
|
| 31 |
-
| Dataset | [
|
| 32 |
-
| Output Format | `<
|
| 33 |
|
| 34 |
## 📋 Output Format
|
| 35 |
|
| 36 |
The model outputs structured reasoning:
|
| 37 |
```
|
| 38 |
-
<
|
| 39 |
<uci_move>d4d8</uci_move>
|
| 40 |
```
|
| 41 |
|
| 42 |
## ⚠️ Limitations
|
| 43 |
|
| 44 |
-
|
| 45 |
|
| 46 |
## 🔗 Links
|
| 47 |
|
|
|
|
| 13 |
|
| 14 |
# ♔ Chess Reasoner
|
| 15 |
|
| 16 |
+
Play chess against a reasoning LLM! This demo showcases **[nuriyev/chess-reasoner-grpo](https://huggingface.co/nuriyev/chess-reasoner-grpo)**, a Qwen3-4B model tuned to play chess with detailed reasoning traces.
|
| 17 |
|
| 18 |
## 🎮 How to Play
|
| 19 |
|
|
|
|
| 28 |
|-----------|-------|
|
| 29 |
| Base Model | [Qwen/Qwen3-4B-Instruct-2507](https://huggingface.co/Qwen/Qwen3-4B-Instruct-2507) |
|
| 30 |
| Training | SFT with LoRA (r=32) on reasoning traces |
|
| 31 |
+
| Dataset | [aicrowd/ChessExplained](https://huggingface.co/datasets/aicrowd/ChessExplained) |
|
| 32 |
+
| Output Format | `<reason>...</reason><uci_move>move</uci_move>` |
|
| 33 |
|
| 34 |
## 📋 Output Format
|
| 35 |
|
| 36 |
The model outputs structured reasoning:
|
| 37 |
```
|
| 38 |
+
<reason>The opponent left their queen undefended. Taking it wins material.</reason>
|
| 39 |
<uci_move>d4d8</uci_move>
|
| 40 |
```
|
| 41 |
|
| 42 |
## ⚠️ Limitations
|
| 43 |
|
| 44 |
+
The model is still very bad at playing chess! I am working on a much stronger version. Coming soon...
|
| 45 |
|
| 46 |
## 🔗 Links
|
| 47 |
|
app.py
CHANGED
|
@@ -11,7 +11,7 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
|
|
| 11 |
# Model Loading
|
| 12 |
# ============================================================================
|
| 13 |
|
| 14 |
-
MODEL_ID = "nuriyev/chess-reasoner"
|
| 15 |
|
| 16 |
print("Loading model...")
|
| 17 |
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
|
|
@@ -20,84 +20,48 @@ model = AutoModelForCausalLM.from_pretrained(
|
|
| 20 |
torch_dtype=torch.float16,
|
| 21 |
device_map="auto",
|
| 22 |
trust_remote_code=True,
|
|
|
|
| 23 |
)
|
| 24 |
model.eval()
|
| 25 |
print("Model loaded!")
|
| 26 |
|
| 27 |
-
# Custom chat template (matching training)
|
| 28 |
-
CHAT_TEMPLATE = """{%- if messages[0].role == 'system' %}
|
| 29 |
-
{{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
|
| 30 |
-
{%- endif %}
|
| 31 |
-
{%- for message in messages %}
|
| 32 |
-
{%- if message.role == 'user' or (message.role == 'system' and not loop.first) %}
|
| 33 |
-
{{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>\n' }}
|
| 34 |
-
{%- elif message.role == 'assistant' %}
|
| 35 |
-
{{- '<|im_start|>assistant\n' + message.content + '<|im_end|>\n' }}
|
| 36 |
-
{%- endif %}
|
| 37 |
-
{%- endfor %}
|
| 38 |
-
{%- if add_generation_prompt %}
|
| 39 |
-
{{- '<|im_start|>assistant\n<think>\n' }}
|
| 40 |
-
{%- endif %}"""
|
| 41 |
-
|
| 42 |
-
tokenizer.chat_template = CHAT_TEMPLATE
|
| 43 |
-
|
| 44 |
-
# ============================================================================
|
| 45 |
-
# Chess Rendering (matching training exactly)
|
| 46 |
-
# ============================================================================
|
| 47 |
-
|
| 48 |
-
UNICODE_PIECES = {
|
| 49 |
-
'P': '♙', 'R': '♖', 'N': '♘', 'B': '♗', 'Q': '♕', 'K': '♔',
|
| 50 |
-
'p': '♟', 'r': '♜', 'n': '♞', 'b': '♝', 'q': '♛', 'k': '♚',
|
| 51 |
-
}
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
def render_board_unicode(board: chess.Board) -> str:
|
| 55 |
-
"""Render the chess board using Unicode pieces (matching training format)."""
|
| 56 |
-
lines = []
|
| 57 |
-
files = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']
|
| 58 |
-
ranks = ['8', '7', '6', '5', '4', '3', '2', '1']
|
| 59 |
-
|
| 60 |
-
coord_parts = [f" {file} " for file in files]
|
| 61 |
-
coord_line = " " + "".join(coord_parts) + " "
|
| 62 |
-
lines.append(coord_line)
|
| 63 |
-
|
| 64 |
-
border_width = len(files) * 3
|
| 65 |
-
lines.append(" +" + "-" * border_width + "+")
|
| 66 |
-
|
| 67 |
-
for rank_idx, rank in enumerate(ranks):
|
| 68 |
-
line_parts = [f"{rank} |"]
|
| 69 |
-
for file_idx, file in enumerate(files):
|
| 70 |
-
square = chess.parse_square(file + rank)
|
| 71 |
-
piece = board.piece_at(square)
|
| 72 |
-
piece_char = "·" if piece is None else UNICODE_PIECES[piece.symbol(
|
| 73 |
-
)]
|
| 74 |
-
line_parts.append(f" {piece_char} ")
|
| 75 |
-
line_parts.append(f"| {rank}")
|
| 76 |
-
lines.append("".join(line_parts))
|
| 77 |
-
|
| 78 |
-
lines.append(" +" + "-" * border_width + "+")
|
| 79 |
-
lines.append(coord_line)
|
| 80 |
-
return "\n".join(lines)
|
| 81 |
-
|
| 82 |
|
| 83 |
# ============================================================================
|
| 84 |
# Prompts (matching training exactly)
|
| 85 |
# ============================================================================
|
| 86 |
|
| 87 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 88 |
|
| 89 |
-
|
| 90 |
|
| 91 |
-
|
| 92 |
-
<
|
| 93 |
-
<uci_move>your_move</uci_move>"""
|
| 94 |
|
| 95 |
-
|
| 96 |
-
Board (Fen): {{ fen }}
|
| 97 |
-
Turn: It is your turn ({{ turn }})
|
| 98 |
-
Legal Moves: {{ legal_moves }}
|
| 99 |
-
Board (Unicode):
|
| 100 |
-
{{ board_utf }}""")
|
| 101 |
|
| 102 |
|
| 103 |
# ============================================================================
|
|
@@ -111,12 +75,11 @@ def get_model_move(fen: str) -> tuple[str, str, str]:
|
|
| 111 |
turn = "white" if board.turn else "black"
|
| 112 |
|
| 113 |
messages = [
|
| 114 |
-
{"role": "system", "content": SYSTEM_PROMPT},
|
| 115 |
{"role": "user", "content": USER_PROMPT.render(
|
| 116 |
fen=fen,
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
)},
|
| 121 |
]
|
| 122 |
|
|
@@ -143,7 +106,7 @@ def get_model_move(fen: str) -> tuple[str, str, str]:
|
|
| 143 |
outputs[0][inputs['input_ids'].shape[1]:], skip_special_tokens=False)
|
| 144 |
|
| 145 |
# Parse the output
|
| 146 |
-
think_match = re.search(r'<
|
| 147 |
move_match = re.search(r'<uci_move>(.*?)</uci_move>', generated)
|
| 148 |
|
| 149 |
reasoning = think_match.group(1).strip(
|
|
@@ -265,7 +228,7 @@ with gr.Blocks(title="♟️ Chess Reasoner") as demo:
|
|
| 265 |
|
| 266 |
gr.Markdown("""
|
| 267 |
---
|
| 268 |
-
**Model:** [nuriyev/chess-reasoner](https://huggingface.co/nuriyev/chess-reasoner) • Fine-tuned from Qwen3-4B-Instruct
|
| 269 |
""")
|
| 270 |
|
| 271 |
# Events
|
|
|
|
| 11 |
# Model Loading
|
| 12 |
# ============================================================================
|
| 13 |
|
| 14 |
+
MODEL_ID = "nuriyev/chess-reasoner-grpo"
|
| 15 |
|
| 16 |
print("Loading model...")
|
| 17 |
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
|
|
|
|
| 20 |
torch_dtype=torch.float16,
|
| 21 |
device_map="auto",
|
| 22 |
trust_remote_code=True,
|
| 23 |
+
revision="b7e531a630fd35065f9c8287f4bd21dff42f871b",
|
| 24 |
)
|
| 25 |
model.eval()
|
| 26 |
print("Model loaded!")
|
| 27 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
|
| 29 |
# ============================================================================
|
| 30 |
# Prompts (matching training exactly)
|
| 31 |
# ============================================================================
|
| 32 |
|
| 33 |
+
# Single-turn user prompt rendered once per model move.
#
# Render variables:
#   fen (or FEN)    - full FEN string; only its first field (piece placement)
#                     is used to build the board listing.
#   side_to_move    - text inserted into the "Turn:" line.
#   legal_moves_uci - text inserted into the "Legal Moves:" line.
#
# The board is expanded into a flat 64-character string (digits become runs
# of '.', '/' separators are dropped) and then printed file by file as
# "a1:X a2:X ... a8:X", one line per file.
#
# NOTE: Jinja2 names are case-sensitive. The board line previously read `FEN`
# while the visible caller passes `fen=...`, which raises UndefinedError on
# render. The template now accepts either spelling; the rendered text is
# unchanged.
USER_PROMPT = Template("""You are an expert chess player.

Given a current game state, you must select the best legal next move. Think in 1-2 sentences, then output your chosen move.

## State

Board:
{% set fen_board = (FEN if FEN is defined else fen).split()[0] %}
{%- set ns = namespace(board='') -%}
{%- for char in fen_board -%}
{%- if char in '12345678' -%}
{%- set ns.board = ns.board ~ '.' * (char|int) -%}
{%- elif char != '/' -%}
{%- set ns.board = ns.board ~ char -%}
{%- endif -%}
{%- endfor -%}
{#- Output coordinate grid by file -#}
{%- set files = 'abcdefgh' -%}
{% for f in range(8) %}
{%- for r in range(1, 9) -%}
{{ files[f] }}{{ r }}:{{ ns.board[(8-r)*8 + f] }}{% if r < 8 %} {% endif -%}
{%- endfor %}
{% endfor %}
Turn: It is your turn ({{ side_to_move }})
Legal Moves: {{ legal_moves_uci }}

## Output format

<reason>...brief thinking (1-2 first-person very short concise sentences, identifying threat or opportunity, then deciding on the best move to play next)...</reason>
<uci_move>...your_move...</uci_move>

NOTE: capital letters are white, lowercase are black.""")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 65 |
|
| 66 |
|
| 67 |
# ============================================================================
|
|
|
|
| 75 |
turn = "white" if board.turn else "black"
|
| 76 |
|
| 77 |
messages = [
|
|
|
|
| 78 |
{"role": "user", "content": USER_PROMPT.render(
|
| 79 |
fen=fen,
|
| 80 |
+
side_to_move=turn,
|
| 81 |
+
legal_moves_uci=", ".join([move.uci()
|
| 82 |
+
for move in board.legal_moves])
|
| 83 |
)},
|
| 84 |
]
|
| 85 |
|
|
|
|
| 106 |
outputs[0][inputs['input_ids'].shape[1]:], skip_special_tokens=False)
|
| 107 |
|
| 108 |
# Parse the output
|
| 109 |
+
think_match = re.search(r'<reason>(.*?)</reason>', generated, re.DOTALL)
|
| 110 |
move_match = re.search(r'<uci_move>(.*?)</uci_move>', generated)
|
| 111 |
|
| 112 |
reasoning = think_match.group(1).strip(
|
|
|
|
| 228 |
|
| 229 |
gr.Markdown("""
|
| 230 |
---
|
| 231 |
+
**Model:** [nuriyev/chess-reasoner-grpo](https://huggingface.co/nuriyev/chess-reasoner-grpo) • Fine-tuned from Qwen3-4B-Instruct
|
| 232 |
""")
|
| 233 |
|
| 234 |
# Events
|