nuriyev committed on
Commit
c789271
·
1 Parent(s): 2da0b5e

update model reference and improve output format in README and app.py

Browse files
Files changed (2) hide show
  1. README.md +5 -5
  2. app.py +36 -73
README.md CHANGED
@@ -13,7 +13,7 @@ short_description: Play against chess-playing reasoning LLM
13
 
14
  # ♔ Chess Reasoner
15
 
16
- Play chess against a reasoning LLM! This demo showcases **[nuriyev/chess-reasoner](https://huggingface.co/nuriyev/chess-reasoner)**, a Qwen3-4B model fine-tuned to output structured reasoning before selecting moves.
17
 
18
  ## 🎮 How to Play
19
 
@@ -28,20 +28,20 @@ Play chess against a reasoning LLM! This demo showcases **[nuriyev/chess-reasone
28
  |-----------|-------|
29
  | Base Model | [Qwen/Qwen3-4B-Instruct-2507](https://huggingface.co/Qwen/Qwen3-4B-Instruct-2507) |
30
  | Training | SFT with LoRA (r=32) on reasoning traces |
31
- | Dataset | [nuriyev/chess-reasoning](https://huggingface.co/datasets/nuriyev/chess-reasoning) |
32
- | Output Format | `<think>reasoning</think><uci_move>move</uci_move>` |
33
 
34
  ## 📋 Output Format
35
 
36
  The model outputs structured reasoning:
37
  ```
38
- <think>The opponent left their queen undefended. Taking it wins material.</think>
39
  <uci_move>d4d8</uci_move>
40
  ```
41
 
42
  ## ⚠️ Limitations
43
 
44
- This is an **SFT checkpoint** focused on format alignment. The model outputs valid reasoning but hasn't been optimized for chess strength via reinforcement learning yet. A GRPO stage using Stockfish rewards is planned.
45
 
46
  ## 🔗 Links
47
 
 
13
 
14
  # ♔ Chess Reasoner
15
 
16
+ Play chess against a reasoning LLM! This demo showcases **[nuriyev/chess-reasoner-grpo](https://huggingface.co/nuriyev/chess-reasoner-grpo)**, a Qwen3-4B model tuned to play chess with detailed reasoning traces.
17
 
18
  ## 🎮 How to Play
19
 
 
28
  |-----------|-------|
29
  | Base Model | [Qwen/Qwen3-4B-Instruct-2507](https://huggingface.co/Qwen/Qwen3-4B-Instruct-2507) |
30
  | Training | SFT with LoRA (r=32) on reasoning traces |
31
+ | Dataset | [aicrowd/ChessExplained](https://huggingface.co/datasets/aicrowd/ChessExplained) |
32
+ | Output Format | `<reason>...</reason><uci_move>move</uci_move>` |
33
 
34
  ## 📋 Output Format
35
 
36
  The model outputs structured reasoning:
37
  ```
38
+ <reason>The opponent left their queen undefended. Taking it wins material.</reason>
39
  <uci_move>d4d8</uci_move>
40
  ```
41
 
42
  ## ⚠️ Limitations
43
 
44
+ The model is still very bad at playing chess! I am working on creating a beast. Coming soon...
45
 
46
  ## 🔗 Links
47
 
app.py CHANGED
@@ -11,7 +11,7 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
11
  # Model Loading
12
  # ============================================================================
13
 
14
- MODEL_ID = "nuriyev/chess-reasoner"
15
 
16
  print("Loading model...")
17
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
@@ -20,84 +20,48 @@ model = AutoModelForCausalLM.from_pretrained(
20
  torch_dtype=torch.float16,
21
  device_map="auto",
22
  trust_remote_code=True,
 
23
  )
24
  model.eval()
25
  print("Model loaded!")
26
 
27
- # Custom chat template (matching training)
28
- CHAT_TEMPLATE = """{%- if messages[0].role == 'system' %}
29
- {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
30
- {%- endif %}
31
- {%- for message in messages %}
32
- {%- if message.role == 'user' or (message.role == 'system' and not loop.first) %}
33
- {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>\n' }}
34
- {%- elif message.role == 'assistant' %}
35
- {{- '<|im_start|>assistant\n' + message.content + '<|im_end|>\n' }}
36
- {%- endif %}
37
- {%- endfor %}
38
- {%- if add_generation_prompt %}
39
- {{- '<|im_start|>assistant\n<think>\n' }}
40
- {%- endif %}"""
41
-
42
- tokenizer.chat_template = CHAT_TEMPLATE
43
-
44
- # ============================================================================
45
- # Chess Rendering (matching training exactly)
46
- # ============================================================================
47
-
48
- UNICODE_PIECES = {
49
- 'P': '♙', 'R': '♖', 'N': '♘', 'B': '♗', 'Q': '♕', 'K': '♔',
50
- 'p': '♟', 'r': '♜', 'n': '♞', 'b': '♝', 'q': '♛', 'k': '♚',
51
- }
52
-
53
-
54
- def render_board_unicode(board: chess.Board) -> str:
55
- """Render the chess board using Unicode pieces (matching training format)."""
56
- lines = []
57
- files = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']
58
- ranks = ['8', '7', '6', '5', '4', '3', '2', '1']
59
-
60
- coord_parts = [f" {file} " for file in files]
61
- coord_line = " " + "".join(coord_parts) + " "
62
- lines.append(coord_line)
63
-
64
- border_width = len(files) * 3
65
- lines.append(" +" + "-" * border_width + "+")
66
-
67
- for rank_idx, rank in enumerate(ranks):
68
- line_parts = [f"{rank} |"]
69
- for file_idx, file in enumerate(files):
70
- square = chess.parse_square(file + rank)
71
- piece = board.piece_at(square)
72
- piece_char = "·" if piece is None else UNICODE_PIECES[piece.symbol(
73
- )]
74
- line_parts.append(f" {piece_char} ")
75
- line_parts.append(f"| {rank}")
76
- lines.append("".join(line_parts))
77
-
78
- lines.append(" +" + "-" * border_width + "+")
79
- lines.append(coord_line)
80
- return "\n".join(lines)
81
-
82
 
83
  # ============================================================================
84
  # Prompts (matching training exactly)
85
  # ============================================================================
86
 
87
- SYSTEM_PROMPT = """You are an expert chess player.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
 
89
- Given a current game state, you must select the best next move. Think in 1-2 sentences, then output your chosen move.
90
 
91
- Output format:
92
- <think>brief thinking (2 sentences max)</think>
93
- <uci_move>your_move</uci_move>"""
94
 
95
- USER_PROMPT = Template("""Here is the current game state
96
- Board (Fen): {{ fen }}
97
- Turn: It is your turn ({{ turn }})
98
- Legal Moves: {{ legal_moves }}
99
- Board (Unicode):
100
- {{ board_utf }}""")
101
 
102
 
103
  # ============================================================================
@@ -111,12 +75,11 @@ def get_model_move(fen: str) -> tuple[str, str, str]:
111
  turn = "white" if board.turn else "black"
112
 
113
  messages = [
114
- {"role": "system", "content": SYSTEM_PROMPT},
115
  {"role": "user", "content": USER_PROMPT.render(
116
  fen=fen,
117
- board_utf=render_board_unicode(board),
118
- turn=turn,
119
- legal_moves=", ".join([move.uci() for move in board.legal_moves])
120
  )},
121
  ]
122
 
@@ -143,7 +106,7 @@ def get_model_move(fen: str) -> tuple[str, str, str]:
143
  outputs[0][inputs['input_ids'].shape[1]:], skip_special_tokens=False)
144
 
145
  # Parse the output
146
- think_match = re.search(r'<think>(.*?)</think>', generated, re.DOTALL)
147
  move_match = re.search(r'<uci_move>(.*?)</uci_move>', generated)
148
 
149
  reasoning = think_match.group(1).strip(
@@ -265,7 +228,7 @@ with gr.Blocks(title="♟️ Chess Reasoner") as demo:
265
 
266
  gr.Markdown("""
267
  ---
268
- **Model:** [nuriyev/chess-reasoner](https://huggingface.co/nuriyev/chess-reasoner) • Fine-tuned from Qwen3-4B-Instruct
269
  """)
270
 
271
  # Events
 
11
  # Model Loading
12
  # ============================================================================
13
 
14
+ MODEL_ID = "nuriyev/chess-reasoner-grpo"
15
 
16
  print("Loading model...")
17
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
 
20
  torch_dtype=torch.float16,
21
  device_map="auto",
22
  trust_remote_code=True,
23
+ revision="b7e531a630fd35065f9c8287f4bd21dff42f871b",
24
  )
25
  model.eval()
26
  print("Model loaded!")
27
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
 
29
  # ============================================================================
30
  # Prompts (matching training exactly)
31
  # ============================================================================
32
 
33
+ USER_PROMPT = Template("""You are an expert chess player.
34
+
35
+ Given a current game state, you must select the best legal next move. Think in 1-2 sentences, then output your chosen move.
36
+
37
+ ## State
38
+
39
+ Board:
40
+ {% set fen_board = fen.split()[0] %}
41
+ {%- set ns = namespace(board='') -%}
42
+ {%- for char in fen_board -%}
43
+ {%- if char in '12345678' -%}
44
+ {%- set ns.board = ns.board ~ '.' * (char|int) -%}
45
+ {%- elif char != '/' -%}
46
+ {%- set ns.board = ns.board ~ char -%}
47
+ {%- endif -%}
48
+ {%- endfor -%}
49
+ {#- Output coordinate grid by file -#}
50
+ {%- set files = 'abcdefgh' -%}
51
+ {% for f in range(8) %}
52
+ {%- for r in range(1, 9) -%}
53
+ {{ files[f] }}{{ r }}:{{ ns.board[(8-r)*8 + f] }}{% if r < 8 %} {% endif -%}
54
+ {%- endfor %}
55
+ {% endfor %}
56
+ Turn: It is your turn ({{ side_to_move }})
57
+ Legal Moves: {{ legal_moves_uci }}
58
 
59
+ ## Output format
60
 
61
+ <reason>...brief thinking (1-2 first-person very short concise sentences, identifying threat or opportunity, then deciding on the best move to play next)...</reason>
62
+ <uci_move>...your_move...</uci_move>
 
63
 
64
+ NOTE: capital letters are white, lowercase are black.""")
 
 
 
 
 
65
 
66
 
67
  # ============================================================================
 
75
  turn = "white" if board.turn else "black"
76
 
77
  messages = [
 
78
  {"role": "user", "content": USER_PROMPT.render(
79
  fen=fen,
80
+ side_to_move=turn,
81
+ legal_moves_uci=", ".join([move.uci()
82
+ for move in board.legal_moves])
83
  )},
84
  ]
85
 
 
106
  outputs[0][inputs['input_ids'].shape[1]:], skip_special_tokens=False)
107
 
108
  # Parse the output
109
+ think_match = re.search(r'<reason>(.*?)</reason>', generated, re.DOTALL)
110
  move_match = re.search(r'<uci_move>(.*?)</uci_move>', generated)
111
 
112
  reasoning = think_match.group(1).strip(
 
228
 
229
  gr.Markdown("""
230
  ---
231
+ **Model:** [nuriyev/chess-reasoner-grpo](https://huggingface.co/nuriyev/chess-reasoner-grpo) • Fine-tuned from Qwen3-4B-Instruct
232
  """)
233
 
234
  # Events