nph4rd committed on
Commit
ad995e5
·
1 Parent(s): 41ab4e6

update model

Browse files
Files changed (2) hide show
  1. README.md +1 -1
  2. app.py +2 -2
README.md CHANGED
@@ -32,4 +32,4 @@ Play a simplified version of Hanabi with a trained AI model!
32
  - **Hint:** `1HR`, `1HG`, `1H1`, `1H2`, `1H3` - Tell the AI about their Red/Green cards or their 1s/2s/3s
33
 
34
  ## Model
35
- The AI uses [nph4rd/Qwen3-0.6B-Tiny-Hanabi-RL-300](https://huggingface.co/nph4rd/Qwen3-0.6B-Tiny-Hanabi-RL-300), a Qwen3-0.6B model fine-tuned with reinforcement learning on this Tiny Hanabi environment.
 
32
  - **Hint:** `1HR`, `1HG`, `1H1`, `1H2`, `1H3` - Tell the AI about their Red/Green cards or their 1s/2s/3s
33
 
34
  ## Model
35
+ The AI uses [nph4rd/Qwen3-1.7B-Tiny-Hanabi-XML-RL-12-2](https://huggingface.co/nph4rd/Qwen3-1.7B-Tiny-Hanabi-XML-RL-12-2), a Qwen3-1.7B model fine-tuned with reinforcement learning on this Tiny Hanabi environment.
app.py CHANGED
@@ -15,7 +15,7 @@ RANKS = (1, 2, 3)
15
  HAND_SIZE = 2
16
  MAX_INFO_TOKENS = 8
17
  MAX_LIFE_TOKENS = 3
18
- MODEL_ID = "nph4rd/Qwen3-0.6B-Tiny-Hanabi-RL-300"
19
 
20
  COLOR_NAMES = {"R": "Red", "G": "Green"}
21
  COLOR_HEX = {"R": "#e63946", "G": "#2a9d8f"}
@@ -669,7 +669,7 @@ def load_model():
669
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
670
  model = AutoModelForCausalLM.from_pretrained(
671
  MODEL_ID,
672
- torch_dtype=torch.float16,
673
  device_map="auto",
674
  )
675
  model.eval()
 
15
  HAND_SIZE = 2
16
  MAX_INFO_TOKENS = 8
17
  MAX_LIFE_TOKENS = 3
18
+ MODEL_ID = "nph4rd/Qwen3-1.7B-Tiny-Hanabi-XML-RL-12-2"
19
 
20
  COLOR_NAMES = {"R": "Red", "G": "Green"}
21
  COLOR_HEX = {"R": "#e63946", "G": "#2a9d8f"}
 
669
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
670
  model = AutoModelForCausalLM.from_pretrained(
671
  MODEL_ID,
672
+ torch_dtype=torch.float32,
673
  device_map="auto",
674
  )
675
  model.eval()