Harryis committed on
Commit
d421d96
·
verified ·
1 Parent(s): cb1ba2a

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +7 -1
README.md CHANGED
@@ -6,6 +6,8 @@ tags:
6
  - multi-task
7
  - scout
8
  - ppo
 
 
9
  ---
10
 
11
  # SCOUT-Multitask Sequential RL Agent
@@ -51,4 +53,8 @@ model_name = "Harryis/SCOUT_multitask"
51
  tokenizer = AutoTokenizer.from_pretrained(model_name)
52
  model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto")
53
 
54
- # Example: Prompt the model for a Sudoku move or Sokoban action
 
 
 
 
 
6
  - multi-task
7
  - scout
8
  - ppo
9
+ papers:
10
+ - 2601.21754
11
  ---
12
 
13
  # SCOUT-Multitask Sequential RL Agent
 
53
  tokenizer = AutoTokenizer.from_pretrained(model_name)
54
  model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto")
55
 
56
+ # Example: Prompt the model for a Sudoku move or Sokoban action
57
+
58
+ **Links:**
59
+ - 📄 **Paper:** [SCOUT: Sequential RL with Exploration & Distillation](https://huggingface.co/papers/2601.21754)
60
+ - 💻 **Code:** [GitHub](https://github.com/Harry-mic/SCOUT)