Spaces:

100XZX001
/

code-review-training

Sleeping

100XZX001 committed on Apr 25

Commit

45d29c0

verified ·

1 Parent(s): 95b8e01

Update training.py

Files changed (1) hide show

training.py CHANGED Viewed

@@ -1,8 +1,7 @@
 # training.py – Memory‑safe: Phi‑3‑mini + Expert Demos + Fast PPO (2 iterations)
 import os
-os.environ["TRITON_INTERPRET"] = "1"          # force CPU interpretation, no ptxas
-os.environ["TRITON_CACHE_DIR"] = "/tmp/triton_cache"
-os.environ["TORCHINDUCTOR_CPP_WRAPPER"] = "0"  # stay in Python     # Issue #12: prevent OOM from parallel tokenization
 import torch._dynamo
 torch._dynamo.config.disable = True
@@ -79,7 +78,7 @@ def map_to_env(action: AgentAction):
 def load_model():
     model, tokenizer = FastLanguageModel.from_pretrained(
     model_name="unsloth/Phi-3-mini-4k-instruct-bnb-4bit",
-    max_seq_length=2048,
     load_in_4bit=True,
     )
     model = FastLanguageModel.get_peft_model(
@@ -336,7 +335,7 @@ def generate_action_with_logprob(prompt, model, tokenizer, temperature=0.0, max_
         with torch.no_grad():
             outputs = model.generate(
                 **inputs,
-                max_new_tokens=128,
                 do_sample=(temperature > 0),
                 temperature=max(temperature, 0.01) if temperature > 0 else 1.0,
                 min_new_tokens=1,

 # training.py – Memory‑safe: Phi‑3‑mini + Expert Demos + Fast PPO (2 iterations)
 import os
+os.environ["TRITON_DISABLE"] = "1"
+os.environ["TOKENIZERS_PARALLELISM"] = "false"  # Issue #12: prevent OOM from parallel tokenization
 import torch._dynamo
 torch._dynamo.config.disable = True
 def load_model():
     model, tokenizer = FastLanguageModel.from_pretrained(
     model_name="unsloth/Phi-3-mini-4k-instruct-bnb-4bit",
+    max_seq_length=768,
     load_in_4bit=True,
     )
     model = FastLanguageModel.get_peft_model(
         with torch.no_grad():
             outputs = model.generate(
                 **inputs,
+                max_new_tokens=64,
                 do_sample=(temperature > 0),
                 temperature=max(temperature, 0.01) if temperature > 0 else 1.0,
                 min_new_tokens=1,