Spaces:

100XZX001
/

code-review-training

Sleeping

100XZX001 committed on Apr 25

Commit

6d77d18

verified ·

1 Parent(s): 659a9e2

Update training.py

Files changed (1) hide show

training.py CHANGED Viewed

@@ -81,11 +81,6 @@ def map_to_env(action: AgentAction):
 def load_model():
     model_name = "microsoft/Phi-3-mini-4k-instruct"
-    # Manually fix the config to avoid rope_scaling KeyError
-    config = AutoConfig.from_pretrained(model_name, trust_remote_code=True)
-    if not hasattr(config, 'rope_scaling') or config.rope_scaling is None:
-        config.rope_scaling = {"type": "linear", "factor": 1.0}
     bnb_config = BitsAndBytesConfig(
         load_in_4bit=True,
         bnb_4bit_compute_dtype=torch.bfloat16,
@@ -95,11 +90,9 @@ def load_model():
     model = AutoModelForCausalLM.from_pretrained(
         model_name,
-        config=config,                       # use the patched config
         quantization_config=bnb_config,
         device_map="auto",
-        trust_remote_code=True,
-        attn_implementation="eager",         # force eager, avoid flash-attn
         torch_dtype=torch.bfloat16,
     )

 def load_model():
     model_name = "microsoft/Phi-3-mini-4k-instruct"
     bnb_config = BitsAndBytesConfig(
         load_in_4bit=True,
         bnb_4bit_compute_dtype=torch.bfloat16,
     model = AutoModelForCausalLM.from_pretrained(
         model_name,
         quantization_config=bnb_config,
         device_map="auto",
+        attn_implementation="eager",   # avoid flash-attn
         torch_dtype=torch.bfloat16,
     )