savan360 committed on
Commit
6986b77
·
verified ·
1 Parent(s): 7a034d7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -12
app.py CHANGED
@@ -1,20 +1,17 @@
1
  from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
2
- from peft import PeftModel # <-- important
3
 
4
- BASE = "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B" # your true base
5
- ADAPTER = "savan360/Qwen_prompt_creator" # your LoRA repo (adapter-only)
 
6
 
7
  tok = AutoTokenizer.from_pretrained(BASE, trust_remote_code=True)
8
- if tok.pad_token is None and hasattr(tok, "eos_token"):
9
- tok.pad_token = tok.eos_token
10
 
11
- # (Optional) 4-bit to save VRAM
12
- bnb = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype="float16")
13
 
14
  base_model = AutoModelForCausalLM.from_pretrained(
15
- BASE, trust_remote_code=True, quantization_config=bnb, device_map="auto"
16
  )
17
-
18
- model = PeftModel.from_pretrained(base_model, ADAPTER) # attach LoRA
19
-
20
- # ...your generation code...
 
1
"""Load a 4-bit quantized base LLM and attach a LoRA adapter from the Hub."""
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel

BASE = "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B"  # base model repo
REPO = "savan360/Qwen_prompt_creator"             # adapter repo (root)
SUBFOLDER = "adapter"                             # where adapter files live inside REPO

tok = AutoTokenizer.from_pretrained(BASE, trust_remote_code=True)
# FIX: `hasattr(tok, "eos_token")` is always True on HF tokenizers (the attribute
# exists even when unset), so the original guard was vacuous. The meaningful
# condition is that an EOS token is actually defined before reusing it as PAD.
if tok.pad_token is None and tok.eos_token is not None:
    tok.pad_token = tok.eos_token

# 4-bit NF4 quantization to save VRAM; matmuls run in fp16, double quantization
# shaves a little more memory off the quantization constants.
bnb = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype="float16",
    bnb_4bit_use_double_quant=True,
)

base_model = AutoModelForCausalLM.from_pretrained(
    BASE,
    trust_remote_code=True,
    device_map="auto",           # let accelerate shard/place the weights
    quantization_config=bnb,
    # NOTE(review): `dtype=` is only accepted by recent transformers releases;
    # older versions spell this `torch_dtype=`. With 4-bit quantization the
    # compute dtype is already fixed by bnb_4bit_compute_dtype, so this kwarg
    # is largely redundant — confirm the pinned transformers version supports it.
    dtype="auto",
)
# Attach the LoRA adapter weights stored under REPO/SUBFOLDER onto the base model.
model = PeftModel.from_pretrained(base_model, REPO, subfolder=SUBFOLDER)