arasaltan committed on
Commit
2531a82
·
verified ·
1 Parent(s): d81f75e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -14
app.py CHANGED
@@ -10,21 +10,20 @@ LORA_PATH = "./"
10
  tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
11
  tokenizer.pad_token = tokenizer.eos_token
12
 
13
- # Base model
14
  base_model = AutoModelForCausalLM.from_pretrained(
15
- repo_id,
16
- device_map='auto',
17
- offload_folder="offload/" )
 
 
 
 
18
 
19
  base_model.config.use_cache = False
20
 
21
- model = PeftModel.from_pretrained(
22
- base_model,
23
- output_dir,
24
- offload_folder = "offload/"
25
- )
26
- # Load LoRA
27
- model = PeftModel.from_pretrained(model, LORA_PATH)
28
  model.eval()
29
 
30
 
@@ -46,7 +45,7 @@ Answer:
46
  output = model.generate(
47
  **inputs,
48
  max_new_tokens=int(max_tokens),
49
- do_sample=False,
50
  eos_token_id=tokenizer.eos_token_id
51
  )
52
 
@@ -54,7 +53,6 @@ Answer:
54
  return tokenizer.decode(generated, skip_special_tokens=True)
55
 
56
 
57
- # Gradio UI
58
  demo = gr.Interface(
59
  fn=chat,
60
  inputs=[
@@ -67,4 +65,4 @@ demo = gr.Interface(
67
  )
68
 
69
  if __name__ == "__main__":
70
- demo.launch(share=True)
 
10
  tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
11
  tokenizer.pad_token = tokenizer.eos_token
12
 
13
+ # Base model (disk offload enabled)
14
  base_model = AutoModelForCausalLM.from_pretrained(
15
+ BASE_MODEL,
16
+ torch_dtype=torch.float32,
17
+ device_map="auto",
18
+ offload_folder="offload",
19
+ offload_state_dict=True,
20
+ low_cpu_mem_usage=True
21
+ )
22
 
23
  base_model.config.use_cache = False
24
 
25
+ # Load LoRA (SADECE 1 KEZ)
26
+ model = PeftModel.from_pretrained(base_model, LORA_PATH)
 
 
 
 
 
27
  model.eval()
28
 
29
 
 
45
  output = model.generate(
46
  **inputs,
47
  max_new_tokens=int(max_tokens),
48
+ do_sample=False,
49
  eos_token_id=tokenizer.eos_token_id
50
  )
51
 
 
53
  return tokenizer.decode(generated, skip_special_tokens=True)
54
 
55
 
 
56
  demo = gr.Interface(
57
  fn=chat,
58
  inputs=[
 
65
  )
66
 
67
  if __name__ == "__main__":
68
+ demo.launch()