arasaltan committed
Commit 25c815e · verified · 1 parent: a45df4d

Update app.py

Files changed (1):
  1. app.py +23 -52
app.py CHANGED
@@ -3,81 +3,52 @@ import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM
 from peft import PeftModel
 
-# ===== MODEL LOAD=====
 BASE_MODEL = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
-LORA_PATH = "./deepseek-lab-assistant"
+LORA_PATH = "./"
+DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 
-tokenizer = AutoTokenizer.from_pretrained(
-    BASE_MODEL,
-    trust_remote_code=True
-)
+#Tokenizer
+tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
 tokenizer.pad_token = tokenizer.eos_token
 
+#Base model
 model = AutoModelForCausalLM.from_pretrained(
     BASE_MODEL,
-    torch_dtype=torch.float16,
-    device_map="auto",
-    trust_remote_code=True
+    torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32,
+    device_map="auto"
 )
 
+
 model = PeftModel.from_pretrained(model, LORA_PATH)
 model.eval()
 
-# ===== CHAT FUNCTION =====
-def respond(
-    message,
-    history,
-    system_message,
-    max_tokens,
-    temperature,
-    top_p,
-):
-    # history = [{"role": "user"/"assistant", "content": "..."}]
-
-    prompt = system_message + "\n\n"
-
-    for h in history:
-        prompt += f"{h['role'].capitalize()}: {h['content']}\n"
-
-    prompt += f"User: {message}\nAssistant:"
-
+def chat(prompt, max_new_tokens=256, temperature=0.7):
     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
 
     with torch.no_grad():
         output = model.generate(
             **inputs,
-            max_new_tokens=max_tokens,
+            max_new_tokens=max_new_tokens,
             temperature=temperature,
-            top_p=top_p,
-            do_sample=temperature > 0,
+            do_sample=True,
+            eos_token_id=tokenizer.eos_token_id
         )
 
-    text = tokenizer.decode(output[0], skip_special_tokens=True)
-
-    if "Assistant:" in text:
-        text = text.split("Assistant:")[-1].strip()
-
-    return text
-
-
-# ===== GRADIO UI =====
-chatbot = gr.ChatInterface(
-    respond,
-    type="messages",
-    additional_inputs=[
-        gr.Textbox(
-            value="You are a helpful lab assistant. Explain ideas clearly. Do not rush to final answers.",
-            label="System message",
-        ),
-        gr.Slider(1, 1024, value=256, step=1, label="Max new tokens"),
-        gr.Slider(0.0, 1.5, value=0.3, step=0.05, label="Temperature"),
-        gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="Top-p"),
+    return tokenizer.decode(output[0], skip_special_tokens=True)
+
+# Gradio UI
+demo = gr.Interface(
+    fn=chat,
+    inputs=[
+        gr.Textbox(lines=5, label="Prompt"),
+        gr.Slider(1, 1024, value=256, label="Max tokens"),
+        gr.Slider(0.1, 1.5, value=0.7, label="Temperature"),
     ],
+    outputs=gr.Textbox(lines=10, label="Output"),
+    title="DeepSeek Lab Assistant (LoRA)",
 )
 
-with gr.Blocks() as demo:
-    chatbot.render()
-
 if __name__ == "__main__":
-    demo.launch()
+    demo.launch(True)
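
The commit swaps the chat-style gr.ChatInterface (with history, system message, and top-p controls) for a plain gr.Interface text box, and points LORA_PATH at the repo root instead of ./deepseek-lab-assistant. One detail worth flagging in the new last line: in Gradio's documented launch() signature the first positional parameter is inline, so demo.launch(True) most likely sets inline=True rather than share=True. If a public link was the intent, naming the keyword avoids the ambiguity; a minimal sketch, assuming that intent:

if __name__ == "__main__":
    # share=True is only a guess at the intent; on a Hugging Face Space,
    # a bare demo.launch() is enough, since Spaces provides the public URL.
    demo.launch(share=True)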
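The new chat() also feeds raw text straight to the model, while the removed respond() at least prefixed a system message and User:/Assistant: turns. Since DeepSeek-R1-Distill-Qwen-1.5B is a chat-tuned checkpoint, one option is to route the prompt through the tokenizer's chat template (a standard transformers API). A sketch under that assumption; chat_templated is a hypothetical name, not part of the commit:

def chat_templated(prompt, max_new_tokens=256, temperature=0.7):
    # Wrap the raw prompt as a single user turn in the model's chat format.
    messages = [{"role": "user", "content": prompt}]
    input_ids = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,  # append the assistant header so generation starts there
        return_tensors="pt",
    ).to(model.device)
    with torch.no_grad():
        output = model.generate(
            input_ids,
            max_new_tokens=max_new_tokens,
            temperature=temperature,
            do_sample=True,
            eos_token_id=tokenizer.eos_token_id,
        )
    # Decode only the newly generated tokens, not the echoed prompt.
    return tokenizer.decode(output[0][input_ids.shape[-1]:], skip_special_tokens=True)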
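Once the Space is running, the gr.Interface endpoint can also be exercised from Python with gradio_client; /predict is the default api_name for an Interface. The Space id below is a placeholder, not taken from the commit:

from gradio_client import Client

client = Client("arasaltan/deepseek-lab-assistant")  # placeholder Space id
result = client.predict(
    "What does the LoRA adapter change in the base model?",  # Prompt
    256,   # Max tokens
    0.7,   # Temperature
    api_name="/predict",
)
print(result)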