OzTianlu committed on
Commit
61f4875
·
verified ·
1 Parent(s): 3340bc0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +50 -0
app.py CHANGED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import spaces
2
+ import torch
3
+ from threading import Thread
4
+ from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
5
+ import gradio as gr
6
# Checkpoint used for BOTH the tokenizer and the model weights. They must
# come from the same repo, otherwise the token ids produced by the tokenizer
# will not match the model's vocabulary.
MODEL_ID = "NoesisLab/Kai-30B-Instruct"

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
# BUG FIX: the model was previously loaded from "NoesisLab/Spartacus-1B-Instruct"
# while the tokenizer (and the UI title/description below) use MODEL_ID —
# a tokenizer/model mismatch. Load the model from the same checkpoint.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype="auto",      # load in the checkpoint's native precision instead of fp32
    trust_remote_code=True,  # consistent with the tokenizer load above
)
12
+
13
@spaces.GPU
def respond(message, history):
    """Stream a chat completion for *message* given the prior *history*.

    Args:
        message: The latest user message (str).
        history: Previous turns in gradio "messages" format — a list of
            dicts with "role" and "content" keys (requires the
            ChatInterface to be configured with type="messages").

    Yields:
        The accumulated response text so far; gradio re-renders the
        growing string on each yield to produce the streaming effect.
    """
    # Rebuild the full conversation, starting from a fixed system prompt.
    # NOTE(review): the prompt names "Spartacus" while the app is titled
    # Kai-30B-Instruct — confirm which persona is intended.
    messages = [{"role": "system", "content": "You are Spartacus, a helpful assistant."}]
    for msg in history:
        messages.append({"role": msg["role"], "content": msg["content"]})
    messages.append({"role": "user", "content": message})

    input_ids = tokenizer.apply_chat_template(
        messages, add_generation_prompt=True, return_tensors="pt"
    ).to(model.device)

    # skip_prompt=True avoids echoing the input back to the user;
    # special tokens are stripped from the streamed text.
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

    generate_kwargs = dict(
        input_ids=input_ids,
        streamer=streamer,
        max_new_tokens=1024,  # FIX: cap generation length — it was unbounded before
        temperature=0.5,
        top_p=0.9,
        do_sample=True,
    )

    # Run generation on a worker thread so this generator can consume
    # tokens from the streamer as they are produced.
    thread = Thread(target=model.generate, kwargs=generate_kwargs)
    thread.start()

    response = ""
    for token in streamer:
        response += token
        yield response

    # FIX: join the worker so the GPU task is fully finished before the
    # generator returns (previously the thread was never joined).
    thread.join()
41
+
42
+
43
# Chat UI wired to the streaming generator above.
demo = gr.ChatInterface(
    fn=respond,
    # FIX: respond() reads history entries as {"role": ..., "content": ...}
    # dicts, so the interface must deliver history in "messages" format
    # (the legacy tuple format would raise a TypeError inside respond).
    type="messages",
    title="Chat with Kai-30B-Instruct",
    description="Chat with NoesisLab/Kai-30B-Instruct",
)

if __name__ == "__main__":
    demo.launch()