OzTianlu commited on
Commit
c35a5b8
·
verified ·
1 Parent(s): 1f60639

Upload 3 files

Browse files
Files changed (2) hide show
  1. app.py +55 -0
  2. requirements.txt +3 -0
app.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import gradio as gr
import torch
from threading import Thread
from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer

# Hugging Face Hub id of the model served by this app.
MODEL_ID = "NoesisLab/Spartacus-1B-Instruct"

# Load tokenizer and model once at import time; they are module-level
# globals shared by every chat request handled by respond().
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.float16,  # half precision to halve the memory footprint
    device_map="auto",          # place weights on GPU when available, else CPU
)
16
def respond(message, history):
    """Stream a chat completion for *message* given the conversation *history*.

    Args:
        message: The latest user message (str).
        history: Prior turns as supplied by ``gr.ChatInterface`` — either
            ``(user, assistant)`` tuples (legacy "tuples" format) or
            ``{"role": ..., "content": ...}`` dicts (the "messages" format
            used by newer Gradio versions). Both are accepted.

    Yields:
        str: The assistant reply accumulated so far; Gradio re-renders the
        growing string on every yield, producing a streaming effect.
    """
    messages = [{"role": "system", "content": "You are Spartacus, a helpful assistant."}]
    # Accept both history formats so the app keeps working when
    # gr.ChatInterface is configured with (or defaults to) type="messages";
    # the original tuple-only unpacking raises on dict entries.
    for turn in history:
        if isinstance(turn, dict):
            # Messages format: already role/content shaped.
            messages.append({"role": turn["role"], "content": turn["content"]})
        else:
            # Legacy tuples format: a (user_msg, bot_msg) pair.
            user_msg, bot_msg = turn
            messages.append({"role": "user", "content": user_msg})
            messages.append({"role": "assistant", "content": bot_msg})
    messages.append({"role": "user", "content": message})

    input_ids = tokenizer.apply_chat_template(
        messages, add_generation_prompt=True, return_tensors="pt"
    ).to(model.device)

    # skip_prompt=True keeps the echoed prompt out of the stream;
    # special tokens are stripped from the decoded text.
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

    generate_kwargs = dict(
        input_ids=input_ids,
        streamer=streamer,
        max_new_tokens=1024,
        temperature=0.7,
        top_p=0.9,
        do_sample=True,
    )

    # model.generate() blocks until done, so it runs on a worker thread
    # while this generator consumes tokens from the streamer.
    thread = Thread(target=model.generate, kwargs=generate_kwargs)
    thread.start()

    response = ""
    try:
        for token in streamer:
            response += token
            yield response
    finally:
        # Ensure the worker finishes even if the consumer abandons this
        # generator early, instead of leaving a dangling thread.
        thread.join()
45
+
46
+
47
# Gradio chat UI wired to the streaming respond() generator.
demo = gr.ChatInterface(
    fn=respond,
    title="Spartacus Chat",
    description="Chat with NoesisLab/Spartacus-1B-Instruct",
    theme=gr.themes.Soft(),
)

if __name__ == "__main__":
    # Start the local web server only when executed as a script.
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
# gradio is imported by app.py; listed here so the app also runs outside
# environments (e.g. HF Spaces) that preinstall it.
gradio
torch
transformers
accelerate