OzTianlu commited on
Commit
c4758d0
·
verified ·
1 Parent(s): ebbe32a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -27
app.py CHANGED
@@ -1,50 +1,40 @@
1
  import gradio as gr
2
  import torch
3
- import spaces
4
  from threading import Thread
5
  from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
6
 
7
  MODEL_ID = "NoesisLab/Spartacus-1B-Instruct"
8
 
9
- # 静态加载 Tokenizer (不占 GPU)
10
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
11
- model = None
 
 
 
 
 
12
 
 
13
  @spaces.GPU
14
  def respond(message, history):
15
- global model
16
- # ZeroGPU 核心逻辑:在装饰器函数内初始化并移动到 CUDA
17
- if model is None:
18
- model = AutoModelForCausalLM.from_pretrained(
19
- MODEL_ID,
20
- torch_dtype=torch.float16,
21
- trust_remote_code=True,
22
- ).to("cuda")
23
-
24
- # Gradio 5.x 的 history 已经是 [{'role': 'user', 'content': '...'}, ...] 格式
25
- # 直接拼接到 messages 即可
26
  messages = [{"role": "system", "content": "You are Spartacus, a helpful assistant."}]
27
- messages.extend(history)
 
28
  messages.append({"role": "user", "content": message})
29
 
30
  input_ids = tokenizer.apply_chat_template(
31
- messages,
32
- add_generation_prompt=True,
33
- return_tensors="pt"
34
- ).to("cuda")
35
 
36
  streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
37
-
38
  generate_kwargs = dict(
39
  input_ids=input_ids,
40
  streamer=streamer,
41
- max_new_tokens=2048,
42
- temperature=0.6,
43
- top_p=0.95,
44
  do_sample=True,
45
  )
46
 
47
- # 启动异步生成线程
48
  thread = Thread(target=model.generate, kwargs=generate_kwargs)
49
  thread.start()
50
 
@@ -53,13 +43,11 @@ def respond(message, history):
53
  response += token
54
  yield response
55
 
56
- # 使用最新版的配置参数
57
  demo = gr.ChatInterface(
58
  fn=respond,
59
- type="messages", # 这需要 gradio>=5.0.0
60
  title="Spartacus Chat",
61
  description="Chat with NoesisLab/Spartacus-1B-Instruct",
62
- examples=["Who are you?", "Explain the concept of Noesis."],
63
  )
64
 
65
  if __name__ == "__main__":
 
1
# NOTE(review): `import spaces` must run before CUDA-touching libraries are
# initialized on ZeroGPU Spaces — it was previously imported mid-file, after
# torch and after the model load. Hoisted to the top of the import block.
import spaces

import gradio as gr
import torch
from threading import Thread
from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer

# Hugging Face Hub id of the chat model served by this Space.
MODEL_ID = "NoesisLab/Spartacus-1B-Instruct"

# Load tokenizer and model once at startup; `device_map="auto"` lets
# accelerate place the weights (GPU when available, CPU otherwise).
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.float16,
    device_map="auto",
    trust_remote_code=True,
)
17
@spaces.GPU
def respond(message, history):
    """Stream a chat reply for *message*, given the prior *history*.

    Args:
        message: The user's latest message as a plain string.
        history: Prior turns in Gradio "messages" format, i.e. a list of
            ``{"role": ..., "content": ...}`` dicts (requires
            ``gr.ChatInterface(type="messages")``).

    Yields:
        The accumulated assistant response so far, one chunk at a time,
        so the UI can render the text as it is generated.
    """
    # Rebuild the full conversation: system prompt, prior turns, new message.
    messages = [{"role": "system", "content": "You are Spartacus, a helpful assistant."}]
    for msg in history:
        messages.append({"role": msg["role"], "content": msg["content"]})
    messages.append({"role": "user", "content": message})

    input_ids = tokenizer.apply_chat_template(
        messages, add_generation_prompt=True, return_tensors="pt"
    ).to(model.device)

    # skip_prompt avoids echoing the input; special tokens are stripped.
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

    generate_kwargs = dict(
        input_ids=input_ids,
        streamer=streamer,
        # BUG FIX: this kwarg was dropped in the last edit; without it,
        # generate() falls back to the default max_length (20 tokens) and
        # replies are cut off almost immediately.
        max_new_tokens=2048,
        temperature=0.5,
        top_p=0.9,
        do_sample=True,
    )

    # Run generation in a worker thread so we can consume the streamer here.
    thread = Thread(target=model.generate, kwargs=generate_kwargs)
    thread.start()

    response = ""
    for token in streamer:
        response += token
        yield response
45
 
46
+
47
# Chat UI. `type="messages"` is required: respond() reads each history entry
# as a {"role", "content"} dict, which the legacy tuples format does not
# provide (this kwarg was dropped in the last edit — restored here).
demo = gr.ChatInterface(
    fn=respond,
    type="messages",
    title="Spartacus Chat",
    description="Chat with NoesisLab/Spartacus-1B-Instruct",
)
52
 
53
  if __name__ == "__main__":