OzTianlu committed
Commit 190ebf5 · verified · 1 Parent(s): 72af96d

Update app.py

Files changed (1)
  1. app.py +13 -16
app.py CHANGED
@@ -1,34 +1,30 @@
 import gradio as gr
 import torch
-import spaces  # must be imported
+import spaces
 from threading import Thread
 from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
 
 MODEL_ID = "NoesisLab/Spartacus-1B-Instruct"
 
-# The tokenizer can stay at module level since it does not use the GPU
+# Load the tokenizer statically
 tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
-
-# Define the model as a global variable, but initialize it inside the function to avoid reloading
 model = None
 
-@spaces.GPU  # must decorate the generation function
+@spaces.GPU
 def respond(message, history):
     global model
-    # Load the model only on the first call, once the GPU environment is ready
     if model is None:
         model = AutoModelForCausalLM.from_pretrained(
             MODEL_ID,
             torch_dtype=torch.float16,
-            # device_map="auto" tends to fail on ZeroGPU; use .to("cuda") directly
             trust_remote_code=True,
         ).to("cuda")
 
+    # Adapt to the older Gradio history format: [[user, assistant], [user, assistant]]
     messages = [{"role": "system", "content": "You are Spartacus, a helpful assistant."}]
-    for msg in history:
-        # Note: the Gradio history structure may need adapting
-        role = "assistant" if msg['role'] == 'assistant' else "user"
-        messages.append({"role": role, "content": msg["content"]})
+    for user_msg, assistant_msg in history:
+        messages.append({"role": "user", "content": user_msg})
+        messages.append({"role": "assistant", "content": assistant_msg})
 
     messages.append({"role": "user", "content": message})
 
@@ -41,20 +37,21 @@ def respond(message, history):
     generate_kwargs = dict(
         input_ids=input_ids,
         streamer=streamer,
-        max_new_tokens=2048,  # cap the output length to avoid timeouts
-        temperature=0.6,
-        top_p=0.95,
+        max_new_tokens=1024,
+        temperature=0.5,
+        top_p=0.9,
         do_sample=True,
     )
 
-    # Using a Thread is also allowed on ZeroGPU
     thread = Thread(target=model.generate, kwargs=generate_kwargs)
     thread.start()
 
     response = ""
     for token in streamer:
-        yield token  # yield either the increment or the accumulated text
+        response += token
+        yield response  # the older ChatInterface expects the full accumulated string
 
+# Drop the type="messages" argument
 demo = gr.ChatInterface(
     fn=respond,
     title="Spartacus Chat",