OzTianlu committed
Commit 48ffa26 · verified · 1 Parent(s): bf76245

Update app.py

Files changed (1)
  1. app.py +15 -13
app.py CHANGED
@@ -6,32 +6,31 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
 
 MODEL_ID = "NoesisLab/Spartacus-1B-Instruct"
 
-# Load the tokenizer statically
+# Load the tokenizer statically (does not occupy the GPU)
 tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
 model = None
 
 @spaces.GPU
 def respond(message, history):
     global model
+    # Core ZeroGPU logic: initialize the model and move it to CUDA inside the decorated function
     if model is None:
         model = AutoModelForCausalLM.from_pretrained(
             MODEL_ID,
-            dtype=torch.float16,
+            torch_dtype=torch.float16,
             trust_remote_code=True,
         ).to("cuda")
 
-    # Handle the old-style Gradio structure: history is [[q1, a1], [q2, a2]]
+    # In Gradio 5.x, history already arrives as [{'role': 'user', 'content': '...'}, ...],
+    # so it can be spliced into messages directly
     messages = [{"role": "system", "content": "You are Spartacus, a helpful assistant."}]
-
-    if history:
-        for user_msg, assistant_msg in history:
-            messages.append({"role": "user", "content": user_msg})
-            messages.append({"role": "assistant", "content": assistant_msg})
-
+    messages.extend(history)
     messages.append({"role": "user", "content": message})
 
     input_ids = tokenizer.apply_chat_template(
-        messages, add_generation_prompt=True, return_tensors="pt"
+        messages,
+        add_generation_prompt=True,
+        return_tensors="pt"
     ).to("cuda")
 
     streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
@@ -40,11 +39,12 @@ def respond(message, history):
         input_ids=input_ids,
         streamer=streamer,
         max_new_tokens=2048,
-        temperature=0.5,
-        top_p=0.9,
+        temperature=0.6,
+        top_p=0.95,
         do_sample=True,
     )
 
+    # Start the async generation thread
     thread = Thread(target=model.generate, kwargs=generate_kwargs)
     thread.start()
 
@@ -53,11 +53,13 @@ def respond(message, history):
         response += token
         yield response
 
-# Drop the type parameter entirely; keep only the basic configuration
+# Use the new-style configuration parameters
 demo = gr.ChatInterface(
     fn=respond,
+    type="messages",  # requires gradio>=5.0.0
     title="Spartacus Chat",
     description="Chat with NoesisLab/Spartacus-1B-Instruct",
+    examples=["Who are you?", "Explain the concept of Noesis."],
 )
 
 if __name__ == "__main__":
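
Note on the core change: with type="messages" (Gradio >= 5.0), gr.ChatInterface passes history to the callback as a list of OpenAI-style role/content dicts rather than the legacy list of [user, assistant] pairs, which is why the manual re-packing loop collapses to a single messages.extend(history). A minimal sketch of the two shapes, with illustrative values only:

# Legacy tuple format (what the removed loop expected):
# history == [["Hello", "Hi! How can I help?"]]

# type="messages" format (what respond() now receives):
# history == [
#     {"role": "user", "content": "Hello"},
#     {"role": "assistant", "content": "Hi! How can I help?"},
# ]

messages = [{"role": "system", "content": "You are Spartacus, a helpful assistant."}]
messages.extend(history)                               # already chat-template ready
messages.append({"role": "user", "content": message})  # current turn

Because the dicts already match what tokenizer.apply_chat_template expects, no per-pair conversion is needed between the UI layer and the model.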