Spaces:

caobin
/

llm_assistant

Sleeping

App Files Community

caobin committed on Dec 10, 2025

Commit

3a8e995

verified ·

1 Parent(s): f41e6d1

Update app.py

Browse files

Files changed (1) hide show

app.py +23 -48

app.py CHANGED Viewed

@@ -1,84 +1,59 @@
 import gradio as gr
 from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
 MODEL_ID = "caobin/llm-caobin"
 # 加载 tokenizer 和模型
 tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
-# 根据是否有 GPU 自动设置 dtype
-dtype = torch.float16 if torch.cuda.is_available() else torch.float32
-device = "cuda" if torch.cuda.is_available() else "cpu"
 model = AutoModelForCausalLM.from_pretrained(
     MODEL_ID,
-    torch_dtype=dtype,
     trust_remote_code=True
 )
-model.to(device)
-model.eval()
-MAX_HISTORY = 3  # 只保留最近几轮对话
 def chat_fn(message, history):
-    """
-    message: 用户最新输入
-    history: [{"role": "user"/"assistant", "content": str}, ...]
-    """
-    # 只保留最近 MAX_HISTORY 轮
-    recent_history = history[-MAX_HISTORY*2:]  # user+assistant = 2 条消息一轮
-    # 拼接 prompt
     full_prompt = ""
-    for msg in recent_history:
-        if msg["role"] == "user":
-            full_prompt += f"<|user|>{msg['content']}"
-        elif msg["role"] == "assistant":
-            full_prompt += f"<|assistant|>{msg['content']}"
     full_prompt += f"<|user|>{message}<|assistant|>"
-    inputs = tokenizer(full_prompt, return_tensors="pt").to(device)
-    # 生成回复
     output_ids = model.generate(
         **inputs,
-        max_new_tokens=512,
         temperature=0.7,
         top_p=0.9,
         do_sample=True,
-        pad_token_id=tokenizer.eos_token_id
     )
-    # 只 decode 新生成部分
-    generated_text = tokenizer.decode(output_ids[0][inputs['input_ids'].shape[1]:], skip_special_tokens=True)
-    return generated_text.strip()
-def respond(message, chat_history):
-    # chat_history 是 Gradio 最新格式 [{"role":..., "content":...}, ...]
-    response = chat_fn(message, chat_history)
-    # 更新聊天历史
-    new_history = chat_history + [
-        {"role": "user", "content": message},
-        {"role": "assistant", "content": response}
-    ]
-    return "", new_history
-# Gradio 界面
 with gr.Blocks(title="caobin LLM Chatbot") as demo:
     gr.Markdown("# 🤖 caobin's AI assistant")
-    chatbot = gr.Chatbot([], height=450)  # 初始化为空列表
     msg = gr.Textbox(label="输入你的问题")
     msg.submit(respond, [msg, chatbot], [msg, chatbot])
 demo.launch()

 import gradio as gr
 from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
+# 模型 ID
 MODEL_ID = "caobin/llm-caobin"
 # 加载 tokenizer 和模型
 tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
 model = AutoModelForCausalLM.from_pretrained(
     MODEL_ID,
+    device_map="auto",  # CPU 上会自动映射到 CPU
     trust_remote_code=True
 )
+# 聊天函数
 def chat_fn(message, history):
+    # 只保留最近 3 轮历史
+    history = history[-3:]
     full_prompt = ""
+    for user_msg, bot_msg in history:
+        full_prompt += f"<|user|>{user_msg}<|assistant|>{bot_msg}"
     full_prompt += f"<|user|>{message}<|assistant|>"
+    # tokenizer 转 tensor
+    inputs = tokenizer(full_prompt, return_tensors="pt").to(model.device)
+    # 生成回答
     output_ids = model.generate(
         **inputs,
+        max_new_tokens=256,
         temperature=0.7,
         top_p=0.9,
         do_sample=True,
     )
+    output_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
+    if "<|assistant|>" in output_text:
+        output_text = output_text.split("<|assistant|>")[-1]
+    return output_text.strip()
+# Gradio UI
 with gr.Blocks(title="caobin LLM Chatbot") as demo:
     gr.Markdown("# 🤖 caobin's AI assistant")
+    chatbot = gr.Chatbot(height=450)
     msg = gr.Textbox(label="输入你的问题")
+    def respond(message, chat_history):
+        response = chat_fn(message, chat_history)
+        chat_history.append((message, response))
+        return "", chat_history
     msg.submit(respond, [msg, chatbot], [msg, chatbot])
 demo.launch()