wy-wu committed · verified
Commit a03ce67 · 1 Parent(s): 516130a

Update app.py

Files changed (1)
  1. app.py +13 -68
app.py CHANGED
@@ -1,76 +1,21 @@
-import os
 import gradio as gr
-import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
+from transformers import pipeline

-# 🔹 CPU time-saving tip: limit the number of threads
-os.environ["OMP_NUM_THREADS"] = "1"
-os.environ["MKL_NUM_THREADS"] = "1"
+# Use the TinyLlama-1.1B-Chat model
+generator = pipeline("text-generation", model="TinyLlama/TinyLlama-1.1B-Chat-v1.0")

-# 🔹 Use the TinyLlama model (faster)
-MODEL_ID = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
-
-def load_pipe(model_id=MODEL_ID):
-    tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)
-    model = AutoModelForCausalLM.from_pretrained(
-        model_id,
-        torch_dtype=torch.float32,  # float32 is recommended on CPU
-        low_cpu_mem_usage=True
-    )
-    return pipeline(
-        "text-generation",
-        model=model,
-        tokenizer=tokenizer,
-        device=-1  # -1 = CPU
-    )
-
-pipe = load_pipe()
-
-SYSTEM_PROMPT = "You are a helpful assistant. Please answer concisely in Traditional Chinese."
-MAX_TURNS = 3  # keep only the most recent 3 turns so the input doesn't grow too long and slow things down
-
-def chat(history, user_msg):
-    # 🔹 Trim the history so an oversized input doesn't slow generation down
-    history = history[-2*MAX_TURNS:]
-
-    prompt = ""
-    for role, text in history:
-        prompt += f"{role}: {text}\n"
-    prompt = f"{prompt}system: {SYSTEM_PROMPT}\nuser: {user_msg}\nassistant:"
-
-    out = pipe(
-        prompt,
-        max_new_tokens=128,  # 🔹 limit output length to speed up generation
+def chat(message, history):
+    # Generate a reply
+    result = generator(
+        message,
+        max_new_tokens=128,  # use max_new_tokens (rather than max_length)
         do_sample=True,
         temperature=0.7,
-        top_p=0.9,
-        top_k=50,
-        repetition_penalty=1.1,  # 🔹 reduce repetition
-        eos_token_id=pipe.tokenizer.eos_token_id,
-        num_return_sequences=1
-    )[0]["generated_text"]
-
-    reply = out.split("assistant:")[-1].strip()
-    history.append(("user", user_msg))
-    history.append(("assistant", reply))
-    return history, ""
-
-with gr.Blocks() as demo:
-    gr.Markdown("## Chatbot 範例 - TinyLlama-1.1B-Chat (CPU)")
-    chatbox = gr.Chatbot(height=350)
-    msg = gr.Textbox(label="輸入訊息")
-    clear = gr.Button("清空對話")
-
-    state = gr.State([])
-
-    def init():
-        return []
-
-    msg.submit(chat, [state, msg], [state, msg]).then(
-        lambda h: ([(h[i], h[i+1]) for i in range(0, len(h), 2)], ""),
-        inputs=state,
-        outputs=[chatbox, msg]
+        top_p=0.9
     )
-    clear.click(init, outputs=state).then(lambda: [], outputs=chatbox)
+    reply = result[0]["generated_text"]
+    return reply
+
+demo = gr.ChatInterface(fn=chat, title="AI 聊天機器人 (TinyLlama-1.1B-Chat)")

 demo.launch()
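
Note (not part of this commit): with the transformers text-generation pipeline, result[0]["generated_text"] includes the prompt by default, and the simplified chat() above passes the raw message without TinyLlama's chat template, so the reply shown in gr.ChatInterface would echo the user's input. A minimal sketch of one way to handle both, assuming the standard apply_chat_template and return_full_text options; an illustrative variant, not the committed code:

import gradio as gr
from transformers import pipeline

generator = pipeline("text-generation", model="TinyLlama/TinyLlama-1.1B-Chat-v1.0")

def chat(message, history):
    # Format the single-turn input with the model's chat template.
    messages = [{"role": "user", "content": message}]
    prompt = generator.tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    result = generator(
        prompt,
        max_new_tokens=128,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
        return_full_text=False,  # return only the newly generated text, not the echoed prompt
    )
    return result[0]["generated_text"].strip()

demo = gr.ChatInterface(fn=chat, title="AI 聊天機器人 (TinyLlama-1.1B-Chat)")
demo.launch()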