Spaces:

Vanhwbt
/

API_chatbot

Runtime error

Vanhwbt committed on Feb 13

Commit

5557ff6

1 Parent(s): f0df364

update

Files changed (2) hide show

app.py CHANGED Viewed

@@ -1,26 +1,44 @@
 import os
 import gradio as gr
-from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
-# Model này cực nhẹ, chạy trên CPU HF Space rất mượt
-model_id = "Qwen/Qwen2.5-1.5B-Instruct"
-# Tải tokenizer và model
-tokenizer = AutoTokenizer.from_pretrained(model_id)
 pipe = pipeline(
-    "text-generation",
-    model=model_id,
-    torch_dtype="auto",
-    device_map="auto"
 )
 def chat(message, history):
-    messages = [{"role": "user", "content": message}]
-    # Format chuẩn cho Qwen
-    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
-    outputs = pipe(prompt, max_new_tokens=512, do_sample=True, temperature=0.7)
-    return outputs[0]["generated_text"].split("<|im_start|>assistant\n")[-1]
-demo = gr.ChatInterface(fn=chat, title="Gemma thì chậm, Qwen thì đậm chất chơi!")
 demo.launch()

 import os
 import gradio as gr
+import torch
+from transformers import pipeline
+# "Chiến thần" siêu nhẹ của nhà Meta
+model_id = "meta-llama/Llama-3.2-1B-Instruct"
+# Khởi tạo pipeline chat
+# Lưu ý: Llama 3.2 cần transformers bản mới nhất
 pipe = pipeline(
+    "text-generation",
+    model=model_id,
+    torch_dtype=torch.bfloat16, # Tối ưu cho CPU/GPU đời mới
+    device_map="auto",
 )
 def chat(message, history):
+    # Tạo cấu trúc hội thoại đúng chuẩn Llama 3
+    messages = []
+    for h in history:
+        messages.append({"role": "user", "content": h[0]})
+        messages.append({"role": "assistant", "content": h[1]})
+    messages.append({"role": "user", "content": message})
+    # Gọi Llama trả lời
+    outputs = pipe(
+        messages,
+        max_new_tokens=256,
+        do_sample=True,
+        temperature=0.7,
+        top_p=0.9,
+    )
+    return outputs[0]["generated_text"][-1]["content"]
+# Tạo giao diện API
+demo = gr.ChatInterface(
+    fn=chat,
+    title="Llama-3.2-1B: Nhỏ mà có võ!",
+    description="Backend siêu tốc cho Website của bạn."
+)
 demo.launch()

requirements.txt CHANGED Viewed

@@ -1,4 +1,4 @@
-transformers
 torch
 accelerate
 gradio

+transformers>=4.45.0
 torch
 accelerate
 gradio