Spaces:

haduykien
/

conext-lab_llma3

Sleeping

App Files Files Community

haduykien committed on Sep 16, 2025

Commit

b3f0da0

verified ·

1 Parent(s): 1ab6647

Update app.py

Browse files

Files changed (1) hide show

app.py +32 -31

app.py CHANGED Viewed

@@ -1,52 +1,53 @@
-import os
 import gradio as gr
 from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
-# Read the access token from the Space's secrets (Settings > New secret in the Space)
-hf_token = os.environ.get("HF_TOKEN")
-# Model ID (can be switched to an open model if access is not granted)
-model_id = "meta-llama/Llama-3.1-8B-Instruct"
-# Load the tokenizer and the pipeline using the token
-tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token)
 pipe = pipeline(
     "text-generation",
-    model=model_id,
     tokenizer=tokenizer,
-    device_map="auto",        # automatically choose GPU/CPU
-    torch_dtype="auto",       # FP16 if the GPU supports it
-    token=hf_token
 )
-# Reply handler: rebuilds the message list from history, queries the pipeline, appends the new turn
-def chat(user_input, history):
-    messages = [{"role": "system", "content": "Bạn là một trợ lý AI hữu ích."}]  # system prompt comes first
-    for h in history:  # each entry is indexed as h[0] = user text, h[1] = assistant text
-        messages.append({"role": "user", "content": h[0]})
-        messages.append({"role": "assistant", "content": h[1]})
-    messages.append({"role": "user", "content": user_input})  # current turn goes last
-    response = pipe(messages, max_new_tokens=300, do_sample=True, temperature=0.7)
-    # The Hugging Face pipeline may return a different format, so handle both cases
-    if isinstance(response[0]["generated_text"], list):
-        answer = response[0]["generated_text"][-1]["content"]  # list form: take the content of the last entry
-    else:
-        answer = response[0]["generated_text"]  # otherwise use the value as-is
-    history.append((user_input, answer))  # mutates the passed-in history list in place
     return history, history
-# Gradio UI: layout and event wiring
 with gr.Blocks() as demo:
-    gr.Markdown("# 🚀 Chatbot chạy bằng LLaMA trên Hugging Face Space")
     chatbot = gr.Chatbot()
     msg = gr.Textbox(placeholder="Nhập tin nhắn...")
-    clear = gr.Button("Xoá hội thoại")
     state = gr.State([])
-    msg.submit(chat, [msg, state], [chatbot, state])  # chat's two return values feed the chatbot and the state
     clear.click(lambda: ([], []), None, [chatbot, state], queue=False)
 demo.launch()

 import gradio as gr
 from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
+# Smaller model so that it can run on the Space
+MODEL_ID = "context-labs/meta-llama-Llama-3.2-3B-Instruct-FP16"
+# Load tokenizer + model
+tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
+model = AutoModelForCausalLM.from_pretrained(
+    MODEL_ID,
+    device_map="auto",   # automatically use the GPU if one is available
+    torch_dtype="auto"
+)
+# Create the pipeline
 pipe = pipeline(
     "text-generation",
+    model=model,
     tokenizer=tokenizer,
+    max_new_tokens=512
 )
+# Chat function: builds a plain-text transcript from history + message and returns the generated reply text
+def chat_fn(message, history):
+    # Re-format the conversation so that it looks like a chat transcript
+    prompt = ""
+    for user, bot in history:  # each history entry is unpacked as a (user, bot) pair
+        prompt += f"User: {user}\nAssistant: {bot}\n"
+    prompt += f"User: {message}\nAssistant:"  # ends on an open "Assistant:" line for the model to continue
+    outputs = pipe(prompt, do_sample=True, temperature=0.7, top_p=0.9)  # NOTE(review): raw transcript prompt, not a chat-message list — confirm this is intended
+    reply = outputs[0]["generated_text"][len(prompt):]  # drops the first len(prompt) characters — assumes the output echoes the prompt; TODO confirm
+    return reply.strip()
+# Gradio callback: gets a reply from chat_fn and records the (message, reply) turn
+def respond(message, history):
+    reply = chat_fn(message, history)
+    history.append((message, reply))  # mutates the passed-in history list in place
     return history, history
+# Gradio UI: layout and event wiring
 with gr.Blocks() as demo:
+    gr.Markdown("# 💬 Chat với LLaMA 3.2 3B FP16")
     chatbot = gr.Chatbot()
     msg = gr.Textbox(placeholder="Nhập tin nhắn...")
+    clear = gr.Button("Xóa hội thoại")
     state = gr.State([])
+    msg.submit(respond, [msg, state], [chatbot, state])  # respond's two return values feed the chatbot and the state
     clear.click(lambda: ([], []), None, [chatbot, state], queue=False)
 demo.launch()