Spaces:

phamhoangf
/

structaware-chat

Runtime error

phamhoangf committed on Oct 2, 2025

Commit

8a8eb9f

verified ·

1 Parent(s): 14b1cea

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,10 +1,44 @@
 import gradio as gr
-with gr.Blocks(fill_height=True) as demo:
-    with gr.Sidebar():
-        gr.Markdown("# Inference Provider")
-        gr.Markdown("This Space showcases the Qwen/Qwen3-4B-Instruct-2507 model, served by the nscale API. Sign in with your Hugging Face account to use this API.")
-        button = gr.LoginButton("Sign in")
-    gr.load("models/Qwen/Qwen3-4B-Instruct-2507", accept_token=button, provider="nscale")
-demo.launch()

 import gradio as gr
+import torch
+from transformers import pipeline, AutoTokenizer
+# Tải mô hình và tokenizer
+# device_map="auto" sẽ tự động sử dụng GPU nếu có
+model_id = "phamhoangf/struct-aware-baseline-qwen3-4b"
+tokenizer = AutoTokenizer.from_pretrained(model_id)
+pipe = pipeline(
+    "text-generation",
+    model=model_id,
+    torch_dtype=torch.bfloat16,
+    device_map="auto",
+)
+def predict(message, history):
+    # Xây dựng prompt từ lịch sử trò chuyện theo template của Qwen2
+    messages = []
+    for user_msg, assistant_msg in history:
+        messages.append({"role": "user", "content": user_msg})
+        messages.append({"role": "assistant", "content": assistant_msg})
+    messages.append({"role": "user", "content": message})
+    # Tạo prompt hoàn chỉnh
+    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+    # Tạo văn bản
+    outputs = pipe(
+        prompt,
+        max_new_tokens=256,
+        do_sample=True,
+        temperature=0.7,
+        top_k=50,
+        top_p=0.95,
+    )
+    # Trích xuất phần trả lời
+    generated_text = outputs[0]["generated_text"]
+    # Lấy phần văn bản mới được tạo ra (sau prompt)
+    response = generated_text[len(prompt):]
+    return response
+# Tạo giao diện Chat, giao diện này cũng tự động tạo ra một API
+gr.ChatInterface(predict).launch()