Spaces:

phamhoangf
/

structaware-chat

Runtime error

App Files Files Community

phamhoangf committed on Oct 2, 2025

Commit

88f5ef8

verified ·

1 Parent(s): 00c5fdd

Update app.py

Browse files

Files changed (1) hide show

app.py +55 -30

app.py CHANGED Viewed

@@ -1,54 +1,79 @@
 import gradio as gr
 import torch
-from transformers import pipeline, AutoTokenizer
 import os
 # Read the token from the Space's secrets
 hf_token = os.environ.get("HF_TOKEN")
-# Load the model and tokenizer
-# device_map="auto" will automatically use a GPU if one is available
 model_id = "phamhoangf/struct-aware-baseline-qwen3-4b"
-# Use the token to authenticate when loading the tokenizer
 tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token)
-# ---- BUG FIX HERE ----
-# Pass the token directly to pipeline instead of going through model_kwargs
-pipe = pipeline(
-    "text-generation",
-    model=model_id,
     torch_dtype=torch.bfloat16,
     device_map="auto",
-    token=hf_token  # Changed from 'model_kwargs' to this form
 )
 def predict(message, history):
-    # Build the prompt from the chat history using the Qwen2 template
     messages = []
     for user_msg, assistant_msg in history:
         messages.append({"role": "user", "content": user_msg})
         messages.append({"role": "assistant", "content": assistant_msg})
     messages.append({"role": "user", "content": message})
-    # Create the complete prompt
-    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
-    # Generate text
-    outputs = pipe(
-        prompt,
-        max_new_tokens=256,
         do_sample=True,
         temperature=0.7,
-        top_k=20,
-        top_p=0.8,
     )
-    # Extract the reply
-    generated_text = outputs[0]["generated_text"]
-    # Keep only the newly generated text (the part after the prompt)
-    response = generated_text[len(prompt):]
-    return response
-# Create the Chat interface; this interface also automatically creates an API
-gr.ChatInterface(predict).launch()

 import gradio as gr
 import torch
+from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
+from threading import Thread
 import os
+# --- 1. MODEL SETUP ---
 # Read the token from the Space's secrets
 hf_token = os.environ.get("HF_TOKEN")
 model_id = "phamhoangf/struct-aware-baseline-qwen3-4b"
+# Load the tokenizer
 tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token)
+# Load the model directly instead of going through pipeline
+# This gives us access to .generate() with a streamer
+model = AutoModelForCausalLM.from_pretrained(
+    model_id,
     torch_dtype=torch.bfloat16,
     device_map="auto",
+    token=hf_token
 )
+# --- 2. PREDICT FUNCTION WITH STREAMING SUPPORT ---
+# Generator: turns the chat history plus the new message into a prompt,
+# starts model.generate() on a background thread, and yields the reply
+# text accumulated so far each time the streamer produces a new piece.
 def predict(message, history):
+    # Build the prompt from the chat history
+    # NOTE(review): the unpacking below assumes each history entry is a
+    # (user_msg, assistant_msg) pair — confirm against the history format
+    # of the Gradio ChatInterface version in use.
     messages = []
     for user_msg, assistant_msg in history:
         messages.append({"role": "user", "content": user_msg})
         messages.append({"role": "assistant", "content": assistant_msg})
     messages.append({"role": "user", "content": message})
+    prompt = tokenizer.apply_chat_template(
+        messages,
+        tokenize=False,
+        add_generation_prompt=True
+    )
+    # Tokenize input
+    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+    # Create a streamer
+    # skip_prompt=True so the prompt is not repeated in the output
+    # skip_special_tokens=True to drop tokens such as </s>
+    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
+    # Parameters for token generation
+    generation_kwargs = dict(
+        **inputs,
+        streamer=streamer,
+        max_new_tokens=1024,
         do_sample=True,
         temperature=0.7,
+        top_p=0.95,
+        top_k=50,
     )
+    # Run token generation in a separate thread
+    # so it does not block the main thread that is returning results to Gradio
+    # NOTE(review): the thread is started but never joined in this function.
+    thread = Thread(target=model.generate, kwargs=generation_kwargs)
+    thread.start()
+    # Yield the generated text piece by piece
+    generated_text = ""
+    for new_text in streamer:
+        generated_text += new_text
+        yield generated_text # Return the updated string to Gradio
+# --- 3. TẠO GIAO DIỆN ---
+# Giữ nguyên như cũ
+gr.ChatInterface(
+    predict,
+    title="Struct-Aware Baseline Qwen3 4B",
+    description="Giao diện chat cho mô hình phamhoangf/struct-aware-baseline-qwen3-4b"
+).launch()```