Spaces:

Huy0502
/

Research_Assistant

Sleeping

App Files Community

Huy0502 committed on Oct 2, 2025

Commit

d67b7fe

verified ·

1 Parent(s): 36b23b0

Update app.py

Browse files

Files changed (1) hide show

app.py +33 -144

app.py CHANGED Viewed

@@ -1,145 +1,34 @@
-import gradio as gr
-from transformers import Qwen2_5_VLForConditionalGeneration, AutoTokenizer
-import torch
-# Load the model and tokenizer locally, with resource-saving options
-model_name = "Qwen/Qwen2.5-VL-3B-Instruct"
-tokenizer = AutoTokenizer.from_pretrained(model_name)
-model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
-    model_name,
-    device_map="auto",  # Automatically place the model on GPU/CPU
-    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,  # Use float16 to cut memory when a GPU is available
-    low_cpu_mem_usage=True,  # Reduce CPU resource usage while loading
-)
-# Hàm xử lý phản hồi của chatbot
-def respond(
-    message,
-    history: list[dict[str, str]],
-    system_message,
-    max_tokens,
-    temperature,
-    top_p,
-):
-    # Định dạng messages cho mô hình
-    messages = [{"role": "system", "content": system_message}]
-    messages.extend(history)
-    messages.append({"role": "user", "content": message})
-    # Chuyển messages thành chuỗi đầu vào
-    input_text = ""
-    for msg in messages:
-        role = "System" if msg["role"] == "system" else "User"
-        input_text += f"{role}: {msg['content']}\n"
-    # Tokenize và tạo phản hồi
-    inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
-    outputs = model.generate(
-        **inputs,
-        max_new_tokens=max_tokens,
-        temperature=temperature,
-        top_p=top_p,
-        do_sample=True,
-    )
-    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    # Trả về phản hồi dần dần (giả lập stream)
-    words = response.split()
-    full_response = ""
-    for i in range(0, len(words), 5):  # Trả về từng nhóm 5 từ để giả lập stream
-        full_response += " ".join(words[i:i+5]) + " "
-        yield full_response.strip()
-# Hàm trung gian để xử lý đúng số tham số
-def handle_submit(message, chatbot, system_message, max_tokens, temperature, top_p):
-    return respond(message, chatbot, system_message, max_tokens, temperature, top_p)
-# Define the theme for the interface: the Soft base theme with custom hues,
-# then overrides for the background and primary-button colors
-theme = gr.themes.Soft(
-    primary_hue="blue",
-    secondary_hue="gray",
-    neutral_hue="slate",
-).set(
-    body_background_fill="*neutral_50",
-    panel_background_fill="*neutral_100",
-    button_primary_background_fill="*primary_500",
-    button_primary_text_color="white",
-)
-# Custom CSS to polish the interface; the selectors match the elem_id values
-# "chatbot-container", "sidebar" and "input-section" used in the layout
-custom_css = """
-#chatbot-container {
-    height: 80vh;
-    border-radius: 10px;
-    overflow-y: auto;
-}
-#sidebar {
-    background-color: #f8fafc;
-    padding: 20px;
-    border-right: 1px solid #e2e8f0;
-}
-#input-section {
-    padding: 10px;
-    background-color: #ffffff;
-    border-top: 1px solid #e2e8f0;
-}
-"""
-# Build the interface with Gradio Blocks
-with gr.Blocks(theme=theme, css=custom_css, title="Research Assistant Chatbot") as demo:
-    with gr.Row():
-        # Sidebar holding the adjustable generation settings
-        with gr.Column(scale=1, min_width=300, elem_id="sidebar"):
-            gr.Markdown("## Cài đặt Chatbot")
-            # Default system prompt (Vietnamese): a research assistant that
-            # writes the abstract of a paper from the user's key points
-            system_message = gr.Textbox(
-                value="""Bạn là một trợ lý nghiên cứu viên, có thể hỗ trợ các nhà nghiên cứu viết các bài báo khoa học chính xác, hoàn thiện. Bạn sẽ viết nội dung "Tổng quan" - Abstract - cho bài báo dựa trên các ý chính trong bài nghiên cứu được cung cấp. Nội dung được viết trong khoảng 8 đến 10 câu, viết thành đoạn và không xuống dòng, đảm bảo thông tin bao phủ gồm thực trạng, chủ đề, phương pháp nghiên cứu, kết quả và điểm mạnh của nghiên cứu. Hãy yêu cầu người dùng cung cấp các ý chính của bài nghiên cứu để bạn có thể viết nội dung Tổng quan đầy đủ và chính xác.""",
-                label="System Message",
-                lines=10,
-            )
-            max_tokens = gr.Slider(
-                minimum=1,
-                maximum=2048,
-                value=512,
-                step=1,
-                label="Max New Tokens",
-            )
-            temperature = gr.Slider(
-                minimum=0.1,
-                maximum=4.0,
-                value=0.7,
-                step=0.1,
-                label="Temperature",
-            )
-            top_p = gr.Slider(
-                minimum=0.1,
-                maximum=1.0,
-                value=0.95,
-                step=0.05,
-                label="Top-p (Nucleus Sampling)",
-            )
-        # Main area for the chatbot
-        with gr.Column(scale=3):
-            gr.Markdown("## Trợ lý nghiên cứu khoa học")
-            chatbot = gr.Chatbot(
-                type="messages",
-                elem_id="chatbot-container",
-                height=600,
-            )
-            with gr.Row(elem_id="input-section"):
-                message = gr.Textbox(
-                    placeholder="Nhập các ý chính của bài nghiên cứu hoặc câu hỏi của bạn...",
-                    show_label=False,
-                    container=False,
-                )
-                submit_button = gr.Button("Gửi", variant="primary")
-            # Wire handle_submit to its inputs; the result goes to the chatbot
-            submit_button.click(
-                fn=handle_submit,
-                inputs=[message, chatbot, system_message, max_tokens, temperature, top_p],
-                outputs=chatbot,
-            )
-# Start the app only when this file is run as a script
-if __name__ == "__main__":
     demo.launch()

+import gradio as gr
+from huggingface_hub import InferenceClient
+def respond( message, history: list[dict[str, str]], system_message, max_tokens, temperature, top_p, hf_token: gr.OAuthToken, ):
+    client = InferenceClient(token=hf_token.token, model="openai/gpt-oss-20b")
+    messages = [{"role": "system", "content": system_message}] messages.extend(history)
+    messages.append({"role": "user", "content": message})
+    response = ""
+    for message in client.chat_completion( messages, max_tokens=max_tokens, stream=True, temperature=temperature, top_p=top_p, ):
+        choices = message.choices
+        token = ""
+        if len(choices) and choices[0].delta.content:
+            token = choices[0].delta.content
+        response += token
+        yield response
+chatbot = gr.ChatInterface( respond, type="messages",
+                           additional_inputs=[ gr.Textbox(value="""Bạn là một trợ lý nghiên cứu viên, có thể hỗ trợ các nhà nghiên cứu viết các bài báo khoa học chính xác, hoàn thiện. \ Bạn sẽ viết nội dung "Tổng quan" - Abstract - cho bài báo dựa trên các ý chính trong bài nghiên cứu được cung cấp. \ Nội dung được viết trong khoảng 8 đến 10 câu, viết thành đoạn và không xuống dòng, đảm bảo thông tin bao phủ gồm thực trạng, chủ đề, phương pháp nghiên cứu, \ kết quả và điểm mạnh của nghiên cứu. Hãy yêu cầu người dùng cung cấp các ý chính của bài nghiên cứu để bạn có thể viết nội dung Tổng quan đầy đủ và chính xác.""", label="System message"),
+                                              gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
+                                              gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
+                                              gr.Slider( minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)", ),
+                                             ],
+                          )
+# Page layout: a sidebar with the login button, then the chat interface.
+# NOTE(review): presumably this login supplies the gr.OAuthToken that
+# respond() receives - confirm against the Gradio OAuth documentation.
+with gr.Blocks() as demo:
+    with gr.Sidebar():
+        gr.LoginButton()
+    # The ChatInterface was created outside this Blocks context, so it is
+    # rendered into the layout explicitly here.
+    chatbot.render()
+# Start the app only when this file is run as a script
+if __name__ == "__main__":
     demo.launch()