Jiaqi-hkust commited on
Commit
16db65e
·
verified ·
1 Parent(s): 685215b

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +33 -49
app.py CHANGED
@@ -1,10 +1,8 @@
1
  import gradio as gr
2
- print(f"当前使用的 Gradio 版本是: {gr.__version__}")
3
  import os
4
  import torch
5
  from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor
6
  from qwen_vl_utils import process_vision_info
7
- import html
8
 
9
  # 导入 spaces 模块用于 GPU 检测
10
  is_spaces = os.getenv("SPACE_ID") is not None
@@ -25,10 +23,7 @@ def gpu_decorator(func):
25
  return GPU(func)
26
  return func
27
 
28
- # 条件安装 flash-attn(延迟到模型加载时,避免启动时 CUDA 检查)
29
- # 注意:在 ZeroGPU 环境中,启动时 CUDA 可能还不可用
30
- # flash-attn 将在模型加载时根据实际 CUDA 可用性决定是否使用
31
-
32
  sys_prompt = """First output the types of degradations in image briefly in <TYPE> <TYPE_END> tags,
33
  and then output what effects do these degradation have on the image in <INFLUENCE> <INFLUENCE_END> tags,
34
  then based on the strength of degradation, output an APPROPRIATE length for the reasoning process in <REASONING> <REASONING_END> tags,
@@ -44,8 +39,14 @@ if not is_spaces:
44
 
45
  MODEL_PATH = os.getenv("MODEL_PATH", "Jiaqi-hkust/Robust-R1-RL")
46
 
 
 
 
 
 
 
47
  print(f"==========================================")
48
- print(f"Initializing application...")
49
  print(f"==========================================")
50
 
51
  class ModelHandler:
@@ -61,20 +62,21 @@ class ModelHandler:
61
 
62
  self.processor = AutoProcessor.from_pretrained(self.model_path)
63
 
 
 
64
  if torch.cuda.is_available():
65
  device_capability = torch.cuda.get_device_capability()
66
- use_flash_attention = device_capability[0] >= 8
67
- print(f"🔧 CUDA available, device capability: {device_capability}")
 
68
  else:
69
- use_flash_attention = False
70
  print(f"🔧 Using CPU or non-CUDA device")
71
 
72
  self.model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
73
  self.model_path,
74
  torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
75
  device_map="auto",
76
- # attn_implementation="flash_attention_2" if use_flash_attention else "sdpa",
77
- attn_implementation="sdpa",
78
  trust_remote_code=True
79
  )
80
  print("✅ Model loaded successfully!")
@@ -82,11 +84,8 @@ class ModelHandler:
82
  print(f"❌ Model loading failed: {e}")
83
  raise e
84
 
85
-
86
  def predict(self, messages, temperature, max_tokens):
87
- # 注意:这里接收到的 messages 已经是标准的 [{'role': 'user', 'content': [...]}, ...]
88
-
89
- # 我们需要做一个深拷贝,避免修改 UI 上的 history 显示 System Prompt
90
  import copy
91
  messages_payload = copy.deepcopy(messages)
92
 
@@ -95,7 +94,6 @@ class ModelHandler:
95
  content = messages_payload[-1]["content"]
96
  sys_prompt_fmt = "\n" + " ".join(sys_prompt.split())
97
 
98
- # 现在的 content 肯定是 list (因为我们上面的 respond 函数构建的是 list)
99
  if isinstance(content, list):
100
  text_found = False
101
  for item in content:
@@ -108,7 +106,6 @@ class ModelHandler:
108
  elif isinstance(content, str):
109
  messages_payload[-1]["content"] += sys_prompt_fmt
110
 
111
- # 后续逻辑保持不变 ...
112
  text_prompt = self.processor.apply_chat_template(
113
  messages_payload, tokenize=False, add_generation_prompt=True
114
  )
@@ -169,37 +166,31 @@ def get_model_handler():
169
  @gpu_decorator
170
  def respond(user_msg, history, temp, tokens):
171
  """
172
- 针对 type="messages" 的 Chatbot 重写的响应函数
173
- history 现在的格式直接是: [{'role': 'user', 'content': ...}, {'role': 'assistant', ...}]
174
  """
175
 
176
- # 1. 构建当前用户的消息内容 (OpenAI 多模态格式)
177
  user_content = []
178
 
179
- # 处理图片/文件
180
  files = user_msg.get("files", [])
181
  for f in files:
182
- # qwen_vl_utils 识别 "image" 字段作为本地路径
183
  user_content.append({"type": "image", "image": f})
184
 
185
- # 处理文本
186
  text = user_msg.get("text", "")
187
  if text:
188
  user_content.append({"type": "text", "text": text})
189
 
190
- # 如果既没图也没文字,直接返回
191
  if not user_content:
192
  yield history, gr.MultimodalTextbox(value=None, interactive=True)
193
  return
194
 
195
  # 2. 将用户消息加入历史
196
- # 注意:这里直接 append 一个 dict,而不是 tuple
197
  history.append({
198
  "role": "user",
199
  "content": user_content
200
  })
201
 
202
- # 立即更新 UI,让用户看到自己的输入(图文会在同一个气泡里)
203
  yield history, gr.MultimodalTextbox(value=None, interactive=False)
204
 
205
  # 3. 调用模型
@@ -209,8 +200,8 @@ def respond(user_msg, history, temp, tokens):
209
  history.append({"role": "assistant", "content": ""})
210
 
211
  full_response = ""
212
- # 调用你的 handler.predict (注意:你需要稍微调整 handler.predict 里sys_prompt 处理逻辑,见下文建议)
213
- for chunk in handler.predict(history[:-1], temp, tokens): # 传入除最后一条空回复外的历史
214
  full_response += chunk
215
  history[-1]["content"] = full_response
216
  yield history, gr.MultimodalTextbox(interactive=False)
@@ -218,32 +209,31 @@ def respond(user_msg, history, temp, tokens):
218
  except Exception as e:
219
  import traceback
220
  traceback.print_exc()
221
- history.append({"role": "assistant", "content": f"❌ Error: {str(e)}"})
 
 
 
222
  yield history, gr.MultimodalTextbox(interactive=True)
223
 
224
  # 恢复输入框
225
  yield history, gr.MultimodalTextbox(interactive=True)
226
 
227
  def create_chat_ui():
228
- custom_css = """
229
- .gradio-container { font-family: 'Inter', sans-serif; }
230
- #chatbot { height: 650px !important; overflow-y: auto; }
231
- """
232
-
233
- with gr.Blocks(title="Robust-R1", css=custom_css) as demo:
234
 
235
  with gr.Row():
236
  gr.Markdown("# 🤖 Robust-R1: Degradation-Aware Reasoning")
237
 
238
  with gr.Row():
239
  with gr.Column(scale=4):
240
- # 【关键修改】添加 type="messages"
241
  chatbot = gr.Chatbot(
242
  elem_id="chatbot",
243
  label="Chat",
244
  avatar_images=(None, "https://api.dicebear.com/7.x/bottts/svg?seed=Qwen"),
245
  height=650,
246
- type="messages" # <--- 这里是重点!
247
  )
248
 
249
  chat_input = gr.MultimodalTextbox(
@@ -269,7 +259,6 @@ def create_chat_ui():
269
 
270
  gr.Markdown("---")
271
  gr.Markdown("### 📚 Examples")
272
- gr.Markdown("Click the examples below to quickly fill the input box and start a conversation")
273
 
274
  example_images_dir = os.path.join(project_dir, "assets")
275
 
@@ -291,7 +280,7 @@ def create_chat_ui():
291
  examples_per_page=3
292
  )
293
  else:
294
- gr.Markdown("*No example images available, please manually upload images for testing*")
295
 
296
  chat_input.submit(
297
  respond,
@@ -299,7 +288,6 @@ def create_chat_ui():
299
  outputs=[chatbot, chat_input]
300
  )
301
 
302
- # 清空历史只需要返回空列表 []
303
  clear_btn.click(lambda: ([], None), outputs=[chatbot, chat_input])
304
 
305
  return demo
@@ -307,16 +295,12 @@ def create_chat_ui():
307
  if __name__ == "__main__":
308
  demo = create_chat_ui()
309
 
310
- custom_css = """
311
- .gradio-container { font-family: 'Inter', sans-serif; }
312
- #chatbot { height: 650px !important; overflow-y: auto; }
313
- """
314
-
315
  if is_spaces:
316
  print(f"🚀 Running on Hugging Face Spaces: {os.getenv('SPACE_ID')}")
 
317
  demo.launch(
318
  theme=gr.themes.Soft(),
319
- css=custom_css,
320
  show_error=True,
321
  allowed_paths=[project_dir] if project_dir else None
322
  )
@@ -324,10 +308,10 @@ if __name__ == "__main__":
324
  print(f"🚀 Service is starting, please visit: http://localhost:7860")
325
  demo.launch(
326
  theme=gr.themes.Soft(),
327
- css=custom_css,
328
  server_name="0.0.0.0",
329
  server_port=7860,
330
  share=False,
331
  show_error=True,
332
  allowed_paths=[project_dir]
333
- )
 
1
  import gradio as gr
 
2
  import os
3
  import torch
4
  from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor
5
  from qwen_vl_utils import process_vision_info
 
6
 
7
  # 导入 spaces 模块用于 GPU 检测
8
  is_spaces = os.getenv("SPACE_ID") is not None
 
23
  return GPU(func)
24
  return func
25
 
26
+ # 系统提示词
 
 
 
27
  sys_prompt = """First output the types of degradations in image briefly in <TYPE> <TYPE_END> tags,
28
  and then output what effects do these degradation have on the image in <INFLUENCE> <INFLUENCE_END> tags,
29
  then based on the strength of degradation, output an APPROPRIATE length for the reasoning process in <REASONING> <REASONING_END> tags,
 
39
 
40
  MODEL_PATH = os.getenv("MODEL_PATH", "Jiaqi-hkust/Robust-R1-RL")
41
 
42
+ # 定义 CSS (移到全局,方便管理)
43
+ CUSTOM_CSS = """
44
+ .gradio-container { font-family: 'Inter', sans-serif; }
45
+ #chatbot { height: 650px !important; overflow-y: auto; }
46
+ """
47
+
48
  print(f"==========================================")
49
+ print(f"Initializing application (Gradio {gr.__version__})...")
50
  print(f"==========================================")
51
 
52
  class ModelHandler:
 
62
 
63
  self.processor = AutoProcessor.from_pretrained(self.model_path)
64
 
65
+ # 智能判断 Flash Attention
66
+ use_flash_attention = False
67
  if torch.cuda.is_available():
68
  device_capability = torch.cuda.get_device_capability()
69
+ if device_capability[0] >= 8:
70
+ use_flash_attention = True
71
+ print(f"🔧 CUDA available with Ampere+, utilizing Flash Attention 2")
72
  else:
 
73
  print(f"🔧 Using CPU or non-CUDA device")
74
 
75
  self.model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
76
  self.model_path,
77
  torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
78
  device_map="auto",
79
+ attn_implementation="flash_attention_2" if use_flash_attention else "sdpa",
 
80
  trust_remote_code=True
81
  )
82
  print("✅ Model loaded successfully!")
 
84
  print(f"❌ Model loading failed: {e}")
85
  raise e
86
 
 
87
  def predict(self, messages, temperature, max_tokens):
88
+ # 深拷贝消息,避免修改 UI 历史
 
 
89
  import copy
90
  messages_payload = copy.deepcopy(messages)
91
 
 
94
  content = messages_payload[-1]["content"]
95
  sys_prompt_fmt = "\n" + " ".join(sys_prompt.split())
96
 
 
97
  if isinstance(content, list):
98
  text_found = False
99
  for item in content:
 
106
  elif isinstance(content, str):
107
  messages_payload[-1]["content"] += sys_prompt_fmt
108
 
 
109
  text_prompt = self.processor.apply_chat_template(
110
  messages_payload, tokenize=False, add_generation_prompt=True
111
  )
 
166
  @gpu_decorator
167
  def respond(user_msg, history, temp, tokens):
168
  """
169
+ 针对 type="messages" 的 Chatbot 响应函数
 
170
  """
171
 
172
+ # 1. 构建当前用户的消息内容
173
  user_content = []
174
 
 
175
  files = user_msg.get("files", [])
176
  for f in files:
 
177
  user_content.append({"type": "image", "image": f})
178
 
 
179
  text = user_msg.get("text", "")
180
  if text:
181
  user_content.append({"type": "text", "text": text})
182
 
 
183
  if not user_content:
184
  yield history, gr.MultimodalTextbox(value=None, interactive=True)
185
  return
186
 
187
  # 2. 将用户消息加入历史
 
188
  history.append({
189
  "role": "user",
190
  "content": user_content
191
  })
192
 
193
+ # 立即更新 UI
194
  yield history, gr.MultimodalTextbox(value=None, interactive=False)
195
 
196
  # 3. 调用模型
 
200
  history.append({"role": "assistant", "content": ""})
201
 
202
  full_response = ""
203
+ # 传入 history[:-1] 避免传入空assistant 消息导致模板报错
204
+ for chunk in handler.predict(history[:-1], temp, tokens):
205
  full_response += chunk
206
  history[-1]["content"] = full_response
207
  yield history, gr.MultimodalTextbox(interactive=False)
 
209
  except Exception as e:
210
  import traceback
211
  traceback.print_exc()
212
+ # 如果还没加 assistant 消息就报错了,补一个
213
+ if not history or history[-1].get("role") != "assistant":
214
+ history.append({"role": "assistant", "content": ""})
215
+ history[-1]["content"] = f"❌ Error: {str(e)}"
216
  yield history, gr.MultimodalTextbox(interactive=True)
217
 
218
  # 恢复输入框
219
  yield history, gr.MultimodalTextbox(interactive=True)
220
 
221
  def create_chat_ui():
222
+ # 【修复点 1】: 这里不要传 css 参数
223
+ with gr.Blocks(title="Robust-R1") as demo:
 
 
 
 
224
 
225
  with gr.Row():
226
  gr.Markdown("# 🤖 Robust-R1: Degradation-Aware Reasoning")
227
 
228
  with gr.Row():
229
  with gr.Column(scale=4):
230
+ # Chatbot 设置 type="messages"
231
  chatbot = gr.Chatbot(
232
  elem_id="chatbot",
233
  label="Chat",
234
  avatar_images=(None, "https://api.dicebear.com/7.x/bottts/svg?seed=Qwen"),
235
  height=650,
236
+ type="messages"
237
  )
238
 
239
  chat_input = gr.MultimodalTextbox(
 
259
 
260
  gr.Markdown("---")
261
  gr.Markdown("### 📚 Examples")
 
262
 
263
  example_images_dir = os.path.join(project_dir, "assets")
264
 
 
280
  examples_per_page=3
281
  )
282
  else:
283
+ gr.Markdown("*No example images available.*")
284
 
285
  chat_input.submit(
286
  respond,
 
288
  outputs=[chatbot, chat_input]
289
  )
290
 
 
291
  clear_btn.click(lambda: ([], None), outputs=[chatbot, chat_input])
292
 
293
  return demo
 
295
  if __name__ == "__main__":
296
  demo = create_chat_ui()
297
 
 
 
 
 
 
298
  if is_spaces:
299
  print(f"🚀 Running on Hugging Face Spaces: {os.getenv('SPACE_ID')}")
300
+ # 【修复点 2】: CSS 放在 launch 里
301
  demo.launch(
302
  theme=gr.themes.Soft(),
303
+ css=CUSTOM_CSS,
304
  show_error=True,
305
  allowed_paths=[project_dir] if project_dir else None
306
  )
 
308
  print(f"🚀 Service is starting, please visit: http://localhost:7860")
309
  demo.launch(
310
  theme=gr.themes.Soft(),
311
+ css=CUSTOM_CSS,
312
  server_name="0.0.0.0",
313
  server_port=7860,
314
  share=False,
315
  show_error=True,
316
  allowed_paths=[project_dir]
317
+ )