Jiaqi-hkust committed
Commit 78ed009 · verified · 1 Parent(s): 8aa41c0

Upload folder using huggingface_hub

Files changed (1)
  1. app.py +50 -38
app.py CHANGED
@@ -28,7 +28,7 @@ def gpu_decorator(func):
 # Note: in a ZeroGPU environment, CUDA may not yet be available at startup
 # Whether to use flash-attn is decided at model load time, based on actual CUDA availability
 
-sys_prompt = """First output the the types of degradations in image briefly in <TYPE> <TYPE_END> tags,
+sys_prompt = """First output the types of degradations in image briefly in <TYPE> <TYPE_END> tags,
 and then output what effects do these degradation have on the image in <INFLUENCE> <INFLUENCE_END> tags,
 then based on the strength of degradation, output an APPROPRIATE length for the reasoning process in <REASONING> <REASONING_END> tags,
 and then summarize the content of reasoning and the give the answer in <CONCLUSION> <CONCLUSION_END> tags,
@@ -188,7 +188,6 @@ class ModelHandler:
 
 model_handler = None
 
-@gpu_decorator  # mark this function as requiring the GPU
 def get_model_handler():
     """Get model handler with lazy loading"""
     global model_handler
@@ -197,6 +196,53 @@ def get_model_handler():
         model_handler = ModelHandler(MODEL_PATH)
     return model_handler
 
+@gpu_decorator
+async def respond(user_msg, history, temp, tokens):
+    text = user_msg.get("text", "").strip()
+    files = user_msg.get("files", [])
+
+    # ### <<< Change 3: build the correct multimodal message format
+    # Don't append raw path strings; wrap each file as {"type": "image", "image": path}
+    user_content = []
+    for file_path in files:
+        user_content.append({"type": "image", "image": file_path})
+
+    if text:
+        user_content.append({"type": "text", "text": text})
+
+    # Build a user message that matches the type="messages" format
+    user_message = {"role": "user", "content": user_content}
+
+    history.append(user_message)
+    # Yield once here so the user sees their own input immediately
+    yield history, gr.MultimodalTextbox(value=None, interactive=False)
+
+    history.append({"role": "assistant", "content": ""})
+
+    try:
+        # Keep only earlier turns (exclude the current one to avoid duplication)
+        previous_history = history[:-2] if len(history) >= 2 else []
+
+        # Call the handler here; under @gpu_decorator we can access the GPU
+        handler = get_model_handler()
+
+        generated_text = ""
+        # Pass the raw user_msg dict to predict, or adapt predict's input as needed
+        # Note: your predict function's parsing logic must match this format
+        for chunk in handler.predict(user_msg, previous_history, temp, tokens):
+            generated_text = chunk
+            safe_text = generated_text.replace("<", "&lt;").replace(">", "&gt;")
+            history[-1]["content"] = safe_text
+            yield history, gr.MultimodalTextbox(interactive=False)
+
+    except Exception as e:
+        import traceback
+        traceback.print_exc()
+        history[-1]["content"] = f"❌ Error: {str(e)}"
+        yield history, gr.MultimodalTextbox(interactive=True)
+
+    yield history, gr.MultimodalTextbox(value=None, interactive=True)
+
 def create_chat_ui():
     custom_css = """
     .gradio-container { font-family: 'Inter', sans-serif; }
@@ -214,7 +260,8 @@ def create_chat_ui():
         elem_id="chatbot",
         label="Chat",
         avatar_images=(None, "https://api.dicebear.com/7.x/bottts/svg?seed=Qwen"),
-        height=650
+        height=650,
+        type="messages"
     )
 
     chat_input = gr.MultimodalTextbox(
@@ -264,41 +311,6 @@ def create_chat_ui():
     else:
         gr.Markdown("*No example images available, please manually upload images for testing*")
 
-    async def respond(user_msg, history, temp, tokens):
-        text = user_msg.get("text", "").strip()
-        files = user_msg.get("files", [])
-        user_content = list(files)
-        if text: user_content.append(text)
-
-        if not files and text: user_message = {"role": "user", "content": text}
-        else: user_message = {"role": "user", "content": user_content}
-
-        history.append(user_message)
-        yield history, gr.MultimodalTextbox(value=None, interactive=False)
-
-        history.append({"role": "assistant", "content": ""})
-
-        try:
-            previous_history = history[:-2] if len(history) >= 2 else []
-
-            handler = get_model_handler()
-            generated_text = ""
-            for chunk in handler.predict(user_msg, previous_history, temp, tokens):
-                generated_text = chunk
-
-                safe_text = generated_text.replace("<", "&lt;").replace(">", "&gt;")
-
-                history[-1]["content"] = safe_text
-                yield history, gr.MultimodalTextbox(interactive=False)
-
-        except Exception as e:
-            import traceback
-            traceback.print_exc()
-            history[-1]["content"] = f"❌ Inference error: {str(e)}"
-            yield history, gr.MultimodalTextbox(interactive=True)
-
-        yield history, gr.MultimodalTextbox(value=None, interactive=True)
-
     chat_input.submit(
         respond,
         inputs=[chat_input, chatbot, temperature, max_tokens],
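
Hunks 3 and 4 above work together: gr.Chatbot(type="messages") switches the chat history from tuple pairs to OpenAI-style role/content dicts, and the rewritten respond builds each user turn as a list of typed content parts instead of raw path strings. A minimal sketch of one turn as the new code constructs it (the upload path and question are hypothetical; the {"type": ...} part convention is the Qwen-VL-style format this commit adopts, which handler.predict must be able to parse):

    # Sketch of one conversation turn as built by the new respond()
    history = [
        {
            "role": "user",
            "content": [
                {"type": "image", "image": "/tmp/uploads/noisy.png"},  # hypothetical upload path
                {"type": "text", "text": "What degradations are in this image?"},  # hypothetical prompt
            ],
        },
        {"role": "assistant", "content": ""},  # placeholder, filled chunk by chunk while streaming
    ]

The per-chunk replacement of < and > with &lt; and &gt; keeps the model's <TYPE> / <REASONING> style tags visible as literal text in the Chatbot instead of being interpreted as HTML.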
 
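The other substantive change is moving @gpu_decorator off get_model_handler and onto the new top-level respond, so GPU time is requested around each full inference call rather than around model construction. The decorator's definition sits outside this diff; a plausible minimal sketch for a ZeroGPU Space follows (spaces and spaces.GPU are the real ZeroGPU APIs, but this fallback shape is an assumption, not the code in app.py):

    # Hypothetical sketch; the actual gpu_decorator in app.py is defined outside this diff
    def gpu_decorator(func):
        try:
            import spaces  # available on Hugging Face ZeroGPU Spaces
            return spaces.GPU(func)  # attach a GPU for the duration of each call
        except ImportError:
            return func  # local / CPU-only run: leave the function undecorated

Decorating respond instead of get_model_handler also matches the startup comment in hunk 1: the model is loaded lazily inside the GPU-attached call, once CUDA is actually available.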