Jiaqi-hkust committed on
Commit
b4fe2c0
·
verified ·
1 Parent(s): 610318e

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +35 -53
app.py CHANGED
@@ -20,7 +20,6 @@ if is_spaces:
20
  print("⚠️ spaces module not available, GPU detection may not work")
21
 
22
  def gpu_decorator(func):
23
- """条件应用 GPU 装饰器"""
24
  if spaces_available and GPU is not None:
25
  return GPU(func)
26
  return func
@@ -31,14 +30,12 @@ def gpu_decorator(func):
31
  MODEL_PATH = os.getenv("MODEL_PATH", "Jiaqi-hkust/Robust-R1-RL")
32
  PROJECT_DIR = os.path.dirname(os.path.abspath(__file__))
33
 
34
- # 系统提示词
35
  SYS_PROMPT = """First output the types of degradations in image briefly in <TYPE> <TYPE_END> tags,
36
  and then output what effects do these degradation have on the image in <INFLUENCE> <INFLUENCE_END> tags,
37
  then based on the strength of degradation, output an APPROPRIATE length for the reasoning process in <REASONING> <REASONING_END> tags,
38
  and then summarize the content of reasoning and the give the answer in <CONCLUSION> <CONCLUSION_END> tags,
39
  provides the user with the answer briefly in <ANSWER> <ANSWER_END>."""
40
 
41
- # CSS 样式
42
  CUSTOM_CSS = """
43
  .gradio-container { font-family: 'Inter', sans-serif; }
44
  """
@@ -58,7 +55,6 @@ class ModelHandler:
58
  print(f"⏳ Loading model weights from {self.model_path}...")
59
  self.processor = AutoProcessor.from_pretrained(self.model_path)
60
 
61
- # 智能判断 Flash Attention
62
  use_flash_attention = False
63
  if torch.cuda.is_available():
64
  device_capability = torch.cuda.get_device_capability()
@@ -81,7 +77,6 @@ class ModelHandler:
81
  model_handler = None
82
 
83
  def get_model_handler():
84
- """懒加载模型句柄"""
85
  global model_handler
86
  if model_handler is None:
87
  model_handler = ModelHandler(MODEL_PATH)
@@ -93,33 +88,26 @@ def get_model_handler():
93
  @gpu_decorator
94
  def respond(message, history, temperature, max_tokens):
95
  """
96
- 符合 gr.ChatInterface 标准的生成函数
97
- message: dict (multimodal=True时) -> {'text': str, 'files': list} [cite: 140]
98
  history: list of dicts -> OpenAI 风格历史记录
99
  """
100
  handler = get_model_handler()
101
 
102
- # 1. 构建当前用户消息 (转换为 OpenAI/Qwen 格式)
103
- # message['files'] 包含文件路径列表
104
  current_user_content = []
105
-
106
- # 处理图片
107
  if message.get("files"):
108
  for file_path in message["files"]:
109
  current_user_content.append({"type": "image", "image": file_path})
110
 
111
- # 处理文本
112
  user_text = message.get("text", "")
113
  if user_text:
114
  current_user_content.append({"type": "text", "text": user_text})
115
 
116
- # 2. 构建完整的对话列表 (History + Current Message)
117
- # 注意:ChatInterface 的 history 包含之前的内容,不包含当前这一条
118
  conversation = copy.deepcopy(history)
119
  conversation.append({"role": "user", "content": current_user_content})
120
 
121
- # 3. 注入 System Prompt (添加到最后一条用户消息的文本中)
122
- # 保持您原有的逻辑:将 prompt 拼接到最后一条消息
123
  last_content = conversation[-1]["content"]
124
  sys_prompt_fmt = "\n" + " ".join(SYS_PROMPT.split())
125
 
@@ -132,7 +120,7 @@ def respond(message, history, temperature, max_tokens):
132
  if not text_injected:
133
  last_content.append({"type": "text", "text": sys_prompt_fmt})
134
 
135
- # 4. 预处理输入
136
  text_prompt = handler.processor.apply_chat_template(
137
  conversation, tokenize=False, add_generation_prompt=True
138
  )
@@ -147,7 +135,6 @@ def respond(message, history, temperature, max_tokens):
147
  )
148
  inputs = inputs.to(handler.model.device)
149
 
150
- # 5. 生成参数
151
  generation_kwargs = dict(
152
  **inputs,
153
  max_new_tokens=max_tokens,
@@ -155,80 +142,75 @@ def respond(message, history, temperature, max_tokens):
155
  do_sample=True if temperature > 0 else False,
156
  )
157
 
158
- # 6. 流式生成 (Yielding response) [cite: 85]
159
  try:
160
- input_length = inputs['input_ids'].shape[1]
161
- # 注意:这里为了简化演示,使用非流式的 generate,然后模拟流式输出
162
- # 如果需要真正的 token 级流式,需要使用 TextIteratorStreamer
163
- # 但为了保持您原有逻辑的稳定性,我们先获取结果再 yield
164
-
165
  with torch.no_grad():
166
  generated_ids = handler.model.generate(**generation_kwargs)
167
 
 
168
  generated_ids = generated_ids[0][input_length:]
169
  generated_text = handler.processor.tokenizer.decode(
170
  generated_ids,
171
  skip_special_tokens=True
172
  )
173
 
174
- # 简单模拟流式效果(或直接返回)
175
  yield generated_text
176
 
177
  except Exception as e:
178
  import traceback
179
  traceback.print_exc()
180
- yield f"❌ Generation error: {str(e)}"
181
 
182
  # ==========================================
183
- # 5. 构建 UI (ChatInterface)
184
  # ==========================================
185
 
186
- # 准备 Examples 数据
187
  example_images_dir = os.path.join(PROJECT_DIR, "assets")
188
  examples_data = []
189
 
190
- # 定义示例数据源
191
- raw_examples = [
192
- ("What type of vehicles are the people riding?\n0. trucks\n1. wagons\n2. jeeps\n3. cars\n", "1.jpg"),
193
- ("What is the giant fish in the air?\n0. blimp\n1. balloon\n2. kite\n3. sculpture\n", "2.jpg"),
194
- ]
195
-
196
- for text, filename in raw_examples:
197
- path = os.path.join(example_images_dir, filename)
198
- # ChatInterface multimodal examples 格式: {"text": str, "files": [list]}
199
- if os.path.exists(path):
200
- examples_data.append({"text": text, "files": [path]})
201
-
202
- # 定义额外输入组件 (Generation Config)
 
 
 
203
  additional_inputs = [
204
  gr.Slider(minimum=0.01, maximum=1.0, value=0.6, step=0.05, label="Temperature"),
205
  gr.Slider(minimum=128, maximum=4096, value=1024, step=128, label="Max New Tokens"),
206
  ]
207
 
 
208
  chatbot_component = gr.Chatbot(
209
  label="Robust-R1 Chat",
210
  avatar_images=(None, "https://api.dicebear.com/7.x/bottts/svg?seed=Qwen"),
211
  height=650
212
  )
213
 
214
- # 创建 Interface
215
  demo = gr.ChatInterface(
216
  fn=respond,
217
- chatbot=chatbot_component, # <--- 这里传入自定义的 chatbot
218
- multimodal=True, # 启用多模态上传
219
  title="🤖 Robust-R1: Degradation-Aware Reasoning",
220
- description="Upload an image and ask questions. The model considers image degradations during reasoning.",
221
- additional_inputs=additional_inputs, # 添加配置滑块
222
- additional_inputs_accordion=gr.Accordion(label="⚙️ Generation Config", open=True), # 设置配置区域
223
- examples=examples_data, # 添加示例
224
- cache_examples=False, # 根据需要开启或关闭
225
  )
226
 
227
- # ==========================================
228
- # 6. 启动 Launch
229
- # ==========================================
230
  if __name__ == "__main__":
231
- aunch_kwargs = {
232
  "theme": gr.themes.Soft(),
233
  "css": CUSTOM_CSS,
234
  "allowed_paths": [PROJECT_DIR]
 
20
  print("⚠️ spaces module not available, GPU detection may not work")
21
 
22
  def gpu_decorator(func):
 
23
  if spaces_available and GPU is not None:
24
  return GPU(func)
25
  return func
 
30
  MODEL_PATH = os.getenv("MODEL_PATH", "Jiaqi-hkust/Robust-R1-RL")
31
  PROJECT_DIR = os.path.dirname(os.path.abspath(__file__))
32
 
 
33
  SYS_PROMPT = """First output the types of degradations in image briefly in <TYPE> <TYPE_END> tags,
34
  and then output what effects do these degradation have on the image in <INFLUENCE> <INFLUENCE_END> tags,
35
  then based on the strength of degradation, output an APPROPRIATE length for the reasoning process in <REASONING> <REASONING_END> tags,
36
  and then summarize the content of reasoning and the give the answer in <CONCLUSION> <CONCLUSION_END> tags,
37
  provides the user with the answer briefly in <ANSWER> <ANSWER_END>."""
38
 
 
39
  CUSTOM_CSS = """
40
  .gradio-container { font-family: 'Inter', sans-serif; }
41
  """
 
55
  print(f"⏳ Loading model weights from {self.model_path}...")
56
  self.processor = AutoProcessor.from_pretrained(self.model_path)
57
 
 
58
  use_flash_attention = False
59
  if torch.cuda.is_available():
60
  device_capability = torch.cuda.get_device_capability()
 
77
  model_handler = None
78
 
79
  def get_model_handler():
 
80
  global model_handler
81
  if model_handler is None:
82
  model_handler = ModelHandler(MODEL_PATH)
 
88
  @gpu_decorator
89
  def respond(message, history, temperature, max_tokens):
90
  """
91
+ message: dict -> {'text': str, 'files': list}
 
92
  history: list of dicts -> OpenAI 风格历史记录
93
  """
94
  handler = get_model_handler()
95
 
96
+ # 1. 转换当前消息
 
97
  current_user_content = []
 
 
98
  if message.get("files"):
99
  for file_path in message["files"]:
100
  current_user_content.append({"type": "image", "image": file_path})
101
 
 
102
  user_text = message.get("text", "")
103
  if user_text:
104
  current_user_content.append({"type": "text", "text": user_text})
105
 
106
+ # 2. 构建完整对话 (History + Current)
 
107
  conversation = copy.deepcopy(history)
108
  conversation.append({"role": "user", "content": current_user_content})
109
 
110
+ # 3. 注入 System Prompt
 
111
  last_content = conversation[-1]["content"]
112
  sys_prompt_fmt = "\n" + " ".join(SYS_PROMPT.split())
113
 
 
120
  if not text_injected:
121
  last_content.append({"type": "text", "text": sys_prompt_fmt})
122
 
123
+ # 4. 推理
124
  text_prompt = handler.processor.apply_chat_template(
125
  conversation, tokenize=False, add_generation_prompt=True
126
  )
 
135
  )
136
  inputs = inputs.to(handler.model.device)
137
 
 
138
  generation_kwargs = dict(
139
  **inputs,
140
  max_new_tokens=max_tokens,
 
142
  do_sample=True if temperature > 0 else False,
143
  )
144
 
 
145
  try:
 
 
 
 
 
146
  with torch.no_grad():
147
  generated_ids = handler.model.generate(**generation_kwargs)
148
 
149
+ input_length = inputs['input_ids'].shape[1]
150
  generated_ids = generated_ids[0][input_length:]
151
  generated_text = handler.processor.tokenizer.decode(
152
  generated_ids,
153
  skip_special_tokens=True
154
  )
155
 
 
156
  yield generated_text
157
 
158
  except Exception as e:
159
  import traceback
160
  traceback.print_exc()
161
+ yield f"❌ Error: {str(e)}"
162
 
163
  # ==========================================
164
+ # 5. 构建 UI
165
  # ==========================================
166
 
167
+ # 【关键修复点】:Examples 格式必须包含 Additional Inputs 的值
168
  example_images_dir = os.path.join(PROJECT_DIR, "assets")
169
  examples_data = []
170
 
171
+ if os.path.exists(example_images_dir):
172
+ raw_examples = [
173
+ ("What type of vehicles are the people riding?\n0. trucks\n1. wagons\n2. jeeps\n3. cars\n", "1.jpg"),
174
+ ("What is the giant fish in the air?\n0. blimp\n1. balloon\n2. kite\n3. sculpture\n", "2.jpg"),
175
+ ]
176
+ for text, filename in raw_examples:
177
+ path = os.path.join(example_images_dir, filename)
178
+ if os.path.exists(path):
179
+ # 格式必须是: [MessageDict, TemperatureValue, MaxTokensValue]
180
+ examples_data.append([
181
+ {"text": text, "files": [path]}, # 1. 消息对象
182
+ 0.6, # 2. Temperature (对应 additional_inputs[0])
183
+ 1024 # 3. Max Tokens (对应 additional_inputs[1])
184
+ ])
185
+
186
+ # 定义额外输入
187
  additional_inputs = [
188
  gr.Slider(minimum=0.01, maximum=1.0, value=0.6, step=0.05, label="Temperature"),
189
  gr.Slider(minimum=128, maximum=4096, value=1024, step=128, label="Max New Tokens"),
190
  ]
191
 
192
+ # 自定义 Chatbot 组件
193
  chatbot_component = gr.Chatbot(
194
  label="Robust-R1 Chat",
195
  avatar_images=(None, "https://api.dicebear.com/7.x/bottts/svg?seed=Qwen"),
196
  height=650
197
  )
198
 
199
+ # ChatInterface
200
  demo = gr.ChatInterface(
201
  fn=respond,
202
+ chatbot=chatbot_component,
203
+ multimodal=True,
204
  title="🤖 Robust-R1: Degradation-Aware Reasoning",
205
+ description="Upload an image and ask questions.",
206
+ additional_inputs=additional_inputs,
207
+ additional_inputs_accordion=gr.Accordion(label="⚙️ Generation Config", open=True),
208
+ examples=examples_data, # 现在这里的格式是 [[msg, 0.6, 1024], ...]
209
+ cache_examples=False
210
  )
211
 
 
 
 
212
  if __name__ == "__main__":
213
+ launch_kwargs = {
214
  "theme": gr.themes.Soft(),
215
  "css": CUSTOM_CSS,
216
  "allowed_paths": [PROJECT_DIR]