ROBO-R1984

Build error

App Files Community

openfree committed on Jun 16, 2025

Commit

59be132

verified ·

1 Parent(s): 3cc45f6

Rename app-backup.py to app-오리지날-backup.py

Browse files

Files changed (1) hide show

app-backup.py → app-오리지날-backup.py +41 -35

app-backup.py → app-오리지날-backup.py RENAMED Viewed

@@ -1,3 +1,6 @@
 #!/usr/bin/env python3
 import os
@@ -233,7 +236,7 @@ def analyze_image_for_robot(
     task_type: str = "general",
     use_web_search: bool = False,
     enable_thinking: bool = False,  # 기본값 False로 변경
-    max_new_tokens: int = 250  # 기본값 250으로 변경
 ) -> str:
     """로봇 작업을 위한 이미지 분석"""
     global model, processor
@@ -249,23 +252,27 @@ def analyze_image_for_robot(
         # 태스크별 시스템 프롬프트 구성 (더 간결하게)
         system_prompts = {
-            "general": "당신은 로봇 시각 시스템입니다. 핵심 내용만 간결하게 설명하세요.",
             "planning": """당신은 로봇 작업 계획 AI입니다.
-주요 단계만 간결하게 작성하세요.
-형식: Step_1: xxx
 Step_2: xxx
 Step_n: xxx""",
-            "grounding": "당신은 객체 위치 시스템입니다. 객체 위치를 [x1, y1, x2, y2]로 반환하세요.",
-            "affordance": "당신은 파지점 분석 AI입니다. 파지 영역을 [x1, y1, x2, y2]로 반환하세요.",
-            "trajectory": "당신은 경로 계획 AI입니다. 경로를 [(x1,y1), (x2,y2), ...]로 제시하세요.",
-            "pointing": "당신은 지점 지정 시스템입니다. 위치를 [(x1,y1), (x2,y2), ...]로 반환하세요."
         }
         system_prompt = system_prompts.get(task_type, system_prompts["general"])
         # Chain-of-Thought 추가 (선택적)
         if enable_thinking:
-            system_prompt += "\n\n추론 과정을 <thinking></thinking> 태그 안에 작성 후 최종 답변을 제시하세요."
         # 웹 검색 수행
         combined_system = system_prompt
@@ -314,14 +321,25 @@ Step_n: xxx""",
                 do_sample=True,
                 temperature=0.7,
                 top_p=0.9,
             )
         # 디코딩
-        response = processor.decode(outputs[0], skip_special_tokens=True)
-        # 프롬프트 제거
-        if "Assistant:" in response:
-            response = response.split("Assistant:")[-1].strip()
         return response
@@ -490,20 +508,6 @@ with gr.Blocks(title="🤖 로봇 시각 시스템 (Gemma3-4B)", css=css) as dem
     </div>
     """)
-    gr.HTML("""
-    <div class="info-box">
-        <h4>🌟 시스템 특징:</h4>
-        <ul>
-            <li>🖼️ 고급 이미지/비디오 분석 (Gemma3-4B VLM)</li>
-            <li>📋 다단계 작업 계획 및 추론</li>
-            <li>📍 정밀한 객체 위치 파악 (Grounding)</li>
-            <li>🤏 로봇 파지점 분석 (Affordance)</li>
-            <li>🛤️ 경로 계획 (Trajectory Planning)</li>
-            <li>🔍 실시간 웹 검색 통합</li>
-            <li>🔄 10초마다 자동 캡처 및 분석</li>
-        </ul>
-    </div>
-    """)
     with gr.Row():
         # 왼쪽: 웹캠 및 입력
@@ -561,7 +565,7 @@ with gr.Blocks(title="🤖 로봇 시각 시스템 (Gemma3-4B)", css=css) as dem
                     task_prompt = gr.Textbox(
                         label="작업 설명 / 질문",
                         placeholder="예: 테이블 위의 컵을 잡아서 싱크대에 놓기",
-                        value="이 장면에서 로봇이 수행할 수 있는 작업을 분석하세요.",
                         lines=2
                     )
@@ -582,7 +586,7 @@ with gr.Blocks(title="🤖 로봇 시각 시스템 (Gemma3-4B)", css=css) as dem
                         label="최대 토큰 수",
                         minimum=100,
                         maximum=4096,
-                        value=250,  # 기본값 250으로 변경
                         step=50
                     )
@@ -669,9 +673,10 @@ with gr.Blocks(title="🤖 로봇 시각 시스템 (Gemma3-4B)", css=css) as dem
             "trajectory": "경로 계획"
         }
-        formatted_result = f"""🤖 {task_names.get(task_type, '분석')} 결과 ({timestamp}):
-{result}"""
         complete_status = '<div class="status-box" style="background:#d4edda; color:#155724;">✅ 분석 완료!</div>'
         return formatted_result, complete_status
@@ -700,9 +705,10 @@ with gr.Blocks(title="🤖 로봇 시각 시스템 (Gemma3-4B)", css=css) as dem
             max_new_tokens=tokens
         )
-        formatted_result = f"""🔄 자동 분석 ({timestamp}):
-{result}"""
         return (
             webcam_frame,

+#!/usr/bin/env python3
+# 현재 app.py는 양자화 테스트 모델 적용이다.
 #!/usr/bin/env python3
 import os
     task_type: str = "general",
     use_web_search: bool = False,
     enable_thinking: bool = False,  # 기본값 False로 변경
+    max_new_tokens: int = 300  # 장면 설명을 위해 300으로 증가
 ) -> str:
     """로봇 작업을 위한 이미지 분석"""
     global model, processor
         # 태스크별 시스템 프롬프트 구성 (더 간결하게)
         system_prompts = {
+            "general": "당신은 로봇 시각 시스템입니다. 먼저 장면을 1-2줄로 설명하고, 핵심 내용을 간결하게 분석하세요.",
             "planning": """당신은 로봇 작업 계획 AI입니다.
+먼저 장면 이해를 1-2줄로 설명하고, 그 다음 작업 계획을 작성하세요.
+형식:
+[장면 이해] 현재 보이는 장면을 1-2줄로 설명
+[작업 계획]
+Step_1: xxx
 Step_2: xxx
 Step_n: xxx""",
+            "grounding": "당신은 객체 위치 시스템입니다. 먼저 보이는 객체들을 한 줄로 설명하고, 요청된 객체 위치를 [x1, y1, x2, y2]로 반환하세요.",
+            "affordance": "당신은 파지점 분석 AI입니다. 먼저 대상 객체를 한 줄로 설명하고, 파지 영역을 [x1, y1, x2, y2]로 반환하세요.",
+            "trajectory": "당신은 경로 계획 AI입니다. 먼저 환경을 한 줄로 설명하고, 경로를 [(x1,y1), (x2,y2), ...]로 제시하세요.",
+            "pointing": "당신은 지점 지정 시스템입니다. 먼저 참조점들을 한 줄로 설명하고, 위치를 [(x1,y1), (x2,y2), ...]로 반환하세요."
         }
         system_prompt = system_prompts.get(task_type, system_prompts["general"])
         # Chain-of-Thought 추가 (선택적)
         if enable_thinking:
+            system_prompt += "\n\n추론 과정을 <thinking></thinking> 태그 안에 작성 후 최종 답변을 제시하세요. 장면 이해는 추론 과정과 별도로 반드시 포함하세요."
         # 웹 검색 수행
         combined_system = system_prompt
                 do_sample=True,
                 temperature=0.7,
                 top_p=0.9,
+                pad_token_id=processor.tokenizer.pad_token_id,
+                eos_token_id=processor.tokenizer.eos_token_id,
             )
+        # 입력 토큰 제거하여 출력만 추출
+        generated_tokens = outputs[0][inputs.input_ids.shape[1]:]
         # 디코딩
+        response = processor.decode(generated_tokens, skip_special_tokens=True).strip()
+        # 프롬프트 제거 및 정리
+        # 이미 입력 토큰을 제거했으므로 추가 정리만 수행
+        response = response.strip()
+        # 혹시 남아있는 불필요한 텍스트 제거
+        if response.startswith("model\n"):
+            response = response[6:].strip()
+        elif response.startswith("model"):
+            response = response[5:].strip()
         return response
     </div>
     """)
     with gr.Row():
         # 왼쪽: 웹캠 및 입력
                     task_prompt = gr.Textbox(
                         label="작업 설명 / 질문",
                         placeholder="예: 테이블 위의 컵을 잡아서 싱크대에 놓기",
+                        value="현재 장면을 분석하고 로봇이 수행할 수 있는 작업을 제안하세요.",
                         lines=2
                     )
                         label="최대 토큰 수",
                         minimum=100,
                         maximum=4096,
+                        value=300,  # 장면 설명을 위해 300으로 증가
                         step=50
                     )
             "trajectory": "경로 계획"
         }
+        formatted_result = f"""🤖 {task_names.get(task_type, '분석')} 결과 ({timestamp})
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+{result}
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"""
         complete_status = '<div class="status-box" style="background:#d4edda; color:#155724;">✅ 분석 완료!</div>'
         return formatted_result, complete_status
             max_new_tokens=tokens
         )
+        formatted_result = f"""🔄 자동 분석 완료 ({timestamp})
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+{result}
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"""
         return (
             webcam_frame,