openfree committed on
Commit
83d1b52
·
verified ·
1 Parent(s): 3c78eaa

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -17
app.py CHANGED
@@ -233,7 +233,7 @@ def analyze_image_for_robot(
233
  task_type: str = "general",
234
  use_web_search: bool = False,
235
  enable_thinking: bool = False, # κΈ°λ³Έκ°’ False둜 λ³€κ²½
236
- max_new_tokens: int = 250 # κΈ°λ³Έκ°’ 250으둜 λ³€κ²½
237
  ) -> str:
238
  """λ‘œλ΄‡ μž‘μ—…μ„ μœ„ν•œ 이미지 뢄석"""
239
  global model, processor
@@ -249,23 +249,27 @@ def analyze_image_for_robot(
249
 
250
  # νƒœμŠ€ν¬λ³„ μ‹œμŠ€ν…œ ν”„λ‘¬ν”„νŠΈ ꡬ성 (더 κ°„κ²°ν•˜κ²Œ)
251
  system_prompts = {
252
- "general": "당신은 λ‘œλ΄‡ μ‹œκ° μ‹œμŠ€ν…œμž…λ‹ˆλ‹€. 핡심 λ‚΄μš©λ§Œ κ°„κ²°ν•˜κ²Œ μ„€λͺ…ν•˜μ„Έμš”.",
253
  "planning": """당신은 λ‘œλ΄‡ μž‘μ—… κ³„νš AIμž…λ‹ˆλ‹€.
254
- μ£Όμš” λ‹¨κ³„λ§Œ κ°„κ²°ν•˜κ²Œ μž‘μ„±ν•˜μ„Έμš”.
255
- ν˜•μ‹: Step_1: xxx
 
 
 
 
256
  Step_2: xxx
257
  Step_n: xxx""",
258
- "grounding": "당신은 객체 μœ„μΉ˜ μ‹œμŠ€ν…œμž…λ‹ˆλ‹€. 객체 μœ„μΉ˜λ₯Ό [x1, y1, x2, y2]둜 λ°˜ν™˜ν•˜μ„Έμš”.",
259
- "affordance": "당신은 νŒŒμ§€μ  뢄석 AIμž…λ‹ˆλ‹€. νŒŒμ§€ μ˜μ—­μ„ [x1, y1, x2, y2]둜 λ°˜ν™˜ν•˜μ„Έμš”.",
260
- "trajectory": "당신은 경둜 κ³„νš AIμž…λ‹ˆλ‹€. 경둜λ₯Ό [(x1,y1), (x2,y2), ...]둜 μ œμ‹œν•˜μ„Έμš”.",
261
- "pointing": "당신은 지점 μ§€μ • μ‹œμŠ€ν…œμž…λ‹ˆλ‹€. μœ„μΉ˜λ₯Ό [(x1,y1), (x2,y2), ...]둜 λ°˜ν™˜ν•˜μ„Έμš”."
262
  }
263
 
264
  system_prompt = system_prompts.get(task_type, system_prompts["general"])
265
 
266
  # Chain-of-Thought μΆ”κ°€ (선택적)
267
  if enable_thinking:
268
- system_prompt += "\n\nμΆ”λ‘  과정을 <thinking></thinking> νƒœκ·Έ μ•ˆμ— μž‘μ„± ν›„ μ΅œμ’… 닡변을 μ œμ‹œν•˜μ„Έμš”."
269
 
270
  # μ›Ή 검색 μˆ˜ν–‰
271
  combined_system = system_prompt
@@ -495,6 +499,7 @@ with gr.Blocks(title="🤖 로봇 시각 시스템 (Gemma3-4B)", css=css) as dem
495
  <h4>🌟 μ‹œμŠ€ν…œ νŠΉμ§•:</h4>
496
  <ul>
497
  <li>πŸ–ΌοΈ κ³ κΈ‰ 이미지/λΉ„λ””μ˜€ 뢄석 (Gemma3-4B VLM)</li>
 
498
  <li>πŸ“‹ 닀단계 μž‘μ—… κ³„νš 및 μΆ”λ‘ </li>
499
  <li>πŸ“ μ •λ°€ν•œ 객체 μœ„μΉ˜ νŒŒμ•… (Grounding)</li>
500
  <li>🀏 λ‘œλ΄‡ νŒŒμ§€μ  뢄석 (Affordance)</li>
@@ -561,7 +566,7 @@ with gr.Blocks(title="🤖 로봇 시각 시스템 (Gemma3-4B)", css=css) as dem
561
  task_prompt = gr.Textbox(
562
  label="μž‘μ—… μ„€λͺ… / 질문",
563
  placeholder="예: ν…Œμ΄λΈ” μœ„μ˜ 컡을 μž‘μ•„μ„œ μ‹±ν¬λŒ€μ— 놓기",
564
- value="이 μž₯λ©΄μ—μ„œ λ‘œλ΄‡μ΄ μˆ˜ν–‰ν•  수 μžˆλŠ” μž‘μ—…μ„ λΆ„μ„ν•˜μ„Έμš”.",
565
  lines=2
566
  )
567
 
@@ -582,7 +587,7 @@ with gr.Blocks(title="🤖 로봇 시각 시스템 (Gemma3-4B)", css=css) as dem
582
  label="μ΅œλŒ€ 토큰 수",
583
  minimum=100,
584
  maximum=4096,
585
- value=250, # κΈ°λ³Έκ°’ 250으둜 λ³€κ²½
586
  step=50
587
  )
588
 
@@ -669,9 +674,10 @@ with gr.Blocks(title="🤖 로봇 시각 시스템 (Gemma3-4B)", css=css) as dem
669
  "trajectory": "경둜 κ³„νš"
670
  }
671
 
672
- formatted_result = f"""πŸ€– {task_names.get(task_type, '뢄석')} κ²°κ³Ό ({timestamp}):
673
-
674
- {result}"""
 
675
 
676
  complete_status = '<div class="status-box" style="background:#d4edda; color:#155724;">βœ… 뢄석 μ™„λ£Œ!</div>'
677
  return formatted_result, complete_status
@@ -700,9 +706,10 @@ with gr.Blocks(title="🤖 로봇 시각 시스템 (Gemma3-4B)", css=css) as dem
700
  max_new_tokens=tokens
701
  )
702
 
703
- formatted_result = f"""πŸ”„ μžλ™ 뢄석 ({timestamp}):
704
-
705
- {result}"""
 
706
 
707
  return (
708
  webcam_frame,
 
233
  task_type: str = "general",
234
  use_web_search: bool = False,
235
  enable_thinking: bool = False, # κΈ°λ³Έκ°’ False둜 λ³€κ²½
236
+ max_new_tokens: int = 300 # μž₯λ©΄ μ„€λͺ…을 μœ„ν•΄ 300으둜 증가
237
  ) -> str:
238
  """λ‘œλ΄‡ μž‘μ—…μ„ μœ„ν•œ 이미지 뢄석"""
239
  global model, processor
 
249
 
250
  # νƒœμŠ€ν¬λ³„ μ‹œμŠ€ν…œ ν”„λ‘¬ν”„νŠΈ ꡬ성 (더 κ°„κ²°ν•˜κ²Œ)
251
  system_prompts = {
252
+ "general": "당신은 λ‘œλ΄‡ μ‹œκ° μ‹œμŠ€ν…œμž…λ‹ˆλ‹€. λ¨Όμ € μž₯면을 1-2μ€„λ‘œ μ„€λͺ…ν•˜κ³ , 핡심 λ‚΄μš©μ„ κ°„κ²°ν•˜κ²Œ λΆ„μ„ν•˜μ„Έμš”.",
253
  "planning": """당신은 λ‘œλ΄‡ μž‘μ—… κ³„νš AIμž…λ‹ˆλ‹€.
254
+ λ¨Όμ € μž₯λ©΄ 이해λ₯Ό 1-2μ€„λ‘œ μ„€λͺ…ν•˜κ³ , κ·Έ λ‹€μŒ μž‘μ—… κ³„νšμ„ μž‘μ„±ν•˜μ„Έμš”.
255
+ ν˜•μ‹:
256
+ [μž₯λ©΄ 이해] ν˜„μž¬ λ³΄μ΄λŠ” μž₯면을 1-2μ€„λ‘œ μ„€λͺ…
257
+
258
+ [μž‘μ—… κ³„νš]
259
+ Step_1: xxx
260
  Step_2: xxx
261
  Step_n: xxx""",
262
+ "grounding": "당신은 객체 μœ„μΉ˜ μ‹œμŠ€ν…œμž…λ‹ˆλ‹€. λ¨Όμ € λ³΄μ΄λŠ” 객체듀을 ν•œ μ€„λ‘œ μ„€λͺ…ν•˜κ³ , μš”μ²­λœ 객체 μœ„μΉ˜λ₯Ό [x1, y1, x2, y2]둜 λ°˜ν™˜ν•˜μ„Έμš”.",
263
+ "affordance": "당신은 νŒŒμ§€μ  뢄석 AIμž…λ‹ˆλ‹€. λ¨Όμ € λŒ€μƒ 객체λ₯Ό ν•œ μ€„λ‘œ μ„€λͺ…ν•˜κ³ , νŒŒμ§€ μ˜μ—­μ„ [x1, y1, x2, y2]둜 λ°˜ν™˜ν•˜μ„Έμš”.",
264
+ "trajectory": "당신은 경둜 κ³„νš AIμž…λ‹ˆλ‹€. λ¨Όμ € ν™˜κ²½μ„ ν•œ μ€„λ‘œ μ„€λͺ…ν•˜κ³ , 경둜λ₯Ό [(x1,y1), (x2,y2), ...]둜 μ œμ‹œν•˜μ„Έμš”.",
265
+ "pointing": "당신은 지점 μ§€μ • μ‹œμŠ€ν…œμž…λ‹ˆλ‹€. λ¨Όμ € 참쑰점듀을 ν•œ μ€„λ‘œ μ„€λͺ…ν•˜κ³ , μœ„μΉ˜λ₯Ό [(x1,y1), (x2,y2), ...]둜 λ°˜ν™˜ν•˜μ„Έμš”."
266
  }
267
 
268
  system_prompt = system_prompts.get(task_type, system_prompts["general"])
269
 
270
  # Chain-of-Thought μΆ”κ°€ (선택적)
271
  if enable_thinking:
272
+ system_prompt += "\n\nμΆ”λ‘  과정을 <thinking></thinking> νƒœκ·Έ μ•ˆμ— μž‘μ„± ν›„ μ΅œμ’… 닡변을 μ œμ‹œν•˜μ„Έμš”. μž₯λ©΄ μ΄ν•΄λŠ” μΆ”λ‘  κ³Όμ •κ³Ό λ³„λ„λ‘œ λ°˜λ“œμ‹œ ν¬ν•¨ν•˜μ„Έμš”."
273
 
274
  # μ›Ή 검색 μˆ˜ν–‰
275
  combined_system = system_prompt
 
499
  <h4>🌟 μ‹œμŠ€ν…œ νŠΉμ§•:</h4>
500
  <ul>
501
  <li>πŸ–ΌοΈ κ³ κΈ‰ 이미지/λΉ„λ””μ˜€ 뢄석 (Gemma3-4B VLM)</li>
502
+ <li>πŸ‘οΈ μž₯λ©΄ 이해 및 상황 μ„€λͺ…</li>
503
  <li>πŸ“‹ 닀단계 μž‘μ—… κ³„νš 및 μΆ”λ‘ </li>
504
  <li>πŸ“ μ •λ°€ν•œ 객체 μœ„μΉ˜ νŒŒμ•… (Grounding)</li>
505
  <li>🀏 λ‘œλ΄‡ νŒŒμ§€μ  뢄석 (Affordance)</li>
 
566
  task_prompt = gr.Textbox(
567
  label="μž‘μ—… μ„€λͺ… / 질문",
568
  placeholder="예: ν…Œμ΄λΈ” μœ„μ˜ 컡을 μž‘μ•„μ„œ μ‹±ν¬λŒ€μ— 놓기",
569
+ value="ν˜„μž¬ μž₯면을 λΆ„μ„ν•˜κ³  λ‘œλ΄‡μ΄ μˆ˜ν–‰ν•  수 μžˆλŠ” μž‘μ—…μ„ μ œμ•ˆν•˜μ„Έμš”.",
570
  lines=2
571
  )
572
 
 
587
  label="μ΅œλŒ€ 토큰 수",
588
  minimum=100,
589
  maximum=4096,
590
+ value=300, # μž₯λ©΄ μ„€λͺ…을 μœ„ν•΄ 300으둜 증가
591
  step=50
592
  )
593
 
 
674
  "trajectory": "경둜 κ³„νš"
675
  }
676
 
677
+ formatted_result = f"""πŸ€– {task_names.get(task_type, '뢄석')} κ²°κ³Ό ({timestamp})
678
+ ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
679
+ {result}
680
+ ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"""
681
 
682
  complete_status = '<div class="status-box" style="background:#d4edda; color:#155724;">βœ… 뢄석 μ™„λ£Œ!</div>'
683
  return formatted_result, complete_status
 
706
  max_new_tokens=tokens
707
  )
708
 
709
+ formatted_result = f"""πŸ”„ μžλ™ 뢄석 μ™„λ£Œ ({timestamp})
710
+ ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
711
+ {result}
712
+ ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"""
713
 
714
  return (
715
  webcam_frame,