openfree committed on
Commit
ea3211c
·
verified ·
1 Parent(s): 02285b9

Update app-backup.py

Browse files
Files changed (1) hide show
  1. app-backup.py +109 -34
app-backup.py CHANGED
@@ -232,8 +232,8 @@ def analyze_image_for_robot(
232
  prompt: str,
233
  task_type: str = "general",
234
  use_web_search: bool = False,
235
- enable_thinking: bool = True,
236
- max_new_tokens: int = 1024
237
  ) -> str:
238
  """๋กœ๋ด‡ ์ž‘์—…์„ ์œ„ํ•œ ์ด๋ฏธ์ง€ ๋ถ„์„"""
239
  global model, processor
@@ -247,23 +247,25 @@ def analyze_image_for_robot(
247
  if isinstance(image, np.ndarray):
248
  image = Image.fromarray(image).convert('RGB')
249
 
250
- # ํƒœ์Šคํฌ๋ณ„ ์‹œ์Šคํ…œ ํ”„๋กฌํ”„ํŠธ ๊ตฌ์„ฑ
251
  system_prompts = {
252
- "general": "๋‹น์‹ ์€ ๋กœ๋ด‡ ์‹œ๊ฐ ์‹œ์Šคํ…œ์ž…๋‹ˆ๋‹ค. ์ด๋ฏธ์ง€๋ฅผ ์ž์„ธํžˆ ๋ถ„์„ํ•˜๊ณ  ์„ค๋ช…ํ•˜์„ธ์š”.",
253
- "planning": """๋‹น์‹ ์€ ๋กœ๋ด‡ ์ž‘์—… ๊ณ„ํš์„ ์ˆ˜๋ฆฝํ•˜๋Š” AI์ž…๋‹ˆ๋‹ค.
254
- ์ฃผ์–ด์ง„ ์ด๋ฏธ์ง€์™€ ์ž‘์—…์„ ๋ถ„์„ํ•˜์—ฌ ๋‹จ๊ณ„๋ณ„ ์‹คํ–‰ ๊ณ„ํš์„ ์ž‘์„ฑํ•˜์„ธ์š”.
255
- ํ˜•์‹: Step_1: xxx\nStep_2: xxx\n...\nStep_n: xxx""",
256
- "grounding": "๋‹น์‹ ์€ ๊ฐ์ฒด ์œ„์น˜๋ฅผ ์ฐพ๋Š” ๋กœ๋ด‡ ์‹œ๊ฐ ์‹œ์Šคํ…œ์ž…๋‹ˆ๋‹ค. ์š”์ฒญ๋œ ๊ฐ์ฒด์˜ ์œ„์น˜๋ฅผ [x1, y1, x2, y2] ์ขŒํ‘œ๋กœ ๋ฐ˜ํ™˜ํ•˜์„ธ์š”.",
257
- "affordance": "๋‹น์‹ ์€ ๋กœ๋ด‡ ํŒŒ์ง€์ ์„ ๋ถ„์„ํ•˜๋Š” AI์ž…๋‹ˆ๋‹ค. ์ฃผ์–ด์ง„ ์ž‘์—…์„ ์œ„ํ•œ ์ตœ์ ์˜ ํŒŒ์ง€ ์˜์—ญ์„ [x1, y1, x2, y2] ์ขŒํ‘œ๋กœ ์˜ˆ์ธกํ•˜์„ธ์š”.",
258
- "trajectory": "๋‹น์‹ ์€ ๋กœ๋ด‡ ๊ฒฝ๋กœ๋ฅผ ๊ณ„ํšํ•˜๋Š” AI์ž…๋‹ˆ๋‹ค. ๋ชฉํ‘œ ์ง€์ ๊นŒ์ง€์˜ ๊ฒฝ๋กœ๋ฅผ [(x1,y1), (x2,y2), ...] ํ˜•์‹์œผ๋กœ ์ œ์‹œํ•˜์„ธ์š”.",
259
- "pointing": "๋‹น์‹ ์€ ๋‹ค์ค‘ ์ง€์ ์„ ์ง€์ •ํ•˜๋Š” ๋กœ๋ด‡ ์‹œ๊ฐ ์‹œ์Šคํ…œ์ž…๋‹ˆ๋‹ค. ์š”์ฒญ๋œ ์œ„์น˜๋“ค์„ [(x1,y1), (x2,y2), ...] ํ˜•์‹์œผ๋กœ ๋ฐ˜ํ™˜ํ•˜์„ธ์š”."
 
 
260
  }
261
 
262
  system_prompt = system_prompts.get(task_type, system_prompts["general"])
263
 
264
- # Chain-of-Thought ์ถ”๊ฐ€
265
  if enable_thinking:
266
- system_prompt += "\n\n์ถ”๋ก  ๊ณผ์ •์„ <thinking></thinking> ํƒœ๊ทธ ์•ˆ์— ์ƒ์„ธํžˆ ์ž‘์„ฑํ•œ ํ›„ ์ตœ์ข… ๋‹ต๋ณ€์„ ์ œ์‹œํ•˜์„ธ์š”."
267
 
268
  # 웹 검색 수행
269
  combined_system = system_prompt
@@ -468,6 +470,15 @@ css = """
468
  padding: 10px;
469
  background: #f8f9fa;
470
  }
 
 
 
 
 
 
 
 
 
471
  """
472
 
473
  with gr.Blocks(title="🤖 로봇 시각 시스템 (Gemma3-4B)", css=css) as demo:
@@ -489,7 +500,7 @@ with gr.Blocks(title="๐Ÿค– ๋กœ๋ด‡ ์‹œ๊ฐ ์‹œ์Šคํ…œ (Gemma3-4B)", css=css) as dem
489
  <li>๐Ÿค ๋กœ๋ด‡ ํŒŒ์ง€์  ๋ถ„์„ (Affordance)</li>
490
  <li>๐Ÿ›ค๏ธ ๊ฒฝ๋กœ ๊ณ„ํš (Trajectory Planning)</li>
491
  <li>๐Ÿ” ์‹ค์‹œ๊ฐ„ ์›น ๊ฒ€์ƒ‰ ํ†ตํ•ฉ</li>
492
- <li>๐Ÿ“„ ๋ฌธ์„œ ๋ถ„์„ (PDF, CSV, TXT)</li>
493
  </ul>
494
  </div>
495
  """)
@@ -508,6 +519,11 @@ with gr.Blocks(title="๐Ÿค– ๋กœ๋ด‡ ์‹œ๊ฐ ์‹œ์Šคํ…œ (Gemma3-4B)", css=css) as dem
508
  height=350
509
  )
510
 
 
 
 
 
 
511
  # ์บก์ฒ˜๋œ ์ด๋ฏธ์ง€ ํ‘œ์‹œ
512
  captured_image = gr.Image(
513
  label="์บก์ฒ˜๋œ ์ด๋ฏธ์ง€",
@@ -518,9 +534,16 @@ with gr.Blocks(title="๐Ÿค– ๋กœ๋ด‡ ์‹œ๊ฐ ์‹œ์Šคํ…œ (Gemma3-4B)", css=css) as dem
518
  # ๋กœ๋ด‡ ์ž‘์—… ๋ฒ„ํŠผ๋“ค
519
  gr.Markdown("### ๐ŸŽฏ ๋กœ๋ด‡ ์ž‘์—… ์„ ํƒ")
520
  with gr.Row():
521
- capture_btn = gr.Button("๐Ÿ“ธ ์บก์ฒ˜", variant="primary", elem_classes="task-button")
522
  clear_capture_btn = gr.Button("๐Ÿ—‘๏ธ ์ดˆ๊ธฐํ™”", elem_classes="task-button")
523
 
 
 
 
 
 
 
 
524
  with gr.Row():
525
  planning_btn = gr.Button("๐Ÿ“‹ ์ž‘์—… ๊ณ„ํš", elem_classes="task-button")
526
  grounding_btn = gr.Button("๐Ÿ“ ๊ฐ์ฒด ์œ„์น˜", elem_classes="task-button")
@@ -551,16 +574,16 @@ with gr.Blocks(title="๐Ÿค– ๋กœ๋ด‡ ์‹œ๊ฐ ์‹œ์Šคํ…œ (Gemma3-4B)", css=css) as dem
551
 
552
  enable_thinking = gr.Checkbox(
553
  label="๐Ÿค” ์ถ”๋ก  ๊ณผ์ • ํ‘œ์‹œ",
554
- value=True,
555
  info="Chain-of-Thought ์ถ”๋ก  ๊ณผ์ •์„ ๋ณด์—ฌ์ค๋‹ˆ๋‹ค"
556
  )
557
 
558
  max_tokens = gr.Slider(
559
  label="์ตœ๋Œ€ ํ† ํฐ ์ˆ˜",
560
- minimum=256,
561
  maximum=4096,
562
- value=1024,
563
- step=256
564
  )
565
 
566
  gr.Markdown("### ๐Ÿ“Š ๋ถ„์„ ๊ฒฐ๊ณผ")
@@ -576,8 +599,8 @@ with gr.Blocks(title="๐Ÿค– ๋กœ๋ด‡ ์‹œ๊ฐ ์‹œ์Šคํ…œ (Gemma3-4B)", css=css) as dem
576
  '<div class="status-box" style="background:#d4edda; color:#155724;">๐ŸŽฎ ์‹œ์Šคํ…œ ์ค€๋น„ ์™„๋ฃŒ</div>'
577
  )
578
 
579
- # ๋ฌธ์„œ ๋ถ„์„ ํƒญ
580
- with gr.Tab("๐Ÿ“„ ๋ฌธ์„œ ๋ถ„์„"):
581
  with gr.Row():
582
  with gr.Column():
583
  doc_files = gr.File(
@@ -609,6 +632,7 @@ with gr.Blocks(title="๐Ÿค– ๋กœ๋ด‡ ์‹œ๊ฐ ์‹œ์Šคํ…œ (Gemma3-4B)", css=css) as dem
609
 
610
  # ์ด๋ฒคํŠธ ํ•ธ๋“ค๋Ÿฌ
611
  webcam_state = gr.State(None)
 
612
 
613
  def capture_webcam(frame):
614
  """์›น์บ  ํ”„๋ ˆ์ž„ ์บก์ฒ˜"""
@@ -636,29 +660,57 @@ with gr.Blocks(title="๐Ÿค– ๋กœ๋ด‡ ์‹œ๊ฐ ์‹œ์Šคํ…œ (Gemma3-4B)", css=css) as dem
636
  max_new_tokens=tokens
637
  )
638
 
639
- # ๊ฒฐ๊ณผ ํฌ๋งทํŒ…
640
  timestamp = time.strftime("%H:%M:%S")
641
  task_names = {
642
  "planning": "์ž‘์—… ๊ณ„ํš",
643
- "grounding": "๊ฐ์ฒด ์œ„์น˜ ํŒŒ์•…",
644
- "affordance": "ํŒŒ์ง€์  ๋ถ„์„",
645
  "trajectory": "๊ฒฝ๋กœ ๊ณ„ํš"
646
  }
647
 
648
- formatted_result = f"""๐Ÿค– ๋กœ๋ด‡ {task_names.get(task_type, '๋ถ„์„')} ๊ฒฐ๊ณผ:
649
-
650
- ๐Ÿ“ธ **์ž‘์—…**: {prompt}
651
 
652
- ๐Ÿ“ **๋ถ„์„ ๊ฒฐ๊ณผ**:
653
- {result}
654
-
655
- โฐ ๋ถ„์„ ์‹œ๊ฐ„: {timestamp}
656
- ๐ŸŽฏ ๋ชจ๋ธ: {model_name}
657
- ๐Ÿ”ง ํƒœ์Šคํฌ: {task_type}"""
658
 
659
  complete_status = '<div class="status-box" style="background:#d4edda; color:#155724;">โœ… ๋ถ„์„ ์™„๋ฃŒ!</div>'
660
  return formatted_result, complete_status
661
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
662
  # ์›น์บ  ์ŠคํŠธ๋ฆฌ๋ฐ
663
  webcam.stream(
664
  fn=lambda x: x,
@@ -666,7 +718,7 @@ with gr.Blocks(title="๐Ÿค– ๋กœ๋ด‡ ์‹œ๊ฐ ์‹œ์Šคํ…œ (Gemma3-4B)", css=css) as dem
666
  outputs=[webcam_state]
667
  )
668
 
669
- # ์บก์ฒ˜ ๋ฒ„ํŠผ
670
  capture_btn.click(
671
  fn=capture_webcam,
672
  inputs=[webcam_state],
@@ -720,6 +772,29 @@ with gr.Blocks(title="๐Ÿค– ๋กœ๋ด‡ ์‹œ๊ฐ ์‹œ์Šคํ…œ (Gemma3-4B)", css=css) as dem
720
  outputs=[doc_result]
721
  )
722
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
723
  # 초기 모델 로드
724
  def initial_load():
725
  load_model()
 
232
  prompt: str,
233
  task_type: str = "general",
234
  use_web_search: bool = False,
235
+ enable_thinking: bool = False, # ๊ธฐ๋ณธ๊ฐ’ False๋กœ ๋ณ€๊ฒฝ
236
+ max_new_tokens: int = 250 # ๊ธฐ๋ณธ๊ฐ’ 250์œผ๋กœ ๋ณ€๊ฒฝ
237
  ) -> str:
238
  """๋กœ๋ด‡ ์ž‘์—…์„ ์œ„ํ•œ ์ด๋ฏธ์ง€ ๋ถ„์„"""
239
  global model, processor
 
247
  if isinstance(image, np.ndarray):
248
  image = Image.fromarray(image).convert('RGB')
249
 
250
+ # ํƒœ์Šคํฌ๋ณ„ ์‹œ์Šคํ…œ ํ”„๋กฌํ”„ํŠธ ๊ตฌ์„ฑ (๋” ๊ฐ„๊ฒฐํ•˜๊ฒŒ)
251
  system_prompts = {
252
+ "general": "๋‹น์‹ ์€ ๋กœ๋ด‡ ์‹œ๊ฐ ์‹œ์Šคํ…œ์ž…๋‹ˆ๋‹ค. ํ•ต์‹ฌ ๋‚ด์šฉ๋งŒ ๊ฐ„๊ฒฐํ•˜๊ฒŒ ์„ค๋ช…ํ•˜์„ธ์š”.",
253
+ "planning": """๋‹น์‹ ์€ ๋กœ๋ด‡ ์ž‘์—… ๊ณ„ํš AI์ž…๋‹ˆ๋‹ค.
254
+ ์ฃผ์š” ๋‹จ๊ณ„๋งŒ ๊ฐ„๊ฒฐํ•˜๊ฒŒ ์ž‘์„ฑํ•˜์„ธ์š”.
255
+ ํ˜•์‹: Step_1: xxx
256
+ Step_2: xxx
257
+ Step_n: xxx""",
258
+ "grounding": "๋‹น์‹ ์€ ๊ฐ์ฒด ์œ„์น˜ ์‹œ์Šคํ…œ์ž…๋‹ˆ๋‹ค. ๊ฐ์ฒด ์œ„์น˜๋ฅผ [x1, y1, x2, y2]๋กœ ๋ฐ˜ํ™˜ํ•˜์„ธ์š”.",
259
+ "affordance": "๋‹น์‹ ์€ ํŒŒ์ง€์  ๋ถ„์„ AI์ž…๋‹ˆ๋‹ค. ํŒŒ์ง€ ์˜์—ญ์„ [x1, y1, x2, y2]๋กœ ๋ฐ˜ํ™˜ํ•˜์„ธ์š”.",
260
+ "trajectory": "๋‹น์‹ ์€ ๊ฒฝ๋กœ ๊ณ„ํš AI์ž…๋‹ˆ๋‹ค. ๊ฒฝ๋กœ๋ฅผ [(x1,y1), (x2,y2), ...]๋กœ ์ œ์‹œํ•˜์„ธ์š”.",
261
+ "pointing": "๋‹น์‹ ์€ ์ง€์  ์ง€์ • ์‹œ์Šคํ…œ์ž…๋‹ˆ๋‹ค. ์œ„์น˜๋ฅผ [(x1,y1), (x2,y2), ...]๋กœ ๋ฐ˜ํ™˜ํ•˜์„ธ์š”."
262
  }
263
 
264
  system_prompt = system_prompts.get(task_type, system_prompts["general"])
265
 
266
+ # Chain-of-Thought ์ถ”๊ฐ€ (์„ ํƒ์ )
267
  if enable_thinking:
268
+ system_prompt += "\n\n์ถ”๋ก  ๊ณผ์ •์„ <thinking></thinking> ํƒœ๊ทธ ์•ˆ์— ์ž‘์„ฑ ํ›„ ์ตœ์ข… ๋‹ต๋ณ€์„ ์ œ์‹œํ•˜์„ธ์š”."
269
 
270
  # 웹 검색 수행
271
  combined_system = system_prompt
 
470
  padding: 10px;
471
  background: #f8f9fa;
472
  }
473
+ .auto-capture-status {
474
+ text-align: center;
475
+ padding: 5px;
476
+ border-radius: 5px;
477
+ margin: 5px 0;
478
+ font-weight: bold;
479
+ background: #e8f5e9;
480
+ color: #2e7d32;
481
+ }
482
  """
483
 
484
  with gr.Blocks(title="🤖 로봇 시각 시스템 (Gemma3-4B)", css=css) as demo:
 
500
  <li>๐Ÿค ๋กœ๋ด‡ ํŒŒ์ง€์  ๋ถ„์„ (Affordance)</li>
501
  <li>๐Ÿ›ค๏ธ ๊ฒฝ๋กœ ๊ณ„ํš (Trajectory Planning)</li>
502
  <li>๐Ÿ” ์‹ค์‹œ๊ฐ„ ์›น ๊ฒ€์ƒ‰ ํ†ตํ•ฉ</li>
503
+ <li>๐Ÿ”„ 10์ดˆ๋งˆ๋‹ค ์ž๋™ ์บก์ฒ˜ ๋ฐ ๋ถ„์„</li>
504
  </ul>
505
  </div>
506
  """)
 
519
  height=350
520
  )
521
 
522
+ # ์ž๋™ ์บก์ฒ˜ ์ƒํƒœ ํ‘œ์‹œ
523
+ auto_capture_status = gr.HTML(
524
+ '<div class="auto-capture-status">๐Ÿ”„ ์ž๋™ ์บก์ฒ˜: ๋Œ€๊ธฐ ์ค‘</div>'
525
+ )
526
+
527
  # ์บก์ฒ˜๋œ ์ด๋ฏธ์ง€ ํ‘œ์‹œ
528
  captured_image = gr.Image(
529
  label="์บก์ฒ˜๋œ ์ด๋ฏธ์ง€",
 
534
  # ๋กœ๋ด‡ ์ž‘์—… ๋ฒ„ํŠผ๋“ค
535
  gr.Markdown("### ๐ŸŽฏ ๋กœ๋ด‡ ์ž‘์—… ์„ ํƒ")
536
  with gr.Row():
537
+ capture_btn = gr.Button("๐Ÿ“ธ ์ˆ˜๋™ ์บก์ฒ˜", variant="primary", elem_classes="task-button")
538
  clear_capture_btn = gr.Button("๐Ÿ—‘๏ธ ์ดˆ๊ธฐํ™”", elem_classes="task-button")
539
 
540
+ with gr.Row():
541
+ auto_capture_toggle = gr.Checkbox(
542
+ label="๐Ÿ”„ ์ž๋™ ์บก์ฒ˜ ํ™œ์„ฑํ™” (10์ดˆ๋งˆ๋‹ค)",
543
+ value=False,
544
+ info="ํ™œ์„ฑํ™” ์‹œ 10์ดˆ๋งˆ๋‹ค ์ž๋™์œผ๋กœ ์บก์ฒ˜ ๋ฐ ๋ถ„์„"
545
+ )
546
+
547
  with gr.Row():
548
  planning_btn = gr.Button("๐Ÿ“‹ ์ž‘์—… ๊ณ„ํš", elem_classes="task-button")
549
  grounding_btn = gr.Button("๐Ÿ“ ๊ฐ์ฒด ์œ„์น˜", elem_classes="task-button")
 
574
 
575
  enable_thinking = gr.Checkbox(
576
  label="๐Ÿค” ์ถ”๋ก  ๊ณผ์ • ํ‘œ์‹œ",
577
+ value=False, # ๊ธฐ๋ณธ๊ฐ’ False๋กœ ๋ณ€๊ฒฝ
578
  info="Chain-of-Thought ์ถ”๋ก  ๊ณผ์ •์„ ๋ณด์—ฌ์ค๋‹ˆ๋‹ค"
579
  )
580
 
581
  max_tokens = gr.Slider(
582
  label="์ตœ๋Œ€ ํ† ํฐ ์ˆ˜",
583
+ minimum=100,
584
  maximum=4096,
585
+ value=250, # ๊ธฐ๋ณธ๊ฐ’ 250์œผ๋กœ ๋ณ€๊ฒฝ
586
+ step=50
587
  )
588
 
589
  gr.Markdown("### ๐Ÿ“Š ๋ถ„์„ ๊ฒฐ๊ณผ")
 
599
  '<div class="status-box" style="background:#d4edda; color:#155724;">๐ŸŽฎ ์‹œ์Šคํ…œ ์ค€๋น„ ์™„๋ฃŒ</div>'
600
  )
601
 
602
+ # ๋ฌธ์„œ ๋ถ„์„ ํƒญ (์ˆจ๊น€ ์ฒ˜๋ฆฌ)
603
+ with gr.Tab("๐Ÿ“„ ๋ฌธ์„œ ๋ถ„์„", visible=False): # visible=False๋กœ ์ˆจ๊น€
604
  with gr.Row():
605
  with gr.Column():
606
  doc_files = gr.File(
 
632
 
633
  # ์ด๋ฒคํŠธ ํ•ธ๋“ค๋Ÿฌ
634
  webcam_state = gr.State(None)
635
+ auto_capture_state = gr.State({"enabled": False, "timer": None})
636
 
637
  def capture_webcam(frame):
638
  """์›น์บ  ํ”„๋ ˆ์ž„ ์บก์ฒ˜"""
 
660
  max_new_tokens=tokens
661
  )
662
 
663
+ # ๊ฒฐ๊ณผ ํฌ๋งทํŒ… (๋” ๊ฐ„๊ฒฐํ•˜๊ฒŒ)
664
  timestamp = time.strftime("%H:%M:%S")
665
  task_names = {
666
  "planning": "์ž‘์—… ๊ณ„ํš",
667
+ "grounding": "๊ฐ์ฒด ์œ„์น˜",
668
+ "affordance": "ํŒŒ์ง€์ ",
669
  "trajectory": "๊ฒฝ๋กœ ๊ณ„ํš"
670
  }
671
 
672
+ formatted_result = f"""๐Ÿค– {task_names.get(task_type, '๋ถ„์„')} ๊ฒฐ๊ณผ ({timestamp}):
 
 
673
 
674
+ {result}"""
 
 
 
 
 
675
 
676
  complete_status = '<div class="status-box" style="background:#d4edda; color:#155724;">โœ… ๋ถ„์„ ์™„๋ฃŒ!</div>'
677
  return formatted_result, complete_status
678
 
679
+ # ์ž๋™ ์บก์ฒ˜ ๋ฐ ๋ถ„์„ ํ•จ์ˆ˜
680
+ def auto_capture_and_analyze(webcam_frame, task_prompt, use_search, thinking, tokens, auto_state):
681
+ """์ž๋™ ์บก์ฒ˜ ๋ฐ ๋ถ„์„"""
682
+ if webcam_frame is None:
683
+ return (
684
+ None,
685
+ "์ž๋™ ์บก์ฒ˜ ๋Œ€๊ธฐ ์ค‘...",
686
+ '<div class="status-box" style="background:#fff3cd; color:#856404;">โณ ์›น์บ  ๋Œ€๊ธฐ ์ค‘</div>',
687
+ '<div class="auto-capture-status">๐Ÿ”„ ์ž๋™ ์บก์ฒ˜: ์›น์บ  ๋Œ€๊ธฐ ์ค‘</div>'
688
+ )
689
+
690
+ # ์บก์ฒ˜ ์ˆ˜ํ–‰
691
+ timestamp = time.strftime("%H:%M:%S")
692
+
693
+ # ์ด๋ฏธ์ง€ ๋ถ„์„ (์ž‘์—… ๊ณ„ํš ๋ชจ๋“œ๋กœ)
694
+ result = analyze_image_for_robot(
695
+ image=webcam_frame,
696
+ prompt=task_prompt,
697
+ task_type="planning",
698
+ use_web_search=use_search,
699
+ enable_thinking=thinking,
700
+ max_new_tokens=tokens
701
+ )
702
+
703
+ formatted_result = f"""๐Ÿ”„ ์ž๋™ ๋ถ„์„ ({timestamp}):
704
+
705
+ {result}"""
706
+
707
+ return (
708
+ webcam_frame,
709
+ formatted_result,
710
+ '<div class="status-box" style="background:#d4edda; color:#155724;">โœ… ์ž๋™ ๋ถ„์„ ์™„๋ฃŒ</div>',
711
+ f'<div class="auto-capture-status">๐Ÿ”„ ์ž๋™ ์บก์ฒ˜: ๋งˆ์ง€๋ง‰ ๋ถ„์„ {timestamp}</div>'
712
+ )
713
+
714
  # ์›น์บ  ์ŠคํŠธ๋ฆฌ๋ฐ
715
  webcam.stream(
716
  fn=lambda x: x,
 
718
  outputs=[webcam_state]
719
  )
720
 
721
+ # ์ˆ˜๋™ ์บก์ฒ˜ ๋ฒ„ํŠผ
722
  capture_btn.click(
723
  fn=capture_webcam,
724
  inputs=[webcam_state],
 
772
  outputs=[doc_result]
773
  )
774
 
775
+ # ์ž๋™ ์บก์ฒ˜ ํƒ€์ด๋จธ (10์ดˆ๋งˆ๋‹ค)
776
+ timer = gr.Timer(10.0, active=False) # 10์ดˆ ํƒ€์ด๋จธ, ์ดˆ๊ธฐ์—๋Š” ๋น„ํ™œ์„ฑํ™”
777
+
778
+ # ์ž๋™ ์บก์ฒ˜ ํ† ๊ธ€ ์ด๋ฒคํŠธ
779
+ def toggle_auto_capture(enabled):
780
+ if enabled:
781
+ return gr.Timer(10.0, active=True), '<div class="auto-capture-status">๐Ÿ”„ ์ž๋™ ์บก์ฒ˜: ํ™œ์„ฑํ™”๋จ (10์ดˆ๋งˆ๋‹ค)</div>'
782
+ else:
783
+ return gr.Timer(active=False), '<div class="auto-capture-status">๐Ÿ”„ ์ž๋™ ์บก์ฒ˜: ๋น„ํ™œ์„ฑํ™”๋จ</div>'
784
+
785
+ auto_capture_toggle.change(
786
+ fn=toggle_auto_capture,
787
+ inputs=[auto_capture_toggle],
788
+ outputs=[timer, auto_capture_status]
789
+ )
790
+
791
+ # ํƒ€์ด๋จธ ํ‹ฑ ์ด๋ฒคํŠธ
792
+ timer.tick(
793
+ fn=auto_capture_and_analyze,
794
+ inputs=[webcam_state, task_prompt, use_web_search, enable_thinking, max_tokens, auto_capture_state],
795
+ outputs=[captured_image, result_output, status_display, auto_capture_status]
796
+ )
797
+
798
  # 초기 모델 로드
799
  def initial_load():
800
  load_model()