prithivMLmods committed on
Commit
be25c0c
·
verified ·
1 Parent(s): 4f09c23

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -17
app.py CHANGED
@@ -8,9 +8,6 @@ import re
8
  import os
9
  from molmo_utils import process_vision_info
10
 
11
- # -----------------------------------------------------------------------------
12
- # 1. Model & Processor Setup
13
- # -----------------------------------------------------------------------------
14
  MODEL_ID = "allenai/Molmo2-4B"
15
 
16
  print(f"Loading {MODEL_ID}...")
@@ -29,9 +26,6 @@ model = AutoModelForImageTextToText.from_pretrained(
29
  )
30
  print("Model loaded successfully.")
31
 
32
- # -----------------------------------------------------------------------------
33
- # 2. Parsing Utilities (From provided snippets)
34
- # -----------------------------------------------------------------------------
35
  COORD_REGEX = re.compile(rf"<(?:points|tracks).*? coords=\"([0-9\t:;, .]+)\"/?>")
36
  FRAME_REGEX = re.compile(rf"(?:^|\t|:|,|;)([0-9\.]+) ([0-9\. ]+)")
37
  POINTS_REGEX = re.compile(r"([0-9]+) ([0-9]{3,4}) ([0-9]{3,4})")
@@ -90,9 +84,6 @@ def extract_video_points(text, image_w, image_h, extract_ids=False):
90
  all_points.append((frame_id, x, y))
91
  return all_points
92
 
93
- # -----------------------------------------------------------------------------
94
- # 3. Visualization Utilities
95
- # -----------------------------------------------------------------------------
96
  def draw_points_on_images(images, points):
97
  """Draws points on a list of PIL Images."""
98
  annotated_images = [img.copy() for img in images]
@@ -162,10 +153,6 @@ def draw_points_on_video(video_path, points, original_width, original_height):
162
  out.release()
163
  return output_path
164
 
165
- # -----------------------------------------------------------------------------
166
- # 4. Logic Handlers
167
- # -----------------------------------------------------------------------------
168
-
169
  def process_images(user_text, input_images):
170
  if not input_images:
171
  return "Please upload at least one image.", None
@@ -278,10 +265,6 @@ def process_video(user_text, video_path):
278
 
279
  return generated_text, out_vid
280
 
281
- # -----------------------------------------------------------------------------
282
- # 5. Gradio UI Layout
283
- # -----------------------------------------------------------------------------
284
-
285
  with gr.Blocks(title="Molmo2-4B Demo") as demo:
286
  gr.Markdown("Supports Multi-image QA, Pointing, General Video QA, and Tracking.")
287
 
 
8
  import os
9
  from molmo_utils import process_vision_info
10
 
 
 
 
11
  MODEL_ID = "allenai/Molmo2-4B"
12
 
13
  print(f"Loading {MODEL_ID}...")
 
26
  )
27
  print("Model loaded successfully.")
28
 
 
 
 
29
  COORD_REGEX = re.compile(rf"<(?:points|tracks).*? coords=\"([0-9\t:;, .]+)\"/?>")
30
  FRAME_REGEX = re.compile(rf"(?:^|\t|:|,|;)([0-9\.]+) ([0-9\. ]+)")
31
  POINTS_REGEX = re.compile(r"([0-9]+) ([0-9]{3,4}) ([0-9]{3,4})")
 
84
  all_points.append((frame_id, x, y))
85
  return all_points
86
 
 
 
 
87
  def draw_points_on_images(images, points):
88
  """Draws points on a list of PIL Images."""
89
  annotated_images = [img.copy() for img in images]
 
153
  out.release()
154
  return output_path
155
 
 
 
 
 
156
  def process_images(user_text, input_images):
157
  if not input_images:
158
  return "Please upload at least one image.", None
 
265
 
266
  return generated_text, out_vid
267
 
 
 
 
 
268
  with gr.Blocks(title="Molmo2-4B Demo") as demo:
269
  gr.Markdown("Supports Multi-image QA, Pointing, General Video QA, and Tracking.")
270