Spaces:
Running on Zero
Running on Zero
Update app.py
Browse files
app.py
CHANGED
|
@@ -8,9 +8,6 @@ import re
|
|
| 8 |
import os
|
| 9 |
from molmo_utils import process_vision_info
|
| 10 |
|
| 11 |
-
# -----------------------------------------------------------------------------
|
| 12 |
-
# 1. Model & Processor Setup
|
| 13 |
-
# -----------------------------------------------------------------------------
|
| 14 |
MODEL_ID = "allenai/Molmo2-4B"
|
| 15 |
|
| 16 |
print(f"Loading {MODEL_ID}...")
|
|
@@ -29,9 +26,6 @@ model = AutoModelForImageTextToText.from_pretrained(
|
|
| 29 |
)
|
| 30 |
print("Model loaded successfully.")
|
| 31 |
|
| 32 |
-
# -----------------------------------------------------------------------------
|
| 33 |
-
# 2. Parsing Utilities (From provided snippets)
|
| 34 |
-
# -----------------------------------------------------------------------------
|
| 35 |
COORD_REGEX = re.compile(rf"<(?:points|tracks).*? coords=\"([0-9\t:;, .]+)\"/?>")
|
| 36 |
FRAME_REGEX = re.compile(rf"(?:^|\t|:|,|;)([0-9\.]+) ([0-9\. ]+)")
|
| 37 |
POINTS_REGEX = re.compile(r"([0-9]+) ([0-9]{3,4}) ([0-9]{3,4})")
|
|
@@ -90,9 +84,6 @@ def extract_video_points(text, image_w, image_h, extract_ids=False):
|
|
| 90 |
all_points.append((frame_id, x, y))
|
| 91 |
return all_points
|
| 92 |
|
| 93 |
-
# -----------------------------------------------------------------------------
|
| 94 |
-
# 3. Visualization Utilities
|
| 95 |
-
# -----------------------------------------------------------------------------
|
| 96 |
def draw_points_on_images(images, points):
|
| 97 |
"""Draws points on a list of PIL Images."""
|
| 98 |
annotated_images = [img.copy() for img in images]
|
|
@@ -162,10 +153,6 @@ def draw_points_on_video(video_path, points, original_width, original_height):
|
|
| 162 |
out.release()
|
| 163 |
return output_path
|
| 164 |
|
| 165 |
-
# -----------------------------------------------------------------------------
|
| 166 |
-
# 4. Logic Handlers
|
| 167 |
-
# -----------------------------------------------------------------------------
|
| 168 |
-
|
| 169 |
def process_images(user_text, input_images):
|
| 170 |
if not input_images:
|
| 171 |
return "Please upload at least one image.", None
|
|
@@ -278,10 +265,6 @@ def process_video(user_text, video_path):
|
|
| 278 |
|
| 279 |
return generated_text, out_vid
|
| 280 |
|
| 281 |
-
# -----------------------------------------------------------------------------
|
| 282 |
-
# 5. Gradio UI Layout
|
| 283 |
-
# -----------------------------------------------------------------------------
|
| 284 |
-
|
| 285 |
with gr.Blocks(title="Molmo2-4B Demo") as demo:
|
| 286 |
gr.Markdown("Supports Multi-image QA, Pointing, General Video QA, and Tracking.")
|
| 287 |
|
|
|
|
| 8 |
import os
|
| 9 |
from molmo_utils import process_vision_info
|
| 10 |
|
|
|
|
|
|
|
|
|
|
| 11 |
MODEL_ID = "allenai/Molmo2-4B"
|
| 12 |
|
| 13 |
print(f"Loading {MODEL_ID}...")
|
|
|
|
| 26 |
)
|
| 27 |
print("Model loaded successfully.")
|
| 28 |
|
|
|
|
|
|
|
|
|
|
| 29 |
COORD_REGEX = re.compile(rf"<(?:points|tracks).*? coords=\"([0-9\t:;, .]+)\"/?>")
|
| 30 |
FRAME_REGEX = re.compile(rf"(?:^|\t|:|,|;)([0-9\.]+) ([0-9\. ]+)")
|
| 31 |
POINTS_REGEX = re.compile(r"([0-9]+) ([0-9]{3,4}) ([0-9]{3,4})")
|
|
|
|
| 84 |
all_points.append((frame_id, x, y))
|
| 85 |
return all_points
|
| 86 |
|
|
|
|
|
|
|
|
|
|
| 87 |
def draw_points_on_images(images, points):
|
| 88 |
"""Draws points on a list of PIL Images."""
|
| 89 |
annotated_images = [img.copy() for img in images]
|
|
|
|
| 153 |
out.release()
|
| 154 |
return output_path
|
| 155 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 156 |
def process_images(user_text, input_images):
|
| 157 |
if not input_images:
|
| 158 |
return "Please upload at least one image.", None
|
|
|
|
| 265 |
|
| 266 |
return generated_text, out_vid
|
| 267 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 268 |
with gr.Blocks(title="Molmo2-4B Demo") as demo:
|
| 269 |
gr.Markdown("Supports Multi-image QA, Pointing, General Video QA, and Tracking.")
|
| 270 |
|