Spaces:

prithivMLmods
/

Molmo2-HF-Demo

Running on Zero

App Files Community

prithivMLmods committed on Dec 17, 2025

Commit

be25c0c

verified ·

1 Parent(s): 4f09c23

Update app.py

Browse files

Files changed (1) hide show

app.py +0 -17

app.py CHANGED Viewed

@@ -8,9 +8,6 @@ import re
 import os
 from molmo_utils import process_vision_info
-# -----------------------------------------------------------------------------
-# 1. Model & Processor Setup
-# -----------------------------------------------------------------------------
 MODEL_ID = "allenai/Molmo2-4B"
 print(f"Loading {MODEL_ID}...")
@@ -29,9 +26,6 @@ model = AutoModelForImageTextToText.from_pretrained(
 )
 print("Model loaded successfully.")
-# -----------------------------------------------------------------------------
-# 2. Parsing Utilities (From provided snippets)
-# -----------------------------------------------------------------------------
 COORD_REGEX = re.compile(rf"<(?:points|tracks).*? coords=\"([0-9\t:;, .]+)\"/?>")
 FRAME_REGEX = re.compile(rf"(?:^|\t|:|,|;)([0-9\.]+) ([0-9\. ]+)")
 POINTS_REGEX = re.compile(r"([0-9]+) ([0-9]{3,4}) ([0-9]{3,4})")
@@ -90,9 +84,6 @@ def extract_video_points(text, image_w, image_h, extract_ids=False):
                     all_points.append((frame_id, x, y))
     return all_points
-# -----------------------------------------------------------------------------
-# 3. Visualization Utilities
-# -----------------------------------------------------------------------------
 def draw_points_on_images(images, points):
     """Draws points on a list of PIL Images."""
     annotated_images = [img.copy() for img in images]
@@ -162,10 +153,6 @@ def draw_points_on_video(video_path, points, original_width, original_height):
     out.release()
     return output_path
-# -----------------------------------------------------------------------------
-# 4. Logic Handlers
-# -----------------------------------------------------------------------------
 def process_images(user_text, input_images):
     if not input_images:
         return "Please upload at least one image.", None
@@ -278,10 +265,6 @@ def process_video(user_text, video_path):
     return generated_text, out_vid
-# -----------------------------------------------------------------------------
-# 5. Gradio UI Layout
-# -----------------------------------------------------------------------------
 with gr.Blocks(title="Molmo2-4B Demo") as demo:
     gr.Markdown("Supports Multi-image QA, Pointing, General Video QA, and Tracking.")

 import os
 from molmo_utils import process_vision_info
 MODEL_ID = "allenai/Molmo2-4B"
 print(f"Loading {MODEL_ID}...")
 )
 print("Model loaded successfully.")
 COORD_REGEX = re.compile(rf"<(?:points|tracks).*? coords=\"([0-9\t:;, .]+)\"/?>")
 FRAME_REGEX = re.compile(rf"(?:^|\t|:|,|;)([0-9\.]+) ([0-9\. ]+)")
 POINTS_REGEX = re.compile(r"([0-9]+) ([0-9]{3,4}) ([0-9]{3,4})")
                     all_points.append((frame_id, x, y))
     return all_points
 def draw_points_on_images(images, points):
     """Draws points on a list of PIL Images."""
     annotated_images = [img.copy() for img in images]
     out.release()
     return output_path
 def process_images(user_text, input_images):
     if not input_images:
         return "Please upload at least one image.", None
     return generated_text, out_vid
 with gr.Blocks(title="Molmo2-4B Demo") as demo:
     gr.Markdown("Supports Multi-image QA, Pointing, General Video QA, and Tracking.")