Spaces:
Running on Zero
Running on Zero
feat: two run buttons - Run (30s) for normal tasks, Run Long (60s) for counting/detection
Browse files
app.py
CHANGED
|
@@ -101,14 +101,8 @@ def draw_bboxes(image, bboxes):
|
|
| 101 |
# ---------------------------------------------------------------------------
|
| 102 |
# Inference (supports both single-image and paired-image modes)
|
| 103 |
# ---------------------------------------------------------------------------
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
reference_image: Image.Image,
|
| 107 |
-
test_image: Image.Image,
|
| 108 |
-
prompt: str,
|
| 109 |
-
max_new_tokens: float,
|
| 110 |
-
):
|
| 111 |
-
# Determine mode: single image or paired comparison
|
| 112 |
has_ref = reference_image is not None
|
| 113 |
has_test = test_image is not None
|
| 114 |
|
|
@@ -119,7 +113,6 @@ def predict(
|
|
| 119 |
t_start = time.time()
|
| 120 |
max_new_tokens = int(max_new_tokens)
|
| 121 |
|
| 122 |
-
# Build message content based on available images
|
| 123 |
content = []
|
| 124 |
|
| 125 |
if has_ref and has_test:
|
|
@@ -174,13 +167,11 @@ def predict(
|
|
| 174 |
clean_up_tokenization_spaces=False,
|
| 175 |
)[0]
|
| 176 |
|
| 177 |
-
# Try to visualize bboxes if present
|
| 178 |
bboxes = parse_bboxes(output)
|
| 179 |
vis_image = None
|
| 180 |
if bboxes:
|
| 181 |
vis_image = draw_bboxes(vis_source, bboxes)
|
| 182 |
|
| 183 |
-
# Append timing info
|
| 184 |
prep_time = t_preprocess - t_start
|
| 185 |
gen_time = t_generate - t_preprocess
|
| 186 |
output += f"\n\n---\nPreprocessing: {prep_time:.1f}s | Inference: {gen_time:.1f}s"
|
|
@@ -192,6 +183,16 @@ def predict(
|
|
| 192 |
return f"Error:\n{tb}", None
|
| 193 |
|
| 194 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 195 |
# ---------------------------------------------------------------------------
|
| 196 |
# Gradio UI
|
| 197 |
# ---------------------------------------------------------------------------
|
|
@@ -291,7 +292,8 @@ with gr.Blocks(theme=gr.themes.Soft(), title=TITLE) as demo:
|
|
| 291 |
step=16,
|
| 292 |
label="Max New Tokens",
|
| 293 |
)
|
| 294 |
-
run_btn = gr.Button("Run", variant="primary", scale=2)
|
|
|
|
| 295 |
|
| 296 |
output = gr.Textbox(label="Model Output", lines=4)
|
| 297 |
vis_output = gr.Image(label="Detection Visualization")
|
|
@@ -301,6 +303,11 @@ with gr.Blocks(theme=gr.themes.Soft(), title=TITLE) as demo:
|
|
| 301 |
inputs=[ref_img, test_img, prompt, max_tokens],
|
| 302 |
outputs=[output, vis_output],
|
| 303 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 304 |
|
| 305 |
gr.Examples(
|
| 306 |
examples=EXAMPLES,
|
|
|
|
| 101 |
# ---------------------------------------------------------------------------
|
| 102 |
# Inference (supports both single-image and paired-image modes)
|
| 103 |
# ---------------------------------------------------------------------------
|
| 104 |
+
def _run_inference(reference_image, test_image, prompt, max_new_tokens):
|
| 105 |
+
"""Core inference logic shared by both predict functions."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 106 |
has_ref = reference_image is not None
|
| 107 |
has_test = test_image is not None
|
| 108 |
|
|
|
|
| 113 |
t_start = time.time()
|
| 114 |
max_new_tokens = int(max_new_tokens)
|
| 115 |
|
|
|
|
| 116 |
content = []
|
| 117 |
|
| 118 |
if has_ref and has_test:
|
|
|
|
| 167 |
clean_up_tokenization_spaces=False,
|
| 168 |
)[0]
|
| 169 |
|
|
|
|
| 170 |
bboxes = parse_bboxes(output)
|
| 171 |
vis_image = None
|
| 172 |
if bboxes:
|
| 173 |
vis_image = draw_bboxes(vis_source, bboxes)
|
| 174 |
|
|
|
|
| 175 |
prep_time = t_preprocess - t_start
|
| 176 |
gen_time = t_generate - t_preprocess
|
| 177 |
output += f"\n\n---\nPreprocessing: {prep_time:.1f}s | Inference: {gen_time:.1f}s"
|
|
|
|
| 183 |
return f"Error:\n{tb}", None
|
| 184 |
|
| 185 |
|
| 186 |
+
@spaces.GPU(duration=30)
def predict(reference_image, test_image, prompt, max_new_tokens):
    """Handle the standard run by delegating to ``_run_inference``.

    This is a thin wrapper: it adds no logic of its own and differs from
    ``predict_long`` only in the ``duration=30`` passed to ``spaces.GPU``
    (the UI labels the matching button "Run (30s)").

    All four arguments are forwarded unchanged, and whatever
    ``_run_inference`` returns is returned as-is.
    """
    # Keyword forwarding makes the one-to-one mapping onto the shared
    # helper's parameters explicit.
    return _run_inference(
        reference_image=reference_image,
        test_image=test_image,
        prompt=prompt,
        max_new_tokens=max_new_tokens,
    )
|
| 189 |
+
|
| 190 |
+
|
| 191 |
+
@spaces.GPU(duration=60)
def predict_long(reference_image, test_image, prompt, max_new_tokens):
    """Handle the long run by delegating to ``_run_inference``.

    This is identical to ``predict`` except for the ``duration=60`` passed
    to ``spaces.GPU`` (the UI labels the matching button "Run Long (60s)").

    All four arguments are forwarded unchanged, and whatever
    ``_run_inference`` returns is returned as-is.
    """
    # Keyword forwarding makes the one-to-one mapping onto the shared
    # helper's parameters explicit.
    return _run_inference(
        reference_image=reference_image,
        test_image=test_image,
        prompt=prompt,
        max_new_tokens=max_new_tokens,
    )
|
| 194 |
+
|
| 195 |
+
|
| 196 |
# ---------------------------------------------------------------------------
|
| 197 |
# Gradio UI
|
| 198 |
# ---------------------------------------------------------------------------
|
|
|
|
| 292 |
step=16,
|
| 293 |
label="Max New Tokens",
|
| 294 |
)
|
| 295 |
+
run_btn = gr.Button("Run (30s)", variant="primary", scale=2)
|
| 296 |
+
run_long_btn = gr.Button("Run Long (60s)", variant="secondary", scale=1)
|
| 297 |
|
| 298 |
output = gr.Textbox(label="Model Output", lines=4)
|
| 299 |
vis_output = gr.Image(label="Detection Visualization")
|
|
|
|
| 303 |
inputs=[ref_img, test_img, prompt, max_tokens],
|
| 304 |
outputs=[output, vis_output],
|
| 305 |
)
|
| 306 |
+
run_long_btn.click(
|
| 307 |
+
fn=predict_long,
|
| 308 |
+
inputs=[ref_img, test_img, prompt, max_tokens],
|
| 309 |
+
outputs=[output, vis_output],
|
| 310 |
+
)
|
| 311 |
|
| 312 |
gr.Examples(
|
| 313 |
examples=EXAMPLES,
|