Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
|
@@ -188,21 +188,25 @@ def generate_response(
|
|
| 188 |
):
|
| 189 |
"""Unified generation function for both image and video."""
|
| 190 |
if media_file is None:
|
| 191 |
-
yield "Please upload an image or video file first."
|
| 192 |
return
|
| 193 |
|
| 194 |
processor, model = get_model_and_processor(model_name)
|
| 195 |
if not processor or not model:
|
| 196 |
-
yield "Invalid model selected."
|
| 197 |
return
|
| 198 |
|
| 199 |
media_type = "video" if is_video_file(media_file) else "image"
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 206 |
|
| 207 |
if model_name == "SmolDocling-256M-preview":
|
| 208 |
if "OTSL" in query or "code" in query:
|
|
@@ -273,17 +277,24 @@ body, .gradio-container { font-family: 'Inter', sans-serif; }
|
|
| 273 |
|
| 274 |
def handle_file_upload(file):
|
| 275 |
if file is None:
|
| 276 |
-
return None, gr.update(visible=False)
|
| 277 |
if is_video_file(file.name):
|
| 278 |
-
return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 279 |
else:
|
| 280 |
-
|
|
|
|
| 281 |
|
| 282 |
def clear_all():
|
| 283 |
-
return None, None, None, ""
|
| 284 |
|
| 285 |
with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
|
| 286 |
-
# Hidden state to store the path to the uploaded file
|
| 287 |
media_file_path = gr.State(None)
|
| 288 |
|
| 289 |
with gr.Row(elem_classes="main-container"):
|
|
@@ -312,25 +323,29 @@ with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
|
|
| 312 |
with gr.Column(scale=4):
|
| 313 |
gr.Markdown("# Multimodal OCR")
|
| 314 |
|
| 315 |
-
# --- Media Display Area ---
|
| 316 |
with gr.Column(elem_classes="media-display"):
|
| 317 |
image_display = gr.Image(type="filepath", label="Image Preview", visible=False)
|
| 318 |
video_display = gr.Video(label="Video Preview", visible=False)
|
| 319 |
gr.Markdown("Upload an image or video to begin.")
|
| 320 |
|
| 321 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 322 |
gr.Examples(
|
| 323 |
examples=all_examples,
|
| 324 |
-
inputs=[media_file_path,
|
|
|
|
|
|
|
| 325 |
label="Examples (Click to run)",
|
| 326 |
-
|
| 327 |
-
outputs=[image_display, video_display]
|
| 328 |
)
|
| 329 |
|
| 330 |
-
# --- Chat/Output Window ---
|
| 331 |
output_display = gr.Markdown(elem_classes="chat-window", value="### Output will be shown here")
|
| 332 |
|
| 333 |
-
# --- Input Bar ---
|
| 334 |
with gr.Row(elem_classes="input-bar", vertical=False):
|
| 335 |
upload_btn = gr.UploadButton("📁 Add Files", file_types=["image", "video"])
|
| 336 |
model_dropdown = gr.Dropdown(
|
|
@@ -338,22 +353,15 @@ with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
|
|
| 338 |
label="Select Model",
|
| 339 |
value="Nanonets-OCR-s"
|
| 340 |
)
|
| 341 |
-
query_input
|
| 342 |
-
placeholder="Enter your query here...",
|
| 343 |
-
show_label=False,
|
| 344 |
-
scale=4,
|
| 345 |
-
)
|
| 346 |
submit_btn = gr.Button("▶", elem_classes="submit-button")
|
| 347 |
|
| 348 |
# --- Event Handlers ---
|
| 349 |
upload_btn.upload(
|
| 350 |
fn=handle_file_upload,
|
| 351 |
inputs=[upload_btn],
|
| 352 |
-
outputs=[image_display, video_display]
|
| 353 |
)
|
| 354 |
-
|
| 355 |
-
# When file is uploaded, also store its path in the state
|
| 356 |
-
upload_btn.upload(lambda f: f.name if f else None, upload_btn, media_file_path)
|
| 357 |
|
| 358 |
submit_btn.click(
|
| 359 |
fn=generate_response,
|
|
@@ -363,7 +371,7 @@ with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
|
|
| 363 |
|
| 364 |
add_conv_btn.click(
|
| 365 |
fn=clear_all,
|
| 366 |
-
outputs=[media_file_path, image_display, video_display, output_display]
|
| 367 |
)
|
| 368 |
|
| 369 |
|
|
|
|
| 188 |
):
|
| 189 |
"""Unified generation function for both image and video."""
|
| 190 |
if media_file is None:
|
| 191 |
+
yield "Please upload an image or video file first."
|
| 192 |
return
|
| 193 |
|
| 194 |
processor, model = get_model_and_processor(model_name)
|
| 195 |
if not processor or not model:
|
| 196 |
+
yield "Invalid model selected."
|
| 197 |
return
|
| 198 |
|
| 199 |
media_type = "video" if is_video_file(media_file) else "image"
|
| 200 |
+
|
| 201 |
+
try:
|
| 202 |
+
if media_type == "video":
|
| 203 |
+
frames = downsample_video(media_file)
|
| 204 |
+
images = [frame for frame, _ in frames]
|
| 205 |
+
else: # image
|
| 206 |
+
images = [Image.open(media_file)]
|
| 207 |
+
except Exception as e:
|
| 208 |
+
yield f"Error processing file: {e}"
|
| 209 |
+
return
|
| 210 |
|
| 211 |
if model_name == "SmolDocling-256M-preview":
|
| 212 |
if "OTSL" in query or "code" in query:
|
|
|
|
| 277 |
|
| 278 |
def handle_file_upload(file):
|
| 279 |
if file is None:
|
| 280 |
+
return None, gr.update(visible=False), gr.update(visible=False)
|
| 281 |
if is_video_file(file.name):
|
| 282 |
+
return file.name, gr.update(visible=False), gr.update(value=file.name, visible=True)
|
| 283 |
+
else:
|
| 284 |
+
return file.name, gr.update(value=file.name, visible=True), gr.update(visible=False)
|
| 285 |
+
|
| 286 |
+
def handle_example_click(file_path, query):
|
| 287 |
+
if is_video_file(file_path):
|
| 288 |
+
# Update state, hide image, show video, update query
|
| 289 |
+
return file_path, gr.update(visible=False), gr.update(value=file_path, visible=True), query
|
| 290 |
else:
|
| 291 |
+
# Update state, show image, hide video, update query
|
| 292 |
+
return file_path, gr.update(value=file_path, visible=True), gr.update(visible=False), query
|
| 293 |
|
| 294 |
def clear_all():
|
| 295 |
+
return None, None, None, "### Output will be shown here", ""
|
| 296 |
|
| 297 |
with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
|
|
|
|
| 298 |
media_file_path = gr.State(None)
|
| 299 |
|
| 300 |
with gr.Row(elem_classes="main-container"):
|
|
|
|
| 323 |
with gr.Column(scale=4):
|
| 324 |
gr.Markdown("# Multimodal OCR")
|
| 325 |
|
|
|
|
| 326 |
with gr.Column(elem_classes="media-display"):
|
| 327 |
image_display = gr.Image(type="filepath", label="Image Preview", visible=False)
|
| 328 |
video_display = gr.Video(label="Video Preview", visible=False)
|
| 329 |
gr.Markdown("Upload an image or video to begin.")
|
| 330 |
|
| 331 |
+
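# Define query_input before gr.Examples so the examples can reference it; note that a Gradio component also renders where it is created, so the textbox appears here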
|
| 332 |
+
query_input = gr.Textbox(
|
| 333 |
+
placeholder="Enter your query here...",
|
| 334 |
+
show_label=False,
|
| 335 |
+
scale=4,
|
| 336 |
+
)
|
| 337 |
+
|
| 338 |
gr.Examples(
|
| 339 |
examples=all_examples,
|
| 340 |
+
inputs=[media_file_path, query_input], # Pass component objects
|
| 341 |
+
outputs=[media_file_path, image_display, video_display, query_input],
|
| 342 |
+
fn=handle_example_click,
|
| 343 |
label="Examples (Click to run)",
|
| 344 |
+
cache_examples=True
|
|
|
|
| 345 |
)
|
| 346 |
|
|
|
|
| 347 |
output_display = gr.Markdown(elem_classes="chat-window", value="### Output will be shown here")
|
| 348 |
|
|
|
|
| 349 |
with gr.Row(elem_classes="input-bar", vertical=False):
|
| 350 |
upload_btn = gr.UploadButton("📁 Add Files", file_types=["image", "video"])
|
| 351 |
model_dropdown = gr.Dropdown(
|
|
|
|
| 353 |
label="Select Model",
|
| 354 |
value="Nanonets-OCR-s"
|
| 355 |
)
|
| 356 |
+
# NOTE: query_input is created above (before gr.Examples) and renders there; nothing in this row re-renders it, so it is not part of the input bar
|
|
|
|
|
|
|
|
|
|
|
|
|
| 357 |
submit_btn = gr.Button("▶", elem_classes="submit-button")
|
| 358 |
|
| 359 |
# --- Event Handlers ---
|
| 360 |
upload_btn.upload(
|
| 361 |
fn=handle_file_upload,
|
| 362 |
inputs=[upload_btn],
|
| 363 |
+
outputs=[media_file_path, image_display, video_display]
|
| 364 |
)
|
|
|
|
|
|
|
|
|
|
| 365 |
|
| 366 |
submit_btn.click(
|
| 367 |
fn=generate_response,
|
|
|
|
| 371 |
|
| 372 |
add_conv_btn.click(
|
| 373 |
fn=clear_all,
|
| 374 |
+
outputs=[media_file_path, image_display, video_display, output_display, query_input]
|
| 375 |
)
|
| 376 |
|
| 377 |
|