JasperHaozhe committed on
Commit
9f61b98
·
verified ·
1 Parent(s): 9febb0e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +85 -69
app.py CHANGED
@@ -81,11 +81,12 @@ flux_pipeline = FluxKontextPipeline.from_pretrained(
81
  flux_pipeline.to(device_gen)
82
 
83
  TASK_CHOICES = [
 
 
84
  "Pointwise - Image Editing",
85
  "Pointwise - T2I Generation",
86
  "Pairwise - Image Editing",
87
  "Pairwise - T2I Generation",
88
- "Prompt Tuning - Image Editing",
89
  ]
90
 
91
  # ============================================================
@@ -355,13 +356,30 @@ def create_instruction(prompt, task_type):
355
  raise ValueError(f"Unknown task type: {task_type}")
356
 
357
  def update_ui_for_task(task_type):
358
- """Update image component visibility/labels and instruction label based on selected task type."""
359
- if task_type == "Pointwise - Image Editing":
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
360
  return (
361
  gr.update(visible=True, label="Source Image"),
362
  gr.update(visible=True, label="Edited Image"),
363
  gr.update(visible=False, label="Image B", value=None),
364
  gr.update(label="Editing Instruction", placeholder="Describe the edit that was applied to the source image…"),
 
365
  )
366
  elif task_type == "Pointwise - T2I Generation":
367
  return (
@@ -369,6 +387,7 @@ def update_ui_for_task(task_type):
369
  gr.update(visible=False, label="(unused)", value=None),
370
  gr.update(visible=False, label="(unused)", value=None),
371
  gr.update(label="Text-to-Image Prompt", placeholder="Enter the text-to-image generation prompt…"),
 
372
  )
373
  elif task_type == "Pairwise - Image Editing":
374
  return (
@@ -376,6 +395,7 @@ def update_ui_for_task(task_type):
376
  gr.update(visible=True, label="Image A"),
377
  gr.update(visible=True, label="Image B"),
378
  gr.update(label="Editing Instruction", placeholder="Describe the edit that was applied to the source image…"),
 
379
  )
380
  elif task_type == "Pairwise - T2I Generation":
381
  return (
@@ -383,13 +403,7 @@ def update_ui_for_task(task_type):
383
  gr.update(visible=True, label="Image B"),
384
  gr.update(visible=False, label="(unused)", value=None),
385
  gr.update(label="Text-to-Image Prompt", placeholder="Enter the text-to-image generation prompt…"),
386
- )
387
- elif task_type == "Prompt Tuning - Image Editing":
388
- return (
389
- gr.update(visible=True, label="Source Image"),
390
- gr.update(visible=True, label="Generated Image", interactive=False, value=None),
391
- gr.update(visible=False, label="(unused)", value=None),
392
- gr.update(label="Instruction", placeholder="Enter the instruction for editing..."),
393
  )
394
  else:
395
  raise ValueError(f"Unknown task type: {task_type}")
@@ -440,76 +454,67 @@ def run_vlm_evaluation(messages, loaded_images):
440
  buffer += new_text
441
  yield buffer
442
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
443
  @spaces.GPU(duration=300)
444
  def model_inference(task_type, instruction_text, image1, image2, image3, progress=gr.Progress()):
445
- """Run model inference based on the selected task type and uploaded images."""
446
-
447
  loaded_images = []
448
  task_for_template = task_type
449
- generated_image_path = None
450
 
451
  # Validate inputs and collect images based on task
452
- if task_type == "Pointwise - Image Editing":
453
  if not image1 or not image2:
454
- yield "Error: Please upload both Source Image and Edited Image.", None
455
  return
456
  files = [image1, image2]
457
  loaded_images = [load_image(img) for img in files]
 
458
 
459
- elif task_type == "Pointwise - T2I Generation":
460
  if not image1:
461
- yield "Error: Please upload the Generated Image.", None
462
  return
463
  files = [image1]
464
  loaded_images = [load_image(img) for img in files]
 
465
 
466
  elif task_type == "Pairwise - Image Editing":
467
  if not image1 or not image2 or not image3:
468
- yield "Error: Please upload Source Image, Image A, and Image B.", None
469
  return
470
  files = [image1, image2, image3]
471
  loaded_images = [load_image(img) for img in files]
472
 
473
  elif task_type == "Pairwise - T2I Generation":
474
  if not image1 or not image2:
475
- yield "Error: Please upload both Image A and Image B.", None
476
  return
477
  files = [image1, image2]
478
  loaded_images = [load_image(img) for img in files]
479
 
480
- elif task_type == "Prompt Tuning - Image Editing":
481
- if not image1:
482
- yield "Error: Please upload the Source Image.", None
483
- return
484
-
485
- progress(0, desc="Starting generation...")
486
- yield "Generating edited image with Flux... (This may take a minute)", None
487
-
488
- # Load source image
489
- try:
490
- source_img = load_image(image1)
491
- width, height = source_img.size
492
-
493
- # Run Flux generation on GPU
494
- generated_image = run_flux_generation(instruction_text, source_img, width, height)
495
-
496
- # Save generated image
497
- timestamp = int(time.time())
498
- os.makedirs("generated_images", exist_ok=True)
499
- generated_image_path = f"generated_images/flux_edit_{timestamp}.png"
500
- generated_image.save(generated_image_path)
501
-
502
- except Exception as e:
503
- yield f"Error generating image: {str(e)}", None
504
- return
505
-
506
- yield "Image generated! Evaluating...", generated_image_path
507
-
508
- loaded_images = [source_img, generated_image]
509
- task_for_template = "Pointwise - Image Editing"
510
-
511
  else:
512
- yield "Error: Unknown task type selected.", None
513
  return
514
 
515
  # Build instruction with <image> placeholders
@@ -528,7 +533,7 @@ def model_inference(task_type, instruction_text, image1, image2, image3, progres
528
  # Run VLM evaluation on GPU (streaming)
529
  progress(0.9, desc="Evaluating...")
530
  for text in run_vlm_evaluation(messages, loaded_images):
531
- yield text, gr.update(value=generated_image_path) if generated_image_path else gr.update()
532
 
533
  # ============================================================
534
  # Gradio UI
@@ -537,17 +542,18 @@ def model_inference(task_type, instruction_text, image1, image2, image3, progres
537
  OVERVIEW_MD = """
538
  ### 📋 Task Overview
539
 
540
- This demo supports **four evaluation tasks**. Select one to get started:
541
 
542
  | Task | Description |
543
  |------|-------------|
 
 
544
  | **Pointwise – Image Editing** | Rate a single edited image against its source image and the editing instruction. Produces per-aspect scores and a refined request. |
545
  | **Pointwise – T2I Generation** | Rate a single generated image against a text-to-image prompt. Produces per-aspect scores and a refined prompt. |
546
  | **Pairwise – Image Editing** | Compare two edited images (A vs B) given a source image and editing instruction. Determines which edit is better per aspect. |
547
  | **Pairwise – T2I Generation** | Compare two generated images (A vs B) given a text-to-image prompt. Determines which generation is better per aspect. |
548
- | **Prompt Tuning – Image Editing** | Generate an edit using Flux (Kontext) from a source image and instruction, then evaluate it. Use the refinement to tune your prompt. |
549
 
550
- **Try the examples below - they're basically begging to be clicked! 🎯**
551
  """
552
 
553
  with gr.Blocks(css="""
@@ -559,11 +565,11 @@ with gr.Blocks(css="""
559
  gr.Markdown(OVERVIEW_MD)
560
 
561
  with gr.Row(equal_height=True):
562
- # ============ LEFT COLUMN – all inputs (scrollable) ============
563
  with gr.Column(scale=1, elem_id="input-panel"):
564
  task_selector = gr.Radio(
565
  choices=TASK_CHOICES,
566
- value="Pointwise - Image Editing",
567
  label="Task Type",
568
  info="Select the evaluation task",
569
  )
@@ -578,9 +584,10 @@ with gr.Blocks(css="""
578
  )
579
  with gr.Column(scale=1, min_width=160):
580
  image2 = gr.Image(
581
- label="Edited Image",
582
  type="filepath",
583
  sources=["upload", "clipboard"],
 
584
  )
585
  with gr.Column(scale=1, min_width=160):
586
  image3 = gr.Image(
@@ -590,40 +597,49 @@ with gr.Blocks(css="""
590
  visible=False,
591
  )
592
 
593
- # ---- Instruction + Evaluate ----
594
  instruction = gr.Textbox(
595
  label="Editing Instruction",
596
  lines=3,
597
- placeholder="Describe the edit that was applied to the source image…",
598
  )
599
- submit_btn = gr.Button("Evaluate", variant="primary")
 
 
600
 
601
- # ---- Examples ----
 
602
  gr.Examples(
603
  examples=[
 
604
  ["Pointwise - Image Editing", "Remove the arrows from the blue sign and add the text of Detour ahead, no right turns.", "example_images/0016cb70b187efe39969766dc4b3f9ed_b63ed6db519f685c33b860b511879cfe2fa7351059a17ebe5eafa83213e222fb_13_source.png", "example_images/0016cb70b187efe39969766dc4b3f9ed_b63ed6db519f685c33b860b511879cfe2fa7351059a17ebe5eafa83213e222fb_13_ovis_u1_Image A.png", None],
605
  ["Pairwise - Image Editing", "Remove the arrows from the blue sign and add the text of Detour ahead, no right turns.", "example_images/0016cb70b187efe39969766dc4b3f9ed_b63ed6db519f685c33b860b511879cfe2fa7351059a17ebe5eafa83213e222fb_13_source.png", "example_images/0016cb70b187efe39969766dc4b3f9ed_b63ed6db519f685c33b860b511879cfe2fa7351059a17ebe5eafa83213e222fb_13_ovis_u1_Image A.png", "example_images/0016cb70b187efe39969766dc4b3f9ed_b63ed6db519f685c33b860b511879cfe2fa7351059a17ebe5eafa83213e222fb_13_ovis_u1_Image A.png"],
606
- ["Prompt Tuning - Image Editing", "Remove the arrows from the blue sign and add the text of Detour ahead, no right turns.", "example_images/0016cb70b187efe39969766dc4b3f9ed_b63ed6db519f685c33b860b511879cfe2fa7351059a17ebe5eafa83213e222fb_13_source.png", None, None],
607
  ],
608
  inputs=[task_selector, instruction, image1, image2, image3],
609
  )
610
 
611
- # ============ RIGHT COLUMN – evaluation output ============
612
- with gr.Column(scale=1):
613
- output = gr.Textbox(label="Evaluation Result", lines=30)
614
 
615
- # ---- Wire task selector to update image visibility/labels + instruction label ----
616
  task_selector.change(
617
  fn=update_ui_for_task,
618
  inputs=[task_selector],
619
- outputs=[image1, image2, image3, instruction],
 
 
 
 
 
 
 
620
  )
621
 
622
  # ---- Wire evaluate button ----
623
  submit_btn.click(
624
  fn=model_inference,
625
  inputs=[task_selector, instruction, image1, image2, image3],
626
- outputs=[output, image2],
627
  )
628
 
629
  gr.Markdown(tos_markdown)
 
81
  flux_pipeline.to(device_gen)
82
 
83
  TASK_CHOICES = [
84
+ "Prompt Tuning - Image Editing",
85
+ "Prompt Tuning - T2I Generation",
86
  "Pointwise - Image Editing",
87
  "Pointwise - T2I Generation",
88
  "Pairwise - Image Editing",
89
  "Pairwise - T2I Generation",
 
90
  ]
91
 
92
  # ============================================================
 
356
  raise ValueError(f"Unknown task type: {task_type}")
357
 
358
  def update_ui_for_task(task_type):
359
+ """Update image component visibility/labels, instruction label, and generate button based on selected task type."""
360
+ if task_type == "Prompt Tuning - Image Editing":
361
+ return (
362
+ gr.update(visible=True, label="Source Image"),
363
+ gr.update(visible=True, label="Generated Image", interactive=False, value=None),
364
+ gr.update(visible=False, label="(unused)", value=None),
365
+ gr.update(label="Editing Instruction", placeholder="Enter the instruction for editing..."),
366
+ gr.update(visible=True), # generate_btn visible
367
+ )
368
+ elif task_type == "Prompt Tuning - T2I Generation":
369
+ return (
370
+ gr.update(visible=True, label="Generated Image"),
371
+ gr.update(visible=False, label="(unused)", value=None),
372
+ gr.update(visible=False, label="(unused)", value=None),
373
+ gr.update(label="T2I Prompt", placeholder="Enter the text-to-image generation prompt…"),
374
+ gr.update(visible=False), # generate_btn hidden (no T2I pipeline)
375
+ )
376
+ elif task_type == "Pointwise - Image Editing":
377
  return (
378
  gr.update(visible=True, label="Source Image"),
379
  gr.update(visible=True, label="Edited Image"),
380
  gr.update(visible=False, label="Image B", value=None),
381
  gr.update(label="Editing Instruction", placeholder="Describe the edit that was applied to the source image…"),
382
+ gr.update(visible=False), # generate_btn hidden
383
  )
384
  elif task_type == "Pointwise - T2I Generation":
385
  return (
 
387
  gr.update(visible=False, label="(unused)", value=None),
388
  gr.update(visible=False, label="(unused)", value=None),
389
  gr.update(label="Text-to-Image Prompt", placeholder="Enter the text-to-image generation prompt…"),
390
+ gr.update(visible=False), # generate_btn hidden
391
  )
392
  elif task_type == "Pairwise - Image Editing":
393
  return (
 
395
  gr.update(visible=True, label="Image A"),
396
  gr.update(visible=True, label="Image B"),
397
  gr.update(label="Editing Instruction", placeholder="Describe the edit that was applied to the source image…"),
398
+ gr.update(visible=False), # generate_btn hidden
399
  )
400
  elif task_type == "Pairwise - T2I Generation":
401
  return (
 
403
  gr.update(visible=True, label="Image B"),
404
  gr.update(visible=False, label="(unused)", value=None),
405
  gr.update(label="Text-to-Image Prompt", placeholder="Enter the text-to-image generation prompt…"),
406
+ gr.update(visible=False), # generate_btn hidden
 
 
 
 
 
 
407
  )
408
  else:
409
  raise ValueError(f"Unknown task type: {task_type}")
 
454
  buffer += new_text
455
  yield buffer
456
 
457
+ def generate_image(task_type, instruction_text, image1):
458
+ """Generate an edited image using Flux (Kontext) and return the path."""
459
+ if task_type != "Prompt Tuning - Image Editing":
460
+ raise gr.Error("Generate is only available for Prompt Tuning – Image Editing.")
461
+ if not image1:
462
+ raise gr.Error("Please upload the Source Image first.")
463
+ if not instruction_text:
464
+ raise gr.Error("Please enter an editing instruction first.")
465
+
466
+ source_img = load_image(image1)
467
+ width, height = source_img.size
468
+
469
+ generated_image = run_flux_generation(instruction_text, source_img, width, height)
470
+
471
+ timestamp = int(time.time())
472
+ os.makedirs("generated_images", exist_ok=True)
473
+ generated_image_path = f"generated_images/flux_edit_{timestamp}.png"
474
+ generated_image.save(generated_image_path)
475
+
476
+ return generated_image_path
477
+
478
  @spaces.GPU(duration=300)
479
  def model_inference(task_type, instruction_text, image1, image2, image3, progress=gr.Progress()):
480
+ """Run VLM evaluation based on the selected task type and uploaded images."""
481
+
482
  loaded_images = []
483
  task_for_template = task_type
 
484
 
485
  # Validate inputs and collect images based on task
486
+ if task_type in ("Pointwise - Image Editing", "Prompt Tuning - Image Editing"):
487
  if not image1 or not image2:
488
+ yield "Error: Please upload Source Image and Edited/Generated Image."
489
  return
490
  files = [image1, image2]
491
  loaded_images = [load_image(img) for img in files]
492
+ task_for_template = "Pointwise - Image Editing"
493
 
494
+ elif task_type in ("Pointwise - T2I Generation", "Prompt Tuning - T2I Generation"):
495
  if not image1:
496
+ yield "Error: Please upload the Generated Image."
497
  return
498
  files = [image1]
499
  loaded_images = [load_image(img) for img in files]
500
+ task_for_template = "Pointwise - T2I Generation"
501
 
502
  elif task_type == "Pairwise - Image Editing":
503
  if not image1 or not image2 or not image3:
504
+ yield "Error: Please upload Source Image, Image A, and Image B."
505
  return
506
  files = [image1, image2, image3]
507
  loaded_images = [load_image(img) for img in files]
508
 
509
  elif task_type == "Pairwise - T2I Generation":
510
  if not image1 or not image2:
511
+ yield "Error: Please upload both Image A and Image B."
512
  return
513
  files = [image1, image2]
514
  loaded_images = [load_image(img) for img in files]
515
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
516
  else:
517
+ yield "Error: Unknown task type selected."
518
  return
519
 
520
  # Build instruction with <image> placeholders
 
533
  # Run VLM evaluation on GPU (streaming)
534
  progress(0.9, desc="Evaluating...")
535
  for text in run_vlm_evaluation(messages, loaded_images):
536
+ yield text
537
 
538
  # ============================================================
539
  # Gradio UI
 
542
  OVERVIEW_MD = """
543
  ### 📋 Task Overview
544
 
545
+ This demo supports **six tasks**. Select one to get started:
546
 
547
  | Task | Description |
548
  |------|-------------|
549
+ | **Prompt Tuning – Image Editing** | Generate an edit using Flux (Kontext) from a source image and instruction, then evaluate it. Use the refinement to tune your prompt. |
550
+ | **Prompt Tuning – T2I Generation** | Upload a generated image and a text-to-image prompt, then evaluate it. Use the refinement to iteratively improve your prompt. |
551
  | **Pointwise – Image Editing** | Rate a single edited image against its source image and the editing instruction. Produces per-aspect scores and a refined request. |
552
  | **Pointwise – T2I Generation** | Rate a single generated image against a text-to-image prompt. Produces per-aspect scores and a refined prompt. |
553
  | **Pairwise – Image Editing** | Compare two edited images (A vs B) given a source image and editing instruction. Determines which edit is better per aspect. |
554
  | **Pairwise – T2I Generation** | Compare two generated images (A vs B) given a text-to-image prompt. Determines which generation is better per aspect. |
 
555
 
556
+ **Try the examples on the right - they're basically begging to be clicked! 🎯**
557
  """
558
 
559
  with gr.Blocks(css="""
 
565
  gr.Markdown(OVERVIEW_MD)
566
 
567
  with gr.Row(equal_height=True):
568
+ # ============ LEFT COLUMN – inputs ============
569
  with gr.Column(scale=1, elem_id="input-panel"):
570
  task_selector = gr.Radio(
571
  choices=TASK_CHOICES,
572
+ value="Prompt Tuning - Image Editing",
573
  label="Task Type",
574
  info="Select the evaluation task",
575
  )
 
584
  )
585
  with gr.Column(scale=1, min_width=160):
586
  image2 = gr.Image(
587
+ label="Generated Image",
588
  type="filepath",
589
  sources=["upload", "clipboard"],
590
+ interactive=False,
591
  )
592
  with gr.Column(scale=1, min_width=160):
593
  image3 = gr.Image(
 
597
  visible=False,
598
  )
599
 
600
+ # ---- Instruction + Buttons ----
601
  instruction = gr.Textbox(
602
  label="Editing Instruction",
603
  lines=3,
604
+ placeholder="Enter the instruction for editing...",
605
  )
606
+ with gr.Row():
607
+ generate_btn = gr.Button("Generate Image", variant="secondary", visible=True)
608
+ submit_btn = gr.Button("Evaluate", variant="primary")
609
 
610
+ # ============ RIGHT COLUMN – examples ============
611
+ with gr.Column(scale=1):
612
  gr.Examples(
613
  examples=[
614
+ ["Prompt Tuning - Image Editing", "Remove the arrows from the blue sign and add the text of Detour ahead, no right turns.", "example_images/0016cb70b187efe39969766dc4b3f9ed_b63ed6db519f685c33b860b511879cfe2fa7351059a17ebe5eafa83213e222fb_13_source.png", None, None],
615
  ["Pointwise - Image Editing", "Remove the arrows from the blue sign and add the text of Detour ahead, no right turns.", "example_images/0016cb70b187efe39969766dc4b3f9ed_b63ed6db519f685c33b860b511879cfe2fa7351059a17ebe5eafa83213e222fb_13_source.png", "example_images/0016cb70b187efe39969766dc4b3f9ed_b63ed6db519f685c33b860b511879cfe2fa7351059a17ebe5eafa83213e222fb_13_ovis_u1_Image A.png", None],
616
  ["Pairwise - Image Editing", "Remove the arrows from the blue sign and add the text of Detour ahead, no right turns.", "example_images/0016cb70b187efe39969766dc4b3f9ed_b63ed6db519f685c33b860b511879cfe2fa7351059a17ebe5eafa83213e222fb_13_source.png", "example_images/0016cb70b187efe39969766dc4b3f9ed_b63ed6db519f685c33b860b511879cfe2fa7351059a17ebe5eafa83213e222fb_13_ovis_u1_Image A.png", "example_images/0016cb70b187efe39969766dc4b3f9ed_b63ed6db519f685c33b860b511879cfe2fa7351059a17ebe5eafa83213e222fb_13_ovis_u1_Image A.png"],
 
617
  ],
618
  inputs=[task_selector, instruction, image1, image2, image3],
619
  )
620
 
621
+ # ---- Evaluation result (full width, below the input/examples row) ----
622
+ output = gr.Textbox(label="Evaluation Result", lines=20)
 
623
 
624
+ # ---- Wire task selector to update image visibility/labels, instruction label, and generate button ----
625
  task_selector.change(
626
  fn=update_ui_for_task,
627
  inputs=[task_selector],
628
+ outputs=[image1, image2, image3, instruction, generate_btn],
629
+ )
630
+
631
+ # ---- Wire generate button (Prompt Tuning – Image Editing only) ----
632
+ generate_btn.click(
633
+ fn=generate_image,
634
+ inputs=[task_selector, instruction, image1],
635
+ outputs=[image2],
636
  )
637
 
638
  # ---- Wire evaluate button ----
639
  submit_btn.click(
640
  fn=model_inference,
641
  inputs=[task_selector, instruction, image1, image2, image3],
642
+ outputs=[output],
643
  )
644
 
645
  gr.Markdown(tos_markdown)