ghmk Claude Opus 4.5 committed on
Commit
2ed486e
·
1 Parent(s): f3f2fa1

Add Scene Composer tab for multi-reference scene generation

Browse files

- Tab 1: Character Sheet Generator (existing functionality)
- Tab 2: Scene Composer with up to 4 reference images
- Character sheet 1 & 2 inputs
- Background image input
- Object/prop image input
- render_scene_with_gpu() function with @spaces.GPU decorator
- Aspect ratio selection for scene output

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

Files changed (1) hide show
  1. app.py +416 -153
app.py CHANGED
@@ -458,6 +458,155 @@ def generate_with_gpu(
458
  return None, f"Error: {str(e)}", {}
459
 
460
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
461
  # =============================================================================
462
  # Gradio Interface Functions
463
  # =============================================================================
@@ -672,9 +821,9 @@ def create_ui():
672
  )
673
 
674
  gr.Markdown("# Character Sheet Pro")
675
- gr.Markdown("Generate 7-view character turnaround sheets from a single input image using FLUX.2 klein.")
676
 
677
- # Backend selection and controls
678
  with gr.Row():
679
  backend_dropdown = gr.Dropdown(
680
  choices=[
@@ -682,7 +831,7 @@ def create_ui():
682
  ("FLUX.2 klein 4B (Fast, ~13GB)", BackendType.FLUX_KLEIN.value),
683
  ("Gemini Flash (Cloud - Fallback)", BackendType.GEMINI_FLASH.value),
684
  ],
685
- value="flux_klein_9b_fp8", # Default to best quality
686
  label="Backend",
687
  scale=2
688
  )
@@ -695,159 +844,252 @@ def create_ui():
695
  scale=2
696
  )
697
 
698
- with gr.Row():
699
- # Left column: Inputs
700
- with gr.Column(scale=1):
701
- gr.Markdown("### Input Settings")
702
-
703
- input_type = gr.Radio(
704
- choices=["Face Only", "Full Body", "Face + Body (Separate)"],
705
- value="Face Only",
706
- label="Input Type",
707
- info="What type of image(s) are you providing?"
708
- )
709
-
710
- main_input = gr.Image(
711
- label="Input Image",
712
- type="pil",
713
- format="png",
714
- visible=True
715
- )
716
-
717
- with gr.Row(visible=False) as face_body_row:
718
- face_input = gr.Image(
719
- label="Face Reference",
720
- type="pil",
721
- format="png",
722
- visible=False
723
- )
724
- body_input = gr.Image(
725
- label="Body Reference",
726
- type="pil",
727
- format="png",
728
- visible=False
729
- )
730
-
731
- gr.Markdown("### Character Details")
732
-
733
- character_name = gr.Textbox(
734
- label="Character Name",
735
- placeholder="My Character",
736
- value=""
737
- )
738
-
739
- gender = gr.Radio(
740
- choices=["Auto/Neutral", "Male", "Female"],
741
- value="Auto/Neutral",
742
- label="Gender"
743
- )
744
-
745
- costume_description = gr.Textbox(
746
- label="Costume Description (Optional)",
747
- placeholder="e.g., Full plate armor with gold trim...",
748
- value="",
749
- lines=3
750
- )
751
-
752
- costume_image = gr.Image(
753
- label="Costume Reference Image (Optional)",
754
- type="pil",
755
- format="png"
756
- )
757
-
758
- gr.Markdown("### Generation Parameters")
759
-
760
  with gr.Row():
761
- num_steps = gr.Number(
762
- label="Inference Steps",
763
- value=4,
764
- minimum=1,
765
- maximum=50,
766
- step=1,
767
- info="FLUX klein uses 4 steps"
768
- )
769
- guidance_scale = gr.Number(
770
- label="Guidance Scale",
771
- value=1.0,
772
- minimum=0.0,
773
- maximum=10.0,
774
- step=0.1,
775
- info="FLUX klein uses 1.0"
776
- )
777
-
778
- include_costume_in_faces = gr.Checkbox(
779
- label="Include costume in face views",
780
- value=False,
781
- info="Turn OFF for FLUX (can confuse framing)"
782
- )
783
-
784
- # GENERATE BUTTON
785
- generate_btn = gr.Button(
786
- "GENERATE CHARACTER SHEET",
787
- variant="primary",
788
- size="lg",
789
- elem_classes=["generate-btn-main"]
790
- )
791
-
792
- # Right column: Output
793
- with gr.Column(scale=2):
794
- gr.Markdown("### Generated Character Sheet")
795
-
796
- output_image = gr.Image(
797
- label="Character Sheet",
798
- type="pil",
799
- format="png",
800
- elem_classes=["output-image"]
801
- )
802
-
803
- status_text = gr.Textbox(
804
- label="Status",
805
- interactive=False
806
- )
807
-
808
- # Preview gallery
809
- gr.Markdown("### Individual Views Preview")
810
-
811
- with gr.Row():
812
- gr.Markdown("**Face Views:**")
813
- with gr.Row():
814
- preview_left_face = gr.Image(label="Left Face", type="pil", height=150, width=112)
815
- preview_front_face = gr.Image(label="Front Face", type="pil", height=150, width=112)
816
- preview_right_face = gr.Image(label="Right Face", type="pil", height=150, width=112)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
817
 
818
  with gr.Row():
819
- gr.Markdown("**Body Views:**")
820
- with gr.Row():
821
- preview_left_body = gr.Image(label="Left Body", type="pil", height=150, width=84)
822
- preview_front_body = gr.Image(label="Front Body", type="pil", height=150, width=84)
823
- preview_right_body = gr.Image(label="Right Body", type="pil", height=150, width=84)
824
- preview_back_body = gr.Image(label="Back Body", type="pil", height=150, width=84)
825
-
826
- # Downloads
827
- gr.Markdown("### Downloads")
828
- with gr.Row():
829
- json_download = gr.File(label="Metadata JSON", interactive=False)
830
- zip_download = gr.File(label="Complete Package (ZIP)", interactive=False)
831
-
832
- # Usage instructions
833
- gr.Markdown("---")
834
- gr.Markdown("### How to Use")
835
- gr.Markdown("""
836
- 1. **Upload an image** (face portrait or full body)
837
- 2. **Select input type** based on your image
838
- 3. **Optionally** add character name, gender, and costume description
839
- 4. **Click Generate** - the model loads automatically on first run (~30-60s)
840
- 5. **Wait** for all 7 views to generate (~2-3 minutes total)
841
- 6. **Download** the complete package
842
-
843
- **GPU Notes:**
844
- - Uses Zero GPU (A10G 24GB) - free but with 5-minute session limit
845
- - First generation loads the model (adds ~30-60 seconds)
846
- - Subsequent generations in the same session are faster
847
- - If GPU unavailable, switch to Gemini Flash (requires API key)
848
- """)
849
-
850
- # Event handlers
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
851
  input_type.change(
852
  fn=update_input_visibility,
853
  inputs=[input_type],
@@ -892,6 +1134,27 @@ def create_ui():
892
  ]
893
  )
894
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
895
  return demo
896
 
897
 
 
458
  return None, f"Error: {str(e)}", {}
459
 
460
 
461
+ # =============================================================================
462
+ # Scene Composer GPU Function
463
+ # =============================================================================
464
+
465
@spaces.GPU(duration=120)  # 2-minute timeout for scene rendering
def render_scene_with_gpu(
    character_sheet_1: Optional[Image.Image],
    character_sheet_2: Optional[Image.Image],
    background_image: Optional[Image.Image],
    object_image: Optional[Image.Image],
    scene_description: str,
    aspect_ratio: str,
    backend_choice: str,
    api_key: str,
    num_steps: int,
    guidance_scale: float
) -> Tuple[Optional[Image.Image], str]:
    """
    GPU-wrapped scene rendering function.

    Builds a text prompt from the scene description and whichever reference
    images were supplied, then delegates to ``generate_image`` on the cached
    service's client.

    Args:
        character_sheet_1: Primary character sheet (required).
        character_sheet_2: Optional second character sheet.
        background_image: Optional background reference.
        object_image: Optional object/prop reference.
        scene_description: Free-text description of the scene (required).
        aspect_ratio: One of the labels in the aspect-ratio table below;
            unknown labels fall back to 1024x1024.
        backend_choice: Backend identifier parsed by
            ``BackendRouter.backend_from_string``.
        api_key: API key, forwarded to the service only for Gemini backends.
        num_steps: Number of inference steps passed to the client.
        guidance_scale: Guidance value passed to the client.

    Returns:
        ``(image, status)``. ``image`` is ``None`` and ``status`` starts with
        ``"Error:"`` when validation fails, the backend's client has no
        ``generate_image`` method, or any exception is raised.
    """
    global _cached_service, _cached_backend

    try:
        # Determine backend
        backend = BackendRouter.backend_from_string(backend_choice)
        is_cloud = backend in (BackendType.GEMINI_FLASH, BackendType.GEMINI_PRO)

        # Validate inputs
        if character_sheet_1 is None:
            return None, "Error: Please provide at least one character sheet"

        # A missing description (None) is treated like an empty one instead
        # of surfacing an AttributeError message to the user.
        description = (scene_description or "").strip()
        if not description:
            return None, "Error: Please describe the scene"

        # Load or reuse service.
        # NOTE(review): the cache is keyed on the backend only, so a different
        # api_key is not picked up while the same backend stays cached.
        # Confirm whether that is intended.
        if _cached_service is None or _cached_backend != backend:
            logger.info("Loading model for %s...", backend.value)
            _cached_service = CharacterSheetService(
                api_key=api_key if is_cloud else None,
                backend=backend
            )
            _cached_backend = backend

        # Build the prompt as one instruction sentence followed by the fixed
        # quality suffix. Joining every fragment with ". " used to split the
        # instruction mid-sentence ("...image. together with...").
        subject = "Render the character from the first reference image"
        if character_sheet_2 is not None:
            subject += " together with the character from the second reference image"

        # Drop trailing periods from the user's text so the joined sentence
        # does not end up with ".," or "..".
        clauses = [subject, description.rstrip(". ")]
        if background_image is not None:
            clauses.append("using the background from the reference")
        if object_image is not None:
            clauses.append("incorporating the object/prop from the reference")

        prompt = (
            ", ".join(clause for clause in clauses if clause)
            + ". Maintain exact character identity and features from the character sheet(s). "
            "High quality, detailed, professional lighting."
        )

        # Collect input images in the order the prompt refers to them
        input_images = [
            image
            for image in (character_sheet_1, character_sheet_2, background_image, object_image)
            if image is not None
        ]

        # Map aspect ratio to dimensions
        aspect_ratios = {
            "1:1 (Square)": (1024, 1024),
            "16:9 (Landscape)": (1344, 768),
            "9:16 (Portrait)": (768, 1344),
            "4:3 (Landscape)": (1152, 896),
            "3:4 (Portrait)": (896, 1152),
            "3:2 (Landscape)": (1248, 832),
            "2:3 (Portrait)": (832, 1248),
        }
        width, height = aspect_ratios.get(aspect_ratio, (1024, 1024))

        # Generate scene using the client directly
        logger.info("Rendering scene: %s...", prompt[:100])

        client = getattr(_cached_service, 'client', None)
        if client is None or not hasattr(client, 'generate_image'):
            return None, "Error: Scene rendering not supported by current backend"

        result_image, status = client.generate_image(
            prompt=prompt,
            input_images=input_images,
            width=width,
            height=height,
            steps=num_steps,
            guidance=guidance_scale
        )
        return result_image, status

    except Exception as e:
        # Top-level boundary for the UI: log the traceback and report the
        # failure through the status string instead of raising.
        logger.exception("Scene rendering error: %s", e)
        return None, f"Error: {str(e)}"
563
+
564
+
565
def render_scene(
    character_sheet_1: Optional[Image.Image],
    character_sheet_2: Optional[Image.Image],
    background_image: Optional[Image.Image],
    object_image: Optional[Image.Image],
    scene_description: str,
    aspect_ratio: str,
    backend_choice: str,
    api_key_override: str,
    num_steps: int,
    guidance_scale: float,
    progress=gr.Progress()
) -> Tuple[Optional[Image.Image], str]:
    """
    Wrapper for scene rendering with progress updates.

    Validates the request, preprocesses the reference images, resolves the
    API key, and then calls ``render_scene_with_gpu``.

    Args:
        character_sheet_1: Primary character sheet (required).
        character_sheet_2: Optional second character sheet.
        background_image: Optional background reference.
        object_image: Optional object/prop reference.
        scene_description: Free-text description of the scene (required).
        aspect_ratio: Aspect-ratio label forwarded unchanged.
        backend_choice: Backend identifier forwarded unchanged.
        api_key_override: User-supplied key; when blank, ``API_KEY`` is used.
        num_steps: Inference steps; coerced to ``int``.
        guidance_scale: Guidance value; coerced to ``float``.
        progress: Gradio progress tracker.

    Returns:
        ``(image, status)`` as produced by ``render_scene_with_gpu``, or
        ``(None, "Error: ...")`` when validation fails here.
    """
    progress(0.1, desc="Preparing scene...")

    # Validate up front, before calling the GPU-decorated function, so an
    # invalid request is rejected without entering it. The messages match
    # the ones render_scene_with_gpu returns.
    if character_sheet_1 is None:
        return None, "Error: Please provide at least one character sheet"

    if not (scene_description or "").strip():
        return None, "Error: Please describe the scene"

    # A cleared numeric field would otherwise raise an uncaught TypeError
    # from int()/float().
    try:
        steps = int(num_steps)
        guidance = float(guidance_scale)
    except (TypeError, ValueError):
        return None, "Error: Inference steps and guidance scale must be numbers"

    # Preprocess images; optional references are only processed when present
    character_sheet_1 = ensure_png_image(character_sheet_1, max_size=1024)
    if character_sheet_2 is not None:
        character_sheet_2 = ensure_png_image(character_sheet_2, max_size=1024)
    if background_image is not None:
        background_image = ensure_png_image(background_image, max_size=1024)
    if object_image is not None:
        object_image = ensure_png_image(object_image, max_size=512)

    # Fall back to the module-level key when no override was given (or None)
    override = (api_key_override or "").strip()
    api_key = override if override else API_KEY

    progress(0.2, desc="Allocating GPU and rendering scene...")

    result, status = render_scene_with_gpu(
        character_sheet_1=character_sheet_1,
        character_sheet_2=character_sheet_2,
        background_image=background_image,
        object_image=object_image,
        scene_description=scene_description,
        aspect_ratio=aspect_ratio,
        backend_choice=backend_choice,
        api_key=api_key,
        num_steps=steps,
        guidance_scale=guidance
    )

    progress(1.0, desc="Done!")
    return result, status
608
+
609
+
610
  # =============================================================================
611
  # Gradio Interface Functions
612
  # =============================================================================
 
821
  )
822
 
823
  gr.Markdown("# Character Sheet Pro")
824
+ gr.Markdown("Generate 7-view character turnaround sheets and compose scenes with your characters.")
825
 
826
+ # Shared controls (outside tabs)
827
  with gr.Row():
828
  backend_dropdown = gr.Dropdown(
829
  choices=[
 
831
  ("FLUX.2 klein 4B (Fast, ~13GB)", BackendType.FLUX_KLEIN.value),
832
  ("Gemini Flash (Cloud - Fallback)", BackendType.GEMINI_FLASH.value),
833
  ],
834
+ value="flux_klein_9b_fp8",
835
  label="Backend",
836
  scale=2
837
  )
 
844
  scale=2
845
  )
846
 
847
+ with gr.Tabs():
848
+ # =========================================================
849
+ # TAB 1: Character Sheet Generator
850
+ # =========================================================
851
+ with gr.TabItem("Character Sheet Generator"):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
852
  with gr.Row():
853
+ # Left column: Inputs
854
+ with gr.Column(scale=1):
855
+ gr.Markdown("### Input Settings")
856
+
857
+ input_type = gr.Radio(
858
+ choices=["Face Only", "Full Body", "Face + Body (Separate)"],
859
+ value="Face Only",
860
+ label="Input Type",
861
+ info="What type of image(s) are you providing?"
862
+ )
863
+
864
+ main_input = gr.Image(
865
+ label="Input Image",
866
+ type="pil",
867
+ format="png",
868
+ visible=True
869
+ )
870
+
871
+ with gr.Row(visible=False) as face_body_row:
872
+ face_input = gr.Image(
873
+ label="Face Reference",
874
+ type="pil",
875
+ format="png",
876
+ visible=False
877
+ )
878
+ body_input = gr.Image(
879
+ label="Body Reference",
880
+ type="pil",
881
+ format="png",
882
+ visible=False
883
+ )
884
+
885
+ gr.Markdown("### Character Details")
886
+
887
+ character_name = gr.Textbox(
888
+ label="Character Name",
889
+ placeholder="My Character",
890
+ value=""
891
+ )
892
+
893
+ gender = gr.Radio(
894
+ choices=["Auto/Neutral", "Male", "Female"],
895
+ value="Auto/Neutral",
896
+ label="Gender"
897
+ )
898
+
899
+ costume_description = gr.Textbox(
900
+ label="Costume Description (Optional)",
901
+ placeholder="e.g., Full plate armor with gold trim...",
902
+ value="",
903
+ lines=3
904
+ )
905
+
906
+ costume_image = gr.Image(
907
+ label="Costume Reference Image (Optional)",
908
+ type="pil",
909
+ format="png"
910
+ )
911
+
912
+ gr.Markdown("### Generation Parameters")
913
+
914
+ with gr.Row():
915
+ num_steps = gr.Number(
916
+ label="Inference Steps",
917
+ value=4,
918
+ minimum=1,
919
+ maximum=50,
920
+ step=1,
921
+ info="FLUX klein uses 4 steps"
922
+ )
923
+ guidance_scale = gr.Number(
924
+ label="Guidance Scale",
925
+ value=1.0,
926
+ minimum=0.0,
927
+ maximum=10.0,
928
+ step=0.1,
929
+ info="FLUX klein uses 1.0"
930
+ )
931
+
932
+ include_costume_in_faces = gr.Checkbox(
933
+ label="Include costume in face views",
934
+ value=False,
935
+ info="Turn OFF for FLUX (can confuse framing)"
936
+ )
937
+
938
+ # GENERATE BUTTON
939
+ generate_btn = gr.Button(
940
+ "GENERATE CHARACTER SHEET",
941
+ variant="primary",
942
+ size="lg",
943
+ elem_classes=["generate-btn-main"]
944
+ )
945
+
946
+ # Right column: Output
947
+ with gr.Column(scale=2):
948
+ gr.Markdown("### Generated Character Sheet")
949
+
950
+ output_image = gr.Image(
951
+ label="Character Sheet",
952
+ type="pil",
953
+ format="png",
954
+ elem_classes=["output-image"]
955
+ )
956
+
957
+ status_text = gr.Textbox(
958
+ label="Status",
959
+ interactive=False
960
+ )
961
+
962
+ # Preview gallery
963
+ gr.Markdown("### Individual Views Preview")
964
+
965
+ with gr.Row():
966
+ gr.Markdown("**Face Views:**")
967
+ with gr.Row():
968
+ preview_left_face = gr.Image(label="Left Face", type="pil", height=150, width=112)
969
+ preview_front_face = gr.Image(label="Front Face", type="pil", height=150, width=112)
970
+ preview_right_face = gr.Image(label="Right Face", type="pil", height=150, width=112)
971
+
972
+ with gr.Row():
973
+ gr.Markdown("**Body Views:**")
974
+ with gr.Row():
975
+ preview_left_body = gr.Image(label="Left Body", type="pil", height=150, width=84)
976
+ preview_front_body = gr.Image(label="Front Body", type="pil", height=150, width=84)
977
+ preview_right_body = gr.Image(label="Right Body", type="pil", height=150, width=84)
978
+ preview_back_body = gr.Image(label="Back Body", type="pil", height=150, width=84)
979
+
980
+ # Downloads
981
+ gr.Markdown("### Downloads")
982
+ with gr.Row():
983
+ json_download = gr.File(label="Metadata JSON", interactive=False)
984
+ zip_download = gr.File(label="Complete Package (ZIP)", interactive=False)
985
+
986
+ # =========================================================
987
+ # TAB 2: Scene Composer
988
+ # =========================================================
989
+ with gr.TabItem("Scene Composer"):
990
+ gr.Markdown("### Compose Scenes with Your Characters")
991
+ gr.Markdown("Use character sheets to render characters in custom scenes with backgrounds and props.")
992
 
993
  with gr.Row():
994
+ # Left column: Reference inputs
995
+ with gr.Column(scale=1):
996
+ gr.Markdown("### Reference Images")
997
+
998
+ with gr.Row():
999
+ scene_char1 = gr.Image(
1000
+ label="Character Sheet 1 (Required)",
1001
+ type="pil",
1002
+ format="png"
1003
+ )
1004
+ scene_char2 = gr.Image(
1005
+ label="Character Sheet 2 (Optional)",
1006
+ type="pil",
1007
+ format="png"
1008
+ )
1009
+
1010
+ with gr.Row():
1011
+ scene_background = gr.Image(
1012
+ label="Background Image (Optional)",
1013
+ type="pil",
1014
+ format="png"
1015
+ )
1016
+ scene_object = gr.Image(
1017
+ label="Object/Prop (Optional)",
1018
+ type="pil",
1019
+ format="png"
1020
+ )
1021
+
1022
+ gr.Markdown("### Scene Description")
1023
+ scene_description = gr.Textbox(
1024
+ label="Describe the scene",
1025
+ placeholder="e.g., standing on a beach at sunset, dancing in a nightclub, sitting in a cafe...",
1026
+ lines=3
1027
+ )
1028
+
1029
+ scene_aspect_ratio = gr.Dropdown(
1030
+ choices=[
1031
+ "1:1 (Square)",
1032
+ "16:9 (Landscape)",
1033
+ "9:16 (Portrait)",
1034
+ "4:3 (Landscape)",
1035
+ "3:4 (Portrait)",
1036
+ "3:2 (Landscape)",
1037
+ "2:3 (Portrait)",
1038
+ ],
1039
+ value="16:9 (Landscape)",
1040
+ label="Output Aspect Ratio"
1041
+ )
1042
+
1043
+ with gr.Row():
1044
+ scene_steps = gr.Number(
1045
+ label="Inference Steps",
1046
+ value=4,
1047
+ minimum=1,
1048
+ maximum=50,
1049
+ step=1
1050
+ )
1051
+ scene_guidance = gr.Number(
1052
+ label="Guidance Scale",
1053
+ value=1.0,
1054
+ minimum=0.0,
1055
+ maximum=10.0,
1056
+ step=0.1
1057
+ )
1058
+
1059
+ render_btn = gr.Button(
1060
+ "RENDER SCENE",
1061
+ variant="primary",
1062
+ size="lg",
1063
+ elem_classes=["generate-btn-main"]
1064
+ )
1065
+
1066
+ # Right column: Output
1067
+ with gr.Column(scale=2):
1068
+ gr.Markdown("### Rendered Scene")
1069
+
1070
+ scene_output = gr.Image(
1071
+ label="Scene Output",
1072
+ type="pil",
1073
+ format="png",
1074
+ elem_classes=["output-image"]
1075
+ )
1076
+
1077
+ scene_status = gr.Textbox(
1078
+ label="Status",
1079
+ interactive=False
1080
+ )
1081
+
1082
+ gr.Markdown("---")
1083
+ gr.Markdown("""
1084
+ **Tips for Scene Composer:**
1085
+ - Upload a character sheet generated in the first tab, or use any character turnaround image
1086
+ - Add a second character sheet to include multiple characters in the scene
1087
+ - Background images help set the scene location and lighting
1088
+ - Object/prop images can be items the character holds or interacts with
1089
+ - Be descriptive in your scene description for best results
1090
+ """)
1091
+
1092
+ # Event handlers for Tab 1
1093
  input_type.change(
1094
  fn=update_input_visibility,
1095
  inputs=[input_type],
 
1134
  ]
1135
  )
1136
 
1137
+ # Event handlers for Tab 2 (Scene Composer)
1138
+ render_btn.click(
1139
+ fn=render_scene,
1140
+ inputs=[
1141
+ scene_char1,
1142
+ scene_char2,
1143
+ scene_background,
1144
+ scene_object,
1145
+ scene_description,
1146
+ scene_aspect_ratio,
1147
+ backend_dropdown,
1148
+ api_key_input,
1149
+ scene_steps,
1150
+ scene_guidance
1151
+ ],
1152
+ outputs=[
1153
+ scene_output,
1154
+ scene_status
1155
+ ]
1156
+ )
1157
+
1158
  return demo
1159
 
1160