Spaces:
Running on Zero
Update app.py
app.py
CHANGED
@@ -9,171 +9,61 @@ import numpy as np
 from diffusers import BriaFiboPipeline
 from diffusers.modular_pipelines import ModularPipeline
 
-from optimization import optimize_pipeline_
-
-# resolutions=[
-#     "832 1248",
-#     "896 1152",
-#     "960 1088",
-#     "1024 1024",
-#     "1088 960",
-#     "1152 896",
-#     "1216 832",
-#     "1280 800",
-#     "1344 768",
-# ]
 MAX_SEED = np.iinfo(np.int32).max
 dtype = torch.bfloat16
 device = "cuda" if torch.cuda.is_available() else "cpu"
 
 torch.set_grad_enabled(False)
 vlm_pipe = ModularPipeline.from_pretrained("briaai/FIBO-VLM-prompt-to-JSON", trust_remote_code=True).to(device)
 
-pipe = BriaFiboPipeline.from_pretrained(
-    "briaai/FIBO",
-    trust_remote_code=True,
-    dtype=dtype).to(device)
-test_prompt_json = """
-{
-  "short_description": "A surreal and whimsical scene featuring a man, a woman, and a dog posed against a tri-colored backdrop. The woman stands in front of the red section, wearing a t-shirt with a Yoda motif and a skirt with birds. The dog, dressed as a superdog, sits on a checkerboard chair in front of the white section, with a blue tennis ball in its mouth. The man, in a purple suit, stands in front of the gold section, holding a tree branch with a blue jay. The backdrop is divided into red, white, and gold sections, with a small metal grating in the top left and a tear in the gold section. A rustic framed oil painting of the pyramids hangs above the dog.",
-  "objects": [
-    {
-      "description": "A woman standing in front of the red backdrop. She is wearing a beige t-shirt with a Yoda motif and a long skirt with birds on it. Her right hand is holding an axe.",
-      "location": "Center-left",
-      "relationship": "She is positioned in front of the red backdrop and to the left of the dog and man.",
-      "relative_size": "Medium",
-      "shape_and_color": "Humanoid shape, beige and multicolored clothing.",
-      "appearance_details": "She has a long skirt with birds on it and is holding an axe.",
-      "pose": "Standing upright with a slight tilt to the right.",
-      "expression": "Neutral",
-      "clothing": "She is wearing a beige t-shirt with a Yoda motif and a long skirt with birds on it.",
-      "action": "Standing",
-      "gender": "Female",
-      "skin_tone_and_texture": "Fair, smooth."
-    },
-    {
-      "description": "A dog dressed as a superdog, sitting on a checkerboard chair in front of the white backdrop. It has a blue tennis ball in its mouth.",
-      "location": "Center",
-      "relationship": "It is positioned in front of the white backdrop and between the woman and the man.",
-      "relative_size": "Medium",
-      "shape_and_color": "Canine shape, brown and white fur, blue tennis ball.",
-      "appearance_details": "It is dressed as a superdog and has a blue tennis ball in its mouth.",
-      "pose": "Sitting upright.",
-      "expression": "Neutral",
-      "clothing": "Superdog costume.",
-      "action": "Sitting",
-      "gender": "Male",
-      "skin_tone_and_texture": "Brown and white fur, soft."
-    },
-    {
-      "description": "A man standing in front of the gold backdrop. He is wearing a three piece purple suit and has spiky blue hair. His left hand is holding a tree branch with a blue jay on it.",
-      "location": "Center-right",
-      "relationship": "He is positioned in front of the gold backdrop and to the right of the woman and dog.",
-      "relative_size": "Medium",
-      "shape_and_color": "Humanoid shape, purple suit, blue hair.",
-      "appearance_details": "He has spiky blue hair and is holding a tree branch with a blue jay on it.",
-      "pose": "Standing upright with a slight tilt to the left.",
-      "expression": "Neutral",
-      "clothing": "He is wearing a three piece purple suit.",
-      "action": "Standing",
-      "gender": "Male",
-      "skin_tone_and_texture": "Fair, smooth."
-    },
-    {
-      "description": "A checkerboard armchair in yellow and brown.",
-      "location": "Bottom-center",
-      "relationship": "The dog is sitting on the chair.",
-      "relative_size": "Small",
-      "shape_and_color": "Chair shape, yellow and brown.",
-      "texture": "Smooth. End of texture answer.",
-      "appearance_details": "The chair is a checkerboard armchair in yellow and brown."
-    },
-    {
-      "description": "A rustic framed oil painting of the pyramids.",
-      "location": "Top-center",
-      "relationship": "The painting is hanging above the dog.",
-      "relative_size": "Small",
-      "shape_and_color": "Rectangular shape, brown frame, yellow and brown pyramids.",
-      "texture": "Rough. End of texture answer.",
-      "appearance_details": "The painting is a rustic framed oil painting of the pyramids."
-    }
-  ],
-  "background_setting": "The background is a tri-colored backdrop divided equally into red, white, and gold sections. There is a small rectangular metal grating in the top left corner and a subtle tear in the gold backdrop in the bottom right corner.",
-  "lighting": {
-    "conditions": "Studio lighting",
-    "direction": "Front-lit",
-    "shadows": "Soft shadows are present, indicating diffused lighting."
-  },
-  "aesthetics": {
-    "composition": "The composition is centered, with the three figures arranged in a row. The backdrop is divided into thirds, creating a symmetrical balance.",
-    "color_scheme": "The color scheme is triadic, with red, white, and gold dominating the backdrop, complemented by the various colors of the figures' clothing and accessories.",
-    "mood_atmosphere": "The mood is whimsical and surreal, with a touch of humor due to the unusual costumes and props.",
-    "preference_score": "high",
-    "aesthetic_score": "high"
-  },
-  "photographic_characteristics": {
-    "depth_of_field": "Deep",
-    "focus": "Sharp focus on all subjects",
-    "camera_angle": "Eye-level",
-    "lens_focal_length": "Standard"
-  },
-  "style_medium": "Photograph",
-  "text_render": [
-    {
-      "text": "Yoda",
-      "location": "Center of the woman's t-shirt",
-      "size": "Small",
-      "color": "Beige",
-      "font": "Cartoonish",
-      "appearance_details": "The text is part of a graphic design on the t-shirt."
-    }
-  ],
-  "context": "This is a surreal and whimsical portrait of a man, a woman, and a dog posed against a tri-colored backdrop. It could be an art piece or a promotional image for a quirky event or product.",
-  "artistic_style": "Surreal Pop"
-}
-"""
-optimize_pipeline_(pipe, test_prompt_json)
-
-def handle_json(text):
-    try:
-        json.loads(text)
-        return text
-    except:
-        return "Error"
-
 
 @spaces.GPU(duration=100)
-def infer(
     if randomize_seed:
         seed = random.randint(0, MAX_SEED)
-
-    t=time.time()
 
     with torch.inference_mode():
         json_prompt = output.values["json_prompt"]
 
-            num_inference_steps=num_inference_steps,
-            negative_prompt=negative_prompt,
-            width=width,height=height,
-            guidance_scale=guidance_scale).images[0]
 
-    return image, json_prompt
 
 css = """
 #col-container{
@@ -181,85 +71,61 @@
     max-width: 768px;
 }
 """
     with gr.Column(elem_id="col-container"):
-        gr.Markdown("##
-
-        with gr.
-        with gr.
         with gr.Row():
-            submit_btn = gr.Button("Generate")
         result = gr.Image(label="output")
         with gr.Accordion("Advanced Settings", open=False):
-            num_inference_steps = gr.Slider(
-                label="number of inference steps",
-                minimum=1,
-                maximum=60,
-                step=1,
-                value=50,
-            )
-            height = gr.Slider(
-                label="Height",
-                minimum=768,
-                maximum=1248,
-                step=32,
-                value=1024,
-            )
-
-            width = gr.Slider(
-                label="Width",
-                minimum=832,
-                maximum=1344,
-                step=64,
-                value=1024,
-            )
-            with gr.Row():
-                negative_prompt = gr.Textbox(label="negative prompt", value=json.dumps(''))
-                negative_prompt_json = gr.JSON(label="json negative prompt", value=json.dumps(''))
 
-    submit_btn.click(
-        fn = infer,
-        inputs = [
-            prompt_in,
-            negative_prompt,
-            seed,
-            randomize_seed,
-            width,
-            height,
-            guidance_scale,
-            num_inference_steps,
-        ],
-        outputs = [
-            result, prompt_in_json
-        ]
-    )
-demo.queue().launch()
 from diffusers import BriaFiboPipeline
 from diffusers.modular_pipelines import ModularPipeline
 
 MAX_SEED = np.iinfo(np.int32).max
 dtype = torch.bfloat16
 device = "cuda" if torch.cuda.is_available() else "cpu"
 
 torch.set_grad_enabled(False)
 vlm_pipe = ModularPipeline.from_pretrained("briaai/FIBO-VLM-prompt-to-JSON", trust_remote_code=True).to(device)
+pipe = BriaFiboPipeline.from_pretrained("briaai/FIBO", trust_remote_code=True, dtype=dtype).to(device)
 
 
 @spaces.GPU(duration=100)
+def infer(
+    prompt,
+    prompt_refine,
+    prompt_in_json,
+    negative_prompt="",
+    seed=42,
+    randomize_seed=False,
+    width=1024,
+    height=1024,
+    guidance_scale=5,
+    num_inference_steps=50,
+    mode="generate",
+):
     if randomize_seed:
         seed = random.randint(0, MAX_SEED)
 
     with torch.inference_mode():
+        if negative_prompt:
+            neg_output = vlm_pipe(prompt=negative_prompt)
+            neg_json_prompt = neg_output.values["json_prompt"]
+        else:
+            neg_json_prompt = ""
+
+        if mode == "refine":
+            json_prompt_str = (
+                json.dumps(prompt_in_json)
+                if isinstance(prompt_in_json, (dict, list))
+                else str(prompt_in_json)
+            )
+            output = vlm_pipe(json_prompt=json_prompt_str, prompt=prompt_refine)
+        else:
+            output = vlm_pipe(prompt=prompt)
         json_prompt = output.values["json_prompt"]
 
+        image = pipe(
+            prompt=json_prompt,
+            num_inference_steps=num_inference_steps,
+            negative_prompt=neg_json_prompt,
+            width=width,
+            height=height,
+            guidance_scale=guidance_scale,
+        ).images[0]
 
+    return image, seed, json_prompt, neg_json_prompt
 
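
For reference, the two-stage flow that the new infer function implements (free-form prompt to structured JSON via the VLM pipeline, then JSON to image via FIBO) can also be run outside the Space. The sketch below is not part of the commit; it only reuses calls that appear in this diff (vlm_pipe(prompt=...), output.values["json_prompt"], pipe(prompt=json_prompt, ...)), assumes a CUDA device as the Space does, and the example prompt and output filename are illustrative.

import torch
from diffusers import BriaFiboPipeline
from diffusers.modular_pipelines import ModularPipeline

device = "cuda" if torch.cuda.is_available() else "cpu"

# Stage 1: the VLM pipeline converts a free-form prompt into FIBO's structured JSON prompt.
vlm_pipe = ModularPipeline.from_pretrained(
    "briaai/FIBO-VLM-prompt-to-JSON", trust_remote_code=True
).to(device)

# Stage 2: FIBO renders an image from the structured JSON prompt.
pipe = BriaFiboPipeline.from_pretrained(
    "briaai/FIBO", trust_remote_code=True, dtype=torch.bfloat16
).to(device)

with torch.inference_mode():
    # Example prompt (illustrative, not from the Space).
    output = vlm_pipe(prompt="a dog dressed as a superdog on a checkerboard chair")
    json_prompt = output.values["json_prompt"]

    image = pipe(
        prompt=json_prompt,
        num_inference_steps=50,
        width=1024,
        height=1024,
        guidance_scale=5,
    ).images[0]

image.save("fibo.png")  # illustrative output path
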
 css = """
 #col-container{
     max-width: 768px;
 }
 """
+
+with gr.Blocks(css=css, theme=gr.themes.Soft(primary_hue="violet")) as demo:
     with gr.Column(elem_id="col-container"):
+        gr.Markdown("## FIBO")
+
+        with gr.Row():
+            with gr.Tab("generate") as tab_generate:
+                with gr.Row():
+                    prompt_generate = gr.Textbox(label="Prompt")
+
+            with gr.Tab("refine") as tab_refine:
                 with gr.Row():
+                    prompt_refine = gr.Textbox(label="Prompt")
+
+        submit_btn = gr.Button("Generate")
         result = gr.Image(label="output")
+        with gr.Accordion("Structured Prompt", open=False):
+            prompt_in_json = gr.JSON(label="json structured prompt")
         with gr.Accordion("Advanced Settings", open=False):
+            with gr.Row():
+                seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
+                randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
+            with gr.Row():
+                guidance_scale = gr.Slider(label="guidance scale", minimum=1.0, maximum=10.0, step=0.1, value=5.0)
+                num_inference_steps = gr.Slider(
+                    label="number of inference steps", minimum=1, maximum=60, step=1, value=50
+                )
+            height = gr.Slider(label="Height", minimum=768, maximum=1248, step=32, value=1024)
+            width = gr.Slider(label="Width", minimum=832, maximum=1344, step=64, value=1024)
+            with gr.Row():
+                negative_prompt = gr.Textbox(label="negative prompt")
+                negative_prompt_json = gr.JSON(label="json negative prompt")
+
+        # Track active tab
+        current_mode = gr.State("generate")
+
+        tab_generate.select(lambda: "generate", outputs=current_mode)
+        tab_refine.select(lambda: "refine", outputs=current_mode)
 
+    submit_btn.click(
+        fn=infer,
+        inputs=[
+            prompt_generate,
+            prompt_refine,
+            prompt_in_json,
+            negative_prompt,
+            seed,
+            randomize_seed,
+            width,
+            height,
+            guidance_scale,
+            num_inference_steps,
+            current_mode,
+        ],
+        outputs=[result, seed, prompt_in_json, negative_prompt_json],
+    )
 
+demo.queue().launch()
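
The "generate" vs "refine" switch in the new code relies on a gr.State that the two Tab.select events overwrite, and submit_btn.click then receives that state as an ordinary input. Isolated from the app above, the pattern looks roughly like the sketch below; the extra button and textbox exist only to make the example self-contained and are not part of the Space.

import gradio as gr

with gr.Blocks() as demo:
    with gr.Tab("generate") as tab_generate:
        gr.Markdown("generate tab")
    with gr.Tab("refine") as tab_refine:
        gr.Markdown("refine tab")

    # A State holds the currently selected mode; selecting a tab overwrites it.
    current_mode = gr.State("generate")
    mode_display = gr.Textbox(label="mode")  # illustrative output component

    tab_generate.select(lambda: "generate", outputs=current_mode)
    tab_refine.select(lambda: "refine", outputs=current_mode)

    # Any later event can read the state as an input and branch on its value.
    btn = gr.Button("Show mode")
    btn.click(lambda m: m, inputs=current_mode, outputs=mode_display)

if __name__ == "__main__":
    demo.launch()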