Spaces:

sam2ai
/

Image_editing

Sleeping

App Files Files Community

sam2ai commited on Sep 13, 2025

Commit

49f83cc

verified ·

1 Parent(s): 03ddf17

Update app.py

Browse files

Files changed (1) hide show

app.py +282 -193

app.py CHANGED Viewed

@@ -1,25 +1,7 @@
 import gradio as gr
 import json
-import os
-import mimetypes
-from google import genai
-from google.genai import types
-from PIL import Image
-import time
-# --- Helper Function to Save Generated Image ---
-def save_binary_file(directory, file_name, data):
-    """Saves binary data to a file, creating the directory if needed."""
-    if not os.path.exists(directory):
-        os.makedirs(directory)
-    file_path = os.path.join(directory, file_name)
-    with open(file_path, "wb") as f:
-        f.write(data)
-    print(f"File saved to: {file_path}")
-    return file_path
-# --- Main Function to Generate Image ---
-def generate_image(
     api_key,
     reference_image,
     scene,
@@ -66,203 +48,310 @@ def generate_image(
     variant_name,
     variant_angle,
 ):
-    # --- Input Validation ---
-    if not api_key:
-        raise gr.Error("API Key is missing. Please enter your Gemini API key.")
-    if reference_image is None:
-        raise gr.Error("Reference image is missing. Please upload an image.")
-    # --- Build Banned List ---
     banned_items = []
-    if ban_mirror: banned_items.append("mirror")
-    if ban_phone: banned_items.append("phone")
-    if ban_selfie: banned_items.append("selfie look")
-    if ban_grainy: banned_items.append("grainy noise")
-    if ban_harsh_flash: banned_items.append("harsh LED flash")
-    if ban_logos: banned_items.append("logos/brand text")
-    if ban_nsfw: banned_items.append("nsfw")
-    if ban_cropped_feet: banned_items.append("cropped feet")
-    # --- Construct JSON Payload ---
     output_json = {
         "scene": scene,
-        "subject": {"type": subject_type, "age_range": age_range, "hair": hair, "makeup": makeup, "jewellery": jewellery},
-        "wardrobe": {"top": top, "bottom": bottom, "footwear": footwear, "notes": wardrobe_notes},
-        "pose": {"angle": pose_angle, "body": body_pose, "hands": hands_pose, "framing": framing},
-        "camera": {"device": camera_device, "flash": flash, "orientation": orientation, "aspect_ratio": aspect_ratio, "distance": distance, "focus": focus},
-        "look": {"texture": texture, "sharpness": sharpness, "color": color, "effects": effects},
-        "background": {"environment": background_environment, "props": background_props},
         "style": {"genre": style_genre, "authenticity": authenticity},
-        "reference_face": {"use_original_structure": use_original_structure, "description": face_description},
         "ban": banned_items,
-        "output": {"count": int(output_count), "size": output_size, "safety": safety},
         "variants": [{"name": variant_name, "angle": variant_angle}],
     }
-    final_json_string = json.dumps(output_json, indent=4)
-    # --- Call Gemini API ---
-    try:
-        # Configure the client
-        client = genai.Client(api_key=api_key)
-        # Prepare the prompt parts (JSON instructions + reference image)
-        prompt_text_part = types.Part.from_text(text=final_json_string)
-        with open(reference_image, 'rb') as f:
-            image_data = f.read()
-        image_mime_type = mimetypes.guess_type(reference_image)[0]
-        image_part = types.Part.from_data(data=image_data, mime_type=image_mime_type)
-        # Define the model and generation config
-        model = "gemini-1.5-flash-latest" # Using a standard available model name
-        contents = [types.Content(role="user", parts=[prompt_text_part, image_part])]
-        generate_content_config = types.GenerateContentConfig(
-            response_modalities=["IMAGE", "TEXT"],
-        )
-        # --- Process Streaming Response ---
-        output_files = []
-        output_directory = "generated_images"
-        timestamp = int(time.time())
-        file_index = 0
-        # Make the streaming API call
-        response_stream = client.models.generate_content_stream(
-            model=model,
-            contents=contents,
-            config=generate_content_config,
-        )
-        for chunk in response_stream:
-            if chunk.candidates and chunk.candidates[0].content and chunk.candidates[0].content.parts:
-                part = chunk.candidates[0].content.parts[0]
-                if part.inline_data and part.inline_data.data:
-                    inline_data = part.inline_data
-                    file_extension = mimetypes.guess_extension(inline_data.mime_type)
-                    file_name = f"output_{timestamp}_{file_index}{file_extension}"
-                    # Save the file and get its path
-                    saved_file_path = save_binary_file(output_directory, file_name, inline_data.data)
-                    output_files.append(saved_file_path)
-                    file_index += 1
-                elif part.text:
-                    print(f"Received text chunk: {part.text}")
-        if not output_files:
-            return None, final_json_string, "No image was generated. Please check the model's response or your prompt."
-        # Return file paths for the Gallery and the JSON for inspection
-        return output_files, final_json_string, "Image generation complete."
-    except Exception as e:
-        # Handle potential errors gracefully
-        error_message = f"An error occurred: {str(e)}"
-        print(error_message)
-        raise gr.Error(error_message)
-# --- Gradio Interface Definition ---
-with gr.Blocks(theme=gr.themes.Soft()) as demo:
-    gr.Markdown("# Gemini Image Generation Studio")
-    gr.Markdown("Use the tabs below to define your image, then click 'Generate Image' to call the API.")
-    with gr.Row():
-        with gr.Column(scale=1):
-            # --- Left Column for Inputs ---
-            with gr.Tabs():
-                with gr.TabItem("🔑 API & Image"):
-                    api_key_input = gr.Textbox(label="Gemini API Key", type="password", info="Your API key is required to generate images.")
-                    reference_image_input = gr.Image(label="Reference Image", type="filepath", info="Upload the base image for generation or editing.")
-                with gr.TabItem("🎨 Scene & Subject"):
-                    scene_input = gr.Textbox(label="Scene", value="cinematic outdoor portrait; professional photography")
-                    subject_type_input = gr.Textbox(label="Subject Type", value="adult woman (idol vibe)")
-                    age_range_input = gr.Textbox(label="Age Range", value="20s")
-                    hair_input = gr.Textbox(label="Hair", value="straight or styled natural open hair with natural shine")
-                    makeup_input = gr.Textbox(label="Makeup", value="glossy lips, soft eyeliner, luminous skin")
-                    jewellery_input = gr.Textbox(label="Jewellery", value="small hoops, thin chain, subtle bracelets")
-                with gr.TabItem("👕 Wardrobe"):
-                    top_input = gr.Textbox(label="Top", value="basic tee or camisole")
-                    bottom_input = gr.Textbox(label="Bottom", value="denim shorts or mini skirt")
-                    footwear_input = gr.Textbox(label="Footwear", value="sneakers or ankle boots")
-                    wardrobe_notes_input = gr.Textbox(label="Wardrobe Notes", value="casual modern look, styled for natural setting")
-                with gr.TabItem("🧍 Pose & Framing"):
-                    pose_angle_input = gr.Dropdown(label="Pose Angle", choices=["three-quarter", "full body"], value="three-quarter")
-                    body_pose_input = gr.Textbox(label="Body Pose", value="standing or walking casually, relaxed natural posture")
-                    hands_pose_input = gr.Textbox(label="Hands Pose", value="one resting by side or touching hair, the other relaxed")
-                    framing_input = gr.Dropdown(label="Framing", choices=["head-to-toe", "waist-up", "cinematic composition"], value="waist-up")
-                with gr.TabItem("📷 Camera & Look"):
-                    camera_device_input = gr.Textbox(label="Camera Device", value="professional cinema camera / DSLR with prime lens")
-                    flash_input = gr.Textbox(label="Flash", value="none; natural golden hour light or soft reflectors")
-                    orientation_input = gr.Dropdown(label="Orientation", choices=["vertical", "horizontal"], value="vertical")
-                    aspect_ratio_input = gr.Dropdown(label="Aspect Ratio", choices=["16:9", "3:2", "4:3", "1:1"], value="16:9")
-                    distance_input = gr.Textbox(label="Distance", value="cinematic portrait distance with shallow depth")
-                    focus_input = gr.Textbox(label="Focus", value="sharp on subject; soft bokeh background")
-                    texture_input = gr.Textbox(label="Texture", value="smooth high-resolution detail")
-                    sharpness_input = gr.Textbox(label="Sharpness", value="very high; crisp cinematic clarity")
-                    color_input = gr.Textbox(label="Color", value="warm cinematic grading; golden tones and soft contrast")
-                    effects_input = gr.Textbox(label="Effects", value="subtle film grain; natural light flares, depth of field")
-                with gr.TabItem("🌳 Background & Style"):
-                    background_environment_input = gr.Textbox(label="Background Environment", value="nature setting — forest, park, or meadow with soft light")
-                    background_props_input = gr.Textbox(label="Background Props", value="none; focus on subject against natural backdrop")
-                    style_genre_input = gr.Textbox(label="Style Genre", value="cinematic portrait photography")
-                    authenticity_input = gr.Textbox(label="Authenticity", value="natural, elegant, polished")
-                with gr.TabItem("👤 Face & Bans"):
-                    use_original_structure_input = gr.Checkbox(label="Use Original Face Structure", value=True)
-                    face_description_input = gr.Textbox(label="Face Description", value="maintain the same face shape, features, and proportions as in the provided reference image")
-                    gr.Markdown("#### Banned Items")
-                    with gr.Row():
-                        ban_mirror_input = gr.Checkbox(label="Mirror")
-                        ban_phone_input = gr.Checkbox(label="Phone")
-                        ban_selfie_input = gr.Checkbox(label="Selfie Look")
-                        ban_grainy_input = gr.Checkbox(label="Grainy Noise")
-                    with gr.Row():
-                        ban_harsh_flash_input = gr.Checkbox(label="Harsh Flash")
-                        ban_logos_input = gr.Checkbox(label="Logos")
-                        ban_nsfw_input = gr.Checkbox(label="NSFW")
-                        ban_cropped_feet_input = gr.Checkbox(label="Cropped Feet")
-                with gr.TabItem("⚙️ Output & Variants"):
-                    output_count_input = gr.Slider(label="Output Count", minimum=1, maximum=4, step=1, value=1)
-                    output_size_input = gr.Textbox(label="Output Size", value="1024x1024")
-                    safety_input = gr.Dropdown(label="Safety", choices=["strict", "moderate", "none"], value="strict")
-                    variant_name_input = gr.Textbox(label="Variant Name", value="cinematic_nature_fullbody")
-                    variant_angle_input = gr.Textbox(label="Variant Angle", value="full-body shot in meadow or forest path, subject centered with depth of field")
-        with gr.Column(scale=1):
-            # --- Right Column for Outputs ---
-            generate_button = gr.Button("Generate Image", variant="primary")
-            status_text = gr.Textbox(label="Status", interactive=False)
-            image_gallery = gr.Gallery(label="Generated Image(s)", show_label=True, elem_id="gallery", columns=[2], rows=[2], object_fit="contain", height="auto")
-            json_output = gr.JSON(label="Generated JSON Input")
-    # --- Button Click Action ---
-    all_inputs = [
-        api_key_input, reference_image_input, scene_input, subject_type_input,
-        age_range_input, hair_input, makeup_input, jewellery_input, top_input,
-        bottom_input, footwear_input, wardrobe_notes_input, pose_angle_input,
-        body_pose_input, hands_pose_input, framing_input, camera_device_input,
-        flash_input, orientation_input, aspect_ratio_input, distance_input,
-        focus_input, texture_input, sharpness_input, color_input, effects_input,
-        background_environment_input, background_props_input, style_genre_input,
-        authenticity_input, use_original_structure_input, face_description_input,
-        ban_mirror_input, ban_phone_input, ban_selfie_input, ban_grainy_input,
-        ban_harsh_flash_input, ban_logos_input, ban_nsfw_input,
-        ban_cropped_feet_input, output_count_input, output_size_input,
-        safety_input, variant_name_input, variant_angle_input
-    ]
     generate_button.click(
-        fn=generate_image,
-        inputs=all_inputs,
-        outputs=[image_gallery, json_output, status_text],
     )
 if __name__ == "__main__":
-    demo.launch(debug=True)

 import gradio as gr
 import json
+def build_json(
     api_key,
     reference_image,
     scene,
     variant_name,
     variant_angle,
 ):
     banned_items = []
+    if ban_mirror:
+        banned_items.append("mirror")
+    if ban_phone:
+        banned_items.append("phone")
+    if ban_selfie:
+        banned_items.append("selfie look")
+    if ban_grainy:
+        banned_items.append("grainy noise")
+    if ban_harsh_flash:
+        banned_items.append("harsh LED flash")
+    if ban_logos:
+        banned_items.append("logos/brand text")
+    if ban_nsfw:
+        banned_items.append("nsfw")
+    if ban_cropped_feet:
+        banned_items.append("cropped feet")
     output_json = {
         "scene": scene,
+        "subject": {
+            "type": subject_type,
+            "age_range": age_range,
+            "hair": hair,
+            "makeup": makeup,
+            "jewellery": jewellery,
+        },
+        "wardrobe": {
+            "top": top,
+            "bottom": bottom,
+            "footwear": footwear,
+            "notes": wardrobe_notes,
+        },
+        "pose": {
+            "angle": pose_angle,
+            "body": body_pose,
+            "hands": hands_pose,
+            "framing": framing,
+        },
+        "camera": {
+            "device": camera_device,
+            "flash": flash,
+            "orientation": orientation,
+            "aspect_ratio": aspect_ratio,
+            "distance": distance,
+            "focus": focus,
+        },
+        "look": {
+            "texture": texture,
+            "sharpness": sharpness,
+            "color": color,
+            "effects": effects,
+        },
+        "background": {
+            "environment": background_environment,
+            "props": background_props,
+        },
         "style": {"genre": style_genre, "authenticity": authenticity},
+        "reference_face": {
+            "use_original_structure": use_original_structure,
+            "description": face_description,
+        },
         "ban": banned_items,
+        "output": {
+            "count": output_count,
+            "size": output_size,
+            "safety": safety
+        },
         "variants": [{"name": variant_name, "angle": variant_angle}],
     }
+    # The user's API key is available in the 'api_key' variable.
+    # You can now use this key and the reference_image path to call the
+    # gemini nano banana API with the generated 'output_json'.
+    # For this example, we will just return the generated JSON.
+    return json.dumps(output_json, indent=4)
+with gr.Blocks() as demo:
+    gr.Markdown("# Gemini Nano Banana Image Generator Interface")
+    gr.Markdown(
+        "Fill in the details below to generate the JSON input for the image generation model."
+    )
+    with gr.Tabs():
+        with gr.TabItem("Authentication and Image"):
+            api_key_input = gr.Textbox(
+                label="Gemini Nano Banana API Key", type="password"
+            )
+            reference_image_input = gr.Image(
+                label="Reference Image", type="filepath"
+            )
+        with gr.TabItem("Scene and Subject"):
+            with gr.Row():
+                scene_input = gr.Textbox(
+                    label="Scene", value="cinematic outdoor portrait; professional photography"
+                )
+            with gr.Row():
+                subject_type_input = gr.Textbox(
+                    label="Subject Type", value="adult woman (idol vibe)"
+                )
+                age_range_input = gr.Textbox(label="Age Range", value="20s")
+            with gr.Row():
+                hair_input = gr.Textbox(
+                    label="Hair",
+                    value="straight or styled natural open hair with natural shine",
+                )
+                makeup_input = gr.Textbox(
+                    label="Makeup", value="glossy lips, soft eyeliner, luminous skin"
+                )
+                jewellery_input = gr.Textbox(
+                    label="Jewellery", value="small hoops, thin chain, subtle bracelets"
+                )
+        with gr.TabItem("Wardrobe"):
+            with gr.Row():
+                top_input = gr.Textbox(label="Top", value="basic tee or camisole")
+                bottom_input = gr.Textbox(
+                    label="Bottom", value="denim shorts or mini skirt"
+                )
+                footwear_input = gr.Textbox(
+                    label="Footwear", value="sneakers or ankle boots"
+                )
+            with gr.Row():
+                wardrobe_notes_input = gr.Textbox(
+                    label="Wardrobe Notes",
+                    value="casual modern look, styled for natural setting",
+                )
+        with gr.TabItem("Pose and Framing"):
+            with gr.Row():
+                pose_angle_input = gr.Dropdown(
+                    label="Pose Angle",
+                    choices=["three-quarter", "full body"],
+                    value="three-quarter",
+                )
+                body_pose_input = gr.Textbox(
+                    label="Body Pose",
+                    value="standing or walking casually, relaxed natural posture",
+                )
+            with gr.Row():
+                hands_pose_input = gr.Textbox(
+                    label="Hands Pose",
+                    value="one resting by side or touching hair, the other relaxed",
+                )
+                framing_input = gr.Dropdown(
+                    label="Framing",
+                    choices=["head-to-toe", "waist-up"],
+                    value="waist-up",
+                )
+        with gr.TabItem("Camera and Look"):
+            with gr.Row():
+                camera_device_input = gr.Textbox(
+                    label="Camera Device",
+                    value="professional cinema camera / DSLR with prime lens",
+                )
+                flash_input = gr.Textbox(
+                    label="Flash", value="none; natural golden hour light or soft reflectors"
+                )
+            with gr.Row():
+                orientation_input = gr.Dropdown(
+                    label="Orientation", choices=["vertical", "horizontal"], value="vertical"
+                )
+                aspect_ratio_input = gr.Dropdown(
+                    label="Aspect Ratio", choices=["16:9", "3:2", "4:3", "1:1"], value="16:9"
+                )
+            with gr.Row():
+                distance_input = gr.Textbox(
+                    label="Distance", value="cinematic portrait distance with shallow depth"
+                )
+                focus_input = gr.Textbox(
+                    label="Focus", value="sharp on subject; soft bokeh background"
+                )
+            with gr.Row():
+                texture_input = gr.Textbox(
+                    label="Texture", value="smooth high-resolution detail"
+                )
+                sharpness_input = gr.Textbox(
+                    label="Sharpness", value="very high; crisp cinematic clarity"
+                )
+            with gr.Row():
+                color_input = gr.Textbox(
+                    label="Color", value="warm cinematic grading; golden tones and soft contrast"
+                )
+                effects_input = gr.Textbox(
+                    label="Effects",
+                    value="subtle film grain; natural light flares, depth of field",
+                )
+        with gr.TabItem("Background and Style"):
+            with gr.Row():
+                background_environment_input = gr.Textbox(
+                    label="Background Environment",
+                    value="nature setting — forest, park, or meadow with soft light",
+                )
+                background_props_input = gr.Textbox(
+                    label="Background Props", value="none; focus on subject against natural backdrop"
+                )
+            with gr.Row():
+                style_genre_input = gr.Textbox(
+                    label="Style Genre", value="cinematic portrait photography"
+                )
+                authenticity_input = gr.Textbox(
+                    label="Authenticity", value="natural, elegant, polished"
+                )
+        with gr.TabItem("Reference Face and Bans"):
+            with gr.Row():
+                use_original_structure_input = gr.Checkbox(
+                    label="Use Original Face Structure", value=True
+                )
+            with gr.Row():
+                face_description_input = gr.Textbox(
+                    label="Face Description",
+                    value="maintain the same face shape, features, and proportions as in the provided reference image",
+                )
+            with gr.Row():
+                gr.Markdown("#### Banned Items")
+            with gr.Row():
+                ban_mirror_input = gr.Checkbox(label="Mirror")
+                ban_phone_input = gr.Checkbox(label="Phone")
+                ban_selfie_input = gr.Checkbox(label="Selfie Look")
+                ban_grainy_input = gr.Checkbox(label="Grainy Noise")
+            with gr.Row():
+                ban_harsh_flash_input = gr.Checkbox(label="Harsh LED Flash")
+                ban_logos_input = gr.Checkbox(label="Logos/Brand Text")
+                ban_nsfw_input = gr.Checkbox(label="NSFW")
+                ban_cropped_feet_input = gr.Checkbox(label="Cropped Feet")
+        with gr.TabItem("Output and Variants"):
+            with gr.Row():
+                output_count_input = gr.Slider(
+                    label="Output Count", minimum=1, maximum=10, step=1, value=1
+                )
+                output_size_input = gr.Textbox(label="Output Size", value="1920x1080")
+                safety_input = gr.Dropdown(
+                    label="Safety", choices=["strict", "moderate", "none"], value="strict"
+                )
+            with gr.Row():
+                variant_name_input = gr.Textbox(
+                    label="Variant Name", value="cinematic_nature_fullbody"
+                )
+                variant_angle_input = gr.Textbox(
+                    label="Variant Angle",
+                    value="full-body shot in meadow or forest path, subject centered with depth of field",
+                )
+    generate_button = gr.Button("Generate JSON")
+    json_output = gr.JSON(label="Generated JSON")
     generate_button.click(
+        fn=build_json,
+        inputs=[
+            api_key_input,
+            reference_image_input,
+            scene_input,
+            subject_type_input,
+            age_range_input,
+            hair_input,
+            makeup_input,
+            jewellery_input,
+            top_input,
+            bottom_input,
+            footwear_input,
+            wardrobe_notes_input,
+            pose_angle_input,
+            body_pose_input,
+            hands_pose_input,
+            framing_input,
+            camera_device_input,
+            flash_input,
+            orientation_input,
+            aspect_ratio_input,
+            distance_input,
+            focus_input,
+            texture_input,
+            sharpness_input,
+            color_input,
+            effects_input,
+            background_environment_input,
+            background_props_input,
+            style_genre_input,
+            authenticity_input,
+            use_original_structure_input,
+            face_description_input,
+            ban_mirror_input,
+            ban_phone_input,
+            ban_selfie_input,
+            ban_grainy_input,
+            ban_harsh_flash_input,
+            ban_logos_input,
+            ban_nsfw_input,
+            ban_cropped_feet_input,
+            output_count_input,
+            output_size_input,
+            safety_input,
+            variant_name_input,
+            variant_angle_input,
+        ],
+        outputs=json_output,
     )
 if __name__ == "__main__":
+    demo.launch()