| import os |
| import json |
| from io import BytesIO |
| from typing import List |
| from dotenv import load_dotenv |
|
|
| import gradio as gr |
| from gradio_floorplan import FloorPlan |
| from PIL import Image |
| from pydantic import BaseModel, Field |
| from google import genai |
|
|
| from google.genai import types |
|
|
# Load settings from a local .env file before the API client is constructed.
load_dotenv()
# NOTE(review): presumably the client picks up its API key from the
# environment populated above — confirm against the google-genai docs.
client = genai.Client()

# Startup banner so the running build can be identified in the console log.
print("--- LA MAISON APP LOADED (VERSION 1.4 - OPTIMIZED IMAGE GEN & UI) ---")

# Empty layout: the initial value of the floor-plan widget and the fallback
# returned when no image is supplied or layout extraction fails.
DEFAULT_FLOOR_PLAN = {"corners": [], "furnitures": []}
|
|
class FurnitureItem(BaseModel):
    # One furniture entry of the structured layout returned by the model.
    # Documented with comments on purpose.
    # NOTE(review): pydantic is understood to copy a class docstring into the
    # generated JSON schema, which would change the response_schema sent to
    # the model — confirm before converting these comments to a docstring.

    # Furniture name; the layout diff in generate_final_image keys on it.
    object: str
    # Bounding box of the item inside the floor-plan image.
    localisation: List[int] = Field(
        description="[ymin, xmin, ymax, xmax] coordinates",
    )
    # Free-text description of the item.
    description: str
|
|
class RoomLayout(BaseModel):
    # Schema requested from the layout-extraction call (passed as
    # response_schema in process_room_image).
    # Documented with comments on purpose.
    # NOTE(review): a class docstring is understood to end up in the JSON
    # schema sent to the model — confirm before adding one.

    # Room outline as a list of [x, y] points.
    corners: List[List[int]] = Field(
        description="List of [x, y] coordinates for room corners",
    )
    # Every furniture item detected in the room.
    furnitures: List[FurnitureItem]
|
|
def process_room_image(image_numpy):
    """Turn an uploaded room photo into an editable floor-plan layout.

    Two model calls are made. Step 1 asks the image model for a 2D top-down
    floor plan of the photo. Step 2 asks the text model to extract the room
    corners and furniture bounding boxes as JSON, using the generated floor
    plan when available and the original photo otherwise.

    Args:
        image_numpy: The uploaded photo as a numpy array, or None when the
            image input is empty or has been cleared.

    Returns:
        A 4-tuple ``(layout, layout, top_view, top_view)``, one value for each
        output wired to ``image_input.change``: the floor-plan widget, the
        initial-layout state, the top-view state and the top-view preview.
        ``layout`` is ``DEFAULT_FLOOR_PLAN`` when extraction fails and
        ``top_view`` is None when no floor-plan image could be generated.
    """
    if image_numpy is None:
        # The listener is wired to four outputs, so four values must come
        # back here as well; returning only two makes Gradio raise when the
        # image is cleared. None resets the top-view state and preview.
        return DEFAULT_FLOOR_PLAN, DEFAULT_FLOOR_PLAN, None, None

    pil_image = Image.fromarray(image_numpy)

    # --- Step 1: generate the top-down floor plan image -------------------
    gr.Info("Step 1: Generating 2D floor plan image...")
    print("\n--- Starting Gemini 3.1 Flash Image Preview (Nano Banana 2) - Optimized ---")

    # Reuse the shared helper instead of duplicating the streaming loop; it
    # already reports exceptions through gr.Warning and returns None.
    generated_image = _generate_image(
        [
            pil_image,
            "Generate a clean, 2D top-down floor plan image representing this exact room. OUTPUT ONLY THE IMAGE.",
        ],
        label="image",
    )
    if generated_image is not None:
        print("--- Finished generating floor plan image ---\n")
        # Only announce success when an image was actually received.
        gr.Info("Step 1 Complete: Floor plan image generated!")
    else:
        print("--- No floor plan image produced; analysing the original photo instead ---\n")

    # --- Step 2: extract corners and furniture coordinates ----------------
    layout_json = DEFAULT_FLOOR_PLAN
    # Explicit None check: the fallback must not depend on image truthiness.
    image_to_parse = generated_image if generated_image is not None else pil_image

    gr.Info("Step 2: Analyzing top plan image to extract coordinates...")

    prompted_model = "gemini-3-flash-preview"

    try:
        response_json = client.models.generate_content(
            model=prompted_model,
            contents=[
                image_to_parse,
                "OUTPUT FORMAT INSTRUCTION: Return ONLY a valid JSON string. DO NOT provide any reasoning, markdown formatting, or conversational text.\n"
                "Analyze this floor plan (or room image) and output the exact coordinates "
                "for the room corners formatted as [[x, y], ...] and a list of furnitures with "
                "their object name, description, and bounding box [ymin, xmin, ymax, xmax]."
            ],
            config={
                'response_mime_type': 'application/json',
                'response_schema': RoomLayout
            }
        )
        if response_json.text:
            layout_json = json.loads(response_json.text)
            gr.Info("Step 2 Complete: Coordinates extracted successfully!")
            print(layout_json)
    except Exception as e:
        # UI boundary: report the failure and fall back to the empty layout
        # rather than crashing the event handler.
        gr.Warning(f"Error parsing layout JSON: {e}")
        print(f"Error parsing layout JSON: {e}")

    return layout_json, layout_json, generated_image, generated_image
|
|
def _generate_image(contents, label="image"):
    """Request a single image from the Gemini image model.

    The response is streamed and the first part carrying inline data is
    decoded into a PIL image and returned immediately.

    Args:
        contents: Prompt parts (images and/or text) forwarded to the model.
        label: Human-readable name of the image, used in log and warning text.

    Returns:
        The decoded PIL image, or None when the stream carried no image or
        the request raised (the error is surfaced as a Gradio warning and
        printed to the console).
    """
    request_config = types.GenerateContentConfig(
        thinking_config=types.ThinkingConfig(thinking_level="MINIMAL"),
        image_config=types.ImageConfig(image_size="1K"),
        response_modalities=["IMAGE"],
    )
    try:
        stream = client.models.generate_content_stream(
            model='models/gemini-3.1-flash-image-preview',
            contents=contents,
            config=request_config,
        )
        for chunk in stream:
            # Chunks without parts contribute nothing to iterate over.
            for part in chunk.parts or ():
                if not part.inline_data:
                    continue
                print(f"[Image Data] Received {label}")
                return Image.open(BytesIO(part.inline_data.data))
    except Exception as error:
        message = f"Error generating {label}: {error}"
        gr.Warning(message)
        print(message)
    return None
|
|
|
|
def _group_furnitures_by_name(layout):
    """Group a layout's furniture entries by "object" name, preserving order."""
    groups = {}
    # A missing layout or a missing/None "furnitures" entry counts as empty.
    for item in (layout or {}).get("furnitures") or []:
        groups.setdefault(item["object"], []).append(item)
    return groups


def _pair_same_name_items(old_items, new_items):
    """Match same-named items; return (moved pairs, added items, removed items)."""
    unmatched_old = list(old_items)
    unmatched_new = []
    for new_item in new_items:
        for index, old_item in enumerate(unmatched_old):
            if old_item["localisation"] == new_item["localisation"]:
                # Same place before and after: not a change at all.
                del unmatched_old[index]
                break
        else:
            unmatched_new.append(new_item)
    # Whatever is left is paired in order and treated as a move; any surplus
    # on either side is an addition or a removal.
    moved_pairs = list(zip(unmatched_old, unmatched_new))
    paired = len(moved_pairs)
    return moved_pairs, unmatched_new[paired:], unmatched_old[paired:]


def _diff_furnitures(initial_layout, current_layout):
    """Return the {"moved", "added", "removed"} changes between two layouts."""
    old_groups = _group_furnitures_by_name(initial_layout)
    new_groups = _group_furnitures_by_name(current_layout)
    moved, added, removed = [], [], []
    for name, new_items in new_groups.items():
        pairs, extra_new, extra_old = _pair_same_name_items(
            old_groups.get(name, []), new_items
        )
        moved.extend(
            {"object": name, "from": old["localisation"], "to": new["localisation"]}
            for old, new in pairs
        )
        added.extend(extra_new)
        removed.extend(extra_old)
    for name, old_items in old_groups.items():
        if name not in new_groups:
            removed.extend(old_items)
    return {"moved": moved, "added": added, "removed": removed}


def generate_final_image(original_image_numpy, current_layout_json, initial_layout_json, original_topview):
    """Render the room with the furniture arrangement edited in the floor plan.

    Step A redraws the top-down floor plan with the furniture changes
    applied. Step B asks the image model for a photorealistic view from the
    original camera angle that follows the new floor plan.

    Furniture is compared per "object" name. Several items may share a name
    (for example two chairs); they are tracked individually instead of being
    collapsed into one entry, so none of them is lost from the change list.

    Args:
        original_image_numpy: The uploaded room photo as a numpy array, or
            None when nothing has been uploaded.
        current_layout_json: Layout currently shown in the floor-plan widget.
        initial_layout_json: Layout originally extracted from the photo.
        original_topview: Top-view image generated at upload time, or None;
            the original photo is used as Step A input in that case.

    Returns:
        ``(new_topview, final_image)``. Both are None when no photo is
        available or Step A fails; ``final_image`` alone is None when only
        Step B fails.
    """
    if original_image_numpy is None:
        gr.Warning("Please upload an image first.")
        return None, None

    pil_image = Image.fromarray(original_image_numpy)

    # Work out what the user changed so the prompt can spell it out.
    changes = _diff_furnitures(initial_layout_json, current_layout_json)
    changes_str = json.dumps(changes, indent=2)
    new_layout_str = json.dumps(current_layout_json, indent=2)
    print(f"Furniture changes: {changes_str}")

    # --- Step A: updated top-view floor plan ------------------------------
    gr.Info("Step A: Generating updated top-view floor plan image...")
    print("\n--- Step A: new top-view from floor plan ---")
    step_a_input = original_topview if original_topview is not None else pil_image
    prompt_a = f"""This is a 2D top-down floor plan image of a room.
Redraw it as a clean 2D top-down floor plan image applying the following furniture changes.
Coordinates are bounding boxes [ymin, xmin, ymax, xmax].

Changes to apply:
{changes_str}

Full new target layout for reference:
{new_layout_str}

OUTPUT ONLY THE IMAGE. Keep the same room boundaries and style."""

    new_topview = _generate_image([step_a_input, prompt_a], label="new top-view")
    if new_topview is None:
        gr.Warning("Step A failed: could not generate updated top-view.")
        return None, None

    gr.Info("Step A complete. Generating final photorealistic image...")

    # --- Step B: photorealistic synthesis ---------------------------------
    print("\n--- Step B: photorealistic synthesis ---")
    prompt_b = """You are given two images:
1. A 2D top-down floor plan showing the NEW furniture layout.
2. The original room photo.

Generate a high-quality photorealistic image of the room from the EXACT SAME camera angle as the original photo, but with the furniture repositioned to match the new floor plan.

STRICT INSTRUCTIONS:
1. Maintain the exact camera point-of-view, lighting, colors, and architectural features of the original photo.
2. Place furniture according to the new floor plan layout.
3. The room must look realistic and clean."""

    final_image = _generate_image([new_topview, pil_image, prompt_b], label="final photorealistic")

    if final_image is None:
        gr.Warning("Step B failed: could not generate final image.")
        # Still hand back the Step A result so the user sees partial progress.
        return new_topview, None

    gr.Info("Final image generated successfully!")
    return new_topview, final_image
|
|
# NOTE(review): the original indentation of this section was lost, so the
# nesting below is a best guess — confirm whether generate_button belongs
# inside the second column and whether image_output sits inside the result row.
with gr.Blocks() as demo:
    gr.Markdown("# LaMaison\nRearrange your spaces using visual planning.")

    # Per-session state: the layout as first extracted from the photo and the
    # top-view image generated from it; both are read by generate_final_image.
    initial_layout_state = gr.State(DEFAULT_FLOOR_PLAN)
    topview_state = gr.State(None)

    # Input row: photo upload on the left, editable floor plan on the right.
    with gr.Row():
        with gr.Column(scale=1):
            image_input = gr.Image(
                label="Upload a room image",
                type="numpy",
            )
        with gr.Column(scale=1):
            floor_plan = FloorPlan(
                value=DEFAULT_FLOOR_PLAN,
                label="Floor Plan",
                interactive=True,
            )

    generate_button = gr.Button(
        "Generate New Image based on Floor plan",
        variant="primary",
    )

    # Result row: original top view, redrawn top view and the final render.
    with gr.Row():
        topview_before = gr.Image(label="Top View — Original", interactive=False)
        topview_after = gr.Image(label="Top View — New Layout (Step A)", interactive=False)
        image_output = gr.Image(label="Final Photorealistic Output (Step B)", interactive=False)

    # A new or cleared upload re-runs the analysis and refreshes the floor
    # plan, both state values and the "original" top-view preview.
    image_input.change(
        fn=process_room_image,
        inputs=image_input,
        outputs=[floor_plan, initial_layout_state, topview_state, topview_before],
    )

    # The button renders the edited layout: first the new top view, then the
    # photorealistic image.
    generate_button.click(
        fn=generate_final_image,
        inputs=[image_input, floor_plan, initial_layout_state, topview_state],
        outputs=[topview_after, image_output],
    )
|
|
if __name__ == "__main__":
    # Start the app only when run as a script; the server is bound to
    # address 0.0.0.0 on port 7860.
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
    )
|
|