"""LaMaison: rearrange a room's furniture through an editable floor plan.

Pipeline implemented in this module:

1. ``process_room_image`` turns an uploaded room photo into a 2D top-view
   image and extracts room corners / furniture boxes as JSON.
2. The user edits the layout in the ``FloorPlan`` component.
3. ``generate_final_image`` redraws the top view with the edits applied and
   then synthesizes a photorealistic image of the rearranged room.
"""
import copy
import json
import os
from collections import Counter
from io import BytesIO
from typing import List

import gradio as gr
from dotenv import load_dotenv
from google import genai
from google.genai import types
from gradio_floorplan import FloorPlan
from PIL import Image
from pydantic import BaseModel, Field

# Environment variables must be loaded before the client is constructed.
load_dotenv()
client = genai.Client()
print("--- LA MAISON APP LOADED (VERSION 1.4 - OPTIMIZED IMAGE GEN & UI) ---")

IMAGE_MODEL = 'models/gemini-3.1-flash-image-preview'
LAYOUT_MODEL = "gemini-3-flash-preview"

DEFAULT_FLOOR_PLAN = {
    "corners": [],
    "furnitures": [],
}

FLOOR_PLAN_PROMPT = (
    "Generate a clean, 2D top-down floor plan image representing this exact room. "
    "OUTPUT ONLY THE IMAGE."
)

LAYOUT_PROMPT = (
    "OUTPUT FORMAT INSTRUCTION: Return ONLY a valid JSON string. DO NOT provide any reasoning, markdown formatting, or conversational text.\n"
    "Analyze this floor plan (or room image) and output the exact coordinates "
    "for the room corners formatted as [[x, y], ...] and a list of furnitures with "
    "their object name, description, and bounding box [ymin, xmin, ymax, xmax]."
)


class FurnitureItem(BaseModel):
    # Field names are part of the JSON schema sent to the model; do not rename.
    object: str
    localisation: List[int] = Field(description="[ymin, xmin, ymax, xmax] coordinates")
    description: str


class RoomLayout(BaseModel):
    corners: List[List[int]] = Field(description="List of [x, y] coordinates for room corners")
    furnitures: List[FurnitureItem]


def _empty_layout():
    """Return a fresh copy of the empty layout so callers never share state."""
    return copy.deepcopy(DEFAULT_FLOOR_PLAN)


def _generate_image(contents, label="image"):
    """Call Gemini image generation and return a PIL Image or None.

    Args:
        contents: Images and prompt text passed as the request contents.
        label: Human-readable name used in log and warning messages.

    Returns:
        The first inline image found in the streamed response, or ``None``
        when the stream carries no image or the request raises.
    """
    config = types.GenerateContentConfig(
        thinking_config=types.ThinkingConfig(thinking_level="MINIMAL"),
        image_config=types.ImageConfig(image_size="1K"),
        response_modalities=["IMAGE"],
    )
    try:
        response_stream = client.models.generate_content_stream(
            model=IMAGE_MODEL,
            contents=contents,
            config=config,
        )
        for chunk in response_stream:
            for part in chunk.parts or []:
                if part.inline_data:
                    print(f"[Image Data] Received {label}")
                    return Image.open(BytesIO(part.inline_data.data))
    except Exception as e:
        # UI boundary: report the failure to the user instead of crashing.
        gr.Warning(f"Error generating {label}: {e}")
        print(f"Error generating {label}: {e}")
    return None


def process_room_image(image_numpy):
    """Build the initial floor plan for an uploaded room photo.

    Args:
        image_numpy: The uploaded image as a numpy array, or ``None`` when
            the image input was cleared.

    Returns:
        A 4-tuple matching the event outputs: (floor plan value, initial
        layout snapshot, top-view image for state, top-view image for
        display). The images are ``None`` when no top view was generated.
    """
    if image_numpy is None:
        # The change event is wired to four outputs; always return four values.
        return _empty_layout(), _empty_layout(), None, None

    pil_image = Image.fromarray(image_numpy)

    # Step 1: Generate Floor Plan image with Nano Banana 2 (Optimized)
    gr.Info("Step 1: Generating 2D floor plan image...")
    print("\n--- Starting Gemini 3.1 Flash Image Preview (Nano Banana 2) - Optimized ---")
    generated_image = _generate_image([pil_image, FLOOR_PLAN_PROMPT], label="image")
    print("--- Finished generating floor plan image ---\n")
    if generated_image is not None:
        gr.Info("Step 1 Complete: Floor plan image generated!")
    else:
        gr.Warning("Step 1 produced no floor plan image; analyzing the uploaded photo instead.")

    # Step 2: Extract coordinates
    layout_json = _empty_layout()
    image_to_parse = generated_image if generated_image is not None else pil_image
    gr.Info("Step 2: Analyzing top plan image to extract coordinates...")
    try:
        response_json = client.models.generate_content(
            model=LAYOUT_MODEL,
            contents=[image_to_parse, LAYOUT_PROMPT],
            config={
                'response_mime_type': 'application/json',
                'response_schema': RoomLayout,
            },
        )
        if response_json.text:
            layout_json = json.loads(response_json.text)
            gr.Info("Step 2 Complete: Coordinates extracted successfully!")
            print(layout_json)
        else:
            gr.Warning("Step 2 returned an empty response; using an empty layout.")
    except Exception as e:
        # UI boundary: fall back to the empty layout rather than crash.
        gr.Warning(f"Error parsing layout JSON: {e}")
        print(f"Error parsing layout JSON: {e}")

    # Return: floor_plan value, initial_layout_state, topview_state, topview_before display
    # The snapshot is a separate copy so later edits cannot alias it.
    return layout_json, copy.deepcopy(layout_json), generated_image, generated_image


def _index_furnitures(layout):
    """Map each furniture of *layout* to a unique ``(name, occurrence)`` key.

    Keying by name alone would merge items that share a name (for example
    two chairs); the occurrence counter keeps every item distinct.
    """
    seen = Counter()
    indexed = {}
    for item in (layout or {}).get("furnitures") or []:
        name = item.get("object")
        indexed[(name, seen[name])] = item
        seen[name] += 1
    return indexed


def _diff_furnitures(initial_layout, current_layout):
    """Return the moved / added / removed furnitures between two layouts."""
    old_items = _index_furnitures(initial_layout)
    new_items = _index_furnitures(current_layout)

    moved = []
    added = []
    for key, item in new_items.items():
        previous = old_items.get(key)
        if previous is None:
            added.append(item)
        elif previous.get("localisation") != item.get("localisation"):
            moved.append({
                "object": key[0],
                "from": previous.get("localisation"),
                "to": item.get("localisation"),
            })
    removed = [item for key, item in old_items.items() if key not in new_items]
    return {"moved": moved, "added": added, "removed": removed}


def generate_final_image(original_image_numpy, current_layout_json, initial_layout_json, original_topview):
    """Generate the updated top view and the final photorealistic image.

    Args:
        original_image_numpy: The uploaded room photo as a numpy array.
        current_layout_json: Layout as currently edited in the floor plan.
        initial_layout_json: Layout snapshot taken right after extraction.
        original_topview: Top-view image generated for the original photo,
            or ``None``; the photo itself is used as input in that case.

    Returns:
        ``(new_topview, final_image)``. Both are ``None`` when no photo was
        uploaded or step A fails; ``final_image`` is ``None`` when step B
        fails.
    """
    if original_image_numpy is None:
        gr.Warning("Please upload an image first.")
        return None, None

    pil_image = Image.fromarray(original_image_numpy)

    # Compute changed furnitures
    changes = _diff_furnitures(initial_layout_json, current_layout_json)
    changes_str = json.dumps(changes, indent=2)
    new_layout_str = json.dumps(current_layout_json, indent=2)
    print(f"Furniture changes: {changes_str}")

    # Step A: original top-view + new layout → new top-view
    gr.Info("Step A: Generating updated top-view floor plan image...")
    print("\n--- Step A: new top-view from floor plan ---")
    step_a_input = original_topview if original_topview is not None else pil_image
    prompt_a = f"""This is a 2D top-down floor plan image of a room.
Redraw it as a clean 2D top-down floor plan image applying the following furniture changes.
Coordinates are bounding boxes [ymin, xmin, ymax, xmax].

Changes to apply:
{changes_str}

Full new target layout for reference:
{new_layout_str}

OUTPUT ONLY THE IMAGE. Keep the same room boundaries and style."""
    new_topview = _generate_image([step_a_input, prompt_a], label="new top-view")
    if new_topview is None:
        gr.Warning("Step A failed: could not generate updated top-view.")
        return None, None
    gr.Info("Step A complete. Generating final photorealistic image...")

    # Step B: new top-view + original photo → final photorealistic image
    print("\n--- Step B: photorealistic synthesis ---")
    prompt_b = """You are given two images:
1. A 2D top-down floor plan showing the NEW furniture layout.
2. The original room photo.

Generate a high-quality photorealistic image of the room from the EXACT SAME camera angle as the original photo, but with the furniture repositioned to match the new floor plan.

STRICT INSTRUCTIONS:
1. Maintain the exact camera point-of-view, lighting, colors, and architectural features of the original photo.
2. Place furniture according to the new floor plan layout.
3. The room must look realistic and clean."""
    final_image = _generate_image([new_topview, pil_image, prompt_b], label="final photorealistic")
    if final_image is None:
        gr.Warning("Step B failed: could not generate final image.")
        return new_topview, None

    gr.Info("Final image generated successfully!")
    return new_topview, final_image


with gr.Blocks() as demo:
    gr.Markdown("# LaMaison\nRearrange your spaces using visual planning.")

    # State to store the initial layout snapshot and the original top-view image
    initial_layout_state = gr.State(DEFAULT_FLOOR_PLAN)
    topview_state = gr.State(None)

    with gr.Row():
        with gr.Column(scale=1):
            image_input = gr.Image(label="Upload a room image", type="numpy")
        with gr.Column(scale=1):
            floor_plan = FloorPlan(
                value=DEFAULT_FLOOR_PLAN,
                label="Floor Plan",
                interactive=True,
            )

    generate_button = gr.Button("Generate New Image based on Floor plan", variant="primary")

    with gr.Row():
        topview_before = gr.Image(label="Top View — Original", interactive=False)
        topview_after = gr.Image(label="Top View — New Layout (Step A)", interactive=False)
        image_output = gr.Image(label="Final Photorealistic Output (Step B)", interactive=False)

    # Wire it: input -> [floor_plan, initial_layout_state, topview_state, topview_before]
    image_input.change(
        process_room_image,
        inputs=image_input,
        outputs=[floor_plan, initial_layout_state, topview_state, topview_before],
    )

    # Phase 2: User clicks button -> generates new top-view then final photorealistic image
    generate_button.click(
        generate_final_image,
        inputs=[image_input, floor_plan, initial_layout_state, topview_state],
        outputs=[topview_after, image_output],
    )

if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)