LaMaison / app.py
valcore's picture
ui: restore top-view intermediate image outputs
936c778
import os
import json
from io import BytesIO
from typing import List
from dotenv import load_dotenv
import gradio as gr
from gradio_floorplan import FloorPlan
from PIL import Image
from pydantic import BaseModel, Field
from google import genai
from google.genai import types
# Load variables from a local .env file into the process environment before
# the API client is constructed.
load_dotenv()
# Module-level Gemini client shared by every request handler below.
# NOTE(review): presumably picks up its API key from the environment — confirm.
client = genai.Client()
# Startup banner, handy for checking in the logs which build is running.
print("--- LA MAISON APP LOADED (VERSION 1.4 - OPTIMIZED IMAGE GEN & UI) ---")
# Empty layout used as the initial FloorPlan value and as the fallback when no
# layout could be extracted. Same top-level keys as RoomLayout.
DEFAULT_FLOOR_PLAN = {
    "corners": [],
    "furnitures": [],
}
# One furniture entry of the structured layout requested from the model
# (nested in RoomLayout, which is passed as the response schema).
class FurnitureItem(BaseModel):
    # Furniture name; generate_final_image uses it to match items between the
    # initial and the edited layout.
    object: str
    # Bounding box of the item; element order is given in the field description.
    localisation: List[int] = Field(description="[ymin, xmin, ymax, xmax] coordinates")
    # Free-text description of the item.
    description: str
# Structured layout of a room; passed as `response_schema` to the coordinate
# extraction call in process_room_image.
class RoomLayout(BaseModel):
    # Room outline as a list of [x, y] points.
    corners: List[List[int]] = Field(description="List of [x, y] coordinates for room corners")
    # Every furniture item detected in the room.
    furnitures: List[FurnitureItem]
def process_room_image(image_numpy):
    """Turn an uploaded room photo into a top-view image and an editable layout.

    Step 1 asks the image model for a 2D top-down floor plan of the room.
    Step 2 asks the text model to extract room corners and furniture bounding
    boxes from that floor plan (or from the photo itself when step 1 produced
    nothing), using ``RoomLayout`` as the response schema.

    Args:
        image_numpy: The uploaded image as an array accepted by
            ``PIL.Image.fromarray``, or ``None`` when the input was cleared.

    Returns:
        A 4-tuple ``(layout, layout, topview, topview)`` matching the four
        outputs wired to the upload event: the FloorPlan value, the initial
        layout snapshot, the stored top-view image and the displayed top-view
        image. ``layout`` is ``DEFAULT_FLOOR_PLAN`` when extraction failed and
        ``topview`` is ``None`` when no floor plan image was generated.
    """
    if image_numpy is None:
        # The handler feeds four output components, so the "no image" path
        # must return four values as well (it used to return only two).
        return DEFAULT_FLOOR_PLAN, DEFAULT_FLOOR_PLAN, None, None

    pil_image = Image.fromarray(image_numpy)

    # Step 1: Generate Floor Plan image with Nano Banana 2 (Optimized)
    gr.Info("Step 1: Generating 2D floor plan image...")
    print("\n--- Starting Gemini 3.1 Flash Image Preview (Nano Banana 2) - Optimized ---")
    # Reuse the shared helper instead of duplicating the streaming loop; it
    # stops at the first image and reports API errors itself.
    generated_image = _generate_image(
        [pil_image, "Generate a clean, 2D top-down floor plan image representing this exact room. OUTPUT ONLY THE IMAGE."],
    )
    if generated_image is not None:
        print("--- Finished generating floor plan image ---\n")
        gr.Info("Step 1 Complete: Floor plan image generated!")
    else:
        # Do not announce success when nothing came back; step 2 falls back
        # to the uploaded photo.
        gr.Warning("Step 1: no floor plan image was generated; analyzing the uploaded photo instead.")
        print("--- No floor plan image generated; falling back to the uploaded photo ---\n")

    # Step 2: Extract coordinates
    layout_json = DEFAULT_FLOOR_PLAN
    image_to_parse = generated_image if generated_image is not None else pil_image
    gr.Info("Step 2: Analyzing top plan image to extract coordinates...")
    prompted_model = "gemini-3-flash-preview"
    try:
        response_json = client.models.generate_content(
            model=prompted_model,
            contents=[
                image_to_parse,
                "OUTPUT FORMAT INSTRUCTION: Return ONLY a valid JSON string. DO NOT provide any reasoning, markdown formatting, or conversational text.\n"
                "Analyze this floor plan (or room image) and output the exact coordinates "
                "for the room corners formatted as [[x, y], ...] and a list of furnitures with "
                "their object name, description, and bounding box [ymin, xmin, ymax, xmax]."
            ],
            config={
                'response_mime_type': 'application/json',
                'response_schema': RoomLayout
            }
        )
        if response_json.text:
            layout_json = json.loads(response_json.text)
            gr.Info("Step 2 Complete: Coordinates extracted successfully!")
            print(layout_json)
    except Exception as e:
        # Best effort: keep the default layout and surface the problem in the UI.
        gr.Warning(f"Error parsing layout JSON: {e}")
        print(f"Error parsing layout JSON: {e}")

    # Return: floor_plan value, initial_layout_state, topview_state, topview_before display
    return layout_json, layout_json, generated_image, generated_image
def _generate_image(contents, label="image"):
    """Request an image from the Gemini image model.

    Streams the response and returns the first inline image found as a PIL
    image. Any exception is reported through ``gr.Warning`` and stdout rather
    than propagated.

    Args:
        contents: Prompt parts (images and/or text) forwarded to the model.
        label: Human-readable name of the image, used in log/warning messages.

    Returns:
        The decoded PIL image, or ``None`` when the stream carried no image
        or the call failed.
    """
    image_only_config = types.GenerateContentConfig(
        thinking_config=types.ThinkingConfig(thinking_level="MINIMAL"),
        image_config=types.ImageConfig(image_size="1K"),
        response_modalities=["IMAGE"],
    )
    try:
        stream = client.models.generate_content_stream(
            model='models/gemini-3.1-flash-image-preview',
            contents=contents,
            config=image_only_config,
        )
        for piece in stream:
            # Chunks without parts contribute nothing.
            for candidate in piece.parts or ():
                if not candidate.inline_data:
                    continue
                print(f"[Image Data] Received {label}")
                return Image.open(BytesIO(candidate.inline_data.data))
    except Exception as err:
        gr.Warning(f"Error generating {label}: {err}")
        print(f"Error generating {label}: {err}")
    return None
def _index_furnitures(layout):
    """Map each furniture item of *layout* to a unique ``(name, occurrence)`` key.

    Keying on the name alone would make items that share a name (two chairs,
    for instance) overwrite each other; the occurrence counter keeps them all.
    A missing layout or a missing/empty ``"furnitures"`` list yields ``{}``.
    """
    indexed = {}
    occurrences = {}
    for item in (layout or {}).get("furnitures") or []:
        name = item.get("object")
        nth = occurrences.get(name, 0)
        occurrences[name] = nth + 1
        indexed[(name, nth)] = item
    return indexed


def generate_final_image(original_image_numpy, current_layout_json, initial_layout_json, original_topview):
    """Render the room photo again with the furniture layout edited by the user.

    The furniture lists of the initial and the current layout are diffed into
    moved / added / removed items. Step A redraws the top-view floor plan with
    those changes; step B combines the new top view with the original photo
    to produce the final photorealistic image.

    Args:
        original_image_numpy: The uploaded room photo as an array accepted by
            ``PIL.Image.fromarray``, or ``None`` when nothing was uploaded.
        current_layout_json: Layout dict as currently edited in the floor plan.
        initial_layout_json: Layout dict captured when the photo was processed.
        original_topview: Top-view image generated for the original layout, or
            ``None``; the photo is used as step A input in that case.

    Returns:
        ``(new_topview, final_image)``. Both are ``None`` when no photo was
        given or step A failed; only ``final_image`` is ``None`` when step B
        failed.
    """
    if original_image_numpy is None:
        gr.Warning("Please upload an image first.")
        return None, None

    pil_image = Image.fromarray(original_image_numpy)

    # Compute changed furnitures. Items are matched on (name, occurrence) so
    # duplicated names are not lost, and a None layout counts as empty.
    old_furnitures = _index_furnitures(initial_layout_json)
    new_furnitures = _index_furnitures(current_layout_json)
    moved = [
        {
            "object": key[0],
            "from": old_furnitures[key].get("localisation"),
            "to": item.get("localisation"),
        }
        for key, item in new_furnitures.items()
        if key in old_furnitures
        and old_furnitures[key].get("localisation") != item.get("localisation")
    ]
    added = [item for key, item in new_furnitures.items() if key not in old_furnitures]
    removed = [item for key, item in old_furnitures.items() if key not in new_furnitures]
    changes = {"moved": moved, "added": added, "removed": removed}
    changes_str = json.dumps(changes, indent=2)
    new_layout_str = json.dumps(current_layout_json, indent=2)
    print(f"Furniture changes: {changes_str}")

    # Step A: original top-view + new layout → new top-view
    gr.Info("Step A: Generating updated top-view floor plan image...")
    print("\n--- Step A: new top-view from floor plan ---")
    step_a_input = original_topview if original_topview is not None else pil_image
    prompt_a = f"""This is a 2D top-down floor plan image of a room.
Redraw it as a clean 2D top-down floor plan image applying the following furniture changes.
Coordinates are bounding boxes [ymin, xmin, ymax, xmax].
Changes to apply:
{changes_str}
Full new target layout for reference:
{new_layout_str}
OUTPUT ONLY THE IMAGE. Keep the same room boundaries and style."""
    new_topview = _generate_image([step_a_input, prompt_a], label="new top-view")
    if new_topview is None:
        gr.Warning("Step A failed: could not generate updated top-view.")
        return None, None
    gr.Info("Step A complete. Generating final photorealistic image...")

    # Step B: new top-view + original photo → final photorealistic image
    print("\n--- Step B: photorealistic synthesis ---")
    prompt_b = """You are given two images:
1. A 2D top-down floor plan showing the NEW furniture layout.
2. The original room photo.
Generate a high-quality photorealistic image of the room from the EXACT SAME camera angle as the original photo, but with the furniture repositioned to match the new floor plan.
STRICT INSTRUCTIONS:
1. Maintain the exact camera point-of-view, lighting, colors, and architectural features of the original photo.
2. Place furniture according to the new floor plan layout.
3. The room must look realistic and clean."""
    final_image = _generate_image([new_topview, pil_image, prompt_b], label="final photorealistic")
    if final_image is None:
        gr.Warning("Step B failed: could not generate final image.")
        # Still show the intermediate top view so the user sees step A's result.
        return new_topview, None
    gr.Info("Final image generated successfully!")
    return new_topview, final_image
# UI definition: upload + editable floor plan on top, then the generate button,
# the before/after top views and the final render.
# NOTE(review): the nesting of the button and of the final output relative to
# the rows is assumed from statement order — confirm against the intended layout.
with gr.Blocks() as demo:
    gr.Markdown("# LaMaison\nRearrange your spaces using visual planning.")
    # State to store the initial layout snapshot and the original top-view image
    initial_layout_state = gr.State(DEFAULT_FLOOR_PLAN)
    topview_state = gr.State(None)
    with gr.Row():
        with gr.Column(scale=1):
            # type="numpy": handlers receive the upload as an array (or None).
            image_input = gr.Image(label="Upload a room image", type="numpy")
        with gr.Column(scale=1):
            floor_plan = FloorPlan(
                value=DEFAULT_FLOOR_PLAN,
                label="Floor Plan",
                interactive=True,
            )
    generate_button = gr.Button("Generate New Image based on Floor plan", variant="primary")
    with gr.Row():
        topview_before = gr.Image(label="Top View — Original", interactive=False)
        topview_after = gr.Image(label="Top View — New Layout (Step A)", interactive=False)
    image_output = gr.Image(label="Final Photorealistic Output (Step B)", interactive=False)
    # Phase 1: a change of the uploaded image fills
    # [floor_plan, initial_layout_state, topview_state, topview_before];
    # process_room_image must therefore return four values.
    image_input.change(
        process_room_image,
        inputs=image_input,
        outputs=[floor_plan, initial_layout_state, topview_state, topview_before]
    )
    # Phase 2: User clicks button -> generates new top-view then final photorealistic image
    # (generate_final_image returns two values, one per output below).
    generate_button.click(
        generate_final_image,
        inputs=[image_input, floor_plan, initial_layout_state, topview_state],
        outputs=[topview_after, image_output]
    )
# Start the server only when run as a script; binds to all interfaces on port 7860.
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)