Spaces:

valcore
/

LaMaison

Runtime error

App Files Files Community

LaMaison / app.py

valcore

ui: restore top-view intermediate image outputs

936c778 23 days ago

raw

history blame contribute delete

9.68 kB

	import os
	import json
	from io import BytesIO
	from typing import List
	from dotenv import load_dotenv

	import gradio as gr
	from gradio_floorplan import FloorPlan
	from PIL import Image
	from pydantic import BaseModel, Field
	from google import genai

	from google.genai import types

	# Load variables from a local .env file into the environment before the client is created.
	load_dotenv()
	# Single Gemini client shared by every generation call in this module.
	# NOTE(review): presumably reads its API key from the environment populated above — confirm.
	client = genai.Client()

	# Startup banner, printed once when the module is imported.
	print("--- LA MAISON APP LOADED (VERSION 1.4 - OPTIMIZED IMAGE GEN & UI) ---")

	# Empty layout: used as the initial FloorPlan/State value and as the fallback
	# result when no image is supplied or layout extraction fails.
	DEFAULT_FLOOR_PLAN = {
	"corners": [],
	"furnitures": [],
	}

	# One furniture entry of the structured layout requested from the model.
	# Documented with comments rather than a docstring so that the schema
	# derived from this model stays exactly as it was.
	class FurnitureItem(BaseModel):
	    # Name of the piece; layouts are compared using this value as the key.
	    object: str
	    # Bounding box of the piece.
	    localisation: List[int] = Field(
	        description="[ymin, xmin, ymax, xmax] coordinates",
	    )
	    # Free-text description of the piece.
	    description: str

	# Full structured layout of a room: its outline plus every furniture item.
	# Passed as the response schema of the coordinate-extraction request.
	# Documented with comments rather than a docstring so that the schema
	# derived from this model stays exactly as it was.
	class RoomLayout(BaseModel):
	    # Room outline as a list of corner points.
	    corners: List[List[int]] = Field(
	        description="List of [x, y] coordinates for room corners",
	    )
	    # Every furniture item detected in the room.
	    furnitures: List[FurnitureItem]

	def process_room_image(image_numpy):
	    """Turn an uploaded room photo into an editable floor-plan layout.

	    Step 1 asks the image model for a 2D top-down rendering of the room.
	    Step 2 asks the text model for room corners and furniture boxes, read
	    from that rendering, or from the photo itself when Step 1 gave nothing.

	    Args:
	        image_numpy: The uploaded image as an array accepted by
	            ``Image.fromarray``, or ``None`` when the input was cleared.

	    Returns:
	        A 4-tuple, one value per Gradio output wired to this handler:
	        ``(floor plan value, initial layout snapshot, top-view image kept
	        in state, top-view image shown to the user)``. Both layout entries
	        are the same object and fall back to ``DEFAULT_FLOOR_PLAN``; both
	        image entries are ``None`` when no top-view could be generated.
	    """
	    if image_numpy is None:
	        # One value per wired output is required; returning only two made
	        # Gradio reject the result whenever the image was cleared.
	        return DEFAULT_FLOOR_PLAN, DEFAULT_FLOOR_PLAN, None, None

	    pil_image = Image.fromarray(image_numpy)

	    # Step 1: Generate Floor Plan image with Nano Banana 2 (Optimized)
	    gr.Info("Step 1: Generating 2D floor plan image...")
	    print("\n--- Starting Gemini 3.1 Flash Image Preview (Nano Banana 2) - Optimized ---")

	    # Use the shared helper: same model and config as the later steps, it
	    # stops at the first image part, and it reports request errors itself.
	    generated_image = _generate_image(
	        [pil_image, "Generate a clean, 2D top-down floor plan image representing this exact room. OUTPUT ONLY THE IMAGE."],
	        label="image",
	    )

	    print("--- Finished generating floor plan image ---\n")
	    if generated_image is not None:
	        gr.Info("Step 1 Complete: Floor plan image generated!")
	    else:
	        # Do not claim success when nothing was produced.
	        gr.Warning("Step 1 produced no floor plan image; analyzing the uploaded photo instead.")

	    # Step 2: Extract coordinates
	    layout_json = DEFAULT_FLOOR_PLAN
	    # Explicit None check: the fallback must depend on whether an image
	    # exists, not on the truthiness of an image object.
	    image_to_parse = generated_image if generated_image is not None else pil_image

	    gr.Info("Step 2: Analyzing top plan image to extract coordinates...")

	    prompted_model = "gemini-3-flash-preview"

	    try:
	        response_json = client.models.generate_content(
	            model=prompted_model,
	            contents=[
	                image_to_parse,
	                "OUTPUT FORMAT INSTRUCTION: Return ONLY a valid JSON string. DO NOT provide any reasoning, markdown formatting, or conversational text.\n"
	                "Analyze this floor plan (or room image) and output the exact coordinates "
	                "for the room corners formatted as [[x, y], ...] and a list of furnitures with "
	                "their object name, description, and bounding box [ymin, xmin, ymax, xmax]."
	            ],
	            config={
	                'response_mime_type': 'application/json',
	                'response_schema': RoomLayout
	            }
	        )
	        if response_json.text:
	            layout_json = json.loads(response_json.text)
	            gr.Info("Step 2 Complete: Coordinates extracted successfully!")
	            print(layout_json)
	        else:
	            gr.Warning("Step 2 returned an empty response; keeping the default layout.")
	    except Exception as e:
	        # Best-effort boundary: surface the problem and keep the default layout.
	        gr.Warning(f"Error parsing layout JSON: {e}")
	        print(f"Error parsing layout JSON: {e}")

	    # Return: floor_plan value, initial_layout_state, topview_state, topview_before display
	    return layout_json, layout_json, generated_image, generated_image

	def _generate_image(contents, label="image"):
	    """Run one streamed Gemini image request and return the first image.

	    Args:
	        contents: Request contents forwarded unchanged to the model.
	        label: Short name of what is being generated, used in log and
	            warning messages.

	    Returns:
	        A PIL Image built from the first streamed part carrying inline
	        data, or None when no such part arrives or the request raises (a
	        Gradio warning is shown and the error printed in that case).
	    """
	    request_config = types.GenerateContentConfig(
	        thinking_config=types.ThinkingConfig(thinking_level="MINIMAL"),
	        image_config=types.ImageConfig(image_size="1K"),
	        response_modalities=["IMAGE"],
	    )
	    try:
	        stream = client.models.generate_content_stream(
	            model='models/gemini-3.1-flash-image-preview',
	            contents=contents,
	            config=request_config,
	        )
	        for chunk in stream:
	            # Chunks without parts contribute nothing.
	            for part in chunk.parts or ():
	                if not part.inline_data:
	                    continue
	                print(f"[Image Data] Received {label}")
	                return Image.open(BytesIO(part.inline_data.data))
	    except Exception as err:
	        failure = f"Error generating {label}: {err}"
	        gr.Warning(failure)
	        print(failure)
	    return None


	def _diff_furnitures(old_items, new_items):
	    """Return the furniture changes between two layouts.

	    Items are matched by their "object" name. Unlike a plain name-keyed
	    dict, several items may share a name (e.g. two chairs): items whose
	    name and position are both unchanged are matched first, the remaining
	    same-named items are then paired in order as moves, and whatever is
	    left over is reported as added or removed.

	    Returns:
	        A dict with keys "moved" (entries of the form
	        {"object", "from", "to"}), "added" and "removed" (the original
	        item dicts).
	    """
	    # Bucket the old items by name, keeping every duplicate.
	    remaining_old = {}
	    for item in old_items:
	        remaining_old.setdefault(item["object"], []).append(item)

	    # Pass 1: discard items that did not change at all.
	    candidates = []
	    for item in new_items:
	        bucket = remaining_old.get(item["object"], [])
	        for index, previous in enumerate(bucket):
	            if previous["localisation"] == item["localisation"]:
	                del bucket[index]
	                break
	        else:
	            candidates.append(item)

	    # Pass 2: pair what is left with a same-named old item (a move), or
	    # report it as new when no old item of that name remains.
	    moved = []
	    added = []
	    for item in candidates:
	        bucket = remaining_old.get(item["object"])
	        if bucket:
	            previous = bucket.pop(0)
	            moved.append(
	                {"object": item["object"], "from": previous["localisation"], "to": item["localisation"]}
	            )
	        else:
	            added.append(item)

	    # Old items that were never matched no longer exist in the new layout.
	    removed = [item for bucket in remaining_old.values() for item in bucket]
	    return {"moved": moved, "added": added, "removed": removed}


	def generate_final_image(original_image_numpy, current_layout_json, initial_layout_json, original_topview):
	    """Render the edited layout as a new top-view and a photorealistic image.

	    Step A redraws the top-view with the furniture changes applied; Step B
	    combines that new top-view with the original photo to synthesize the
	    final image.

	    Args:
	        original_image_numpy: The uploaded room photo as an array accepted
	            by ``Image.fromarray``, or ``None`` if nothing was uploaded.
	        current_layout_json: Layout dict as currently edited by the user.
	        initial_layout_json: Layout dict snapshot taken after analysis.
	        original_topview: Top-view image generated during analysis, or
	            ``None``; the room photo is used as Step A input in that case.

	    Returns:
	        ``(new_topview, final_image)``. Both are ``None`` when there is no
	        input photo or Step A fails; ``final_image`` alone is ``None`` when
	        Step B fails.
	    """
	    if original_image_numpy is None:
	        gr.Warning("Please upload an image first.")
	        return None, None

	    pil_image = Image.fromarray(original_image_numpy)

	    # Guard against a missing layout value so the diff below cannot fail
	    # with AttributeError on None.
	    current_layout_json = current_layout_json or {}
	    initial_layout_json = initial_layout_json or {}

	    # Compute changed furnitures
	    changes = _diff_furnitures(
	        initial_layout_json.get("furnitures") or [],
	        current_layout_json.get("furnitures") or [],
	    )
	    changes_str = json.dumps(changes, indent=2)
	    new_layout_str = json.dumps(current_layout_json, indent=2)
	    print(f"Furniture changes: {changes_str}")

	    # Step A: original top-view + new layout → new top-view
	    gr.Info("Step A: Generating updated top-view floor plan image...")
	    print("\n--- Step A: new top-view from floor plan ---")
	    step_a_input = original_topview if original_topview is not None else pil_image
	    prompt_a = f"""This is a 2D top-down floor plan image of a room.
	Redraw it as a clean 2D top-down floor plan image applying the following furniture changes.
	Coordinates are bounding boxes [ymin, xmin, ymax, xmax].

	Changes to apply:
	{changes_str}

	Full new target layout for reference:
	{new_layout_str}

	OUTPUT ONLY THE IMAGE. Keep the same room boundaries and style."""

	    new_topview = _generate_image([step_a_input, prompt_a], label="new top-view")
	    if new_topview is None:
	        gr.Warning("Step A failed: could not generate updated top-view.")
	        return None, None

	    gr.Info("Step A complete. Generating final photorealistic image...")

	    # Step B: new top-view + original photo → final photorealistic image
	    print("\n--- Step B: photorealistic synthesis ---")
	    prompt_b = """You are given two images:
	1. A 2D top-down floor plan showing the NEW furniture layout.
	2. The original room photo.

	Generate a high-quality photorealistic image of the room from the EXACT SAME camera angle as the original photo, but with the furniture repositioned to match the new floor plan.

	STRICT INSTRUCTIONS:
	1. Maintain the exact camera point-of-view, lighting, colors, and architectural features of the original photo.
	2. Place furniture according to the new floor plan layout.
	3. The room must look realistic and clean."""

	    final_image = _generate_image([new_topview, pil_image, prompt_b], label="final photorealistic")

	    if final_image is None:
	        gr.Warning("Step B failed: could not generate final image.")
	        # Still hand back the intermediate top-view so the user sees Step A's result.
	        return new_topview, None

	    gr.Info("Final image generated successfully!")
	    return new_topview, final_image

	# Build the Gradio interface and wire the two handlers to it.
	# NOTE(review): this copy of the file has lost its indentation, so how the
	# Row/Column containers below nest (and which container holds the button and
	# the output images) cannot be confirmed from here — verify against the repo.
	with gr.Blocks() as demo:
	gr.Markdown("# LaMaison\nRearrange your spaces using visual planning.")

	# State to store the initial layout snapshot and the original top-view image
	# (both are written by process_room_image and read by generate_final_image)
	initial_layout_state = gr.State(DEFAULT_FLOOR_PLAN)
	topview_state = gr.State(None)

	with gr.Row():
	with gr.Column(scale=1):
	# Uploaded photo; delivered to the handlers as a numpy array
	image_input = gr.Image(label="Upload a room image", type="numpy")
	with gr.Column(scale=1):
	# Editable floor plan; starts from the empty default layout
	floor_plan = FloorPlan(
	value=DEFAULT_FLOOR_PLAN,
	label="Floor Plan",
	interactive=True,
	)
	generate_button = gr.Button("Generate New Image based on Floor plan", variant="primary")
	with gr.Row():
	# Read-only views of the intermediate top-view images
	topview_before = gr.Image(label="Top View — Original", interactive=False)
	topview_after = gr.Image(label="Top View — New Layout (Step A)", interactive=False)
	image_output = gr.Image(label="Final Photorealistic Output (Step B)", interactive=False)

	# Phase 1: a change of the uploaded image runs the analysis.
	# Wire it: input -> [floor_plan, initial_layout_state, topview_state, topview_before]
	# (the handler therefore has to return four values on every path)
	image_input.change(
	process_room_image,
	inputs=image_input,
	outputs=[floor_plan, initial_layout_state, topview_state, topview_before]
	)

	# Phase 2: User clicks button -> generates new top-view then final photorealistic image
	# (the handler returns two values: the new top-view and the final image)
	generate_button.click(
	generate_final_image,
	inputs=[image_input, floor_plan, initial_layout_state, topview_state],
	outputs=[topview_after, image_output]
	)

	# Script entry point: start the Gradio server bound to 0.0.0.0 on port 7860.
	if __name__ == "__main__":
	    demo.launch(
	        server_name="0.0.0.0",
	        server_port=7860,
	    )