File size: 6,578 Bytes
c8d72c8
 
 
 
f6475e3
cae10fc
d603030
f9264c3
345e4db
 
c8d72c8
 
 
345e4db
d603030
f6475e3
c8d72c8
345e4db
c8d72c8
 
 
359875a
 
 
d603030
359875a
 
f9264c3
345e4db
359875a
345e4db
7f231c2
345e4db
 
f9264c3
 
 
359875a
f9264c3
cae10fc
345e4db
f9264c3
 
345e4db
 
 
 
f6475e3
f9264c3
f6475e3
7f231c2
d4df241
cae10fc
345e4db
7f231c2
359875a
c8d72c8
f9264c3
c8d72c8
03e1c51
c8d72c8
359875a
 
c8d72c8
359875a
345e4db
f9264c3
345e4db
f2cdd8b
 
359875a
345e4db
 
359875a
 
c8d72c8
f9264c3
345e4db
 
d603030
7f231c2
 
f2cdd8b
359875a
d603030
f2cdd8b
d603030
 
 
 
 
 
 
 
 
 
cae10fc
79f8f06
359875a
c8d72c8
66a6b28
fd7e20d
359875a
 
 
 
 
 
 
 
 
 
 
fd7e20d
 
c8d72c8
359875a
d603030
359875a
66a6b28
f2cdd8b
8987096
d603030
359875a
66a6b28
c8d72c8
fd7e20d
d603030
359875a
c8d72c8
d603030
 
 
 
 
c8d72c8
f2cdd8b
79f8f06
359875a
d603030
 
 
 
 
79f8f06
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
import base64
import io
import logging
import os
import time

import fal_client
import gradio as gr
import requests
from google import genai
from google.genai.types import GenerateContentConfig, ImageConfig, Part
from PIL import Image

# Credentials are read from the environment; a missing variable yields None.
RUNPOD_API_KEY = os.environ.get("RUNPOD_API_KEY")
FAL_KEY = os.environ.get("FAL_KEY")
GEMINI_API_KEY = os.environ.get("GEMINI_VLM_KEY")
# RunPod endpoint id used to build the Qwen request URL.
QWEN_ENDPOINT_ID = "jzpm1xin5cprff"

# Always (re)define FAL_KEY in the process environment, falling back to an
# empty string when it was not provided (presumably read by fal_client).
os.environ["FAL_KEY"] = FAL_KEY or ""
# The Gemini client only exists when a key is configured; otherwise None.
gemini_client = None
if GEMINI_API_KEY:
    gemini_client = genai.Client(api_key=GEMINI_API_KEY)

# Pre-filled text of the "Edit Prompt" box.
DEFAULT_PROMPT = (
    "Add furnishings and accessories to this room as an interior designer would do for a real estate staging. "
    "The generated image shall have the exact same dimensions as the original image and architectural details. "
    "Respect doorways and windows and make sure they are consistent with the source image and not blocked by furniture. "
    "Use cute accessories and with appropriate wall space, add smart simple graphic paintings. "
    "Use neutral colors with light colored accents to match the colors of the room. "
    "Give the area an attractive glow."
)

def get_closest_ratio(pil_img):
    """Return the aspect-ratio label closest to the image's width/height.

    Args:
        pil_img: Any object exposing ``size`` as a ``(width, height)`` pair,
            e.g. a PIL image.

    Returns:
        One of "9:16", "2:3", "3:4", "4:5", "1:1", "5:4", "4:3", "3:2",
        "16:9" or "21:9". On an exact tie the label listed first wins.

    Raises:
        ZeroDivisionError: If the image height is 0.
    """
    w, h = pil_img.size
    ratio = w / h
    # Exact fractions instead of truncated decimals (0.56, 1.77, ...), so the
    # decision boundaries between neighbouring ratios sit where they should.
    ratios = {
        "9:16": 9 / 16,
        "2:3": 2 / 3,
        "3:4": 3 / 4,
        "4:5": 4 / 5,
        "1:1": 1.0,
        "5:4": 5 / 4,
        "4:3": 4 / 3,
        "3:2": 3 / 2,
        "16:9": 16 / 9,
        "21:9": 21 / 9,
    }
    return min(ratios, key=lambda label: abs(ratios[label] - ratio))

def b64_to_pil(b64_str):
    """Decode a base64 string (optionally a data URI) into a PIL image.

    Returns None for an empty or missing string. When the text contains a
    "base64," marker, only the part following the first marker is decoded.
    """
    if not b64_str:
        return None
    marker = "base64,"
    encoded = b64_str.split(marker)[1] if marker in b64_str else b64_str
    decoded = base64.b64decode(encoded)
    return Image.open(io.BytesIO(decoded))

def bytes_to_pil(img_bytes):
    """Open encoded image bytes as a PIL image via an in-memory buffer."""
    buffer = io.BytesIO(img_bytes)
    return Image.open(buffer)

def get_image_inputs(image_file, image_url):
    """Load the source image from a local file or, failing that, a URL.

    Args:
        image_file: Path of a local image file; takes precedence when truthy.
        image_url: URL of a remote image, used only when ``image_file`` is
            falsy. Surrounding whitespace is ignored.

    Returns:
        A ``(raw_bytes, raw_b64, url)`` tuple: the image bytes, the same
        bytes base64-encoded as text, and a URL for the image (the value
        returned by ``fal_client.upload_file`` for a local file, otherwise
        the cleaned ``image_url``). ``(None, None, None)`` when neither
        input is provided.

    Raises:
        OSError: If the local file cannot be read.
        requests.RequestException: If the download fails, times out, or the
            server answers with an HTTP error status.
    """
    if image_file:
        with open(image_file, "rb") as f:
            raw_bytes = f.read()
        raw_b64 = base64.b64encode(raw_bytes).decode("utf-8")
        fal_url = fal_client.upload_file(image_file)
        return raw_bytes, raw_b64, fal_url

    # Pasted URLs frequently carry stray whitespace; a blank one means "no input".
    if isinstance(image_url, str):
        image_url = image_url.strip()
    if image_url:
        # Bound the wait and reject HTTP errors so an error page is never
        # mistaken for image data.
        resp = requests.get(image_url, timeout=30)
        resp.raise_for_status()
        raw_bytes = resp.content
        raw_b64 = base64.b64encode(raw_bytes).decode("utf-8")
        return raw_bytes, raw_b64, image_url

    return None, None, None

def run_qwen(raw_b64, prompt):
    """Request an edited image from the Qwen endpoint hosted on RunPod.

    Posts the base64 image and prompt to the endpoint's ``runsync`` route
    and decodes the first entry of ``output.images`` from the JSON reply.

    Args:
        raw_b64: Base64-encoded source image.
        prompt: Edit instruction text.

    Returns:
        The edited image as a PIL image, or None when the request or the
        decoding of the reply fails (the failure is logged).
    """
    url = f"https://api.runpod.ai/v2/{QWEN_ENDPOINT_ID}/runsync"
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {RUNPOD_API_KEY}",
    }
    payload = {
        "input": {
            "image": raw_b64,
            "prompt": prompt,
            "seed": 42,
            "use_lightning": True,
            "true_guidance_scale": 1.0,
            "num_inference_steps": 4,
        }
    }
    try:
        response = requests.post(url, headers=headers, json=payload, timeout=60)
        response.raise_for_status()
        return b64_to_pil(response.json()["output"]["images"][0])
    except Exception:
        # Best-effort: callers treat None as "no result". Unlike a bare
        # except, this lets KeyboardInterrupt/SystemExit propagate and
        # leaves a traceback in the log.
        logging.getLogger(__name__).exception("Qwen edit request failed")
        return None

def run_fal_flux(image_url, prompt):
    """Request an edited image from the fal "fal-ai/flux-2/edit" model.

    Submits the job through ``fal_client``, reads the result with
    ``handler.get()``, then downloads the first image URL in the result.

    Args:
        image_url: URL of the source image.
        prompt: Edit instruction text.

    Returns:
        The edited image as a PIL image, or None when submission, download
        or decoding fails (the failure is logged).
    """
    try:
        handler = fal_client.submit(
            "fal-ai/flux-2/edit",
            arguments={"prompt": prompt, "image_urls": [image_url]},
        )
        result = handler.get()
        # Bound the download and reject HTTP errors so an error page is
        # never handed to the image decoder.
        download = requests.get(result["images"][0]["url"], timeout=60)
        download.raise_for_status()
        return bytes_to_pil(download.content)
    except Exception:
        # Best-effort: callers treat None as "no result". Unlike a bare
        # except, this lets KeyboardInterrupt/SystemExit propagate and
        # leaves a traceback in the log.
        logging.getLogger(__name__).exception("FLUX edit request failed")
        return None

def run_gemini(image_bytes, prompt, ratio_str):
    """Request an edited image from the "gemini-2.5-flash-image" model.

    Args:
        image_bytes: Encoded source image bytes.
        prompt: Edit instruction text.
        ratio_str: Aspect-ratio label (e.g. "16:9") passed to ``ImageConfig``.

    Returns:
        The first inline image of the first candidate as a PIL image, or
        None when no client is configured, the reply carries no image, or
        the request fails (the failure is logged).
    """
    if not gemini_client:
        return None

    # Label the upload with its real format instead of always claiming JPEG.
    # Anything undetectable or outside this set keeps the previous
    # "image/jpeg" label.
    accepted_mime_types = {"image/png", "image/jpeg", "image/webp", "image/heic", "image/heif"}
    mime_type = "image/jpeg"
    try:
        with Image.open(io.BytesIO(image_bytes)) as probe:
            detected = probe.get_format_mimetype()
        if detected in accepted_mime_types:
            mime_type = detected
    except Exception:
        # Detection is only a refinement; fall back to the default label.
        pass

    try:
        response = gemini_client.models.generate_content(
            model="gemini-2.5-flash-image",
            contents=[Part.from_bytes(data=image_bytes, mime_type=mime_type), prompt],
            config=GenerateContentConfig(
                response_modalities=["IMAGE"],
                image_config=ImageConfig(aspect_ratio=ratio_str),
                candidate_count=1,
            ),
        )
        for part in response.candidates[0].content.parts:
            if part.inline_data:
                return bytes_to_pil(part.inline_data.data)
    except Exception:
        # Best-effort: callers treat None as "no result". Unlike a bare
        # except, this lets KeyboardInterrupt/SystemExit propagate and
        # leaves a traceback in the log.
        logging.getLogger(__name__).exception("Gemini edit request failed")
    return None

def compare_all(image_file, image_url, prompt):
    """Run the three models one after another, streaming progress to the UI.

    This is a generator: every yield is a 7-tuple
    ``(original, qwen_img, flux_img, gemini_img, qwen_status, flux_status,
    gemini_status)`` matching the outputs wired to the run button.

    Args:
        image_file: Path of an uploaded image, or a falsy value.
        image_url: URL of a remote image, used when no file is given.
        prompt: Edit instruction sent to every model.

    Yields:
        An all-empty tuple when no input image is available; otherwise one
        tuple before each model starts and a final one when all are done.
        A model that returned None is reported as failed rather than
        complete.
    """
    raw_bytes, raw_b64, web_url = get_image_inputs(image_file, image_url)
    if not raw_bytes:
        yield None, None, None, None, "", "", ""
        return

    og_pil = bytes_to_pil(raw_bytes)
    ratio_str = get_closest_ratio(og_pil)
    q_img, f_img, g_img = None, None, None

    pending = "🕒 Pending..."

    def _status(img):
        # The run_* helpers signal failure by returning None.
        return "✅ Complete" if img is not None else "❌ Failed"

    yield og_pil, q_img, f_img, g_img, "⏳ Processing (~5s)...", pending, pending
    q_img = run_qwen(raw_b64, prompt)

    yield og_pil, q_img, f_img, g_img, _status(q_img), "⏳ Processing (~12s)...", pending
    f_img = run_fal_flux(web_url, prompt)

    yield og_pil, q_img, f_img, g_img, _status(q_img), _status(f_img), "⏳ Processing (~15s)..."
    g_img = run_gemini(raw_bytes, prompt, ratio_str)

    yield og_pil, q_img, f_img, g_img, _status(q_img), _status(f_img), _status(g_img)

# UI layout: a top row with the original image next to the inputs, then one
# column per model holding its price caption, status line and result image.
with gr.Blocks() as demo:
    gr.HTML("<h2 style='text-align: center; margin: 10px 0;'>🛋️ Interior Design Model Arena</h2>")

    with gr.Row():
        with gr.Column(scale=1):
            gr.HTML("<center><b>ORIGINAL REFERENCE</b></center>")
            out_og = gr.Image(show_label=False, type="pil", height=320)

        with gr.Column(scale=1):
            input_prompt = gr.Textbox(label="Edit Prompt", value=DEFAULT_PROMPT, lines=4)
            with gr.Row():
                # type="filepath": compare_all receives a path, not pixel data.
                input_file = gr.Image(label="Upload", type="filepath", height=100)
                input_url = gr.Textbox(label="OR: Image URL", placeholder="Paste URL...")
            run_btn = gr.Button("🚀 Generate Comparison", variant="primary")

    gr.HTML("<hr style='margin: 15px 0;'>")

    with gr.Row():
        with gr.Column():
            gr.HTML("<center><b>QWEN-EDIT</b><br><small>$0.004 - $0.008</small></center>")
            stat_q = gr.Markdown("🕒 Waiting...", elem_classes="status-msg")
            out_qwen = gr.Image(show_label=False, type="pil", height=350)

        with gr.Column():
            gr.HTML("<center><b>FLUX-2 EDIT</b><br><small>$0.03</small></center>")
            stat_f = gr.Markdown("🕒 Waiting...", elem_classes="status-msg")
            out_fal = gr.Image(show_label=False, type="pil", height=350)

        with gr.Column():
            gr.HTML("<center><b>GEMINI 2.5 FLASH</b><br><small>$0.039</small></center>")
            stat_g = gr.Markdown("🕒 Waiting...", elem_classes="status-msg")
            out_gemini = gr.Image(show_label=False, type="pil", height=350)

    # The outputs order must match the tuple order yielded by compare_all.
    run_btn.click(
        fn=compare_all,
        inputs=[input_file, input_url, input_prompt],
        outputs=[out_og, out_qwen, out_fal, out_gemini, stat_q, stat_f, stat_g],
    )

if __name__ == "__main__":
    # Page-level CSS: widen the container, keep images uncropped, and style
    # the per-model status lines (elements with class "status-msg").
    page_css = """
        .gradio-container {max-width: 98% !important} 
        img {object-fit: contain !important;}
        .status-msg {text-align: center; margin-top: -10px; font-size: 0.85em; color: #666;}
        """
    # Serve on all network interfaces, port 7860, with the Soft theme.
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        theme=gr.themes.Soft(),
        css=page_css,
    )