File size: 6,937 Bytes
1a680b8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
import gradio as gr
from PIL import Image, ImageDraw, ImageFont
import random
import time

# Mock function to simulate Google DeepMind Gemini 2.0 Flash.
# In a production environment, replace this logic with the actual
# Google Generative AI SDK call.
def mock_gemini_generation(prompt, img1, img2, img3, history):
    """
    Simulate an image generation/editing round-trip for the demo UI.

    Appends the user's turn (text plus any uploaded images) to the chat
    history, fabricates a placeholder "result" image with Pillow, appends
    it as the assistant's turn, and returns values that clear the inputs.

    Args:
        prompt: Text prompt from the textbox (may be None or empty).
        img1, img2, img3: Optional PIL images from the three upload spots.
        history: Chatbot message history (list of role/content dicts);
            may be None before the first turn.

    Returns:
        (updated history, cleared textbox, None, None, None) — the three
        trailing Nones reset the image inputs.
    """
    prompt = prompt or ""    # guard: a None prompt would break prompt[:20] below
    history = history or []  # guard: Gradio may hand us None on the first call

    # 1. Construct the user message for the chat history.
    user_content = []

    # Add text prompt if present.
    if prompt.strip():
        user_content.append({"type": "text", "text": prompt})

    # Add whichever of the three image spots are filled.
    for img in (img1, img2, img3):
        if img is not None:
            user_content.append({"type": "image", "image": img})

    if not user_content:
        # Nothing was provided — keep the transcript coherent with a stub.
        user_content.append({"type": "text", "text": "Generate something..."})

    history.append({"role": "user", "content": user_content})

    # 2. Simulate "thinking" / processing time.
    time.sleep(1.5)

    # 3. Generate a mock result: random background color + random ellipses.
    width, height = 512, 512
    bg_color = (random.randint(50, 255), random.randint(50, 255), random.randint(50, 255))
    result_image = Image.new('RGB', (width, height), color=bg_color)
    draw = ImageDraw.Draw(result_image)

    # Prefer a real TrueType font; fall back to Pillow's bitmap default.
    try:
        font = ImageFont.truetype(
            "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf", 40
        )
    except OSError:  # font file missing/unreadable (was a bug-hiding bare except)
        font = ImageFont.load_default()

    # Draw some "AI Art" shapes.
    for _ in range(10):
        x1 = random.randint(0, width)
        y1 = random.randint(0, height)
        x2 = x1 + random.randint(-100, 100)
        y2 = y1 + random.randint(-100, 100)
        color = (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255))
        draw.ellipse([x1, y1, x2, y2], fill=color, outline="white")

    # Overlay a caption so the placeholder is identifiable in the chat.
    text_overlay = f"Gemini 2.0 Flash\nSimulated Output\nPrompt: {prompt[:20]}..."

    # Center the text. textbbox exists in Pillow >= 8.0; textsize is the
    # pre-9.2 API kept as a fallback for older installs.
    try:
        bbox = draw.textbbox((0, 0), text_overlay, font=font)
        text_width = bbox[2] - bbox[0]
        text_height = bbox[3] - bbox[1]
    except AttributeError:
        text_width, text_height = draw.textsize(text_overlay, font=font)

    position = ((width - text_width) / 2, (height - text_height) / 2)

    # Draw a 2px black shadow first for readability, then the white text.
    draw.text((position[0] + 2, position[1] + 2), text_overlay, fill="black", font=font)
    draw.text(position, text_overlay, fill="white", font=font)

    # 4. Append the assistant's turn (acknowledgement text + generated image).
    history.append({
        "role": "assistant",
        "content": [
            {"type": "text", "text": "Here is the generated image based on your inputs:"},
            {"type": "image", "image": result_image},
        ],
    })

    # 5. Return updated history and values that clear all inputs.
    return history, gr.Textbox(value=""), None, None, None

def clear_chat():
    """Reset the demo to its initial state.

    Returns values for (chatbot, img_spot_1, img_spot_2, img_spot_3,
    text_prompt), in that order: empty history, three cleared image
    inputs, and an empty prompt string.
    """
    empty_history = []
    cleared_images = (None, None, None)
    return (empty_history, *cleared_images, "")

# Gradio 6 Application Structure
# Note: In Gradio 6, theme, css, js go in demo.launch(), NOT in gr.Blocks()

with gr.Blocks() as demo:
    
    # Header Section: static HTML banner with title and attribution link.
    gr.HTML("""
        <div style="text-align: center; margin-bottom: 20px;">
            <h1>🎨 Image Generation & Editing</h1>
            <h3>Powered by Google DeepMind Gemini 2.0 Flash (Demo)</h3>
            <p>
                <a href="https://huggingface.co/spaces/akhaliq/anycoder" target="_blank" style="text-decoration: none; color: #007bff; font-weight: bold;">
                    Built with anycoder
                </a>
            </p>
        </div>
    """)

    # Chat Interface Area.
    # NOTE(review): mock_gemini_generation appends role/content dicts, i.e.
    # the "messages" format — confirm the installed Gradio version's
    # gr.Chatbot accepts this without an explicit type argument.
    chatbot = gr.Chatbot(
        label="Chat History",
        height=500,
        show_copy_button=True,
        avatar_images=(None, "https://www.google.com/favicon.ico") # Optional avatars
    )

    # Input Section: three equal-width image inputs side by side.
    with gr.Row():
        # The "Three Blank Spots" for dragging images
        with gr.Column(scale=1):
            img_spot_1 = gr.Image(
                label="Image Input 1", 
                sources=["upload", "clipboard", "webcam"],
                type="pil",  # handler receives PIL.Image objects
                height=200
            )
        with gr.Column(scale=1):
            img_spot_2 = gr.Image(
                label="Image Input 2", 
                sources=["upload", "clipboard", "webcam"],
                type="pil",
                height=200
            )
        with gr.Column(scale=1):
            img_spot_3 = gr.Image(
                label="Image Input 3", 
                sources=["upload", "clipboard", "webcam"],
                type="pil",
                height=200
            )

    # Text Prompt Area: prompt box plus Generate/Clear buttons in one row.
    with gr.Row():
        text_prompt = gr.Textbox(
            label="Text Prompt",
            placeholder="Describe the image you want to generate or how to edit the uploaded images...",
            lines=2,
            scale=4,
            container=False
        )
        submit_btn = gr.Button("Generate ✨", variant="primary", scale=1, size="lg")
        clear_btn = gr.Button("Clear 🗑️", variant="secondary", scale=1)

    # Event Listeners.
    # Outputs order matches mock_gemini_generation's return tuple:
    # (history, cleared textbox, None x3 for the image spots).
    submit_btn.click(
        fn=mock_gemini_generation,
        inputs=[text_prompt, img_spot_1, img_spot_2, img_spot_3, chatbot],
        outputs=[chatbot, text_prompt, img_spot_1, img_spot_2, img_spot_3],
        api_visibility="public" # Gradio 6 syntax — NOTE(review): verify against installed version
    )

    # Outputs order matches clear_chat's return tuple (text_prompt last).
    clear_btn.click(
        fn=clear_chat,
        inputs=None,
        outputs=[chatbot, img_spot_1, img_spot_2, img_spot_3, text_prompt],
        api_visibility="public"
    )
    
    # Allow "Enter" key in textbox to trigger generation
    text_prompt.submit(
        fn=mock_gemini_generation,
        inputs=[text_prompt, img_spot_1, img_spot_2, img_spot_3, chatbot],
        outputs=[chatbot, text_prompt, img_spot_1, img_spot_2, img_spot_3],
        api_visibility="public"
    )

# Launch the App with Gradio 6 Syntax
# All app-level parameters (theme, css, footer_links) go here!
# NOTE(review): theme/footer_links as launch() kwargs is presumed Gradio 6
# behavior — confirm against the installed Gradio version's docs.
demo.launch(
    theme=gr.themes.Soft(
        primary_hue="blue",
        secondary_hue="cyan",
        neutral_hue="slate",
        font=gr.themes.GoogleFont("Inter")
    ),
    footer_links=[
        {"label": "Google DeepMind", "url": "https://deepmind.google/"},
        {"label": "Built with anycoder", "url": "https://huggingface.co/spaces/akhaliq/anycoder"}
    ]
)