# Ovis-Image / app.py
# Last commit d41a998 (tchung1970): "Consolidate to single app.py entry point"
import os
import torch
import gradio as gr
import spaces
import random
import numpy as np
from diffusers.utils import logging
from PIL import Image
from diffusers import OvisImagePipeline
# Restrict diffusers' logger to error-level messages.
logging.set_verbosity_error()

# Largest signed 32-bit integer; used as the upper bound for seeds.
MAX_SEED = np.iinfo(np.int32).max

# Target device and weight dtype used when loading the pipeline below.
device = "cuda"
_dtype = torch.bfloat16

# Optional Hugging Face access token; None when HF_TOKEN is not set.
hf_token = os.getenv("HF_TOKEN")

# Load the Ovis-Image pipeline once at import time and move it to the device.
pipe = OvisImagePipeline.from_pretrained(
    "AIDC-AI/Ovis-Image-7B",
    token=hf_token,
    torch_dtype=_dtype,
)
pipe.to(device)
# Sample prompts. Only examples[0] is referenced in this file, as the
# initial value of the prompt textbox.
examples = [
    "Five shimmering goldfish weave through crevices between stones; four are red-and-white, while one is silver-white. By the pond's edge, a golden shaded British Shorthair cat watches them intently, counting on blind luck. Watercolor style.",
    "Solar punk vehicle in a bustling city",
    "An anthropomorphic cat riding a Harley Davidson in Arizona with sunglasses and a leather jacket",
    "An elderly woman poses for a high fashion photoshoot in colorful, patterned clothes with a cyberpunk 2077 vibe",
]
def get_image_size(aspect_ratio):
    """Convert an aspect-ratio label into a ``(width, height)`` tuple.

    The label is expected to embed the resolution in parentheses, e.g.
    ``"4:3 (1024x768)"``. The text between the first ``(`` and the next
    ``)`` is split on ``x`` and both halves are parsed as integers.

    Args:
        aspect_ratio: Label such as ``"16:9 (1024x576)"``. Any value that is
            not a string, has no parenthesised ``WxH`` part, or does not
            parse to two positive integers falls back to the default.

    Returns:
        ``(width, height)`` as ints; ``(1024, 1024)`` when the label cannot
        be parsed.
    """
    default_size = (1024, 1024)

    # A cleared dropdown or an API caller may hand us None / a non-string.
    if not isinstance(aspect_ratio, str):
        return default_size
    if "(" not in aspect_ratio or "x" not in aspect_ratio:
        return default_size

    res_part = aspect_ratio.split("(")[1].split(")")[0]
    try:
        width_text, height_text = res_part.split("x")
        width, height = int(width_text), int(height_text)
    except ValueError:
        # Wrong number of "x"-separated fields, or non-numeric fields.
        return default_size

    # A zero or negative dimension is never a usable image size.
    if width <= 0 or height <= 0:
        return default_size
    return width, height
# Custom stylesheet for the app; it is handed to demo.launch(css=...) at the
# bottom of this file. It styles the header, prompt textarea, primary button
# and output panel, pins the first column of each row to 550px, and hides the
# page footer.
# NOTE(review): ".input-section .main-title" is set to #ffffff outside of
#   ".dark", while ".input-section" itself has a #ffffff background - the
#   title may be invisible in light mode; confirm this is intended.
# NOTE(review): the "max-width: 734px" media query raises .main-title to 40px
#   although the base size is 32px - confirm this is intended.
# NOTE(review): many selectors rely on ".gradio-row" / ".gradio-column" class
#   names; verify the Gradio version in use actually emits them.
apple_css = """
/* Global Styles */
.gradio-container {
max-width: 85vw !important;
margin: 0 auto !important;
padding: 48px 20px !important;
font-family: -apple-system, BlinkMacSystemFont, 'Inter', 'Segoe UI', 'Roboto', sans-serif !important;
}
/* Disable all transitions globally to prevent layout shifts */
* {
transition: none !important;
animation: none !important;
}
/* Header */
.header-container {
text-align: left;
margin-bottom: 24px;
}
.main-title {
font-size: 32px !important;
font-weight: 600 !important;
letter-spacing: -0.02em !important;
line-height: 1.07 !important;
color: #1d1d1f !important;
margin: 0 0 16px 0 !important;
}
.subtitle {
font-size: 21px !important;
font-weight: 400 !important;
line-height: 1.38 !important;
color: #6e6e73 !important;
margin: 0 0 24px 0 !important;
}
.attribution-link {
display: inline-block;
font-size: 14px !important;
color: #0071e3 !important;
text-decoration: none !important;
font-weight: 400 !important;
transition: color 0.2s ease !important;
}
.attribution-link:hover {
color: #0077ed !important;
text-decoration: underline !important;
}
/* Input Section */
.input-section {
background: #ffffff;
border-radius: 18px;
padding: 32px;
box-shadow: 0 2px 12px rgba(0, 0, 0, 0.08);
}
/* Textbox */
textarea {
font-size: 17px !important;
line-height: 1.47 !important;
border-radius: 12px !important;
border: 1px solid #d2d2d7 !important;
padding: 12px 16px !important;
background: #ffffff !important;
font-family: -apple-system, BlinkMacSystemFont, 'Inter', sans-serif !important;
min-height: 200px !important;
max-height: 400px !important;
height: 200px !important;
resize: vertical !important;
overflow-y: auto !important;
margin-bottom: 16px !important;
}
textarea:focus {
border-color: #0071e3 !important;
box-shadow: 0 0 0 4px rgba(0, 113, 227, 0.15) !important;
outline: none !important;
}
textarea::placeholder {
color: #86868b !important;
}
/* Button */
button.primary {
font-size: 17px !important;
font-weight: 400 !important;
padding: 12px 32px !important;
border-radius: 980px !important;
background: #0071e3 !important;
border: none !important;
color: #ffffff !important;
min-height: 44px !important;
letter-spacing: -0.01em !important;
cursor: pointer !important;
}
button.primary:hover {
background: #0077ed !important;
}
button.primary:active {
opacity: 0.9 !important;
}
/* Output Section */
div.output-section {
background: #ffffff;
border-radius: 18px;
padding: 32px;
box-shadow: 0 2px 12px rgba(0, 0, 0, 0.08);
overflow: hidden;
display: flex;
align-items: center;
justify-content: center;
min-height: 80vh;
max-height: 90vh;
will-change: auto;
position: relative;
}
.output-section * {
transform: none !important;
transition: none !important;
animation: none !important;
}
.output-section img {
border-radius: 12px !important;
max-width: 100% !important;
max-height: 85vh !important;
width: auto !important;
height: auto !important;
object-fit: contain !important;
transform: none !important;
transition: none !important;
animation: none !important;
backface-visibility: hidden;
-webkit-backface-visibility: hidden;
}
/* Make progress/generation area fill more space */
.output-section > div {
width: 100% !important;
min-height: 75vh !important;
max-height: 85vh !important;
display: flex !important;
align-items: center !important;
justify-content: center !important;
}
.output-section > div > div {
min-height: 75vh !important;
max-height: 85vh !important;
width: 100% !important;
display: flex !important;
align-items: center !important;
justify-content: center !important;
}
.output-section * {
max-width: 100% !important;
}
/* Footer */
.footer-text {
text-align: center;
margin-top: 48px;
font-size: 14px !important;
color: #86868b !important;
line-height: 1.43 !important;
}
/* Progress */
.progress-bar {
background: #0071e3 !important;
border-radius: 4px !important;
}
/* Dark Mode */
.dark .main-title {
color: #ffffff !important;
}
.dark .subtitle {
color: #a1a1a6 !important;
}
.input-section .main-title {
color: #ffffff !important;
}
.dark .input-section .main-title {
color: #f5f5f7 !important;
}
.dark .input-section,
.dark .output-section {
background: #1d1d1f;
box-shadow: 0 2px 12px rgba(0, 0, 0, 0.4);
}
.dark textarea {
background: #1d1d1f !important;
border-color: #424245 !important;
color: #f5f5f7 !important;
}
.dark textarea::placeholder {
color: #86868b !important;
}
/* Inline labels */
label.inline-label {
display: flex !important;
align-items: center !important;
min-width: 120px !important;
margin: 0 !important;
padding: 0 12px 0 0 !important;
font-weight: 400 !important;
font-size: 14px !important;
color: #1d1d1f !important;
}
/* Fix column width to prevent shrinking - target Gradio's generated structure */
.input-section {
min-width: 550px !important;
max-width: 550px !important;
width: 550px !important;
flex-shrink: 0 !important;
flex-grow: 0 !important;
}
/* Lock the output section to fill remaining space */
.output-section {
flex-grow: 1 !important;
flex-shrink: 0 !important;
flex-basis: auto !important;
}
/* Prevent Gradio columns from flexing */
.gradio-column {
flex-shrink: 0 !important;
}
/* Stabilize row layout - force horizontal layout with maximum specificity */
.gradio-row,
div.gradio-row,
.gradio-container .gradio-row,
.gradio-container > .gradio-row,
.gradio-container div.gradio-row {
align-items: flex-start !important;
flex-direction: row !important;
display: flex !important;
flex-wrap: nowrap !important;
width: 100% !important;
}
/* Force columns to stay inline */
.gradio-row > .gradio-column,
.gradio-row > div {
display: inline-flex !important;
vertical-align: top !important;
}
/* First column - input section */
.gradio-row > .gradio-column:first-child,
.gradio-row > div:first-child {
width: 550px !important;
min-width: 550px !important;
max-width: 550px !important;
flex: 0 0 550px !important;
}
/* Second column - output section */
.gradio-row > .gradio-column:last-child,
.gradio-row > div:last-child {
flex: 1 1 auto !important;
min-width: 0 !important;
}
/* Lock textbox container size */
.input-section .gr-textbox,
.input-section label[for] {
width: 100% !important;
}
/* Prevent form from expanding */
.input-section form {
width: 100% !important;
max-width: 100% !important;
}
/* Ensure seed input always visible */
.input-section input[type="number"] {
display: block !important;
visibility: visible !important;
}
/* Hide progress indicator in input section - target specific progress elements */
.input-section .progress-container,
.input-section [class*="progress-bar"],
.input-section [class*="progress-text"],
.input-section [class*="progress-level"],
.input-section .progress,
.input-section .eta-bar {
display: none !important;
visibility: hidden !important;
height: 0 !important;
overflow: hidden !important;
}
/* Override ALL responsive behavior - force horizontal layout at ALL viewport sizes */
@media (max-width: 2000px) {
.gradio-row,
div.gradio-row,
.gradio-container .gradio-row,
.gradio-container > .gradio-row {
flex-direction: row !important;
flex-wrap: nowrap !important;
display: flex !important;
}
.gradio-row > .gradio-column,
.gradio-row > div {
display: inline-flex !important;
}
.gradio-row > .gradio-column:first-child,
.gradio-row > div:first-child {
width: 550px !important;
min-width: 550px !important;
max-width: 550px !important;
flex: 0 0 550px !important;
}
.gradio-row > .gradio-column:last-child,
.gradio-row > div:last-child {
flex: 1 1 auto !important;
min-width: 0 !important;
}
}
/* Responsive text sizing only */
@media (max-width: 734px) {
.main-title {
font-size: 40px !important;
}
.subtitle {
font-size: 19px !important;
}
.gradio-container {
padding: 32px 16px !important;
}
.input-section,
.output-section {
padding: 24px !important;
}
/* FORCE horizontal layout even on mobile */
.gradio-row,
div.gradio-row {
flex-direction: row !important;
flex-wrap: nowrap !important;
}
}
/* Remove default Gradio styling */
.contain {
padding: 0 !important;
}
/* Hide Gradio footer */
footer {
display: none !important;
}
.footer {
display: none !important;
}
/* Target main app container */
#root, #app {
width: 100% !important;
max-width: none !important;
}
"""
# JavaScript to force horizontal layout.
# The snippet is passed as js= to demo.load(...) further down. It defines
# forceHorizontalLayout(), which writes inline styles onto .gradio-container,
# #main-row, every .gradio-row, #input-column and #output-column, then calls
# it immediately, again after 100/500/1000/2000 ms, and from a
# MutationObserver attached to document.body.
# NOTE(review): the observer watches 'style'/'class' attribute changes while
#   its callback itself writes inline styles - verify it cannot re-trigger
#   itself continuously in the target browsers.
# NOTE(review): in the fallback loop, `index` is the position within the whole
#   querySelectorAll result, not within each row, so only the first two
#   matched elements page-wide receive the width/flex rules - confirm intent.
js_code = """
function() {
function forceHorizontalLayout() {
// Set container width
const container = document.querySelector('.gradio-container');
if (container) {
container.style.maxWidth = '85vw';
container.style.width = '85vw';
}
// Target the main row specifically
const mainRow = document.getElementById('main-row');
if (mainRow) {
mainRow.style.flexDirection = 'row';
mainRow.style.flexWrap = 'nowrap';
mainRow.style.display = 'flex';
mainRow.style.width = '100%';
}
// Force ALL rows to stay horizontal
const rows = document.querySelectorAll('.gradio-row');
rows.forEach(row => {
row.style.flexDirection = 'row';
row.style.flexWrap = 'nowrap';
row.style.display = 'flex';
});
// Target specific columns
const inputCol = document.getElementById('input-column');
if (inputCol) {
inputCol.style.width = '550px';
inputCol.style.minWidth = '550px';
inputCol.style.maxWidth = '550px';
inputCol.style.flex = '0 0 550px';
inputCol.style.display = 'inline-flex';
inputCol.style.flexDirection = 'column';
}
const outputCol = document.getElementById('output-column');
if (outputCol) {
outputCol.style.flex = '1 1 auto';
outputCol.style.minWidth = '0';
outputCol.style.display = 'inline-flex';
outputCol.style.flexDirection = 'column';
}
// Fallback: force all column children of rows
const columns = document.querySelectorAll('.gradio-row > .gradio-column, .gradio-row > div');
columns.forEach((col, index) => {
if (index === 0) {
col.style.width = '550px';
col.style.minWidth = '550px';
col.style.maxWidth = '550px';
col.style.flex = '0 0 550px';
} else if (index === 1) {
col.style.flex = '1 1 auto';
col.style.minWidth = '0';
}
col.style.display = 'inline-flex';
});
}
// Run immediately
forceHorizontalLayout();
// Run again after delays to override Gradio's dynamic changes
setTimeout(forceHorizontalLayout, 100);
setTimeout(forceHorizontalLayout, 500);
setTimeout(forceHorizontalLayout, 1000);
setTimeout(forceHorizontalLayout, 2000);
// Set up mutation observer to reapply on DOM changes
const observer = new MutationObserver(forceHorizontalLayout);
observer.observe(document.body, { childList: true, subtree: true, attributes: true, attributeFilter: ['style', 'class'] });
}
"""
@spaces.GPU(duration=75)
def infer(
    prompt,
    seed=42,
    randomize_seed=False,
    aspect_ratio="1:1 (1024x1024)",
    guidance_scale=5.0,
    num_inference_steps=50,
    progress=gr.Progress(track_tqdm=True),
):
    """Generate one image with the module-level Ovis-Image pipeline.

    Args:
        prompt: Text prompt forwarded to the pipeline.
        seed: Seed for the random generator; replaced by a random value in
            ``[0, MAX_SEED]`` when ``randomize_seed`` is true.
        randomize_seed: When true, ignore ``seed`` and draw a fresh one.
        aspect_ratio: Label such as ``"4:3 (1024x768)"``; resolved to a
            width/height pair by ``get_image_size``.
        guidance_scale: Value passed to the pipeline as ``true_cfg_scale``.
        num_inference_steps: Number of steps passed to the pipeline.
        progress: Gradio progress tracker (``track_tqdm=True``); not used
            directly in the body.

    Returns:
        ``(image, seed)`` - the first generated image and the seed actually
        used, so the UI can display it.
    """
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)

    # UI sliders / API clients may deliver numbers as floats (e.g. 42.0);
    # torch.Generator.manual_seed only accepts ints, and a step count must be
    # integral as well.
    seed = int(seed)
    num_inference_steps = int(num_inference_steps)

    width, height = get_image_size(aspect_ratio)
    print(f'inference with prompt: {prompt}, size: {height}x{width}, seed: {seed}, steps: {num_inference_steps}, cfg: {guidance_scale}')

    # A seeded generator makes the result reproducible for a given seed.
    generator = torch.Generator().manual_seed(seed)

    # NOTE(review): confirm that OvisImagePipeline.__call__ accepts the
    # `true_cfg_scale` keyword (as opposed to `guidance_scale`).
    image = pipe(
        prompt,
        negative_prompt="",
        height=height,
        width=width,
        num_inference_steps=num_inference_steps,
        true_cfg_scale=guidance_scale,
        generator=generator,
    ).images[0]
    return image, seed
# Font stacks for regular and monospace text.
_ui_fonts = [gr.themes.GoogleFont("Inter"), "SF Pro Display", "-apple-system", "BlinkMacSystemFont", "system-ui", "sans-serif"]
_mono_fonts = [gr.themes.GoogleFont("JetBrains Mono"), "SF Mono", "ui-monospace", "monospace"]

# Colour, border and shadow overrides applied on top of the Soft base theme.
_theme_overrides = {
    "body_background_fill": '#f5f5f7',
    "body_background_fill_dark": '#000000',
    "button_primary_background_fill": '#0071e3',
    "button_primary_background_fill_hover": '#0077ed',
    "button_primary_text_color": '#ffffff',
    "block_background_fill": '#ffffff',
    "block_background_fill_dark": '#1d1d1f',
    "block_border_width": '0px',
    "block_shadow": '0 2px 12px rgba(0, 0, 0, 0.08)',
    "block_shadow_dark": '0 2px 12px rgba(0, 0, 0, 0.4)',
    "input_background_fill": '#ffffff',
    "input_background_fill_dark": '#1d1d1f',
    "input_border_width": '1px',
    "input_border_color": '#d2d2d7',
    "input_border_color_dark": '#424245',
    "input_shadow": 'none',
    "input_shadow_focus": '0 0 0 4px rgba(0, 113, 227, 0.15)',
}

# Theme object handed to demo.launch(theme=...).
custom_theme = gr.themes.Soft(
    primary_hue=gr.themes.colors.blue,
    secondary_hue=gr.themes.colors.slate,
    neutral_hue=gr.themes.colors.gray,
    spacing_size=gr.themes.sizes.spacing_lg,
    radius_size=gr.themes.sizes.radius_lg,
    text_size=gr.themes.sizes.text_md,
    font=_ui_fonts,
    font_mono=_mono_fonts,
).set(**_theme_overrides)
# Build the UI: one row holding a fixed-width input column (title, prompt,
# aspect ratio, Generate button, hidden advanced controls) and an output
# column with the resulting image.
with gr.Blocks(
    title="Ovis-Image",
    fill_height=False,
) as demo:
    # Two-column layout - variant='panel' prevents responsive stacking
    # (author's note; not verified here). elem_id values are the hooks used
    # by js_code.
    with gr.Row(equal_height=False, variant="panel", elem_id="main-row"):
        # Left column - Input controls (fixed width)
        with gr.Column(scale=0, min_width=550, elem_classes="input-section", elem_id="input-column"):
            # Title above prompt box
            gr.HTML("""
<div class="header-container">
<h1 class="main-title">Ovis-Image</h1>
</div>
""")
            # Prompt box, pre-filled with the first example prompt.
            prompt = gr.Textbox(
                placeholder="Describe the image you want to create...",
                value=examples[0],
                lines=7,
                max_lines=7,
                label="Prompt",
                show_label=True,
                container=True,
                autoscroll=False,
            )
            # Each choice embeds its resolution as "(WxH)"; get_image_size
            # parses that part of the label.
            aspect_ratio = gr.Dropdown(
                choices=[
                    "1:1 (1024x1024)",
                    "4:3 (1024x768)",
                    "3:4 (768x1024)",
                    "16:9 (1024x576)",
                    "9:16 (576x1024)",
                ],
                value="1:1 (1024x1024)",
                label="Aspect Ratio",
                show_label=True,
                container=True,
            )
            run_button = gr.Button(
                "Generate",
                variant="primary",
                size="lg",
                elem_classes="primary"
            )
            # Hidden advanced settings (still functional but not visible).
            # Their values are still passed to infer() as inputs.
            seed = gr.Slider(
                label="Seed",
                minimum=0,
                maximum=MAX_SEED,
                step=1,
                value=0,
                visible=False
            )
            # Defaults to True, so infer() draws a random seed on each run.
            randomize_seed = gr.Checkbox(label="Randomize seed", value=True, visible=False)
            guidance_scale = gr.Slider(
                label="Guidance scale",
                minimum=0.0,
                maximum=14.0,
                step=0.1,
                value=5.0,
                visible=False
            )
            num_inference_steps = gr.Slider(
                label="Number of inference steps",
                minimum=1,
                maximum=100,
                step=1,
                value=50,
                visible=False
            )
        # Right column - Image output
        with gr.Column(scale=2, elem_classes="output-section", elem_id="output-column"):
            result = gr.Image(
                label="Result",
                show_label=False,
                type="pil",
                format="png",
            )
    # Event handlers - using gr.on() like original Qwen-Image.
    # Both clicking the button and submitting the prompt run infer(); the
    # returned seed is written back into the (hidden) seed slider.
    gr.on(
        triggers=[run_button.click, prompt.submit],
        fn=infer,
        inputs=[
            prompt,
            seed,
            randomize_seed,
            aspect_ratio,
            guidance_scale,
            num_inference_steps,
        ],
        outputs=[result, seed],
    )
    # Load JS after DOM is ready (like Qwen-Image); no Python function,
    # inputs or outputs are attached - only js_code runs.
    demo.load(None, None, None, js=js_code)
# Start the Gradio server only when this file is executed as a script.
if __name__ == "__main__":
    # Theme and stylesheet are supplied at launch time.
    launch_options = {
        "theme": custom_theme,
        "css": apple_css,
    }
    demo.launch(**launch_options)