Spaces:
Running on Zero
Running on Zero
| import random | |
| from pathlib import Path | |
| import spaces | |
| import torch | |
| import gradio as gr | |
| from huggingface_hub import snapshot_download | |
| from transformers import AutoTokenizer, T5EncoderModel | |
| from minit2i_pipeline import ( | |
| MiniT2IMMJiTModel, | |
| MiniT2IFlowMatchScheduler, | |
| MiniT2ITextToImagePipeline, | |
| ) | |
# Hub repository whose snapshot holds one sub-directory per model variant.
REPO_ID: str = "MiniT2I/MiniT2I"

# Checkpoint used for both the tokenizer and the shared T5 text encoder.
TEXT_ENCODER: str = "google/flan-t5-large"

# dtype requested when loading each variant's transformer.
DTYPE = torch.bfloat16

# Largest seed the UI slider and the random seed draw may produce.
MAX_SEED: int = 2**31 - 1

# UI label -> sub-directory of the downloaded snapshot.
MODELS: dict = {
    "MiniT2I-B/16 (base)": "minit2i-b-16",
    "MiniT2I-L/16 (large)": "minit2i-l-16",
}
# Local directory of the downloaded REPO_ID snapshot; each variant lives in a
# sub-directory beneath it.
root = Path(snapshot_download(REPO_ID))

# One tokenizer / T5 encoder pair is shared by every pipeline built below,
# since both variants use the same TEXT_ENCODER checkpoint.
tokenizer = AutoTokenizer.from_pretrained(TEXT_ENCODER)
text_encoder = T5EncoderModel.from_pretrained(TEXT_ENCODER, torch_dtype=torch.float32)
text_encoder = text_encoder.to("cuda")
def _load(model_dir):
    """Assemble the text-to-image pipeline for one variant and move it to CUDA.

    Args:
        model_dir: Name of the variant's sub-directory under ``root``; its
            ``transformer`` and ``scheduler`` sub-folders are loaded.

    Returns:
        A ``MiniT2ITextToImagePipeline`` wired to the module-level
        ``tokenizer`` and ``text_encoder``.
    """
    variant_root = root / model_dir
    pipe = MiniT2ITextToImagePipeline(
        # The transformer is loaded in DTYPE; the shared text encoder keeps
        # whatever dtype it was loaded with at module level.
        transformer=MiniT2IMMJiTModel.from_pretrained(
            variant_root / "transformer", torch_dtype=DTYPE
        ),
        scheduler=MiniT2IFlowMatchScheduler.from_pretrained(variant_root / "scheduler"),
        tokenizer=tokenizer,
        text_encoder=text_encoder,
        text_encoder_name=TEXT_ENCODER,
    )
    pipe.to("cuda")
    return pipe
# Build every variant's pipeline once at import time, keyed by its UI label,
# so handling a request only needs a dictionary lookup.
PIPES = {ui_label: _load(subdir) for ui_label, subdir in MODELS.items()}
# This Space runs on ZeroGPU ("Running on Zero"): the GPU-using inference
# entry point has to be wrapped with @spaces.GPU so a device is attached
# while it runs. `spaces` was imported but never used before.
@spaces.GPU
def generate(prompt, model_label, guidance_scale, num_inference_steps, seed, randomize_seed,
             progress=gr.Progress(track_tqdm=True)):
    """Generate one image for ``prompt`` with the selected model variant.

    Args:
        prompt: Text description of the image; must contain non-whitespace.
        model_label: Key into ``PIPES`` selecting which pipeline to run.
        guidance_scale: Forwarded unchanged to the pipeline.
        num_inference_steps: Number of steps; coerced to ``int``.
        seed: Seed used when ``randomize_seed`` is false; coerced to ``int``.
        randomize_seed: When true, ``seed`` is ignored and a fresh value in
            ``[0, MAX_SEED]`` is drawn.
        progress: Gradio progress tracker. It is not referenced in the body;
            declaring it lets Gradio surface progress for the call.

    Returns:
        ``(image, seed)`` where ``seed`` is the integer actually used, so the
        UI can display it and the result can be reproduced.

    Raises:
        gr.Error: If ``prompt`` is empty or whitespace-only.
        KeyError: If ``model_label`` is not a key of ``PIPES``.
    """
    if not prompt or not prompt.strip():
        raise gr.Error("Please enter a prompt.")

    # Normalise once so the generator and the value sent back to the seed
    # slider are the same integer (sliders may deliver floats).
    seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)
    generator = torch.Generator(device="cuda").manual_seed(seed)

    pipe = PIPES[model_label]
    result = pipe(
        prompt,
        guidance_scale=guidance_scale,
        num_inference_steps=int(num_inference_steps),
        generator=generator,
        progress=True,
    )
    return result.images[0], seed
# Sample prompts; passed to gr.Examples so a click fills the prompt box.
EXAMPLES: list = [
    "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k",
    "A lonely astronaut standing on a quiet beach under two moons.",
    "A cozy cabin in a snowy forest at dusk, warm light in the windows.",
    "A bowl of ramen with steam rising, photorealistic, top-down view.",
]
# NOTE(review): the nesting below was reconstructed from a copy of the file
# whose indentation had been flattened — confirm the layout against the
# deployed app (in particular where gr.Examples and the seed row sit).
with gr.Blocks() as demo:
    gr.Markdown(
        """
        # 🍊 MiniT2I
        A minimalist text-to-image model — pick the **B/16** or **L/16** variant below.
        Both are preloaded. Images are generated at 512×512.
        """
    )

    with gr.Row():
        # Left column: prompt, model choice and sampling controls.
        with gr.Column(scale=1):
            prompt = gr.Textbox(
                label="Prompt",
                placeholder="Describe the image you want to generate…",
                lines=3,
            )
            model_label = gr.Radio(
                choices=list(MODELS),
                value=next(iter(MODELS)),  # first entry of MODELS is the default
                label="Model",
            )
            run_btn = gr.Button("Generate", variant="primary")

            with gr.Accordion("Advanced settings", open=False):
                guidance_scale = gr.Slider(
                    label="Guidance scale",
                    minimum=1.0,
                    maximum=10.0,
                    step=0.1,
                    value=2.5,
                )
                num_inference_steps = gr.Slider(
                    label="Inference steps",
                    minimum=10,
                    maximum=150,
                    step=1,
                    value=100,
                )
                with gr.Row():
                    seed = gr.Slider(
                        label="Seed",
                        minimum=0,
                        maximum=MAX_SEED,
                        step=1,
                        value=0,
                    )
                    randomize_seed = gr.Checkbox(label="Randomize seed", value=True)

        # Right column: the generated image.
        with gr.Column(scale=1):
            output = gr.Image(label="Result", height=512)

    gr.Examples(examples=EXAMPLES, inputs=prompt)

    # The button click and pressing Enter in the prompt box run the same
    # handler; the seed slider is also an output so it shows the seed used.
    inputs = [prompt, model_label, guidance_scale, num_inference_steps, seed, randomize_seed]
    for trigger in (run_btn.click, prompt.submit):
        trigger(generate, inputs=inputs, outputs=[output, seed])

demo.launch(theme=gr.themes.Citrus())