|
|
import gradio as gr |
|
|
from transformers import pipeline |
|
|
from diffusers import StableDiffusionPipeline |
|
|
import torch |
|
|
import wget |
|
|
|
|
|
|
|
|
# Pick the compute device once: use the GPU when CUDA is available,
# otherwise fall back to the CPU. Every model below is placed on it.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
|
|
|
|
|
|
|
|
|
|
|
# Image-to-text pipeline (BLIP large captioning checkpoint) used to describe
# the uploaded picture in English.
caption_image = pipeline(
    task="image-to-text",
    model="Salesforce/blip-image-captioning-large",
    device=device,
)
|
|
|
|
|
# Text-to-image pipeline: load the Stable Diffusion v1.5 checkpoint first,
# then move it onto the selected device.
sd_pipeline = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5"
)
sd_pipeline = sd_pipeline.to(device)
|
|
|
|
|
|
|
|
# Translation pipeline backed by the distilled 600M NLLB-200 checkpoint,
# loaded in bfloat16. Source and target languages are chosen per call.
translator = pipeline(
    "translation",
    model="facebook/nllb-200-distilled-600M",
    device=device,
    torch_dtype=torch.bfloat16,
)
|
|
|
|
|
|
|
|
def generate_image_and_translate(image, num_images=1):
    """Caption an image, translate the caption, and generate images from it.

    Args:
        image: The picture to caption, as accepted by ``caption_image``.
            ``None`` means nothing was uploaded.
        num_images: How many images to generate from the English caption.
            Coerced with ``int()`` so a float-valued slider reading does not
            break ``range()``.

    Returns:
        A ``(generated_images, caption_en, caption_ar)`` tuple: the list of
        generated images, the English caption, and its Arabic translation.

    Raises:
        gr.Error: If ``image`` is ``None``.
    """
    # Fail with a readable message instead of an opaque pipeline traceback
    # when the user submits without uploading anything.
    if image is None:
        raise gr.Error("Please upload an image first.")

    # Describe the picture in English.
    caption_en = caption_image(image)[0]["generated_text"]

    # Translate the English caption to Arabic (NLLB language codes).
    caption_ar = translator(
        caption_en, src_lang="eng_Latn", tgt_lang="arb_Arab"
    )[0]["translation_text"]

    # One diffusion run per requested image, all from the same prompt.
    generated_images = [
        sd_pipeline(prompt=caption_en).images[0]
        for _ in range(int(num_images))
    ]

    return generated_images, caption_en, caption_ar
|
|
|
|
|
|
|
|
# Gradio UI: an image upload and an image-count slider feed
# generate_image_and_translate; its three return values fill, in order, the
# gallery and the two read-only caption text boxes.
interface = gr.Interface(
    fn=generate_image_and_translate,
    inputs=[
        gr.Image(type="pil", label="📤 Upload Image"),
        gr.Slider(minimum=1, maximum=10, label="🔢 Number of Images", value=1, step=1),
    ],
    outputs=[
        gr.Gallery(label="🖼️ Generated Images"),
        # NOTE(review): the leading "π" on the next two labels looks like a
        # mis-decoded emoji whose remaining bytes were lost; the intended
        # character cannot be recovered from this file — confirm and restore.
        gr.Textbox(label="π Generated Caption (English)", interactive=False),
        gr.Textbox(label="π Translated Caption (Arabic)", interactive=False),
    ],
    title="Image Generation and Captioning",
    description="Upload an image to extract a caption and display it in both Arabic and English. Then, a new image will be generated based on that caption.",
    theme='freddyaboulton/dracula_revamped',
)

# Launch the interface.
interface.launch()