Spaces:

ProfRom
/

TestSpace3

Sleeping

App Files Files Community

ProfRom committed 19 days ago

Commit

7004d4e

verified ·

1 Parent(s): 20351ba

Bakare - Unit 8

Browse files

Files changed (2) hide show

app.py +75 -26
requirements.txt +5 -3

app.py CHANGED Viewed

@@ -1,39 +1,88 @@
-import gradio as gr
-from transformers import VisionEncoderDecoderModel, ViTImageProcessor, AutoTokenizer
-from PIL import Image
 import torch
-# Load model
-model_name = "nlpconnect/vit-gpt2-image-captioning"
-model = VisionEncoderDecoderModel.from_pretrained(model_name)
-processor = ViTImageProcessor.from_pretrained(model_name)
-tokenizer = AutoTokenizer.from_pretrained(model_name)
 device = "cuda" if torch.cuda.is_available() else "cpu"
-model.to(device)
-# Caption function
-def predict_caption(image):
-    if image is None:
-        return "Upload an image."
-    if image.mode != "RGB":
-        image = image.convert("RGB")
-    pixel_values = processor(images=image, return_tensors="pt").pixel_values.to(device)
-    with torch.no_grad():
-        output_ids = model.generate(pixel_values, max_length=32, num_beams=4)
-    caption = tokenizer.decode(output_ids[0], skip_special_tokens=True)
-    return caption.strip()
-# UI
 demo = gr.Interface(
-    fn=predict_caption,
-    inputs=gr.Image(type="pil", label="Upload Image"),
-    outputs=gr.Textbox(label="Caption"),
-    title="AI Image Captioning",
-    description="Upload an image to get an AI-generated caption."
 )
 if __name__ == "__main__":

 import torch
+from diffusers import StableDiffusionPipeline
+import gradio as gr
+# -------------------------------------------------------
+# 1. LOAD PRETRAINED TEXT-TO-IMAGE MODEL
+# -------------------------------------------------------
+model_id = "runwayml/stable-diffusion-v1-5"  # checkpoint identifier handed to from_pretrained below
 device = "cuda" if torch.cuda.is_available() else "cpu"  # use the GPU whenever torch reports one
+dtype = torch.float16 if device == "cuda" else torch.float32  # half precision only on GPU; CPU keeps float32
+pipe = StableDiffusionPipeline.from_pretrained(
+    model_id,
+    torch_dtype=dtype,
+    safety_checker=None,  # NOTE(review): presumably turns off the NSFW safety checker; confirm this is intended for a public Space
+    use_safetensors=True
+)
+pipe = pipe.to(device)  # place the loaded pipeline on the selected device
+# -------------------------------------------------------
+# 2. CORE PREDICTION FUNCTION
+# -------------------------------------------------------
+def generate_image(prompt: str,
+                   num_inference_steps: int = 25,
+                   guidance_scale: float = 7.5):
+    """Generate one image for a text prompt using the module-level pipeline.
+
+    Args:
+        prompt: Text description of the image. A built-in example prompt is
+            substituted when it is empty or whitespace-only.
+        num_inference_steps: Step count forwarded to the pipeline; coerced
+            to ``int`` before use.
+        guidance_scale: Guidance scale forwarded to the pipeline; coerced
+            to ``float`` before use.
+
+    Returns:
+        The first entry of the pipeline result's ``images``.
+    """
+    if not prompt or not prompt.strip():
+        prompt = "A friendly robot reading a book in a cozy library, digital art"
+    # UI sliders may deliver numbers as floats (e.g. 25.0); the step count
+    # has to be an integer, so normalise both values once here.
+    call_kwargs = {
+        "num_inference_steps": int(num_inference_steps),
+        "guidance_scale": float(guidance_scale),
+    }
+    if device == "cuda":
+        # Autocast is only entered on GPU, matching the float16 pipeline.
+        with torch.autocast(device_type="cuda"):
+            result = pipe(prompt, **call_kwargs)
+    else:
+        result = pipe(prompt, **call_kwargs)
+    return result.images[0]
+# -------------------------------------------------------
+# 3. GRADIO UI
+# -------------------------------------------------------
+prompt_input = gr.Textbox(  # free-text prompt field; first input to generate_image
+    label="Enter your image prompt",
+    lines=2,
+    placeholder="e.g., 'A watercolor painting of a sunrise over mountains'"
+)
+steps_slider = gr.Slider(  # feeds num_inference_steps: 10 to 40 in steps of 1, default 25
+    minimum=10,
+    maximum=40,
+    value=25,
+    step=1,
+    label="Number of inference steps"
+)
+guidance_slider = gr.Slider(  # feeds guidance_scale: 1.0 to 15.0 in steps of 0.5, default 7.5
+    minimum=1.0,
+    maximum=15.0,
+    value=7.5,
+    step=0.5,
+    label="Guidance scale"
+)
+image_output = gr.Image(label="Generated image")  # shows the value returned by generate_image
 demo = gr.Interface(
+    fn=generate_image,
+    inputs=[prompt_input, steps_slider, guidance_slider],  # order matches generate_image's parameter order
+    outputs=image_output,
+    title="Multimodal Text-to-Image Generator",
+    description="Enter a prompt to generate an image using a pretrained text-to-image model."
 )
 if __name__ == "__main__":

requirements.txt CHANGED Viewed

@@ -1,4 +1,6 @@
-gradio
-transformers
 torch
-Pillow

+gradio>=4.0.0
+diffusers>=0.30.0
+transformers>=4.40.0
+accelerate>=0.30.0
 torch
+safetensors