ProfRom committed on
Commit
7004d4e
·
verified ·
1 Parent(s): 20351ba

Bakare - Unit 8

Browse files
Files changed (2) hide show
  1. app.py +75 -26
  2. requirements.txt +5 -3
app.py CHANGED
@@ -1,39 +1,88 @@
1
- import gradio as gr
2
- from transformers import VisionEncoderDecoderModel, ViTImageProcessor, AutoTokenizer
3
- from PIL import Image
4
  import torch
 
 
 
 
 
 
5
 
6
- # Load model
7
- model_name = "nlpconnect/vit-gpt2-image-captioning"
8
- model = VisionEncoderDecoderModel.from_pretrained(model_name)
9
- processor = ViTImageProcessor.from_pretrained(model_name)
10
- tokenizer = AutoTokenizer.from_pretrained(model_name)
11
 
12
  device = "cuda" if torch.cuda.is_available() else "cpu"
13
- model.to(device)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
 
15
- # Caption function
16
- def predict_caption(image):
17
- if image is None:
18
- return "Upload an image."
19
- if image.mode != "RGB":
20
- image = image.convert("RGB")
21
 
22
- pixel_values = processor(images=image, return_tensors="pt").pixel_values.to(device)
 
 
 
 
 
 
 
 
 
 
 
 
23
 
24
- with torch.no_grad():
25
- output_ids = model.generate(pixel_values, max_length=32, num_beams=4)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
 
27
- caption = tokenizer.decode(output_ids[0], skip_special_tokens=True)
28
- return caption.strip()
29
 
30
- # UI
31
  demo = gr.Interface(
32
- fn=predict_caption,
33
- inputs=gr.Image(type="pil", label="Upload Image"),
34
- outputs=gr.Textbox(label="Caption"),
35
- title="AI Image Captioning",
36
- description="Upload an image to get an AI-generated caption."
37
  )
38
 
39
  if __name__ == "__main__":
 
 
 
 
1
  import torch
2
+ from diffusers import StableDiffusionPipeline
3
+ import gradio as gr
4
+
5
+ # -------------------------------------------------------
6
+ # 1. LOAD PRETRAINED TEXT-TO-IMAGE MODEL
7
+ # -------------------------------------------------------
8
 
9
+ model_id = "runwayml/stable-diffusion-v1-5"
 
 
 
 
10
 
11
  device = "cuda" if torch.cuda.is_available() else "cpu"
12
+ dtype = torch.float16 if device == "cuda" else torch.float32
13
+
14
+ pipe = StableDiffusionPipeline.from_pretrained(
15
+ model_id,
16
+ torch_dtype=dtype,
17
+ safety_checker=None,
18
+ use_safetensors=True
19
+ )
20
+
21
+ pipe = pipe.to(device)
22
+
23
+
24
+ # -------------------------------------------------------
25
+ # 2. CORE PREDICTION FUNCTION
26
+ # -------------------------------------------------------
27
+
28
+ def generate_image(prompt: str,
29
+ num_inference_steps: int = 25,
30
+ guidance_scale: float = 7.5):
31
 
32
+ if not prompt or prompt.strip() == "":
33
+ prompt = "A friendly robot reading a book in a cozy library, digital art"
 
 
 
 
34
 
35
+ if device == "cuda":
36
+ with torch.autocast(device_type="cuda"):
37
+ result = pipe(
38
+ prompt,
39
+ num_inference_steps=num_inference_steps,
40
+ guidance_scale=guidance_scale
41
+ )
42
+ else:
43
+ result = pipe(
44
+ prompt,
45
+ num_inference_steps=num_inference_steps,
46
+ guidance_scale=guidance_scale
47
+ )
48
 
49
+ return result.images[0]
50
+
51
+
52
+ # -------------------------------------------------------
53
+ # 3. GRADIO UI
54
+ # -------------------------------------------------------
55
+
56
+ prompt_input = gr.Textbox(
57
+ label="Enter your image prompt",
58
+ lines=2,
59
+ placeholder="e.g., 'A watercolor painting of a sunrise over mountains'"
60
+ )
61
+
62
+ steps_slider = gr.Slider(
63
+ minimum=10,
64
+ maximum=40,
65
+ value=25,
66
+ step=1,
67
+ label="Number of inference steps"
68
+ )
69
+
70
+ guidance_slider = gr.Slider(
71
+ minimum=1.0,
72
+ maximum=15.0,
73
+ value=7.5,
74
+ step=0.5,
75
+ label="Guidance scale"
76
+ )
77
 
78
+ image_output = gr.Image(label="Generated image")
 
79
 
 
80
  demo = gr.Interface(
81
+ fn=generate_image,
82
+ inputs=[prompt_input, steps_slider, guidance_slider],
83
+ outputs=image_output,
84
+ title="Multimodal Text-to-Image Generator",
85
+ description="Enter a prompt to generate an image using a pretrained text-to-image model."
86
  )
87
 
88
  if __name__ == "__main__":
requirements.txt CHANGED
@@ -1,4 +1,6 @@
1
- gradio
2
- transformers
 
 
3
  torch
4
- Pillow
 
1
+ gradio>=4.0.0
2
+ diffusers>=0.30.0
3
+ transformers>=4.40.0
4
+ accelerate>=0.30.0
5
  torch
6
+ safetensors