Avmromanov committed on
Commit
8ec1acd
·
1 Parent(s): d14260e

img caption

Browse files
Files changed (1) hide show
  1. app.py +18 -131
app.py CHANGED
@@ -1,138 +1,25 @@
1
  import gradio as gr
2
- import torch
3
- from diffusers import StableDiffusionPipeline, StableDiffusionImg2ImgPipeline
4
- from PIL import Image
5
- import io
6
- import base64
7
 
8
- # Check if GPU is available
9
- device = "cuda" if torch.cuda.is_available() else "cpu"
10
 
11
- # Load text-to-image pipeline
12
- pipe_text_to_image = StableDiffusionPipeline.from_pretrained(
13
- "runwayml/stable-diffusion-v1-5",
14
- torch_dtype=torch.float16 if device == "cuda" else torch.float32
15
- ).to(device)
16
-
17
- # Load image-to-image pipeline
18
- pipe_image_to_image = StableDiffusionImg2ImgPipeline.from_pretrained(
19
- "runwayml/stable-diffusion-v1-5",
20
- torch_dtype=torch.float16 if device == "cuda" else torch.float32
21
- ).to(device)
22
-
23
- def generate_image(prompt, negative_prompt="", steps=20, guidance=7.5):
24
- """Generate image from text prompt"""
25
- if not prompt.strip():
26
- return None, "Please enter a prompt"
27
-
28
- try:
29
- with torch.autocast(device):
30
- image = pipe_text_to_image(
31
- prompt=prompt,
32
- negative_prompt=negative_prompt,
33
- num_inference_steps=steps,
34
- guidance_scale=guidance
35
- ).images[0]
36
- return image, "Image generated successfully!"
37
- except Exception as e:
38
- return None, f"Error: {str(e)}"
39
-
40
- def modify_image(image, prompt, strength=0.75, steps=20, guidance=7.5):
41
- """Modify existing image with prompt"""
42
  if image is None:
43
- return None, "Please upload an image first"
44
- if not prompt.strip():
45
- return None, "Please enter a modification prompt"
46
 
47
- try:
48
- # Resize image to appropriate dimensions
49
- width, height = image.size
50
- max_size = 512
51
- if width > max_size or height > max_size:
52
- image.thumbnail((max_size, max_size), Image.Resampling.LANCZOS)
53
-
54
- with torch.autocast(device):
55
- result = pipe_image_to_image(
56
- prompt=prompt,
57
- image=image,
58
- strength=strength,
59
- num_inference_steps=steps,
60
- guidance_scale=guidance
61
- ).images[0]
62
- return result, "Image modified successfully!"
63
- except Exception as e:
64
- return None, f"Error: {str(e)}"
65
 
66
- with gr.Blocks(title="AI Image Modifier") as demo:
67
- gr.Markdown("# 🎨 AI Image Generator & Modifier")
68
- gr.Markdown("Create new images or modify existing ones using text prompts!")
69
-
70
- with gr.Tab("Generate New Image"):
71
- with gr.Row():
72
- with gr.Column():
73
- gen_prompt = gr.Textbox(
74
- label="Describe your image",
75
- placeholder="A beautiful sunset over mountains, digital art...",
76
- lines=2
77
- )
78
- gen_negative = gr.Textbox(
79
- label="What to avoid (optional)",
80
- placeholder="blurry, low quality, distorted...",
81
- lines=1
82
- )
83
- with gr.Row():
84
- gen_steps = gr.Slider(1, 50, value=20, label="Steps")
85
- gen_guidance = gr.Slider(1, 20, value=7.5, label="Guidance Scale")
86
-
87
- gen_button = gr.Button("Generate Image", variant="primary")
88
-
89
- with gr.Column():
90
- gen_output = gr.Image(label="Generated Image")
91
- gen_status = gr.Textbox(label="Status", interactive=False)
92
-
93
- with gr.Tab("Modify Existing Image"):
94
- with gr.Row():
95
- with gr.Column():
96
- mod_input_image = gr.Image(label="Upload Image", type="pil")
97
- mod_prompt = gr.Textbox(
98
- label="How do you want to modify it?",
99
- placeholder="Make it look like winter, change style to oil painting...",
100
- lines=2
101
- )
102
- with gr.Row():
103
- mod_strength = gr.Slider(0.1, 1.0, value=0.75, label="Modification Strength")
104
- mod_steps = gr.Slider(1, 50, value=20, label="Steps")
105
- mod_guidance = gr.Slider(1, 20, value=7.5, label="Guidance Scale")
106
-
107
- mod_button = gr.Button("Modify Image", variant="primary")
108
-
109
- with gr.Column():
110
- mod_output = gr.Image(label="Modified Image")
111
- mod_status = gr.Textbox(label="Status", interactive=False)
112
-
113
- # Examples
114
- gr.Markdown("### Example Prompts:")
115
- gr.Examples(
116
- examples=[
117
- ["A majestic dragon flying over a medieval castle, fantasy art, highly detailed"],
118
- ["A cyberpunk cityscape at night, neon lights, raining, futuristic"],
119
- ["A cute corgi puppy wearing a superhero cape, cartoon style"],
120
- ["An astronaut riding a horse on Mars, photorealistic"]
121
- ],
122
- inputs=gen_prompt
123
- )
124
-
125
- # Connect functions
126
- gen_button.click(
127
- generate_image,
128
- inputs=[gen_prompt, gen_negative, gen_steps, gen_guidance],
129
- outputs=[gen_output, gen_status]
130
- )
131
-
132
- mod_button.click(
133
- modify_image,
134
- inputs=[mod_input_image, mod_prompt, mod_strength, mod_steps, mod_guidance],
135
- outputs=[mod_output, mod_status]
136
- )
137
 
138
- demo.launch(share=True)
 
1
  import gradio as gr
2
+ from transformers import pipeline
 
 
 
 
3
 
4
+ # Load image captioning model
5
+ captioner = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
6
 
7
+ def caption_image(image):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  if image is None:
9
+ return "Please upload an image"
 
 
10
 
11
+ result = captioner(image)
12
+ return result[0]['generated_text']
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
 
14
+ demo = gr.Interface(
15
+ fn=caption_image,
16
+ inputs=gr.Image(label="Upload Image", type="pil"),
17
+ outputs=gr.Textbox(label="Generated Caption"),
18
+ title="Image Captioning",
19
+ description="Upload an image and AI will generate a caption for it",
20
+ examples=[
21
+ ["https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/cats.png"]
22
+ ]
23
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
 
25
+ demo.launch()