ziadmostafa committed on
Commit
ffc2acd
·
1 Parent(s): 30fa863

first commit

Browse files
Files changed (2) hide show
  1. app.py +203 -0
  2. requirements.txt +3 -0
app.py ADDED
@@ -0,0 +1,203 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import requests
3
+ import gradio as gr
4
+ from PIL import Image
5
+ import io
6
+
7
class MultimodalImageCreator:
    """Caption an image and generate image variations via the Hugging Face Inference API.

    The API token is read from the ``HF_API_TOKEN`` environment variable when
    an instance is created.
    """

    # Seconds to wait for an Inference API response. Without a timeout,
    # ``requests`` waits indefinitely and a stalled request would hang the UI.
    REQUEST_TIMEOUT = 120

    def __init__(self):
        """
        Initialize the Multimodal Image Creator.

        Raises:
            ValueError: If ``HF_API_TOKEN`` is unset or empty.
        """
        # Retrieve API token from environment variable
        self.hf_token = os.environ.get('HF_API_TOKEN')

        if not self.hf_token:
            raise ValueError(
                "Hugging Face API token not found. "
                "Set it in Spaces secrets or as an environment variable."
            )

        # Image Captioning API Endpoint
        self.caption_api_url = "https://api-inference.huggingface.co/models/Salesforce/blip-image-captioning-base"

        # Text-to-Image API Endpoint
        self.image_gen_api_url = "https://api-inference.huggingface.co/models/stabilityai/stable-diffusion-2"

        # Headers for requests that upload raw image bytes
        self.headers = {
            "Authorization": f"Bearer {self.hf_token}",
            "Content-Type": "application/octet-stream"
        }

    @staticmethod
    def _extract_caption(payload):
        """Return the caption text from a decoded captioning response.

        The success path expects a non-empty list whose first item is a dict
        with a ``generated_text`` key. Any other shape (empty list, dict,
        missing key) yields the fallback text instead of raising.
        """
        if isinstance(payload, list) and payload and isinstance(payload[0], dict):
            return payload[0].get('generated_text', 'No caption generated')
        return 'No caption generated'

    def generate_caption(self, image_path):
        """
        Generate a caption for the input image using the Hugging Face API.

        Args:
            image_path (str): Path to the input image

        Returns:
            str: The generated caption on success. On failure no exception is
                raised; a message starting with ``"Error:"`` (non-200 HTTP
                status) or ``"An error occurred:"`` (any exception) is
                returned instead.
        """
        try:
            # Read the image file
            with open(image_path, "rb") as f:
                data = f.read()

            # Make API request
            response = requests.post(
                self.caption_api_url,
                headers=self.headers,
                data=data,
                timeout=self.REQUEST_TIMEOUT
            )

            if response.status_code != 200:
                return f"Error: {response.status_code} - {response.text}"

            return self._extract_caption(response.json())

        except Exception as e:
            # Boundary for the UI: report the failure as text instead of raising.
            return f"An error occurred: {str(e)}"

    def generate_variations(self, caption, num_variations=3):
        """
        Generate image variations based on the input caption.

        Each variation is requested separately, so a failure on one request is
        logged and skipped without discarding images already generated.

        Args:
            caption (str): Base caption to generate images from
            num_variations (int): Number of image variations to generate.
                Integer-like values (e.g. ``3.0``) are accepted.

        Returns:
            list: Successfully generated PIL images (possibly fewer than
                requested, or empty).
        """
        try:
            count = int(num_variations)
        except (TypeError, ValueError) as e:
            print(f"An error occurred during image generation: {str(e)}")
            return []

        request_headers = {
            "Authorization": f"Bearer {self.hf_token}",
            "Content-Type": "application/json"
        }
        generated_images = []

        for i in range(count):
            # Create a slightly varied prompt
            varied_prompt = f"{caption}, artistic variation {i+1}, high quality"

            try:
                response = requests.post(
                    self.image_gen_api_url,
                    headers=request_headers,
                    json={"inputs": varied_prompt},
                    timeout=self.REQUEST_TIMEOUT
                )

                if response.status_code != 200:
                    print(f"Error generating variation {i+1}: {response.status_code}")
                    continue

                # Image.open is lazy; load() forces decoding here so a corrupt
                # payload is caught by this handler rather than later on.
                image = Image.open(io.BytesIO(response.content))
                image.load()
                generated_images.append(image)

            except Exception as e:
                print(f"An error occurred during image generation: {str(e)}")

        return generated_images
108
+
109
def create_gradio_interface():
    """
    Create a Gradio interface for the Multimodal Image Creator.

    Instantiates ``MultimodalImageCreator`` (which raises ``ValueError`` when
    the API token is missing) and wires an image upload plus a slider to a
    handler that captions the image and generates variations.

    Returns:
        gr.Blocks: Gradio interface
    """
    # Function-scope import: only this handler needs tempfile.
    import tempfile

    # Initialize the multimodal image creator
    creator = MultimodalImageCreator()

    def process_image(input_image, num_variations):
        """Caption the uploaded image and generate variations of it.

        Returns a 4-tuple matching the click outputs: the original image, the
        caption (or an error message), the list of generated images, and the
        variation descriptions as newline-separated text.
        """
        # Validate input
        if input_image is None:
            return None, "Please upload an image.", [], ""

        temp_image_path = None
        try:
            # A unique temp file per request keeps concurrent users from
            # overwriting or deleting each other's upload.
            with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as tmp:
                temp_image_path = tmp.name
            Image.fromarray(input_image).save(temp_image_path)

            # Generate caption
            original_caption = creator.generate_caption(temp_image_path)

            # generate_caption reports failures as text with these prefixes;
            # such a message must not be used as an image-generation prompt.
            if original_caption.startswith(("Error:", "An error occurred:")):
                return input_image, original_caption, [], ""

            # Create variations; the slider value is coerced to int because
            # it may not arrive as one.
            generated_images = creator.generate_variations(
                original_caption,
                num_variations=int(num_variations)
            )

            # The captions go to a Textbox, so join them into one string.
            variation_captions = "\n".join(
                f"Variation based on: {original_caption}"
                for _ in generated_images
            )

            return input_image, original_caption, generated_images, variation_captions

        except Exception as e:
            return None, f"An error occurred: {str(e)}", [], ""

        finally:
            # Clean up the temporary file on both success and failure.
            if temp_image_path is not None:
                try:
                    os.remove(temp_image_path)
                except OSError:
                    pass

    # Create Gradio Interface
    with gr.Blocks() as demo:
        gr.Markdown("# Multimodal Image Content Creator")
        gr.Markdown("Upload an image to generate a caption and create variations!")

        with gr.Row():
            # Input components
            with gr.Column():
                input_image = gr.Image(type="numpy", label="Upload Image")
                num_variations = gr.Slider(
                    minimum=1,
                    maximum=5,
                    value=3,
                    step=1,
                    label="Number of Variations"
                )
                submit_btn = gr.Button("Generate Variations")

            # Output components
            with gr.Column():
                # Original image and caption
                original_image_output = gr.Image(label="Original Image")
                original_caption = gr.Textbox(label="Generated Caption")

                # Variations gallery
                variations_gallery = gr.Gallery(label="Image Variations")
                variations_captions = gr.Textbox(label="Variation Prompts")

        # Set up the processing
        submit_btn.click(
            fn=process_image,
            inputs=[input_image, num_variations],
            outputs=[
                original_image_output,
                original_caption,
                variations_gallery,
                variations_captions
            ]
        )

    return demo
193
+
194
# Build the interface at import time so `demo` exists as a module-level name.
demo = create_gradio_interface()

# Start the server only when this file is executed directly.
if __name__ == "__main__":
    demo.launch(share=True, debug=True)
203
+
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ requests
2
+ gradio
3
+ pillow