maria355 committed on
Commit
69b204b
·
verified ·
1 Parent(s): 4741bb9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +234 -119
app.py CHANGED
@@ -5,92 +5,151 @@ import json
5
  import io
6
  import base64
7
  from PIL import Image
8
- import google.generativeai as genai
9
  import os
10
  from datetime import datetime
11
  import time
12
  import re
 
13
 
14
- # Configure Gemini API
 
 
 
 
 
 
 
 
15
  GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
16
- if GEMINI_API_KEY:
17
  genai.configure(api_key=GEMINI_API_KEY)
18
- gemini_model = genai.GenerativeModel('gemini-pro')
 
 
 
 
19
 
20
- # Hugging Face API endpoints for free models
21
- HF_TEXT_API_URL = "https://api-inference.huggingface.co/models/microsoft/DialoGPT-medium"
22
- HF_IMAGE_API_URL = "https://api-inference.huggingface.co/models/stabilityai/stable-diffusion-2-1"
23
 
24
  # Alternative text generation models to try
25
  TEXT_MODELS = [
26
  "microsoft/DialoGPT-medium",
27
- "gpt2-medium",
28
- "facebook/blenderbot-400M-distill"
 
29
  ]
30
 
31
  # Alternative image generation models to try
32
  IMAGE_MODELS = [
33
  "stabilityai/stable-diffusion-2-1",
34
  "runwayml/stable-diffusion-v1-5",
35
- "CompVis/stable-diffusion-v1-4"
 
36
  ]
37
 
38
  def query_huggingface_text(payload, model_name):
39
- """Query Hugging Face text generation API"""
40
  API_URL = f"https://api-inference.huggingface.co/models/{model_name}"
41
- headers = {"Authorization": f"Bearer {os.getenv('HUGGINGFACE_TOKEN', '')}"}
 
 
 
42
 
43
  try:
44
  response = requests.post(API_URL, headers=headers, json=payload, timeout=30)
 
45
  if response.status_code == 200:
46
- return response.json()
 
 
 
 
47
  else:
 
48
  return None
 
 
 
 
49
  except Exception as e:
50
  print(f"Error with model {model_name}: {str(e)}")
51
  return None
52
 
53
  def query_huggingface_image(payload, model_name):
54
- """Query Hugging Face image generation API"""
55
  API_URL = f"https://api-inference.huggingface.co/models/{model_name}"
56
- headers = {"Authorization": f"Bearer {os.getenv('HUGGINGFACE_TOKEN', '')}"}
 
 
 
57
 
58
  try:
59
  response = requests.post(API_URL, headers=headers, json=payload, timeout=60)
 
60
  if response.status_code == 200:
61
  return response.content
 
 
 
62
  else:
 
63
  return None
 
 
 
 
64
  except Exception as e:
65
  print(f"Error with image model {model_name}: {str(e)}")
66
  return None
67
 
68
  def transcribe_audio(audio_file):
69
- """Convert speech to text using speech recognition"""
70
  if audio_file is None:
71
  return "No audio file provided"
72
 
73
  recognizer = sr.Recognizer()
74
 
 
 
 
 
 
75
  try:
76
- # Load audio file
77
- with sr.AudioFile(audio_file) as source:
 
 
 
 
 
78
  audio = recognizer.record(source)
79
 
80
- # Recognize speech using Google Speech Recognition (free)
81
- text = recognizer.recognize_google(audio)
82
- return text
 
 
 
 
 
 
 
 
 
 
 
83
  except sr.UnknownValueError:
84
- return "Could not understand the audio"
85
  except sr.RequestError as e:
86
- return f"Error with speech recognition service: {str(e)}"
87
  except Exception as e:
88
  return f"Error processing audio: {str(e)}"
89
 
90
  def enhance_prompt_with_gemini(text):
91
  """Enhance the prompt using Gemini API for better results"""
92
- if not GEMINI_API_KEY:
93
- return text
94
 
95
  try:
96
  prompt = f"""
@@ -126,89 +185,101 @@ def generate_text_content(prompt, content_type="blog"):
126
  """Generate text content using Hugging Face models"""
127
 
128
  # Enhance prompt with Gemini if available
129
- if GEMINI_API_KEY:
130
  enhanced_text, _ = enhance_prompt_with_gemini(prompt)
131
  prompt = enhanced_text
132
 
133
  # Adjust prompt based on content type
134
- if content_type == "blog":
135
- full_prompt = f"Write a detailed blog post about: {prompt}\n\nBlog post:"
136
- elif content_type == "social":
137
- full_prompt = f"Write an engaging social media post about: {prompt}\n\nPost:"
138
- elif content_type == "caption":
139
- full_prompt = f"Write a creative caption for: {prompt}\n\nCaption:"
140
- elif content_type == "story":
141
- full_prompt = f"Write a short story about: {prompt}\n\nStory:"
142
- else:
143
- full_prompt = prompt
144
 
145
  # Try different models until one works
146
  for model in TEXT_MODELS:
147
  payload = {
148
  "inputs": full_prompt,
149
  "parameters": {
150
- "max_length": 500,
151
  "temperature": 0.7,
152
  "do_sample": True,
153
- "top_p": 0.9
 
154
  }
155
  }
156
 
157
  result = query_huggingface_text(payload, model)
158
 
159
  if result and len(result) > 0:
160
- if isinstance(result, list) and len(result) > 0:
161
- generated_text = result[0].get("generated_text", "")
 
 
 
 
 
 
162
  # Clean up the response
163
- if generated_text.startswith(full_prompt):
164
- generated_text = generated_text[len(full_prompt):].strip()
165
- return generated_text if generated_text else f"Generated content for: {prompt}"
166
- elif isinstance(result, dict):
167
- generated_text = result.get("generated_text", "")
168
- if generated_text.startswith(full_prompt):
169
  generated_text = generated_text[len(full_prompt):].strip()
170
- return generated_text if generated_text else f"Generated content for: {prompt}"
 
 
 
 
 
 
171
 
172
  # Fallback content if all models fail
173
- return f"""Here's some content about {prompt}:
174
-
175
- This is an interesting topic that deserves exploration. The concept of {prompt} has various applications and implications that are worth discussing.
176
-
177
- Key points to consider:
178
- • The fundamental aspects of this topic
179
- • Its practical applications
180
- • Potential future developments
181
- • Impact on relevant stakeholders
182
-
183
- This content was generated based on your voice input and can be further customized according to your specific needs."""
184
 
185
  def generate_image_from_text(prompt):
186
  """Generate image using Hugging Face Stable Diffusion models"""
187
 
188
  # Enhance prompt with Gemini if available
189
- if GEMINI_API_KEY:
190
  _, enhanced_image = enhance_prompt_with_gemini(prompt)
191
  prompt = enhanced_image
192
 
193
  # Add some style enhancements to the prompt
194
- enhanced_prompt = f"{prompt}, high quality, detailed, artistic, professional"
195
 
196
  # Try different image models until one works
197
  for model in IMAGE_MODELS:
198
- payload = {"inputs": enhanced_prompt}
 
 
 
 
 
 
199
 
200
  image_bytes = query_huggingface_image(payload, model)
201
 
202
  if image_bytes:
203
  try:
204
  image = Image.open(io.BytesIO(image_bytes))
 
 
 
205
  return image
206
  except Exception as e:
207
  print(f"Error opening image from {model}: {str(e)}")
208
  continue
209
 
210
  # Return a placeholder image if all models fail
211
- placeholder = Image.new('RGB', (512, 512), color='lightgray')
212
  return placeholder
213
 
214
  def process_voice_input(audio_file, content_type):
@@ -259,60 +330,79 @@ def process_text_input(text_input, content_type):
259
 
260
  return text_content, image
261
 
262
- # Create Gradio interface
263
  def create_interface():
264
- """Create the main Gradio interface"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
265
 
266
- with gr.Blocks(title="VociArt - Voice-Controlled AI Content Creator", theme=gr.themes.Soft()) as app:
267
 
268
- gr.Markdown("""
269
- # 🎙️ VociArt - Voice-Controlled AI Content Creator
 
 
 
270
 
271
- Generate AI content and images using just your voice! Speak your ideas and watch them come to life as both text and visuals.
 
272
 
273
- **Features:**
274
- 🗣️ Voice-to-text conversion
275
- 📝 AI text content generation
276
- 🎨 AI image generation
277
- 🌍 Multi-language support
278
- 💾 Save and share outputs
279
  """)
280
 
281
  with gr.Tab("🎙️ Voice Input"):
282
  with gr.Row():
283
- with gr.Column():
284
  audio_input = gr.Audio(
285
  sources=["microphone"],
286
  type="filepath",
287
- label="🎀 Record Your Voice"
 
288
  )
289
 
290
  content_type = gr.Dropdown(
291
  choices=["blog", "social", "caption", "story"],
292
  value="blog",
293
- label="πŸ“ Content Type"
 
294
  )
295
 
296
- voice_submit_btn = gr.Button("🚀 Generate Content from Voice", variant="primary")
297
 
298
- with gr.Column():
299
  transcribed_output = gr.Textbox(
300
- label="πŸ“ Transcribed Text",
301
- placeholder="Your speech will appear here..."
 
302
  )
303
 
304
  with gr.Row():
305
  with gr.Column():
306
  text_output = gr.Textbox(
307
- label="📄 Generated Text Content",
308
- lines=10,
309
- placeholder="Generated text content will appear here..."
 
310
  )
311
 
312
  with gr.Column():
313
  image_output = gr.Image(
314
  label="🎨 Generated Image",
315
- type="pil"
 
316
  )
317
 
318
  with gr.Tab("⌨️ Text Input"):
@@ -320,7 +410,7 @@ def create_interface():
320
  with gr.Column():
321
  text_input = gr.Textbox(
322
  label="πŸ’­ Enter Your Idea",
323
- placeholder="Type your content idea here...",
324
  lines=3
325
  )
326
 
@@ -330,76 +420,101 @@ def create_interface():
330
  label="πŸ“ Content Type"
331
  )
332
 
333
- text_submit_btn = gr.Button("🚀 Generate Content from Text", variant="primary")
334
 
335
  with gr.Row():
336
  with gr.Column():
337
  text_output_2 = gr.Textbox(
338
- label="📄 Generated Text Content",
339
- lines=10,
340
- placeholder="Generated text content will appear here..."
 
341
  )
342
 
343
  with gr.Column():
344
  image_output_2 = gr.Image(
345
  label="🎨 Generated Image",
346
- type="pil"
 
347
  )
348
 
349
- with gr.Tab("ℹ️ About"):
350
  gr.Markdown("""
351
- ## About VociArt
 
 
352
 
353
- VociArt is a revolutionary voice-controlled AI content creator that transforms your spoken ideas into both text content and stunning visuals.
 
 
354
 
355
- ### How it works:
356
- 1. **Record**: Speak your ideas using the microphone
357
- 2. **Process**: AI transcribes and enhances your prompt
358
- 3. **Generate**: Creates both text content and images
359
- 4. **Customize**: Choose from different content types
360
 
361
- ### Content Types:
362
- - **Blog**: Detailed blog posts and articles
363
- - **Social**: Social media posts and updates
364
- - **Caption**: Creative captions for images
365
- - **Story**: Short stories and narratives
366
 
367
- ### Technologies Used:
368
- - Hugging Face Transformers (Free models)
369
- - Google Speech Recognition
370
- - Gemini AI for prompt enhancement
371
- - Stable Diffusion for image generation
372
 
373
- ### Tips for best results:
374
- - Speak clearly and at a moderate pace
375
- - Be specific about what you want
376
- - Try different content types
377
- - Use descriptive language for better images
378
 
379
  ---
380
- *Made with ❤️ using free AI models*
381
  """)
382
 
383
- # Event handlers
384
  voice_submit_btn.click(
385
  fn=process_voice_input,
386
  inputs=[audio_input, content_type],
387
- outputs=[text_output, image_output, transcribed_output]
 
388
  )
389
 
390
  text_submit_btn.click(
391
  fn=process_text_input,
392
  inputs=[text_input, text_content_type],
393
- outputs=[text_output_2, image_output_2]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
394
  )
395
 
396
  return app
397
 
398
  # Launch the application
399
  if __name__ == "__main__":
 
400
  app = create_interface()
401
  app.launch(
402
  server_name="0.0.0.0",
403
  server_port=7860,
404
- share=True
 
 
405
  )
 
5
  import io
6
  import base64
7
  from PIL import Image
 
8
  import os
9
  from datetime import datetime
10
  import time
11
  import re
12
+ import tempfile
13
 
14
+ # Try to import optional dependencies
15
+ try:
16
+ import google.generativeai as genai
17
+ GEMINI_AVAILABLE = True
18
+ except ImportError:
19
+ GEMINI_AVAILABLE = False
20
+ print("Gemini AI not available - continuing without prompt enhancement")
21
+
22
+ # Configure Gemini API if available
23
  GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
24
+ if GEMINI_AVAILABLE and GEMINI_API_KEY:
25
  genai.configure(api_key=GEMINI_API_KEY)
26
+ try:
27
+ gemini_model = genai.GenerativeModel('gemini-pro')
28
+ except Exception as e:
29
+ print(f"Error initializing Gemini: {e}")
30
+ GEMINI_AVAILABLE = False
31
 
32
+ # Hugging Face token
33
+ HF_TOKEN = os.getenv("HUGGINGFACE_TOKEN") or os.getenv("HF_TOKEN")
 
34
 
35
  # Alternative text generation models to try
36
  TEXT_MODELS = [
37
  "microsoft/DialoGPT-medium",
38
+ "gpt2",
39
+ "facebook/blenderbot-400M-distill",
40
+ "microsoft/DialoGPT-small"
41
  ]
42
 
43
  # Alternative image generation models to try
44
  IMAGE_MODELS = [
45
  "stabilityai/stable-diffusion-2-1",
46
  "runwayml/stable-diffusion-v1-5",
47
+ "CompVis/stable-diffusion-v1-4",
48
+ "stabilityai/stable-diffusion-2-1-base"
49
  ]
50
 
51
  def query_huggingface_text(payload, model_name):
52
+ """Query Hugging Face text generation API with better error handling"""
53
  API_URL = f"https://api-inference.huggingface.co/models/{model_name}"
54
+ headers = {}
55
+
56
+ if HF_TOKEN:
57
+ headers["Authorization"] = f"Bearer {HF_TOKEN}"
58
 
59
  try:
60
  response = requests.post(API_URL, headers=headers, json=payload, timeout=30)
61
+
62
  if response.status_code == 200:
63
+ result = response.json()
64
+ return result
65
+ elif response.status_code == 503:
66
+ print(f"Model {model_name} is loading, trying next model...")
67
+ return None
68
  else:
69
+ print(f"Error {response.status_code} with model {model_name}: {response.text}")
70
  return None
71
+
72
+ except requests.exceptions.Timeout:
73
+ print(f"Timeout with model {model_name}")
74
+ return None
75
  except Exception as e:
76
  print(f"Error with model {model_name}: {str(e)}")
77
  return None
78
 
79
  def query_huggingface_image(payload, model_name):
80
+ """Query Hugging Face image generation API with better error handling"""
81
  API_URL = f"https://api-inference.huggingface.co/models/{model_name}"
82
+ headers = {}
83
+
84
+ if HF_TOKEN:
85
+ headers["Authorization"] = f"Bearer {HF_TOKEN}"
86
 
87
  try:
88
  response = requests.post(API_URL, headers=headers, json=payload, timeout=60)
89
+
90
  if response.status_code == 200:
91
  return response.content
92
+ elif response.status_code == 503:
93
+ print(f"Image model {model_name} is loading, trying next model...")
94
+ return None
95
  else:
96
+ print(f"Error {response.status_code} with image model {model_name}")
97
  return None
98
+
99
+ except requests.exceptions.Timeout:
100
+ print(f"Timeout with image model {model_name}")
101
+ return None
102
  except Exception as e:
103
  print(f"Error with image model {model_name}: {str(e)}")
104
  return None
105
 
106
  def transcribe_audio(audio_file):
107
+ """Convert speech to text using speech recognition with better error handling"""
108
  if audio_file is None:
109
  return "No audio file provided"
110
 
111
  recognizer = sr.Recognizer()
112
 
113
+ # Adjust for ambient noise
114
+ recognizer.energy_threshold = 300
115
+ recognizer.dynamic_energy_threshold = True
116
+ recognizer.pause_threshold = 0.8
117
+
118
  try:
119
+ # Handle different audio file types
120
+ audio_path = str(audio_file)
121
+
122
+ # Load and process audio file
123
+ with sr.AudioFile(audio_path) as source:
124
+ # Adjust for ambient noise
125
+ recognizer.adjust_for_ambient_noise(source, duration=0.5)
126
  audio = recognizer.record(source)
127
 
128
+ # Try Google Speech Recognition first (free tier)
129
+ try:
130
+ text = recognizer.recognize_google(audio, language='en-US')
131
+ return text
132
+ except sr.RequestError:
133
+ # Fallback to offline recognition if available
134
+ try:
135
+ text = recognizer.recognize_sphinx(audio)
136
+ return text
137
+ except (sr.RequestError, sr.UnknownValueError):
138
+ pass
139
+
140
+ return "Could not transcribe the audio. Please try speaking more clearly."
141
+
142
  except sr.UnknownValueError:
143
+ return "Could not understand the audio. Please speak more clearly."
144
  except sr.RequestError as e:
145
+ return f"Speech recognition service error: {str(e)}"
146
  except Exception as e:
147
  return f"Error processing audio: {str(e)}"
148
 
149
  def enhance_prompt_with_gemini(text):
150
  """Enhance the prompt using Gemini API for better results"""
151
+ if not (GEMINI_AVAILABLE and GEMINI_API_KEY):
152
+ return text, text
153
 
154
  try:
155
  prompt = f"""
 
185
  """Generate text content using Hugging Face models"""
186
 
187
  # Enhance prompt with Gemini if available
188
+ if GEMINI_AVAILABLE and GEMINI_API_KEY:
189
  enhanced_text, _ = enhance_prompt_with_gemini(prompt)
190
  prompt = enhanced_text
191
 
192
  # Adjust prompt based on content type
193
+ content_templates = {
194
+ "blog": f"Write a detailed blog post about: {prompt}\n\nBlog post:",
195
+ "social": f"Write an engaging social media post about: {prompt}\n\nPost:",
196
+ "caption": f"Write a creative caption for: {prompt}\n\nCaption:",
197
+ "story": f"Write a short story about: {prompt}\n\nStory:"
198
+ }
199
+
200
+ full_prompt = content_templates.get(content_type, prompt)
 
 
201
 
202
  # Try different models until one works
203
  for model in TEXT_MODELS:
204
  payload = {
205
  "inputs": full_prompt,
206
  "parameters": {
207
+ "max_length": 200,
208
  "temperature": 0.7,
209
  "do_sample": True,
210
+ "top_p": 0.9,
211
+ "repetition_penalty": 1.1
212
  }
213
  }
214
 
215
  result = query_huggingface_text(payload, model)
216
 
217
  if result and len(result) > 0:
218
+ try:
219
+ if isinstance(result, list) and len(result) > 0:
220
+ generated_text = result[0].get("generated_text", "")
221
+ elif isinstance(result, dict):
222
+ generated_text = result.get("generated_text", "")
223
+ else:
224
+ continue
225
+
226
  # Clean up the response
227
+ if generated_text and generated_text.startswith(full_prompt):
 
 
 
 
 
228
  generated_text = generated_text[len(full_prompt):].strip()
229
+
230
+ if generated_text and len(generated_text) > 10:
231
+ return generated_text
232
+
233
+ except Exception as e:
234
+ print(f"Error processing result from {model}: {e}")
235
+ continue
236
 
237
  # Fallback content if all models fail
238
+ fallback_content = {
239
+ "blog": f"# {prompt}\n\nThis is an interesting topic that deserves exploration. Here are some key points to consider:\n\n• The fundamental concepts and principles\n• Practical applications and use cases\n• Benefits and potential challenges\n• Future developments and trends\n\nThis topic offers many opportunities for further discussion and research.",
240
+ "social": f"🌟 Excited to share thoughts on {prompt}! This is such an important topic that deserves more attention. What are your thoughts? #AI #Innovation",
241
+ "caption": f"✨ {prompt} ✨ Sometimes the most beautiful moments come from the simplest ideas. 📸 #inspiration #creativity",
242
+ "story": f"Once upon a time, there was something special about {prompt}. It captured the imagination of everyone who encountered it, leading to unexpected adventures and new discoveries. The end was just the beginning of something even more wonderful."
243
+ }
244
+
245
+ return fallback_content.get(content_type, f"Content generated for: {prompt}")
 
 
 
246
 
247
  def generate_image_from_text(prompt):
248
  """Generate image using Hugging Face Stable Diffusion models"""
249
 
250
  # Enhance prompt with Gemini if available
251
+ if GEMINI_AVAILABLE and GEMINI_API_KEY:
252
  _, enhanced_image = enhance_prompt_with_gemini(prompt)
253
  prompt = enhanced_image
254
 
255
  # Add some style enhancements to the prompt
256
+ enhanced_prompt = f"{prompt}, high quality, detailed, artistic, professional, masterpiece"
257
 
258
  # Try different image models until one works
259
  for model in IMAGE_MODELS:
260
+ payload = {
261
+ "inputs": enhanced_prompt,
262
+ "parameters": {
263
+ "num_inference_steps": 20,
264
+ "guidance_scale": 7.5
265
+ }
266
+ }
267
 
268
  image_bytes = query_huggingface_image(payload, model)
269
 
270
  if image_bytes:
271
  try:
272
  image = Image.open(io.BytesIO(image_bytes))
273
+ # Ensure image is in RGB mode
274
+ if image.mode != 'RGB':
275
+ image = image.convert('RGB')
276
  return image
277
  except Exception as e:
278
  print(f"Error opening image from {model}: {str(e)}")
279
  continue
280
 
281
  # Return a placeholder image if all models fail
282
+ placeholder = Image.new('RGB', (512, 512), color='lightblue')
283
  return placeholder
284
 
285
  def process_voice_input(audio_file, content_type):
 
330
 
331
  return text_content, image
332
 
 
333
  def create_interface():
334
+ """Create the main Gradio interface optimized for Hugging Face Spaces"""
335
+
336
+ # Custom CSS for better appearance
337
+ custom_css = """
338
+ .gradio-container {
339
+ max-width: 1200px !important;
340
+ }
341
+ .main-header {
342
+ text-align: center;
343
+ background: linear-gradient(45deg, #FF6B6B, #4ECDC4);
344
+ -webkit-background-clip: text;
345
+ -webkit-text-fill-color: transparent;
346
+ font-size: 2.5em;
347
+ font-weight: bold;
348
+ margin-bottom: 20px;
349
+ }
350
+ """
351
 
352
+ with gr.Blocks(title="VociArt - Voice AI Creator", theme=gr.themes.Soft(), css=custom_css) as app:
353
 
354
+ gr.HTML("""
355
+ <div class="main-header">
356
+ 🎙️ VociArt - Voice AI Creator
357
+ </div>
358
+ """)
359
 
360
+ gr.Markdown("""
361
+ Transform your voice into AI-generated content and stunning visuals! 🚀
362
 
363
+ **✨ Features:** Voice-to-text • AI content generation • Image creation • Multiple content types
 
 
 
 
 
364
  """)
365
 
366
  with gr.Tab("🎙️ Voice Input"):
367
  with gr.Row():
368
+ with gr.Column(scale=1):
369
  audio_input = gr.Audio(
370
  sources=["microphone"],
371
  type="filepath",
372
+ label="🎀 Record Your Voice",
373
+ show_download_button=False
374
  )
375
 
376
  content_type = gr.Dropdown(
377
  choices=["blog", "social", "caption", "story"],
378
  value="blog",
379
+ label="πŸ“ Content Type",
380
+ info="Choose the type of content to generate"
381
  )
382
 
383
+ voice_submit_btn = gr.Button("🚀 Generate from Voice", variant="primary", size="lg")
384
 
385
+ with gr.Column(scale=1):
386
  transcribed_output = gr.Textbox(
387
+ label="πŸ“ What You Said",
388
+ placeholder="Your transcribed speech will appear here...",
389
+ lines=3
390
  )
391
 
392
  with gr.Row():
393
  with gr.Column():
394
  text_output = gr.Textbox(
395
+ label="📄 Generated Content",
396
+ lines=8,
397
+ placeholder="AI-generated content will appear here...",
398
+ show_copy_button=True
399
  )
400
 
401
  with gr.Column():
402
  image_output = gr.Image(
403
  label="🎨 Generated Image",
404
+ type="pil",
405
+ show_download_button=True
406
  )
407
 
408
  with gr.Tab("⌨️ Text Input"):
 
410
  with gr.Column():
411
  text_input = gr.Textbox(
412
  label="πŸ’­ Enter Your Idea",
413
+ placeholder="Type your creative idea here...",
414
  lines=3
415
  )
416
 
 
420
  label="πŸ“ Content Type"
421
  )
422
 
423
+ text_submit_btn = gr.Button("🚀 Generate from Text", variant="primary", size="lg")
424
 
425
  with gr.Row():
426
  with gr.Column():
427
  text_output_2 = gr.Textbox(
428
+ label="📄 Generated Content",
429
+ lines=8,
430
+ placeholder="AI-generated content will appear here...",
431
+ show_copy_button=True
432
  )
433
 
434
  with gr.Column():
435
  image_output_2 = gr.Image(
436
  label="🎨 Generated Image",
437
+ type="pil",
438
+ show_download_button=True
439
  )
440
 
441
+ with gr.Tab("ℹ️ About & Tips"):
442
  gr.Markdown("""
443
+ ## 🌟 About VociArt
444
+
445
+ VociArt transforms your spoken ideas into professional content and stunning visuals using cutting-edge AI technology.
446
 
447
+ ### 🎯 How to Use:
448
+ 1. **Voice Tab**: Click the microphone, speak your idea clearly, select content type, then click generate
449
+ 2. **Text Tab**: Type your idea directly, choose content type, and generate
450
 
451
+ ### πŸ“ Content Types:
452
+ - **πŸ“° Blog**: Detailed articles and posts
453
+ - **πŸ“± Social**: Engaging social media content
454
+ - **πŸ“Έ Caption**: Creative image captions
455
+ - **πŸ“š Story**: Short narratives and tales
456
 
457
+ ### πŸ’‘ Pro Tips:
458
+ - **Speak Clearly**: Use a quiet environment and speak at normal pace
459
+ - **Be Specific**: Detailed prompts create better results
460
+ - **Try Different Types**: Each content type has unique characteristics
461
+ - **Use Keywords**: Include relevant terms for better image generation
462
 
463
+ ### πŸ”§ Technical Features:
464
+ - **Free AI Models**: Powered by Hugging Face's free inference API
465
+ - **Speech Recognition**: Google Speech Recognition for transcription
466
+ - **Smart Fallbacks**: Multiple models ensure reliability
467
+ - **Gemini Enhancement**: Optional prompt improvement (if API key provided)
468
 
469
+ ### 🎨 Example Prompts:
470
+ - *"A futuristic city with flying cars at sunset"*
471
+ - *"Write about the benefits of morning meditation"*
472
+ - *"Create a social media post about healthy cooking"*
473
+ - *"A magical forest with glowing mushrooms"*
474
 
475
  ---
476
+ πŸ’ **Made with love using free AI models** - Perfect for creators, marketers, and storytellers!
477
  """)
478
 
479
+ # Event handlers with better error handling
480
  voice_submit_btn.click(
481
  fn=process_voice_input,
482
  inputs=[audio_input, content_type],
483
+ outputs=[text_output, image_output, transcribed_output],
484
+ api_name="voice_generate"
485
  )
486
 
487
  text_submit_btn.click(
488
  fn=process_text_input,
489
  inputs=[text_input, text_content_type],
490
+ outputs=[text_output_2, image_output_2],
491
+ api_name="text_generate"
492
+ )
493
+
494
+ # Add examples
495
+ gr.Examples(
496
+ examples=[
497
+ ["A peaceful mountain landscape with a lake", "caption"],
498
+ ["The future of artificial intelligence in education", "blog"],
499
+ ["Delicious homemade pizza recipe", "social"],
500
+ ["A brave knight on a quest for the golden crown", "story"]
501
+ ],
502
+ inputs=[text_input, text_content_type],
503
+ outputs=[text_output_2, image_output_2],
504
+ fn=process_text_input,
505
+ cache_examples=False
506
  )
507
 
508
  return app
509
 
510
  # Launch the application
511
  if __name__ == "__main__":
512
+ print("🚀 Starting VociArt...")
513
  app = create_interface()
514
  app.launch(
515
  server_name="0.0.0.0",
516
  server_port=7860,
517
+ share=False, # Set to False for Hugging Face Spaces
518
+ show_error=True,
519
+ quiet=False
520
  )