maria355 committed on
Commit
d1e7dff
·
verified ·
1 Parent(s): c1bc582

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +63 -200
app.py CHANGED
@@ -9,17 +9,14 @@ import os
9
  from datetime import datetime
10
  import time
11
  import re
12
- import tempfile
13
 
14
- # Try to import optional dependencies
15
  try:
16
  import google.generativeai as genai
17
  GEMINI_AVAILABLE = True
18
  except ImportError:
19
  GEMINI_AVAILABLE = False
20
- print("Gemini AI not available - continuing without prompt enhancement")
21
 
22
- # Configure Gemini API if available
23
  GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
24
  if GEMINI_AVAILABLE and GEMINI_API_KEY:
25
  genai.configure(api_key=GEMINI_API_KEY)
@@ -29,27 +26,21 @@ if GEMINI_AVAILABLE and GEMINI_API_KEY:
29
  print(f"Error initializing Gemini: {e}")
30
  GEMINI_AVAILABLE = False
31
 
32
- # Hugging Face token
33
  HF_TOKEN = os.getenv("HUGGINGFACE_TOKEN") or os.getenv("HF_TOKEN")
34
 
35
- # Alternative text generation models to try
36
  TEXT_MODELS = [
37
  "microsoft/DialoGPT-medium",
38
  "gpt2",
39
- "facebook/blenderbot-400M-distill",
40
- "microsoft/DialoGPT-small"
41
  ]
42
 
43
- # Alternative image generation models to try
44
  IMAGE_MODELS = [
45
  "stabilityai/stable-diffusion-2-1",
46
  "runwayml/stable-diffusion-v1-5",
47
- "CompVis/stable-diffusion-v1-4",
48
- "stabilityai/stable-diffusion-2-1-base"
49
  ]
50
 
51
  def query_huggingface_text(payload, model_name):
52
- """Query Hugging Face text generation API with better error handling"""
53
  API_URL = f"https://api-inference.huggingface.co/models/{model_name}"
54
  headers = {}
55
 
@@ -63,21 +54,17 @@ def query_huggingface_text(payload, model_name):
63
  result = response.json()
64
  return result
65
  elif response.status_code == 503:
66
- print(f"Model {model_name} is loading, trying next model...")
67
  return None
68
  else:
69
- print(f"Error {response.status_code} with model {model_name}: {response.text}")
70
  return None
71
 
72
- except requests.exceptions.Timeout:
73
- print(f"Timeout with model {model_name}")
74
- return None
75
  except Exception as e:
76
  print(f"Error with model {model_name}: {str(e)}")
77
  return None
78
 
79
  def query_huggingface_image(payload, model_name):
80
- """Query Hugging Face image generation API with better error handling"""
81
  API_URL = f"https://api-inference.huggingface.co/models/{model_name}"
82
  headers = {}
83
 
@@ -89,71 +76,47 @@ def query_huggingface_image(payload, model_name):
89
 
90
  if response.status_code == 200:
91
  return response.content
92
- elif response.status_code == 503:
93
- print(f"Image model {model_name} is loading, trying next model...")
94
- return None
95
  else:
96
- print(f"Error {response.status_code} with image model {model_name}")
97
  return None
98
 
99
- except requests.exceptions.Timeout:
100
- print(f"Timeout with image model {model_name}")
101
- return None
102
  except Exception as e:
103
  print(f"Error with image model {model_name}: {str(e)}")
104
  return None
105
 
106
  def transcribe_audio(audio_file):
107
- """Convert speech to text using speech recognition with better error handling"""
108
  if audio_file is None:
109
  return "No audio file provided"
110
 
111
  recognizer = sr.Recognizer()
112
 
113
  try:
114
- # Handle different audio file types
115
  audio_path = str(audio_file)
116
 
117
- # Load and process audio file
118
  with sr.AudioFile(audio_path) as source:
119
- # Adjust for ambient noise if possible
120
- try:
121
- recognizer.adjust_for_ambient_noise(source, duration=0.2)
122
- except:
123
- pass # Skip if adjustment fails
124
  audio = recognizer.record(source)
125
 
126
- # Try Google Speech Recognition (free tier)
127
  try:
128
- text = recognizer.recognize_google(audio, language='en-US')
129
- if text.strip():
130
- return text
131
- else:
132
- return "No speech detected in the audio"
133
  except sr.UnknownValueError:
134
- return "Could not understand the audio. Please speak more clearly and try again."
135
  except sr.RequestError as e:
136
- return f"Speech recognition service temporarily unavailable: {str(e)}"
137
 
138
  except Exception as e:
139
- return f"Error processing audio file: {str(e)}. Please check your audio format."
140
 
141
  def enhance_prompt_with_gemini(text):
142
- """Enhance the prompt using Gemini API for better results"""
143
  if not (GEMINI_AVAILABLE and GEMINI_API_KEY):
144
  return text, text
145
 
146
  try:
147
  prompt = f"""
148
- Enhance this prompt for AI content and image generation. Make it more detailed and creative while keeping the original intent:
149
-
150
  Original: {text}
151
 
152
- Please provide:
153
- 1. An enhanced text generation prompt
154
- 2. An enhanced image generation prompt
155
-
156
- Format your response as:
157
  TEXT: [enhanced text prompt]
158
  IMAGE: [enhanced image prompt]
159
  """
@@ -161,7 +124,6 @@ def enhance_prompt_with_gemini(text):
161
  response = gemini_model.generate_content(prompt)
162
  enhanced = response.text
163
 
164
- # Parse the response
165
  text_match = re.search(r'TEXT:\s*(.+?)(?=IMAGE:|$)', enhanced, re.DOTALL)
166
  image_match = re.search(r'IMAGE:\s*(.+?)$', enhanced, re.DOTALL)
167
 
@@ -170,37 +132,29 @@ def enhance_prompt_with_gemini(text):
170
 
171
  return enhanced_text, enhanced_image
172
  except Exception as e:
173
- print(f"Gemini enhancement error: {str(e)}")
174
  return text, text
175
 
176
  def generate_text_content(prompt, content_type="blog"):
177
- """Generate text content using Hugging Face models"""
178
-
179
- # Enhance prompt with Gemini if available
180
  if GEMINI_AVAILABLE and GEMINI_API_KEY:
181
  enhanced_text, _ = enhance_prompt_with_gemini(prompt)
182
  prompt = enhanced_text
183
 
184
- # Adjust prompt based on content type
185
  content_templates = {
186
- "blog": f"Write a detailed blog post about: {prompt}\n\nBlog post:",
187
- "social": f"Write an engaging social media post about: {prompt}\n\nPost:",
188
- "caption": f"Write a creative caption for: {prompt}\n\nCaption:",
189
- "story": f"Write a short story about: {prompt}\n\nStory:"
190
  }
191
 
192
  full_prompt = content_templates.get(content_type, prompt)
193
 
194
- # Try different models until one works
195
  for model in TEXT_MODELS:
196
  payload = {
197
  "inputs": full_prompt,
198
  "parameters": {
199
  "max_length": 200,
200
- "temperature": 0.7,
201
- "do_sample": True,
202
- "top_p": 0.9,
203
- "repetition_penalty": 1.1
204
  }
205
  }
206
 
@@ -215,7 +169,6 @@ def generate_text_content(prompt, content_type="blog"):
215
  else:
216
  continue
217
 
218
- # Clean up the response
219
  if generated_text and generated_text.startswith(full_prompt):
220
  generated_text = generated_text[len(full_prompt):].strip()
221
 
@@ -223,76 +176,57 @@ def generate_text_content(prompt, content_type="blog"):
223
  return generated_text
224
 
225
  except Exception as e:
226
- print(f"Error processing result from {model}: {e}")
227
  continue
228
 
229
- # Fallback content if all models fail
230
  fallback_content = {
231
- "blog": f"# {prompt}\n\nThis is an interesting topic that deserves exploration. Here are some key points to consider:\n\nβ€’ The fundamental concepts and principles\nβ€’ Practical applications and use cases\nβ€’ Benefits and potential challenges\nβ€’ Future developments and trends\n\nThis topic offers many opportunities for further discussion and research.",
232
- "social": f"🌟 Excited to share thoughts on {prompt}! This is such an important topic that deserves more attention. What are your thoughts? #AI #Innovation",
233
- "caption": f"✨ {prompt} ✨ Sometimes the most beautiful moments come from the simplest ideas. πŸ“Έ #inspiration #creativity",
234
- "story": f"Once upon a time, there was something special about {prompt}. It captured the imagination of everyone who encountered it, leading to unexpected adventures and new discoveries. The end was just the beginning of something even more wonderful."
235
  }
236
 
237
- return fallback_content.get(content_type, f"Content generated for: {prompt}")
238
 
239
  def generate_image_from_text(prompt):
240
- """Generate image using Hugging Face Stable Diffusion models"""
241
-
242
- # Enhance prompt with Gemini if available
243
  if GEMINI_AVAILABLE and GEMINI_API_KEY:
244
  _, enhanced_image = enhance_prompt_with_gemini(prompt)
245
  prompt = enhanced_image
246
 
247
- # Add some style enhancements to the prompt
248
- enhanced_prompt = f"{prompt}, high quality, detailed, artistic, professional, masterpiece"
249
 
250
- # Try different image models until one works
251
  for model in IMAGE_MODELS:
252
- payload = {
253
- "inputs": enhanced_prompt,
254
- "parameters": {
255
- "num_inference_steps": 20,
256
- "guidance_scale": 7.5
257
- }
258
- }
259
 
260
  image_bytes = query_huggingface_image(payload, model)
261
 
262
  if image_bytes:
263
  try:
264
  image = Image.open(io.BytesIO(image_bytes))
265
- # Ensure image is in RGB mode
266
  if image.mode != 'RGB':
267
  image = image.convert('RGB')
268
  return image
269
  except Exception as e:
270
- print(f"Error opening image from {model}: {str(e)}")
271
  continue
272
 
273
- # Return a placeholder image if all models fail
274
  placeholder = Image.new('RGB', (512, 512), color='lightblue')
275
  return placeholder
276
 
277
  def process_voice_input(audio_file, content_type):
278
- """Main function to process voice input and generate content"""
279
-
280
  if audio_file is None:
281
  return "Please record some audio first", None, ""
282
 
283
- # Transcribe audio
284
  transcribed_text = transcribe_audio(audio_file)
285
 
286
  if transcribed_text.startswith("Error") or transcribed_text.startswith("Could not"):
287
  return transcribed_text, None, transcribed_text
288
 
289
- # Generate text content
290
  try:
291
  text_content = generate_text_content(transcribed_text, content_type)
292
  except Exception as e:
293
  text_content = f"Error generating text: {str(e)}"
294
 
295
- # Generate image
296
  try:
297
  image = generate_image_from_text(transcribed_text)
298
  except Exception as e:
@@ -302,18 +236,14 @@ def process_voice_input(audio_file, content_type):
302
  return text_content, image, transcribed_text
303
 
304
  def process_text_input(text_input, content_type):
305
- """Process direct text input"""
306
-
307
  if not text_input.strip():
308
  return "Please enter some text", None
309
 
310
- # Generate text content
311
  try:
312
  text_content = generate_text_content(text_input, content_type)
313
  except Exception as e:
314
  text_content = f"Error generating text: {str(e)}"
315
 
316
- # Generate image
317
  try:
318
  image = generate_image_from_text(text_input)
319
  except Exception as e:
@@ -323,61 +253,36 @@ def process_text_input(text_input, content_type):
323
  return text_content, image
324
 
325
  def create_interface():
326
- """Create the main Gradio interface optimized for Hugging Face Spaces"""
327
-
328
- # Custom CSS for better appearance
329
- custom_css = """
330
- .gradio-container {
331
- max-width: 1200px !important;
332
- }
333
- .main-header {
334
- text-align: center;
335
- background: linear-gradient(45deg, #FF6B6B, #4ECDC4);
336
- -webkit-background-clip: text;
337
- -webkit-text-fill-color: transparent;
338
- font-size: 2.5em;
339
- font-weight: bold;
340
- margin-bottom: 20px;
341
- }
342
- """
343
-
344
- with gr.Blocks(title="VociArt - Voice AI Creator", theme=gr.themes.Soft(), css=custom_css) as app:
345
-
346
- gr.HTML("""
347
- <div class="main-header">
348
- πŸŽ™οΈ VociArt - Voice AI Creator
349
- </div>
350
- """)
351
 
352
  gr.Markdown("""
353
- Transform your voice into AI-generated content and stunning visuals! πŸš€
 
 
354
 
355
- **✨ Features:** Voice-to-text β€’ AI content generation β€’ Image creation β€’ Multiple content types
356
  """)
357
 
358
  with gr.Tab("πŸŽ™οΈ Voice Input"):
359
  with gr.Row():
360
- with gr.Column(scale=1):
361
  audio_input = gr.Audio(
362
  sources=["microphone"],
363
  type="filepath",
364
- label="🎀 Record Your Voice",
365
- show_download_button=False
366
  )
367
 
368
  content_type = gr.Dropdown(
369
  choices=["blog", "social", "caption", "story"],
370
  value="blog",
371
- label="πŸ“ Content Type",
372
- info="Choose the type of content to generate"
373
  )
374
 
375
- voice_submit_btn = gr.Button("πŸš€ Generate from Voice", variant="primary", size="lg")
376
 
377
- with gr.Column(scale=1):
378
  transcribed_output = gr.Textbox(
379
  label="πŸ“ What You Said",
380
- placeholder="Your transcribed speech will appear here...",
381
  lines=3
382
  )
383
 
@@ -385,16 +290,13 @@ def create_interface():
385
  with gr.Column():
386
  text_output = gr.Textbox(
387
  label="πŸ“„ Generated Content",
388
- lines=8,
389
- placeholder="AI-generated content will appear here...",
390
- show_copy_button=True
391
  )
392
 
393
  with gr.Column():
394
  image_output = gr.Image(
395
  label="🎨 Generated Image",
396
- type="pil",
397
- show_download_button=True
398
  )
399
 
400
  with gr.Tab("⌨️ Text Input"):
@@ -402,7 +304,6 @@ def create_interface():
402
  with gr.Column():
403
  text_input = gr.Textbox(
404
  label="πŸ’­ Enter Your Idea",
405
- placeholder="Type your creative idea here...",
406
  lines=3
407
  )
408
 
@@ -412,101 +313,63 @@ def create_interface():
412
  label="πŸ“ Content Type"
413
  )
414
 
415
- text_submit_btn = gr.Button("πŸš€ Generate from Text", variant="primary", size="lg")
416
 
417
  with gr.Row():
418
  with gr.Column():
419
  text_output_2 = gr.Textbox(
420
  label="πŸ“„ Generated Content",
421
- lines=8,
422
- placeholder="AI-generated content will appear here...",
423
- show_copy_button=True
424
  )
425
 
426
  with gr.Column():
427
  image_output_2 = gr.Image(
428
  label="🎨 Generated Image",
429
- type="pil",
430
- show_download_button=True
431
  )
432
 
433
- with gr.Tab("ℹ️ About & Tips"):
434
  gr.Markdown("""
435
- ## 🌟 About VociArt
436
-
437
- VociArt transforms your spoken ideas into professional content and stunning visuals using cutting-edge AI technology.
438
 
439
- ### 🎯 How to Use:
440
- 1. **Voice Tab**: Click the microphone, speak your idea clearly, select content type, then click generate
441
- 2. **Text Tab**: Type your idea directly, choose content type, and generate
442
 
443
- ### πŸ“ Content Types:
444
- - **πŸ“° Blog**: Detailed articles and posts
445
- - **πŸ“± Social**: Engaging social media content
446
- - **πŸ“Έ Caption**: Creative image captions
447
- - **πŸ“š Story**: Short narratives and tales
448
 
449
- ### πŸ’‘ Pro Tips:
450
- - **Speak Clearly**: Use a quiet environment and speak at normal pace
451
- - **Be Specific**: Detailed prompts create better results
452
- - **Try Different Types**: Each content type has unique characteristics
453
- - **Use Keywords**: Include relevant terms for better image generation
454
 
455
- ### πŸ”§ Technical Features:
456
- - **Free AI Models**: Powered by Hugging Face's free inference API
457
- - **Speech Recognition**: Google Speech Recognition for transcription
458
- - **Smart Fallbacks**: Multiple models ensure reliability
459
- - **Gemini Enhancement**: Optional prompt improvement (if API key provided)
460
 
461
- ### 🎨 Example Prompts:
462
- - *"A futuristic city with flying cars at sunset"*
463
- - *"Write about the benefits of morning meditation"*
464
- - *"Create a social media post about healthy cooking"*
465
- - *"A magical forest with glowing mushrooms"*
466
-
467
- ---
468
- πŸ’ **Made with love using free AI models** - Perfect for creators, marketers, and storytellers!
469
  """)
470
 
471
- # Event handlers with better error handling
472
  voice_submit_btn.click(
473
  fn=process_voice_input,
474
  inputs=[audio_input, content_type],
475
- outputs=[text_output, image_output, transcribed_output],
476
- api_name="voice_generate"
477
  )
478
 
479
  text_submit_btn.click(
480
  fn=process_text_input,
481
  inputs=[text_input, text_content_type],
482
- outputs=[text_output_2, image_output_2],
483
- api_name="text_generate"
484
- )
485
-
486
- # Add examples
487
- gr.Examples(
488
- examples=[
489
- ["A peaceful mountain landscape with a lake", "caption"],
490
- ["The future of artificial intelligence in education", "blog"],
491
- ["Delicious homemade pizza recipe", "social"],
492
- ["A brave knight on a quest for the golden crown", "story"]
493
- ],
494
- inputs=[text_input, text_content_type],
495
- outputs=[text_output_2, image_output_2],
496
- fn=process_text_input,
497
- cache_examples=False
498
  )
499
 
500
  return app
501
 
502
- # Launch the application
503
  if __name__ == "__main__":
504
- print("πŸš€ Starting VociArt...")
505
  app = create_interface()
506
  app.launch(
507
  server_name="0.0.0.0",
508
- server_port=7860,
509
- share=False, # Set to False for Hugging Face Spaces
510
- show_error=True,
511
- quiet=False
512
  )
 
9
  from datetime import datetime
10
  import time
11
  import re
 
12
 
 
13
# Gemini support is optional: if the package is missing the app still runs,
# just without prompt enhancement.
try:
    import google.generativeai as genai
    GEMINI_AVAILABLE = True
except ImportError:
    GEMINI_AVAILABLE = False
    print("Gemini AI not available")
19
 
 
20
  GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
21
  if GEMINI_AVAILABLE and GEMINI_API_KEY:
22
  genai.configure(api_key=GEMINI_API_KEY)
 
26
  print(f"Error initializing Gemini: {e}")
27
  GEMINI_AVAILABLE = False
28
 
 
29
# Hugging Face Inference API token; either env var name is accepted.
# May be None — requests then go out unauthenticated (rate-limited).
HF_TOKEN = os.getenv("HUGGINGFACE_TOKEN") or os.getenv("HF_TOKEN")

# Candidate text-generation models, tried in order until one responds.
TEXT_MODELS = [
    "microsoft/DialoGPT-medium",
    "gpt2",
    "facebook/blenderbot-400M-distill",
]

# Candidate image-generation models, tried in order until one responds.
IMAGE_MODELS = [
    "stabilityai/stable-diffusion-2-1",
    "runwayml/stable-diffusion-v1-5",
    "CompVis/stable-diffusion-v1-4",
]
42
 
43
  def query_huggingface_text(payload, model_name):
 
44
  API_URL = f"https://api-inference.huggingface.co/models/{model_name}"
45
  headers = {}
46
 
 
54
  result = response.json()
55
  return result
56
  elif response.status_code == 503:
57
+ print(f"Model {model_name} is loading")
58
  return None
59
  else:
60
+ print(f"Error {response.status_code} with model {model_name}")
61
  return None
62
 
 
 
 
63
  except Exception as e:
64
  print(f"Error with model {model_name}: {str(e)}")
65
  return None
66
 
67
  def query_huggingface_image(payload, model_name):
 
68
  API_URL = f"https://api-inference.huggingface.co/models/{model_name}"
69
  headers = {}
70
 
 
76
 
77
  if response.status_code == 200:
78
  return response.content
 
 
 
79
  else:
80
+ print(f"Error with image model {model_name}")
81
  return None
82
 
 
 
 
83
  except Exception as e:
84
  print(f"Error with image model {model_name}: {str(e)}")
85
  return None
86
 
87
def transcribe_audio(audio_file):
    """Convert a recorded audio file to text with Google Speech Recognition.

    Returns the transcribed text on success, or a human-readable error
    string on failure. NOTE(review): callers gate on the "Error"/"Could not"
    prefixes, but the RequestError branch returns a string starting with
    "Speech recognition error" — confirm that is intended.
    """
    if audio_file is None:
        return "No audio file provided"

    recognizer = sr.Recognizer()

    try:
        # Gradio supplies a filepath-like object; coerce for sr.AudioFile.
        audio_path = str(audio_file)

        with sr.AudioFile(audio_path) as source:
            audio = recognizer.record(source)

        try:
            text = recognizer.recognize_google(audio)
            return text
        except sr.UnknownValueError:
            # Audio was readable but no speech could be recognized.
            return "Could not understand the audio"
        except sr.RequestError as e:
            # The recognition service itself was unreachable/unhappy.
            return f"Speech recognition error: {str(e)}"

    except Exception as e:
        # Covers unreadable/unsupported audio files.
        return f"Error processing audio: {str(e)}"
109
 
110
  def enhance_prompt_with_gemini(text):
 
111
  if not (GEMINI_AVAILABLE and GEMINI_API_KEY):
112
  return text, text
113
 
114
  try:
115
  prompt = f"""
116
+ Enhance this prompt for content and image generation:
 
117
  Original: {text}
118
 
119
+ Provide:
 
 
 
 
120
  TEXT: [enhanced text prompt]
121
  IMAGE: [enhanced image prompt]
122
  """
 
124
  response = gemini_model.generate_content(prompt)
125
  enhanced = response.text
126
 
 
127
  text_match = re.search(r'TEXT:\s*(.+?)(?=IMAGE:|$)', enhanced, re.DOTALL)
128
  image_match = re.search(r'IMAGE:\s*(.+?)$', enhanced, re.DOTALL)
129
 
 
132
 
133
  return enhanced_text, enhanced_image
134
  except Exception as e:
135
+ print(f"Gemini error: {str(e)}")
136
  return text, text
137
 
138
  def generate_text_content(prompt, content_type="blog"):
 
 
 
139
  if GEMINI_AVAILABLE and GEMINI_API_KEY:
140
  enhanced_text, _ = enhance_prompt_with_gemini(prompt)
141
  prompt = enhanced_text
142
 
 
143
  content_templates = {
144
+ "blog": f"Write a blog post about: {prompt}\n\nPost:",
145
+ "social": f"Write a social media post about: {prompt}\n\nPost:",
146
+ "caption": f"Write a caption for: {prompt}\n\nCaption:",
147
+ "story": f"Write a story about: {prompt}\n\nStory:"
148
  }
149
 
150
  full_prompt = content_templates.get(content_type, prompt)
151
 
 
152
  for model in TEXT_MODELS:
153
  payload = {
154
  "inputs": full_prompt,
155
  "parameters": {
156
  "max_length": 200,
157
+ "temperature": 0.7
 
 
 
158
  }
159
  }
160
 
 
169
  else:
170
  continue
171
 
 
172
  if generated_text and generated_text.startswith(full_prompt):
173
  generated_text = generated_text[len(full_prompt):].strip()
174
 
 
176
  return generated_text
177
 
178
  except Exception as e:
179
+ print(f"Error processing result: {e}")
180
  continue
181
 
 
182
  fallback_content = {
183
+ "blog": f"# About {prompt}\n\nThis is an interesting topic with many aspects to explore. Here are key points:\n\nβ€’ Main concepts and principles\nβ€’ Practical applications\nβ€’ Future possibilities\n\nThis topic offers great potential for discussion.",
184
+ "social": f"Excited to share thoughts about {prompt}! This is such an important topic. What are your thoughts? #inspiration",
185
+ "caption": f"✨ {prompt} ✨ Beautiful moments from simple ideas. #creativity #inspiration",
186
+ "story": f"There was something special about {prompt}. It captured everyone's imagination, leading to wonderful adventures and discoveries."
187
  }
188
 
189
+ return fallback_content.get(content_type, f"Content about: {prompt}")
190
 
191
def generate_image_from_text(prompt):
    """Generate a PIL image for *prompt* via the Hugging Face inference API.

    Tries each model in IMAGE_MODELS in order and returns the first image
    that decodes; if every model fails, returns a plain light-blue 512x512
    placeholder so the UI always has something to display.
    """
    # Optionally rewrite the prompt with Gemini for richer imagery.
    if GEMINI_AVAILABLE and GEMINI_API_KEY:
        _, enhanced_image = enhance_prompt_with_gemini(prompt)
        prompt = enhanced_image

    # Light style boost appended to every prompt.
    enhanced_prompt = f"{prompt}, high quality, detailed, artistic"

    for model in IMAGE_MODELS:
        payload = {"inputs": enhanced_prompt}

        image_bytes = query_huggingface_image(payload, model)

        if image_bytes:
            try:
                image = Image.open(io.BytesIO(image_bytes))
                # Normalize mode so downstream display/saving is uniform.
                if image.mode != 'RGB':
                    image = image.convert('RGB')
                return image
            except Exception as e:
                # Bytes came back but were not a decodable image; try next model.
                print(f"Error opening image: {str(e)}")
                continue

    # All models failed: fall back to a placeholder image.
    placeholder = Image.new('RGB', (512, 512), color='lightblue')
    return placeholder
215
 
216
  def process_voice_input(audio_file, content_type):
 
 
217
  if audio_file is None:
218
  return "Please record some audio first", None, ""
219
 
 
220
  transcribed_text = transcribe_audio(audio_file)
221
 
222
  if transcribed_text.startswith("Error") or transcribed_text.startswith("Could not"):
223
  return transcribed_text, None, transcribed_text
224
 
 
225
  try:
226
  text_content = generate_text_content(transcribed_text, content_type)
227
  except Exception as e:
228
  text_content = f"Error generating text: {str(e)}"
229
 
 
230
  try:
231
  image = generate_image_from_text(transcribed_text)
232
  except Exception as e:
 
236
  return text_content, image, transcribed_text
237
 
238
  def process_text_input(text_input, content_type):
 
 
239
  if not text_input.strip():
240
  return "Please enter some text", None
241
 
 
242
  try:
243
  text_content = generate_text_content(text_input, content_type)
244
  except Exception as e:
245
  text_content = f"Error generating text: {str(e)}"
246
 
 
247
  try:
248
  image = generate_image_from_text(text_input)
249
  except Exception as e:
 
253
  return text_content, image
254
 
255
  def create_interface():
256
+ with gr.Blocks(title="VociArt - Voice AI Creator", theme=gr.themes.Soft()) as app:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
257
 
258
  gr.Markdown("""
259
+ # πŸŽ™οΈ VociArt - Voice AI Creator
260
+
261
+ Transform your voice into AI-generated content and images!
262
 
263
+ **Features:** Voice-to-text β€’ Content generation β€’ Image creation
264
  """)
265
 
266
  with gr.Tab("πŸŽ™οΈ Voice Input"):
267
  with gr.Row():
268
+ with gr.Column():
269
  audio_input = gr.Audio(
270
  sources=["microphone"],
271
  type="filepath",
272
+ label="🎀 Record Your Voice"
 
273
  )
274
 
275
  content_type = gr.Dropdown(
276
  choices=["blog", "social", "caption", "story"],
277
  value="blog",
278
+ label="πŸ“ Content Type"
 
279
  )
280
 
281
+ voice_submit_btn = gr.Button("πŸš€ Generate from Voice", variant="primary")
282
 
283
+ with gr.Column():
284
  transcribed_output = gr.Textbox(
285
  label="πŸ“ What You Said",
 
286
  lines=3
287
  )
288
 
 
290
  with gr.Column():
291
  text_output = gr.Textbox(
292
  label="πŸ“„ Generated Content",
293
+ lines=8
 
 
294
  )
295
 
296
  with gr.Column():
297
  image_output = gr.Image(
298
  label="🎨 Generated Image",
299
+ type="pil"
 
300
  )
301
 
302
  with gr.Tab("⌨️ Text Input"):
 
304
  with gr.Column():
305
  text_input = gr.Textbox(
306
  label="πŸ’­ Enter Your Idea",
 
307
  lines=3
308
  )
309
 
 
313
  label="πŸ“ Content Type"
314
  )
315
 
316
+ text_submit_btn = gr.Button("πŸš€ Generate from Text", variant="primary")
317
 
318
  with gr.Row():
319
  with gr.Column():
320
  text_output_2 = gr.Textbox(
321
  label="πŸ“„ Generated Content",
322
+ lines=8
 
 
323
  )
324
 
325
  with gr.Column():
326
  image_output_2 = gr.Image(
327
  label="🎨 Generated Image",
328
+ type="pil"
 
329
  )
330
 
331
+ with gr.Tab("ℹ️ About"):
332
  gr.Markdown("""
333
+ ## About VociArt
 
 
334
 
335
+ Transform spoken ideas into content and visuals using AI!
 
 
336
 
337
+ ### How to Use:
338
+ 1. **Voice**: Record your idea, select content type, generate
339
+ 2. **Text**: Type your idea, choose type, generate
 
 
340
 
341
+ ### Content Types:
342
+ - **Blog**: Articles and posts
343
+ - **Social**: Social media content
344
+ - **Caption**: Image captions
345
+ - **Story**: Short stories
346
 
347
+ ### Tips:
348
+ - Speak clearly in a quiet environment
349
+ - Be specific with your ideas
350
+ - Try different content types
 
351
 
352
+ Made with free AI models from Hugging Face!
 
 
 
 
 
 
 
353
  """)
354
 
 
355
  voice_submit_btn.click(
356
  fn=process_voice_input,
357
  inputs=[audio_input, content_type],
358
+ outputs=[text_output, image_output, transcribed_output]
 
359
  )
360
 
361
  text_submit_btn.click(
362
  fn=process_text_input,
363
  inputs=[text_input, text_content_type],
364
+ outputs=[text_output_2, image_output_2]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
365
  )
366
 
367
  return app
368
 
 
369
# Script entry point: build the Gradio UI and serve it.
if __name__ == "__main__":
    print("Starting VociArt...")
    app = create_interface()
    app.launch(
        server_name="0.0.0.0",  # listen on all interfaces (required on Spaces)
        server_port=7860,       # standard Hugging Face Spaces port
    )