maria355 committed on
Commit
7746fc0
·
verified ·
1 Parent(s): b820505

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +340 -663
app.py CHANGED
@@ -2,726 +2,403 @@ import streamlit as st
2
  import speech_recognition as sr
3
  import requests
4
  import io
5
- import base64
6
  from PIL import Image
 
7
  import google.generativeai as genai
8
  import time
9
- import json
10
  import os
11
  from datetime import datetime
 
12
 
13
- # Page configuration
14
  st.set_page_config(
15
- page_title="VociArt - Voice-Controlled AI Creator",
16
- page_icon="🎤",
17
- layout="wide"
 
18
  )
19
 
20
- # Custom CSS for better UI
21
- st.markdown("""
22
- <style>
23
- .main-header {
24
- text-align: center;
25
- background: linear-gradient(90deg, #667eea 0%, #764ba2 100%);
26
- -webkit-background-clip: text;
27
- -webkit-text-fill-color: transparent;
28
- font-size: 3rem;
29
- font-weight: bold;
30
- margin-bottom: 2rem;
31
- }
32
- .feature-box {
33
- background: #f0f2f6;
34
- padding: 20px;
35
- border-radius: 10px;
36
- margin: 10px 0;
37
- box-shadow: 0 2px 4px rgba(0,0,0,0.1);
38
- }
39
- .output-box {
40
- background: #ffffff;
41
- border: 2px solid #e0e0e0;
42
- padding: 20px;
43
- border-radius: 10px;
44
- margin: 10px 0;
45
- box-shadow: 0 2px 4px rgba(0,0,0,0.1);
46
- }
47
- .status-box {
48
- background: #e8f5e8;
49
- border: 1px solid #4caf50;
50
- padding: 10px;
51
- border-radius: 5px;
52
- margin: 10px 0;
53
- }
54
- .warning-box {
55
- background: #fff3cd;
56
- border: 1px solid #ffc107;
57
- padding: 10px;
58
- border-radius: 5px;
59
- margin: 10px 0;
60
- }
61
- </style>
62
- """, unsafe_allow_html=True)
63
-
64
  # Initialize session state
65
- if 'text_output' not in st.session_state:
66
- st.session_state.text_output = ""
67
- if 'image_output' not in st.session_state:
68
- st.session_state.image_output = None
69
- if 'voice_input' not in st.session_state:
70
- st.session_state.voice_input = ""
71
- if 'processed_prompt' not in st.session_state:
72
- st.session_state.processed_prompt = ""
73
- if 'hf_token' not in st.session_state:
74
- st.session_state.hf_token = ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
 
76
- # Initialize Gemini API
77
- def initialize_gemini():
78
- """Initialize Gemini API with secret key"""
79
  try:
80
- gemini_key = st.secrets.get("GEMINI_API_KEY")
81
- if gemini_key:
82
- genai.configure(api_key=gemini_key)
83
- return True
84
  else:
85
- st.error("⚠️ Gemini API key not found in secrets. Please configure GEMINI_API_KEY in your Streamlit secrets.")
86
- return False
87
  except Exception as e:
88
- st.error(f"❌ Error initializing Gemini API: {str(e)}")
89
- return False
90
 
91
- # Hugging Face API configuration
92
- HF_API_URL_TEXT = "https://api-inference.huggingface.co/models/microsoft/DialoGPT-medium"
93
- HF_API_URL_IMAGE = "https://api-inference.huggingface.co/models/stabilityai/stable-diffusion-2-1"
94
-
95
- def query_huggingface(api_url, payload, headers):
96
- """Query Hugging Face API with retry logic"""
97
- max_retries = 3
98
- for attempt in range(max_retries):
99
- try:
100
- response = requests.post(api_url, headers=headers, json=payload, timeout=30)
101
- if response.status_code == 503:
102
- # Model is loading, wait and retry
103
- st.warning(f"🔄 Model is loading... Attempt {attempt + 1}/{max_retries}")
104
- time.sleep(20) # Wait for model to load
105
- continue
106
- elif response.status_code == 200:
107
- return response
108
- else:
109
- st.error(f"❌ API Error: {response.status_code} - {response.text}")
110
- return None
111
- except requests.exceptions.Timeout:
112
- st.warning(f"⏰ Request timeout. Attempt {attempt + 1}/{max_retries}")
113
- time.sleep(5)
114
- except Exception as e:
115
- st.error(f"❌ Request failed: {str(e)}")
116
  return None
117
- return None
118
-
119
- def generate_text_content(prompt, content_type="general", hf_token=""):
120
- """Generate text content using Hugging Face free models"""
121
-
122
- if not hf_token:
123
- st.warning("⚠️ Hugging Face token required for text generation")
124
- return generate_fallback_content(prompt, content_type)
125
-
126
- # Enhanced prompts based on content type
127
- prompt_templates = {
128
- "blog": f"Write a comprehensive blog post about '{prompt}'. Include an engaging introduction, detailed main points with examples, and a strong conclusion. Make it informative and well-structured.",
129
- "social": f"Create an engaging social media post about '{prompt}'. Make it catchy, shareable, and include relevant hashtags. Keep it concise but impactful.",
130
- "story": f"Write a creative short story about '{prompt}'. Include vivid descriptions, interesting characters, and an engaging narrative with a clear beginning, middle, and end.",
131
- "caption": f"Write a compelling and creative caption about '{prompt}'. Make it engaging, relevant, and suitable for social media sharing.",
132
- "general": f"Create detailed, informative content about '{prompt}'. Provide comprehensive information with clear explanations and useful insights."
133
- }
134
-
135
- enhanced_prompt = prompt_templates.get(content_type, prompt_templates["general"])
136
-
137
- # Try different models for better text generation
138
- text_models = [
139
- "microsoft/DialoGPT-large",
140
- "facebook/blenderbot-400M-distill",
141
- "microsoft/DialoGPT-medium",
142
- "gpt2"
143
- ]
144
-
145
- headers = {"Authorization": f"Bearer {hf_token}"}
146
-
147
- for model in text_models:
148
- try:
149
- api_url = f"https://api-inference.huggingface.co/models/{model}"
150
- payload = {
151
- "inputs": enhanced_prompt,
152
- "parameters": {
153
- "max_length": 800,
154
- "temperature": 0.8,
155
- "do_sample": True,
156
- "top_p": 0.9,
157
- "repetition_penalty": 1.2
158
- }
159
- }
160
-
161
- with st.spinner(f"🤖 Generating {content_type} content using {model}..."):
162
- response = query_huggingface(api_url, payload, headers)
163
-
164
- if response and response.status_code == 200:
165
- result = response.json()
166
- if isinstance(result, list) and len(result) > 0:
167
- generated_text = result[0].get('generated_text', '')
168
- # Clean up the generated text
169
- if generated_text and len(generated_text.strip()) > 50:
170
- return clean_generated_text(generated_text, enhanced_prompt)
171
- elif isinstance(result, dict):
172
- generated_text = result.get('generated_text', result.get('response', ''))
173
- if generated_text and len(generated_text.strip()) > 50:
174
- return clean_generated_text(generated_text, enhanced_prompt)
175
-
176
- except Exception as e:
177
- st.warning(f"⚠️ Model {model} failed: {str(e)}")
178
- continue
179
-
180
- # Fallback: Generate structured content
181
- st.info("🔄 Using fallback content generation...")
182
- return generate_fallback_content(prompt, content_type)
183
-
184
- def clean_generated_text(text, original_prompt):
185
- """Clean and format generated text"""
186
- # Remove the original prompt from the beginning if it's repeated
187
- if text.startswith(original_prompt):
188
- text = text[len(original_prompt):].strip()
189
-
190
- # Remove excessive repetition and clean up
191
- lines = text.split('\n')
192
- cleaned_lines = []
193
- for line in lines:
194
- line = line.strip()
195
- if line and line not in cleaned_lines[-3:]: # Avoid immediate repetition
196
- cleaned_lines.append(line)
197
-
198
- return '\n'.join(cleaned_lines)
199
-
200
- def generate_fallback_content(prompt, content_type):
201
- """Generate fallback content when API fails"""
202
- templates = {
203
- "blog": f"""# {prompt.title()}
204
-
205
- ## Introduction
206
- {prompt} is a fascinating topic that deserves comprehensive exploration. In today's rapidly evolving world, understanding {prompt} has become increasingly important for various reasons.
207
-
208
- ## Key Insights
209
- • **Foundation**: The fundamental aspects of {prompt} provide essential knowledge that forms the basis for deeper understanding.
210
-
211
- • **Applications**: Real-world applications of {prompt} demonstrate its practical value and relevance in different contexts.
212
-
213
- • **Impact**: The influence of {prompt} extends beyond its immediate scope, affecting related areas and creating ripple effects.
214
-
215
- • **Future Perspectives**: Looking ahead, {prompt} continues to evolve and adapt, presenting new opportunities and challenges.
216
-
217
- ## Practical Considerations
218
- When dealing with {prompt}, it's important to consider multiple perspectives and approaches. Each situation may require different strategies and solutions.
219
-
220
- ## Conclusion
221
- {prompt} remains a significant and evolving topic. By understanding its various dimensions, we can better appreciate its importance and make informed decisions related to it.
222
-
223
- *This content serves as a starting point for deeper exploration of {prompt}.*""",
224
-
225
- "social": f"""🌟 Exploring the fascinating world of {prompt}! ✨
226
-
227
- There's something truly captivating about {prompt} that deserves our attention. Whether you're just discovering it or deepening your understanding, there's always more to learn! 🚀
228
-
229
- 💡 Key takeaway: {prompt} offers unique perspectives that can transform how we think about related topics.
230
-
231
- What's your experience with {prompt}? Share your thoughts below! 👇
232
-
233
- #Innovation #Learning #Growth #Discovery #Knowledge #AI #Content #Trending""",
234
-
235
- "story": f"""**The Discovery of {prompt}**
236
-
237
- Sarah had always been curious about {prompt}, but she never imagined how deeply it would impact her life. It started on a Tuesday morning, when she stumbled upon something extraordinary.
238
-
239
- The morning light filtered through her window as she began to explore {prompt} more deeply. What she discovered challenged everything she thought she knew. Each new piece of information was like solving a puzzle, revealing a bigger picture that was both beautiful and complex.
240
-
241
- As days turned into weeks, Sarah's understanding grew. She realized that {prompt} wasn't just a concept—it was a gateway to new possibilities. The connections she made, the insights she gained, all led to a profound transformation in her perspective.
242
-
243
- Looking back, Sarah smiled. Her journey with {prompt} had taught her that the most meaningful discoveries often come when we least expect them. Sometimes, the best adventures begin with simple curiosity.
244
-
245
- *The end of one discovery is often the beginning of another.*""",
246
-
247
- "caption": f"""✨ Discovering the magic of {prompt} ✨
248
-
249
- Sometimes the most beautiful moments come from exploring new ideas and perspectives. {prompt} reminds us that there's always something wonderful waiting to be discovered.
250
-
251
- 🌟 What inspires you today?
252
-
253
- #Inspiration #Discovery #Creativity #Growth #Innovation #AI #Content""",
254
-
255
- "general": f"""**Understanding {prompt}**
256
-
257
- {prompt} represents an important concept that merits thoughtful consideration. In our interconnected world, topics like {prompt} play significant roles in shaping our understanding and experiences.
258
-
259
- **Key Aspects:**
260
- - **Definition**: {prompt} encompasses various elements that contribute to its overall significance
261
- - **Relevance**: The importance of {prompt} becomes apparent when we examine its applications and implications
262
- - **Perspectives**: Different viewpoints on {prompt} offer valuable insights and enhance our comprehension
263
-
264
- **Practical Applications:**
265
- {prompt} finds relevance in multiple contexts, from theoretical discussions to practical implementations. Understanding these applications helps us appreciate its broader significance.
266
-
267
- **Conclusion:**
268
- {prompt} continues to be a topic worth exploring. As we deepen our understanding, we discover new dimensions and possibilities that enrich our knowledge and perspective.
269
-
270
- *This overview provides a foundation for further exploration of {prompt}.*"""
271
- }
272
- return templates.get(content_type, templates["general"])
273
-
274
- def generate_image(prompt, hf_token=""):
275
- """Generate image using Hugging Face free models"""
276
-
277
- if not hf_token:
278
- st.warning("⚠️ Hugging Face token required for image generation")
279
  return None
280
-
281
- headers = {"Authorization": f"Bearer {hf_token}"}
282
-
283
- # Enhanced prompt for better image generation
284
- enhanced_prompts = [
285
- f"high quality, detailed, beautiful {prompt}, digital art, trending on artstation, 4k, professional",
286
- f"stunning {prompt}, masterpiece, highly detailed, photorealistic, premium quality",
287
- f"artistic {prompt}, beautiful composition, vibrant colors, professional photography style"
288
- ]
289
-
290
- # Try different models and prompts
291
- image_models = [
292
- "stabilityai/stable-diffusion-2-1",
293
- "runwayml/stable-diffusion-v1-5",
294
- "CompVis/stable-diffusion-v1-4"
295
- ]
296
-
297
- for model in image_models:
298
- for enhanced_prompt in enhanced_prompts:
299
- try:
300
- api_url = f"https://api-inference.huggingface.co/models/{model}"
301
- payload = {"inputs": enhanced_prompt}
302
-
303
- with st.spinner(f"🎨 Creating image using {model}..."):
304
- response = query_huggingface(api_url, payload, headers)
305
-
306
- if response and response.status_code == 200:
307
- image_bytes = response.content
308
- if len(image_bytes) > 1000: # Ensure we got actual image data
309
- image = Image.open(io.BytesIO(image_bytes))
310
- return image
311
-
312
- except Exception as e:
313
- st.warning(f"⚠️ Image generation with {model} failed: {str(e)}")
314
- continue
315
-
316
- st.warning("🎨 Image generation failed. This might be due to model loading, API limits, or high demand. Please try again later.")
317
- return None
318
 
319
- def voice_to_text():
320
- """Convert voice input to text"""
321
  recognizer = sr.Recognizer()
322
 
323
  try:
324
  with sr.Microphone() as source:
325
- st.info("🎤 Listening... Speak clearly for 10-15 seconds!")
326
  recognizer.adjust_for_ambient_noise(source, duration=1)
327
- # Increased timeout and phrase time limit for better capture
328
- audio = recognizer.listen(source, timeout=15, phrase_time_limit=20)
329
 
330
- with st.spinner("🔄 Converting speech to text..."):
331
- # Try Google Speech Recognition first
332
- try:
333
- text = recognizer.recognize_google(audio)
334
- return text
335
- except:
336
- # Fallback to other recognition services if available
337
- try:
338
- text = recognizer.recognize_sphinx(audio)
339
- return text
340
- except:
341
- raise sr.UnknownValueError("Could not understand audio")
342
-
343
- except sr.RequestError as e:
344
- st.error(f"❌ Could not request results from speech recognition service: {e}")
345
- return None
346
- except sr.UnknownValueError:
347
- st.warning("⚠️ Could not understand the audio clearly. Please try speaking more clearly and ensure your microphone is working.")
348
- return None
349
  except sr.WaitTimeoutError:
350
- st.warning("⏰ No speech detected within the time limit. Please try again and speak immediately after clicking the button.")
351
- return None
 
 
 
 
 
 
352
  except Exception as e:
353
- st.error(f"❌ An error occurred during voice recognition: {e}")
354
- return None
355
 
356
- def translate_and_enhance_text(text, target_language="en"):
357
- """Translate text and enhance it for better AI generation using Gemini API"""
 
 
 
 
358
  try:
359
- if not initialize_gemini():
360
- return text # Return original if Gemini is not available
 
 
361
 
362
- model = genai.GenerativeModel('gemini-pro')
363
-
364
- if target_language == "en":
365
- # Just enhance the prompt for better AI generation
366
- prompt = f"""Enhance the following prompt for better AI content and image generation. Make it more descriptive and detailed while keeping the core meaning:
367
-
368
- Original prompt: "{text}"
369
-
370
- Enhanced prompt:"""
371
- else:
372
- # Translate and enhance
373
- language_names = {
374
- "es": "Spanish", "fr": "French", "de": "German",
375
- "it": "Italian", "pt": "Portuguese", "hi": "Hindi", "ur": "Urdu"
376
- }
377
- lang_name = language_names.get(target_language, target_language)
378
 
379
- prompt = f"""Translate the following text to {lang_name} and then enhance it for better AI content and image generation. Make it more descriptive and detailed:
380
-
381
- Original text: "{text}"
382
-
383
- Translated and enhanced prompt:"""
384
-
385
- response = model.generate_content(prompt)
386
- enhanced_text = response.text.strip()
387
-
388
- # Clean up the response
389
- if enhanced_text.lower().startswith("enhanced prompt:"):
390
- enhanced_text = enhanced_text[16:].strip()
391
- elif enhanced_text.lower().startswith("translated and enhanced prompt:"):
392
- enhanced_text = enhanced_text[31:].strip()
393
-
394
- return enhanced_text if enhanced_text else text
395
 
 
 
396
  except Exception as e:
397
- st.warning(f"⚠️ Translation/Enhancement failed: {e}")
398
  return text
399
 
400
- # Main App Interface
401
- st.markdown('<h1 class="main-header">🎤 VociArt</h1>', unsafe_allow_html=True)
402
- st.markdown('<p style="text-align: center; font-size: 1.2rem; color: #666;">Voice-Controlled AI Content Creator & Image Generator</p>', unsafe_allow_html=True)
403
-
404
- # Check Gemini API status
405
- gemini_status = initialize_gemini()
406
- if gemini_status:
407
- st.markdown('<div class="status-box">✅ Gemini API: Connected</div>', unsafe_allow_html=True)
408
- else:
409
- st.markdown('<div class="warning-box">⚠️ Gemini API: Not configured (translation features disabled)</div>', unsafe_allow_html=True)
410
-
411
- # Sidebar for configuration
412
- with st.sidebar:
413
- st.header("⚙️ Configuration")
414
-
415
- # HF Token input
416
- st.subheader("🔑 Hugging Face Token")
417
- hf_token = st.text_input("Enter your HF token", type="password",
418
- help="Get your free token from huggingface.co/settings/tokens")
419
-
420
- if hf_token:
421
- st.session_state.hf_token = hf_token
422
- st.success("✅ Token saved!")
423
-
424
- st.markdown("---")
425
-
426
- st.subheader("🌍 Language Settings")
427
- target_language = st.selectbox("Output Language",
428
- ["en", "es", "fr", "de", "it", "pt", "hi", "ur"],
429
- help="Select target language for translation")
430
-
431
- st.subheader("📝 Content Type")
432
- content_type = st.selectbox("Choose content type",
433
- ["general", "blog", "social", "story", "caption"],
434
- help="Select the type of content to generate")
435
-
436
- st.markdown("---")
437
-
438
- # API Status
439
- st.subheader("📊 API Status")
440
- if st.session_state.hf_token:
441
- st.success("🤗 Hugging Face: Ready")
442
  else:
443
- st.warning("🤗 Hugging Face: Token needed")
444
-
445
- if gemini_status:
446
- st.success("🔮 Gemini: Ready")
447
- else:
448
- st.error("🔮 Gemini: Not configured")
449
-
450
- st.markdown("---")
451
-
452
- # Instructions
453
- st.subheader("📖 Quick Guide")
454
- st.markdown("""
455
- 1. **Add HF Token** above
456
- 2. **Choose settings** (language & content type)
457
- 3. **Record voice** using audio widget or type prompt
458
- 4. **Generate** text/image content
459
- 5. **Download** your creations
460
-
461
- **Note:** Voice recording uses your browser's built-in audio recorder for better compatibility with cloud deployments.
462
- """)
463
-
464
- # Main content area
465
- col1, col2 = st.columns([1, 1])
466
-
467
- with col1:
468
- st.markdown('<div class="feature-box">', unsafe_allow_html=True)
469
- st.subheader("🎤 Voice Input")
470
-
471
- if st.button("🎙️ Start Voice Recording", type="primary", use_container_width=True):
472
- voice_text = voice_to_text()
473
- if voice_text:
474
- st.session_state.voice_input = voice_text
475
- # Translate and enhance if needed
476
- enhanced = translate_and_enhance_text(voice_text, target_language)
477
- st.session_state.processed_prompt = enhanced
478
 
479
- # Manual text input as fallback
480
- st.subheader("✍️ Manual Text Input")
481
- manual_input = st.text_area("Type your prompt here:", height=100,
482
- placeholder="Enter your creative prompt...")
483
 
484
- if st.button("🚀 Process Text", use_container_width=True):
485
- if manual_input:
486
- st.session_state.voice_input = manual_input
487
- # Translate and enhance if needed
488
- enhanced = translate_and_enhance_text(manual_input, target_language)
489
- st.session_state.processed_prompt = enhanced
490
-
491
- # Display current inputs
492
- if st.session_state.voice_input:
493
- st.success(f"**Original Input:** {st.session_state.voice_input}")
494
 
495
- if st.session_state.processed_prompt and st.session_state.processed_prompt != st.session_state.voice_input:
496
- st.info(f"**Enhanced Prompt:** {st.session_state.processed_prompt}")
 
 
 
 
497
 
498
- st.markdown('</div>', unsafe_allow_html=True)
499
 
500
- with col2:
501
- st.markdown('<div class="feature-box">', unsafe_allow_html=True)
502
- st.subheader("🤖 AI Generation")
503
 
504
- generation_col1, generation_col2 = st.columns([1, 1])
 
505
 
506
- with generation_col1:
507
- generate_text_disabled = not st.session_state.processed_prompt or not st.session_state.hf_token
508
- if st.button("📝 Generate Text",
509
- disabled=generate_text_disabled,
510
- use_container_width=True):
511
- if st.session_state.processed_prompt and st.session_state.hf_token:
512
- st.session_state.text_output = generate_text_content(
513
- st.session_state.processed_prompt,
514
- content_type,
515
- st.session_state.hf_token
516
- )
517
 
518
- with generation_col2:
519
- generate_image_disabled = not st.session_state.processed_prompt or not st.session_state.hf_token
520
- if st.button("🎨 Generate Image",
521
- disabled=generate_image_disabled,
522
- use_container_width=True):
523
- if st.session_state.processed_prompt and st.session_state.hf_token:
524
- st.session_state.image_output = generate_image(
525
- st.session_state.processed_prompt,
526
- st.session_state.hf_token
527
- )
528
 
529
- # Status messages
530
- if not st.session_state.processed_prompt:
531
- st.info("💡 Add a prompt to start generating!")
532
- elif not st.session_state.hf_token:
533
- st.warning("⚠️ Add Hugging Face token to enable generation!")
 
 
534
 
535
- st.markdown('</div>', unsafe_allow_html=True)
536
-
537
- # Output Display
538
- st.markdown("## 📤 Generated Content")
539
 
540
- output_col1, output_col2 = st.columns([1, 1])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
541
 
542
- with output_col1:
543
- st.markdown('<div class="output-box">', unsafe_allow_html=True)
544
- st.subheader("📝 Generated Text")
545
-
546
- if st.session_state.text_output:
547
- # Display text in a nice container
548
- formatted_text = st.session_state.text_output.replace('\n', '<br>')
549
- st.markdown(f"""
550
- <div style="background: #f8f9fa; padding: 15px; border-radius: 8px; border-left: 4px solid #007bff;">
551
- {formatted_text}
552
- </div>
553
- """, unsafe_allow_html=True)
554
 
555
- # Download and copy buttons
556
- timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
557
- download_col1, download_col2 = st.columns([1, 1])
 
 
 
558
 
559
- with download_col1:
560
- st.download_button(
561
- label="💾 Download Text",
562
- data=st.session_state.text_output,
563
- file_name=f"vociart_text_{timestamp}.txt",
564
- mime="text/plain",
565
- use_container_width=True
566
- )
567
 
568
- with download_col2:
569
- if st.button("📋 Copy Text", use_container_width=True):
570
- st.success(" Text copied to clipboard!")
571
-
572
- # Word count
573
- word_count = len(st.session_state.text_output.split())
574
- st.caption(f"📊 Word count: {word_count}")
575
 
576
- else:
577
- st.info("📝 Generated text will appear here...")
578
- st.markdown("""
579
- <div style="text-align: center; padding: 20px; color: #666;">
580
- <i>Your AI-generated content will be displayed here</i>
581
- </div>
582
- """, unsafe_allow_html=True)
583
-
584
- st.markdown('</div>', unsafe_allow_html=True)
585
-
586
- with output_col2:
587
- st.markdown('<div class="output-box">', unsafe_allow_html=True)
588
- st.subheader("🎨 Generated Image")
589
-
590
- if st.session_state.image_output:
591
- st.image(st.session_state.image_output,
592
- caption="AI Generated Image",
593
- use_column_width=True)
594
 
595
- # Download button for image
596
- img_buffer = io.BytesIO()
597
- st.session_state.image_output.save(img_buffer, format='PNG')
598
- img_buffer.seek(0)
599
 
600
- timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
 
 
 
 
 
 
 
 
 
 
601
 
602
- image_download_col1, image_download_col2 = st.columns([1, 1])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
603
 
604
- with image_download_col1:
605
- st.download_button(
606
- label="💾 Download Image",
607
- data=img_buffer.getvalue(),
608
- file_name=f"vociart_image_{timestamp}.png",
609
- mime="image/png",
610
- use_container_width=True
611
- )
 
612
 
613
- with image_download_col2:
614
- # Display image info
615
- width, height = st.session_state.image_output.size
616
- st.caption(f"📊 Size: {width}×{height}")
 
 
 
 
617
 
618
- else:
619
- st.info("🎨 Generated image will appear here...")
620
- st.markdown("""
621
- <div style="text-align: center; padding: 20px; color: #666;">
622
- <i>Your AI-generated image will be displayed here</i>
623
- </div>
624
- """, unsafe_allow_html=True)
625
-
626
- st.markdown('</div>', unsafe_allow_html=True)
627
-
628
- # Action buttons at the bottom
629
- st.markdown("---")
630
- action_col1, action_col2, action_col3 = st.columns([1, 1, 1])
631
-
632
- with action_col1:
633
- if st.button("🗑️ Clear All Content", type="secondary", use_container_width=True):
634
- st.session_state.text_output = ""
635
- st.session_state.image_output = None
636
- st.session_state.voice_input = ""
637
- st.session_state.processed_prompt = ""
638
- st.success("✅ All content cleared!")
639
- st.rerun()
640
-
641
- with action_col2:
642
- if st.button("🔄 Reset Session", use_container_width=True):
643
- for key in st.session_state.keys():
644
- del st.session_state[key]
645
- st.success("✅ Session reset!")
646
- st.rerun()
647
-
648
- with action_col3:
649
- if st.button("💡 Show Tips", use_container_width=True):
650
- st.info("""
651
- **💡 Pro Tips:**
652
- - Speak clearly and close to microphone
653
- - Use descriptive prompts for better results
654
- - Try different content types for variety
655
- - Wait for models to load (first time may be slow)
656
- """)
657
-
658
- # Footer with detailed instructions
659
- st.markdown("---")
660
- with st.expander("📖 Detailed Instructions & Setup Guide"):
661
- st.markdown("""
662
- ### 🚀 Complete Setup Guide:
663
-
664
- #### **1. Get Your Free API Keys:**
665
- - 🤗 **Hugging Face Token**:
666
- - Go to [huggingface.co](https://huggingface.co)
667
- - Sign up for free account
668
- - Go to Settings → Access Tokens
669
- - Create new token with "Read" permissions
670
- - Copy and paste in the sidebar
671
-
672
- #### **2. Configure Gemini API (Admin):**
673
- - 🔮 **For App Administrator**: Add `GEMINI_API_KEY` to Streamlit secrets
674
- - Get free key from [Google AI Studio](https://aistudio.google.com)
675
-
676
- #### **3. Using VociArt:**
677
-
678
- **Voice Method:**
679
- 1. Click "🎙️ Start Voice Recording"
680
- 2. Speak clearly when you see "Listening..."
681
- 3. Wait for speech-to-text conversion
682
- 4. Your prompt will be enhanced automatically
683
-
684
- **Manual Method:**
685
- 1. Type your prompt in the text area
686
- 2. Click "🚀 Process Text"
687
- 3. Your prompt will be enhanced for better AI generation
688
-
689
- **Generation:**
690
- 1. Choose your content type (blog, social, story, etc.)
691
- 2. Select target language for translation
692
- 3. Click "📝 Generate Text" or "🎨 Generate Image"
693
- 4. Wait for AI to create your content
694
- 5. Download or copy your results
695
-
696
- ### 🎯 Best Practices:
697
- - **Voice Input**: Speak slowly and clearly, avoid background noise
698
- - **Prompts**: Be descriptive and specific for better results
699
- - **Content Types**: Choose appropriate type for your needs
700
- - **Languages**: Translation works best with clear, simple prompts
701
- - **Patience**: First-time model loading may take 20-30 seconds
702
 
703
- ### 🔧 Troubleshooting:
704
- - **No audio detected**: Check microphone permissions and try again
705
- - **API errors**: Verify your Hugging Face token is valid
706
- - **Model loading**: Wait patiently, models load on first use
707
- - **Poor results**: Try rephrasing your prompt or changing content type
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
708
 
709
- ### 📱 Features:
710
- - ✅ **100% Free**: Uses only free APIs and models
711
- - ✅ **No Installation**: Runs in browser
712
- - ✅ **Multi-language**: Supports 8 languages with translation
713
- - **Voice Control**: Complete voice-to-content workflow
714
- - **Multiple Formats**: Blog posts, social media, stories, captions
715
- - **Download**: Save text and images locally
716
- - ✅ **Responsive**: Works on desktop and mobile
717
- """)
 
 
718
 
719
- # Version and credits
720
- st.markdown("""
721
- ---
722
- <div style="text-align: center; color: #666; font-size: 0.9rem;">
723
- <strong>VociArt v1.0</strong> - Voice-Controlled AI Content Creator<br>
724
- Powered by Hugging Face 🤗 | Google Gemini 🔮 | Streamlit ⚡<br>
725
- <em>Create amazing content with just your voice!</em>
726
- </div>
727
- """, unsafe_allow_html=True)
 
2
  import speech_recognition as sr
3
  import requests
4
  import io
 
5
  from PIL import Image
6
+ import base64
7
  import google.generativeai as genai
8
  import time
 
9
  import os
10
  from datetime import datetime
11
+ import json
12
 
13
+ # Configure page
14
  st.set_page_config(
15
+ page_title="VociArt - Voice AI Creator",
16
+ page_icon="🎨",
17
+ layout="wide",
18
+ initial_sidebar_state="expanded"
19
  )
20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  # Initialize session state
22
+ if 'generated_content' not in st.session_state:
23
+ st.session_state.generated_content = ""
24
+ if 'generated_image' not in st.session_state:
25
+ st.session_state.generated_image = None
26
+ if 'voice_text' not in st.session_state:
27
+ st.session_state.voice_text = ""
28
+ if 'history' not in st.session_state:
29
+ st.session_state.history = []
30
+
31
+ # Configure Gemini API
32
@st.cache_resource
def _load_gemini_model():
    """Configure the Gemini SDK and build the model object.

    This helper deliberately lets exceptions propagate: the cache decorator
    stores return values, so returning ``None`` on failure here would pin the
    failure in the cache and a later call (e.g. after the secret is added)
    would never retry.
    """
    # Get API key from Streamlit secrets
    api_key = st.secrets["GEMINI_API_KEY"]
    genai.configure(api_key=api_key)
    return genai.GenerativeModel('gemini-pro')


def configure_gemini():
    """Return the shared Gemini model, or ``None`` when setup fails.

    On success the model built by ``_load_gemini_model`` is reused across
    calls. On failure the error is shown with ``st.error`` and ``None`` is
    returned; nothing is cached in that case, so the next call tries again.

    Returns:
        The ``genai.GenerativeModel`` instance, or ``None`` on any error.
    """
    try:
        return _load_gemini_model()
    except Exception as e:  # boundary: any setup failure degrades to "no model"
        st.error(f"Error configuring Gemini API: {str(e)}")
        return None


# The function used to carry the cache decorator directly; keep its ``clear``
# hook reachable under the old name for any caller that relied on it.
configure_gemini.clear = _load_gemini_model.clear
42
+
43
+ # Hugging Face API URLs
44
+ HF_TEXT_API_URL = "https://api-inference.huggingface.co/models/microsoft/DialoGPT-large"
45
+ HF_IMAGE_API_URL = "https://api-inference.huggingface.co/models/runwayml/stable-diffusion-v1-5"
46
 
47
def query_huggingface_text(payload, timeout=30):
    """Generate text content using Hugging Face API.

    Args:
        payload: JSON-serialisable request body posted to ``HF_TEXT_API_URL``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block indefinitely on a stalled
            connection, which would freeze the Streamlit script run.

    Returns:
        The decoded JSON response on HTTP 200. Otherwise a dict of the form
        ``{"error": <message>}`` describing the status code or the exception.
    """
    try:
        response = requests.post(HF_TEXT_API_URL, json=payload, timeout=timeout)
        if response.status_code == 200:
            return response.json()
        return {"error": f"API returned status code {response.status_code}"}
    except Exception as e:
        # Best-effort contract: callers always get a dict back, never a raise
        # (covers network errors, timeouts and undecodable response bodies).
        return {"error": str(e)}
 
57
 
58
def query_huggingface_image(payload, timeout=60):
    """Generate image using Hugging Face API.

    Args:
        payload: JSON-serialisable request body posted to ``HF_IMAGE_API_URL``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block indefinitely on a stalled
            connection, which would freeze the Streamlit script run.

    Returns:
        The raw response body (bytes) on HTTP 200, otherwise ``None``.
        Exceptions are reported with ``st.error`` and also yield ``None``.
    """
    try:
        response = requests.post(HF_IMAGE_API_URL, json=payload, timeout=timeout)
        if response.status_code == 200:
            return response.content
        # Non-200 answers are signalled to the caller only through ``None``.
        return None
    except Exception as e:
        st.error(f"Image generation error: {str(e)}")
        return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
 
70
def speech_to_text():
    """Record one utterance from the microphone and transcribe it.

    Returns:
        The recognised text, or an empty string after the failure has been
        reported to the user with ``st.error``.
    """
    recognizer = sr.Recognizer()
    problem = None

    try:
        with sr.Microphone() as mic:
            st.info("🎤 Listening... Speak now!")
            recognizer.adjust_for_ambient_noise(mic, duration=1)
            captured = recognizer.listen(mic, timeout=10, phrase_time_limit=30)

        st.info("🔄 Processing speech...")
        return recognizer.recognize_google(captured)
    except sr.WaitTimeoutError:
        problem = "⏰ Listening timeout. Please try again."
    except sr.UnknownValueError:
        problem = "🔇 Could not understand audio. Please speak clearly."
    except sr.RequestError as e:
        problem = f"❌ Speech recognition error: {str(e)}"
    except Exception as e:
        problem = f"❌ Unexpected error: {str(e)}"

    # Only reached when one of the handlers above recorded a failure.
    st.error(problem)
    return ""
95
 
96
def enhance_prompt_with_gemini(text, content_type):
    """Enhance a user prompt with Gemini for better AI generation.

    Args:
        text: The user's original prompt.
        content_type: ``"text"`` selects the text-generation instructions;
            any other value selects the image-generation instructions.

    Returns:
        The enhanced prompt. The original ``text`` is returned unchanged when
        it is empty, when Gemini is unavailable, when the request fails, or
        when Gemini answers with an empty string, so the caller never loses
        the user's prompt.
    """
    # Nothing to enhance: avoid a pointless API round trip.
    if not text or not text.strip():
        return text

    model = configure_gemini()
    if not model:
        return text

    if content_type == "text":
        enhancement_prompt = f"""
        Enhance this prompt for AI text generation. Make it more specific and detailed for creating engaging content:
        Original: "{text}"

        Return only the enhanced prompt, nothing else.
        """
    else:  # image
        enhancement_prompt = f"""
        Enhance this prompt for AI image generation. Add artistic details, style, and visual elements:
        Original: "{text}"

        Return only the enhanced prompt for image generation, nothing else.
        """

    try:
        response = model.generate_content(enhancement_prompt)
        enhanced = response.text.strip()
    except Exception as e:
        st.warning(f"Prompt enhancement failed: {str(e)}. Using original prompt.")
        return text

    # An empty reply would otherwise replace the user's prompt with "".
    return enhanced or text
123
 
124
def generate_text_content(prompt, content_type="article"):
    """Produce text for ``prompt`` in the requested ``content_type``.

    The prompt is first enhanced with Gemini, wrapped in an instruction that
    matches the content type, and sent to the Hugging Face text endpoint.

    Returns:
        The generated text with the instruction echo stripped, or ``""`` when
        the API reports an error or returns nothing usable.
    """
    enhanced_prompt = enhance_prompt_with_gemini(prompt, "text")

    # Instruction prefix per content type; unknown types send the prompt as is.
    lead_ins = {
        "article": "Write a detailed article about: ",
        "social_post": "Create an engaging social media post about: ",
        "caption": "Write a compelling caption for: ",
        "story": "Tell an interesting story about: ",
    }
    lead_in = lead_ins.get(content_type)
    context = enhanced_prompt if lead_in is None else f"{lead_in}{enhanced_prompt}"

    payload = {"inputs": context, "parameters": {"max_length": 300, "temperature": 0.7}}

    with st.spinner("🤖 Generating text content..."):
        result = query_huggingface_text(payload)

    if "error" in result:
        st.error(f"Text generation failed: {result['error']}")
        return ""

    if not (isinstance(result, list) and len(result) > 0):
        return ""

    generated_text = result[0].get("generated_text", "")
    # Clean up the generated text: drop the echoed instruction if present.
    if context in generated_text:
        generated_text = generated_text.replace(context, "").strip()
    return generated_text
157
 
158
def generate_image(prompt):
    """Create an image for ``prompt`` via the Hugging Face image endpoint.

    Returns:
        The image opened with ``Image.open``, or ``None`` when no bytes were
        returned or the bytes could not be opened as an image.
    """
    enhanced_prompt = enhance_prompt_with_gemini(prompt, "image")

    # Append fixed quality/style keywords to the enhanced prompt.
    request_body = {
        "inputs": f"{enhanced_prompt}, high quality, detailed, artistic, professional"
    }

    with st.spinner("🎨 Generating image..."):
        raw = query_huggingface_image(request_body)

    if not raw:
        return None

    try:
        return Image.open(io.BytesIO(raw))
    except Exception as e:
        st.error(f"Error processing image: {str(e)}")
        return None
 
 
 
179
 
180
def save_to_history(prompt, content, image):
    """Record one generation in the session history, keeping the newest 10."""
    entry = {
        "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        "prompt": prompt,
        "content": content,
        "image": image,
    }
    log = st.session_state.history
    log.append(entry)

    # Keep only last 10 items
    if len(log) > 10:
        st.session_state.history = log[-10:]
193
+
194
def download_content(content, filename="content.txt"):
    """Render a download button for ``content`` as a plain-text file.

    Returns whatever ``st.download_button`` returns for this button.
    """
    clicked = st.download_button(
        "💾 Download Text",
        content,
        file_name=filename,
        mime="text/plain",
    )
    return clicked
202
+
203
def download_image(image, filename="generated_image.png"):
    """Render a download button that serves ``image`` encoded as PNG.

    Returns ``None`` without rendering anything when ``image`` is falsy,
    otherwise whatever ``st.download_button`` returns.
    """
    if not image:
        return None

    png_buffer = io.BytesIO()
    image.save(png_buffer, format="PNG")
    return st.download_button(
        label="🖼️ Download Image",
        data=png_buffer.getvalue(),
        file_name=filename,
        mime="image/png",
    )
214
 
215
# Main App Interface
def _run_generation(content_type, want_text, want_image):
    """Generate the requested outputs for the current prompt and record them.

    Results are written to ``st.session_state``. A history entry and the
    success message are produced only when at least one output was actually
    generated; otherwise a warning is shown.
    """
    prompt = st.session_state.voice_text
    produced = False

    if want_text:
        content = generate_text_content(prompt, content_type)
        st.session_state.generated_content = content
        produced = produced or bool(content)

    if want_image:
        image = generate_image(prompt)
        st.session_state.generated_image = image
        produced = produced or image is not None

    if not produced:
        st.warning("Nothing was generated. Check the generation options and try again.")
        return

    # Save to history
    save_to_history(
        prompt,
        st.session_state.generated_content,
        st.session_state.generated_image
    )
    st.success("✅ Content generated successfully!")


def main():
    """Render the VociArt page: sidebar settings, prompt input, generation, results."""
    # Header
    st.title("🎨 VociArt - Voice AI Creator")
    st.markdown("*Generate amazing content and images using just your voice!*")

    # Sidebar
    with st.sidebar:
        st.header("⚙️ Settings")

        # Content type selection
        content_type = st.selectbox(
            "📝 Content Type",
            ["article", "social_post", "caption", "story"],
            help="Choose the type of text content to generate"
        )

        # Language selection
        # NOTE(review): the selected language is not passed to any generation
        # call yet; the widget is kept so the UI stays unchanged.
        language = st.selectbox(
            "🌍 Language",
            ["English", "Spanish", "French", "German", "Italian", "Portuguese"],
            help="Select your preferred language"
        )

        # Generation options
        st.subheader("🎛️ Generation Options")
        generate_text = st.checkbox("Generate Text Content", value=True)
        generate_images = st.checkbox("Generate Images", value=True)

        # History (newest first, at most five entries)
        st.subheader("📚 Recent History")
        if st.session_state.history:
            total = len(st.session_state.history)
            for i, item in enumerate(reversed(st.session_state.history[-5:])):
                with st.expander(f"🕐 {item['timestamp'][:16]}"):
                    st.text(f"Prompt: {item['prompt'][:50]}...")
                    if st.button(f"Load #{total - i}", key=f"load_{i}"):
                        st.session_state.voice_text = item['prompt']
                        st.session_state.generated_content = item['content']
                        st.session_state.generated_image = item['image']
                        st.rerun()

    # Main content area
    col1, col2 = st.columns([2, 1])

    with col1:
        st.header("🎙️ Voice Input")

        # Voice input section
        col_voice1, col_voice2 = st.columns([3, 1])

        with col_voice1:
            # Manual text input
            manual_text = st.text_area(
                "✏️ Or type your prompt manually:",
                value=st.session_state.voice_text,
                height=100,
                placeholder="Describe what you want to create..."
            )

            if manual_text != st.session_state.voice_text:
                st.session_state.voice_text = manual_text

        with col_voice2:
            st.markdown("### 🎤")
            if st.button("🎤 Start Recording", type="primary", use_container_width=True):
                # Guard on the flag's VALUE, not on the key's existence: the
                # key exists forever after the first recording, which used to
                # disable this button for the rest of the session.
                if not st.session_state.get('recording', False):
                    st.session_state.recording = True
                    try:
                        voice_text = speech_to_text()
                    finally:
                        # Always release the flag, even if recording raised.
                        st.session_state.recording = False

                    if voice_text:
                        st.session_state.voice_text = voice_text
                        st.success(f"✅ Captured: '{voice_text}'")
                        st.rerun()

            if st.button("🗑️ Clear", use_container_width=True):
                st.session_state.voice_text = ""
                st.session_state.generated_content = ""
                st.session_state.generated_image = None
                st.rerun()

    with col2:
        st.header("🎯 Quick Actions")

        # Example prompts
        st.subheader("💡 Example Prompts")
        example_prompts = [
            "Write about sustainable living",
            "Create a motivational social media post",
            "Design a futuristic city",
            "Tell a story about space exploration",
            "Create a product advertisement"
        ]

        for idx, prompt in enumerate(example_prompts):
            # Index-based keys are stable and unique for this fixed list.
            if st.button(f"📝 {prompt}", key=f"example_{idx}", use_container_width=True):
                st.session_state.voice_text = prompt
                st.rerun()

    # Generation section
    if st.session_state.voice_text:
        st.header("🚀 Generation")

        col_gen1, col_gen2 = st.columns(2)

        with col_gen1:
            if st.button("🤖 Generate Content", type="primary", use_container_width=True):
                _run_generation(content_type, generate_text, generate_images)

        with col_gen2:
            # A bare st.rerun() here did nothing useful: clicking a button
            # already reruns the script. Regenerate now re-runs generation.
            if st.button("🔄 Regenerate", use_container_width=True):
                _run_generation(content_type, generate_text, generate_images)

    # Results section
    if st.session_state.generated_content or st.session_state.generated_image:
        st.header("📋 Generated Content")

        # Text content
        if st.session_state.generated_content and generate_text:
            st.subheader("📝 Text Content")

            # Editable text area
            edited_content = st.text_area(
                "Edit your content:",
                value=st.session_state.generated_content,
                height=200
            )

            if edited_content != st.session_state.generated_content:
                st.session_state.generated_content = edited_content

            # Download button
            col_dl1, col_dl2, col_dl3 = st.columns(3)
            with col_dl1:
                download_content(
                    st.session_state.generated_content,
                    f"{content_type}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.txt"
                )
            with col_dl2:
                # Shows the text in a code block so it can be copied by hand.
                if st.button("📋 Copy to Clipboard", key="copy_text"):
                    st.code(st.session_state.generated_content, language=None)
            with col_dl3:
                if st.button("🔄 Enhance Text", key="enhance_text"):
                    enhanced = enhance_prompt_with_gemini(st.session_state.generated_content, "text")
                    st.session_state.generated_content = enhanced
                    st.rerun()

        # Image content
        if st.session_state.generated_image and generate_images:
            st.subheader("🖼️ Generated Image")

            col_img1, col_img2 = st.columns([3, 1])

            with col_img1:
                st.image(st.session_state.generated_image, use_container_width=True)

            with col_img2:
                download_image(
                    st.session_state.generated_image,
                    f"vociart_image_{datetime.now().strftime('%Y%m%d_%H%M%S')}.png"
                )

                if st.button("🎨 Regenerate Image", key="regen_image"):
                    new_image = generate_image(st.session_state.voice_text)
                    if new_image:
                        st.session_state.generated_image = new_image
                        st.rerun()

    # Footer
    st.markdown("---")
    st.markdown(
        """
        <div style='text-align: center; color: #666;'>
        <p>🎨 VociArt - Powered by Hugging Face & Gemini AI</p>
        <p>Create amazing content with just your voice! 🎙️✨</p>
        </div>
        """,
        unsafe_allow_html=True
    )
402
 
403
+ if __name__ == "__main__":
404
+ main()