maria355 committed on
Commit
4259dc4
·
verified ·
1 Parent(s): aff0db9

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +724 -0
app.py ADDED
@@ -0,0 +1,724 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import speech_recognition as sr
3
+ import requests
4
+ import io
5
+ import base64
6
+ from PIL import Image
7
+ import google.generativeai as genai
8
+ import time
9
+ import json
10
+ import os
11
+ from datetime import datetime
12
+
13
# Page configuration
# This is the first st.* call in the file; the browser-tab title, icon and the
# wide layout are set here before anything else is rendered.
st.set_page_config(
    page_title="VociArt - Voice-Controlled AI Creator",
    page_icon="๐ŸŽค",
    layout="wide"
)

# Custom CSS for better UI
# Injects one <style> element defining the classes used by the raw-HTML
# snippets further down: .main-header (gradient page title), .feature-box
# (input/generation panels), .output-box (result panels), .status-box and
# .warning-box (Gemini status banners). unsafe_allow_html=True is needed so
# the <style> tag is emitted as HTML rather than shown as text.
st.markdown("""
<style>
.main-header {
text-align: center;
background: linear-gradient(90deg, #667eea 0%, #764ba2 100%);
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
font-size: 3rem;
font-weight: bold;
margin-bottom: 2rem;
}
.feature-box {
background: #f0f2f6;
padding: 20px;
border-radius: 10px;
margin: 10px 0;
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
}
.output-box {
background: #ffffff;
border: 2px solid #e0e0e0;
padding: 20px;
border-radius: 10px;
margin: 10px 0;
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
}
.status-box {
background: #e8f5e8;
border: 1px solid #4caf50;
padding: 10px;
border-radius: 5px;
margin: 10px 0;
}
.warning-box {
background: #fff3cd;
border: 1px solid #ffc107;
padding: 10px;
border-radius: 5px;
margin: 10px 0;
}
</style>
""", unsafe_allow_html=True)
63
+
64
# Initialize session state
# Each key read later by the UI is seeded exactly once; a key that already
# exists (i.e. on a script rerun) keeps its current value.
_SESSION_DEFAULTS = (
    ('text_output', ""),
    ('image_output', None),
    ('voice_input', ""),
    ('processed_prompt', ""),
    ('hf_token', ""),
)
for _state_key, _initial_value in _SESSION_DEFAULTS:
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _initial_value
75
+
76
+ # Initialize Gemini API
77
def initialize_gemini():
    """Configure the Gemini client from the ``GEMINI_API_KEY`` secret.

    Returns:
        True when a key was found and handed to ``genai.configure``;
        False when the key is missing or any exception is raised. In both
        failure cases an error banner is shown via ``st.error``.
    """
    try:
        api_key = st.secrets.get("GEMINI_API_KEY")
        if not api_key:
            st.error("โš ๏ธ Gemini API key not found in secrets. Please configure GEMINI_API_KEY in your Streamlit secrets.")
            return False
        genai.configure(api_key=api_key)
        return True
    except Exception as exc:
        st.error(f"โŒ Error initializing Gemini API: {str(exc)}")
        return False
90
+
91
# Hugging Face API configuration
# Default inference endpoints for text and image generation.
# NOTE(review): neither constant is referenced in the visible part of this
# file -- generate_text_content() and generate_image() build their URLs from
# their own model lists. Confirm whether these are still needed.
HF_API_URL_TEXT = "https://api-inference.huggingface.co/models/microsoft/DialoGPT-medium"
HF_API_URL_IMAGE = "https://api-inference.huggingface.co/models/stabilityai/stable-diffusion-2-1"
94
+
95
def query_huggingface(api_url, payload, headers):
    """POST ``payload`` to a Hugging Face inference endpoint, retrying up to 3 times.

    Args:
        api_url: Full URL of the model endpoint.
        payload: JSON-serialisable request body.
        headers: HTTP headers (carries the ``Authorization`` bearer token).

    Returns:
        The ``requests.Response`` on HTTP 200, otherwise ``None``. A 503
        response or a request timeout triggers a retry; any other status
        code or exception is reported with ``st.error`` and ends the call.
    """
    max_retries = 3
    for attempt in range(max_retries):
        # Waiting is only useful when another attempt will follow; sleeping
        # after the final attempt just blocks the UI before returning None.
        retries_remain = attempt < max_retries - 1
        try:
            response = requests.post(api_url, headers=headers, json=payload, timeout=30)
            if response.status_code == 200:
                return response
            if response.status_code == 503:
                # Model is loading, wait and retry
                st.warning(f"๐Ÿ”„ Model is loading... Attempt {attempt + 1}/{max_retries}")
                if retries_remain:
                    time.sleep(20)  # Wait for model to load
                continue
            st.error(f"โŒ API Error: {response.status_code} - {response.text}")
            return None
        except requests.exceptions.Timeout:
            st.warning(f"โฐ Request timeout. Attempt {attempt + 1}/{max_retries}")
            if retries_remain:
                time.sleep(5)
        except Exception as e:
            # Deliberately broad: any other failure is surfaced to the user
            # and treated as final rather than crashing the page.
            st.error(f"โŒ Request failed: {str(e)}")
            return None
    return None
118
+
119
def generate_text_content(prompt, content_type="general", hf_token=""):
    """Produce text for ``prompt`` through the Hugging Face inference API.

    Args:
        prompt: The user's (possibly enhanced) topic.
        content_type: One of "blog", "social", "story", "caption" or
            "general"; unknown values use the "general" instruction.
        hf_token: Hugging Face access token. When empty, no request is made.

    Returns:
        The cleaned model output from the first model that yields more than
        50 non-blank characters, otherwise the templated text from
        ``generate_fallback_content``.
    """
    if not hf_token:
        st.warning("โš ๏ธ Hugging Face token required for text generation")
        return generate_fallback_content(prompt, content_type)

    # Instruction wording per content type
    instructions = {
        "blog": f"Write a comprehensive blog post about '{prompt}'. Include an engaging introduction, detailed main points with examples, and a strong conclusion. Make it informative and well-structured.",
        "social": f"Create an engaging social media post about '{prompt}'. Make it catchy, shareable, and include relevant hashtags. Keep it concise but impactful.",
        "story": f"Write a creative short story about '{prompt}'. Include vivid descriptions, interesting characters, and an engaging narrative with a clear beginning, middle, and end.",
        "caption": f"Write a compelling and creative caption about '{prompt}'. Make it engaging, relevant, and suitable for social media sharing.",
        "general": f"Create detailed, informative content about '{prompt}'. Provide comprehensive information with clear explanations and useful insights."
    }
    enhanced_prompt = instructions.get(content_type, instructions["general"])

    auth_headers = {"Authorization": f"Bearer {hf_token}"}

    # The request body does not depend on the model, so it is built once.
    request_body = {
        "inputs": enhanced_prompt,
        "parameters": {
            "max_length": 800,
            "temperature": 0.8,
            "do_sample": True,
            "top_p": 0.9,
            "repetition_penalty": 1.2
        }
    }

    # Models are tried in this order until one returns usable text
    candidate_models = (
        "microsoft/DialoGPT-large",
        "facebook/blenderbot-400M-distill",
        "microsoft/DialoGPT-medium",
        "gpt2",
    )

    for model in candidate_models:
        try:
            endpoint = f"https://api-inference.huggingface.co/models/{model}"

            with st.spinner(f"๐Ÿค– Generating {content_type} content using {model}..."):
                response = query_huggingface(endpoint, request_body, auth_headers)

            if not response or response.status_code != 200:
                continue

            parsed = response.json()
            if isinstance(parsed, list) and len(parsed) > 0:
                candidate = parsed[0].get('generated_text', '')
            elif isinstance(parsed, dict):
                candidate = parsed.get('generated_text', parsed.get('response', ''))
            else:
                candidate = ''

            # Very short outputs are treated as failures and the next model is tried
            if candidate and len(candidate.strip()) > 50:
                return clean_generated_text(candidate, enhanced_prompt)

        except Exception as err:
            st.warning(f"โš ๏ธ Model {model} failed: {str(err)}")
            continue

    # Fallback: Generate structured content
    st.info("๐Ÿ”„ Using fallback content generation...")
    return generate_fallback_content(prompt, content_type)
183
+
184
def clean_generated_text(text, original_prompt):
    """Tidy raw model output.

    If ``text`` begins with ``original_prompt`` the echoed prompt is cut off.
    The remainder is split into lines; each line is stripped, blank lines
    are dropped, and a line equal to any of the three most recently kept
    lines is skipped.

    Returns:
        The surviving lines joined with newlines.
    """
    body = text
    if body.startswith(original_prompt):
        body = body[len(original_prompt):].strip()

    kept = []
    for raw_line in body.split('\n'):
        candidate = raw_line.strip()
        if not candidate:
            continue
        # Only the last three kept lines count as "immediate" repetition
        if candidate in kept[-3:]:
            continue
        kept.append(candidate)

    return '\n'.join(kept)
199
+
200
def generate_fallback_content(prompt, content_type):
    """Return canned, template-based text for ``prompt``.

    Used when no Hugging Face token is available or every model failed.

    Args:
        prompt: Topic inserted verbatim into the template.
        content_type: "blog", "social", "story", "caption" or "general";
            any other value yields the "general" template.

    Returns:
        The filled-in template string.
    """
    blog_post = f"""# {prompt.title()}

## Introduction
{prompt} is a fascinating topic that deserves comprehensive exploration. In today's rapidly evolving world, understanding {prompt} has become increasingly important for various reasons.

## Key Insights
โ€ข **Foundation**: The fundamental aspects of {prompt} provide essential knowledge that forms the basis for deeper understanding.

โ€ข **Applications**: Real-world applications of {prompt} demonstrate its practical value and relevance in different contexts.

โ€ข **Impact**: The influence of {prompt} extends beyond its immediate scope, affecting related areas and creating ripple effects.

โ€ข **Future Perspectives**: Looking ahead, {prompt} continues to evolve and adapt, presenting new opportunities and challenges.

## Practical Considerations
When dealing with {prompt}, it's important to consider multiple perspectives and approaches. Each situation may require different strategies and solutions.

## Conclusion
{prompt} remains a significant and evolving topic. By understanding its various dimensions, we can better appreciate its importance and make informed decisions related to it.

*This content serves as a starting point for deeper exploration of {prompt}.*"""

    social_post = f"""๐ŸŒŸ Exploring the fascinating world of {prompt}! โœจ

There's something truly captivating about {prompt} that deserves our attention. Whether you're just discovering it or deepening your understanding, there's always more to learn! ๐Ÿš€

๐Ÿ’ก Key takeaway: {prompt} offers unique perspectives that can transform how we think about related topics.

What's your experience with {prompt}? Share your thoughts below! ๐Ÿ‘‡

#Innovation #Learning #Growth #Discovery #Knowledge #AI #Content #Trending"""

    short_story = f"""**The Discovery of {prompt}**

Sarah had always been curious about {prompt}, but she never imagined how deeply it would impact her life. It started on a Tuesday morning, when she stumbled upon something extraordinary.

The morning light filtered through her window as she began to explore {prompt} more deeply. What she discovered challenged everything she thought she knew. Each new piece of information was like solving a puzzle, revealing a bigger picture that was both beautiful and complex.

As days turned into weeks, Sarah's understanding grew. She realized that {prompt} wasn't just a conceptโ€”it was a gateway to new possibilities. The connections she made, the insights she gained, all led to a profound transformation in her perspective.

Looking back, Sarah smiled. Her journey with {prompt} had taught her that the most meaningful discoveries often come when we least expect them. Sometimes, the best adventures begin with simple curiosity.

*The end of one discovery is often the beginning of another.*"""

    caption_text = f"""โœจ Discovering the magic of {prompt} โœจ

Sometimes the most beautiful moments come from exploring new ideas and perspectives. {prompt} reminds us that there's always something wonderful waiting to be discovered.

๐ŸŒŸ What inspires you today?

#Inspiration #Discovery #Creativity #Growth #Innovation #AI #Content"""

    overview = f"""**Understanding {prompt}**

{prompt} represents an important concept that merits thoughtful consideration. In our interconnected world, topics like {prompt} play significant roles in shaping our understanding and experiences.

**Key Aspects:**
- **Definition**: {prompt} encompasses various elements that contribute to its overall significance
- **Relevance**: The importance of {prompt} becomes apparent when we examine its applications and implications
- **Perspectives**: Different viewpoints on {prompt} offer valuable insights and enhance our comprehension

**Practical Applications:**
{prompt} finds relevance in multiple contexts, from theoretical discussions to practical implementations. Understanding these applications helps us appreciate its broader significance.

**Conclusion:**
{prompt} continues to be a topic worth exploring. As we deepen our understanding, we discover new dimensions and possibilities that enrich our knowledge and perspective.

*This overview provides a foundation for further exploration of {prompt}.*"""

    catalog = {
        "blog": blog_post,
        "social": social_post,
        "story": short_story,
        "caption": caption_text,
        "general": overview,
    }
    if content_type in catalog:
        return catalog[content_type]
    # Unknown content types fall back to the general overview
    return overview
273
+
274
def generate_image(prompt, hf_token=""):
    """Create an image for ``prompt`` through the Hugging Face inference API.

    Every model is tried with every styled variant of the prompt (models in
    the outer order, variants in the inner order) until one response carries
    more than 1000 bytes, which is then opened with PIL.

    Args:
        prompt: Subject of the image.
        hf_token: Hugging Face access token. When empty, no request is made.

    Returns:
        A ``PIL.Image`` on success, otherwise ``None`` (a warning is shown).
    """
    if not hf_token:
        st.warning("โš ๏ธ Hugging Face token required for image generation")
        return None

    auth_headers = {"Authorization": f"Bearer {hf_token}"}

    # Styled variants of the prompt
    styled_prompts = [
        f"high quality, detailed, beautiful {prompt}, digital art, trending on artstation, 4k, professional",
        f"stunning {prompt}, masterpiece, highly detailed, photorealistic, premium quality",
        f"artistic {prompt}, beautiful composition, vibrant colors, professional photography style"
    ]

    model_ids = [
        "stabilityai/stable-diffusion-2-1",
        "runwayml/stable-diffusion-v1-5",
        "CompVis/stable-diffusion-v1-4"
    ]

    # Flattened (model, prompt) pairs in the same order as a nested loop
    attempts = [(model, styled) for model in model_ids for styled in styled_prompts]

    for model, styled in attempts:
        try:
            endpoint = f"https://api-inference.huggingface.co/models/{model}"
            request_body = {"inputs": styled}

            with st.spinner(f"๐ŸŽจ Creating image using {model}..."):
                response = query_huggingface(endpoint, request_body, auth_headers)

            if not response or response.status_code != 200:
                continue

            raw = response.content
            if len(raw) > 1000:  # Ensure we got actual image data
                return Image.open(io.BytesIO(raw))

        except Exception as err:
            st.warning(f"โš ๏ธ Image generation with {model} failed: {str(err)}")
            continue

    st.warning("๐ŸŽจ Image generation failed. This might be due to model loading, API limits, or high demand. Please try again later.")
    return None
318
+
319
def voice_to_text():
    """Record from the default microphone and transcribe the speech.

    Google Speech Recognition is tried first; if it fails, PocketSphinx is
    tried as a fallback. When both fail, the *Google* error is re-raised so
    the handlers below report the real cause (service/network failure versus
    unintelligible audio) instead of always reporting unintelligible audio.

    Returns:
        The recognised text, or ``None`` when recording or recognition
        failed (a message is shown through ``st.error`` / ``st.warning``).
    """
    recognizer = sr.Recognizer()

    try:
        with sr.Microphone() as source:
            st.info("๐ŸŽค Listening... Speak clearly for 10-15 seconds!")
            recognizer.adjust_for_ambient_noise(source, duration=1)
            # Increased timeout and phrase time limit for better capture
            audio = recognizer.listen(source, timeout=15, phrase_time_limit=20)

        # The microphone is released before the recognition step starts.
        with st.spinner("๐Ÿ”„ Converting speech to text..."):
            # Try Google Speech Recognition first
            try:
                return recognizer.recognize_google(audio)
            except (sr.UnknownValueError, sr.RequestError) as google_error:
                # Fallback to other recognition services if available.
                # The fallback is best-effort, so any failure there (for
                # example PocketSphinx not being installed) is ignored in
                # favour of the original Google error.
                try:
                    return recognizer.recognize_sphinx(audio)
                except Exception:
                    raise google_error

    except sr.RequestError as e:
        st.error(f"โŒ Could not request results from speech recognition service: {e}")
        return None
    except sr.UnknownValueError:
        st.warning("โš ๏ธ Could not understand the audio clearly. Please try speaking more clearly and ensure your microphone is working.")
        return None
    except sr.WaitTimeoutError:
        st.warning("โฐ No speech detected within the time limit. Please try again and speak immediately after clicking the button.")
        return None
    except Exception as e:
        st.error(f"โŒ An error occurred during voice recognition: {e}")
        return None
355
+
356
def translate_and_enhance_text(text, target_language="en"):
    """Ask Gemini to enrich ``text`` (and translate it when not English).

    Args:
        text: The raw prompt from voice or manual input.
        target_language: Language code. "en" only enhances; any other code
            requests translation into that language plus enhancement.

    Returns:
        Gemini's reply with a leading "Enhanced prompt:" / "Translated and
        enhanced prompt:" label removed. ``text`` is returned unchanged when
        Gemini is not configured, the reply is empty, or an exception occurs.
    """
    try:
        if not initialize_gemini():
            return text  # Return original if Gemini is not available

        gemini = genai.GenerativeModel('gemini-pro')

        if target_language != "en":
            # Translate and enhance
            display_names = {
                "es": "Spanish", "fr": "French", "de": "German",
                "it": "Italian", "pt": "Portuguese", "hi": "Hindi", "ur": "Urdu"
            }
            # Unlisted codes are passed to the model as-is
            lang_name = display_names.get(target_language, target_language)

            instruction = f"""Translate the following text to {lang_name} and then enhance it for better AI content and image generation. Make it more descriptive and detailed:

Original text: "{text}"

Translated and enhanced prompt:"""
        else:
            # Just enhance the prompt for better AI generation
            instruction = f"""Enhance the following prompt for better AI content and image generation. Make it more descriptive and detailed while keeping the core meaning:

Original prompt: "{text}"

Enhanced prompt:"""

        reply = gemini.generate_content(instruction)
        enhanced_text = reply.text.strip()

        # Drop the label if the model echoed it back; at most one is removed
        lowered = enhanced_text.lower()
        for label in ("enhanced prompt:", "translated and enhanced prompt:"):
            if lowered.startswith(label):
                enhanced_text = enhanced_text[len(label):].strip()
                break

        if enhanced_text:
            return enhanced_text
        return text

    except Exception as e:
        st.warning(f"โš ๏ธ Translation/Enhancement failed: {e}")
        return text
399
+
400
# Main App Interface
# Page title and tagline are emitted as raw HTML so the .main-header CSS
# class and the inline styles apply.
st.markdown('<h1 class="main-header">๐ŸŽค VociArt</h1>', unsafe_allow_html=True)
st.markdown('<p style="text-align: center; font-size: 1.2rem; color: #666;">Voice-Controlled AI Content Creator & Image Generator</p>', unsafe_allow_html=True)

# Check Gemini API status
# gemini_status is reused by the sidebar's "API Status" section below.
# initialize_gemini() itself already shows an st.error when it fails, so a
# failure produces both that error and the warning banner here.
gemini_status = initialize_gemini()
if gemini_status:
    st.markdown('<div class="status-box">โœ… Gemini API: Connected</div>', unsafe_allow_html=True)
else:
    st.markdown('<div class="warning-box">โš ๏ธ Gemini API: Not configured (translation features disabled)</div>', unsafe_allow_html=True)
410
+
411
# Sidebar for configuration
# Defines three module-level values used by the main area further down:
# hf_token (also copied into session state), target_language and content_type.
with st.sidebar:
    st.header("โš™๏ธ Configuration")

    # HF Token input
    st.subheader("๐Ÿ”‘ Hugging Face Token")
    hf_token = st.text_input("Enter your HF token", type="password",
                             help="Get your free token from huggingface.co/settings/tokens")

    # Only a non-empty value overwrites the stored token, so clearing the
    # field does not erase a token that was saved earlier in the session.
    if hf_token:
        st.session_state.hf_token = hf_token
        st.success("โœ… Token saved!")

    st.markdown("---")

    st.subheader("๐ŸŒ Language Settings")
    # Codes match the mapping inside translate_and_enhance_text(); "en"
    # means "enhance only, no translation".
    target_language = st.selectbox("Output Language",
                                   ["en", "es", "fr", "de", "it", "pt", "hi", "ur"],
                                   help="Select target language for translation")

    st.subheader("๐Ÿ“ Content Type")
    # Values match the template keys used by generate_text_content() and
    # generate_fallback_content().
    content_type = st.selectbox("Choose content type",
                                ["general", "blog", "social", "story", "caption"],
                                help="Select the type of content to generate")

    st.markdown("---")

    # API Status
    st.subheader("๐Ÿ“Š API Status")
    if st.session_state.hf_token:
        st.success("๐Ÿค— Hugging Face: Ready")
    else:
        st.warning("๐Ÿค— Hugging Face: Token needed")

    if gemini_status:
        st.success("๐Ÿ”ฎ Gemini: Ready")
    else:
        st.error("๐Ÿ”ฎ Gemini: Not configured")

    st.markdown("---")

    # Instructions
    st.subheader("๐Ÿ“– Quick Guide")
    st.markdown("""
1. **Add HF Token** above
2. **Choose settings** (language & content type)
3. **Record voice** or type prompt
4. **Generate** text/image content
5. **Download** your creations
""")
461
+
462
# Main content area
# Two equal-width columns: col1 collects the prompt, col2 triggers generation.
col1, col2 = st.columns([1, 1])

with col1:
    # NOTE(review): the opening and closing <div> are sent in separate
    # st.markdown calls; confirm the widgets in between are actually rendered
    # inside the .feature-box element.
    st.markdown('<div class="feature-box">', unsafe_allow_html=True)
    st.subheader("๐ŸŽค Voice Input")

    if st.button("๐ŸŽ™๏ธ Start Voice Recording", type="primary", use_container_width=True):
        voice_text = voice_to_text()
        # voice_to_text() returns None on failure; state is left untouched then
        if voice_text:
            st.session_state.voice_input = voice_text
            # Translate and enhance if needed
            enhanced = translate_and_enhance_text(voice_text, target_language)
            st.session_state.processed_prompt = enhanced

    # Manual text input as fallback
    st.subheader("โœ๏ธ Manual Text Input")
    manual_input = st.text_area("Type your prompt here:", height=100,
                                placeholder="Enter your creative prompt...")

    if st.button("๐Ÿš€ Process Text", use_container_width=True):
        # An empty text area is ignored silently
        if manual_input:
            # Typed input is stored under the same voice_input key as speech
            st.session_state.voice_input = manual_input
            # Translate and enhance if needed
            enhanced = translate_and_enhance_text(manual_input, target_language)
            st.session_state.processed_prompt = enhanced

    # Display current inputs
    if st.session_state.voice_input:
        st.success(f"**Original Input:** {st.session_state.voice_input}")

    # The enhanced prompt is shown only when it differs from the raw input
    if st.session_state.processed_prompt and st.session_state.processed_prompt != st.session_state.voice_input:
        st.info(f"**Enhanced Prompt:** {st.session_state.processed_prompt}")

    st.markdown('</div>', unsafe_allow_html=True)
497
+
498
# Generation controls: both buttons stay disabled until a processed prompt
# and a Hugging Face token are present in session state.
with col2:
    st.markdown('<div class="feature-box">', unsafe_allow_html=True)
    st.subheader("๐Ÿค– AI Generation")

    generation_col1, generation_col2 = st.columns([1, 1])

    with generation_col1:
        generate_text_disabled = not st.session_state.processed_prompt or not st.session_state.hf_token
        if st.button("๐Ÿ“ Generate Text",
                     disabled=generate_text_disabled,
                     use_container_width=True):
            # Repeats the condition already enforced through `disabled`
            if st.session_state.processed_prompt and st.session_state.hf_token:
                st.session_state.text_output = generate_text_content(
                    st.session_state.processed_prompt,
                    content_type,
                    st.session_state.hf_token
                )

    with generation_col2:
        generate_image_disabled = not st.session_state.processed_prompt or not st.session_state.hf_token
        if st.button("๐ŸŽจ Generate Image",
                     disabled=generate_image_disabled,
                     use_container_width=True):
            # Repeats the condition already enforced through `disabled`
            if st.session_state.processed_prompt and st.session_state.hf_token:
                # generate_image() returns None on failure, which overwrites
                # any previously generated image
                st.session_state.image_output = generate_image(
                    st.session_state.processed_prompt,
                    st.session_state.hf_token
                )

    # Status messages
    # Explain why the buttons are disabled; a missing prompt takes precedence
    if not st.session_state.processed_prompt:
        st.info("๐Ÿ’ก Add a prompt to start generating!")
    elif not st.session_state.hf_token:
        st.warning("โš ๏ธ Add Hugging Face token to enable generation!")

    st.markdown('</div>', unsafe_allow_html=True)
534
+
535
# Output Display
st.markdown("## ๐Ÿ“ค Generated Content")

# output_col1 shows the generated text, output_col2 the generated image
output_col1, output_col2 = st.columns([1, 1])

with output_col1:
    st.markdown('<div class="output-box">', unsafe_allow_html=True)
    st.subheader("๐Ÿ“ Generated Text")

    if st.session_state.text_output:
        # Display text in a nice container
        # The newline-to-<br> conversion is done outside the f-string:
        # a backslash inside an f-string expression is a SyntaxError on
        # Python versions before 3.12.
        # NOTE(review): the text is inserted as raw HTML without escaping.
        text_as_html = st.session_state.text_output.replace('\n', '<br>')
        st.markdown(f"""
<div style="background: #f8f9fa; padding: 15px; border-radius: 8px; border-left: 4px solid #007bff;">
{text_as_html}
</div>
""", unsafe_allow_html=True)

        # Download and copy buttons
        # The timestamp makes each downloaded file name unique
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        download_col1, download_col2 = st.columns([1, 1])

        with download_col1:
            st.download_button(
                label="๐Ÿ’พ Download Text",
                data=st.session_state.text_output,
                file_name=f"vociart_text_{timestamp}.txt",
                mime="text/plain",
                use_container_width=True
            )

        with download_col2:
            # NOTE(review): this only shows a confirmation message; nothing
            # in this block places the text on the clipboard.
            if st.button("๐Ÿ“‹ Copy Text", use_container_width=True):
                st.success("โœ… Text copied to clipboard!")

        # Word count
        word_count = len(st.session_state.text_output.split())
        st.caption(f"๐Ÿ“Š Word count: {word_count}")

    else:
        st.info("๐Ÿ“ Generated text will appear here...")
        st.markdown("""
<div style="text-align: center; padding: 20px; color: #666;">
<i>Your AI-generated content will be displayed here</i>
</div>
""", unsafe_allow_html=True)

    st.markdown('</div>', unsafe_allow_html=True)
582
+
583
# Right-hand result panel: shows the generated image (if any) together with
# a PNG download button and the pixel dimensions.
with output_col2:
    st.markdown('<div class="output-box">', unsafe_allow_html=True)
    st.subheader("๐ŸŽจ Generated Image")

    if st.session_state.image_output:
        # NOTE(review): use_column_width is presumably deprecated in newer
        # Streamlit releases in favour of use_container_width -- confirm
        # against the pinned Streamlit version.
        st.image(st.session_state.image_output,
                 caption="AI Generated Image",
                 use_column_width=True)

        # Download button for image
        # The image is re-encoded as PNG into an in-memory buffer so its
        # bytes can be handed to the download button.
        img_buffer = io.BytesIO()
        st.session_state.image_output.save(img_buffer, format='PNG')
        img_buffer.seek(0)

        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

        image_download_col1, image_download_col2 = st.columns([1, 1])

        with image_download_col1:
            st.download_button(
                label="๐Ÿ’พ Download Image",
                data=img_buffer.getvalue(),
                file_name=f"vociart_image_{timestamp}.png",
                mime="image/png",
                use_container_width=True
            )

        with image_download_col2:
            # Display image info
            width, height = st.session_state.image_output.size
            st.caption(f"๐Ÿ“Š Size: {width}ร—{height}")

    else:
        st.info("๐ŸŽจ Generated image will appear here...")
        st.markdown("""
<div style="text-align: center; padding: 20px; color: #666;">
<i>Your AI-generated image will be displayed here</i>
</div>
""", unsafe_allow_html=True)

    st.markdown('</div>', unsafe_allow_html=True)
624
+
625
# Action buttons at the bottom
st.markdown("---")
action_col1, action_col2, action_col3 = st.columns([1, 1, 1])

with action_col1:
    # Clears prompts and generated results but keeps the stored HF token
    if st.button("๐Ÿ—‘๏ธ Clear All Content", type="secondary", use_container_width=True):
        st.session_state.text_output = ""
        st.session_state.image_output = None
        st.session_state.voice_input = ""
        st.session_state.processed_prompt = ""
        # NOTE(review): st.rerun() follows immediately, so this message is
        # presumably never visible -- confirm.
        st.success("โœ… All content cleared!")
        st.rerun()

with action_col2:
    # Removes every session-state key, including the stored HF token; the
    # defaults are seeded again by the initialisation code on the rerun.
    if st.button("๐Ÿ”„ Reset Session", use_container_width=True):
        for key in st.session_state.keys():
            del st.session_state[key]
        st.success("โœ… Session reset!")
        st.rerun()

with action_col3:
    if st.button("๐Ÿ’ก Show Tips", use_container_width=True):
        st.info("""
**๐Ÿ’ก Pro Tips:**
- Speak clearly and close to microphone
- Use descriptive prompts for better results
- Try different content types for variety
- Wait for models to load (first time may be slow)
""")
654
+
655
# Footer with detailed instructions
# Static help text in a collapsible expander; it contains no dynamic values.
st.markdown("---")
with st.expander("๐Ÿ“– Detailed Instructions & Setup Guide"):
    st.markdown("""
### ๐Ÿš€ Complete Setup Guide:

#### **1. Get Your Free API Keys:**
- ๐Ÿค— **Hugging Face Token**:
- Go to [huggingface.co](https://huggingface.co)
- Sign up for free account
- Go to Settings โ†’ Access Tokens
- Create new token with "Read" permissions
- Copy and paste in the sidebar

#### **2. Configure Gemini API (Admin):**
- ๐Ÿ”ฎ **For App Administrator**: Add `GEMINI_API_KEY` to Streamlit secrets
- Get free key from [Google AI Studio](https://aistudio.google.com)

#### **3. Using VociArt:**

**Voice Method:**
1. Click "๐ŸŽ™๏ธ Start Voice Recording"
2. Speak clearly when you see "Listening..."
3. Wait for speech-to-text conversion
4. Your prompt will be enhanced automatically

**Manual Method:**
1. Type your prompt in the text area
2. Click "๐Ÿš€ Process Text"
3. Your prompt will be enhanced for better AI generation

**Generation:**
1. Choose your content type (blog, social, story, etc.)
2. Select target language for translation
3. Click "๐Ÿ“ Generate Text" or "๐ŸŽจ Generate Image"
4. Wait for AI to create your content
5. Download or copy your results

### ๐ŸŽฏ Best Practices:
- **Voice Input**: Speak slowly and clearly, avoid background noise
- **Prompts**: Be descriptive and specific for better results
- **Content Types**: Choose appropriate type for your needs
- **Languages**: Translation works best with clear, simple prompts
- **Patience**: First-time model loading may take 20-30 seconds

### ๐Ÿ”ง Troubleshooting:
- **No audio detected**: Check microphone permissions and try again
- **API errors**: Verify your Hugging Face token is valid
- **Model loading**: Wait patiently, models load on first use
- **Poor results**: Try rephrasing your prompt or changing content type

### ๐Ÿ“ฑ Features:
- โœ… **100% Free**: Uses only free APIs and models
- โœ… **No Installation**: Runs in browser
- โœ… **Multi-language**: Supports 8 languages with translation
- โœ… **Voice Control**: Complete voice-to-content workflow
- โœ… **Multiple Formats**: Blog posts, social media, stories, captions
- โœ… **Download**: Save text and images locally
- โœ… **Responsive**: Works on desktop and mobile
""")

# Version and credits
# Raw HTML footer; unsafe_allow_html=True is needed for the inline styles.
st.markdown("""
---
<div style="text-align: center; color: #666; font-size: 0.9rem;">
<strong>VociArt v1.0</strong> - Voice-Controlled AI Content Creator<br>
Powered by Hugging Face ๐Ÿค— | Google Gemini ๐Ÿ”ฎ | Streamlit โšก<br>
<em>Create amazing content with just your voice!</em>
</div>
""", unsafe_allow_html=True)