maria355 committed on
Commit
42902e3
·
verified ·
1 Parent(s): be8ab5d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +361 -188
app.py CHANGED
@@ -16,7 +16,6 @@ try:
16
  TRANSFORMERS_AVAILABLE = True
17
  except ImportError:
18
  TRANSFORMERS_AVAILABLE = False
19
- st.error("Transformers not available")
20
 
21
  try:
22
  import google.generativeai as genai
@@ -29,14 +28,13 @@ try:
29
  AUDIO_REC_AVAILABLE = True
30
  except ImportError:
31
  AUDIO_REC_AVAILABLE = False
32
- st.warning("Audio recording not available")
33
 
34
  # Configure page
35
  st.set_page_config(
36
  page_title="VoiceCanvas - AI Content Studio",
37
  page_icon="🎨",
38
  layout="wide",
39
- initial_sidebar_state="collapsed"
40
  )
41
 
42
  # Initialize session state
@@ -46,46 +44,59 @@ if 'transcription' not in st.session_state:
46
  st.session_state.transcription = ""
47
  if 'processing' not in st.session_state:
48
  st.session_state.processing = False
 
 
 
 
49
 
50
  # Global variables for models
51
  whisper_model = None
52
  text_generator = None
53
 
54
  def load_models():
55
- """Load models efficiently"""
56
  global whisper_model, text_generator
57
 
 
 
 
58
  if not TRANSFORMERS_AVAILABLE:
59
  st.error("AI models not available")
60
- return
61
 
62
- if whisper_model is None:
63
- try:
64
- # Use the smallest Whisper model for speed
65
- whisper_model = pipeline(
66
- "automatic-speech-recognition",
67
- model="openai/whisper-tiny",
68
- device=-1, # Force CPU
69
- torch_dtype=torch.float32
70
- )
71
- except Exception as e:
72
- st.error(f"Error loading Whisper: {e}")
73
- whisper_model = "error"
74
 
75
- if text_generator is None:
76
- try:
77
- # Use a lightweight text generation model
78
- text_generator = pipeline(
79
- "text-generation",
80
- model="microsoft/DialoGPT-small",
81
- device=-1, # Force CPU
82
- max_length=150,
83
- do_sample=True,
84
- temperature=0.7
85
- )
86
- except Exception as e:
87
- st.warning(f"Text generator not available: {e}")
88
- text_generator = "error"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
 
90
  def setup_gemini():
91
  """Setup Gemini API if available"""
@@ -105,16 +116,21 @@ def setup_gemini():
105
  return False
106
 
107
  def transcribe_audio_simple(audio_file):
108
- """Simple audio transcription"""
109
  try:
110
- if whisper_model is None or whisper_model == "error":
111
  return "Error: Speech recognition not available"
112
 
 
 
113
  # Transcribe using pipeline
114
  result = whisper_model(audio_file)
 
 
115
  return result["text"].strip()
116
 
117
  except Exception as e:
 
118
  return f"Error: {str(e)}"
119
 
120
  def generate_content_with_gemini(prompt):
@@ -123,25 +139,44 @@ def generate_content_with_gemini(prompt):
123
  return generate_content_offline(prompt)
124
 
125
  try:
 
 
126
  model = genai.GenerativeModel('gemini-pro')
127
  response = model.generate_content(f"""
128
  Based on this input: "{prompt}"
129
 
130
- Create marketing content with:
131
- 1. 3 catchy taglines (max 10 words each)
132
- 2. 3 social media posts (max 280 chars each)
133
- 3. 1 product description (max 100 words)
134
- 4. 3 image generation prompts
 
 
135
 
136
- Format your response clearly with headers.
 
 
 
 
 
 
 
 
 
137
  """)
 
 
138
  return response.text
 
139
  except Exception as e:
140
  st.warning(f"Gemini error: {e}. Using offline generation.")
 
141
  return generate_content_offline(prompt)
142
 
143
  def generate_content_offline(prompt):
144
  """Generate content using offline methods"""
 
 
145
  # Create structured content
146
  content = {
147
  "taglines": [
@@ -167,29 +202,37 @@ def generate_content_offline(prompt):
167
 
168
  # Store both versions
169
  st.session_state.generated_content['structured'] = content
 
 
170
  return formatted
171
 
172
  def generate_image_with_api(prompt):
173
  """Generate image using free API"""
174
  try:
175
- # Using free image generation API
 
176
  api_url = "https://api-inference.huggingface.co/models/stabilityai/stable-diffusion-2-1"
177
  headers = {"Authorization": f"Bearer {os.getenv('HF_TOKEN', '')}"}
178
 
179
  if not os.getenv('HF_TOKEN'):
180
  st.warning("Add HF_TOKEN environment variable for image generation")
 
181
  return None
182
 
183
- response = requests.post(api_url, headers=headers, json={"inputs": prompt}, timeout=30)
184
 
185
  if response.status_code == 200:
186
  image = Image.open(io.BytesIO(response.content))
 
187
  return image
188
  else:
189
  st.warning(f"Image API returned status {response.status_code}")
 
190
  return None
 
191
  except Exception as e:
192
  st.error(f"Image generation error: {e}")
 
193
  return None
194
 
195
  def format_content_display(content):
@@ -222,235 +265,365 @@ def format_content_display(content):
222
  return str(content)
223
 
224
  def main():
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
225
  st.title("🎨 VoiceCanvas - AI Content Studio")
226
- st.markdown("*Transform your ideas into marketing content using AI*")
227
-
228
- # Quick setup
229
- gemini_available = setup_gemini()
230
-
231
- # Status indicator
232
- col1, col2, col3 = st.columns([2, 1, 1])
233
- with col2:
234
- if gemini_available:
235
- st.success("βœ… Enhanced AI")
236
- else:
237
- st.info("πŸ”„ Basic Mode")
238
- with col3:
239
- st.metric("Status", "Ready" if not st.session_state.processing else "Processing")
240
 
241
  # Main input area
242
- st.header("🎀 Input Your Idea")
243
 
244
- # Tabs for different input methods
 
245
  if AUDIO_REC_AVAILABLE:
246
- tab1, tab2, tab3 = st.tabs(["πŸŽ™οΈ Voice", "πŸ“ Upload", "✍️ Text"])
247
- else:
248
- tab2, tab3 = st.tabs(["πŸ“ Upload", "✍️ Text"])
 
 
249
 
250
- # Voice tab (only if available)
251
  if AUDIO_REC_AVAILABLE:
252
- with tab1:
253
- st.info("Record your voice to generate content ideas")
254
 
255
  # Audio recorder
256
  wav_audio_data = st_audiorec()
257
 
258
  if wav_audio_data is not None:
259
- st.success("πŸŽ‰ Audio recorded!")
260
  st.audio(wav_audio_data, format='audio/wav')
261
 
262
- if st.button("πŸ”„ Convert to Text", key="transcribe_btn"):
263
- st.session_state.processing = True
264
- st.rerun()
 
 
 
 
 
 
 
 
 
 
 
 
265
 
266
  # Upload tab
267
- with tab2:
 
 
268
  uploaded_file = st.file_uploader(
269
- "Upload audio file",
270
  type=['wav', 'mp3', 'm4a'],
271
- help="Max 5MB, 30 seconds recommended"
272
  )
273
 
274
  if uploaded_file:
 
275
  st.audio(uploaded_file)
276
- if st.button("πŸ”„ Convert to Text", key="upload_transcribe"):
277
- st.session_state.processing = True
278
- # Process uploaded file
279
- with st.spinner("Converting speech to text..."):
280
- if TRANSFORMERS_AVAILABLE:
281
- load_models()
282
- transcription = transcribe_audio_simple(uploaded_file)
283
- st.session_state.transcription = transcription
284
  else:
285
- st.session_state.transcription = "Speech-to-text not available. Please use text input."
286
- st.session_state.processing = False
287
- st.rerun()
 
 
 
 
 
288
 
289
  # Text tab
290
- with tab3:
 
 
291
  user_input = st.text_area(
292
- "Type your idea or product description:",
293
- placeholder="e.g., A smart fitness tracker that monitors sleep patterns and provides personalized recommendations",
294
- height=120
 
295
  )
 
296
  if user_input:
297
  st.session_state.transcription = user_input
 
 
 
 
 
 
 
 
298
 
299
- # Process audio transcription if needed
300
- if st.session_state.processing and AUDIO_REC_AVAILABLE:
301
- # Check if wav_audio_data exists in the current scope
302
- if 'wav_audio_data' in locals() and wav_audio_data is not None:
303
- with st.spinner("🎯 Converting speech to text..."):
 
 
 
 
 
 
 
 
 
 
 
 
304
  if TRANSFORMERS_AVAILABLE:
305
- load_models()
306
-
307
- # Save audio to temp file for processing
308
- with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
309
- tmp_file.write(wav_audio_data)
310
- transcription = transcribe_audio_simple(tmp_file.name)
311
- st.session_state.transcription = transcription
312
- os.unlink(tmp_file.name)
313
  else:
314
- st.session_state.transcription = "Speech recognition not available. Please use text input."
315
-
316
- st.session_state.processing = False
317
- st.rerun()
318
 
319
- # Show transcription
320
  if st.session_state.transcription:
321
- st.subheader("πŸ“ Your Input")
 
 
322
  edited_text = st.text_area(
323
- "Edit if needed:",
324
  value=st.session_state.transcription,
325
- height=100,
326
- key="edit_transcription"
 
327
  )
328
  st.session_state.transcription = edited_text
329
 
330
- # Generate content button
331
- if st.button("πŸš€ Generate Marketing Content", type="primary", use_container_width=True):
332
- with st.spinner("✨ Creating amazing content..."):
333
- if gemini_available:
334
- content_text = generate_content_with_gemini(st.session_state.transcription)
335
- st.session_state.generated_content['text'] = content_text
336
- else:
337
- content_text = generate_content_offline(st.session_state.transcription)
338
- st.session_state.generated_content['text'] = content_text
339
- st.rerun()
 
 
 
 
 
340
 
341
  # Display generated content
342
  if st.session_state.generated_content:
343
- st.header("✨ Generated Content")
 
344
 
345
  # Text content
346
  if 'text' in st.session_state.generated_content:
347
  st.markdown(st.session_state.generated_content['text'])
348
 
349
  # Image generation section
350
- st.subheader("🎨 Generate Images")
 
351
 
352
- if 'structured' in st.session_state.generated_content:
353
- # Show image prompts from structured content
354
- prompts = st.session_state.generated_content['structured'].get('image_prompts', [])
355
- if prompts:
356
- selected_prompt = st.selectbox("Choose image style:", prompts)
357
-
358
- if st.button("πŸ–ΌοΈ Generate Image"):
359
- with st.spinner("Creating image..."):
360
- img = generate_image_with_api(selected_prompt)
361
- if img:
362
- st.image(img, caption="Generated Image", use_column_width=True)
363
-
364
- # Store image for download
365
- st.session_state.generated_content['image'] = img
366
-
367
- # Download button
368
- img_buffer = io.BytesIO()
369
- img.save(img_buffer, format="PNG")
370
- st.download_button(
371
- "πŸ“₯ Download Image",
372
- img_buffer.getvalue(),
373
- file_name=f"generated_image_{datetime.now().strftime('%Y%m%d_%H%M%S')}.png",
374
- mime="image/png"
375
- )
376
- else:
377
- st.warning("Image generation not available. Check HF_TOKEN in settings.")
378
- else:
379
- # Simple prompt input for image generation
380
- img_prompt = st.text_input("Enter image description:",
381
- placeholder="Professional product photo with clean background")
382
- if img_prompt and st.button("πŸ–ΌοΈ Generate Image"):
383
- with st.spinner("Creating image..."):
384
- img = generate_image_with_api(img_prompt)
385
  if img:
386
- st.image(img, caption="Generated Image", use_column_width=True)
387
  st.session_state.generated_content['image'] = img
 
 
 
 
 
 
 
 
 
 
 
 
 
 
388
 
389
  # Export section
390
- st.header("πŸ“₯ Export Content")
 
391
 
392
- col1, col2 = st.columns(2)
393
 
394
  with col1:
395
  # Text export
396
  if 'text' in st.session_state.generated_content:
397
- content_export = f"""Marketing Content Generated on {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
398
- Input: {st.session_state.transcription}
 
399
 
400
  {st.session_state.generated_content['text']}
 
 
 
401
  """
 
402
  st.download_button(
403
- "πŸ“„ Download Text Content",
404
  content_export,
405
  file_name=f"marketing_content_{datetime.now().strftime('%Y%m%d_%H%M%S')}.txt",
406
  mime="text/plain",
407
- use_container_width=True
 
408
  )
409
 
410
  with col2:
411
- # JSON export for structured data
412
  if 'structured' in st.session_state.generated_content:
413
  json_data = {
414
- "timestamp": datetime.now().isoformat(),
 
 
 
 
415
  "input": st.session_state.transcription,
416
  "content": st.session_state.generated_content['structured']
417
  }
418
 
419
  st.download_button(
420
- "πŸ“Š Download JSON Data",
421
  json.dumps(json_data, indent=2),
422
  file_name=f"content_data_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json",
423
  mime="application/json",
424
- use_container_width=True
 
425
  )
426
-
427
- # Help section
428
- with st.expander("πŸ’‘ Tips & Help"):
429
- st.markdown("""
430
- **How to use VoiceCanvas:**
431
- 1. 🎀 **Record/Upload**: Share your product idea via voice or upload audio
432
- 2. ✏️ **Edit**: Review and refine the transcribed text
433
- 3. πŸš€ **Generate**: Create marketing content automatically
434
- 4. πŸ“₯ **Export**: Download your content in various formats
435
-
436
- **For best results:**
437
- - Speak clearly and describe your product/service
438
- - Include key features and benefits
439
- - Mention your target audience
440
-
441
- **API Setup (Optional):**
442
- - Add `GEMINI_API_KEY` for enhanced text generation
443
- - Add `HF_TOKEN` for image generation
444
-
445
- **Current Status:**
446
- - Transformers: {'βœ… Available' if TRANSFORMERS_AVAILABLE else '❌ Not Available'}
447
- - Audio Recording: {'βœ… Available' if AUDIO_REC_AVAILABLE else '❌ Not Available'}
448
- - Gemini AI: {'βœ… Available' if gemini_available else '❌ Not Available'}
449
- """)
450
 
451
  # Footer
452
  st.markdown("---")
453
- st.markdown("🎨 **VoiceCanvas** - Transform ideas into marketing content | Made with Streamlit")
 
 
 
454
 
455
  if __name__ == "__main__":
456
  main()
 
16
  TRANSFORMERS_AVAILABLE = True
17
  except ImportError:
18
  TRANSFORMERS_AVAILABLE = False
 
19
 
20
  try:
21
  import google.generativeai as genai
 
28
  AUDIO_REC_AVAILABLE = True
29
  except ImportError:
30
  AUDIO_REC_AVAILABLE = False
 
31
 
32
  # Configure page
33
  st.set_page_config(
34
  page_title="VoiceCanvas - AI Content Studio",
35
  page_icon="🎨",
36
  layout="wide",
37
+ initial_sidebar_state="expanded"
38
  )
39
 
40
  # Initialize session state
 
44
  st.session_state.transcription = ""
45
  if 'processing' not in st.session_state:
46
  st.session_state.processing = False
47
+ if 'current_task' not in st.session_state:
48
+ st.session_state.current_task = ""
49
+ if 'models_loaded' not in st.session_state:
50
+ st.session_state.models_loaded = False
51
 
52
  # Global variables for models
53
  whisper_model = None
54
  text_generator = None
55
 
56
  def load_models():
57
+ """Load models efficiently with progress tracking"""
58
  global whisper_model, text_generator
59
 
60
+ if st.session_state.models_loaded:
61
+ return True
62
+
63
  if not TRANSFORMERS_AVAILABLE:
64
  st.error("AI models not available")
65
+ return False
66
 
67
+ progress_bar = st.progress(0)
68
+ status_text = st.empty()
 
 
 
 
 
 
 
 
 
 
69
 
70
+ try:
71
+ # Load Whisper model
72
+ status_text.text("Loading speech recognition model...")
73
+ progress_bar.progress(25)
74
+
75
+ whisper_model = pipeline(
76
+ "automatic-speech-recognition",
77
+ model="openai/whisper-tiny",
78
+ device=-1,
79
+ torch_dtype=torch.float32
80
+ )
81
+
82
+ progress_bar.progress(75)
83
+ status_text.text("Models loaded successfully!")
84
+ progress_bar.progress(100)
85
+
86
+ st.session_state.models_loaded = True
87
+
88
+ # Clear progress indicators after a moment
89
+ time.sleep(1)
90
+ progress_bar.empty()
91
+ status_text.empty()
92
+
93
+ return True
94
+
95
+ except Exception as e:
96
+ st.error(f"Error loading models: {e}")
97
+ progress_bar.empty()
98
+ status_text.empty()
99
+ return False
100
 
101
  def setup_gemini():
102
  """Setup Gemini API if available"""
 
116
  return False
117
 
118
  def transcribe_audio_simple(audio_file):
119
+ """Simple audio transcription with progress tracking"""
120
  try:
121
+ if whisper_model is None:
122
  return "Error: Speech recognition not available"
123
 
124
+ st.session_state.current_task = "Converting speech to text..."
125
+
126
  # Transcribe using pipeline
127
  result = whisper_model(audio_file)
128
+
129
+ st.session_state.current_task = ""
130
  return result["text"].strip()
131
 
132
  except Exception as e:
133
+ st.session_state.current_task = ""
134
  return f"Error: {str(e)}"
135
 
136
  def generate_content_with_gemini(prompt):
 
139
  return generate_content_offline(prompt)
140
 
141
  try:
142
+ st.session_state.current_task = "Generating enhanced content with Gemini AI..."
143
+
144
  model = genai.GenerativeModel('gemini-pro')
145
  response = model.generate_content(f"""
146
  Based on this input: "{prompt}"
147
 
148
+ Create comprehensive marketing content with:
149
+
150
+ ## Marketing Taglines
151
+ Generate 3 catchy, memorable taglines (max 12 words each)
152
+
153
+ ## Social Media Posts
154
+ Create 3 engaging social media posts (max 280 characters each)
155
 
156
+ ## Product Description
157
+ Write 1 compelling product description (100-150 words)
158
+
159
+ ## Image Generation Prompts
160
+ Provide 3 detailed prompts for AI image generation
161
+
162
+ ## Call-to-Action Ideas
163
+ Suggest 3 effective call-to-action phrases
164
+
165
+ Format with clear markdown headers and numbered lists.
166
  """)
167
+
168
+ st.session_state.current_task = ""
169
  return response.text
170
+
171
  except Exception as e:
172
  st.warning(f"Gemini error: {e}. Using offline generation.")
173
+ st.session_state.current_task = ""
174
  return generate_content_offline(prompt)
175
 
176
  def generate_content_offline(prompt):
177
  """Generate content using offline methods"""
178
+ st.session_state.current_task = "Generating content with offline templates..."
179
+
180
  # Create structured content
181
  content = {
182
  "taglines": [
 
202
 
203
  # Store both versions
204
  st.session_state.generated_content['structured'] = content
205
+ st.session_state.current_task = ""
206
+
207
  return formatted
208
 
209
  def generate_image_with_api(prompt):
210
  """Generate image using free API"""
211
  try:
212
+ st.session_state.current_task = "Creating image with AI..."
213
+
214
  api_url = "https://api-inference.huggingface.co/models/stabilityai/stable-diffusion-2-1"
215
  headers = {"Authorization": f"Bearer {os.getenv('HF_TOKEN', '')}"}
216
 
217
  if not os.getenv('HF_TOKEN'):
218
  st.warning("Add HF_TOKEN environment variable for image generation")
219
+ st.session_state.current_task = ""
220
  return None
221
 
222
+ response = requests.post(api_url, headers=headers, json={"inputs": prompt}, timeout=60)
223
 
224
  if response.status_code == 200:
225
  image = Image.open(io.BytesIO(response.content))
226
+ st.session_state.current_task = ""
227
  return image
228
  else:
229
  st.warning(f"Image API returned status {response.status_code}")
230
+ st.session_state.current_task = ""
231
  return None
232
+
233
  except Exception as e:
234
  st.error(f"Image generation error: {e}")
235
+ st.session_state.current_task = ""
236
  return None
237
 
238
  def format_content_display(content):
 
265
  return str(content)
266
 
267
  def main():
268
+ # Sidebar with tips and status
269
+ with st.sidebar:
270
+ st.header("🎨 VoiceCanvas")
271
+ st.markdown("*AI Content Studio*")
272
+
273
+ # Status section
274
+ st.subheader("πŸ“Š System Status")
275
+
276
+ gemini_available = setup_gemini()
277
+
278
+ col1, col2 = st.columns(2)
279
+ with col1:
280
+ st.metric("Mode", "Enhanced" if gemini_available else "Basic")
281
+ with col2:
282
+ st.metric("Status", "Ready" if not st.session_state.processing else "Working")
283
+
284
+ # Component status
285
+ st.write("πŸ€– **Components:**")
286
+ st.write(f"β€’ Speech Recognition: {'βœ…' if TRANSFORMERS_AVAILABLE else '❌'}")
287
+ st.write(f"β€’ Audio Recording: {'βœ…' if AUDIO_REC_AVAILABLE else '❌'}")
288
+ st.write(f"β€’ Enhanced AI: {'βœ…' if gemini_available else '❌'}")
289
+
290
+ # Current task indicator
291
+ if st.session_state.current_task:
292
+ st.info(f"πŸ”„ {st.session_state.current_task}")
293
+
294
+ st.markdown("---")
295
+
296
+ # Tips and help
297
+ st.subheader("πŸ’‘ How to Use")
298
+
299
+ with st.expander("πŸš€ Quick Start", expanded=True):
300
+ st.markdown("""
301
+ 1. **Input**: Use voice, upload audio, or type text
302
+ 2. **Edit**: Review and refine your input
303
+ 3. **Generate**: Create marketing content
304
+ 4. **Export**: Download your materials
305
+ """)
306
+
307
+ with st.expander("🎯 Best Practices"):
308
+ st.markdown("""
309
+ **For Voice/Audio:**
310
+ - Speak clearly at normal pace
311
+ - Use quiet environment
312
+ - Describe your product/service
313
+ - Mention target audience
314
+
315
+ **For Text:**
316
+ - Be specific about features
317
+ - Include benefits and use cases
318
+ - Mention what makes it unique
319
+ - Use 50+ words for detail
320
+ """)
321
+
322
+ with st.expander("βš™οΈ Setup (Optional)"):
323
+ st.markdown("""
324
+ **Enhanced Features:**
325
+
326
+ Add environment variables:
327
+ - `GEMINI_API_KEY`: Advanced text generation
328
+ - `HF_TOKEN`: AI image generation
329
+
330
+ **Get API Keys:**
331
+ - [Google AI Studio](https://makersuite.google.com/app/apikey) (Free)
332
+ - [Hugging Face](https://huggingface.co/settings/tokens) (Free)
333
+ """)
334
+
335
+ with st.expander("πŸ› οΈ Troubleshooting"):
336
+ st.markdown("""
337
+ **Common Issues:**
338
+ - Audio not recording β†’ Try different browser
339
+ - Slow processing β†’ Models loading for first time
340
+ - No image generation β†’ Add HF_TOKEN
341
+ - Basic content only β†’ Add GEMINI_API_KEY
342
+ """)
343
+
344
+ # Main content
345
  st.title("🎨 VoiceCanvas - AI Content Studio")
346
+ st.markdown("*Transform your ideas into comprehensive marketing content*")
 
 
 
 
 
 
 
 
 
 
 
 
 
347
 
348
  # Main input area
349
+ st.header("πŸ’‘ Share Your Idea")
350
 
351
+ # Dynamic tabs based on available features
352
+ available_tabs = []
353
  if AUDIO_REC_AVAILABLE:
354
+ available_tabs.append("πŸŽ™οΈ Record")
355
+ available_tabs.extend(["πŸ“ Upload", "✍️ Type"])
356
+
357
+ tabs = st.tabs(available_tabs)
358
+ tab_index = 0
359
 
360
+ # Recording tab (if available)
361
  if AUDIO_REC_AVAILABLE:
362
+ with tabs[tab_index]:
363
+ st.info("🎀 Click the microphone button to start recording")
364
 
365
  # Audio recorder
366
  wav_audio_data = st_audiorec()
367
 
368
  if wav_audio_data is not None:
369
+ st.success("πŸŽ‰ Audio recorded successfully!")
370
  st.audio(wav_audio_data, format='audio/wav')
371
 
372
+ col1, col2 = st.columns([1, 2])
373
+ with col1:
374
+ if st.button("πŸ”„ Transcribe Audio", key="transcribe_btn", type="primary"):
375
+ if not st.session_state.models_loaded:
376
+ if load_models():
377
+ st.session_state.processing = True
378
+ st.rerun()
379
+ else:
380
+ st.session_state.processing = True
381
+ st.rerun()
382
+
383
+ with col2:
384
+ if st.session_state.processing:
385
+ st.info("πŸ”„ Processing your audio...")
386
+ tab_index += 1
387
 
388
  # Upload tab
389
+ with tabs[tab_index]:
390
+ st.info("πŸ“ Upload an audio file containing your idea")
391
+
392
  uploaded_file = st.file_uploader(
393
+ "Choose audio file",
394
  type=['wav', 'mp3', 'm4a'],
395
+ help="Supported: WAV, MP3, M4A β€’ Max 10MB β€’ Best: 30 seconds or less"
396
  )
397
 
398
  if uploaded_file:
399
+ st.success("πŸ“„ File uploaded successfully!")
400
  st.audio(uploaded_file)
401
+
402
+ col1, col2 = st.columns([1, 2])
403
+ with col1:
404
+ if st.button("πŸ”„ Process Audio", key="upload_transcribe", type="primary"):
405
+ if not st.session_state.models_loaded:
406
+ if load_models():
407
+ st.session_state.processing = True
408
+ st.rerun()
409
  else:
410
+ st.session_state.processing = True
411
+ st.rerun()
412
+
413
+ with col2:
414
+ if st.session_state.processing:
415
+ st.info("πŸ”„ Converting speech to text...")
416
+
417
+ tab_index += 1
418
 
419
  # Text tab
420
+ with tabs[tab_index]:
421
+ st.info("✍️ Type or paste your product/service description")
422
+
423
  user_input = st.text_area(
424
+ "Describe your idea:",
425
+ placeholder="Example: A smart fitness tracker that monitors sleep patterns, heart rate, and stress levels. It provides personalized workout recommendations for busy professionals who want to maintain their health despite hectic schedules.",
426
+ height=150,
427
+ help="Be detailed! Include features, benefits, and target audience for best results."
428
  )
429
+
430
  if user_input:
431
  st.session_state.transcription = user_input
432
+ word_count = len(user_input.split())
433
+
434
+ if word_count < 10:
435
+ st.warning("πŸ’‘ Add more details for better results (at least 10 words)")
436
+ elif word_count < 30:
437
+ st.info("πŸ“ Good start! Add more features/benefits for richer content")
438
+ else:
439
+ st.success(f"βœ… Great detail! ({word_count} words)")
440
 
441
+ # Process audio transcription
442
+ if st.session_state.processing:
443
+ if AUDIO_REC_AVAILABLE and 'wav_audio_data' in locals() and wav_audio_data is not None:
444
+ # Process recorded audio
445
+ with st.spinner("🎯 Converting your speech to text..."):
446
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
447
+ tmp_file.write(wav_audio_data)
448
+ transcription = transcribe_audio_simple(tmp_file.name)
449
+ st.session_state.transcription = transcription
450
+ os.unlink(tmp_file.name)
451
+
452
+ st.session_state.processing = False
453
+ st.rerun()
454
+
455
+ elif 'uploaded_file' in locals() and uploaded_file is not None:
456
+ # Process uploaded file
457
+ with st.spinner("🎯 Processing your audio file..."):
458
  if TRANSFORMERS_AVAILABLE:
459
+ transcription = transcribe_audio_simple(uploaded_file)
460
+ st.session_state.transcription = transcription
 
 
 
 
 
 
461
  else:
462
+ st.session_state.transcription = "Speech-to-text not available. Please use text input."
463
+
464
+ st.session_state.processing = False
465
+ st.rerun()
466
 
467
+ # Show transcription and editing
468
  if st.session_state.transcription:
469
+ st.markdown("---")
470
+ st.header("πŸ“ Review Your Input")
471
+
472
  edited_text = st.text_area(
473
+ "Edit or refine your input:",
474
  value=st.session_state.transcription,
475
+ height=120,
476
+ key="edit_transcription",
477
+ help="Make any corrections or add more details"
478
  )
479
  st.session_state.transcription = edited_text
480
 
481
+ # Generate content section
482
+ st.markdown("---")
483
+ col1, col2, col3 = st.columns([1, 2, 1])
484
+
485
+ with col2:
486
+ if st.button("πŸš€ Generate Marketing Content", type="primary", use_container_width=True):
487
+ with st.spinner("✨ Creating comprehensive marketing content..."):
488
+ if gemini_available:
489
+ content_text = generate_content_with_gemini(st.session_state.transcription)
490
+ st.session_state.generated_content['text'] = content_text
491
+ else:
492
+ content_text = generate_content_offline(st.session_state.transcription)
493
+ st.session_state.generated_content['text'] = content_text
494
+ st.success("βœ… Content generated successfully!")
495
+ st.rerun()
496
 
497
  # Display generated content
498
  if st.session_state.generated_content:
499
+ st.markdown("---")
500
+ st.header("✨ Your Marketing Content")
501
 
502
  # Text content
503
  if 'text' in st.session_state.generated_content:
504
  st.markdown(st.session_state.generated_content['text'])
505
 
506
  # Image generation section
507
+ st.markdown("---")
508
+ st.subheader("🎨 Visual Content")
509
 
510
+ col1, col2 = st.columns([2, 1])
511
+
512
+ with col1:
513
+ if 'structured' in st.session_state.generated_content:
514
+ # Show pre-made prompts
515
+ prompts = st.session_state.generated_content['structured'].get('image_prompts', [])
516
+ if prompts:
517
+ selected_prompt = st.selectbox(
518
+ "Choose image style:",
519
+ prompts,
520
+ help="Select from AI-generated image prompts"
521
+ )
522
+ else:
523
+ # Custom prompt input
524
+ selected_prompt = st.text_input(
525
+ "Describe the image you want:",
526
+ placeholder="Professional product photo with clean white background",
527
+ help="Be specific about style, colors, composition"
528
+ )
529
+
530
+ with col2:
531
+ st.write("") # Spacing
532
+ st.write("") # Spacing
533
+
534
+ if st.button("πŸ–ΌοΈ Generate Image", use_container_width=True):
535
+ if selected_prompt:
536
+ img = generate_image_with_api(selected_prompt)
 
 
 
 
 
 
537
  if img:
 
538
  st.session_state.generated_content['image'] = img
539
+ st.success("🎨 Image created!")
540
+ st.rerun()
541
+ else:
542
+ st.error("Image generation failed. Check HF_TOKEN.")
543
+ else:
544
+ st.warning("Please enter/select an image description")
545
+
546
+ # Display generated image
547
+ if 'image' in st.session_state.generated_content:
548
+ st.image(
549
+ st.session_state.generated_content['image'],
550
+ caption="AI Generated Image",
551
+ use_column_width=True
552
+ )
553
 
554
  # Export section
555
+ st.markdown("---")
556
+ st.header("πŸ“₯ Export Your Content")
557
 
558
+ col1, col2, col3 = st.columns(3)
559
 
560
  with col1:
561
  # Text export
562
  if 'text' in st.session_state.generated_content:
563
+ content_export = f"""VOICECANVAS MARKETING CONTENT
564
+ Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
565
+ Source: {st.session_state.transcription[:100]}...
566
 
567
  {st.session_state.generated_content['text']}
568
+
569
+ ---
570
+ Created with VoiceCanvas AI Content Studio
571
  """
572
+
573
  st.download_button(
574
+ "πŸ“„ Download Text",
575
  content_export,
576
  file_name=f"marketing_content_{datetime.now().strftime('%Y%m%d_%H%M%S')}.txt",
577
  mime="text/plain",
578
+ use_container_width=True,
579
+ help="Download complete text content"
580
  )
581
 
582
  with col2:
583
+ # JSON export
584
  if 'structured' in st.session_state.generated_content:
585
  json_data = {
586
+ "metadata": {
587
+ "timestamp": datetime.now().isoformat(),
588
+ "generator": "VoiceCanvas AI Studio",
589
+ "mode": "Enhanced" if gemini_available else "Basic"
590
+ },
591
  "input": st.session_state.transcription,
592
  "content": st.session_state.generated_content['structured']
593
  }
594
 
595
  st.download_button(
596
+ "πŸ“Š Download Data",
597
  json.dumps(json_data, indent=2),
598
  file_name=f"content_data_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json",
599
  mime="application/json",
600
+ use_container_width=True,
601
+ help="Download structured data (JSON)"
602
  )
603
+
604
+ with col3:
605
+ # Image export
606
+ if 'image' in st.session_state.generated_content:
607
+ img_buffer = io.BytesIO()
608
+ st.session_state.generated_content['image'].save(img_buffer, format="PNG")
609
+
610
+ st.download_button(
611
+ "πŸ–ΌοΈ Download Image",
612
+ img_buffer.getvalue(),
613
+ file_name=f"ai_image_{datetime.now().strftime('%Y%m%d_%H%M%S')}.png",
614
+ mime="image/png",
615
+ use_container_width=True,
616
+ help="Download generated image"
617
+ )
618
+ else:
619
+ st.info("Generate an image first", icon="ℹ️")
 
 
 
 
 
 
 
620
 
621
  # Footer
622
  st.markdown("---")
623
+ col1, col2, col3 = st.columns([1, 2, 1])
624
+ with col2:
625
+ st.markdown("🎨 **VoiceCanvas AI Content Studio**")
626
+ st.caption("Transform ideas into marketing magic β€’ Built with Streamlit")
627
 
628
  if __name__ == "__main__":
629
  main()