maria355 committed on
Commit
7d5a6b7
·
verified ·
1 Parent(s): 88b7e1a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +110 -43
app.py CHANGED
@@ -10,10 +10,26 @@ import json
10
  from datetime import datetime
11
  import time
12
 
13
- # Hugging Face imports
14
- from transformers import pipeline, AutoProcessor, AutoModelForSpeechSeq2Seq
15
- import google.generativeai as genai
16
- from st_audiorec import st_audiorec
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
 
18
  # Configure page
19
  st.set_page_config(
@@ -39,6 +55,10 @@ def load_models():
39
  """Load models efficiently"""
40
  global whisper_model, text_generator
41
 
 
 
 
 
42
  if whisper_model is None:
43
  try:
44
  # Use the smallest Whisper model for speed
@@ -50,25 +70,33 @@ def load_models():
50
  )
51
  except Exception as e:
52
  st.error(f"Error loading Whisper: {e}")
 
53
 
54
  if text_generator is None:
55
  try:
56
  # Use a lightweight text generation model
57
  text_generator = pipeline(
58
  "text-generation",
59
- model="microsoft/DialoGPT-small", # Smaller model
60
  device=-1, # Force CPU
61
  max_length=150,
62
  do_sample=True,
63
  temperature=0.7
64
  )
65
  except Exception as e:
66
- st.error(f"Error loading text generator: {e}")
 
67
 
68
  def setup_gemini():
69
  """Setup Gemini API if available"""
 
 
 
70
  try:
71
- api_key = os.getenv("GEMINI_API_KEY") or st.secrets.get("GEMINI_API_KEY", "")
 
 
 
72
  if api_key:
73
  genai.configure(api_key=api_key)
74
  return True
@@ -79,7 +107,7 @@ def setup_gemini():
79
  def transcribe_audio_simple(audio_file):
80
  """Simple audio transcription"""
81
  try:
82
- if whisper_model is None:
83
  return "Error: Speech recognition not available"
84
 
85
  # Transcribe using pipeline
@@ -91,6 +119,9 @@ def transcribe_audio_simple(audio_file):
91
 
92
  def generate_content_with_gemini(prompt):
93
  """Generate content using Gemini"""
 
 
 
94
  try:
95
  model = genai.GenerativeModel('gemini-pro')
96
  response = model.generate_content(f"""
@@ -106,10 +137,12 @@ def generate_content_with_gemini(prompt):
106
  """)
107
  return response.text
108
  except Exception as e:
109
- return f"Error generating content: {e}"
 
110
 
111
  def generate_content_offline(prompt):
112
- """Generate content using free models"""
 
113
  content = {
114
  "taglines": [
115
  f"Experience {prompt} like never before",
@@ -128,7 +161,13 @@ def generate_content_offline(prompt):
128
  f"Futuristic concept art of {prompt}, digital art, high quality, detailed"
129
  ]
130
  }
131
- return content
 
 
 
 
 
 
132
 
133
  def generate_image_with_api(prompt):
134
  """Generate image using free API"""
@@ -137,12 +176,17 @@ def generate_image_with_api(prompt):
137
  api_url = "https://api-inference.huggingface.co/models/stabilityai/stable-diffusion-2-1"
138
  headers = {"Authorization": f"Bearer {os.getenv('HF_TOKEN', '')}"}
139
 
140
- response = requests.post(api_url, headers=headers, json={"inputs": prompt})
 
 
 
 
141
 
142
  if response.status_code == 200:
143
  image = Image.open(io.BytesIO(response.content))
144
  return image
145
  else:
 
146
  return None
147
  except Exception as e:
148
  st.error(f"Image generation error: {e}")
@@ -198,22 +242,28 @@ def main():
198
  st.header("🎀 Input Your Idea")
199
 
200
  # Tabs for different input methods
201
- tab1, tab2, tab3 = st.tabs(["πŸŽ™οΈ Voice", "πŸ“ Upload", "✍️ Text"])
 
 
 
202
 
203
- with tab1:
204
- st.info("Record your voice to generate content ideas")
205
-
206
- # Audio recorder
207
- wav_audio_data = st_audiorec()
208
-
209
- if wav_audio_data is not None:
210
- st.success("πŸŽ‰ Audio recorded!")
211
- st.audio(wav_audio_data, format='audio/wav')
212
 
213
- if st.button("πŸ”„ Convert to Text", key="transcribe_btn"):
214
- st.session_state.processing = True
215
- st.rerun()
 
 
 
 
 
 
 
216
 
 
217
  with tab2:
218
  uploaded_file = st.file_uploader(
219
  "Upload audio file",
@@ -227,32 +277,41 @@ def main():
227
  st.session_state.processing = True
228
  # Process uploaded file
229
  with st.spinner("Converting speech to text..."):
230
- load_models() # Load only when needed
231
- transcription = transcribe_audio_simple(uploaded_file)
232
- st.session_state.transcription = transcription
 
 
 
233
  st.session_state.processing = False
234
  st.rerun()
235
 
 
236
  with tab3:
237
  user_input = st.text_area(
238
  "Type your idea or product description:",
239
- placeholder="e.g., A smart fitness tracker that monitors sleep patterns",
240
  height=120
241
  )
242
  if user_input:
243
  st.session_state.transcription = user_input
244
 
245
  # Process audio transcription if needed
246
- if st.session_state.processing and AUDIO_REC_AVAILABLE and 'wav_audio_data' in locals() and wav_audio_data is not None:
247
- with st.spinner("🎯 Converting speech to text..."):
248
- load_models() # Load models only when needed
249
-
250
- # Save audio to temp file for processing
251
- with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
252
- tmp_file.write(wav_audio_data)
253
- transcription = transcribe_audio_simple(tmp_file.name)
254
- st.session_state.transcription = transcription
255
- os.unlink(tmp_file.name) # Clean up temp file
 
 
 
 
 
256
 
257
  st.session_state.processing = False
258
  st.rerun()
@@ -275,9 +334,8 @@ def main():
275
  content_text = generate_content_with_gemini(st.session_state.transcription)
276
  st.session_state.generated_content['text'] = content_text
277
  else:
278
- content_dict = generate_content_offline(st.session_state.transcription)
279
- st.session_state.generated_content['structured'] = content_dict
280
- st.session_state.generated_content['text'] = format_content_display(content_dict)
281
  st.rerun()
282
 
283
  # Display generated content
@@ -303,6 +361,9 @@ def main():
303
  if img:
304
  st.image(img, caption="Generated Image", use_column_width=True)
305
 
 
 
 
306
  # Download button
307
  img_buffer = io.BytesIO()
308
  img.save(img_buffer, format="PNG")
@@ -313,7 +374,7 @@ def main():
313
  mime="image/png"
314
  )
315
  else:
316
- st.warning("Image generation not available. Try again later.")
317
  else:
318
  # Simple prompt input for image generation
319
  img_prompt = st.text_input("Enter image description:",
@@ -323,6 +384,7 @@ def main():
323
  img = generate_image_with_api(img_prompt)
324
  if img:
325
  st.image(img, caption="Generated Image", use_column_width=True)
 
326
 
327
  # Export section
328
  st.header("πŸ“₯ Export Content")
@@ -379,6 +441,11 @@ Input: {st.session_state.transcription}
379
  **API Setup (Optional):**
380
  - Add `GEMINI_API_KEY` for enhanced text generation
381
  - Add `HF_TOKEN` for image generation
 
 
 
 
 
382
  """)
383
 
384
  # Footer
 
10
  from datetime import datetime
11
  import time
12
 
13
+ # Import with error handling
14
+ try:
15
+ from transformers import pipeline
16
+ TRANSFORMERS_AVAILABLE = True
17
+ except ImportError:
18
+ TRANSFORMERS_AVAILABLE = False
19
+ st.error("Transformers not available")
20
+
21
+ try:
22
+ import google.generativeai as genai
23
+ GENAI_AVAILABLE = True
24
+ except ImportError:
25
+ GENAI_AVAILABLE = False
26
+
27
+ try:
28
+ from st_audiorec import st_audiorec
29
+ AUDIO_REC_AVAILABLE = True
30
+ except ImportError:
31
+ AUDIO_REC_AVAILABLE = False
32
+ st.warning("Audio recording not available")
33
 
34
  # Configure page
35
  st.set_page_config(
 
55
  """Load models efficiently"""
56
  global whisper_model, text_generator
57
 
58
+ if not TRANSFORMERS_AVAILABLE:
59
+ st.error("AI models not available")
60
+ return
61
+
62
  if whisper_model is None:
63
  try:
64
  # Use the smallest Whisper model for speed
 
70
  )
71
  except Exception as e:
72
  st.error(f"Error loading Whisper: {e}")
73
+ whisper_model = "error"
74
 
75
  if text_generator is None:
76
  try:
77
  # Use a lightweight text generation model
78
  text_generator = pipeline(
79
  "text-generation",
80
+ model="microsoft/DialoGPT-small",
81
  device=-1, # Force CPU
82
  max_length=150,
83
  do_sample=True,
84
  temperature=0.7
85
  )
86
  except Exception as e:
87
+ st.warning(f"Text generator not available: {e}")
88
+ text_generator = "error"
89
 
90
  def setup_gemini():
91
  """Setup Gemini API if available"""
92
+ if not GENAI_AVAILABLE:
93
+ return False
94
+
95
  try:
96
+ api_key = os.getenv("GEMINI_API_KEY")
97
+ if not api_key and hasattr(st, 'secrets'):
98
+ api_key = st.secrets.get("GEMINI_API_KEY", "")
99
+
100
  if api_key:
101
  genai.configure(api_key=api_key)
102
  return True
 
107
  def transcribe_audio_simple(audio_file):
108
  """Simple audio transcription"""
109
  try:
110
+ if whisper_model is None or whisper_model == "error":
111
  return "Error: Speech recognition not available"
112
 
113
  # Transcribe using pipeline
 
119
 
120
  def generate_content_with_gemini(prompt):
121
  """Generate content using Gemini"""
122
+ if not GENAI_AVAILABLE:
123
+ return generate_content_offline(prompt)
124
+
125
  try:
126
  model = genai.GenerativeModel('gemini-pro')
127
  response = model.generate_content(f"""
 
137
  """)
138
  return response.text
139
  except Exception as e:
140
+ st.warning(f"Gemini error: {e}. Using offline generation.")
141
+ return generate_content_offline(prompt)
142
 
143
  def generate_content_offline(prompt):
144
+ """Generate content using offline methods"""
145
+ # Create structured content
146
  content = {
147
  "taglines": [
148
  f"Experience {prompt} like never before",
 
161
  f"Futuristic concept art of {prompt}, digital art, high quality, detailed"
162
  ]
163
  }
164
+
165
+ # Format for display
166
+ formatted = format_content_display(content)
167
+
168
+ # Store both versions
169
+ st.session_state.generated_content['structured'] = content
170
+ return formatted
171
 
172
  def generate_image_with_api(prompt):
173
  """Generate image using free API"""
 
176
  api_url = "https://api-inference.huggingface.co/models/stabilityai/stable-diffusion-2-1"
177
  headers = {"Authorization": f"Bearer {os.getenv('HF_TOKEN', '')}"}
178
 
179
+ if not os.getenv('HF_TOKEN'):
180
+ st.warning("Add HF_TOKEN environment variable for image generation")
181
+ return None
182
+
183
+ response = requests.post(api_url, headers=headers, json={"inputs": prompt}, timeout=30)
184
 
185
  if response.status_code == 200:
186
  image = Image.open(io.BytesIO(response.content))
187
  return image
188
  else:
189
+ st.warning(f"Image API returned status {response.status_code}")
190
  return None
191
  except Exception as e:
192
  st.error(f"Image generation error: {e}")
 
242
  st.header("🎀 Input Your Idea")
243
 
244
  # Tabs for different input methods
245
+ if AUDIO_REC_AVAILABLE:
246
+ tab1, tab2, tab3 = st.tabs(["πŸŽ™οΈ Voice", "πŸ“ Upload", "✍️ Text"])
247
+ else:
248
+ tab2, tab3 = st.tabs(["πŸ“ Upload", "✍️ Text"])
249
 
250
+ # Voice tab (only if available)
251
+ if AUDIO_REC_AVAILABLE:
252
+ with tab1:
253
+ st.info("Record your voice to generate content ideas")
 
 
 
 
 
254
 
255
+ # Audio recorder
256
+ wav_audio_data = st_audiorec()
257
+
258
+ if wav_audio_data is not None:
259
+ st.success("πŸŽ‰ Audio recorded!")
260
+ st.audio(wav_audio_data, format='audio/wav')
261
+
262
+ if st.button("πŸ”„ Convert to Text", key="transcribe_btn"):
263
+ st.session_state.processing = True
264
+ st.rerun()
265
 
266
+ # Upload tab
267
  with tab2:
268
  uploaded_file = st.file_uploader(
269
  "Upload audio file",
 
277
  st.session_state.processing = True
278
  # Process uploaded file
279
  with st.spinner("Converting speech to text..."):
280
+ if TRANSFORMERS_AVAILABLE:
281
+ load_models()
282
+ transcription = transcribe_audio_simple(uploaded_file)
283
+ st.session_state.transcription = transcription
284
+ else:
285
+ st.session_state.transcription = "Speech-to-text not available. Please use text input."
286
  st.session_state.processing = False
287
  st.rerun()
288
 
289
+ # Text tab
290
  with tab3:
291
  user_input = st.text_area(
292
  "Type your idea or product description:",
293
+ placeholder="e.g., A smart fitness tracker that monitors sleep patterns and provides personalized recommendations",
294
  height=120
295
  )
296
  if user_input:
297
  st.session_state.transcription = user_input
298
 
299
  # Process audio transcription if needed
300
+ if st.session_state.processing and AUDIO_REC_AVAILABLE:
301
+ # Check if wav_audio_data exists in the current scope
302
+ if 'wav_audio_data' in locals() and wav_audio_data is not None:
303
+ with st.spinner("🎯 Converting speech to text..."):
304
+ if TRANSFORMERS_AVAILABLE:
305
+ load_models()
306
+
307
+ # Save audio to temp file for processing
308
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
309
+ tmp_file.write(wav_audio_data)
310
+ transcription = transcribe_audio_simple(tmp_file.name)
311
+ st.session_state.transcription = transcription
312
+ os.unlink(tmp_file.name)
313
+ else:
314
+ st.session_state.transcription = "Speech recognition not available. Please use text input."
315
 
316
  st.session_state.processing = False
317
  st.rerun()
 
334
  content_text = generate_content_with_gemini(st.session_state.transcription)
335
  st.session_state.generated_content['text'] = content_text
336
  else:
337
+ content_text = generate_content_offline(st.session_state.transcription)
338
+ st.session_state.generated_content['text'] = content_text
 
339
  st.rerun()
340
 
341
  # Display generated content
 
361
  if img:
362
  st.image(img, caption="Generated Image", use_column_width=True)
363
 
364
+ # Store image for download
365
+ st.session_state.generated_content['image'] = img
366
+
367
  # Download button
368
  img_buffer = io.BytesIO()
369
  img.save(img_buffer, format="PNG")
 
374
  mime="image/png"
375
  )
376
  else:
377
+ st.warning("Image generation not available. Check HF_TOKEN in settings.")
378
  else:
379
  # Simple prompt input for image generation
380
  img_prompt = st.text_input("Enter image description:",
 
384
  img = generate_image_with_api(img_prompt)
385
  if img:
386
  st.image(img, caption="Generated Image", use_column_width=True)
387
+ st.session_state.generated_content['image'] = img
388
 
389
  # Export section
390
  st.header("πŸ“₯ Export Content")
 
441
  **API Setup (Optional):**
442
  - Add `GEMINI_API_KEY` for enhanced text generation
443
  - Add `HF_TOKEN` for image generation
444
+
445
+ **Current Status:**
446
+ - Transformers: {'βœ… Available' if TRANSFORMERS_AVAILABLE else '❌ Not Available'}
447
+ - Audio Recording: {'βœ… Available' if AUDIO_REC_AVAILABLE else '❌ Not Available'}
448
+ - Gemini AI: {'βœ… Available' if gemini_available else '❌ Not Available'}
449
  """)
450
 
451
  # Footer