jfforero committed · Commit 5633b04 (verified) · Parent(s): 2d570a6

Update app.py

Files changed (1): app.py (+36 -15)
app.py CHANGED
@@ -55,15 +55,23 @@ def load_musicgen_model():
 
 processor, music_model, device = load_musicgen_model()
 
-# Function to chunk audio into 10-second segments
+# Function to chunk audio into segments
 def chunk_audio(audio_path, chunk_duration=10):
-    """Split audio into 10-second chunks and return list of chunk file paths"""
+    """Split audio into chunks and return list of chunk file paths"""
     try:
         # Load audio file
         audio = AudioSegment.from_file(audio_path)
         duration_ms = len(audio)
         chunk_ms = chunk_duration * 1000
 
+        # Validate chunk duration
+        if chunk_duration <= 0:
+            raise ValueError("Chunk duration must be positive")
+
+        if chunk_duration > duration_ms / 1000:
+            # If chunk duration is longer than audio, return the whole audio
+            return [audio_path], 1
+
         chunks = []
         chunk_files = []
 
@@ -299,15 +307,15 @@ def process_chunk(chunk_path, chunk_idx, total_chunks, generate_audio=True):
     }
 
 # Function to get predictions for all chunks
-def get_predictions(audio_input, generate_audio=True):
-    # Chunk the audio into 10-second segments
-    chunk_files, total_chunks = chunk_audio(audio_input, chunk_duration=10)
+def get_predictions(audio_input, generate_audio=True, chunk_duration=10):
+    # Chunk the audio into segments
+    chunk_files, total_chunks = chunk_audio(audio_input, chunk_duration)
 
     results = []
 
     # Process each chunk
     for i, chunk_path in enumerate(chunk_files):
-        print(f"Processing chunk {i+1}/{total_chunks}")
+        print(f"Processing chunk {i+1}/{total_chunks} ({chunk_duration}s each)")
         result = process_chunk(chunk_path, i, total_chunks, generate_audio)
         results.append(result)
 
@@ -328,11 +336,20 @@ def clear_all():
 # Create the Gradio interface with proper output handling
 with gr.Blocks(title="Affective Virtual Environments - Chunked Processing") as interface:
     gr.Markdown("# Affective Virtual Environments")
-    gr.Markdown("Create an AVE using your voice. Audio is split into 5-second chunks, with separate predictions and generations for each segment.")
+    gr.Markdown("Create an AVE using your voice. Audio is split into chunks, with separate predictions and generations for each segment.")
 
     with gr.Row():
         audio_input = gr.Audio(label="Input Audio", type="filepath", sources=["microphone", "upload"])
         with gr.Column():
+            # Add chunk duration input
+            chunk_duration_input = gr.Number(
+                label="Chunk Duration (seconds)",
+                value=10,
+                minimum=1,
+                maximum=60,
+                step=1,
+                info="Duration of each audio segment to process (1-60 seconds)"
+            )
             # Add checkbox for audio generation
             generate_audio_checkbox = gr.Checkbox(
                 label="Generate Audio (may take longer)",
@@ -356,8 +373,8 @@ with gr.Blocks(title="Affective Virtual Environments - Chunked Processing") as i
     output_containers = []
     group_components = []  # Store group components separately
 
-    # We'll create up to 10 chunk slots (adjust as needed)
-    for i in range(10):
+    # We'll create up to 20 chunk slots to accommodate different chunk durations
+    for i in range(20):
         with gr.Group(visible=False) as chunk_group:
             gr.Markdown(f"### Chunk {i+1} Results")
             with gr.Row():
@@ -378,17 +395,21 @@ with gr.Blocks(title="Affective Virtual Environments - Chunked Processing") as i
                 'music': audio_output
             })
 
-    def process_and_display(audio_input, generate_audio):
+    def process_and_display(audio_input, generate_audio, chunk_duration):
+        # Validate chunk duration
+        if chunk_duration is None or chunk_duration <= 0:
+            chunk_duration = 10
+
         # Show loading indicator
-        yield [gr.HTML("""
+        yield [gr.HTML(f"""
         <div style="text-align: center; margin: 20px;">
-            <p style="font-size: 18px; color: #4a4a4a;">Processing audio chunks...</p>
+            <p style="font-size: 18px; color: #4a4a4a;">Processing audio in {chunk_duration}-second chunks...</p>
             <div style="border: 4px solid #f3f3f3; border-top: 4px solid #3498db; border-radius: 50%; width: 30px; height: 30px; animation: spin 2s linear infinite; margin: 0 auto;"></div>
             <style>@keyframes spin {0% { transform: rotate(0deg); } 100% { transform: rotate(360deg); }}</style>
         </div>
         """)] + [gr.Group(visible=False)] * len(group_components) + [None] * (len(output_containers) * 5)
 
-        results = get_predictions(audio_input, generate_audio)
+        results = get_predictions(audio_input, generate_audio, chunk_duration)
 
         # Initialize outputs list
         outputs = []
@@ -421,7 +442,7 @@ with gr.Blocks(title="Affective Virtual Environments - Chunked Processing") as i
     # Set up the button click
     process_btn.click(
        fn=process_and_display,
-        inputs=[audio_input, generate_audio_checkbox],
+        inputs=[audio_input, generate_audio_checkbox, chunk_duration_input],
        outputs=[loading_indicator] + group_components + [comp for container in output_containers for comp in [
            container['emotion'],
            container['transcription'],
@@ -441,7 +462,7 @@ with gr.Blocks(title="Affective Virtual Environments - Chunked Processing") as i
            container['sentiment'],
            container['image'],
            container['music']
-        ]] + [loading_indicator]
+        ]] + [loading_indicator] + [gr.Number(value=10)]  # Reset chunk duration to default
    )
 
 interface.launch()
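
The first hunk cuts off chunk_audio just after chunks and chunk_files are initialized, so the export loop itself is not shown in the diff. A minimal sketch of how that loop presumably continues, assuming pydub slicing and temp-file exports (the file handling here is hypothetical, not taken from the commit):

    import tempfile

    # Slice the AudioSegment into chunk_ms windows and export each to its own WAV file
    for start_ms in range(0, duration_ms, chunk_ms):
        chunk = audio[start_ms:start_ms + chunk_ms]
        chunks.append(chunk)
        tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
        chunk.export(tmp.name, format="wav")
        chunk_files.append(tmp.name)

    return chunk_files, len(chunk_files)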
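One caveat in the new loading indicator: the triple-quoted HTML becomes an f-string so that {chunk_duration} interpolates, but the literal CSS braces in the unchanged @keyframes line would then be parsed as format fields and raise a SyntaxError. If the committed code runs as shown, those braces likely need doubling, e.g.:

    chunk_duration = 10  # example value; supplied by chunk_duration_input in the app

    # Literal CSS braces must be written as {{ and }} inside an f-string
    loading_html = f"""
    <div style="text-align: center; margin: 20px;">
        <p style="font-size: 18px; color: #4a4a4a;">Processing audio in {chunk_duration}-second chunks...</p>
        <style>@keyframes spin {{ 0% {{ transform: rotate(0deg); }} 100% {{ transform: rotate(360deg); }} }}</style>
    </div>
    """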
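Similarly, appending [gr.Number(value=10)] to the clear handler's outputs constructs a brand-new component rather than updating the existing chunk_duration_input, so the reset as written likely has no visible effect. A minimal, self-contained sketch of the conventional Gradio reset pattern (component names reused from the diff for illustration):

    import gradio as gr

    with gr.Blocks() as demo:
        chunk_duration_input = gr.Number(label="Chunk Duration (seconds)", value=10)
        clear_btn = gr.Button("Clear")

        def reset_duration():
            # Return the default; Gradio applies it to the component listed in outputs
            return 10

        # Reference the existing component in outputs instead of constructing a new gr.Number
        clear_btn.click(fn=reset_duration, outputs=[chunk_duration_input])

    demo.launch()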