muhammad-abdullah committed on
Commit
81fd8ac
·
1 Parent(s): bd6e845

audio enabled

Browse files
Files changed (1) hide show
  1. app.py +8 -9
app.py CHANGED
@@ -462,8 +462,8 @@ def main(history, prompt, image, audio, video, duration=15, play_steps_in_s=2, t
462
 
463
  for audio_chunk in audio_stream:
464
  generated_audio += list(audio_chunk)
465
- # yield gemini_output['model_response'], gemini_output['song_title'], history[-1]['music_caption'], (sampling_rate, np.asarray(audio_chunk)), history[-1]['cover'], cover_description, history
466
- yield gemini_output['model_response'], gemini_output['song_title'], history[-1]['music_caption'], history[-1]['cover'], cover_description, history
467
  history.append({"chat": chat, "log": log, "prompt": prompt, "model_response": gemini_output['model_response'], "music_caption": music_caption, "audio": generated_audio, "cover": cover, "cover_description": cover_description})
468
 
469
  elif gemini_output['generation_flag']: # should we give the actual audio here as well
@@ -474,8 +474,8 @@ def main(history, prompt, image, audio, video, duration=15, play_steps_in_s=2, t
474
  extend_stride=extend_stride, seed=None, top_k = top_k, top_p = top_p, temperature = temperature,do_sample=do_sample, guidance_scale=guidance_scale, generation_flag=gemini_output['generation_flag'])
475
  for audio_chunk in audio_stream:
476
  generated_audio += list(audio_chunk)
477
- # yield gemini_output['model_response'], gemini_output['song_title'], music_caption, (sampling_rate, audio_chunk), cover, cover_description, history
478
- yield gemini_output['model_response'], gemini_output['song_title'], music_caption, cover, cover_description, history
479
  history.append({"chat": chat, "log": log, "prompt": prompt, "model_response": gemini_output['model_response'], "music_caption": music_caption, "audio": generated_audio, "cover": cover, "cover_description": cover_description})
480
 
481
  else:
@@ -483,12 +483,11 @@ def main(history, prompt, image, audio, video, duration=15, play_steps_in_s=2, t
483
  last_log = log.get_last_log()
484
  if last_log is not None:
485
  audio = last_log['audio_path']
486
- # yield gemini_output['model_response'], gemini_output['song_title'], music_caption, audio, cover, cover_description, history
487
- yield gemini_output['model_response'], gemini_output['song_title'], music_caption, cover, cover_description, history
488
  history.append({"chat": chat, "log": log, "prompt": prompt, "model_response": gemini_output['model_response'], "music_caption": music_caption, "audio": audio, "cover": cover, "cover_description": cover_description})
489
  else:
490
- # yield gemini_output['model_response'], gemini_output['song_title'], music_caption, None, cover, cover_description, history
491
- yield gemini_output['model_response'], gemini_output['song_title'], music_caption, cover, cover_description, history
492
 
493
  history.append({"chat": chat, "log": log, "prompt": prompt, "model_response": gemini_output['model_response'], "music_caption": music_caption, "audio": None, "cover": cover, "cover_description": cover_description})
494
 
@@ -515,7 +514,7 @@ demo = gr.Interface(
515
  outputs=[gr.Textbox(label="Generated Text Output"),
516
  gr.Textbox(label="Song Title"),
517
  gr.Textbox(label="Music generation caption"),
518
- # gr.Audio(label="Generated Music", streaming=True, autoplay=True, show_download_button=False),
519
  gr.Image(label="Cover Image"),
520
  gr.Textbox(label="Cover description"),
521
  gr.State(),
 
462
 
463
  for audio_chunk in audio_stream:
464
  generated_audio += list(audio_chunk)
465
+ yield gemini_output['model_response'], gemini_output['song_title'], history[-1]['music_caption'], (sampling_rate, np.asarray(audio_chunk)), history[-1]['cover'], cover_description, history
466
+
467
  history.append({"chat": chat, "log": log, "prompt": prompt, "model_response": gemini_output['model_response'], "music_caption": music_caption, "audio": generated_audio, "cover": cover, "cover_description": cover_description})
468
 
469
  elif gemini_output['generation_flag']: # should we give the actual audio here as well
 
474
  extend_stride=extend_stride, seed=None, top_k = top_k, top_p = top_p, temperature = temperature,do_sample=do_sample, guidance_scale=guidance_scale, generation_flag=gemini_output['generation_flag'])
475
  for audio_chunk in audio_stream:
476
  generated_audio += list(audio_chunk)
477
+ yield gemini_output['model_response'], gemini_output['song_title'], music_caption, (sampling_rate, audio_chunk), cover, cover_description, history
478
+
479
  history.append({"chat": chat, "log": log, "prompt": prompt, "model_response": gemini_output['model_response'], "music_caption": music_caption, "audio": generated_audio, "cover": cover, "cover_description": cover_description})
480
 
481
  else:
 
483
  last_log = log.get_last_log()
484
  if last_log is not None:
485
  audio = last_log['audio_path']
486
+ yield gemini_output['model_response'], gemini_output['song_title'], music_caption, audio, cover, cover_description, history
487
+
488
  history.append({"chat": chat, "log": log, "prompt": prompt, "model_response": gemini_output['model_response'], "music_caption": music_caption, "audio": audio, "cover": cover, "cover_description": cover_description})
489
  else:
490
+ yield gemini_output['model_response'], gemini_output['song_title'], music_caption, None, cover, cover_description, history
 
491
 
492
  history.append({"chat": chat, "log": log, "prompt": prompt, "model_response": gemini_output['model_response'], "music_caption": music_caption, "audio": None, "cover": cover, "cover_description": cover_description})
493
 
 
514
  outputs=[gr.Textbox(label="Generated Text Output"),
515
  gr.Textbox(label="Song Title"),
516
  gr.Textbox(label="Music generation caption"),
517
+ gr.Audio(label="Generated Music", streaming=True, autoplay=True, show_download_button=False),
518
  gr.Image(label="Cover Image"),
519
  gr.Textbox(label="Cover description"),
520
  gr.State(),