Commit ·
81fd8ac
1
Parent(s): bd6e845
audio enabled
Browse files
app.py
CHANGED
|
@@ -462,8 +462,8 @@ def main(history, prompt, image, audio, video, duration=15, play_steps_in_s=2, t
|
|
| 462 |
|
| 463 |
for audio_chunk in audio_stream:
|
| 464 |
generated_audio += list(audio_chunk)
|
| 465 |
-
|
| 466 |
-
|
| 467 |
history.append({"chat": chat, "log": log, "prompt": prompt, "model_response": gemini_output['model_response'], "music_caption": music_caption, "audio": generated_audio, "cover": cover, "cover_description": cover_description})
|
| 468 |
|
| 469 |
elif gemini_output['generation_flag']: # should we give the actual audio here as well
|
|
@@ -474,8 +474,8 @@ def main(history, prompt, image, audio, video, duration=15, play_steps_in_s=2, t
|
|
| 474 |
extend_stride=extend_stride, seed=None, top_k = top_k, top_p = top_p, temperature = temperature,do_sample=do_sample, guidance_scale=guidance_scale, generation_flag=gemini_output['generation_flag'])
|
| 475 |
for audio_chunk in audio_stream:
|
| 476 |
generated_audio += list(audio_chunk)
|
| 477 |
-
|
| 478 |
-
|
| 479 |
history.append({"chat": chat, "log": log, "prompt": prompt, "model_response": gemini_output['model_response'], "music_caption": music_caption, "audio": generated_audio, "cover": cover, "cover_description": cover_description})
|
| 480 |
|
| 481 |
else:
|
|
@@ -483,12 +483,11 @@ def main(history, prompt, image, audio, video, duration=15, play_steps_in_s=2, t
|
|
| 483 |
last_log = log.get_last_log()
|
| 484 |
if last_log is not None:
|
| 485 |
audio = last_log['audio_path']
|
| 486 |
-
|
| 487 |
-
|
| 488 |
history.append({"chat": chat, "log": log, "prompt": prompt, "model_response": gemini_output['model_response'], "music_caption": music_caption, "audio": audio, "cover": cover, "cover_description": cover_description})
|
| 489 |
else:
|
| 490 |
-
|
| 491 |
-
yield gemini_output['model_response'], gemini_output['song_title'], music_caption, cover, cover_description, history
|
| 492 |
|
| 493 |
history.append({"chat": chat, "log": log, "prompt": prompt, "model_response": gemini_output['model_response'], "music_caption": music_caption, "audio": None, "cover": cover, "cover_description": cover_description})
|
| 494 |
|
|
@@ -515,7 +514,7 @@ demo = gr.Interface(
|
|
| 515 |
outputs=[gr.Textbox(label="Generated Text Output"),
|
| 516 |
gr.Textbox(label="Song Title"),
|
| 517 |
gr.Textbox(label="Music generation caption"),
|
| 518 |
-
|
| 519 |
gr.Image(label="Cover Image"),
|
| 520 |
gr.Textbox(label="Cover description"),
|
| 521 |
gr.State(),
|
|
|
|
| 462 |
|
| 463 |
for audio_chunk in audio_stream:
|
| 464 |
generated_audio += list(audio_chunk)
|
| 465 |
+
yield gemini_output['model_response'], gemini_output['song_title'], history[-1]['music_caption'], (sampling_rate, np.asarray(audio_chunk)), history[-1]['cover'], cover_description, history
|
| 466 |
+
|
| 467 |
history.append({"chat": chat, "log": log, "prompt": prompt, "model_response": gemini_output['model_response'], "music_caption": music_caption, "audio": generated_audio, "cover": cover, "cover_description": cover_description})
|
| 468 |
|
| 469 |
elif gemini_output['generation_flag']: # should we give the actual audio here as well
|
|
|
|
| 474 |
extend_stride=extend_stride, seed=None, top_k = top_k, top_p = top_p, temperature = temperature,do_sample=do_sample, guidance_scale=guidance_scale, generation_flag=gemini_output['generation_flag'])
|
| 475 |
for audio_chunk in audio_stream:
|
| 476 |
generated_audio += list(audio_chunk)
|
| 477 |
+
yield gemini_output['model_response'], gemini_output['song_title'], music_caption, (sampling_rate, audio_chunk), cover, cover_description, history
|
| 478 |
+
|
| 479 |
history.append({"chat": chat, "log": log, "prompt": prompt, "model_response": gemini_output['model_response'], "music_caption": music_caption, "audio": generated_audio, "cover": cover, "cover_description": cover_description})
|
| 480 |
|
| 481 |
else:
|
|
|
|
| 483 |
last_log = log.get_last_log()
|
| 484 |
if last_log is not None:
|
| 485 |
audio = last_log['audio_path']
|
| 486 |
+
yield gemini_output['model_response'], gemini_output['song_title'], music_caption, audio, cover, cover_description, history
|
| 487 |
+
|
| 488 |
history.append({"chat": chat, "log": log, "prompt": prompt, "model_response": gemini_output['model_response'], "music_caption": music_caption, "audio": audio, "cover": cover, "cover_description": cover_description})
|
| 489 |
else:
|
| 490 |
+
yield gemini_output['model_response'], gemini_output['song_title'], music_caption, None, cover, cover_description, history
|
|
|
|
| 491 |
|
| 492 |
history.append({"chat": chat, "log": log, "prompt": prompt, "model_response": gemini_output['model_response'], "music_caption": music_caption, "audio": None, "cover": cover, "cover_description": cover_description})
|
| 493 |
|
|
|
|
| 514 |
outputs=[gr.Textbox(label="Generated Text Output"),
|
| 515 |
gr.Textbox(label="Song Title"),
|
| 516 |
gr.Textbox(label="Music generation caption"),
|
| 517 |
+
gr.Audio(label="Generated Music", streaming=True, autoplay=True, show_download_button=False),
|
| 518 |
gr.Image(label="Cover Image"),
|
| 519 |
gr.Textbox(label="Cover description"),
|
| 520 |
gr.State(),
|