Update app.py
Browse files
app.py
CHANGED
|
@@ -40,8 +40,23 @@ def check_api(model_name):
|
|
| 40 |
except :
|
| 41 |
return "api not ready yet"
|
| 42 |
|
| 43 |
-
from moviepy.editor import
|
| 44 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
|
| 46 |
def extract_audio(video_in):
|
| 47 |
input_video = video_in
|
|
@@ -232,9 +247,25 @@ def get_musical_prompt(user_prompt, chosen_model):
|
|
| 232 |
print(f"SUGGESTED Musical prompt: {cleaned_text}")
|
| 233 |
return cleaned_text.lstrip("\n")
|
| 234 |
|
| 235 |
-
def
|
| 236 |
-
|
| 237 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 238 |
|
| 239 |
if chosen_model == [] :
|
| 240 |
raise gr.Error("Please pick a model")
|
|
@@ -242,6 +273,8 @@ def infer(image_in, chosen_model, api_status):
|
|
| 242 |
if api_status == "api not ready yet" :
|
| 243 |
raise gr.Error("This model is not ready yet, you can pick another one instead :)")
|
| 244 |
|
|
|
|
|
|
|
| 245 |
gr.Info("Getting image caption with Kosmos2...")
|
| 246 |
user_prompt = get_caption(image_in)
|
| 247 |
|
|
@@ -263,10 +296,11 @@ def infer(image_in, chosen_model, api_status):
|
|
| 263 |
elif chosen_model == "MusicGen" :
|
| 264 |
gr.Info("Now calling MusicGen for music...")
|
| 265 |
music_o = get_musicgen(musical_prompt)
|
| 266 |
-
|
| 267 |
-
return gr.update(value=musical_prompt, interactive=True), gr.update(visible=True), music_o
|
| 268 |
|
| 269 |
-
|
|
|
|
|
|
|
|
|
|
| 270 |
musical_prompt = caption
|
| 271 |
|
| 272 |
if chosen_model == "MAGNet" :
|
|
@@ -284,11 +318,11 @@ def retry(chosen_model, caption):
|
|
| 284 |
elif chosen_model == "MusicGen" :
|
| 285 |
gr.Info("Now calling MusicGen for music...")
|
| 286 |
music_o = get_musicgen(musical_prompt)
|
|
|
|
|
|
|
| 287 |
|
| 288 |
-
|
| 289 |
-
|
| 290 |
-
demo_title = "Image to Music V2"
|
| 291 |
-
description = "Get music from a picture, compare text-to-music models"
|
| 292 |
|
| 293 |
css = """
|
| 294 |
#col-container {
|
|
@@ -319,11 +353,7 @@ with gr.Blocks(css=css) as demo:
|
|
| 319 |
with gr.Row():
|
| 320 |
|
| 321 |
with gr.Column():
|
| 322 |
-
|
| 323 |
-
label = "Image reference",
|
| 324 |
-
type = "filepath",
|
| 325 |
-
elem_id = "image-in"
|
| 326 |
-
)
|
| 327 |
|
| 328 |
with gr.Row():
|
| 329 |
|
|
@@ -345,24 +375,9 @@ with gr.Blocks(css=css) as demo:
|
|
| 345 |
interactive=False
|
| 346 |
)
|
| 347 |
|
| 348 |
-
submit_btn = gr.Button("Make music from my
|
| 349 |
-
|
| 350 |
-
|
| 351 |
-
examples = [
|
| 352 |
-
["examples/ocean_poet.jpeg"],
|
| 353 |
-
["examples/jasper_horace.jpeg"],
|
| 354 |
-
["examples/summer.jpeg"],
|
| 355 |
-
["examples/mona_diner.png"],
|
| 356 |
-
["examples/monalisa.png"],
|
| 357 |
-
["examples/santa.png"],
|
| 358 |
-
["examples/winter_hiking.png"],
|
| 359 |
-
["examples/teatime.jpeg"],
|
| 360 |
-
["examples/news_experts.jpeg"]
|
| 361 |
-
],
|
| 362 |
-
fn = infer,
|
| 363 |
-
inputs = [image_in, chosen_model],
|
| 364 |
-
examples_per_page = 4
|
| 365 |
-
)
|
| 366 |
|
| 367 |
with gr.Column():
|
| 368 |
|
|
@@ -377,6 +392,8 @@ with gr.Blocks(css=css) as demo:
|
|
| 377 |
result = gr.Audio(
|
| 378 |
label = "Music"
|
| 379 |
)
|
|
|
|
|
|
|
| 380 |
|
| 381 |
|
| 382 |
chosen_model.change(
|
|
@@ -388,21 +405,22 @@ with gr.Blocks(css=css) as demo:
|
|
| 388 |
|
| 389 |
retry_btn.click(
|
| 390 |
fn = retry,
|
| 391 |
-
inputs = [chosen_model, caption],
|
| 392 |
-
outputs = [result]
|
| 393 |
)
|
| 394 |
|
| 395 |
submit_btn.click(
|
| 396 |
fn = infer,
|
| 397 |
inputs = [
|
| 398 |
-
|
| 399 |
chosen_model,
|
| 400 |
check_status
|
| 401 |
],
|
| 402 |
outputs =[
|
| 403 |
caption,
|
| 404 |
retry_btn,
|
| 405 |
-
result
|
|
|
|
| 406 |
],
|
| 407 |
concurrency_limit = 4
|
| 408 |
)
|
|
|
|
| 40 |
except :
|
| 41 |
return "api not ready yet"
|
| 42 |
|
| 43 |
+
from moviepy.editor import *
|
| 44 |
+
import cv2
|
| 45 |
+
|
| 46 |
+
def extract_firstframe(video_in):
    """Save the first frame of a video as ``first_frame.jpg``.

    Args:
        video_in: Path of the video file, handed to ``cv2.VideoCapture``.

    Returns:
        The string ``"first_frame.jpg"``, i.e. the path the frame was
        written to (relative to the current working directory).

    Raises:
        gr.Error: If no frame can be decoded from ``video_in`` or the frame
            cannot be written to disk.
    """
    vidcap = cv2.VideoCapture(video_in)
    try:
        # Only the first frame is needed, so a single read is enough.
        success, image = vidcap.read()
    finally:
        # Always free the decoder / file handle, even if read() raises.
        vidcap.release()

    if not success:
        # Without this check a stale or missing first_frame.jpg would be
        # returned and silently captioned instead of the uploaded video.
        raise gr.Error("Could not read a frame from the provided video")

    # cv2.imwrite reports failure through its return value, not an exception.
    if not cv2.imwrite("first_frame.jpg", image):
        raise gr.Error("Could not save the first frame of the video")

    # NOTE(review): the output path is fixed, so requests handled at the same
    # time overwrite each other's frame -- consider a per-request temp file.
    print("Done extracted first frame!")
    return "first_frame.jpg"
|
| 60 |
|
| 61 |
def extract_audio(video_in):
|
| 62 |
input_video = video_in
|
|
|
|
| 247 |
print(f"SUGGESTED Musical prompt: {cleaned_text}")
|
| 248 |
return cleaned_text.lstrip("\n")
|
| 249 |
|
| 250 |
+
def blend_vmsc(video_in, audio_result):
    """Mux a generated audio track onto a video, trimmed to the shorter one.

    The longer of the two media is cut to the duration of the shorter one,
    the audio replaces the video's soundtrack, and the result is encoded to
    ``final_video_with_music.mp4`` (H.264 video, AAC audio).

    Args:
        video_in: Path of the source video file.
        audio_result: Path of the audio file to use as the soundtrack.

    Returns:
        The string ``"final_video_with_music.mp4"``, the path of the
        rendered file (relative to the current working directory).
    """
    audio_clip = AudioFileClip(audio_result)
    try:
        print(f"AUD: {audio_clip.duration}")
        clip = VideoFileClip(video_in)
        try:
            print(f"VID: {clip.duration}")

            # Align durations: cut whichever stream is longer.
            trimmed_audio = audio_clip
            trimmed_video = clip
            if clip.duration < audio_clip.duration:
                trimmed_audio = audio_clip.subclip(0.0, clip.duration)
            elif clip.duration > audio_clip.duration:
                trimmed_video = clip.subclip(0.0, audio_clip.duration)

            final_clip = trimmed_video.set_audio(trimmed_audio)

            # Set the output codec
            codec = 'libx264'
            audio_codec = 'aac'
            final_clip.write_videofile(
                'final_video_with_music.mp4',
                codec=codec,
                audio_codec=audio_codec,
            )
        finally:
            # Close only after writing: the derived clips read through the
            # source clip's underlying reader.
            clip.close()
    finally:
        audio_clip.close()

    # NOTE(review): the output path is fixed, so requests handled at the same
    # time overwrite each other's file -- consider a per-request temp file.
    return "final_video_with_music.mp4"
|
| 265 |
+
|
| 266 |
+
def infer(video_in, chosen_model, api_status):
|
| 267 |
+
if video_in == None :
|
| 268 |
+
raise gr.Error("Please provide a video input")
|
| 269 |
|
| 270 |
if chosen_model == [] :
|
| 271 |
raise gr.Error("Please pick a model")
|
|
|
|
| 273 |
if api_status == "api not ready yet" :
|
| 274 |
raise gr.Error("This model is not ready yet, you can pick another one instead :)")
|
| 275 |
|
| 276 |
+
image_in = extract_firstframe(video_in)
|
| 277 |
+
|
| 278 |
gr.Info("Getting image caption with Kosmos2...")
|
| 279 |
user_prompt = get_caption(image_in)
|
| 280 |
|
|
|
|
| 296 |
elif chosen_model == "MusicGen" :
|
| 297 |
gr.Info("Now calling MusicGen for music...")
|
| 298 |
music_o = get_musicgen(musical_prompt)
|
|
|
|
|
|
|
| 299 |
|
| 300 |
+
final_res = blend_vmsc(video_in, music_o)
|
| 301 |
+
return gr.update(value=musical_prompt, interactive=True), gr.update(visible=True), music_o, final_res
|
| 302 |
+
|
| 303 |
+
def retry(video_in, chosen_model, caption):
|
| 304 |
musical_prompt = caption
|
| 305 |
|
| 306 |
if chosen_model == "MAGNet" :
|
|
|
|
| 318 |
elif chosen_model == "MusicGen" :
|
| 319 |
gr.Info("Now calling MusicGen for music...")
|
| 320 |
music_o = get_musicgen(musical_prompt)
|
| 321 |
+
final_res = blend_vmsc(video_in, music_o)
|
| 322 |
+
return music_o, final_res
|
| 323 |
|
| 324 |
+
demo_title = "Video to Music"
|
| 325 |
+
description = "Get music from a video shot, compare text-to-music models"
|
|
|
|
|
|
|
| 326 |
|
| 327 |
css = """
|
| 328 |
#col-container {
|
|
|
|
| 353 |
with gr.Row():
|
| 354 |
|
| 355 |
with gr.Column():
|
| 356 |
+
video_in = gr.Video(sources=["upload"], label="Video input")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 357 |
|
| 358 |
with gr.Row():
|
| 359 |
|
|
|
|
| 375 |
interactive=False
|
| 376 |
)
|
| 377 |
|
| 378 |
+
submit_btn = gr.Button("Make music from my shot !")
|
| 379 |
+
|
| 380 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 381 |
|
| 382 |
with gr.Column():
|
| 383 |
|
|
|
|
| 392 |
result = gr.Audio(
|
| 393 |
label = "Music"
|
| 394 |
)
|
| 395 |
+
|
| 396 |
+
video_o = gr.Video(label="Video with soundFX")
|
| 397 |
|
| 398 |
|
| 399 |
chosen_model.change(
|
|
|
|
| 405 |
|
| 406 |
retry_btn.click(
|
| 407 |
fn = retry,
|
| 408 |
+
inputs = [video_in, chosen_model, caption],
|
| 409 |
+
outputs = [result, video_o]
|
| 410 |
)
|
| 411 |
|
| 412 |
submit_btn.click(
|
| 413 |
fn = infer,
|
| 414 |
inputs = [
|
| 415 |
+
video_in,
|
| 416 |
chosen_model,
|
| 417 |
check_status
|
| 418 |
],
|
| 419 |
outputs =[
|
| 420 |
caption,
|
| 421 |
retry_btn,
|
| 422 |
+
result,
|
| 423 |
+
video_o
|
| 424 |
],
|
| 425 |
concurrency_limit = 4
|
| 426 |
)
|