Spaces:

YiYiXu
/

it-happened-one-frame-2

Runtime error

App Files Files Community

yiyixuxu committed on Jun 7, 2022

Commit

e572140

1 Parent(s): 0f2175b

limit video size, also add code to clean up the saved videos

Browse files

Files changed (1) hide show

app.py +104 -84

app.py CHANGED Viewed

@@ -17,46 +17,63 @@ device = "cuda" if torch.cuda.is_available() else "cpu"
 model, preprocess = clip.load("ViT-B/32")
-def select_video_format(url, format_note='480p', ext='mp4'):
     defaults = ['480p', '360p','240p','144p']
     ydl_opts = {}
     ydl = youtube_dl.YoutubeDL(ydl_opts)
     info_dict = ydl.extract_info(url, download=False)
     formats = info_dict.get('formats', None)
     available_format_notes = set([f['format_note'] for f in formats])
-    if format_note not in available_format_notes:
-        format_note = [d for d in defaults if d in available_format_notes][0]
-    formats = [f for f in formats if f['format_note'] == format_note and f['ext'] == ext and f['vcodec'].split('.')[0] != 'av01']
-    format = formats[0]
-    format_id = format.get('format_id', None)
-    fps = format.get('fps', None)
-    print(f'format selected: {format}')
     return(format, format_id, fps)
-# to-do: delete saved videos
-# testing aria2c
-def download_video(url,format_id, n_keep=10):
-    ydl_opts = {
-      'format':format_id,
-      'cachedir': False,
-      'external_downloader' : 'aria2c',
-      'external_downloader_args' :['--max-connection-per-server=16','--dir=videos'],
-      'outtmpl': "videos/%(id)s.%(ext)s"}
-    # create a directory for saved videos
-    video_path = Path('videos')
     try:
-      video_path.mkdir(parents=True)
     except FileExistsError:
       pass
-    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
-        try:
-            ydl.cache.remove()
-            meta = ydl.extract_info(url)
-            save_location = 'videos/' + meta['id'] + '.' + meta['ext']
-        except youtube_dl.DownloadError as error:
-            print(f'error with download_video function: {error}')
-        return(save_location)
 def process_video_parallel(video, skip_frames, dest_path, num_processes, process_number):
     cap = cv2.VideoCapture(video)
@@ -76,35 +93,30 @@ def process_video_parallel(video, skip_frames, dest_path, num_processes, process
     cap.release()
-def vid2frames(url, sampling_interval=1, ext='mp4'):
   # create folder for extracted frames - if folder exists, delete and create a new one
-    dest_path = Path('frames')
     try:
-        dest_path.mkdir(parents=True)
     except FileExistsError:
-        shutil.rmtree(dest_path)
-        dest_path.mkdir(parents=True)
-    # figure out the format for download,
-    # by default select 480p and .mp4
-    format, format_id, fps = select_video_format(url, format_note='480p', ext='mp4')
     # download the video
-    video = download_video(url,format_id)
-    # calculate skip_frames
-    try:
-        skip_frames = int(fps * sampling_interval)
-    except:
-        skip_frames = int(30 * sampling_interval)
-    print(f'video saved at: {video}, fps:{fps}, skip_frames: {skip_frames}')
     # extract video frames at given sampling interval with multiprocessing -
-    n_workers = min(os.cpu_count(), 12)
-    print(f'now extracting frames with {n_workers} process...')
-    with Pool(n_workers) as pool:
-        pool.map(partial(process_video_parallel, video, skip_frames, dest_path, n_workers), range(n_workers))
-    return(skip_frames, dest_path)
 def captioned_strip(images, caption=None, times=None, rows=1):
@@ -130,41 +142,47 @@ def captioned_strip(images, caption=None, times=None, rows=1):
 def run_inference(url, sampling_interval, search_query, bs=526):
     skip_frames, path_frames= vid2frames(url,sampling_interval)
-    filenames = sorted(path_frames.glob('*.jpg'),key=lambda p: int(p.stem))
-    n_frames = len(filenames)
-    bs = min(n_frames,bs)
-    print(f"extracted {n_frames} frames, now encoding images")
-    # encoding images one batch at a time, combine all batch outputs -> image_features, size n_frames x 512
-    image_features = torch.empty(size=(n_frames, 512)).to(device)
-    print(f"batch size :{bs} ; number of batches: {len(range(0, n_frames,bs))}")
-    for b in range(0, n_frames,bs):
-        images = []
-        # loop through all frames in the batch -> create batch_image_input, size bs x 3 x 224 x 224
-        for filename in filenames[b:b+bs]:
-            image = Image.open(filename).convert("RGB")
-            images.append(preprocess(image))
-        batch_image_input = torch.tensor(np.stack(images)).to(device)
-        # encoding batch_image_input -> batch_image_features
-        with torch.no_grad():
-            batch_image_features = model.encode_image(batch_image_input)
-            batch_image_features /= batch_image_features.norm(dim=-1, keepdim=True)
-        # add encoded image embedding to image_features
-        image_features[b:b+bs] = batch_image_features
-    # encoding search query
-    with torch.no_grad():
-        text_features = model.encode_text(clip.tokenize(search_query).to(device))
-        text_features /= text_features.norm(dim=-1, keepdim=True)
-    print(image_features.dtype, text_features.dtype)
-    similarity = (100.0 * image_features @ text_features.T)
-    values, indices = similarity.topk(4, dim=0)
-    best_frames = [Image.open(filenames[ind]).convert("RGB") for ind in indices]
-    times = [f'{datetime.timedelta(seconds = ind[0].item() * sampling_interval)}' for ind in indices]
-    image_output = captioned_strip(best_frames,search_query, times,2)
-    title = search_query
     return(title, image_output)
-inputs = [gr.inputs.Textbox(label="Give us the link to your youtube video!"),
           gr.Number(5,label='sampling interval (seconds)'),
           gr.inputs.Textbox(label="What do you want to search?")]
 outputs = [
@@ -172,6 +190,8 @@ outputs = [
     gr.outputs.Image(label=""),
 ]
 gr.Interface(
     run_inference,
     inputs=inputs,

 model, preprocess = clip.load("ViT-B/32")
def select_video_format(url, format_note='240p', ext='mp4', max_size=50000000):
    """Pick a format of the video at ``url`` that fits the app's limits.

    A format is eligible when its container extension equals ``ext``, its
    video codec id does not start with ``av01``, and it reports a file size
    of at most ``max_size`` bytes. Among eligible formats the one labelled
    ``format_note`` is preferred; when that label is unavailable the first
    available label from 480p, 360p, 240p, 144p is used instead.

    Args:
        url: address of the video to inspect (nothing is downloaded here).
        format_note: preferred resolution label, e.g. ``'240p'``.
        ext: required container extension.
        max_size: largest acceptable file size in bytes.

    Returns:
        A ``(format, format_id, fps)`` tuple where ``format`` is the selected
        format dictionary, or ``(None, None, None)`` when no eligible format
        exists.
    """
    defaults = ['480p', '360p', '240p', '144p']
    ydl = youtube_dl.YoutubeDL({})
    info_dict = ydl.extract_info(url, download=False)

    # Format entries are plain dicts and individual keys may be absent or
    # None, so every field is read defensively instead of indexed directly.
    candidates = [
        f for f in (info_dict.get('formats') or [])
        if f.get('ext') == ext
        and (f.get('vcodec') or '').split('.')[0] != 'av01'
        and f.get('filesize') is not None
        and f['filesize'] <= max_size
    ]
    available_format_notes = {
        f['format_note'] for f in candidates
        if f.get('format_note') is not None
    }

    if format_note not in available_format_notes:
        # Fall back to the first default label that is actually available.
        format_note = next(
            (d for d in defaults if d in available_format_notes), None)

    selected = None
    if format_note is not None:
        selected = next(
            (f for f in candidates if f.get('format_note') == format_note),
            None)

    if selected is None:
        # Report the limit that was really applied rather than a fixed number.
        print(f"can't find suitable video formats. we are not able to process "
              f"video larger than {max_size / 2 ** 20:.0f} MiB at the moment")
        return (None, None, None)

    format_id = selected.get('format_id', None)
    fps = selected.get('fps', None)
    print(f'format selected: {selected}')
    return (selected, format_id, fps)
def download_video(url):
    """Download the video at ``url`` into the local ``videos`` folder.

    Before downloading, the folder is pruned: once it holds more than 10
    entries, every file whose stem is not one of the pinned video ids is
    deleted. The format to fetch is chosen by ``select_video_format``.

    Args:
        url: address of the video to download.

    Returns:
        A ``(fps, save_location)`` tuple. ``save_location`` is the path of the
        saved file as a string, or ``None`` when no suitable format was found
        or the download failed. ``fps`` is whatever ``select_video_format``
        reported (possibly ``None``).
    """
    # create "videos" folder for saved videos
    path_videos = Path('videos')
    path_videos.mkdir(parents=True, exist_ok=True)

    # clear the "videos" folder, keeping the pinned videos
    videos_to_keep = {'v1rkzUIL8oc', 'k4R5wZs8cxI', '0diCvgWv_ng'}
    saved = list(path_videos.glob('*'))
    if len(saved) > 10:
        for path_video in saved:
            if path_video.is_file() and path_video.stem not in videos_to_keep:
                path_video.unlink()
                print(f'removed video {path_video}')

    # select format to download for given video
    _, format_id, fps = select_video_format(url)

    # Defined up front so the return below is valid on every path, including
    # "no suitable format" and "download failed".
    save_location = None
    if format_id is not None:
        ydl_opts = {
            'format': format_id,
            'outtmpl': "videos/%(id)s.%(ext)s",
        }
        # The options built above must be the ones handed to YoutubeDL,
        # otherwise the chosen format and output template are ignored.
        with youtube_dl.YoutubeDL(ydl_opts) as ydl:
            try:
                ydl.cache.remove()
                meta = ydl.extract_info(url)
                save_location = 'videos/' + meta['id'] + '.' + meta['ext']
            except youtube_dl.DownloadError as error:
                print(f'error with download_video function: {error}')
    return (fps, save_location)
 def process_video_parallel(video, skip_frames, dest_path, num_processes, process_number):
     cap = cv2.VideoCapture(video)
     cap.release()
def vid2frames(url, sampling_interval=1):
    """Download a video and extract frames from it into a fresh ``frames`` folder.

    Args:
        url: address of the video to process.
        sampling_interval: seconds between two sampled frames.

    Returns:
        A ``(skip_frames, path_frames)`` tuple: the frame stride that was used
        and the folder the frames were written to, or ``(None, None)`` when
        the video could not be downloaded.
    """
    # create folder for extracted frames - if folder exists, delete and create a new one
    path_frames = Path('frames')
    try:
        path_frames.mkdir(parents=True)
    except FileExistsError:
        shutil.rmtree(path_frames)
        path_frames.mkdir(parents=True)

    # download the video
    fps, video = download_video(url)
    if video is None:
        return (None, None)

    if fps is None:
        fps = 30
    # A stride below one frame is meaningless; clamp so very small sampling
    # intervals still advance through the video.
    # NOTE(review): process_video_parallel is not fully visible here - confirm
    # it expects a stride >= 1.
    skip_frames = max(1, int(fps * sampling_interval))
    print(f'video saved at: {video}, fps:{fps}, skip_frames: {skip_frames}')

    # extract video frames at given sampling interval with multiprocessing;
    # os.cpu_count() may return None, so fall back to a single worker
    n_workers = min(os.cpu_count() or 1, 12)
    print(f'now extracting frames with {n_workers} process...')
    with Pool(n_workers) as pool:
        pool.map(
            partial(process_video_parallel, video, skip_frames, path_frames, n_workers),
            range(n_workers))
    return (skip_frames, path_frames)
 def captioned_strip(images, caption=None, times=None, rows=1):
 def run_inference(url, sampling_interval, search_query, bs=526):
     skip_frames, path_frames= vid2frames(url,sampling_interval)
+    if path_frames is not None:
+      filenames = sorted(path_frames.glob('*.jpg'),key=lambda p: int(p.stem))
+      n_frames = len(filenames)
+      bs = min(n_frames,bs)
+      print(f"extracted {n_frames} frames, now encoding images")
+      # encoding images one batch at a time, combine all batch outputs -> image_features, size n_frames x 512
+      image_features = torch.empty(size=(n_frames, 512),dtype=torch.float32).to(device)
+      print(f"encoding images, batch size :{bs} ; number of batches: {len(range(0, n_frames,bs))}")
+      for b in range(0, n_frames,bs):
+          images = []
+          # loop through all frames in the batch -> create batch_image_input, size bs x 3 x 224 x 224
+          for filename in filenames[b:b+bs]:
+              image = Image.open(filename).convert("RGB")
+              images.append(preprocess(image))
+          batch_image_input = torch.tensor(np.stack(images)).to(device)
+          # encoding batch_image_input -> batch_image_features
+          with torch.no_grad():
+              batch_image_features = model.encode_image(batch_image_input)
+              batch_image_features /= batch_image_features.norm(dim=-1, keepdim=True)
+          # add encoded image embedding to image_features
+          image_features[b:b+bs] = batch_image_features
+      # encoding search query
+      print(f'encoding search query')
+      with torch.no_grad():
+          text_features = model.encode_text(clip.tokenize(search_query).to(device)).to(dtype=torch.float32)
+          text_features /= text_features.norm(dim=-1, keepdim=True)
+      similarity = (100.0 * image_features @ text_features.T)
+      values, indices = similarity.topk(4, dim=0)
+      best_frames = [Image.open(filenames[ind]).convert("RGB") for ind in indices]
+      times = [f'{datetime.timedelta(seconds = ind[0].item() * sampling_interval)}' for ind in indices]
+      image_output = captioned_strip(best_frames,search_query, times,2)
+      title = search_query
+      print('task complete')
+    else:
+      title = "not able to download video"
+      image_output = None
     return(title, image_output)
+inputs = [gr.inputs.Textbox(label="Give us the link to your youtube video! (note that downloading mighte be slow, e.g. it will take a few minutes to process a 10 minutes video)"),
           gr.Number(5,label='sampling interval (seconds)'),
           gr.inputs.Textbox(label="What do you want to search?")]
 outputs = [
     gr.outputs.Image(label=""),
 ]
+example_videos = ['v1rkzUIL8oc', 'k4R5wZs8cxI','0diCvgWv_ng']
 gr.Interface(
     run_inference,
     inputs=inputs,