Spaces:
Build error
Build error
Iskaj
committed on
Commit
·
727e567
1
Parent(s):
ed0180d
add documentation to data.py
Browse files
data.py
CHANGED
|
@@ -4,6 +4,9 @@ import shutil
|
|
| 4 |
|
| 5 |
from videohash import filepath_from_url
|
| 6 |
|
|
|
|
|
|
|
|
|
|
| 7 |
with open('apb2022.json') as filein:
|
| 8 |
urls, videos, url2video, video2url = [], [], {}, {}
|
| 9 |
for item in json.load(filein):
|
|
@@ -12,13 +15,14 @@ with open('apb2022.json') as filein:
|
|
| 12 |
url2video[item['url']] = item['mp4']
|
| 13 |
video2url[item['mp4']] = item['url']
|
| 14 |
|
|
|
|
| 15 |
for url in videos:
|
| 16 |
filepath = filepath_from_url(url) + '.index'
|
| 17 |
datapath = os.path.join('data', os.path.basename(filepath))
|
| 18 |
if not os.path.exists(filepath) and os.path.exists(datapath):
|
| 19 |
shutil.copyfile(datapath, filepath)
|
| 20 |
|
| 21 |
-
|
| 22 |
if __name__ == "__main__":
|
| 23 |
from videomatch import get_video_index
|
| 24 |
|
|
|
|
| 4 |
|
| 5 |
from videohash import filepath_from_url
|
| 6 |
|
| 7 |
+
# < Algemene Politieke Beschouwing 2022 >
|
| 8 |
+
# Load the reference videos to compare against from a .json file.
|
| 9 |
+
# This can be swapped for any other .json file listing videos; each entry must provide 'url' and 'mp4' keys.
|
| 10 |
with open('apb2022.json') as filein:
|
| 11 |
urls, videos, url2video, video2url = [], [], {}, {}
|
| 12 |
for item in json.load(filein):
|
|
|
|
| 15 |
url2video[item['url']] = item['mp4']
|
| 16 |
video2url[item['mp4']] = item['url']
|
| 17 |
|
| 18 |
+
# For each video, work out the path of its '.index' file; if that file is missing but a copy exists in the 'data' folder, copy it from 'data' to that path.
|
| 19 |
for url in videos:
|
| 20 |
filepath = filepath_from_url(url) + '.index'
|
| 21 |
datapath = os.path.join('data', os.path.basename(filepath))
|
| 22 |
if not os.path.exists(filepath) and os.path.exists(datapath):
|
| 23 |
shutil.copyfile(datapath, filepath)
|
| 24 |
|
| 25 |
+
# Run this module as a script to manually build the indices for the above dataset.
|
| 26 |
if __name__ == "__main__":
|
| 27 |
from videomatch import get_video_index
|
| 28 |
|