Spaces:
Build error
Build error
try to promote main versions
Browse files
app.py
CHANGED
|
@@ -30,7 +30,7 @@ python_path = hf_hub_download(
|
|
| 30 |
sys.path.append(os.environ["PRIVATE_DIR"])
|
| 31 |
from models import *
|
| 32 |
|
| 33 |
-
max_results =
|
| 34 |
max_output = 50
|
| 35 |
|
| 36 |
# global (faster)
|
|
@@ -40,6 +40,31 @@ catalog = get_catalog()
|
|
| 40 |
url_dict = get_durl_myma()
|
| 41 |
dict_catalog = get_dict_catalog()
|
| 42 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
|
| 44 |
def download_audio(id_video):
|
| 45 |
id = id_video.split("?v=")[-1][:11]
|
|
@@ -85,11 +110,20 @@ def process(file_name, embed_html_all):
|
|
| 85 |
with open(output_csv, "w") as w:
|
| 86 |
writer = csv.writer(w)
|
| 87 |
header = False
|
|
|
|
| 88 |
for position, top in enumerate(tops):
|
| 89 |
if len(formated) / 2 >= max_output:
|
| 90 |
break
|
| 91 |
|
| 92 |
file = os.path.splitext(os.path.basename(top))[0]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 93 |
file_name = file
|
| 94 |
if file in dict_catalog:
|
| 95 |
if not header:
|
|
|
|
| 30 |
sys.path.append(os.environ["PRIVATE_DIR"])
|
| 31 |
from models import *
|
| 32 |
|
| 33 |
+
max_results = 200
|
| 34 |
max_output = 50
|
| 35 |
|
| 36 |
# global (faster)
|
|
|
|
| 40 |
url_dict = get_durl_myma()
|
| 41 |
dict_catalog = get_dict_catalog()
|
| 42 |
|
| 43 |
+
fixation_id_to_file_name = {}
|
| 44 |
+
for file_name, infos in dict_catalog.items():
|
| 45 |
+
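# we want only main versions -- NOTE(review): the check below skips entries that have NO "Parent fixation id" (presumably the main versions themselves), so this map ends up holding only child entries and the parent lookup further down may raise KeyError; the condition looks inverted, confirm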
|
| 46 |
+
if not infos["Parent fixation id"]:
|
| 47 |
+
continue
|
| 48 |
+
fixation_id_to_file_name[infos["Fixation id"]] = file_name
|
| 49 |
+
|
| 50 |
+
child_to_parent_filename = {}
|
| 51 |
+
for file_name, infos in dict_catalog.items():
|
| 52 |
+
if infos["Parent fixation id"]:
|
| 53 |
+
child_to_parent_filename[file_name] = fixation_id_to_file_name[
|
| 54 |
+
infos["Parent fixation id"]
|
| 55 |
+
]
|
| 56 |
+
|
| 57 |
+
parent_file_names = set(list(fixation_id_to_file_name.values()))
|
| 58 |
+
fixation_id_to_file_name = {}
|
| 59 |
+
|
| 60 |
+
file_name_to_url = {}
|
| 61 |
+
for file_name, file_url in zip(ind_filenames, url_dict):
|
| 62 |
+
if file_name not in parent_file_names:
|
| 63 |
+
continue
|
| 64 |
+
file_name_to_url[file_name] = file_url
|
| 65 |
+
|
| 66 |
+
parent_file_names = []
|
| 67 |
+
|
| 68 |
|
| 69 |
def download_audio(id_video):
|
| 70 |
id = id_video.split("?v=")[-1][:11]
|
|
|
|
| 110 |
with open(output_csv, "w") as w:
|
| 111 |
writer = csv.writer(w)
|
| 112 |
header = False
|
| 113 |
+
already = set()
|
| 114 |
for position, top in enumerate(tops):
|
| 115 |
if len(formated) / 2 >= max_output:
|
| 116 |
break
|
| 117 |
|
| 118 |
file = os.path.splitext(os.path.basename(top))[0]
|
| 119 |
+
try:
|
| 120 |
+
file = child_to_parent_filename[file]
|
| 121 |
+
top = file_name_to_url[file]
|
| 122 |
+
except KeyError:
|
| 123 |
+
pass
|
| 124 |
+
if file in already:
|
| 125 |
+
continue
|
| 126 |
+
already.add(file)
|
| 127 |
file_name = file
|
| 128 |
if file in dict_catalog:
|
| 129 |
if not header:
|