firagne committed on
Commit
3b5ec49
·
1 Parent(s): a0772af

try to promote main versions

Browse files
Files changed (1) hide show
  1. app.py +35 -1
app.py CHANGED
@@ -30,7 +30,7 @@ python_path = hf_hub_download(
30
  sys.path.append(os.environ["PRIVATE_DIR"])
31
  from models import *
32
 
33
- max_results = 100
34
  max_output = 50
35
 
36
  # global (faster)
@@ -40,6 +40,31 @@ catalog = get_catalog()
40
  url_dict = get_durl_myma()
41
  dict_catalog = get_dict_catalog()
42
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
 
44
  def download_audio(id_video):
45
  id = id_video.split("?v=")[-1][:11]
@@ -85,11 +110,20 @@ def process(file_name, embed_html_all):
85
  with open(output_csv, "w") as w:
86
  writer = csv.writer(w)
87
  header = False
 
88
  for position, top in enumerate(tops):
89
  if len(formated) / 2 >= max_output:
90
  break
91
 
92
  file = os.path.splitext(os.path.basename(top))[0]
 
 
 
 
 
 
 
 
93
  file_name = file
94
  if file in dict_catalog:
95
  if not header:
 
30
  sys.path.append(os.environ["PRIVATE_DIR"])
31
  from models import *
32
 
33
+ max_results = 200
34
  max_output = 50
35
 
36
  # global (faster)
 
40
  url_dict = get_durl_myma()
41
  dict_catalog = get_dict_catalog()
42
 
43
+ fixation_id_to_file_name = {}
44
+ for file_name, infos in dict_catalog.items():
45
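+ # we want only main versions -- NOTE(review): the guard below skips entries whose "Parent fixation id" is empty, i.e. it appears to keep only child versions; confirm the condition is not inverted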
46
+ if not infos["Parent fixation id"]:
47
+ continue
48
+ fixation_id_to_file_name[infos["Fixation id"]] = file_name
49
+
50
+ child_to_parent_filename = {}
51
+ for file_name, infos in dict_catalog.items():
52
+ if infos["Parent fixation id"]:
53
+ child_to_parent_filename[file_name] = fixation_id_to_file_name[
54
+ infos["Parent fixation id"]
55
+ ]
56
+
57
+ parent_file_names = set(list(fixation_id_to_file_name.values()))
58
+ fixation_id_to_file_name = {}
59
+
60
+ file_name_to_url = {}
61
+ for file_name, file_url in zip(ind_filenames, url_dict):
62
+ if file_name not in parent_file_names:
63
+ continue
64
+ file_name_to_url[file_name] = file_url
65
+
66
+ parent_file_names = []
67
+
68
 
69
  def download_audio(id_video):
70
  id = id_video.split("?v=")[-1][:11]
 
110
  with open(output_csv, "w") as w:
111
  writer = csv.writer(w)
112
  header = False
113
+ already = set()
114
  for position, top in enumerate(tops):
115
  if len(formated) / 2 >= max_output:
116
  break
117
 
118
  file = os.path.splitext(os.path.basename(top))[0]
119
+ try:
120
+ file = child_to_parent_filename[file]
121
+ top = file_name_to_url[file]
122
+ except KeyError:
123
+ pass
124
+ if file in already:
125
+ continue
126
+ already.add(file)
127
  file_name = file
128
  if file in dict_catalog:
129
  if not header: