firagne committed on
Commit
c85050b
·
1 Parent(s): 9964dc6

Apply the same fix as asmyma: use the track title instead of the track audio file name, and promote main versions

Browse files
Files changed (1) hide show
  1. app.py +58 -2
app.py CHANGED
@@ -38,6 +38,42 @@ encoder_text = tf.keras.models.load_model(
38
  "encoder_text_retrievaltext_bmg_221022_54_clean"
39
  )
40
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
 
42
  def process(prompt, lang):
43
  now = datetime.datetime.now()
@@ -68,15 +104,35 @@ def process(prompt, lang):
68
  with open(output_csv, "w") as w:
69
  writer = csv.writer(w)
70
  header = False
 
71
  for position, top in enumerate(I[0]):
72
  if len(formated) / 2 >= max_output:
73
  break
74
 
75
  file = os.path.splitext(os.path.basename(audio_names[top]))[0]
 
 
 
 
 
 
 
 
 
 
76
  if file in dict_catalog:
77
  if not header:
78
  writer.writerow(list(dict_catalog[file].keys()))
79
  header = True
 
 
 
 
 
 
 
 
 
80
  writer.writerow(dict_catalog[file].values())
81
  else:
82
  writer.writerow([file, "no metadata provided"])
@@ -84,8 +140,8 @@ def process(prompt, lang):
84
  try:
85
  formated.append(
86
  {
87
- "f": f"{position+1} - {file}",
88
- "t": get_url_myma(top, audio_names, url_dict),
89
  }
90
  )
91
  except:
 
38
  "encoder_text_retrievaltext_bmg_221022_54_clean"
39
  )
40
 
41
+ fixation_id_to_file_name = {}
42
+ for file_name, infos in dict_catalog.items():
43
+ # we want only main versions
44
+ if infos["Parent fixation id"].strip():
45
+ continue
46
+ fixation_id_to_file_name[infos["Fixation id"].strip()] = file_name
47
+
48
+ child_to_parent_filename = {}
49
+ count = count_failed = 0
50
+ for file_name, infos in dict_catalog.items():
51
+ if not infos["Parent fixation id"].strip():
52
+ continue
53
+
54
+ count += 1
55
+ try:
56
+ child_to_parent_filename[file_name] = fixation_id_to_file_name[
57
+ infos["Parent fixation id"].strip()
58
+ ]
59
+ except Exception as e:
60
+ print(f"No parent for {file_name} : {e}")
61
+ count_failed += 1
62
+
63
+ print(f"{count_failed} tracks have no parent / {count} tracks")
64
+
65
+ parent_file_names = set(list(fixation_id_to_file_name.values()))
66
+
67
+ file_name_to_url = {}
68
+ for file_url in url_dict.values():
69
+ file_name = os.path.splitext(os.path.basename(file_url))[0]
70
+ if file_name not in parent_file_names:
71
+ continue
72
+ file_name_to_url[file_name] = file_url
73
+
74
+ parent_file_names = []
75
+ fixation_id_to_file_name = []
76
+
77
 
78
  def process(prompt, lang):
79
  now = datetime.datetime.now()
 
104
  with open(output_csv, "w") as w:
105
  writer = csv.writer(w)
106
  header = False
107
+ already = set()
108
  for position, top in enumerate(I[0]):
109
  if len(formated) / 2 >= max_output:
110
  break
111
 
112
  file = os.path.splitext(os.path.basename(audio_names[top]))[0]
113
+ top = get_url_myma(top, audio_names, url_dict)
114
+ try:
115
+ file = child_to_parent_filename[file]
116
+ top = file_name_to_url[file]
117
+ except KeyError:
118
+ pass
119
+ if file in already:
120
+ continue
121
+ already.add(file)
122
+ file_name = file
123
  if file in dict_catalog:
124
  if not header:
125
  writer.writerow(list(dict_catalog[file].keys()))
126
  header = True
127
+ file_name = dict_catalog[file]["Track name"]
128
+ try:
129
+ file_name += " - " + dict_catalog[file]["Composer1 full name"]
130
+ except:
131
+ pass
132
+ try:
133
+ file_name += " - " + dict_catalog[file]["Album name"]
134
+ except:
135
+ pass
136
  writer.writerow(dict_catalog[file].values())
137
  else:
138
  writer.writerow([file, "no metadata provided"])
 
140
  try:
141
  formated.append(
142
  {
143
+ "f": f"{position+1} - {file_name}",
144
+ "t": top,
145
  }
146
  )
147
  except: