TextRetrieval_A / app.py
firagne's picture
apply same fix as asmyma : track title instead of track audio file name, and promote main versions
c85050b
import gradio as gr
import os
import sys
import numpy as np
import csv
import time
import datetime
from huggingface_hub import hf_hub_download
import traceback
# Runtime bootstrap: configure the process environment, fetch the private
# model code, then load every model / lookup table the search needs.
# Statement order matters here (environment first, sys.path before the
# star import), so keep it as is.
# NO GPU
# NOTE(review): these two variables are presumably read by TensorFlow when it
# is first imported (apparently via the `models` star import below), so they
# have to be set before that import — confirm.
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
# `max_results` is not referenced anywhere in the visible code.
max_results = 100
# Number of nearest neighbours requested from the index by `process`.
max_output = 50
# Hide the repo name: the repository id, the file to fetch and the access
# token are all read from environment variables (a missing one raises
# KeyError at startup).
# NOTE(review): recent huggingface_hub releases deprecate `use_auth_token`
# in favour of `token` — check against the pinned version before changing.
python_path = hf_hub_download(
repo_id=os.environ["REPO_ID"],
repo_type="space",
filename=os.environ["MODEL_FILE"],
use_auth_token=os.environ["TOKEN"],
)
print(python_path)
# Make the private directory importable so that `models` can be resolved.
# NOTE(review): presumably the file downloaded above ends up under
# PRIVATE_DIR — `python_path` itself is only printed, never used.
sys.path.append(os.environ["PRIVATE_DIR"])
# The star import is expected to provide every helper used below and in
# `process` (get_models, get_durl_myma, get_dict_catalog,
# get_audio_names_pickle, get_index, get_predict, do_normalize, get_distance,
# get_url_myma) and, presumably, the `tf` name as well — none of them is
# defined in this file.
from models import *
preprocess_model, model = get_models()
url_dict = get_durl_myma()
dict_catalog = get_dict_catalog()
# audio_names = get_audio_names()
audio_names = get_audio_names_pickle()
index = get_index()
# encoder_text = get_encoder_text() #Error ??
# The text encoder is loaded directly from a saved Keras model path instead
# of through the helper commented out above.
encoder_text = tf.keras.models.load_model(
"encoder_text_retrievaltext_bmg_221022_54_clean"
)
# Build the lookup tables used by `process` to promote a track to its main
# version:
#   child_to_parent_filename : child file name -> main-version file name
#   file_name_to_url         : main-version file name -> audio URL

# Main versions are the catalog entries without a "Parent fixation id".
fixation_id_to_file_name = {
    infos["Fixation id"].strip(): file_name
    for file_name, infos in dict_catalog.items()
    if not infos["Parent fixation id"].strip()
}

child_to_parent_filename = {}
count = count_failed = 0
for file_name, infos in dict_catalog.items():
    parent_id = infos["Parent fixation id"].strip()
    if not parent_id:
        # Main version: nothing to link.
        continue
    count += 1
    try:
        child_to_parent_filename[file_name] = fixation_id_to_file_name[parent_id]
    except KeyError as e:
        # The parent id is not a known main version: keep the child as is.
        print(f"No parent for {file_name} : {e}")
        count_failed += 1
print(f"{count_failed} tracks have no parent / {count} tracks")

# Keep only the URLs whose file name (basename without extension) is a main
# version.
parent_file_names = set(fixation_id_to_file_name.values())
file_name_to_url = {}
for file_url in url_dict.values():
    file_name = os.path.splitext(os.path.basename(file_url))[0]
    if file_name not in parent_file_names:
        continue
    file_name_to_url[file_name] = file_url

# Rebind the intermediate structures so their contents can be
# garbage-collected; `process` only reads the two tables built above.
parent_file_names = []
fixation_id_to_file_name = []
def _results_csv_name(prompt, max_bytes=200):
    """Return a filesystem-safe CSV file name derived from *prompt*.

    Path separators, characters reserved on common filesystems and
    non-printable characters are replaced by "_", and the prompt part is
    truncated to *max_bytes* UTF-8 bytes so the whole name stays below the
    usual 255-byte file-name limit.
    """
    unsafe = '<>:"/\\|?*'
    cleaned = "".join(
        "_" if ch in unsafe or not ch.isprintable() else ch for ch in str(prompt)
    )
    # Truncate on bytes, dropping a multi-byte character cut in half.
    cleaned = cleaned.encode("utf-8")[:max_bytes].decode("utf-8", errors="ignore")
    return f"prompt_{cleaned}_results.csv"


def process(prompt, lang):
    """Search the index for *prompt* and export the de-duplicated results.

    Args:
        prompt: free-text description typed by the user.
        lang: language selected in the UI; accepted to match the UI inputs
            but not used by the search.

    Returns:
        A ``(output_csv, formated)`` tuple: the path of the CSV file written
        to the current working directory, and a list of ``{"f": label,
        "t": url}`` dicts whose first entry is a placeholder with an empty
        URL.
    """
    now = datetime.datetime.now()
    print()
    print("*************")
    print("Current Time: ", str(now))
    print("Text input : ", prompt)
    print("*************")
    print()
    start = time.time()
    embed_query = get_predict(encoder_text, prompt, preprocess_model, model)
    print("Embed time : ", time.time() - start)
    # The return value is ignored, so this presumably normalizes in place.
    do_normalize(embed_query)
    # NOTE(review): the second value is used as rows of indices into
    # `audio_names`; the first (presumably distances) is not needed here.
    _, neighbours = get_distance(index, embed_query, max_output)
    print("Search + Embed time : ", time.time() - start)

    formated = [{"f": "Choose a result to play", "t": ""}]
    # The prompt is user input: never use it verbatim as a file name.
    output_csv = _results_csv_name(prompt)
    # newline="" is required by the csv module to avoid spurious blank rows;
    # an explicit encoding keeps non-ASCII metadata from failing on write.
    with open(output_csv, "w", newline="", encoding="utf-8") as w:
        writer = csv.writer(w)
        header_written = False
        already = set()
        for position, hit in enumerate(neighbours[0]):
            # formated[0] is the placeholder, so it does not count as a result.
            if len(formated) - 1 >= max_output:
                break
            track = os.path.splitext(os.path.basename(audio_names[hit]))[0]
            url = get_url_myma(hit, audio_names, url_dict)
            # Promote an alternate version to its main version; keep the
            # hit's own URL when the main version has no known URL.
            parent = child_to_parent_filename.get(track)
            if parent is not None:
                track = parent
                url = file_name_to_url.get(parent, url)
            # Several hits can map to the same main version: list it once.
            if track in already:
                continue
            already.add(track)

            if track in dict_catalog:
                metadata = dict_catalog[track]
                if not header_written:
                    # The header comes from the first track that has metadata.
                    writer.writerow(list(metadata.keys()))
                    header_written = True
                label = metadata["Track name"]
                for extra in ("Composer1 full name", "Album name"):
                    try:
                        label += " - " + metadata[extra]
                    except (KeyError, TypeError):
                        # Best effort: skip a missing or non-text field.
                        pass
                writer.writerow(metadata.values())
            else:
                label = track
                writer.writerow([track, "no metadata provided"])

            # `position` is the rank in the raw search results, so numbering
            # may show gaps where duplicates were skipped.
            formated.append({"f": f"{position+1} - {label}", "t": url})
    print("Total time : ", time.time() - start)
    return output_csv, formated
# Gradio UI. Data flow:
#   Search button / example click -> process -> (csv_results, results)
#   results change                -> update_select fills the dropdown
#   dropdown selection            -> change_audio loads the URL in the player
#   input text change             -> cleanup_on_url resets every output
# NOTE(review): the layout nesting below was reconstructed from a copy of
# this file that had lost its indentation — confirm against the deployed UI.
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            with gr.Row():
                with gr.Column():
                    input_search = gr.Textbox(
                        label="Input", value="type your description", max_lines=2
                    )
                    input_search_lang = gr.Radio(
                        label="Language", choices=["en"], value="en"
                    )
                    analyze_btn = gr.Button("Search")
        with gr.Column():
            csv_results = gr.File(
                label="Results CSV file : ready for download", show_label=True
            )
            # Hidden component carrying the result list from `process` to
            # the dropdown.
            results = gr.JSON(visible=False)
            select_results = gr.Dropdown(label="Results", choices=[])
            audio_player = gr.Audio(None, label="Results player")

    @select_results.select(inputs=select_results, outputs=audio_player)
    def change_audio(value):
        """Load the selected result URL in the player, or clear the player."""
        if value:
            return gr.Audio(value, label="Results player")
        return gr.Audio(None, label="Results player")

    @results.change(
        inputs=results,
        outputs=select_results,
    )
    def update_select(json_results):
        """Rebuild the dropdown from the ``{"f": label, "t": url}`` entries."""
        try:
            choices = [(k["f"], k["t"]) for k in json_results]
        except (TypeError, KeyError):
            # Missing or malformed results: show an empty dropdown.
            return gr.Dropdown(
                choices=[],
                label="Results",
            )
        return gr.Dropdown(
            label="Results",
            choices=choices,
            value=None,
        )

    @input_search.change(
        outputs=[results, select_results, csv_results, audio_player]
    )
    def cleanup_on_url():
        """Reset every output component when the input text changes."""
        print("cleanup on url change")
        return (
            gr.JSON([{"f": "Choose a result to play", "t": ""}], visible=False),
            gr.Dropdown(choices=[], label="Results"),
            gr.File(None, label="Results as CSV"),
            gr.Audio(None, label="Results player"),
        )

    # Examples are not cached; with run_on_click a click on an example is
    # meant to run `process` directly.
    gr.Examples(
        examples=[
            ["Mysterious filmscore with Arabic influenced instruments", "en"],
            [
                "Let's go on a magical adventure with wizzards, dragons and castles",
                "en",
            ],
            [
                "Creepy piano opening evolves and speeds up into a cinematic orchestral piece",
                "en",
            ],
            ["Chilled electronic", "en"],
            # ["","en"],
            ["Relax piano", "en"],
            ["Halloween rock with creepy organ", "en"],
            [
                "Rhythmic electro dance track for sport, motivation and sweating",
                "en",
            ],
            [
                "soundtrack for an action movie from the eighties in a retro synth wave style",
                "en",
            ],
            [
                "Choral female singing is rhythmically accompanied in a church with medieval instruments",
                "en",
            ],
            ["Christmas", "en"],
            ["love romantic with piano, strings and vocals", "en"],
            ["Electronic soundscapes for chilling and relaxing", "en"],
            ["Minimal, emotional, melancholic piano", "en"],
            ["A calm and romantic acoustic guitar melody", "en"],
            ["horror suspense piano", "en"],
            ["Big Band", "en"],
            ["90 eurodance beat", "en"],
        ],
        inputs=[input_search, input_search_lang],
        outputs=[csv_results, results],
        cache_examples=False,
        fn=process,
        examples_per_page=20,
        run_on_click=True,
    )
    analyze_btn.click(
        process,
        inputs=[input_search, input_search_lang],
        outputs=[csv_results, results],
    )
demo.launch(debug=False)