TextRetrieval_A

Runtime error

File size: 8,839 Bytes

import gradio as gr
import os
import sys
import numpy as np
import csv
import time
import datetime
from huggingface_hub import hf_hub_download
import traceback

# NO GPU: expose no CUDA device to this process and set TensorFlow's C++ log
# level to 3. These are set before the private `models` module is imported
# below (presumably that is where TensorFlow gets loaded - confirm).
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"

# max_output is the number of neighbours requested from the index in process().
# max_results is not referenced anywhere else in the visible part of this file.
max_results = 100
max_output = 50


# Hide the repo name: the repository id, the file to fetch and the access token
# all come from environment variables, so none of them appears in the source.
python_path = hf_hub_download(
    repo_id=os.environ["REPO_ID"],
    repo_type="space",
    filename=os.environ["MODEL_FILE"],
    use_auth_token=os.environ["TOKEN"],
)
print(python_path)
# Make the private directory importable so `models` can be resolved.
# NOTE(review): presumably PRIVATE_DIR is where the file downloaded above ends
# up - confirm, since python_path itself is only printed.
sys.path.append(os.environ["PRIVATE_DIR"])
# The star import provides every helper used below and in process()
# (get_models, get_predict, get_distance, get_url_myma, ...). `tf` is not
# imported in this file, so it presumably comes from this import too - confirm.
from models import *

# Load the models, catalog metadata, audio file names and search index once at
# start-up; process() reads them as module globals.
preprocess_model, model = get_models()
url_dict = get_durl_myma()
dict_catalog = get_dict_catalog()
# audio_names = get_audio_names()
audio_names = get_audio_names_pickle()
index = get_index()
# The helper get_encoder_text() raised an unexplained error, so the text
# encoder is loaded directly from its saved-model path instead.
# encoder_text = get_encoder_text() #Error ??
encoder_text = tf.keras.models.load_model(
    "encoder_text_retrievaltext_bmg_221022_54_clean"
)

# Build the lookup tables used by process() to fold alternate versions of a
# track into their main version:
#   child_to_parent_filename: child file name  -> main-version file name
#   file_name_to_url:         main-version file name -> playable URL

# Fixation id -> file name, for main versions only. A catalog entry with a
# non-blank "Parent fixation id" is an alternate version and is skipped here.
fixation_id_to_file_name = {}
for file_name, infos in dict_catalog.items():
    if infos["Parent fixation id"].strip():
        continue
    fixation_id_to_file_name[infos["Fixation id"].strip()] = file_name

# Second pass over the catalog: resolve each alternate version to its parent.
# This needs the complete table above, hence two separate loops.
child_to_parent_filename = {}
count = count_failed = 0
for file_name, infos in dict_catalog.items():
    parent_fixation_id = infos["Parent fixation id"].strip()
    if not parent_fixation_id:
        continue

    count += 1
    try:
        child_to_parent_filename[file_name] = fixation_id_to_file_name[
            parent_fixation_id
        ]
    except KeyError as e:
        # The parent id points to no main version in the catalog; report it
        # and leave this child unmapped.
        print(f"No parent for {file_name} : {e}")
        count_failed += 1

print(f"{count_failed} tracks have no parent / {count} tracks")

parent_file_names = set(fixation_id_to_file_name.values())

# Keep only URLs whose base name (without extension) is a main version.
file_name_to_url = {}
for file_url in url_dict.values():
    file_name = os.path.splitext(os.path.basename(file_url))[0]
    if file_name not in parent_file_names:
        continue
    file_name_to_url[file_name] = file_url

# The intermediate tables are no longer needed; rebind the names to empty
# lists so the large objects can be garbage-collected.
parent_file_names = []
fixation_id_to_file_name = []


def _csv_name_for_prompt(prompt, max_bytes=200):
    """Return a results CSV file name derived from *prompt* that is safe to create.

    Path separators, characters that are invalid in file names on common
    platforms and non-printable characters (newlines, tabs, ...) are replaced
    by "_". The prompt part is truncated to *max_bytes* UTF-8 bytes so the full
    name stays below the usual 255-byte file-name limit. Prompts without such
    characters and within the limit keep the historical
    ``prompt_<prompt>_results.csv`` name.
    """
    forbidden = '/\\<>:"|?*'
    cleaned = "".join(
        "_" if (ch in forbidden or not ch.isprintable()) else ch
        for ch in str(prompt)
    )
    # Truncate on encoded bytes; errors="ignore" drops a character that the
    # cut would otherwise leave split in half.
    cleaned = cleaned.encode("utf-8", errors="ignore")[:max_bytes].decode(
        "utf-8", errors="ignore"
    )
    return f"prompt_{cleaned}_results.csv"


def process(prompt, lang):
    """Search the index for tracks matching a text prompt.

    The prompt is embedded with the text encoder, the index is queried for
    ``max_output`` neighbours, and every hit is written to a CSV file and added
    to the list that feeds the results dropdown. A hit that is a known
    alternate version is replaced by its main version, and each track is listed
    at most once.

    Args:
        prompt: Free-text description typed by the user.
        lang: Language selected in the UI. Accepted to match the UI inputs but
            not used by this function.

    Returns:
        A tuple ``(output_csv, formated)``: the path of the CSV file written in
        the current working directory, and a list of ``{"f": label, "t": url}``
        dicts whose first element is the "Choose a result to play" placeholder.
    """
    now = datetime.datetime.now()

    print()
    print("*************")
    print("Current Time: ", str(now))
    print("Text input : ", prompt)
    print("*************")
    print()
    start = time.time()

    embed_query = get_predict(encoder_text, prompt, preprocess_model, model)
    print("Embed time : ", time.time() - start)
    # The return value is not used; presumably this normalises embed_query in
    # place - confirm against the models module.
    do_normalize(embed_query)
    _distances, neighbours = get_distance(index, embed_query, max_output)
    print("Search + Embed time : ", time.time() - start)

    formated = [{"f": "Choose a result to play", "t": ""}]
    output_csv = _csv_name_for_prompt(prompt)
    # newline="" is what the csv module requires for files it writes to; the
    # explicit encoding keeps non-ASCII metadata from failing on some locales.
    with open(output_csv, "w", newline="", encoding="utf-8") as w:
        writer = csv.writer(w)
        header_written = False
        already = set()
        for position, hit in enumerate(neighbours[0]):
            # formated[0] is the placeholder, so real results = len - 1.
            if len(formated) - 1 >= max_output:
                break

            track = os.path.splitext(os.path.basename(audio_names[hit]))[0]
            url = get_url_myma(hit, audio_names, url_dict)

            # Fold an alternate version into its main version. When the parent
            # has no known URL, the hit's own URL is kept.
            parent = child_to_parent_filename.get(track)
            if parent is not None:
                track = parent
                url = file_name_to_url.get(parent, url)

            if track in already:
                continue
            already.add(track)

            label = track
            if track in dict_catalog:
                metadata = dict_catalog[track]
                if not header_written:
                    # The header comes from the first hit that has metadata.
                    writer.writerow(list(metadata.keys()))
                    header_written = True
                label = metadata["Track name"]
                for field in ("Composer1 full name", "Album name"):
                    try:
                        label += " - " + metadata[field]
                    except (KeyError, TypeError):
                        # Optional column missing or not a string: leave it
                        # out of the label.
                        pass
                writer.writerow(metadata.values())
            else:
                writer.writerow([track, "no metadata provided"])

            # position is the rank in the raw neighbour list, so numbering
            # shows gaps where duplicates were skipped.
            formated.append({"f": f"{position+1} - {label}", "t": url})

    print("Total time : ", time.time() - start)
    return output_csv, formated


# UI definition. Left column: search input, language choice and search button.
# Right column: downloadable CSV, hidden JSON holding the raw results, a
# dropdown listing the results and an audio player for the selected one.
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            with gr.Row():
                with gr.Column():
                    input_search = gr.Textbox(
                        label="Input", value="type your description", max_lines=2
                    )

                    # Only "en" is offered; process() receives this value but
                    # does not use it.
                    input_search_lang = gr.Radio(
                        label="Language", choices=["en"], value="en"
                    )

                    analyze_btn = gr.Button("Search")

                with gr.Column():
                    csv_results = gr.File(
                        label="Results CSV file : ready for download", show_label=True
                    )
                    # Hidden component receiving the second output of
                    # process(); its change event rebuilds the dropdown below.
                    results = gr.JSON(visible=False)
                    select_results = gr.Dropdown(label="Results", choices=[])
                    audio_player = gr.Audio(None, label="Results player")

                    @select_results.select(inputs=select_results, outputs=audio_player)
                    def change_audio(value):
                        """Load the selected dropdown value into the audio player.

                        The dropdown values are the "t" fields of the results
                        (see update_select); the placeholder entry has an empty
                        "t", which clears the player.
                        """
                        if value:
                            return gr.Audio(value, label="Results player")
                        return gr.Audio(None, label="Results player")

                    @results.change(
                        inputs=results,
                        outputs=select_results,
                    )
                    def update_select(json_results):
                        """Rebuild the dropdown from the hidden JSON results.

                        Each result dict becomes a ("f", "t") choice tuple with
                        nothing preselected. If the results cannot be turned
                        into choices, an empty dropdown is returned instead.
                        """
                        try:
                            return gr.Dropdown(
                                label="Results",
                                choices=[(k["f"], k["t"]) for k in json_results],
                                value=None,
                            )
                        except:
                            return gr.Dropdown(
                                choices=[],
                                label="Results",
                            )

            @input_search.change(
                outputs=[results, select_results, csv_results, audio_player]
            )
            def cleanup_on_url():
                """Reset all result widgets whenever the search text changes.

                Despite the name and the log message, the trigger is the search
                textbox, not a URL. Note that the File label set here differs
                from the one the component was created with.
                """
                print("cleanup on url change")
                return (
                    gr.JSON([{"f": "Choose a result to play", "t": ""}], visible=False),
                    gr.Dropdown(choices=[], label="Results"),
                    gr.File(None, label="Results as CSV"),
                    gr.Audio(None, label="Results player"),
                )

    # Example prompts. cache_examples=False with run_on_click=True: nothing is
    # precomputed, and process() is run when an example is clicked.
    gr.Examples(
        examples=[
            ["Mysterious filmscore with Arabic influenced instruments", "en"],
            [
                "Let's go on a magical adventure with wizzards, dragons and castles",
                "en",
            ],
            [
                "Creepy piano opening evolves and speeds up into a cinematic orchestral piece",
                "en",
            ],
            ["Chilled electronic", "en"],
            # ["","en"],
            ["Relax piano", "en"],
            ["Halloween rock with creepy organ", "en"],
            [
                "Rhythmic electro dance track for sport, motivation and sweating",
                "en",
            ],
            [
                "soundtrack for an action movie from the eighties in a retro synth wave style",
                "en",
            ],
            [
                "Choral female singing is rhythmically accompanied in a church with medieval instruments",
                "en",
            ],
            ["Christmas", "en"],
            ["love romantic with piano, strings and vocals", "en"],
            ["Electronic soundscapes for chilling and relaxing", "en"],
            ["Minimal, emotional, melancholic piano", "en"],
            ["A calm and romantic acoustic guitar melody", "en"],
            ["horror suspense piano", "en"],
            ["Big Band", "en"],
            ["90 eurodance beat", "en"],
        ],
        inputs=[input_search, input_search_lang],
        outputs=[csv_results, results],
        cache_examples=False,
        fn=process,
        examples_per_page=20,
        run_on_click=True,
    )

    # The search button runs the same function with the same inputs/outputs as
    # the examples above.
    analyze_btn.click(
        process,
        inputs=[input_search, input_search_lang],
        outputs=[csv_results, results],
    )

# Start the Gradio server with debug mode off.
demo.launch(debug=False)