| | from my_tool import ( |
| | load_json, |
| | load_jsonl, |
| | load_txt, |
| | save_jsonl, |
| | format_meta, |
| | pure_name, |
| | BASE_DIR, |
| | compose_analyze, |
| | get_sample, |
| | get_field_suno, |
| | tags_analyze, |
| | find_json, |
| | show_dir, |
| | convert_mp3, |
| | tar_dir, |
| | tar_size_check, |
| | clean_newlines, |
| | dict_sort_print, |
| | ) |
| | from meta_lang import load_asr_model, get_lang_meta |
| | from meta_tags import load_tag_model, get_tags_meta |
| | from meta_endpoints import get_endpoints_meta |
| | from meta_phonemes import get_phonemes_meta |
| | from filter import filter_lang, filter_length |
| | from convert_convs import get_convert_convs |
| | from convert_segments import get_convert_segments |
| | from convert_lyrics import get_convert_lyrics, get_match_music |
| |
|
def pipeline():
    """Build (or reload) the raw metadata for ``suno_batch`` and tag it.

    Stages, executed in this order:

    1. Create the output directory ``BASE_DIR / "data/<name>"`` where
       ``<name>`` is ``pure_name("suno_batch")``.
    2. Reuse ``raw.jsonl`` from that directory when it already exists;
       otherwise build the records with ``format_meta`` and cache them
       there with ``save_jsonl``.
    3. Keep only the first 1000 records and pass them through
       ``filter_length`` with the bounds 120 and 360 and 10 workers
       (presumably a duration window in seconds -- confirm against
       ``filter.filter_length``).
    4. Load the tag model and the ``prompts/new_tags.md`` prompt, then call
       ``get_tags_meta`` with a batch size of 4 and the path
       ``meta_tags.jsonl`` (presumably the output file -- confirm against
       ``meta_tags.get_tags_meta``).

    The function takes no arguments and returns nothing.
    """
    import os

    source_dir = "suno_batch"
    name = pure_name(source_dir)
    out_dir = BASE_DIR / f"data/{name}"
    os.makedirs(out_dir, exist_ok=True)

    # Stage: raw metadata, cached on disk so reruns skip format_meta.
    raw_file = os.path.join(out_dir, "raw.jsonl")
    if not os.path.exists(raw_file):
        records = format_meta(source_dir)
        save_jsonl(records, raw_file)
    else:
        records = load_jsonl(raw_file)

    # Stage: cap the working set, then filter by length.
    worker_count = 10
    records = filter_length(records[:1000], 120, 360, worker_count)

    # Stage: tagging. The model is loaded before the prompt is read,
    # matching the original call order.
    batch_size = 4
    tags_file = os.path.join(out_dir, "meta_tags.jsonl")
    tag_model, tag_processor = load_tag_model()
    prompt_text = load_txt(BASE_DIR / "prompts/new_tags.md")
    get_tags_meta(
        tag_model, tag_processor, records, prompt_text, batch_size, tags_file
    )
| |
|
def repeat(func, *, max_attempts=None, delay=0.0):
    """Call ``func()`` until it completes without raising ``Exception``.

    Every failure is reported on stdout as ``Error: <exception>`` and the
    call is retried. With the default arguments the retry loop is unbounded
    and has no pause, which is the historical behaviour of this helper.

    Args:
        func: Zero-argument callable to run.
        max_attempts: Optional upper bound on the number of calls. ``None``
            (the default) retries forever. When the bound is reached the
            exception from the final attempt is re-raised.
        delay: Seconds to sleep between a failed attempt and the next one.
            Values ``<= 0`` disable the pause.

    Returns:
        Whatever the first successful ``func()`` call returns.

    Raises:
        ValueError: If ``max_attempts`` is given and is smaller than 1.
        Exception: The last error raised by ``func`` once ``max_attempts``
            calls have failed.

    Exceptions that do not derive from ``Exception`` (for example
    ``KeyboardInterrupt``) are not caught and end the loop immediately.
    """
    import time

    if max_attempts is not None and max_attempts < 1:
        raise ValueError("max_attempts must be at least 1")

    attempt = 0
    while True:
        attempt += 1
        try:
            return func()
        except Exception as e:
            print(f"Error: {e}")
            # Out of attempts: surface the real failure instead of looping.
            if max_attempts is not None and attempt >= max_attempts:
                raise
            if delay > 0:
                time.sleep(delay)
| |
|
# Script entry point: run the pipeline through ``repeat`` so that a run which
# raises an exception is reported and started again instead of ending the
# process.
if __name__ == "__main__":
    repeat(pipeline)