Jacong's picture
Upload 96 files
aa9be1e verified
from my_tool import (
load_json,
load_jsonl,
load_txt,
save_jsonl,
format_meta,
pure_name,
BASE_DIR,
compose_analyze,
get_sample,
get_field_suno,
tags_analyze,
find_json,
show_dir,
convert_mp3,
tar_dir,
tar_size_check,
clean_newlines,
dict_sort_print,
)
from meta_lang import load_asr_model, get_lang_meta
from meta_tags import load_tag_model, get_tags_meta
from meta_endpoints import get_endpoints_meta
from meta_phonemes import get_phonemes_meta
from filter import filter_lang, filter_length
from convert_convs import get_convert_convs
from convert_segments import get_convert_segments
from convert_lyrics import get_convert_lyrics, get_match_music
def pipeline():
import os
dir = "suno_batch"
name = pure_name(dir)
save_dir = BASE_DIR / f"data/{name}"
# Initialize paths (only once)
os.makedirs(save_dir, exist_ok=True)
raw_path = os.path.join(save_dir, "raw.jsonl")
if os.path.exists(raw_path):
dataset = load_jsonl(raw_path)
else:
dataset = format_meta(dir)
save_jsonl(dataset, raw_path)
# Length filtering
dataset = dataset[:1000]
max_workers = 10
dataset = filter_length(dataset, 120, 360, max_workers)
# Language tagging
# lang_bs = 8
# model = load_asr_model(lang_bs)
# lang_path = os.path.join(save_dir, "meta_lang.jsonl")
# dataset = get_lang_meta(model, dataset, lang_bs, lang_path)
# Language filtering
# dataset = filter_lang(dataset, ['zh', 'en'])
# Style tagging
tag_bs = 4
tag_path = os.path.join(save_dir, "meta_tags.jsonl")
model, processor = load_tag_model()
prompt_path = BASE_DIR / "prompts/new_tags.md"
prompt = load_txt(prompt_path)
get_tags_meta(model, processor, dataset, prompt, tag_bs, tag_path)
def repeat(func):
while True:
try:
func()
break
except Exception as e:
print(f"Error: {e}")
continue
if __name__ == "__main__":
repeat(pipeline)