| | import random |
| | import uuid |
| | import numpy |
| | import os |
| | import random |
| | import fnmatch |
| |
|
| | from tqdm.auto import tqdm |
| | from scipy.io import wavfile |
| |
|
| | from bark.generation import load_model, SAMPLE_RATE |
| | from bark.api import semantic_to_waveform |
| |
|
| | from bark import text_to_semantic |
| | from bark.generation import load_model |
| |
|
| | from training.data import load_books, random_split_chunk |
| |
|
# Folder that receives the generated semantic token arrays (*.npy).
output = 'training/data/output'
# Folder that receives the waveforms rendered from those arrays (*.wav).
output_wav = 'training/data/output_wav'
| |
|
| |
|
def prepare_semantics_from_text(num_generations):
    """Generate semantic token arrays from random book text and save them.

    Every iteration draws a non-empty, stripped text chunk via
    ``random_split_chunk``, converts it with ``text_to_semantic`` using a
    temperature drawn uniformly from [0.6, 0.8] (rounded to two decimals),
    and stores the result with ``numpy.save`` under ``output`` using a random
    UUID-based ``.npy`` file name.

    Args:
        num_generations: Number of files to generate. When this is a positive
            integer, generation stops after that many files. Any other value
            (``None``, zero, negative, non-integer) keeps generating until
            the process is interrupted.
    """
    loaded_data = load_books(True)

    # Any non-empty BARK_FORCE_CPU value switches the GPU off.
    use_gpu = not os.environ.get("BARK_FORCE_CPU", False)

    print('Loading semantics model')
    # Pass the computed flag through so BARK_FORCE_CPU is actually honoured.
    load_model(use_gpu=use_gpu, use_small=False, force_reload=False, model_type='text')

    # exist_ok avoids the check-then-create race and also creates parents.
    os.makedirs(output, exist_ok=True)

    # Only a positive integer bounds the run; everything else means "endless".
    bounded = isinstance(num_generations, int) and num_generations > 0

    loop = 1
    while not bounded or loop <= num_generations:
        file_name = os.path.join(output, uuid.uuid4().hex + '.npy')

        # Keep drawing until the chunk still has content after stripping.
        text = ''
        while not text:
            text = random_split_chunk(loaded_data).strip()

        print(f'{loop} Generating semantics for text:', text)
        loop += 1
        semantics = text_to_semantic(text, temp=round(random.uniform(0.6, 0.8), ndigits=2))
        numpy.save(file_name, semantics)
| |
|
| |
|
def prepare_wavs_from_semantics():
    """Render every saved semantic array in ``output`` to a WAV in ``output_wav``.

    Loads the coarse and fine models, then walks all ``*.npy`` files in
    ``output``. A file is rendered with ``semantic_to_waveform`` (temperature
    drawn uniformly from [0.6, 0.8], rounded to two decimals) and written at
    ``SAMPLE_RATE`` unless a WAV with the same stem already exists, so an
    interrupted run can be resumed.

    Raises:
        Exception: If the ``output`` folder does not exist.
    """
    if not os.path.isdir(output):
        raise Exception('No \'output\' folder, make sure you run create_data.py first!')
    # exist_ok avoids the check-then-create race and also creates parents.
    os.makedirs(output_wav, exist_ok=True)

    # Any non-empty BARK_FORCE_CPU value switches the GPU off.
    use_gpu = not os.environ.get("BARK_FORCE_CPU", False)

    print('Loading coarse model')
    load_model(use_gpu=use_gpu, use_small=False, force_reload=False, model_type='coarse')
    print('Loading fine model')
    load_model(use_gpu=use_gpu, use_small=False, force_reload=False, model_type='fine')

    files = fnmatch.filter(os.listdir(output), '*.npy')
    total = len(files)

    for i, f in tqdm(enumerate(files), total=total):
        # The '*.npy' filter guarantees a dot, so this drops only the extension.
        real_name = f.rsplit('.', 1)[0]
        file_name = os.path.join(output, f)
        out_file = os.path.join(output_wav, f'{real_name}.wav')

        # Skip inputs that are already rendered or are not regular files.
        if os.path.isfile(out_file) or not os.path.isfile(file_name):
            continue

        print(f'Processing ({i+1}/{total}) -> {f}')
        wav = semantic_to_waveform(numpy.load(file_name), temp=round(random.uniform(0.6, 0.8), ndigits=2))

        # Write to a temporary name and rename afterwards: an interrupted
        # write must not leave a truncated .wav that later runs would skip.
        tmp_file = out_file + '.tmp'
        wavfile.write(tmp_file, SAMPLE_RATE, wav)
        os.replace(tmp_file, out_file)

    print('Done!')
| |
|