Spaces: Running on Zero

fix: set base dir for Hugging Face

Browse files

--- error message ---
Skipping set_num_interop_threads: Error: cannot set number of interop threads after parallel work has started or set_num_interop_threads called
Initializing PhonemeBpeTokenizer with tokenizer path: ./apps/audio_cloning/vallex/g2p/bpe_69.json
Traceback (most recent call last):
File "/home/user/app/app.py", line 3, in <module>
from apps.audio_cloning.main import main as audio_cloning
File "/home/user/app/apps/audio_cloning/main.py", line 7, in <module>
from .vallex.main import main as vallex
File "/home/user/app/apps/audio_cloning/vallex/main.py", line 113, in <module>
os.makedirs(OUTPUT_DIR_CHECKPOINTS, exist_ok=True)
File "/usr/local/lib/python3.10/os.py", line 215, in makedirs
makedirs(head, exist_ok=exist_ok)
File "/usr/local/lib/python3.10/os.py", line 215, in makedirs
makedirs(head, exist_ok=exist_ok)
File "/usr/local/lib/python3.10/os.py", line 225, in makedirs
mkdir(name, mode)
PermissionError: [Errno 13] Permission denied: '/app'
Diff 1 — G2P tokenizer module (add `os` import and an `HF_HOME`-based base directory for the BPE tokenizer path):

```diff
@@ -1,5 +1,7 @@
 """from https://github.com/keithito/tacotron"""
 
+import os
+
 # import utils.g2p.cleaners
 from tokenizers import Tokenizer
 
@@ -11,7 +13,9 @@ from .symbols import symbols
 _symbol_to_id = {s: i for i, s in enumerate(symbols)}
 _id_to_symbol = {i: s for i, s in enumerate(symbols)}
 
-
+
+BASE_DIR = os.getenv("HF_HOME", ".")
+TOKENIZER_PATH = os.path.join(BASE_DIR, "apps/audio_cloning/vallex/g2p/bpe_1024.json")
 
 
 class PhonemeBpeTokenizer:
```
Diff 2 — `apps/audio_cloning/vallex/main.py` (replace hard-coded `/app/...` paths, which fail with `PermissionError` on Spaces, with paths rooted at `HF_HOME`). [Note: the default path literals on the removed `OUTPUT_DIR_CHECKPOINTS`/`OUTPUT_DIR_WHISPER` lines were truncated in the extraction and are reconstructed here as the likely relative defaults; the removed `tokenizer_path` value is confirmed by the traceback above.]

```diff
@@ -40,6 +40,10 @@ from .models.vallex import VALLE
 
 logger = logging.getLogger(__name__)
 
+# set base directory
+BASE_DIR = os.getenv("HF_HOME", ".")
+logger.info("Base directory: %s", BASE_DIR)
+
 # set languages
 langid.set_languages(["en", "zh", "ja"])
 
@@ -86,7 +90,7 @@ else:
 
 # set text tokenizer and collater
 logger.info("Setting text tokenizer and collater...")
-tokenizer_path = "./apps/audio_cloning/vallex/g2p/bpe_69.json"
+tokenizer_path = os.path.join(BASE_DIR, "apps/audio_cloning/vallex/g2p/bpe_69.json")
 text_tokenizer = PhonemeBpeTokenizer(tokenizer_path=tokenizer_path)
 text_collater = get_text_token_collater()
 
@@ -100,11 +104,7 @@ if torch.cuda.is_available():
 logger.info("Device set to %s", device)
 
 # Download VALL-E-X model weights if not exists
-OUTPUT_DIR_CHECKPOINTS = "./models/checkpoints"
-if platform.system().lower() == "linux":
-    # docker(linux)環境では /app/models/checkpoints にする
-    OUTPUT_DIR_CHECKPOINTS = "/app/models/checkpoints"
-
+OUTPUT_DIR_CHECKPOINTS = os.path.join(BASE_DIR, "models/checkpoints")
 OUTPUT_FILENAME_CHECKPOINTS = "vallex-checkpoint.pt"
 OUTPUT_PATH_CHECKPOINTS = os.path.join(
     OUTPUT_DIR_CHECKPOINTS, OUTPUT_FILENAME_CHECKPOINTS
@@ -155,12 +155,10 @@ audio_tokenizer = AudioTokenizer(device)
 vocos = Vocos.from_pretrained("charactr/vocos-encodec-24khz").to(device)
 
 # initialize ASR model
-OUTPUT_DIR_WHISPER = "./models/whisper"
-if platform.system().lower() == "linux":
-    OUTPUT_DIR_WHISPER = "/app/models/whisper"
-
+OUTPUT_DIR_WHISPER = os.path.join(BASE_DIR, "models/whisper")
 if not os.path.exists(OUTPUT_DIR_WHISPER):
     os.makedirs(OUTPUT_DIR_WHISPER, exist_ok=True)
+
 try:
     logger.info("Loading Whisper model...")
     model_name = "tiny"
@@ -178,7 +176,7 @@ except Exception as e:
 
 # Initialize Voice Presets
 logger.info("Initializing Voice Presets...")
-PRESETS_DIR = "apps/audio_cloning/vallex/presets"
+PRESETS_DIR = os.path.join(BASE_DIR, "apps/audio_cloning/vallex/presets")
 preset_list = os.walk(PRESETS_DIR).__next__()[2]
 preset_list = [preset[:-4] for preset in preset_list if preset.endswith(".npz")]
 
```