Spaces:

Transduce
/

vc_demo

Runtime error

App Files Files Community

SefyanKehail committed on Jun 4, 2024

Commit

0288ea7

1 Parent(s): 6d79719

GPU

Browse files

Files changed (1) hide show

app.py +41 -14

app.py CHANGED Viewed

@@ -5,20 +5,48 @@ import gradio as gr
 import multiprocessing
 import os
-print(torch.cuda.is_available())
-hubert, acoustic, hifigan = None, None, None
 # Function to initialize models with CUDA
 def initialize_models():
-    global hubert, acoustic, hifigan
-    print("Initializing models...")
-    hubert = torch.hub.load("bshall/hubert:main", "hubert_soft", trust_repo=True).cuda()
-    acoustic = torch.hub.load("bshall/acoustic-model:main", "hubert_soft", trust_repo=True).cuda()
-    hifigan = torch.hub.load("bshall/hifigan:main", "hifigan_hubert_soft", trust_repo=True).cuda()
-    print("Models initialized.")
@@ -36,15 +64,17 @@ def convert_speech(filename, progress=gr.Progress()):
     if source.shape[0] > 1:
         source = source[0, :].unsqueeze(0)
     source = torchaudio.functional.resample(source, sr, 16000)
-    source = source.unsqueeze(0).cuda()
     progress(0.6, desc="Converting speech")
     # Convert to the target speaker:
     with torch.inference_mode():
-        initialize_models()
         # Extract speech units
         units = hubert.units(source)
         # Generate target spectrogram
@@ -78,9 +108,6 @@ def get_audio_input(mic_input, audio_input):
 def clear_components():
   return None, None
 # Gradio interface
 with gr.Blocks() as interface:
     gr.Markdown("# Soft Speech Units for Improved Voice Conversion")

 import multiprocessing
 import os
def download_file(url, filename):
    """Download ``url`` to ``filename`` unless that file already exists.

    The payload is streamed in 8 KiB chunks into a temporary
    ``<filename>.part`` file and only moved into place once the transfer has
    finished, so a failed or interrupted download never leaves a truncated
    file that a later run would skip as "already downloaded".

    Args:
        url: HTTP(S) address to fetch.
        filename: Local path the downloaded content is stored under.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        OSError: If the file cannot be written or moved into place.
    """
    if os.path.exists(filename):
        print(f"{filename} exists. Skipping download")
        return

    # Imported locally so the function does not depend on the module header
    # importing requests.
    import requests

    print(f"{filename} files missing. Downloading ...")
    partial = f"{filename}.part"
    try:
        # The timeout bounds each connect/read wait; without it a stalled
        # server would block forever.
        with requests.get(url, stream=True, timeout=60) as response:
            # Refuse to store an error page as if it were the requested file.
            response.raise_for_status()
            with open(partial, "wb") as file:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:
                        file.write(chunk)
        # Publish the file only after the whole body has been written.
        os.replace(partial, filename)
    finally:
        # After a successful replace the partial file is gone; on failure,
        # remove whatever was written so far.
        if os.path.exists(partial):
            os.remove(partial)
# Maps each local checkpoint filename to the URL it is downloaded from.
# The remote objects are named hubert_cpu.pt, acoustic_cpu.pt and
# hifigan_cpu.pt; they are stored locally under the shorter keys below.
+model_urls = {
+    "hubert.pt": "https://www.dropbox.com/scl/fi/99ww7w3z1gwiqfuvq85ju/hubert_cpu.pt?rlkey=5wiqve2kdzn7aw7bth3wz1lqu&st=ec63yc3v&dl=1",
+    "acoustic.pt": "https://www.dropbox.com/scl/fi/hi8o0kjr6rhwpjb4jj56w/acoustic_cpu.pt?rlkey=0x62tmwbnjpmmvs9u1kdfb1bk&st=n8u2kzuw&dl=1",
+    "hifigan.pt": "https://www.dropbox.com/scl/fi/oi6642xskncc2fhwubawj/hifigan_cpu.pt?rlkey=amljq6kih4vuocj6335wa1hxa&st=9midccb2&dl=1"
+}
def verify_file(filename):
    """Check that ``filename`` can be deserialized by ``torch.load``.

    The outcome is printed. A failed load is reported rather than raised, so
    the caller can carry on with the remaining files.

    Args:
        filename: Path of the checkpoint to test-load.

    Returns:
        bool: ``True`` if the file loaded without error, ``False`` otherwise.
    """
    try:
        # NOTE: torch.load unpickles the file; depending on PyTorch's
        # weights_only default this can run arbitrary code, so only use it
        # on checkpoints from a trusted source.
        torch.load(filename, map_location='cpu')
    except Exception as e:
        # Deliberately broad: any load failure counts as "not valid".
        print(f"Error with {filename}: {e}")
        return False
    print(f"{filename} is valid.")
    return True
# Fetch every entry of model_urls, then test-load the resulting file.
# NOTE(review): both progress messages are printed unconditionally, so
# "downloaded successfully" also appears when download_file() skipped an
# existing file.
+for filename, url in model_urls.items():
+    print(f"Downloading {filename}...")
+    download_file(url, filename)
+    print(f"{filename} downloaded successfully.")
+    verify_file(filename)
 # Function to initialize models with CUDA
 def initialize_models():
+    hubert = torch.load("hubert.pt", map_location=torch.device('cpu'))
+    acoustic = torch.load("acoustic.pt", map_location=torch.device('cpu'))
+    hifigan = torch.load("hifigan.pt", map_location=torch.device('cpu'))
+    return hubert, acoustic, hifigan
     if source.shape[0] > 1:
         source = source[0, :].unsqueeze(0)
     source = torchaudio.functional.resample(source, sr, 16000)
+    source = source.unsqueeze(0).to('cpu')
     progress(0.6, desc="Converting speech")
+    # # Initialize models
+    # hubert, acoustic, hifigan = initialize_models()
     # Convert to the target speaker:
     with torch.inference_mode():
+        hubert, acoustic, hifigan = initialize_models()
         # Extract speech units
         units = hubert.units(source)
         # Generate target spectrogram
 def clear_components():
   """Return a pair of ``None`` values.

   NOTE(review): presumably used to reset two Gradio components; the call
   site is outside this view - confirm against the interface wiring.
   """
   return None, None
 # Gradio interface
 with gr.Blocks() as interface:
     gr.Markdown("# Soft Speech Units for Improved Voice Conversion")