Spaces:

betatestacct
/

api

Running on CPU Upgrade

App Files Files Community

Ftps committed on Jan 25

Commit

cb98587

1 Parent(s): 08e674b

Add model ZIP download support

Browse files

Files changed (2) hide show

app.py +5 -1
tabs/api/realtime_api.py +58 -21

app.py CHANGED Viewed

@@ -104,6 +104,10 @@ with gr.Blocks(
         )
         gr.Markdown("### Realtime Voice Conversion (Streaming)")
         with gr.Row():
             rt_model = gr.Dropdown(
                 label="Model",
@@ -127,7 +131,7 @@ with gr.Blocks(
         rt_input.stream(
             fn=process_audio_stream,
-            inputs=[rt_state, rt_input, rt_model, rt_pitch, rt_index_rate],
             outputs=[rt_state, rt_output],
             api_name="realtime_convert",
         )

         )
         gr.Markdown("### Realtime Voice Conversion (Streaming)")
+        rt_model_zip = gr.Textbox(
+            label="Model ZIP URL (optional)",
+            placeholder="https://example.com/model.zip",
+        )
         with gr.Row():
             rt_model = gr.Dropdown(
                 label="Model",
         rt_input.stream(
             fn=process_audio_stream,
+            inputs=[rt_state, rt_input, rt_model_zip, rt_model, rt_pitch, rt_index_rate],
             outputs=[rt_state, rt_output],
             api_name="realtime_convert",
         )

tabs/api/realtime_api.py CHANGED Viewed

@@ -1,7 +1,10 @@
 import os
 import sys
 import numpy as np
-from typing import Optional, Tuple, Any
 now_dir = os.getcwd()
 sys.path.append(now_dir)
@@ -10,6 +13,44 @@ LOGS_DIR = os.path.join(now_dir, "logs")
 SAMPLE_RATE = 48000
 def get_available_models() -> list:
     if not os.path.exists(LOGS_DIR):
         return []
@@ -52,13 +93,7 @@ class RealtimeVoiceChanger:
             "f0_autotune_strength": 1.0,
         }
-    def load_model(
-        self,
-        model_name: str,
-        f0_method: str = "rmvpe",
-        pitch: int = 0,
-        index_rate: float = 0.75,
-    ) -> str:
         from rvc.realtime.core import VoiceChanger
         pth_path, index_path, error = get_model_paths(model_name)
@@ -74,8 +109,6 @@ class RealtimeVoiceChanger:
             f0_method=f0_method,
         )
         self.model_name = model_name
-        self.settings["pitch"] = pitch
-        self.settings["index_rate"] = index_rate
         return f"Model '{model_name}' loaded"
     def convert(self, audio: np.ndarray) -> Optional[np.ndarray]:
@@ -94,30 +127,34 @@ class RealtimeVoiceChanger:
         return result
-def create_voice_changer_state() -> RealtimeVoiceChanger:
-    return RealtimeVoiceChanger()
 def process_audio_stream(
     state: Optional[RealtimeVoiceChanger],
     audio_chunk: Optional[Tuple[int, np.ndarray]],
     model_name: str,
     pitch: int,
     index_rate: float,
 ) -> Tuple[RealtimeVoiceChanger, Optional[Tuple[int, np.ndarray]]]:
     if state is None:
-        state = create_voice_changer_state()
     if audio_chunk is None:
         return state, None
-    sr, audio = audio_chunk
-    if state.model_name != model_name and model_name:
-        state.load_model(model_name, pitch=pitch, index_rate=index_rate)
-    else:
-        state.settings["pitch"] = pitch
-        state.settings["index_rate"] = index_rate
     if audio.ndim > 1:
         audio = audio.mean(axis=1)

 import os
 import sys
+import io
+import zipfile
+import requests
 import numpy as np
+from typing import Optional, Tuple
 now_dir = os.getcwd()
 sys.path.append(now_dir)
 SAMPLE_RATE = 48000
+def download_and_extract_model(url: str) -> Tuple[Optional[str], str]:
+    if not url:
+        return None, "URL is empty"
+    try:
+        response = requests.get(url, stream=True)
+        response.raise_for_status()
+        with zipfile.ZipFile(io.BytesIO(response.content)) as z:
+            model_name = os.path.splitext(os.path.basename(url))[0]
+            model_path = os.path.join(LOGS_DIR, model_name)
+            os.makedirs(model_path, exist_ok=True)
+            pth_file = next((n for n in z.namelist() if n.endswith(".pth")), None)
+            index_file = next((n for n in z.namelist() if n.endswith(".index")), None)
+            if not pth_file:
+                return None, "No .pth file in zip"
+            z.extract(pth_file, model_path)
+            if os.path.dirname(pth_file):
+                os.rename(
+                    os.path.join(model_path, pth_file),
+                    os.path.join(model_path, os.path.basename(pth_file)),
+                )
+            if index_file:
+                z.extract(index_file, model_path)
+                if os.path.dirname(index_file):
+                    os.rename(
+                        os.path.join(model_path, index_file),
+                        os.path.join(model_path, os.path.basename(index_file)),
+                    )
+            return model_name, f"Model '{model_name}' downloaded"
+    except Exception as e:
+        return None, str(e)
 def get_available_models() -> list:
     if not os.path.exists(LOGS_DIR):
         return []
             "f0_autotune_strength": 1.0,
         }
+    def load_model(self, model_name: str, f0_method: str = "rmvpe") -> str:
         from rvc.realtime.core import VoiceChanger
         pth_path, index_path, error = get_model_paths(model_name)
             f0_method=f0_method,
         )
         self.model_name = model_name
         return f"Model '{model_name}' loaded"
     def convert(self, audio: np.ndarray) -> Optional[np.ndarray]:
         return result
 def process_audio_stream(
     state: Optional[RealtimeVoiceChanger],
     audio_chunk: Optional[Tuple[int, np.ndarray]],
+    model_zip_link: str,
     model_name: str,
     pitch: int,
     index_rate: float,
 ) -> Tuple[RealtimeVoiceChanger, Optional[Tuple[int, np.ndarray]]]:
     if state is None:
+        state = RealtimeVoiceChanger()
     if audio_chunk is None:
         return state, None
+    # Download model from ZIP if provided
+    if model_zip_link and not model_name:
+        downloaded_name, msg = download_and_extract_model(model_zip_link)
+        if downloaded_name:
+            model_name = downloaded_name
+    # Load model if changed
+    if model_name and state.model_name != model_name:
+        state.load_model(model_name)
+    state.settings["pitch"] = pitch
+    state.settings["index_rate"] = index_rate
+    sr, audio = audio_chunk
     if audio.ndim > 1:
         audio = audio.mean(axis=1)