Adam3 commited on
Commit
b22f3f4
·
verified ·
1 Parent(s): 2dc41b2

Upload 11 files

Browse files
Files changed (11) hide show
  1. AICoverGen_colab.ipynb +127 -0
  2. LICENSE +21 -0
  3. README.md +224 -11
  4. README_Enhanced.md +248 -0
  5. cog.yaml +50 -0
  6. install_enhanced.bat +0 -0
  7. predict.py +276 -0
  8. requirements.txt +22 -0
  9. run_webui.bat +70 -0
  10. start_webui.bat +14 -0
  11. start_webui.ps1 +5 -0
AICoverGen_colab.ipynb ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "metadata": {
6
+ "id": "39k2mOCNAh6J"
7
+ },
8
+ "source": [
9
+ "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/SociallyIneptWeeb/AICoverGen/blob/main/AICoverGen_colab.ipynb)"
10
+ ]
11
+ },
12
+ {
13
+ "cell_type": "markdown",
14
+ "source": [
15
+ "# AICoverGen WebUI\n",
16
+ "\n",
17
+ "Simply click `Runtime` in the top navigation bar and `Run all`. Wait for the output of the final cell to show the public gradio url and click on it."
18
+ ],
19
+ "metadata": {
20
+ "id": "YYVAKuNBc-X4"
21
+ }
22
+ },
23
+ {
24
+ "cell_type": "code",
25
+ "execution_count": null,
26
+ "metadata": {
27
+ "id": "vC4gLMHI9xb3",
28
+ "cellView": "form"
29
+ },
30
+ "outputs": [],
31
+ "source": [
32
+ "#@title Clone repository\n",
33
+ "from IPython.display import clear_output, Javascript\n",
34
+ "import codecs\n",
35
+ "import threading\n",
36
+ "import time\n",
37
+ "cloneing=codecs.decode('uggcf://tvguho.pbz/FbpvnyylVarcgJrro/NVPbireTra.tvg','rot_13')\n",
38
+ "!git clone $cloneing HRVC\n",
39
+ "def update_timer_and_print():\n",
40
+ " global timer\n",
41
+ " while True:\n",
42
+ " hours, remainder = divmod(timer, 3600)\n",
43
+ " minutes, seconds = divmod(remainder, 60)\n",
44
+ " timer_str = f'{hours:02}:{minutes:02}:{seconds:02}'\n",
45
+ " print(f'\\rTimer: {timer_str}', end='', flush=True) # Print without a newline\n",
46
+ " time.sleep(1)\n",
47
+ " timer += 1\n",
48
+ "timer = 0\n",
49
+ "threading.Thread(target=update_timer_and_print, daemon=True).start()\n",
50
+ "%cd HRVC\n",
51
+ "clear_output()\n",
52
+ "print(\"Done Cloning Repository\")"
53
+ ]
54
+ },
55
+ {
56
+ "cell_type": "code",
57
+ "execution_count": null,
58
+ "metadata": {
59
+ "cellView": "form",
60
+ "id": "odzpJHpr_PaF"
61
+ },
62
+ "outputs": [],
63
+ "source": [
64
+ "#@title Install requirements\n",
65
+ "!pip install pip==23.3.1\n",
66
+ "!pip install -q -r requirements.txt\n",
67
+ "!pip install gradio-client==0.8.1\n",
68
+ "!pip install gradio==3.48.0\n",
69
+ "# install cuda fix\n",
70
+ "!python -m pip install ort-nightly-gpu --index-url=https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/ort-cuda-12-nightly/pypi/simple/\n",
71
+ "clear_output()\n",
72
+ "print(\"Finished Installing Requirements\")\n",
73
+ "!sudo apt update\n",
74
+ "clear_output()\n",
75
+ "print(\"Finished Updating\")\n",
76
+ "!sudo apt install sox\n",
77
+ "clear_output()\n",
78
+ "print(\"Finished running this cell, proceed to the next cell\")"
79
+ ]
80
+ },
81
+ {
82
+ "cell_type": "code",
83
+ "execution_count": null,
84
+ "metadata": {
85
+ "cellView": "form",
86
+ "id": "SLWpcJc0AHSZ"
87
+ },
88
+ "outputs": [],
89
+ "source": [
90
+ "#@title Download MDXNet Vocal Separation and Hubert Base Models\n",
91
+ "models=codecs.decode('fep/qbjaybnq_zbqryf.cl','rot_13')\n",
92
+ "!python $models\n",
93
+ "clear_output()\n",
94
+ "print(\"Finished Downloading Voice Separation Model and Hubert Base Model\")"
95
+ ]
96
+ },
97
+ {
98
+ "cell_type": "code",
99
+ "source": [
100
+ "#@title Run WebUI\n",
101
+ "runpice=codecs.decode('fep/jrohv.cl','rot_13')\n",
102
+ "!python $runpice --share"
103
+ ],
104
+ "metadata": {
105
+ "cellView": "form",
106
+ "id": "NEglTq6Ya9d0"
107
+ },
108
+ "execution_count": null,
109
+ "outputs": []
110
+ }
111
+ ],
112
+ "metadata": {
113
+ "accelerator": "GPU",
114
+ "colab": {
115
+ "provenance": []
116
+ },
117
+ "kernelspec": {
118
+ "display_name": "Python 3",
119
+ "name": "python3"
120
+ },
121
+ "language_info": {
122
+ "name": "python"
123
+ }
124
+ },
125
+ "nbformat": 4,
126
+ "nbformat_minor": 0
127
+ }
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2023 SociallyIneptWeeb
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
README.md CHANGED
@@ -1,14 +1,227 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
- title: AICoverGenEnhanced
3
- emoji: 💻
4
- colorFrom: pink
5
- colorTo: gray
6
- sdk: gradio
7
- sdk_version: 6.2.0
8
- app_file: app.py
9
- pinned: false
10
- license: apache-2.0
11
- short_description: Create ai covers with some more effects
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  ---
13
 
14
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # AICoverGen
2
+ An autonomous pipeline to create covers with any RVC v2 trained AI voice from YouTube videos or a local audio file. For developers who may want to add a singing functionality into their AI assistant/chatbot/vtuber, or for people who want to hear their favourite characters sing their favourite song.
3
+
4
+ <img width="1574" height="740" alt="image" src="https://github.com/user-attachments/assets/931189d8-e2e2-4240-84d6-52d7a13ac7f8" />
5
+
6
+ # AICoverGen Enhanced
7
+ **AI-Powered Voice Cover Generation with Advanced Audio Enhancement**
8
+
9
+ AICoverGen Enhanced is a powerful tool for creating AI voice covers with professional-grade audio enhancement features. This enhanced version includes advanced AI audio processing, EQ controls, dynamic range compression, and much more!
10
+
11
+ ---
12
+
13
+ ## ✨ New Features
14
+
15
+ ### Advanced Audio Enhancement
16
+ - **AI Noise Reduction** – Remove background noise and artifacts
17
+ - **Professional EQ** – 5 EQ types: Balanced, Vocal Boost, Bass Boost, Treble Boost, Flat
18
+ - **Dynamic Range Compression** – Improve loudness and consistency
19
+ - **Harmonic Enhancement** – Add richness and warmth to vocals
20
+ - **Stereo Widening** – Enhance spatial imaging for stereo tracks
21
+ - **Reverb Control** – Add depth and professional polish
22
+ - **Gain Control** – Fine-tune volume (-20 to +20 dB)
23
+
24
+ ### Enhancement Types
25
+ - **Full** – Balanced enhancement with all features
26
+ - **Light** – Subtle improvements for natural sound
27
+ - **Aggressive** – Maximum enhancement for impact
28
+ - **Custom** – Use your specific settings
29
+
30
+ ---
31
+
32
+ ## 🖥️ System Requirements
33
+
34
+ ### Minimum Requirements
35
+ - **OS:** Windows 10/11, Linux, or macOS
36
+ - **Python:** 3.9+ (3.10+ recommended)
37
+ - **RAM:** 8GB minimum, 16GB recommended
38
+ - **Storage:** 10GB free space
39
+ - **GPU:** NVIDIA GPU with CUDA support (recommended)
40
+
41
+ ### Recommended Setup
42
+ - **OS:** Windows 11 or Ubuntu 20.04+
43
+ - **Python:** 3.10 or 3.11
44
+ - **RAM:** 16GB or more
45
+ - **GPU:** NVIDIA RTX 3060 or better
46
+ - **CUDA:** 11.8 or 12.0+
47
+ - **cuDNN:** 8.6 or 9.0+
48
+
49
+ ---
50
+
51
+ ## ⚙️ Installation Guide
52
+
53
+ ### Step 1: Clone the Repository
54
+ ```bash
55
+ git clone https://github.com/SociallyIneptWeeb/AICoverGen.git
56
+ cd AICoverGen
57
+ ```
58
+
59
+ ### Step 2: Create Virtual Environment
60
+ ```bash
61
+ # Windows
62
+ python -m venv AICoverGen
63
+ AICoverGen\Scripts\activate
64
+
65
+ # Linux/macOS
66
+ python3 -m venv AICoverGen
67
+ source AICoverGen/bin/activate
68
+ ```
69
+
70
+ ### Step 3: Install Dependencies
71
+
72
+ #### Option A: Automatic Installation (Recommended)
73
+ ```bash
74
+ pip install -r requirements.txt
75
+ ```
76
+
77
+ #### Option B: Manual Installation
78
+ ```bash
79
+ # Core dependencies
80
+ pip install gradio==3.50.2 librosa==0.9.1 numpy==1.23.5 scipy==1.11.1 soundfile==0.12.1
81
+ pip install pedalboard==0.7.7 pydub==0.25.1 fairseq==0.12.2 faiss-cpu==1.7.3 pyworld==0.3.4
82
+ pip install praat-parselmouth>=0.4.2 ffmpeg-python>=0.2.0 tqdm==4.65.0 yt-dlp>=2025.9.23 sox==1.4.1
83
+
84
+ # AI Audio Enhancement dependencies
85
+ pip install noisereduce==3.0.3 scikit-learn==1.6.1
86
+
87
+ # PyTorch with CUDA support
88
+ pip install torch==2.0.1+cu118 --find-links https://download.pytorch.org/whl/torch_stable.html
89
+ pip install torchcrepe==0.0.20
90
+
91
+ # ONNX Runtime with CUDA support
92
+ pip install onnxruntime-gpu==1.18.0
93
+ ```
94
+
95
+ ### Step 4: Download Models
96
+ ```bash
97
+ python src/download_models.py
98
+ ```
99
+
100
+ ### Step 5: Verify Installation
101
+ ```bash
102
+ python src/audio_enhancer.py
103
+ ```
104
+
105
+ ---
106
+
107
+ ## 🚀 Usage
108
+
109
+ ### Quick Start
110
+ ```bash
111
+ python src/webui.py
112
+ ```
113
+ Then open your browser and go to: [http://127.0.0.1:7860](http://127.0.0.1:7860)
114
+
115
+ 1. Upload a song (YouTube URL or audio file)
116
+ 2. Select a voice model from the dropdown
117
+ 3. Configure audio enhancement:
118
+ - Expand "AI Audio Enhancement" section
119
+ - Choose enhancement type (Full/Light/Aggressive/Custom)
120
+ - Adjust EQ type (Balanced/Vocal Boost/Bass Boost/Treble Boost/Flat)
121
+ - Set noise reduction strength (0–100%)
122
+ - Adjust gain (-20 to +20 dB)
123
+ - Set compression ratio (1–10)
124
+ - Add reverb amount (0–100%)
125
+ 4. Click **Generate** and enjoy your enhanced AI cover!
126
+
127
+ ---
128
+
129
+ ## 🛠️ Troubleshooting
130
+
131
+ ### CUDA Not Detected
132
+ ```bash
133
+ # Check CUDA installation
134
+ nvidia-smi
135
+
136
+ # Verify PyTorch CUDA support
137
+ python -c "import torch; print(torch.cuda.is_available())"
138
+
139
+ # Check ONNX Runtime CUDA
140
+ python -c "import onnxruntime as ort; print('CUDA' in ort.get_available_providers())"
141
+ ```
142
+
143
+ ### Audio Enhancement Errors
144
+ ```bash
145
+ # Test audio enhancer
146
+ python src/audio_enhancer.py
147
+
148
+ # Check dependencies
149
+ pip list | grep -E "(noisereduce|scikit-learn|pedalboard)"
150
+ ```
151
+
152
+ ### Memory Issues
153
+ - Reduce batch size in settings
154
+ - Use CPU-only mode for ONNX Runtime
155
+ - Close other applications to free RAM
156
+
157
+ ---
158
+
159
+ ## 📂 Project Structure
160
+ ```
161
+ AICoverGen_Enhanced/
162
+ ├── src/
163
+ │ ├── webui.py # Main web interface
164
+ │ ├── main.py # Core pipeline with audio enhancement
165
+ │ ├── audio_enhancer.py # AI audio enhancement module
166
+ │ ├── rvc.py # RVC voice conversion
167
+ │ ├── mdx.py # Audio separation
168
+ ├── rvc_models/ # Voice models
169
+ ├── mdxnet_models/ # Audio separation models
170
+ ├── song_output/ # Generated covers
171
+ ├── requirements.txt # Dependencies
172
+ ├── README_Enhanced.md # This file
173
+ ```
174
+
175
  ---
176
+
177
+ ## 🎚️ Audio Enhancement Features
178
+
179
+ ### AI Noise Reduction
180
+ - Uses ML to identify and remove background noise
181
+ - Preserves vocal clarity while eliminating artifacts
182
+ - Adjustable strength (0–100%)
183
+
184
+ ### Professional EQ
185
+ - **Balanced**: Gentle mid boost for clarity
186
+ - **Vocal Boost**: Emphasizes 800–3000 Hz range
187
+ - **Bass Boost**: Enhances 60–250 Hz
188
+ - **Treble Boost**: Brightens 4–16 kHz
189
+ - **Flat**: Minimal processing with high-pass filter
190
+
191
+ ### Dynamic Range Compression
192
+ - Improves loudness consistency
193
+ - Reduces dynamic range for streaming
194
+ - Configurable ratio (1–10)
195
+
196
+ ### Harmonic Enhancement
197
+ - Adds warmth and richness
198
+ - Uses soft saturation for natural harmonics
199
+
200
+ ### Stereo Widening
201
+ - Improves spatial imaging
202
+ - Enhances left-right separation
203
+ - Creates immersive experience
204
+
205
+ ### Reverb Control
206
+ - Adds subtle depth and space
207
+ - Professional room simulation
208
+ - Configurable wet/dry mix
209
+
210
  ---
211
 
212
+ ## 🤝 Contributing
213
+ We welcome contributions! Please see our **Contributing Guidelines** for details.
214
+
215
+ ---
216
+
217
+ ## ⚖️ License
218
+ This project is licensed under the **MIT License** – see the LICENSE file for details.
219
+
220
+ ---
221
+
222
+ ## 🙏 Acknowledgments
223
+ - Original AICoverGen by **SociallyIneptWeeb**
224
+ - RVC (Retrieval-based Voice Conversion) framework
225
+ - MDXNet for audio separation
226
+ - All the amazing open-source audio processing libraries
227
+
README_Enhanced.md ADDED
@@ -0,0 +1,248 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # AICoverGen Enhanced
2
+
3
+ **AI-Powered Voice Cover Generation with Advanced Audio Enhancement**
4
+
5
+ AICoverGen Enhanced is a powerful tool for creating AI voice covers with professional-grade audio enhancement features. This enhanced version includes advanced AI audio processing, EQ controls, dynamic range compression, and much more!
6
+
7
+ ## New Features
8
+
9
+ ### Advanced Audio Enhancement
10
+ - **AI Noise Reduction** - Remove background noise and artifacts
11
+ - **Professional EQ** - 5 EQ types: Balanced, Vocal Boost, Bass Boost, Treble Boost, Flat
12
+ - **Dynamic Range Compression** - Improve loudness and consistency
13
+ - **Harmonic Enhancement** - Add richness and warmth to vocals
14
+ - **Stereo Widening** - Enhance spatial imaging for stereo tracks
15
+ - **Reverb Control** - Add depth and professional polish
16
+ - **Gain Control** - Fine-tune volume (-20 to +20 dB)
17
+
18
+ ### Enhancement Types
19
+ - **Full** - Balanced enhancement with all features
20
+ - **Light** - Subtle improvements for natural sound
21
+ - **Aggressive** - Maximum enhancement for impact
22
+ - **Custom** - Use your specific settings
23
+
24
+ ## System Requirements
25
+
26
+ ### Minimum Requirements
27
+ - **OS**: Windows 10/11, Linux, or macOS
28
+ - **Python**: 3.9+ (3.10+ recommended)
29
+ - **RAM**: 8GB minimum, 16GB recommended
30
+ - **Storage**: 10GB free space
31
+ - **GPU**: NVIDIA GPU with CUDA support (recommended)
32
+
33
+ ### Recommended Setup
34
+ - **OS**: Windows 11 or Ubuntu 20.04+
35
+ - **Python**: 3.10 or 3.11
36
+ - **RAM**: 16GB or more
37
+ - **GPU**: NVIDIA RTX 3060 or better
38
+ - **CUDA**: 11.8 or 12.0+
39
+ - **cuDNN**: 8.6 or 9.0+
40
+
41
+ ## Installation Guide
42
+
43
+ ### Step 1: Clone the Repository
44
+ ```bash
45
+ git clone https://github.com/SociallyIneptWeeb/AICoverGen.git
46
+ cd AICoverGen
47
+ ```
48
+
49
+ ### Step 2: Create Virtual Environment
50
+ ```bash
51
+ # Windows
52
+ python -m venv AICoverGen
53
+ AICoverGen\Scripts\activate
54
+
55
+ # Linux/macOS
56
+ python3 -m venv AICoverGen
57
+ source AICoverGen/bin/activate
58
+ ```
59
+
60
+ ### Step 3: Install Dependencies
61
+
62
+ #### Option A: Automatic Installation (Recommended)
63
+ ```bash
64
+ pip install -r requirements.txt
65
+ ```
66
+
67
+ #### Option B: Manual Installation
68
+ ```bash
69
+ # Core dependencies
70
+ pip install gradio==3.50.2
71
+ pip install librosa==0.9.1
72
+ pip install numpy==1.23.5
73
+ pip install scipy==1.11.1
74
+ pip install soundfile==0.12.1
75
+ pip install pedalboard==0.7.7
76
+ pip install pydub==0.25.1
77
+ pip install fairseq==0.12.2
78
+ pip install faiss-cpu==1.7.3
79
+ pip install pyworld==0.3.4
80
+ pip install praat-parselmouth>=0.4.2
81
+ pip install ffmpeg-python>=0.2.0
82
+ pip install tqdm==4.65.0
83
+ pip install yt-dlp>=2025.9.23
84
+ pip install sox==1.4.1
85
+
86
+ # AI Audio Enhancement dependencies
87
+ pip install noisereduce==3.0.3
88
+ pip install scikit-learn==1.6.1
89
+
90
+ # PyTorch with CUDA support
91
+ pip install torch==2.0.1+cu118 --find-links https://download.pytorch.org/whl/torch_stable.html
92
+ pip install torchcrepe==0.0.20
93
+
94
+ # ONNX Runtime with CUDA support
95
+ pip install onnxruntime-gpu==1.18.0
96
+ ```
97
+
98
+ ### Step 4: Download Models
99
+ ```bash
100
+ python src/download_models.py
101
+ ```
102
+
103
+ ### Step 5: Verify Installation
104
+ ```bash
105
+ python src/audio_enhancer.py
106
+ ```
107
+
108
+ ## Usage
109
+
110
+ ### Quick Start
111
+ 1. **Start the Web UI**:
112
+ ```bash
113
+ python src/webui.py
114
+ ```
115
+
116
+ 2. **Open your browser** and go to http://127.0.0.1:7860
117
+
118
+ 3. **Upload a song** (YouTube URL or audio file)
119
+
120
+ 4. **Select a voice model** from the dropdown
121
+
122
+ 5. **Configure audio enhancement**:
123
+ - Expand "AI Audio Enhancement" section
124
+ - Choose enhancement type (Full/Light/Aggressive/Custom)
125
+ - Adjust EQ type (Balanced/Vocal Boost/Bass Boost/Treble Boost/Flat)
126
+ - Set noise reduction strength (0-100%)
127
+ - Adjust gain (-20 to +20 dB)
128
+ - Set compression ratio (1-10)
129
+ - Add reverb amount (0-100%)
130
+
131
+ 6. **Click Generate** and enjoy your enhanced AI cover!
132
+
133
+ ## Troubleshooting
134
+
135
+ ### Common Issues
136
+
137
+ #### CUDA Not Detected
138
+ ```bash
139
+ # Check CUDA installation
140
+ nvidia-smi
141
+
142
+ # Verify PyTorch CUDA support
143
+ python -c "import torch; print(torch.cuda.is_available())"
144
+
145
+ # Check ONNX Runtime CUDA
146
+ python -c "import onnxruntime as ort; print('CUDA' in ort.get_available_providers())"
147
+ ```
148
+
149
+ #### Audio Enhancement Errors
150
+ ```bash
151
+ # Test audio enhancer
152
+ python src/audio_enhancer.py
153
+
154
+ # Check dependencies
155
+ pip list | grep -E "(noisereduce|scikit-learn|pedalboard)"
156
+ ```
157
+
158
+ #### Memory Issues
159
+ - Reduce batch size in settings
160
+ - Use CPU-only mode for ONNX Runtime
161
+ - Close other applications to free RAM
162
+
163
+ ## Project Structure
164
+
165
+ ```
166
+ AICoverGen_Enhanced/
167
+ src/
168
+ webui.py # Main web interface
169
+ main.py # Core pipeline with audio enhancement
170
+ audio_enhancer.py # AI audio enhancement module
171
+ rvc.py # RVC voice conversion
172
+ mdx.py # Audio separation
173
+ ...
174
+ rvc_models/ # Voice models
175
+ mdxnet_models/ # Audio separation models
176
+ song_output/ # Generated covers
177
+ requirements.txt # Dependencies
178
+ README_Enhanced.md # This file
179
+ ```
180
+
181
+ ## Audio Enhancement Features
182
+
183
+ ### AI Noise Reduction
184
+ - Uses machine learning to identify and remove background noise
185
+ - Preserves vocal clarity while eliminating artifacts
186
+ - Adjustable strength from 0-100%
187
+
188
+ ### Professional EQ
189
+ - **Balanced**: Gentle mid boost for overall clarity
190
+ - **Vocal Boost**: Emphasizes 800-3000 Hz range for vocals
191
+ - **Bass Boost**: Enhances 60-250 Hz for low-end presence
192
+ - **Treble Boost**: Brightens 4-16 kHz for crispness
193
+ - **Flat**: Minimal processing with high-pass filter
194
+
195
+ ### Dynamic Range Compression
196
+ - Improves loudness consistency
197
+ - Reduces dynamic range for better streaming
198
+ - Configurable ratio from 1-10
199
+
200
+ ### Harmonic Enhancement
201
+ - Adds warmth and richness to vocals
202
+ - Uses soft saturation for natural harmonics
203
+ - Enhances perceived quality
204
+
205
+ ### Stereo Widening
206
+ - Improves spatial imaging for stereo tracks
207
+ - Enhances left-right separation
208
+ - Creates more immersive listening experience
209
+
210
+ ### Reverb Control
211
+ - Adds subtle depth and space
212
+ - Professional room simulation
213
+ - Configurable wet/dry mix
214
+
215
+ ## Contributing
216
+
217
+ We welcome contributions! Please see our [Contributing Guidelines](CONTRIBUTING.md) for details.
218
+
219
+ ### Development Setup
220
+ ```bash
221
+ git clone https://github.com/SociallyIneptWeeb/AICoverGen.git
222
+ cd AICoverGen
223
+ pip install -r requirements.txt
224
+ pip install -r requirements-dev.txt # If available
225
+ ```
226
+
227
+ ## License
228
+
229
+ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
230
+
231
+ ## Acknowledgments
232
+
233
+ - Original AICoverGen by [SociallyIneptWeeb](https://github.com/SociallyIneptWeeb)
234
+ - RVC (Retrieval-based Voice Conversion) framework
235
+ - MDXNet for audio separation
236
+ - All the amazing open-source audio processing libraries
237
+
238
+ ## Support
239
+
240
+ - **Issues**: [GitHub Issues](https://github.com/SociallyIneptWeeb/AICoverGen/issues)
241
+ - **Discussions**: [GitHub Discussions](https://github.com/SociallyIneptWeeb/AICoverGen/discussions)
242
+ - **Documentation**: [Wiki](https://github.com/SociallyIneptWeeb/AICoverGen/wiki)
243
+
244
+ ---
245
+
246
+ **Happy Creating!**
247
+
248
+ *Create amazing AI voice covers with professional audio enhancement!*
cog.yaml ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Configuration for Cog ⚙️
2
+ # Reference: https://github.com/replicate/cog/blob/main/docs/yaml.md
3
+
4
+ build:
5
+ # set to true if your model requires a GPU
6
+ gpu: true
7
+
8
+ # a list of ubuntu apt packages to install
9
+ system_packages:
10
+ - "libgl1-mesa-glx"
11
+ - "ffmpeg"
12
+ - "sox"
13
+
14
+ # python version in the form '3.11' or '3.11.4'
15
+ python_version: "3.9"
16
+
17
+ # a list of packages in the format <package-name>==<version>
18
+ python_packages:
19
+ - "deemix"
20
+ - "fairseq==0.12.2"
21
+ - "faiss-cpu==1.7.3"
22
+ - "ffmpeg-python>=0.2.0"
23
+ - "gradio==3.39.0"
24
+ - "lib==4.0.0"
25
+ - "librosa==0.9.1"
26
+ - "numpy==1.23.5"
27
+ - "onnxruntime_gpu"
28
+ - "praat-parselmouth>=0.4.2"
29
+ - "pedalboard==0.7.7"
30
+ - "pydub==0.25.1"
31
+ - "pyworld==0.3.4"
32
+ - "Requests==2.31.0"
33
+ - "scipy==1.11.1"
34
+ - "soundfile==0.12.1"
35
+ - "--find-links https://download.pytorch.org/whl/torch_stable.html"
36
+ - "torch==2.0.1+cu118"
37
+ - "torchcrepe==0.0.20"
38
+ - "tqdm==4.65.0"
39
+ - "yt_dlp==2023.7.6"
40
+ - "sox==1.4.1"
41
+ - "gradio"
42
+
43
+ # commands run after the environment is setup
44
+ run:
45
+ - pip install --upgrade pip
46
+ - apt-get update && apt-get install -y ffmpeg
47
+ - pip install imageio[ffmpeg]
48
+
49
+ # predict.py defines how predictions are run on your model
50
+ predict: "predict.py:Predictor"
install_enhanced.bat ADDED
Binary file (116 Bytes). View file
 
predict.py ADDED
@@ -0,0 +1,276 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Prediction interface for Cog ⚙️
2
+ # https://github.com/replicate/cog/blob/main/docs/python.md
3
+
4
+ import os
5
+ import sys
6
+ import shutil
7
+ import zipfile
8
+ import urllib.request
9
+ from argparse import Namespace
10
+ from cog import BasePredictor, Input, Path as CogPath
11
+
12
+ sys.path.insert(0, os.path.abspath("src"))
13
+
14
+ import main as m
15
+
16
+
17
def download_online_model(url, dir_name):
    """Download an RVC voice-model zip from *url* and extract it.

    The archive's files are flattened (basenames only, directories skipped)
    into ``m.rvc_models_dir/dir_name``. If that directory already exists the
    download is skipped entirely. Pixeldrain share links are rewritten to the
    direct API endpoint before downloading.

    Parameters:
        url (str): Direct (or pixeldrain share) URL of the model zip.
        dir_name (str): Name of the target model directory.

    Side effects:
        Creates the model directory, writes the extracted files, and removes
        the temporary zip archive afterwards (the original implementation
        left the archive behind on disk).
    """
    print(f"[~] Downloading voice model with name {dir_name}...")
    zip_name = url.split("/")[-1]
    extraction_folder = os.path.join(m.rvc_models_dir, dir_name)
    if os.path.exists(extraction_folder):
        print(f"Voice model directory {dir_name} already exists! Skipping download.")
        return

    # Pixeldrain share pages aren't the file itself; use the API route.
    if "pixeldrain.com" in url:
        url = f"https://pixeldrain.com/api/file/{zip_name}"

    urllib.request.urlretrieve(url, zip_name)

    print("[~] Extracting zip...")
    # Create the target directory once up front (the original recreated it
    # for every zip member inside the loop).
    os.makedirs(extraction_folder, exist_ok=True)
    try:
        with zipfile.ZipFile(zip_name, "r") as zip_ref:
            for member in zip_ref.infolist():
                # Skip directory entries; only real files are extracted.
                if member.is_dir():
                    continue
                # Flatten: drop internal zip paths, keep the basename only.
                target_path = os.path.join(
                    extraction_folder, os.path.basename(member.filename)
                )
                with zip_ref.open(member) as source, open(target_path, "wb") as target:
                    shutil.copyfileobj(source, target)
    finally:
        # Fix: clean up the downloaded archive instead of leaking it on disk.
        os.remove(zip_name)
    print(f"[+] {dir_name} Model successfully downloaded!")
46
+
47
+
48
+ class Predictor(BasePredictor):
49
+ def setup(self) -> None:
50
+ """Load the model into memory to make running multiple predictions efficient"""
51
+ pass
52
+
53
+ def predict(
54
+ self,
55
+ song_input: CogPath = Input(
56
+ description="Upload your audio file here.",
57
+ default=None,
58
+ ),
59
+ rvc_model: str = Input(
60
+ description="RVC model for a specific voice. If using a custom model, this should match the name of the downloaded model. If a 'custom_rvc_model_download_url' is provided, this will be automatically set to the name of the downloaded model.",
61
+ default="Squidward",
62
+ choices=[
63
+ "Squidward",
64
+ "MrKrabs",
65
+ "Plankton",
66
+ "Drake",
67
+ "Vader",
68
+ "Trump",
69
+ "Biden",
70
+ "Obama",
71
+ "Guitar",
72
+ "Voilin",
73
+ "CUSTOM",
74
+ "SamA", # TODO REMOVE THIS
75
+ ],
76
+ ),
77
+ custom_rvc_model_download_url: str = Input(
78
+ description="URL to download a custom RVC model. If provided, the model will be downloaded (if it doesn't already exist) and used for prediction, regardless of the 'rvc_model' value.",
79
+ default=None,
80
+ ),
81
+ pitch_change: str = Input(
82
+ description="Adjust pitch of AI vocals. Options: `no-change`, `male-to-female`, `female-to-male`.",
83
+ default="no-change",
84
+ choices=["no-change", "male-to-female", "female-to-male"],
85
+ ),
86
+ index_rate: float = Input(
87
+ description="Control how much of the AI's accent to leave in the vocals.",
88
+ default=0.5,
89
+ ge=0,
90
+ le=1,
91
+ ),
92
+ filter_radius: int = Input(
93
+ description="If >=3: apply median filtering median filtering to the harvested pitch results.",
94
+ default=3,
95
+ ge=0,
96
+ le=7,
97
+ ),
98
+ rms_mix_rate: float = Input(
99
+ description="Control how much to use the original vocal's loudness (0) or a fixed loudness (1).",
100
+ default=0.25,
101
+ ge=0,
102
+ le=1,
103
+ ),
104
+ pitch_detection_algorithm: str = Input(
105
+ description="Best option is rmvpe (clarity in vocals), then mangio-crepe (smoother vocals).",
106
+ default="rmvpe",
107
+ choices=["rmvpe", "mangio-crepe"],
108
+ ),
109
+ crepe_hop_length: int = Input(
110
+ description="When `pitch_detection_algo` is set to `mangio-crepe`, this controls how often it checks for pitch changes in milliseconds. Lower values lead to longer conversions and higher risk of voice cracks, but better pitch accuracy.",
111
+ default=128,
112
+ ),
113
+ protect: float = Input(
114
+ description="Control how much of the original vocals' breath and voiceless consonants to leave in the AI vocals. Set 0.5 to disable.",
115
+ default=0.33,
116
+ ge=0,
117
+ le=0.5,
118
+ ),
119
+ main_vocals_volume_change: float = Input(
120
+ description="Control volume of main AI vocals. Use -3 to decrease the volume by 3 decibels, or 3 to increase the volume by 3 decibels.",
121
+ default=0,
122
+ ),
123
+ backup_vocals_volume_change: float = Input(
124
+ description="Control volume of backup AI vocals.",
125
+ default=0,
126
+ ),
127
+ instrumental_volume_change: float = Input(
128
+ description="Control volume of the background music/instrumentals.",
129
+ default=0,
130
+ ),
131
+ pitch_change_all: float = Input(
132
+ description="Change pitch/key of background music, backup vocals and AI vocals in semitones. Reduces sound quality slightly.",
133
+ default=0,
134
+ ),
135
+ reverb_size: float = Input(
136
+ description="The larger the room, the longer the reverb time.",
137
+ default=0.15,
138
+ ge=0,
139
+ le=1,
140
+ ),
141
+ reverb_wetness: float = Input(
142
+ description="Level of AI vocals with reverb.",
143
+ default=0.2,
144
+ ge=0,
145
+ le=1,
146
+ ),
147
+ reverb_dryness: float = Input(
148
+ description="Level of AI vocals without reverb.",
149
+ default=0.8,
150
+ ge=0,
151
+ le=1,
152
+ ),
153
+ reverb_damping: float = Input(
154
+ description="Absorption of high frequencies in the reverb.",
155
+ default=0.7,
156
+ ge=0,
157
+ le=1,
158
+ ),
159
+ output_format: str = Input(
160
+ description="wav for best quality and large file size, mp3 for decent quality and small file size.",
161
+ default="mp3",
162
+ choices=["mp3", "wav"],
163
+ ),
164
+ ) -> CogPath:
165
+ """
166
+ Runs a single prediction on the model.
167
+
168
+ Required Parameters:
169
+ song_input (CogPath): Upload your audio file here.
170
+ rvc_model (str): RVC model for a specific voice. Default is "Squidward". If a 'custom_rvc_model_download_url' is provided, this will be automatically set to the name of the downloaded model.
171
+ pitch_change (float): Change pitch of AI vocals in octaves. Set to 0 for no change. Generally, use 1 for male to female conversions and -1 for vice-versa.
172
+
173
+ Optional Parameters:
174
+ custom_rvc_model_download_url (str): URL to download a custom RVC model. If provided, the model will be downloaded (if it doesn't already exist) and used for prediction, regardless of the 'rvc_model' value. Defaults to None.
175
+ index_rate (float): Control how much of the AI's accent to leave in the vocals. 0 <= INDEX_RATE <= 1. Defaults to 0.5.
176
+ filter_radius (int): If >=3: apply median filtering median filtering to the harvested pitch results. 0 <= FILTER_RADIUS <= 7. Defaults to 3.
177
+ rms_mix_rate (float): Control how much to use the original vocal's loudness (0) or a fixed loudness (1). 0 <= RMS_MIX_RATE <= 1. Defaults to 0.25.
178
+ pitch_detection_algorithm (str): Best option is rmvpe (clarity in vocals), then mangio-crepe (smoother vocals). Defaults to "rmvpe".
179
+ crepe_hop_length (int): Controls how often it checks for pitch changes in milliseconds when using mangio-crepe algo specifically. Lower values leads to longer conversions and higher risk of voice cracks, but better pitch accuracy. Defaults to 128.
180
+ protect (float): Control how much of the original vocals' breath and voiceless consonants to leave in the AI vocals. Set 0.5 to disable. 0 <= PROTECT <= 0.5. Defaults to 0.33.
181
+ main_vocals_volume_change (float): Control volume of main AI vocals. Use -3 to decrease the volume by 3 decibels, or 3 to increase the volume by 3 decibels. Defaults to 0.
182
+ backup_vocals_volume_change (float): Control volume of backup AI vocals. Defaults to 0.
183
+ instrumental_volume_change (float): Control volume of the background music/instrumentals. Defaults to 0.
184
+ pitch_change_all (float): Change pitch/key of background music, backup vocals and AI vocals in semitones. Reduces sound quality slightly. Defaults to 0.
185
+ reverb_size (float): The larger the room, the longer the reverb time. 0 <= REVERB_SIZE <= 1. Defaults to 0.15.
186
+ reverb_wetness (float): Level of AI vocals with reverb. 0 <= REVERB_WETNESS <= 1. Defaults to 0.2.
187
+ reverb_dryness (float): Level of AI vocals without reverb. 0 <= REVERB_DRYNESS <= 1. Defaults to 0.8.
188
+ reverb_damping (float): Absorption of high frequencies in the reverb. 0 <= REVERB_DAMPING <= 1. Defaults to 0.7.
189
+ output_format (str): wav for best quality and large file size, mp3 for decent quality and small file size. Defaults to "mp3".
190
+
191
+ Returns:
192
+ CogPath: The output path of the generated audio file.
193
+ """
194
+
195
+ if custom_rvc_model_download_url:
196
+ custom_rvc_model_download_name = urllib.parse.unquote(
197
+ custom_rvc_model_download_url.split("/")[-1]
198
+ )
199
+ custom_rvc_model_download_name = os.path.splitext(
200
+ custom_rvc_model_download_name
201
+ )[0]
202
+ print(
203
+ f"[!] The model will be downloaded as '{custom_rvc_model_download_name}'."
204
+ )
205
+ download_online_model(
206
+ url=custom_rvc_model_download_url,
207
+ dir_name=custom_rvc_model_download_name,
208
+ )
209
+ rvc_model = custom_rvc_model_download_name
210
+ else:
211
+ print(
212
+ "[!] No custom model URL provided; using the selected `rvc_model` as-is."
213
+ )
214
+
215
+ # Convert pitch_change from string to numerical value for processing
216
+ # 0 for no change, 1 for male to female, -1 for female to male
217
+ if pitch_change == "no-change":
218
+ pitch_change = 0
219
+ elif pitch_change == "male-to-female":
220
+ pitch_change = 1
221
+ else: # pitch_change == "female-to-male"
222
+ pitch_change = -1
223
+
224
+ args = Namespace(
225
+ song_input=str(song_input),
226
+ rvc_dirname=(model_dir_name := rvc_model),
227
+ pitch_change=pitch_change,
228
+ keep_files=(keep_files := False),
229
+ index_rate=index_rate,
230
+ filter_radius=filter_radius,
231
+ rms_mix_rate=rms_mix_rate,
232
+ pitch_detection_algo=pitch_detection_algorithm,
233
+ crepe_hop_length=crepe_hop_length,
234
+ protect=protect,
235
+ main_vol=main_vocals_volume_change,
236
+ backup_vol=backup_vocals_volume_change,
237
+ inst_vol=instrumental_volume_change,
238
+ pitch_change_all=pitch_change_all,
239
+ reverb_size=reverb_size,
240
+ reverb_wetness=reverb_wetness,
241
+ reverb_dryness=reverb_dryness,
242
+ reverb_damping=reverb_damping,
243
+ output_format=output_format,
244
+ )
245
+
246
+ rvc_dirname = args.rvc_dirname
247
+ if not os.path.exists(os.path.join(m.rvc_models_dir, rvc_dirname)):
248
+ raise Exception(
249
+ f"The folder {os.path.join(m.rvc_models_dir, rvc_dirname)} does not exist."
250
+ )
251
+
252
+ cover_path = m.song_cover_pipeline(
253
+ args.song_input,
254
+ rvc_dirname,
255
+ args.pitch_change,
256
+ args.keep_files,
257
+ main_gain=args.main_vol,
258
+ backup_gain=args.backup_vol,
259
+ inst_gain=args.inst_vol,
260
+ index_rate=args.index_rate,
261
+ filter_radius=args.filter_radius,
262
+ rms_mix_rate=args.rms_mix_rate,
263
+ f0_method=args.pitch_detection_algo,
264
+ crepe_hop_length=args.crepe_hop_length,
265
+ protect=args.protect,
266
+ pitch_change_all=args.pitch_change_all,
267
+ reverb_rm_size=args.reverb_size,
268
+ reverb_wet=args.reverb_wetness,
269
+ reverb_dry=args.reverb_dryness,
270
+ reverb_damping=args.reverb_damping,
271
+ output_format=args.output_format,
272
+ )
273
+ print(f"[+] Cover generated at {cover_path}")
274
+
275
+ # Return the output path
276
+ return CogPath(cover_path)
requirements.txt ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ deemix
2
+ fairseq==0.12.2
3
+ faiss-cpu==1.7.3
4
+ ffmpeg-python>=0.2.0
5
+ gradio==3.39.0
6
+ lib==4.0.0
7
+ librosa==0.9.1
8
+ numpy==1.23.5
9
+ onnxruntime_gpu
10
+ praat-parselmouth>=0.4.2
11
+ pedalboard==0.7.7
12
+ pydub==0.25.1
13
+ pyworld==0.3.4
14
+ Requests==2.31.0
15
+ scipy==1.11.1
16
+ soundfile==0.12.1
17
+ --find-links https://download.pytorch.org/whl/torch_stable.html
18
+ torch==2.0.1+cu118
19
+ torchcrepe==0.0.20
20
+ tqdm==4.65.0
21
+ yt_dlp==2023.7.6
22
+ sox==1.4.1
run_webui.bat ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
@echo off
REM run_webui.bat - install dependencies into the local virtual environment
REM and launch the AICoverGen Gradio WebUI. Extra arguments are forwarded to
REM src\webui.py (e.g. --listen for LAN access).
setlocal enableextensions enabledelayedexpansion

REM Change to repo root (folder of this script)
cd /d "%~dp0"

REM ---------- Config ----------
set "VENV_DIR=AICoverGen"
set "PYTHON_EXE=%VENV_DIR%\Scripts\python.exe"
set "PIP_EXE=%VENV_DIR%\Scripts\pip.exe"

REM Fail early if the virtual environment is missing - checked BEFORE
REM activation so the user gets a clear message instead of an activation error.
if not exist "%PYTHON_EXE%" (
    echo [ERROR] Virtual environment not found at %VENV_DIR%.
    echo Expected: %PYTHON_EXE%
    echo Create one first, or ensure the repo was set up correctly.
    echo.
    echo Example to create venv:
    echo python -m venv AICoverGen
    exit /b 1
)

REM Activate virtual environment
call "%VENV_DIR%\Scripts\activate.bat"

REM Force ONNX Runtime to use CPU provider (PyTorch will still use GPU)
set "ORT_DISABLE_CUDA=1"

REM Optional: choose GPU index for PyTorch (0 = first GPU)
set "CUDA_VISIBLE_DEVICES=0"

REM Ensure UTF-8 output
set "PYTHONUTF8=1"
REM ----------------------------

REM Upgrade pip (silent-ish)
"%PYTHON_EXE%" -m pip install --upgrade pip --disable-pip-version-check 1>nul 2>nul

REM Install core requirements
"%PIP_EXE%" install -r requirements.txt --no-input
if errorlevel 1 (
    echo [ERROR] Failed installing requirements.
    exit /b 1
)

REM Ensure a compatible Gradio version (3.50.2 is known-good with this UI);
REM deliberately overrides the pin in requirements.txt.
"%PIP_EXE%" install "gradio==3.50.2" --no-input
if errorlevel 1 (
    echo [ERROR] Failed installing Gradio.
    exit /b 1
)

REM Check ffmpeg availability (recommended)
where ffmpeg >nul 2>nul
if errorlevel 1 (
    echo [WARN] ffmpeg not found in PATH. Audio processing may fail.
    echo Install ffmpeg and add it to PATH: https://ffmpeg.org/download.html
)

REM Optional: open the UI in browser after a short delay
start "" /b cmd /c "timeout /t 3 /nobreak >nul & start http://127.0.0.1:7860"

REM Run the WebUI. Add --listen to allow LAN access if desired.
"%PYTHON_EXE%" src\webui.py %*

REM Preserve exit code
set "EXIT_CODE=%ERRORLEVEL%"
echo.
echo Server exited with code %EXIT_CODE%.
exit /b %EXIT_CODE%
start_webui.bat ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
@echo off
REM start_webui.bat - quick launcher: activate the venv, refresh yt-dlp,
REM and start the AICoverGen WebUI.
cd /d "%~dp0"

REM Fail early if the virtual environment has not been created yet; without
REM this check, pip/python below would silently run against the global install.
if not exist "AICoverGen\Scripts\activate.bat" (
    echo [ERROR] Virtual environment "AICoverGen" not found. Create it first, e.g.: python -m venv AICoverGen
    pause
    exit /b 1
)

REM Activate virtual environment
call AICoverGen\Scripts\activate.bat

REM Update yt-dlp to fix YouTube download issues
echo Updating yt-dlp...
pip install --upgrade yt-dlp --quiet

REM Run webui with CUDA enabled
python src\webui.py

pause
start_webui.ps1 ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
# Launch the AICoverGen WebUI with the interpreter bundled in the local venv.

# Run from the repository root (the folder containing this script).
Set-Location -Path $PSScriptRoot

# Calling the venv's python.exe directly makes explicit activation unnecessary.
& "AICoverGen\Scripts\python.exe" "src\webui.py"