Adam3 commited on
Commit
b22f3f4
·
verified ·
1 Parent(s): 2dc41b2

Upload 11 files

Browse files
Files changed (11) hide show
  1. AICoverGen_colab.ipynb +127 -0
  2. LICENSE +21 -0
  3. README.md +224 -11
  4. README_Enhanced.md +248 -0
  5. cog.yaml +50 -0
  6. install_enhanced.bat +0 -0
  7. predict.py +276 -0
  8. requirements.txt +22 -0
  9. run_webui.bat +70 -0
  10. start_webui.bat +14 -0
  11. start_webui.ps1 +5 -0
AICoverGen_colab.ipynb ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "metadata": {
6
+ "id": "39k2mOCNAh6J"
7
+ },
8
+ "source": [
9
+ "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/SociallyIneptWeeb/AICoverGen/blob/main/AICoverGen_colab.ipynb)"
10
+ ]
11
+ },
12
+ {
13
+ "cell_type": "markdown",
14
+ "source": [
15
+ "# AICoverGen WebUI\n",
16
+ "\n",
17
+ "Simply click `Runtime` in the top navigation bar and `Run all`. Wait for the output of the final cell to show the public gradio url and click on it."
18
+ ],
19
+ "metadata": {
20
+ "id": "YYVAKuNBc-X4"
21
+ }
22
+ },
23
+ {
24
+ "cell_type": "code",
25
+ "execution_count": null,
26
+ "metadata": {
27
+ "id": "vC4gLMHI9xb3",
28
+ "cellView": "form"
29
+ },
30
+ "outputs": [],
31
+ "source": [
32
+ "#@title Clone repository\n",
33
+ "from IPython.display import clear_output, Javascript\n",
34
+ "import codecs\n",
35
+ "import threading\n",
36
+ "import time\n",
37
+ "cloneing=codecs.decode('uggcf://tvguho.pbz/FbpvnyylVarcgJrro/NVPbireTra.tvg','rot_13')\n",
38
+ "!git clone $cloneing HRVC\n",
39
+ "def update_timer_and_print():\n",
40
+ " global timer\n",
41
+ " while True:\n",
42
+ " hours, remainder = divmod(timer, 3600)\n",
43
+ " minutes, seconds = divmod(remainder, 60)\n",
44
+ " timer_str = f'{hours:02}:{minutes:02}:{seconds:02}'\n",
45
+ " print(f'\\rTimer: {timer_str}', end='', flush=True) # Print without a newline\n",
46
+ " time.sleep(1)\n",
47
+ " timer += 1\n",
48
+ "timer = 0\n",
49
+ "threading.Thread(target=update_timer_and_print, daemon=True).start()\n",
50
+ "%cd HRVC\n",
51
+ "clear_output()\n",
52
+ "print(\"Done Cloning Repository\")"
53
+ ]
54
+ },
55
+ {
56
+ "cell_type": "code",
57
+ "execution_count": null,
58
+ "metadata": {
59
+ "cellView": "form",
60
+ "id": "odzpJHpr_PaF"
61
+ },
62
+ "outputs": [],
63
+ "source": [
64
+ "#@title Install requirements\n",
65
+ "!pip install pip==23.3.1\n",
66
+ "!pip install -q -r requirements.txt\n",
67
+ "!pip install gradio-client==0.8.1\n",
68
+ "!pip install gradio==3.48.0\n",
69
+ "# install cuda fix\n",
70
+ "!python -m pip install ort-nightly-gpu --index-url=https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/ort-cuda-12-nightly/pypi/simple/\n",
71
+ "clear_output()\n",
72
+ "print(\"Finished Installing Requirements\")\n",
73
+ "!sudo apt update\n",
74
+ "clear_output()\n",
75
+ "print(\"Finished Updating\")\n",
76
+ "!sudo apt install sox\n",
77
+ "clear_output()\n",
78
+ "print(\"Finished running this cell, proceed to the next cell\")"
79
+ ]
80
+ },
81
+ {
82
+ "cell_type": "code",
83
+ "execution_count": null,
84
+ "metadata": {
85
+ "cellView": "form",
86
+ "id": "SLWpcJc0AHSZ"
87
+ },
88
+ "outputs": [],
89
+ "source": [
90
+ "#@title Download MDXNet Vocal Separation and Hubert Base Models\n",
91
+ "models=codecs.decode('fep/qbjaybnq_zbqryf.cl','rot_13')\n",
92
+ "!python $models\n",
93
+ "clear_output()\n",
94
+ "print(\"Finished Downloading Voice Separation Model and Hubert Base Model\")"
95
+ ]
96
+ },
97
+ {
98
+ "cell_type": "code",
99
+ "source": [
100
+ "#@title Run WebUI\n",
101
+ "runpice=codecs.decode('fep/jrohv.cl','rot_13')\n",
102
+ "!python $runpice --share"
103
+ ],
104
+ "metadata": {
105
+ "cellView": "form",
106
+ "id": "NEglTq6Ya9d0"
107
+ },
108
+ "execution_count": null,
109
+ "outputs": []
110
+ }
111
+ ],
112
+ "metadata": {
113
+ "accelerator": "GPU",
114
+ "colab": {
115
+ "provenance": []
116
+ },
117
+ "kernelspec": {
118
+ "display_name": "Python 3",
119
+ "name": "python3"
120
+ },
121
+ "language_info": {
122
+ "name": "python"
123
+ }
124
+ },
125
+ "nbformat": 4,
126
+ "nbformat_minor": 0
127
+ }
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2023 SociallyIneptWeeb
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
README.md CHANGED
@@ -1,14 +1,227 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
- title: AICoverGenEnhanced
3
- emoji: 💻
4
- colorFrom: pink
5
- colorTo: gray
6
- sdk: gradio
7
- sdk_version: 6.2.0
8
- app_file: app.py
9
- pinned: false
10
- license: apache-2.0
11
- short_description: Create ai covers with some more effects
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  ---
13
 
14
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # AICoverGen
2
+ An autonomous pipeline to create covers with any RVC v2 trained AI voice from YouTube videos or a local audio file. For developers who may want to add a singing functionality into their AI assistant/chatbot/vtuber, or for people who want to hear their favourite characters sing their favourite song.
3
+
4
+ <img width="1574" height="740" alt="image" src="https://github.com/user-attachments/assets/931189d8-e2e2-4240-84d6-52d7a13ac7f8" />
5
+
6
+ # AICoverGen Enhanced
7
+ **AI-Powered Voice Cover Generation with Advanced Audio Enhancement**
8
+
9
+ AICoverGen Enhanced is a powerful tool for creating AI voice covers with professional-grade audio enhancement features. This enhanced version includes advanced AI audio processing, EQ controls, dynamic range compression, and much more!
10
+
11
+ ---
12
+
13
+ ## ✨ New Features
14
+
15
+ ### Advanced Audio Enhancement
16
+ - **AI Noise Reduction** – Remove background noise and artifacts
17
+ - **Professional EQ** – 5 EQ types: Balanced, Vocal Boost, Bass Boost, Treble Boost, Flat
18
+ - **Dynamic Range Compression** – Improve loudness and consistency
19
+ - **Harmonic Enhancement** – Add richness and warmth to vocals
20
+ - **Stereo Widening** – Enhance spatial imaging for stereo tracks
21
+ - **Reverb Control** – Add depth and professional polish
22
+ - **Gain Control** – Fine-tune volume (-20 to +20 dB)
23
+
24
+ ### Enhancement Types
25
+ - **Full** – Balanced enhancement with all features
26
+ - **Light** – Subtle improvements for natural sound
27
+ - **Aggressive** – Maximum enhancement for impact
28
+ - **Custom** – Use your specific settings
29
+
30
+ ---
31
+
32
+ ## 🖥️ System Requirements
33
+
34
+ ### Minimum Requirements
35
+ - **OS:** Windows 10/11, Linux, or macOS
36
+ - **Python:** 3.9+ (3.10+ recommended)
37
+ - **RAM:** 8GB minimum, 16GB recommended
38
+ - **Storage:** 10GB free space
39
+ - **GPU:** NVIDIA GPU with CUDA support (recommended)
40
+
41
+ ### Recommended Setup
42
+ - **OS:** Windows 11 or Ubuntu 20.04+
43
+ - **Python:** 3.10 or 3.11
44
+ - **RAM:** 16GB or more
45
+ - **GPU:** NVIDIA RTX 3060 or better
46
+ - **CUDA:** 11.8 or 12.0+
47
+ - **cuDNN:** 8.6 or 9.0+
48
+
49
+ ---
50
+
51
+ ## ⚙️ Installation Guide
52
+
53
+ ### Step 1: Clone the Repository
54
+ ```bash
55
+ git clone https://github.com/SociallyIneptWeeb/AICoverGen.git
56
+ cd AICoverGen
57
+ ```
58
+
59
+ ### Step 2: Create Virtual Environment
60
+ ```bash
61
+ # Windows
62
+ python -m venv AICoverGen
63
+ AICoverGen\Scripts\activate
64
+
65
+ # Linux/macOS
66
+ python3 -m venv AICoverGen
67
+ source AICoverGen/bin/activate
68
+ ```
69
+
70
+ ### Step 3: Install Dependencies
71
+
72
+ #### Option A: Automatic Installation (Recommended)
73
+ ```bash
74
+ pip install -r requirements.txt
75
+ ```
76
+
77
+ #### Option B: Manual Installation
78
+ ```bash
79
+ # Core dependencies
80
+ pip install gradio==3.50.2 librosa==0.9.1 numpy==1.23.5 scipy==1.11.1 soundfile==0.12.1
81
+ pip install pedalboard==0.7.7 pydub==0.25.1 fairseq==0.12.2 faiss-cpu==1.7.3 pyworld==0.3.4
82
+ pip install praat-parselmouth>=0.4.2 ffmpeg-python>=0.2.0 tqdm==4.65.0 yt-dlp>=2025.9.23 sox==1.4.1
83
+
84
+ # AI Audio Enhancement dependencies
85
+ pip install noisereduce==3.0.3 scikit-learn==1.6.1
86
+
87
+ # PyTorch with CUDA support
88
+ pip install torch==2.0.1+cu118 --find-links https://download.pytorch.org/whl/torch_stable.html
89
+ pip install torchcrepe==0.0.20
90
+
91
+ # ONNX Runtime with CUDA support
92
+ pip install onnxruntime-gpu==1.18.0
93
+ ```
94
+
95
+ ### Step 4: Download Models
96
+ ```bash
97
+ python src/download_models.py
98
+ ```
99
+
100
+ ### Step 5: Verify Installation
101
+ ```bash
102
+ python src/audio_enhancer.py
103
+ ```
104
+
105
+ ---
106
+
107
+ ## 🚀 Usage
108
+
109
+ ### Quick Start
110
+ ```bash
111
+ python src/webui.py
112
+ ```
113
+ Then open your browser and go to: [http://127.0.0.1:7860](http://127.0.0.1:7860)
114
+
115
+ 1. Upload a song (YouTube URL or audio file)
116
+ 2. Select a voice model from the dropdown
117
+ 3. Configure audio enhancement:
118
+ - Expand "AI Audio Enhancement" section
119
+ - Choose enhancement type (Full/Light/Aggressive/Custom)
120
+ - Adjust EQ type (Balanced/Vocal Boost/Bass Boost/Treble Boost/Flat)
121
+ - Set noise reduction strength (0–100%)
122
+ - Adjust gain (-20 to +20 dB)
123
+ - Set compression ratio (1–10)
124
+ - Add reverb amount (0–100%)
125
+ 4. Click **Generate** and enjoy your enhanced AI cover!
126
+
127
+ ---
128
+
129
+ ## 🛠️ Troubleshooting
130
+
131
+ ### CUDA Not Detected
132
+ ```bash
133
+ # Check CUDA installation
134
+ nvidia-smi
135
+
136
+ # Verify PyTorch CUDA support
137
+ python -c "import torch; print(torch.cuda.is_available())"
138
+
139
+ # Check ONNX Runtime CUDA
140
+ python -c "import onnxruntime as ort; print('CUDA' in ort.get_available_providers())"
141
+ ```
142
+
143
+ ### Audio Enhancement Errors
144
+ ```bash
145
+ # Test audio enhancer
146
+ python src/audio_enhancer.py
147
+
148
+ # Check dependencies
149
+ pip list | grep -E "(noisereduce|scikit-learn|pedalboard)"
150
+ ```
151
+
152
+ ### Memory Issues
153
+ - Reduce batch size in settings
154
+ - Use CPU-only mode for ONNX Runtime
155
+ - Close other applications to free RAM
156
+
157
+ ---
158
+
159
+ ## 📂 Project Structure
160
+ ```
161
+ AICoverGen_Enhanced/
162
+ ├── src/
163
+ │ ├── webui.py # Main web interface
164
+ │ ├── main.py # Core pipeline with audio enhancement
165
+ │ ├── audio_enhancer.py # AI audio enhancement module
166
+ │ ├── rvc.py # RVC voice conversion
167
+ │ ├── mdx.py # Audio separation
168
+ ├── rvc_models/ # Voice models
169
+ ├── mdxnet_models/ # Audio separation models
170
+ ├── song_output/ # Generated covers
171
+ ├── requirements.txt # Dependencies
172
+ ├── README_Enhanced.md # This file
173
+ ```
174
+
175
  ---
176
+
177
+ ## 🎚️ Audio Enhancement Features
178
+
179
+ ### AI Noise Reduction
180
+ - Uses ML to identify and remove background noise
181
+ - Preserves vocal clarity while eliminating artifacts
182
+ - Adjustable strength (0–100%)
183
+
184
+ ### Professional EQ
185
+ - **Balanced**: Gentle mid boost for clarity
186
+ - **Vocal Boost**: Emphasizes 800–3000 Hz range
187
+ - **Bass Boost**: Enhances 60–250 Hz
188
+ - **Treble Boost**: Brightens 4–16 kHz
189
+ - **Flat**: Minimal processing with high-pass filter
190
+
191
+ ### Dynamic Range Compression
192
+ - Improves loudness consistency
193
+ - Reduces dynamic range for streaming
194
+ - Configurable ratio (1–10)
195
+
196
+ ### Harmonic Enhancement
197
+ - Adds warmth and richness
198
+ - Uses soft saturation for natural harmonics
199
+
200
+ ### Stereo Widening
201
+ - Improves spatial imaging
202
+ - Enhances left-right separation
203
+ - Creates immersive experience
204
+
205
+ ### Reverb Control
206
+ - Adds subtle depth and space
207
+ - Professional room simulation
208
+ - Configurable wet/dry mix
209
+
210
  ---
211
 
212
+ ## 🤝 Contributing
213
+ We welcome contributions! Please see our **Contributing Guidelines** for details.
214
+
215
+ ---
216
+
217
+ ## ⚖️ License
218
+ This project is licensed under the **MIT License** – see the LICENSE file for details.
219
+
220
+ ---
221
+
222
+ ## 🙏 Acknowledgments
223
+ - Original AICoverGen by **SociallyIneptWeeb**
224
+ - RVC (Retrieval-based Voice Conversion) framework
225
+ - MDXNet for audio separation
226
+ - All the amazing open-source audio processing libraries
227
+
README_Enhanced.md ADDED
@@ -0,0 +1,248 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # AICoverGen Enhanced
2
+
3
+ **AI-Powered Voice Cover Generation with Advanced Audio Enhancement**
4
+
5
+ AICoverGen Enhanced is a powerful tool for creating AI voice covers with professional-grade audio enhancement features. This enhanced version includes advanced AI audio processing, EQ controls, dynamic range compression, and much more!
6
+
7
+ ## New Features
8
+
9
+ ### Advanced Audio Enhancement
10
+ - **AI Noise Reduction** - Remove background noise and artifacts
11
+ - **Professional EQ** - 5 EQ types: Balanced, Vocal Boost, Bass Boost, Treble Boost, Flat
12
+ - **Dynamic Range Compression** - Improve loudness and consistency
13
+ - **Harmonic Enhancement** - Add richness and warmth to vocals
14
+ - **Stereo Widening** - Enhance spatial imaging for stereo tracks
15
+ - **Reverb Control** - Add depth and professional polish
16
+ - **Gain Control** - Fine-tune volume (-20 to +20 dB)
17
+
18
+ ### Enhancement Types
19
+ - **Full** - Balanced enhancement with all features
20
+ - **Light** - Subtle improvements for natural sound
21
+ - **Aggressive** - Maximum enhancement for impact
22
+ - **Custom** - Use your specific settings
23
+
24
+ ## System Requirements
25
+
26
+ ### Minimum Requirements
27
+ - **OS**: Windows 10/11, Linux, or macOS
28
+ - **Python**: 3.9+ (3.10+ recommended)
29
+ - **RAM**: 8GB minimum, 16GB recommended
30
+ - **Storage**: 10GB free space
31
+ - **GPU**: NVIDIA GPU with CUDA support (recommended)
32
+
33
+ ### Recommended Setup
34
+ - **OS**: Windows 11 or Ubuntu 20.04+
35
+ - **Python**: 3.10 or 3.11
36
+ - **RAM**: 16GB or more
37
+ - **GPU**: NVIDIA RTX 3060 or better
38
+ - **CUDA**: 11.8 or 12.0+
39
+ - **cuDNN**: 8.6 or 9.0+
40
+
41
+ ## Installation Guide
42
+
43
+ ### Step 1: Clone the Repository
44
+ ```bash
45
+ git clone https://github.com/SociallyIneptWeeb/AICoverGen.git
46
+ cd AICoverGen
47
+ ```
48
+
49
+ ### Step 2: Create Virtual Environment
50
+ ```bash
51
+ # Windows
52
+ python -m venv AICoverGen
53
+ AICoverGen\Scripts\activate
54
+
55
+ # Linux/macOS
56
+ python3 -m venv AICoverGen
57
+ source AICoverGen/bin/activate
58
+ ```
59
+
60
+ ### Step 3: Install Dependencies
61
+
62
+ #### Option A: Automatic Installation (Recommended)
63
+ ```bash
64
+ pip install -r requirements.txt
65
+ ```
66
+
67
+ #### Option B: Manual Installation
68
+ ```bash
69
+ # Core dependencies
70
+ pip install gradio==3.50.2
71
+ pip install librosa==0.9.1
72
+ pip install numpy==1.23.5
73
+ pip install scipy==1.11.1
74
+ pip install soundfile==0.12.1
75
+ pip install pedalboard==0.7.7
76
+ pip install pydub==0.25.1
77
+ pip install fairseq==0.12.2
78
+ pip install faiss-cpu==1.7.3
79
+ pip install pyworld==0.3.4
80
+ pip install praat-parselmouth>=0.4.2
81
+ pip install ffmpeg-python>=0.2.0
82
+ pip install tqdm==4.65.0
83
+ pip install yt-dlp>=2025.9.23
84
+ pip install sox==1.4.1
85
+
86
+ # AI Audio Enhancement dependencies
87
+ pip install noisereduce==3.0.3
88
+ pip install scikit-learn==1.6.1
89
+
90
+ # PyTorch with CUDA support
91
+ pip install torch==2.0.1+cu118 --find-links https://download.pytorch.org/whl/torch_stable.html
92
+ pip install torchcrepe==0.0.20
93
+
94
+ # ONNX Runtime with CUDA support
95
+ pip install onnxruntime-gpu==1.18.0
96
+ ```
97
+
98
+ ### Step 4: Download Models
99
+ ```bash
100
+ python src/download_models.py
101
+ ```
102
+
103
+ ### Step 5: Verify Installation
104
+ ```bash
105
+ python src/audio_enhancer.py
106
+ ```
107
+
108
+ ## Usage
109
+
110
+ ### Quick Start
111
+ 1. **Start the Web UI**:
112
+ ```bash
113
+ python src/webui.py
114
+ ```
115
+
116
+ 2. **Open your browser** and go to http://127.0.0.1:7860
117
+
118
+ 3. **Upload a song** (YouTube URL or audio file)
119
+
120
+ 4. **Select a voice model** from the dropdown
121
+
122
+ 5. **Configure audio enhancement**:
123
+ - Expand "AI Audio Enhancement" section
124
+ - Choose enhancement type (Full/Light/Aggressive/Custom)
125
+ - Adjust EQ type (Balanced/Vocal Boost/Bass Boost/Treble Boost/Flat)
126
+ - Set noise reduction strength (0-100%)
127
+ - Adjust gain (-20 to +20 dB)
128
+ - Set compression ratio (1-10)
129
+ - Add reverb amount (0-100%)
130
+
131
+ 6. **Click Generate** and enjoy your enhanced AI cover!
132
+
133
+ ## Troubleshooting
134
+
135
+ ### Common Issues
136
+
137
+ #### CUDA Not Detected
138
+ ```bash
139
+ # Check CUDA installation
140
+ nvidia-smi
141
+
142
+ # Verify PyTorch CUDA support
143
+ python -c "import torch; print(torch.cuda.is_available())"
144
+
145
+ # Check ONNX Runtime CUDA
146
+ python -c "import onnxruntime as ort; print('CUDA' in ort.get_available_providers())"
147
+ ```
148
+
149
+ #### Audio Enhancement Errors
150
+ ```bash
151
+ # Test audio enhancer
152
+ python src/audio_enhancer.py
153
+
154
+ # Check dependencies
155
+ pip list | grep -E "(noisereduce|scikit-learn|pedalboard)"
156
+ ```
157
+
158
+ #### Memory Issues
159
+ - Reduce batch size in settings
160
+ - Use CPU-only mode for ONNX Runtime
161
+ - Close other applications to free RAM
162
+
163
+ ## Project Structure
164
+
165
+ ```
166
+ AICoverGen_Enhanced/
167
+ src/
168
+ webui.py # Main web interface
169
+ main.py # Core pipeline with audio enhancement
170
+ audio_enhancer.py # AI audio enhancement module
171
+ rvc.py # RVC voice conversion
172
+ mdx.py # Audio separation
173
+ ...
174
+ rvc_models/ # Voice models
175
+ mdxnet_models/ # Audio separation models
176
+ song_output/ # Generated covers
177
+ requirements.txt # Dependencies
178
+ README_Enhanced.md # This file
179
+ ```
180
+
181
+ ## Audio Enhancement Features
182
+
183
+ ### AI Noise Reduction
184
+ - Uses machine learning to identify and remove background noise
185
+ - Preserves vocal clarity while eliminating artifacts
186
+ - Adjustable strength from 0-100%
187
+
188
+ ### Professional EQ
189
+ - **Balanced**: Gentle mid boost for overall clarity
190
+ - **Vocal Boost**: Emphasizes 800-3000 Hz range for vocals
191
+ - **Bass Boost**: Enhances 60-250 Hz for low-end presence
192
+ - **Treble Boost**: Brightens 4-16 kHz for crispness
193
+ - **Flat**: Minimal processing with high-pass filter
194
+
195
+ ### Dynamic Range Compression
196
+ - Improves loudness consistency
197
+ - Reduces dynamic range for better streaming
198
+ - Configurable ratio from 1-10
199
+
200
+ ### Harmonic Enhancement
201
+ - Adds warmth and richness to vocals
202
+ - Uses soft saturation for natural harmonics
203
+ - Enhances perceived quality
204
+
205
+ ### Stereo Widening
206
+ - Improves spatial imaging for stereo tracks
207
+ - Enhances left-right separation
208
+ - Creates more immersive listening experience
209
+
210
+ ### Reverb Control
211
+ - Adds subtle depth and space
212
+ - Professional room simulation
213
+ - Configurable wet/dry mix
214
+
215
+ ## Contributing
216
+
217
+ We welcome contributions! Please see our [Contributing Guidelines](CONTRIBUTING.md) for details.
218
+
219
+ ### Development Setup
220
+ ```bash
221
+ git clone https://github.com/SociallyIneptWeeb/AICoverGen.git
222
+ cd AICoverGen
223
+ pip install -r requirements.txt
224
+ pip install -r requirements-dev.txt # If available
225
+ ```
226
+
227
+ ## License
228
+
229
+ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
230
+
231
+ ## Acknowledgments
232
+
233
+ - Original AICoverGen by [SociallyIneptWeeb](https://github.com/SociallyIneptWeeb)
234
+ - RVC (Retrieval-based Voice Conversion) framework
235
+ - MDXNet for audio separation
236
+ - All the amazing open-source audio processing libraries
237
+
238
+ ## Support
239
+
240
+ - **Issues**: [GitHub Issues](https://github.com/SociallyIneptWeeb/AICoverGen/issues)
241
+ - **Discussions**: [GitHub Discussions](https://github.com/SociallyIneptWeeb/AICoverGen/discussions)
242
+ - **Documentation**: [Wiki](https://github.com/SociallyIneptWeeb/AICoverGen/wiki)
243
+
244
+ ---
245
+
246
+ **Happy Creating!**
247
+
248
+ *Create amazing AI voice covers with professional audio enhancement!*
cog.yaml ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Configuration for Cog ⚙️
2
+ # Reference: https://github.com/replicate/cog/blob/main/docs/yaml.md
3
+
4
+ build:
5
+ # set to true if your model requires a GPU
6
+ gpu: true
7
+
8
+ # a list of ubuntu apt packages to install
9
+ system_packages:
10
+ - "libgl1-mesa-glx"
11
+ - "ffmpeg"
12
+ - "sox"
13
+
14
+ # python version in the form '3.11' or '3.11.4'
15
+ python_version: "3.9"
16
+
17
+ # a list of packages in the format <package-name>==<version>
18
+ python_packages:
19
+ - "deemix"
20
+ - "fairseq==0.12.2"
21
+ - "faiss-cpu==1.7.3"
22
+ - "ffmpeg-python>=0.2.0"
23
+ - "gradio==3.39.0"
24
+ - "lib==4.0.0"
25
+ - "librosa==0.9.1"
26
+ - "numpy==1.23.5"
27
+ - "onnxruntime_gpu"
28
+ - "praat-parselmouth>=0.4.2"
29
+ - "pedalboard==0.7.7"
30
+ - "pydub==0.25.1"
31
+ - "pyworld==0.3.4"
32
+ - "Requests==2.31.0"
33
+ - "scipy==1.11.1"
34
+ - "soundfile==0.12.1"
35
+ - "--find-links https://download.pytorch.org/whl/torch_stable.html"
36
+ - "torch==2.0.1+cu118"
37
+ - "torchcrepe==0.0.20"
38
+ - "tqdm==4.65.0"
39
+ - "yt_dlp==2023.7.6"
40
+ - "sox==1.4.1"
41
+ - "gradio"
42
+
43
+ # commands run after the environment is setup
44
+ run:
45
+ - pip install --upgrade pip
46
+ - apt-get update && apt-get install -y ffmpeg
47
+ - pip install imageio[ffmpeg]
48
+
49
+ # predict.py defines how predictions are run on your model
50
+ predict: "predict.py:Predictor"
install_enhanced.bat ADDED
Binary file (116 Bytes). View file
 
predict.py ADDED
@@ -0,0 +1,276 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Prediction interface for Cog ⚙️
2
+ # https://github.com/replicate/cog/blob/main/docs/python.md
3
+
4
+ import os
5
+ import sys
6
+ import shutil
7
+ import zipfile
8
+ import urllib.request
9
+ from argparse import Namespace
10
+ from cog import BasePredictor, Input, Path as CogPath
11
+
12
+ sys.path.insert(0, os.path.abspath("src"))
13
+
14
+ import main as m
15
+
16
+
17
def download_online_model(url, dir_name):
    """Download an RVC voice-model zip from *url* and extract it.

    The archive's files are flattened (basenames only, directories skipped)
    into ``m.rvc_models_dir/dir_name``. If that directory already exists the
    download is skipped entirely. Pixeldrain share links are rewritten to the
    direct API endpoint before downloading.

    Parameters:
        url (str): Direct (or pixeldrain share) URL of the model zip.
        dir_name (str): Name of the target model directory.

    Side effects:
        Creates the model directory, writes the extracted files, and removes
        the temporary zip archive afterwards (the original implementation
        left the archive behind on disk).
    """
    print(f"[~] Downloading voice model with name {dir_name}...")
    zip_name = url.split("/")[-1]
    extraction_folder = os.path.join(m.rvc_models_dir, dir_name)
    if os.path.exists(extraction_folder):
        print(f"Voice model directory {dir_name} already exists! Skipping download.")
        return

    # Pixeldrain share pages aren't the file itself; use the API route.
    if "pixeldrain.com" in url:
        url = f"https://pixeldrain.com/api/file/{zip_name}"

    urllib.request.urlretrieve(url, zip_name)

    print("[~] Extracting zip...")
    # Create the target directory once up front (the original recreated it
    # for every zip member inside the loop).
    os.makedirs(extraction_folder, exist_ok=True)
    try:
        with zipfile.ZipFile(zip_name, "r") as zip_ref:
            for member in zip_ref.infolist():
                # Skip directory entries; only real files are extracted.
                if member.is_dir():
                    continue
                # Flatten: drop internal zip paths, keep the basename only.
                target_path = os.path.join(
                    extraction_folder, os.path.basename(member.filename)
                )
                with zip_ref.open(member) as source, open(target_path, "wb") as target:
                    shutil.copyfileobj(source, target)
    finally:
        # Fix: clean up the downloaded archive instead of leaking it on disk.
        os.remove(zip_name)
    print(f"[+] {dir_name} Model successfully downloaded!")
46
+
47
+
48
+ class Predictor(BasePredictor):
49
+ def setup(self) -> None:
50
+ """Load the model into memory to make running multiple predictions efficient"""
51
+ pass
52
+
53
+ def predict(
54
+ self,
55
+ song_input: CogPath = Input(
56
+ description="Upload your audio file here.",
57
+ default=None,
58
+ ),
59
+ rvc_model: str = Input(
60
+ description="RVC model for a specific voice. If using a custom model, this should match the name of the downloaded model. If a 'custom_rvc_model_download_url' is provided, this will be automatically set to the name of the downloaded model.",
61
+ default="Squidward",
62
+ choices=[
63
+ "Squidward",
64
+ "MrKrabs",
65
+ "Plankton",
66
+ "Drake",
67
+ "Vader",
68
+ "Trump",
69
+ "Biden",
70
+ "Obama",
71
+ "Guitar",
72
+ "Voilin",
73
+ "CUSTOM",
74
+ "SamA", # TODO REMOVE THIS
75
+ ],
76
+ ),
77
+ custom_rvc_model_download_url: str = Input(
78
+ description="URL to download a custom RVC model. If provided, the model will be downloaded (if it doesn't already exist) and used for prediction, regardless of the 'rvc_model' value.",
79
+ default=None,
80
+ ),
81
+ pitch_change: str = Input(
82
+ description="Adjust pitch of AI vocals. Options: `no-change`, `male-to-female`, `female-to-male`.",
83
+ default="no-change",
84
+ choices=["no-change", "male-to-female", "female-to-male"],
85
+ ),
86
+ index_rate: float = Input(
87
+ description="Control how much of the AI's accent to leave in the vocals.",
88
+ default=0.5,
89
+ ge=0,
90
+ le=1,
91
+ ),
92
+ filter_radius: int = Input(
93
+ description="If >=3: apply median filtering median filtering to the harvested pitch results.",
94
+ default=3,
95
+ ge=0,
96
+ le=7,
97
+ ),
98
+ rms_mix_rate: float = Input(
99
+ description="Control how much to use the original vocal's loudness (0) or a fixed loudness (1).",
100
+ default=0.25,
101
+ ge=0,
102
+ le=1,
103
+ ),
104
+ pitch_detection_algorithm: str = Input(
105
+ description="Best option is rmvpe (clarity in vocals), then mangio-crepe (smoother vocals).",
106
+ default="rmvpe",
107
+ choices=["rmvpe", "mangio-crepe"],
108
+ ),
109
+ crepe_hop_length: int = Input(
110
+ description="When `pitch_detection_algo` is set to `mangio-crepe`, this controls how often it checks for pitch changes in milliseconds. Lower values lead to longer conversions and higher risk of voice cracks, but better pitch accuracy.",
111
+ default=128,
112
+ ),
113
+ protect: float = Input(
114
+ description="Control how much of the original vocals' breath and voiceless consonants to leave in the AI vocals. Set 0.5 to disable.",
115
+ default=0.33,
116
+ ge=0,
117
+ le=0.5,
118
+ ),
119
+ main_vocals_volume_change: float = Input(
120
+ description="Control volume of main AI vocals. Use -3 to decrease the volume by 3 decibels, or 3 to increase the volume by 3 decibels.",
121
+ default=0,
122
+ ),
123
+ backup_vocals_volume_change: float = Input(
124
+ description="Control volume of backup AI vocals.",
125
+ default=0,
126
+ ),
127
+ instrumental_volume_change: float = Input(
128
+ description="Control volume of the background music/instrumentals.",
129
+ default=0,
130
+ ),
131
+ pitch_change_all: float = Input(
132
+ description="Change pitch/key of background music, backup vocals and AI vocals in semitones. Reduces sound quality slightly.",
133
+ default=0,
134
+ ),
135
+ reverb_size: float = Input(
136
+ description="The larger the room, the longer the reverb time.",
137
+ default=0.15,
138
+ ge=0,
139
+ le=1,
140
+ ),
141
+ reverb_wetness: float = Input(
142
+ description="Level of AI vocals with reverb.",
143
+ default=0.2,
144
+ ge=0,
145
+ le=1,
146
+ ),
147
+ reverb_dryness: float = Input(
148
+ description="Level of AI vocals without reverb.",
149
+ default=0.8,
150
+ ge=0,
151
+ le=1,
152
+ ),
153
+ reverb_damping: float = Input(
154
+ description="Absorption of high frequencies in the reverb.",
155
+ default=0.7,
156
+ ge=0,
157
+ le=1,
158
+ ),
159
+ output_format: str = Input(
160
+ description="wav for best quality and large file size, mp3 for decent quality and small file size.",
161
+ default="mp3",
162
+ choices=["mp3", "wav"],
163
+ ),
164
+ ) -> CogPath:
165
+ """
166
+ Runs a single prediction on the model.
167
+
168
+ Required Parameters:
169
+ song_input (CogPath): Upload your audio file here.
170
+ rvc_model (str): RVC model for a specific voice. Default is "Squidward". If a 'custom_rvc_model_download_url' is provided, this will be automatically set to the name of the downloaded model.
171
+ pitch_change (float): Change pitch of AI vocals in octaves. Set to 0 for no change. Generally, use 1 for male to female conversions and -1 for vice-versa.
172
+
173
+ Optional Parameters:
174
+ custom_rvc_model_download_url (str): URL to download a custom RVC model. If provided, the model will be downloaded (if it doesn't already exist) and used for prediction, regardless of the 'rvc_model' value. Defaults to None.
175
+ index_rate (float): Control how much of the AI's accent to leave in the vocals. 0 <= INDEX_RATE <= 1. Defaults to 0.5.
176
+ filter_radius (int): If >=3: apply median filtering median filtering to the harvested pitch results. 0 <= FILTER_RADIUS <= 7. Defaults to 3.
177
+ rms_mix_rate (float): Control how much to use the original vocal's loudness (0) or a fixed loudness (1). 0 <= RMS_MIX_RATE <= 1. Defaults to 0.25.
178
+ pitch_detection_algorithm (str): Best option is rmvpe (clarity in vocals), then mangio-crepe (smoother vocals). Defaults to "rmvpe".
179
+ crepe_hop_length (int): Controls how often it checks for pitch changes in milliseconds when using mangio-crepe algo specifically. Lower values leads to longer conversions and higher risk of voice cracks, but better pitch accuracy. Defaults to 128.
180
+ protect (float): Control how much of the original vocals' breath and voiceless consonants to leave in the AI vocals. Set 0.5 to disable. 0 <= PROTECT <= 0.5. Defaults to 0.33.
181
+ main_vocals_volume_change (float): Control volume of main AI vocals. Use -3 to decrease the volume by 3 decibels, or 3 to increase the volume by 3 decibels. Defaults to 0.
182
+ backup_vocals_volume_change (float): Control volume of backup AI vocals. Defaults to 0.
183
+ instrumental_volume_change (float): Control volume of the background music/instrumentals. Defaults to 0.
184
+ pitch_change_all (float): Change pitch/key of background music, backup vocals and AI vocals in semitones. Reduces sound quality slightly. Defaults to 0.
185
+ reverb_size (float): The larger the room, the longer the reverb time. 0 <= REVERB_SIZE <= 1. Defaults to 0.15.
186
+ reverb_wetness (float): Level of AI vocals with reverb. 0 <= REVERB_WETNESS <= 1. Defaults to 0.2.
187
+ reverb_dryness (float): Level of AI vocals without reverb. 0 <= REVERB_DRYNESS <= 1. Defaults to 0.8.
188
+ reverb_damping (float): Absorption of high frequencies in the reverb. 0 <= REVERB_DAMPING <= 1. Defaults to 0.7.
189
+ output_format (str): wav for best quality and large file size, mp3 for decent quality and small file size. Defaults to "mp3".
190
+
191
+ Returns:
192
+ CogPath: The output path of the generated audio file.
193
+ """
194
+
195
+ if custom_rvc_model_download_url:
196
+ custom_rvc_model_download_name = urllib.parse.unquote(
197
+ custom_rvc_model_download_url.split("/")[-1]
198
+ )
199
+ custom_rvc_model_download_name = os.path.splitext(
200
+ custom_rvc_model_download_name
201
+ )[0]
202
+ print(
203
+ f"[!] The model will be downloaded as '{custom_rvc_model_download_name}'."
204
+ )
205
+ download_online_model(
206
+ url=custom_rvc_model_download_url,
207
+ dir_name=custom_rvc_model_download_name,
208
+ )
209
+ rvc_model = custom_rvc_model_download_name
210
+ else:
211
+ print(
212
+ "[!] No custom model URL provided; using the selected `rvc_model` as-is."
213
+ )
214
+
215
+ # Convert pitch_change from string to numerical value for processing
216
+ # 0 for no change, 1 for male to female, -1 for female to male
217
+ if pitch_change == "no-change":
218
+ pitch_change = 0
219
+ elif pitch_change == "male-to-female":
220
+ pitch_change = 1
221
+ else: # pitch_change == "female-to-male"
222
+ pitch_change = -1
223
+
224
+ args = Namespace(
225
+ song_input=str(song_input),
226
+ rvc_dirname=(model_dir_name := rvc_model),
227
+ pitch_change=pitch_change,
228
+ keep_files=(keep_files := False),
229
+ index_rate=index_rate,
230
+ filter_radius=filter_radius,
231
+ rms_mix_rate=rms_mix_rate,
232
+ pitch_detection_algo=pitch_detection_algorithm,
233
+ crepe_hop_length=crepe_hop_length,
234
+ protect=protect,
235
+ main_vol=main_vocals_volume_change,
236
+ backup_vol=backup_vocals_volume_change,
237
+ inst_vol=instrumental_volume_change,
238
+ pitch_change_all=pitch_change_all,
239
+ reverb_size=reverb_size,
240
+ reverb_wetness=reverb_wetness,
241
+ reverb_dryness=reverb_dryness,
242
+ reverb_damping=reverb_damping,
243
+ output_format=output_format,
244
+ )
245
+
246
+ rvc_dirname = args.rvc_dirname
247
+ if not os.path.exists(os.path.join(m.rvc_models_dir, rvc_dirname)):
248
+ raise Exception(
249
+ f"The folder {os.path.join(m.rvc_models_dir, rvc_dirname)} does not exist."
250
+ )
251
+
252
+ cover_path = m.song_cover_pipeline(
253
+ args.song_input,
254
+ rvc_dirname,
255
+ args.pitch_change,
256
+ args.keep_files,
257
+ main_gain=args.main_vol,
258
+ backup_gain=args.backup_vol,
259
+ inst_gain=args.inst_vol,
260
+ index_rate=args.index_rate,
261
+ filter_radius=args.filter_radius,
262
+ rms_mix_rate=args.rms_mix_rate,
263
+ f0_method=args.pitch_detection_algo,
264
+ crepe_hop_length=args.crepe_hop_length,
265
+ protect=args.protect,
266
+ pitch_change_all=args.pitch_change_all,
267
+ reverb_rm_size=args.reverb_size,
268
+ reverb_wet=args.reverb_wetness,
269
+ reverb_dry=args.reverb_dryness,
270
+ reverb_damping=args.reverb_damping,
271
+ output_format=args.output_format,
272
+ )
273
+ print(f"[+] Cover generated at {cover_path}")
274
+
275
+ # Return the output path
276
+ return CogPath(cover_path)
requirements.txt ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ deemix
2
+ fairseq==0.12.2
3
+ faiss-cpu==1.7.3
4
+ ffmpeg-python>=0.2.0
5
+ gradio==3.39.0
6
+ lib==4.0.0
7
+ librosa==0.9.1
8
+ numpy==1.23.5
9
+ onnxruntime_gpu
10
+ praat-parselmouth>=0.4.2
11
+ pedalboard==0.7.7
12
+ pydub==0.25.1
13
+ pyworld==0.3.4
14
+ Requests==2.31.0
15
+ scipy==1.11.1
16
+ soundfile==0.12.1
17
+ --find-links https://download.pytorch.org/whl/torch_stable.html
18
+ torch==2.0.1+cu118
19
+ torchcrepe==0.0.20
20
+ tqdm==4.65.0
21
+ yt_dlp==2023.7.6
22
+ sox==1.4.1
run_webui.bat ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
@echo off
REM run_webui.bat - install dependencies into the local virtual environment
REM and launch the AICoverGen Gradio WebUI. Extra arguments are forwarded to
REM src\webui.py (e.g. --listen for LAN access).
setlocal enableextensions enabledelayedexpansion

REM Change to repo root (folder of this script)
cd /d "%~dp0"

REM ---------- Config ----------
set "VENV_DIR=AICoverGen"
set "PYTHON_EXE=%VENV_DIR%\Scripts\python.exe"
set "PIP_EXE=%VENV_DIR%\Scripts\pip.exe"

REM Fail early if the virtual environment is missing - checked BEFORE
REM activation so the user gets a clear message instead of an activation error.
if not exist "%PYTHON_EXE%" (
    echo [ERROR] Virtual environment not found at %VENV_DIR%.
    echo Expected: %PYTHON_EXE%
    echo Create one first, or ensure the repo was set up correctly.
    echo.
    echo Example to create venv:
    echo python -m venv AICoverGen
    exit /b 1
)

REM Activate virtual environment
call "%VENV_DIR%\Scripts\activate.bat"

REM Force ONNX Runtime to use CPU provider (PyTorch will still use GPU)
set "ORT_DISABLE_CUDA=1"

REM Optional: choose GPU index for PyTorch (0 = first GPU)
set "CUDA_VISIBLE_DEVICES=0"

REM Ensure UTF-8 output
set "PYTHONUTF8=1"
REM ----------------------------

REM Upgrade pip (silent-ish)
"%PYTHON_EXE%" -m pip install --upgrade pip --disable-pip-version-check 1>nul 2>nul

REM Install core requirements
"%PIP_EXE%" install -r requirements.txt --no-input
if errorlevel 1 (
    echo [ERROR] Failed installing requirements.
    exit /b 1
)

REM Ensure a compatible Gradio version (3.50.2 is known-good with this UI);
REM deliberately overrides the pin in requirements.txt.
"%PIP_EXE%" install "gradio==3.50.2" --no-input
if errorlevel 1 (
    echo [ERROR] Failed installing Gradio.
    exit /b 1
)

REM Check ffmpeg availability (recommended)
where ffmpeg >nul 2>nul
if errorlevel 1 (
    echo [WARN] ffmpeg not found in PATH. Audio processing may fail.
    echo Install ffmpeg and add it to PATH: https://ffmpeg.org/download.html
)

REM Optional: open the UI in browser after a short delay
start "" /b cmd /c "timeout /t 3 /nobreak >nul & start http://127.0.0.1:7860"

REM Run the WebUI. Add --listen to allow LAN access if desired.
"%PYTHON_EXE%" src\webui.py %*

REM Preserve exit code
set "EXIT_CODE=%ERRORLEVEL%"
echo.
echo Server exited with code %EXIT_CODE%.
exit /b %EXIT_CODE%
start_webui.bat ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
@echo off
REM start_webui.bat - quick launcher: activate the venv, refresh yt-dlp,
REM and start the AICoverGen WebUI.
cd /d "%~dp0"

REM Fail early if the virtual environment has not been created yet; without
REM this check, pip/python below would silently run against the global install.
if not exist "AICoverGen\Scripts\activate.bat" (
    echo [ERROR] Virtual environment "AICoverGen" not found. Create it first, e.g.: python -m venv AICoverGen
    pause
    exit /b 1
)

REM Activate virtual environment
call AICoverGen\Scripts\activate.bat

REM Update yt-dlp to fix YouTube download issues
echo Updating yt-dlp...
pip install --upgrade yt-dlp --quiet

REM Run webui with CUDA enabled
python src\webui.py

pause
start_webui.ps1 ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
# Launch the AICoverGen WebUI with the interpreter bundled in the local venv.

# Run from the repository root (the folder containing this script).
Set-Location -Path $PSScriptRoot

# Calling the venv's python.exe directly makes explicit activation unnecessary.
& "AICoverGen\Scripts\python.exe" "src\webui.py"