Commit ·
ddadeb4
1
Parent(s): 24d9ef5
SESA: GitHub'dan güncel dosyalar aktarıldı - 2026-03-18 23:04
Browse files- assets/config.json +5 -0
- assets/custom_models.json +1 -0
- assets/i18n/languages/ar_sa.json +196 -131
- assets/i18n/languages/de_de.json +186 -120
- assets/i18n/languages/en_us.json +100 -1
- assets/i18n/languages/es_es.json +199 -100
- assets/i18n/languages/fr_fr.json +87 -3
- assets/i18n/languages/hi_in.json +225 -167
- assets/i18n/languages/it_it.json +222 -123
- assets/i18n/languages/ja_jp.json +203 -138
- assets/i18n/languages/ru_ru.json +178 -112
- assets/i18n/languages/tr_tr.json +192 -114
- assets/i18n/languages/zn_cn.json +187 -121
- benchmark_pytorch.py +252 -0
- config_manager.py +5 -1
- download.py +87 -18
- ensemble.py +417 -256
- gui.py +648 -116
- helpers.py +25 -8
- inference.py +154 -180
- inference_pytorch.py +390 -0
- main.py +12 -1
- model.py +1126 -32
- models/bs_roformer/attend_sage.py +145 -0
- models/bs_roformer/bs_roformer_experimental.py +686 -0
- models/bs_roformer/mel_band_roformer_experimental.py +733 -0
- models/conformer_model.py +188 -0
- models/mel_band_conformer.py +424 -0
- models/scnet/scnet_masked.py +419 -0
- models/scnet/scnet_tran.py +668 -0
- phase_fixer.py +139 -0
- processing.py +543 -242
- pytorch_backend.py +621 -0
- requirements.txt +9 -2
- utils.py +455 -130
assets/config.json
CHANGED
|
@@ -2,5 +2,10 @@
|
|
| 2 |
"lang": {
|
| 3 |
"override": true,
|
| 4 |
"selected_lang": "en_us"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
}
|
| 6 |
}
|
|
|
|
| 2 |
"lang": {
|
| 3 |
"override": true,
|
| 4 |
"selected_lang": "en_us"
|
| 5 |
+
},
|
| 6 |
+
"sharing": {
|
| 7 |
+
"method": "gradio",
|
| 8 |
+
"ngrok_token": "",
|
| 9 |
+
"port": 8142
|
| 10 |
}
|
| 11 |
}
|
assets/custom_models.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{}
|
assets/i18n/languages/ar_sa.json
CHANGED
|
@@ -17,11 +17,10 @@
|
|
| 17 |
"overlap": "التداخل",
|
| 18 |
"overlap_info": "موصى به: 2-10 (القيم الأعلى تزيد من الجودة ولكن تتطلب المزيد من VRAM)",
|
| 19 |
"tta_boost": "تعزيز TTA",
|
| 20 |
-
"tta_info": "يحسن الجودة ولكنه يبطئ سرعة المعالجة",
|
| 21 |
"phase_fix": "تصحيح الطور",
|
|
|
|
| 22 |
"phase_fix_info": "تصحيح طور متقدم للمسارات الآلية",
|
| 23 |
"instrumental": "آلي",
|
| 24 |
-
"instrumental_info": "عادةً يعطي مخرجين، لكن هناك أوقات يعطي فيها مخرجًا واحدًا، تحتاج إلى التجربة :)",
|
| 25 |
"process": "🚀 معالجة",
|
| 26 |
"reset": "🧹 إعادة تعيين",
|
| 27 |
"status": "الحالة",
|
|
@@ -71,99 +70,210 @@
|
|
| 71 |
"output_preview": "معاينة الإخراج",
|
| 72 |
"refresh_output": "🔄 تحديث الإخراج",
|
| 73 |
"ensemble_copy_status": "سيتم نسخ مخرجات التجميع هنا...",
|
| 74 |
-
"guidelines": "<h4>إرشادات اختيار النموذج</h4><ul><li><strong>تجنب الخلط بين الفئات:</strong> قد يؤدي الجمع بين نماذج الأصوات والآلات إلى خلط غير مرغوب فيه</li><li><strong>ملاحظات نموذج خاصة:</strong><ul><li>نماذج الازدواجية (v1/v2) - ت
|
| 75 |
-
"download_sources_tab": "⬇️ ت
|
| 76 |
-
"direct_links": "🌐 روابط مباشرة",
|
| 77 |
"audio_file_url": "رابط ملف الصوت",
|
| 78 |
-
"download_from_url": "⬇️ ت
|
| 79 |
-
"download_status": "حالة الت
|
| 80 |
"downloaded_file": "الملف المحمل",
|
| 81 |
"cookie_management": "🍪 إدارة الكوكيز",
|
| 82 |
-
"upload_cookies_txt": "رفع
|
| 83 |
-
"cookie_info": "<div style=\"margin-left:15px; font-size:0.95em\">**📌 لماذا
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 84 |
"manual_ensemble_tab": "🎚️ التجميع اليدوي",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 85 |
"input_sources": "📂 مصادر الإدخال",
|
| 86 |
"refresh": "🔄 تحديث",
|
| 87 |
"ensemble_algorithm": "خوارزمية التجميع",
|
| 88 |
-
"select_audio_files": "اختر ملفات الصوت",
|
| 89 |
"available_files": "الملفات المتاحة",
|
| 90 |
-
"custom_weights": "أوزان مخصصة (مفصولة بف
|
| 91 |
-
"custom_weights_placeholder": "مثال: 0.8
|
| 92 |
-
"custom_weights_info": "اتركه فارغًا لأوزان متساوية",
|
| 93 |
"result_preview_tab": "🎧 معاينة النتيجة",
|
| 94 |
-
"ensembled_output": "
|
| 95 |
"processing_log_tab": "📋 سجل المعالجة",
|
| 96 |
"processing_details": "تفاصيل المعالجة",
|
| 97 |
"process_ensemble": "⚡ معالجة التجميع",
|
| 98 |
"languages_tab": "🌐 اللغات",
|
| 99 |
"Vocal Models": "نماذج الأصوات",
|
| 100 |
-
"Instrumental Models": "نماذج الآل
|
| 101 |
-
"4-Stem Models": "نماذج 4
|
| 102 |
"Denoise Models": "نماذج إزالة الضوضاء",
|
| 103 |
"Dereverb Models": "نماذج إزالة الصدى",
|
| 104 |
-
"Other
|
| 105 |
-
"
|
|
|
|
| 106 |
"ensemble_files_help": "مسار جميع ملفات الصوت للتجميع",
|
| 107 |
-
"ensemble_type_help": "
|
| 108 |
-
"ensemble_weights_help": "الأوزان لإنشاء التجميع. يجب أن ي
|
| 109 |
-
"ensemble_output_help": "مسار ملف WAV حيث سيتم
|
| 110 |
"ensemble_type_print": "نوع التجميع: {}",
|
| 111 |
"num_input_files_print": "عدد ملفات الإدخال: {}",
|
| 112 |
"weights_print": "الأوزان: {}",
|
| 113 |
"output_file_print": "ملف الإخراج: {}",
|
| 114 |
"duration_mismatch_error": "يجب أن تكون جميع الملفات بنفس المدة",
|
| 115 |
-
"file_not_found_error": "خطأ.
|
| 116 |
"reading_chunk_print": "قراءة قطعة من الملف: {} (البداية: {}ث، المدة: {}ث)",
|
| 117 |
"chunk_result_shape_print": "شكل نتيجة القطعة: {}",
|
| 118 |
"ensemble_completed_print": "اكتمل التجميع. تم حفظ الإخراج في: {}",
|
| 119 |
-
"file_deletion_error": "
|
| 120 |
-
"directory_not_exist_warning": "⚠️ ال
|
| 121 |
-
"not_a_directory_warning": "⚠️ المسار ليس
|
| 122 |
"item_deletion_error": "⚠️ خطأ في حذف {}: {}",
|
|
|
|
| 123 |
"old_output_not_exist": "❌ مجلد الإخراج القديم غير موجود",
|
| 124 |
-
"old_outputs_cleared": "✅ تم مسح ال
|
| 125 |
"error": "🔥 خطأ: {}",
|
| 126 |
"ffmpeg_error": "خطأ FFmpeg ({}): {}",
|
| 127 |
"file_saved_successfully": "تم حفظ الملف بنجاح: {}",
|
| 128 |
-
"total_files_found": "إجمالي الملفات الموجودة: {}.
|
| 129 |
-
"total_progress": "التقدم ال
|
| 130 |
-
"detailed_pbar_enabled": "
|
| 131 |
-
"loaded_audio": "
|
| 132 |
-
"cannot_read_track": "
|
| 133 |
"error_message": "رسالة الخطأ: {}",
|
| 134 |
"demudding_track": "تنظيف المسار (إعادة مزج الطور - آلي): {}",
|
| 135 |
"elapsed_time": "الوقت المنقضي: {:.2f} ثانية.",
|
| 136 |
-
"proc_folder_description": "
|
| 137 |
-
"model_type_help": "نوع النموذج (bandit
|
| 138 |
"config_path_help": "مسار ملف التكوين",
|
| 139 |
-
"demud_phaseremix_help": "ت
|
| 140 |
-
"start_checkpoint_help": "نقطة التفتيش الأولية للأوزان الص
|
| 141 |
-
"input_folder_help": "مجلد يحتوي على ال
|
| 142 |
"audio_path_help": "مسار ملف صوتي واحد للمعالجة",
|
| 143 |
-
"store_dir_help": "مسار
|
| 144 |
"device_ids_help": "قائمة معرفات GPU",
|
| 145 |
-
"extract_instrumental_help": "
|
| 146 |
-
"disable_detailed_pbar_help": "تعطيل شريط التقدم التفصيلي في
|
| 147 |
-
"force_cpu_help": "
|
| 148 |
"flac_file_help": "إخراج ملف FLAC بدلاً من WAV",
|
| 149 |
-
"export_format_help": "
|
| 150 |
"pcm_type_help": "نوع PCM لملفات FLAC",
|
| 151 |
-
"use_tta_help": "ت
|
| 152 |
"lora_checkpoint_help": "نقطة التفتيش الأولية لأوزان LoRA",
|
| 153 |
"cuda_available": "CUDA متاح، استخدم --force_cpu لتعطيله.",
|
| 154 |
-
"using_device": "ا
|
| 155 |
-
"instruments_print": "ال
|
| 156 |
"model_load_time": "وقت تحميل النموذج: {:.2f} ثانية",
|
| 157 |
"invalid_url": "❌ رابط غير صالح",
|
| 158 |
"cookie_file_updated": "✅ تم تحديث ملف الكوكيز!",
|
| 159 |
"cookie_installation_error": "⚠️ خطأ في تثبيت الكوكيز: {}",
|
|
|
|
| 160 |
"file_size_zero_error": "حجم الملف صفر أو لم يتم إنشاء الملف",
|
| 161 |
-
"google_drive_error": "❌ خطأ في ت
|
| 162 |
"wav_conversion_failed": "فشل تحويل WAV",
|
| 163 |
-
"download_error": "❌ خطأ في الت
|
| 164 |
-
"
|
| 165 |
-
"
|
| 166 |
-
"
|
|
|
|
|
|
|
| 167 |
"output_refreshed_successfully": "✅ تم تحديث الإخراج بنجاح!",
|
| 168 |
"error_refreshing_output": "❌ خطأ في تحديث الإخراج: {}",
|
| 169 |
"starting_audio_separation": "بدء فصل الصوت...",
|
|
@@ -172,109 +282,64 @@
|
|
| 172 |
"separation_complete": "اكتمل الفصل!",
|
| 173 |
"progress_parsing_error": "خطأ في تحليل التقدم: {}",
|
| 174 |
"error_occurred": "حدث خطأ: {}",
|
| 175 |
-
"separation_process_completed": "اكتمل عملية الفصل!",
|
| 176 |
-
"no_audio_file_error": "❌ لم يتم ت
|
| 177 |
-
"no_input_progress_label": "خطأ: لم يتم ت
|
| 178 |
"processing_audio_print": "معالجة الصوت من: {} باستخدام النموذج: {}",
|
| 179 |
"starting_audio_separation_progress_label": "بدء فصل الصوت... -- 0.0%",
|
| 180 |
"separating_audio_progress_label": "فصل الصوت... -- {}.0%",
|
| 181 |
"audio_processing_completed_progress_label": "اكتملت معالجة الصوت! -- 100.0%",
|
| 182 |
"audio_processing_completed": "✅ اكتملت معالجة الصوت!",
|
| 183 |
"error_occurred_progress_label": "حدث خطأ -- 0.0%",
|
| 184 |
-
"minimum_files_required": "⚠️
|
| 185 |
"valid_files_not_found": "❌ لم يتم العثور على ملفات صالحة",
|
| 186 |
"starting_ensemble_process": "بدء عملية التجميع...",
|
| 187 |
-
"ensembling_progress": "التجميع... ({:.1f}%)",
|
| 188 |
"finalizing_ensemble_output": "إنهاء إخراج التجميع...",
|
| 189 |
"success_log": "✅ نجاح!\n{}",
|
| 190 |
"error_log": "❌ خطأ!\n{}",
|
| 191 |
-
"critical_error": "⛔ خطأ حرج: {}",
|
| 192 |
"ensemble_process_completed": "اكتملت عملية التجميع!",
|
| 193 |
-
"no_models_selected": "❌ لم يتم اختيار نماذج",
|
| 194 |
-
"no_input_audio_provided": "❌ لم يتم ت
|
| 195 |
-
"loading_model": "تحميل النموذج {}/{}: {}...",
|
| 196 |
-
"loading_model_progress_label": "تحميل النموذج {}/{}: {}... -- {}.0%",
|
| 197 |
"completed_model": "اكتمل النموذج {}/{}: {}",
|
| 198 |
"completed_model_progress_label": "اكتمل النموذج {}/{}: {} -- {}.0%",
|
| 199 |
"running_command": "تشغيل الأمر: {}",
|
| 200 |
"model_failed": "فشل النموذج {}: {}",
|
| 201 |
-
"critical_error_with_model": "خطأ ح
|
| 202 |
-
"model_output_failed": "فشل {} في إنتاج
|
| 203 |
"waiting_for_files": "في انتظار جاهزية جميع الملفات...",
|
| 204 |
"waiting_for_files_progress_label": "في انتظار جاهزية جميع الملفات... -- 90.0%",
|
| 205 |
-
"performing_ensemble": "
|
| 206 |
-
"performing_ensemble_progress_label": "
|
| 207 |
"memory_usage_before_ensemble": "استخدام الذاكرة قبل التجميع: {}%",
|
| 208 |
"memory_usage_after_ensemble": "استخدام الذاكرة بعد التجميع: {}%",
|
| 209 |
"finalizing_ensemble_output_progress_label": "إنهاء إخراج التجميع... -- 98.0%",
|
| 210 |
-
"ensemble_file_creation_failed": "
|
| 211 |
"ensemble_completed_progress_label": "اكتمل التجميع بنجاح! -- 100.0%",
|
| 212 |
"success_output_created": "✅ نجاح! تم إنشاء ملف الإخراج.",
|
| 213 |
-
"
|
| 214 |
-
"
|
| 215 |
-
"
|
| 216 |
-
"
|
| 217 |
-
"
|
| 218 |
-
"
|
| 219 |
-
"
|
| 220 |
-
"
|
| 221 |
-
"
|
| 222 |
-
"
|
| 223 |
-
"
|
| 224 |
-
"
|
| 225 |
-
"
|
| 226 |
-
"
|
| 227 |
-
"
|
| 228 |
-
"
|
| 229 |
-
"
|
| 230 |
-
"
|
| 231 |
-
"
|
| 232 |
-
"
|
| 233 |
-
"
|
| 234 |
-
"
|
| 235 |
-
"
|
| 236 |
-
"starting_method": "بدء {method} على المنفذ {port}...",
|
| 237 |
-
"process_stopped": "\n🛑 تم إيقاف العملية من قبل المستخدم",
|
| 238 |
-
"selected_port": "المنفذ المختار: {port}",
|
| 239 |
-
"starting_gradio_with_sharing": "بدء Gradio مع المشاركة المدمجة...",
|
| 240 |
-
"starting_localtunnel": "بدء Localtunnel على المنفذ {port}...",
|
| 241 |
-
"share_link": "رابط المشاركة: {url}",
|
| 242 |
-
"password_ip": "كلمة مرور IP: {ip}",
|
| 243 |
-
"starting_ngrok": "بدء Ngrok على المنفذ {port}...",
|
| 244 |
-
"ngrok_url": "رابط Ngrok: {url}",
|
| 245 |
-
"ngrok_error": "خطأ في بدء Ngrok: {error}",
|
| 246 |
-
"apollo_chunk_size": "حجم قطعة Apollo",
|
| 247 |
-
"apollo_chunk_size_info": "حجم القطعة لتحسين Apollo (موصى به: 19 للنموذج العام)",
|
| 248 |
-
"apollo_overlap": "تداخل Apollo",
|
| 249 |
-
"enhancing_with_apollo": "تحسين مع Apollo ({}/{} ملفات)...",
|
| 250 |
-
"apollo_processing_completed": "اكتملت معالجة Apollo!",
|
| 251 |
-
"apollo_overlap_info": "التداخل لتحسين Apollo (موصى به: 2)",
|
| 252 |
-
"invalid_method": "خطأ: طريقة غير صالحة! استخدم 'gradio'، 'localtunnel'، أو 'ngrok'.",
|
| 253 |
-
"apollo_enhancement_settings": "إعدادات تحسين Apollo",
|
| 254 |
-
"enhance_with_apollo": "تحسين مع Apollo",
|
| 255 |
-
"enhance_with_apollo_info": "تفعيل Apollo لتحسين الصوت بعد الفصل",
|
| 256 |
-
"apollo_method": "طريقة Apollo",
|
| 257 |
-
"apollo_method_info": "اختر طريقة المعالجة لـ Apollo",
|
| 258 |
-
"normal_method": "الطريقة العادية",
|
| 259 |
-
"mid_side_method": "طريقة الوسط/الجانب",
|
| 260 |
-
"apollo_normal_model": "نموذج Apollo العادي",
|
| 261 |
-
"apollo_normal_model_info": "النموذج المستخدم لمعالجة Apollo العادية",
|
| 262 |
-
"apollo_midside_model": "نموذج Apollo الوسط/الجانب",
|
| 263 |
-
"apollo_enhancement_info": "تحسين جودة الصوت",
|
| 264 |
-
"selected_models": "النماذج المختارة",
|
| 265 |
-
"save_preset": "حفظ الإعداد المسبق",
|
| 266 |
-
"delete_preset": "حذف الإعداد المسبق",
|
| 267 |
-
"refresh_presets": "تحديث الإعدادات المسبقة",
|
| 268 |
-
"preset_name": "اسم الإعداد المسبق",
|
| 269 |
-
"select_preset": "اختيار إعداد مسبق",
|
| 270 |
-
"add_favorite": "إضافة إلى المفضلة",
|
| 271 |
-
"apollo_midside_model_info": "النموذج المستخدم لمعالجة الوسط/الجانب (اختياري)",
|
| 272 |
-
"language_changed_message": "تم تغيير اللغة. يرجى إعادة تشغيل الخلية.",
|
| 273 |
-
"apply_matchering": "تطبيق Matchering",
|
| 274 |
-
"matchering_info": "تطبيق Matchering لتحسين جودة الصوت وتوازنه",
|
| 275 |
-
"matchering_passes": "تمريرات Matchering",
|
| 276 |
-
"matchering_passes_info": "عدد التكرارات لـ Matchering (1-5، القيم الأعلى تحسن الجودة ولكن تستغرق وقتًا أطول)",
|
| 277 |
-
"matchering_processing": "جارٍ تطبيق Matchering... ({:.1f}%)",
|
| 278 |
-
"matchering_completed": "✅ اكتمل معالجة Matchering!",
|
| 279 |
-
"matchering_error": "❌ حدث خطأ أثناء Matchering: {}"
|
| 280 |
}
|
|
|
|
| 17 |
"overlap": "التداخل",
|
| 18 |
"overlap_info": "موصى به: 2-10 (القيم الأعلى تزيد من الجودة ولكن تتطلب المزيد من VRAM)",
|
| 19 |
"tta_boost": "تعزيز TTA",
|
|
|
|
| 20 |
"phase_fix": "تصحيح الطور",
|
| 21 |
+
"use_phase_correction": "تطبيق تصحيح طور متقدم",
|
| 22 |
"phase_fix_info": "تصحيح طور متقدم للمسارات الآلية",
|
| 23 |
"instrumental": "آلي",
|
|
|
|
| 24 |
"process": "🚀 معالجة",
|
| 25 |
"reset": "🧹 إعادة تعيين",
|
| 26 |
"status": "الحالة",
|
|
|
|
| 70 |
"output_preview": "معاينة الإخراج",
|
| 71 |
"refresh_output": "🔄 تحديث الإخراج",
|
| 72 |
"ensemble_copy_status": "سيتم نسخ مخرجات التجميع هنا...",
|
| 73 |
+
"guidelines": "<h4>إرشادات اختيار النموذج</h4><ul><li><strong>تجنب الخلط بين الفئات:</strong> قد يؤدي الجمع بين نماذج الأصوات والآلات إلى خلط غير مرغوب فيه</li><li><strong>ملاحظات نموذج خاصة:</strong><ul><li>نماذج الازدواجية (v1/v2) - تنتج كلا الستيمات</li><li>فاصل MDX23C - نتائج هجينة</li></ul></li><li><strong>أفضل ممارسة:</strong> استخدم 3-5 نماذج مشابهة من نفس الفئة</li></ul><div>💡 نصيحة محترف: ابدأ بمزيج \"VOCALS-MelBand-Roformer BigBeta5e\" + \"VOCALS-BS-Roformer_1297\"</div>",
|
| 74 |
+
"download_sources_tab": "⬇️ تنزيل المصادر",
|
| 75 |
+
"direct_links": "🌐 الروابط المباشرة",
|
| 76 |
"audio_file_url": "رابط ملف الصوت",
|
| 77 |
+
"download_from_url": "⬇️ تنزيل من الرابط",
|
| 78 |
+
"download_status": "حالة التنزيل",
|
| 79 |
"downloaded_file": "الملف المحمل",
|
| 80 |
"cookie_management": "🍪 إدارة الكوكيز",
|
| 81 |
+
"upload_cookies_txt": "رفع Cookies.txt",
|
| 82 |
+
"cookie_info": "<div style=\"margin-left:15px; font-size:0.95em\">**📌 لماذا مطلوب؟**<br>- الوصول إلى المحتوى المقيد بالعمر<br>- تنزيل الفيديوهات الخاصة/غير المدرجة<br>- تجاوز القيود الإقليمية<br>- تجنب حدود تنزيل يوتيوب<br><br>**⚠️ ملاحظات مهمة**<br>- لا تشارك ملفات الكوكيز الخاصة بك أبدًا!<br>- قم بتحديث الكوكيز عند:<br> • الحصول على أخطاء \"403 ممنوع\"<br> • توقف التنزيلات فجأة<br> • رؤية رسائل \"انتهت صلاحية الجلسة\"<br><br>**🔄 خطوات التحديث**<br>1. قم بتثبيت <a href=\"https://chromewebstore.google.com/detail/get-cookiestxt-clean/ahmnmhfbokciafffnknlekllgcnafnie\" target=\"_blank\">إضافة كروم هذه</a><br>2. تسجيل الدخول إلى يوتيوب في كروم<br>3. انقر على أيقونة الإضافة → \"تصدير\"<br>4. قم برفع الملف المحمل هنا<br><br>**⏳ عمر الكوكيز**<br>- الجلسات العادية: 24 ساعة<br>- العمليات الحساسة: 1 ساعة<br>- تغيير كلمة المرور: تحديث فوري</div>",
|
| 83 |
+
"ensemble_output_copied": "✅ تم نسخ إخراج التجميع إلى {}",
|
| 84 |
+
"error_copying_ensemble_output": "❌ خطأ في نسخ إخراج التجميع: {}",
|
| 85 |
+
"drive_mounted_copying_ensemble": "تم تحميل Google Drive. نسخ إخراج التجميع...",
|
| 86 |
+
"drive_already_mounted_copying_ensemble": "Google Drive محمل بالفعل. نسخ إخراج التجميع...",
|
| 87 |
+
"no_ensemble_output_files_found": "❌ لم يتم العثور على ملفات إخراج تجميع.",
|
| 88 |
+
"drive_mounted_copying_files": "تم تحميل Google Drive. نسخ الملفات...",
|
| 89 |
+
"drive_already_mounted_copying_files": "Google Drive محمل بالفعل. نسخ الملفات...",
|
| 90 |
+
"files_copied_to_drive": "✅ تم نسخ الملفات إلى {}",
|
| 91 |
+
"error_copying_files": "❌ خطأ في نسخ الملفات: {}",
|
| 92 |
+
"mounting_drive": "تحميل Google Drive...",
|
| 93 |
+
"settings_saved": "تم حفظ الإعدادات في config.json: اللغة={lang}, الطريقة={method}, المنفذ={port}",
|
| 94 |
+
"selected_language": "اللغة المختارة: {lang}",
|
| 95 |
+
"available_languages": "اللغات المتاحة: {langs}",
|
| 96 |
+
"select_language_prompt": "اختر لغة (افتراضي: {default}): ",
|
| 97 |
+
"invalid_language": "اختيار لغة غير صالح! استخدام اللغة الافتراضية ({default}).",
|
| 98 |
+
"available_sharing_methods": "طرق المشاركة المتاحة: gradio، localtunnel، ngrok",
|
| 99 |
+
"select_sharing_method_prompt": "اختر طريقة مشاركة (الافتراضي: {default}): ",
|
| 100 |
+
"enter_ngrok_token_prompt": "أدخل رمز Ngrok الخاص بك (الافتراضي: {default}): ",
|
| 101 |
+
"ngrok_token_required": "خطأ: رمز Ngrok مطلوب لطريقة ngrok!",
|
| 102 |
+
"enter_port_prompt": "أدخل رقم المنفذ (الافتراضي: {default}): ",
|
| 103 |
+
"opening_previous_url": "فتح الرابط السابق: {url}",
|
| 104 |
+
"open_url_manually": "افتح الرابط يدويًا في Colab: {url}",
|
| 105 |
+
"invalid_previous_url": "رابط سابق غير صالح: {url}، لم يتم فتحه.",
|
| 106 |
+
"starting_method": "بدء {method} على المنفذ {port}...",
|
| 107 |
+
"process_stopped": "\n🛑 تم إيقاف العملية من قبل المستخدم",
|
| 108 |
+
"selected_port": "المنفذ المختار: {port}",
|
| 109 |
+
"starting_gradio_with_sharing": "بدء Gradio مع المشاركة المدمجة...",
|
| 110 |
+
"starting_localtunnel": "بدء Localtunnel على المنفذ {port}...",
|
| 111 |
+
"share_link": "رابط المشاركة: {url}",
|
| 112 |
+
"password_ip": "كلمة مرور IP: {ip}",
|
| 113 |
+
"starting_ngrok": "بدء Ngrok على المنفذ {port}...",
|
| 114 |
+
"ngrok_url": "رابط Ngrok: {url}",
|
| 115 |
+
"ngrok_error": "خطأ في بدء Ngrok: {error}",
|
| 116 |
+
"auto_apollo_chunk_size": "حجم قطعة Apollo",
|
| 117 |
+
"auto_apollo_chunk_size_info": "حجم القطعة لتحسين Apollo (موصى به: 19 للنموذج العام)",
|
| 118 |
+
"auto_apollo_overlap": "تداخل Apollo",
|
| 119 |
+
"auto_enhancing_with_apollo": "تحسين مع Apollo ({}/{} ملفات)...",
|
| 120 |
+
"auto_apollo_processing_completed": "اكتملت معالجة Apollo!",
|
| 121 |
+
"auto_apollo_overlap_info": "التداخل لتحسين Apollo (موصى به: 2)",
|
| 122 |
+
"apollo_chunk_size": "حجم قطعة Apollo",
|
| 123 |
+
"apollo_chunk_size_info": "حجم القطعة لتحسين Apollo (موصى به: 19 للنموذج العام)",
|
| 124 |
+
"apollo_overlap": "تداخل Apollo",
|
| 125 |
+
"enhance_with_apollo": "تحسين مع Apollo",
|
| 126 |
+
"apollo_enhancement_info": "تحسين جودة الصوت",
|
| 127 |
+
"enhancing_with_apollo": "تحسين مع Apollo ({}/{} ملفات)...",
|
| 128 |
+
"apollo_processing_completed": "اكتملت معالجة Apollo!",
|
| 129 |
+
"apollo_overlap_info": "التداخل لتحسين Apollo (موصى به: 2)",
|
| 130 |
+
"selected_models": "النماذج المختارة",
|
| 131 |
+
"save_preset": "حفظ الإعداد المسبق",
|
| 132 |
+
"delete_preset": "حذف الإعداد المسبق",
|
| 133 |
+
"refresh_presets": "تحديث الإعدادات المسبقة",
|
| 134 |
+
"preset_name": "اسم الإعداد المسبق",
|
| 135 |
+
"select_preset": "اختيار إعداد مسبق",
|
| 136 |
+
"add_favorite": "إضافة إلى المفضلة",
|
| 137 |
+
"apply_matchering": "تطبيق Matchering",
|
| 138 |
+
"matchering_info": "تطبيق Matchering لتحسين جودة الصوت وتوازنه",
|
| 139 |
+
"matchering_passes": "تمريرات Matchering",
|
| 140 |
+
"matchering_passes_info": "عدد التكرارات لـ Matchering (1-5، القيم الأعلى تحسن الجودة ولكن تستغرق وقتًا أطول)",
|
| 141 |
+
"matchering_processing": "جارٍ تطبيق Matchering... ({:.1f}%)",
|
| 142 |
+
"matchering_completed": "✅ اكتمل معالجة Matchering!",
|
| 143 |
+
"matchering_error": "❌ حدث خطأ أثناء Matchering: {}",
|
| 144 |
+
"invalid_method": "خطأ: طريقة غير صالحة! استخدم 'gradio'، 'localtunnel'، أو 'ngrok'.",
|
| 145 |
+
"language_changed_message": "تم تغيير اللغة. يرجى إعادة تشغيل الخلية.",
|
| 146 |
+
"backend_settings": "🚀 إعدادات الخلفية",
|
| 147 |
+
"inference_backend": "خلفية الاستدلال",
|
| 148 |
+
"optimization_mode": "وضع التحسين",
|
| 149 |
+
"optimization_mode_info": "اختيار وضع تحسين PyTorch",
|
| 150 |
+
"mixed_precision_amp": "الدقة المختلطة (AMP)",
|
| 151 |
+
"mixed_precision_info": "استدلال أسرع 2x - موصى به",
|
| 152 |
+
"tf32_acceleration": "تسريع TF32",
|
| 153 |
+
"tf32_acceleration_info": "زيادة سرعة إضافية لـ RTX 30xx+",
|
| 154 |
+
"cudnn_benchmark": "معيار cuDNN",
|
| 155 |
+
"cudnn_benchmark_info": "أبطأ في التشغيل الأول، أسرع بكثير في التشغيلات اللاحقة",
|
| 156 |
+
"ultra_optimized_pytorch": "خلفية PyTorch فائقة التحسين",
|
| 157 |
+
"default_active_max_speed": "نشط افتراضيًا - تحسين سرعة قصوى",
|
| 158 |
+
"channels_last_mode": "الأسرع لـ RTX GPUs",
|
| 159 |
+
"compile_mode": "سرعة إضافية لـ PyTorch 2.0+",
|
| 160 |
+
"default_mode": "قياسي",
|
| 161 |
+
"tta_info": "يزيد من الجودة لكنه يبطئ المعالجة",
|
| 162 |
+
"direct_download_error": "❌ خطأ في التنزيل المباشر: {}",
|
| 163 |
+
"invalid_google_drive_url": "❌ رابط Google Drive غير صالح",
|
| 164 |
+
"continuing_without_google_drive": "فشل الاتصال بـ Google Drive، الاستمرار بدونه...",
|
| 165 |
+
"skipping_drive_copy_no_mount": "⚠️ Google Drive غير محمل، تخطي النسخ إلى Drive...",
|
| 166 |
+
"direct_download_failed": "فشل تنزيل الملف مباشرة",
|
| 167 |
+
"instrumental_info": "عادةً ينتج 2 ستيمات، لكن أحيانًا واحدة فقط - تحتاج إلى التجربة! :)",
|
| 168 |
"manual_ensemble_tab": "🎚️ التجميع اليدوي",
|
| 169 |
+
"phase_fixer_tab": "🔧 مصحح الطور",
|
| 170 |
+
"batch_processing_tab": "📦 المعالجة الدفعية",
|
| 171 |
+
"batch_description": "معالجة ملفات متعددة في وقت واحد بنفس النموذج",
|
| 172 |
+
"batch_input_folder": "مجلد الإدخال",
|
| 173 |
+
"batch_input_folder_placeholder": "أدخل مسار المجلد الذي يحتوي على ملفات الصوت",
|
| 174 |
+
"batch_output_folder": "مجلد الإخراج",
|
| 175 |
+
"batch_output_folder_placeholder": "أدخل مسار المجلد لملفات الإخراج",
|
| 176 |
+
"batch_file_list": "الملفات المراد معالجتها",
|
| 177 |
+
"batch_add_files": "إضافة ملفات",
|
| 178 |
+
"batch_clear_list": "مسح القائمة",
|
| 179 |
+
"batch_start": "🚀 بدء المعالجة الدفعية",
|
| 180 |
+
"batch_stop": "⏹️ إيقاف",
|
| 181 |
+
"batch_progress": "تقدم المعالجة الدفعية",
|
| 182 |
+
"batch_current_file": "يُعالَج حاليًا",
|
| 183 |
+
"batch_completed": "✅ اكتملت المعالجة الدفعية!",
|
| 184 |
+
"batch_stopped": "⏹️ تم إيقاف المعالجة الدفعية",
|
| 185 |
+
"batch_no_files": "❌ لم يتم اختيار ملفات للمعالجة",
|
| 186 |
+
"source_model": "النموذج المصدر (أصوات)",
|
| 187 |
+
"source_model_info": "نموذج لاستخراج الأصوات/بيانات الطور",
|
| 188 |
+
"target_model": "النموذج الهدف (آلي)",
|
| 189 |
+
"target_model_info": "النموذج الذي يتم تطبيق تصحيح الطور عليه",
|
| 190 |
"input_sources": "📂 مصادر الإدخال",
|
| 191 |
"refresh": "🔄 تحديث",
|
| 192 |
"ensemble_algorithm": "خوارزمية التجميع",
|
| 193 |
+
"select_audio_files": "اختيار ملفات الصوت",
|
| 194 |
"available_files": "الملفات المتاحة",
|
| 195 |
+
"custom_weights": "أوزان مخصصة (مفصولة بفاصلة)",
|
| 196 |
+
"custom_weights_placeholder": "مثال: 0.8, 1.2, 1.0, ...",
|
| 197 |
+
"custom_weights_info": "اتركه فارغًا للأوزان المتساوية",
|
| 198 |
"result_preview_tab": "🎧 معاينة النتيجة",
|
| 199 |
+
"ensembled_output": "الإخراج المجمَّع",
|
| 200 |
"processing_log_tab": "📋 سجل المعالجة",
|
| 201 |
"processing_details": "تفاصيل المعالجة",
|
| 202 |
"process_ensemble": "⚡ معالجة التجميع",
|
| 203 |
"languages_tab": "🌐 اللغات",
|
| 204 |
"Vocal Models": "نماذج الأصوات",
|
| 205 |
+
"Instrumental Models": "النماذج الآلية",
|
| 206 |
+
"4-Stem Models": "نماذج 4 مسارات",
|
| 207 |
"Denoise Models": "نماذج إزالة الضوضاء",
|
| 208 |
"Dereverb Models": "نماذج إزالة الصدى",
|
| 209 |
+
"Other Shapes": "نماذج أخرى",
|
| 210 |
+
"Custom Models": "النماذج المخصصة",
|
| 211 |
+
"ensemble_files_description": "يجمع ملفات صوتية متعددة في إخراج واحد باستخدام طرق تجميع محددة.",
|
| 212 |
"ensemble_files_help": "مسار جميع ملفات الصوت للتجميع",
|
| 213 |
+
"ensemble_type_help": "أحد: avg_wave, median_wave, min_wave, max_wave, avg_fft, median_fft, min_fft, max_fft",
|
| 214 |
+
"ensemble_weights_help": "الأوزان لإنشاء التجميع. يجب أن يساوي عدد الأوزان عدد الملفات",
|
| 215 |
+
"ensemble_output_help": "مسار ملف WAV حيث سيتم تخزين نتيجة التجميع",
|
| 216 |
"ensemble_type_print": "نوع التجميع: {}",
|
| 217 |
"num_input_files_print": "عدد ملفات الإدخال: {}",
|
| 218 |
"weights_print": "الأوزان: {}",
|
| 219 |
"output_file_print": "ملف الإخراج: {}",
|
| 220 |
"duration_mismatch_error": "يجب أن تكون جميع الملفات بنفس المدة",
|
| 221 |
+
"file_not_found_error": "خطأ. تعذر العثور على الملف: {}. تحقق من المسارات.",
|
| 222 |
"reading_chunk_print": "قراءة قطعة من الملف: {} (البداية: {}ث، المدة: {}ث)",
|
| 223 |
"chunk_result_shape_print": "شكل نتيجة القطعة: {}",
|
| 224 |
"ensemble_completed_print": "اكتمل التجميع. تم حفظ الإخراج في: {}",
|
| 225 |
+
"file_deletion_error": "تعذر حذف {}: {}",
|
| 226 |
+
"directory_not_exist_warning": "⚠️ المجلد غير موجود: {}",
|
| 227 |
+
"not_a_directory_warning": "⚠️ المسار ليس مجلدًا: {}",
|
| 228 |
"item_deletion_error": "⚠️ خطأ في حذف {}: {}",
|
| 229 |
+
"critical_error": "⛔ خطأ فادح: {}",
|
| 230 |
"old_output_not_exist": "❌ مجلد الإخراج القديم غير موجود",
|
| 231 |
+
"old_outputs_cleared": "✅ تم مسح الإخراجات القديمة بنجاح!",
|
| 232 |
"error": "🔥 خطأ: {}",
|
| 233 |
"ffmpeg_error": "خطأ FFmpeg ({}): {}",
|
| 234 |
"file_saved_successfully": "تم حفظ الملف بنجاح: {}",
|
| 235 |
+
"total_files_found": "إجمالي الملفات الموجودة: {}. معدل العينات المستخدم: {}",
|
| 236 |
+
"total_progress": "التقدم الكلي",
|
| 237 |
+
"detailed_pbar_enabled": "شريط التقدم التفصيلي مُفعَّل: {}",
|
| 238 |
+
"loaded_audio": "تم تحميل الصوت: {}، الشكل: {}",
|
| 239 |
+
"cannot_read_track": "تعذر قراءة المسار: {}",
|
| 240 |
"error_message": "رسالة الخطأ: {}",
|
| 241 |
"demudding_track": "تنظيف المسار (إعادة مزج الطور - آلي): {}",
|
| 242 |
"elapsed_time": "الوقت المنقضي: {:.2f} ثانية.",
|
| 243 |
+
"proc_folder_description": "معالجة ملفات الصوت في مجلد باستخدام نموذج محدد.",
|
| 244 |
+
"model_type_help": "نوع النموذج (bandit, bs_roformer, mdx23c، إلخ)",
|
| 245 |
"config_path_help": "مسار ملف التكوين",
|
| 246 |
+
"demud_phaseremix_help": "تمكين إعادة مزج الطور للآلات الموسيقية",
|
| 247 |
+
"start_checkpoint_help": "نقطة التفتيش الأولية للأوزان الصحيحة",
|
| 248 |
+
"input_folder_help": "المجلد الذي يحتوي على المزيجات للمعالجة",
|
| 249 |
"audio_path_help": "مسار ملف صوتي واحد للمعالجة",
|
| 250 |
+
"store_dir_help": "مسار تخزين النتائج",
|
| 251 |
"device_ids_help": "قائمة معرفات GPU",
|
| 252 |
+
"extract_instrumental_help": "عكس الأصوات للحصول على الآلي إذا كان متاحًا",
|
| 253 |
+
"disable_detailed_pbar_help": "تعطيل شريط التقدم التفصيلي في demix",
|
| 254 |
+
"force_cpu_help": "إجبار استخدام CPU حتى لو كانت CUDA متاحة",
|
| 255 |
"flac_file_help": "إخراج ملف FLAC بدلاً من WAV",
|
| 256 |
+
"export_format_help": "تنسيق التصدير ونوع PCM",
|
| 257 |
"pcm_type_help": "نوع PCM لملفات FLAC",
|
| 258 |
+
"use_tta_help": "تمكين زيادة وقت الاختبار",
|
| 259 |
"lora_checkpoint_help": "نقطة التفتيش الأولية لأوزان LoRA",
|
| 260 |
"cuda_available": "CUDA متاح، استخدم --force_cpu لتعطيله.",
|
| 261 |
+
"using_device": "الجهاز المستخدم: {}",
|
| 262 |
+
"instruments_print": "الأدوات الموسيقية: {}",
|
| 263 |
"model_load_time": "وقت تحميل النموذج: {:.2f} ثانية",
|
| 264 |
"invalid_url": "❌ رابط غير صالح",
|
| 265 |
"cookie_file_updated": "✅ تم تحديث ملف الكوكيز!",
|
| 266 |
"cookie_installation_error": "⚠️ خطأ في تثبيت الكوكيز: {}",
|
| 267 |
+
"google_drive_mount_error": "❌ خطأ في ربط Google Drive: {}",
|
| 268 |
"file_size_zero_error": "حجم الملف صفر أو لم يتم إنشاء الملف",
|
| 269 |
+
"google_drive_error": "❌ خطأ في تنزيل Google Drive: {}",
|
| 270 |
"wav_conversion_failed": "فشل تحويل WAV",
|
| 271 |
+
"download_error": "❌ خطأ في التنزيل: {}",
|
| 272 |
+
"file_copied_to_drive": "تم نسخ الملف إلى Google Drive: {}",
|
| 273 |
+
"copy_to_drive_error": "❌ خطأ في النسخ إلى Google Drive: {}",
|
| 274 |
+
"download_success": "🎉 تم التنزيل بنجاح!",
|
| 275 |
+
"download_failed": "❌ فشل التنزيل",
|
| 276 |
+
"no_output_files_found": "❌ لم يتم العثور على ملفات الإخراج.",
|
| 277 |
"output_refreshed_successfully": "✅ تم تحديث الإخراج بنجاح!",
|
| 278 |
"error_refreshing_output": "❌ خطأ في تحديث الإخراج: {}",
|
| 279 |
"starting_audio_separation": "بدء فصل الصوت...",
|
|
|
|
| 282 |
"separation_complete": "اكتمل الفصل!",
|
| 283 |
"progress_parsing_error": "خطأ في تحليل التقدم: {}",
|
| 284 |
"error_occurred": "حدث خطأ: {}",
|
| 285 |
+
"separation_process_completed": "اكتملت عملية الفصل!",
|
| 286 |
+
"no_audio_file_error": "❌ لم يتم تقديم ملف صوتي ولا يوجد ملف في مجلد الإدخال.",
|
| 287 |
+
"no_input_progress_label": "خطأ: لم يتم تقديم إدخال -- 0.0%",
|
| 288 |
"processing_audio_print": "معالجة الصوت من: {} باستخدام النموذج: {}",
|
| 289 |
"starting_audio_separation_progress_label": "بدء فصل الصوت... -- 0.0%",
|
| 290 |
"separating_audio_progress_label": "فصل الصوت... -- {}.0%",
|
| 291 |
"audio_processing_completed_progress_label": "اكتملت معالجة الصوت! -- 100.0%",
|
| 292 |
"audio_processing_completed": "✅ اكتملت معالجة الصوت!",
|
| 293 |
"error_occurred_progress_label": "حدث خطأ -- 0.0%",
|
| 294 |
+
"minimum_files_required": "⚠️ مطلوب ملفان على الأقل",
|
| 295 |
"valid_files_not_found": "❌ لم يتم العثور على ملفات صالحة",
|
| 296 |
"starting_ensemble_process": "بدء عملية التجميع...",
|
| 297 |
+
"ensembling_progress": "جارٍ التجميع... ({:.1f}%)",
|
| 298 |
"finalizing_ensemble_output": "إنهاء إخراج التجميع...",
|
| 299 |
"success_log": "✅ نجاح!\n{}",
|
| 300 |
"error_log": "❌ خطأ!\n{}",
|
|
|
|
| 301 |
"ensemble_process_completed": "اكتملت عملية التجميع!",
|
| 302 |
+
"no_models_selected": "❌ لم يتم اختيار أي نماذج",
|
| 303 |
+
"no_input_audio_provided": "❌ لم يتم تقديم صوت إدخال",
|
| 304 |
+
"loading_model": "جارٍ تحميل النموذج {}/{}: {}...",
|
| 305 |
+
"loading_model_progress_label": "جارٍ تحميل النموذج {}/{}: {}... -- {}.0%",
|
| 306 |
"completed_model": "اكتمل النموذج {}/{}: {}",
|
| 307 |
"completed_model_progress_label": "اكتمل النموذج {}/{}: {} -- {}.0%",
|
| 308 |
"running_command": "تشغيل الأمر: {}",
|
| 309 |
"model_failed": "فشل النموذج {}: {}",
|
| 310 |
+
"critical_error_with_model": "خطأ فادح مع {}: {}",
|
| 311 |
+
"model_output_failed": "فشل {} في إنتاج الإخراج",
|
| 312 |
"waiting_for_files": "في انتظار جاهزية جميع الملفات...",
|
| 313 |
"waiting_for_files_progress_label": "في انتظار جاهزية جميع الملفات... -- 90.0%",
|
| 314 |
+
"performing_ensemble": "جارٍ التجميع...",
|
| 315 |
+
"performing_ensemble_progress_label": "جارٍ التجميع... -- 92.0%",
|
| 316 |
"memory_usage_before_ensemble": "استخدام الذاكرة قبل التجميع: {}%",
|
| 317 |
"memory_usage_after_ensemble": "استخدام الذاكرة بعد التجميع: {}%",
|
| 318 |
"finalizing_ensemble_output_progress_label": "إنهاء إخراج التجميع... -- 98.0%",
|
| 319 |
+
"ensemble_file_creation_failed": "تعذر إنشاء ملف التجميع: {}",
|
| 320 |
"ensemble_completed_progress_label": "اكتمل التجميع بنجاح! -- 100.0%",
|
| 321 |
"success_output_created": "✅ نجاح! تم إنشاء ملف الإخراج.",
|
| 322 |
+
"custom_models_tab": "⚡ النماذج المخصصة",
|
| 323 |
+
"add_custom_model": "➕ إضافة نموذج مخصص",
|
| 324 |
+
"custom_model_name": "اسم النموذج",
|
| 325 |
+
"custom_model_name_placeholder": "أدخل اسمًا لنموذجك",
|
| 326 |
+
"checkpoint_url": "رابط نقطة التفتيش (.ckpt/.pth)",
|
| 327 |
+
"checkpoint_url_placeholder": "https://huggingface.co/.../model.ckpt",
|
| 328 |
+
"config_url": "رابط التكوين (.yaml)",
|
| 329 |
+
"config_url_placeholder": "https://huggingface.co/.../config.yaml",
|
| 330 |
+
"custom_py_url": "رابط ملف Python المخصص (اختياري)",
|
| 331 |
+
"custom_py_url_placeholder": "https://huggingface.co/.../bs_roformer.py",
|
| 332 |
+
"auto_detect_type": "الكشف التلقائي عن نوع النموذج",
|
| 333 |
+
"model_type": "نوع النموذج",
|
| 334 |
+
"add_model_btn": "➕ إضافة نموذج",
|
| 335 |
+
"custom_models_list": "📋 قائمة النماذج المخصصة",
|
| 336 |
+
"delete_model": "🗑️ حذف",
|
| 337 |
+
"no_custom_models": "لم تتم إضافة نماذج مخصصة بعد",
|
| 338 |
+
"model_added_success": "✅ تم إضافة النموذج بنجاح!",
|
| 339 |
+
"model_deleted_success": "✅ تم حذف النموذج بنجاح!",
|
| 340 |
+
"model_add_error": "❌ خطأ في إضافة النموذج: {}",
|
| 341 |
+
"model_delete_error": "❌ خطأ في حذف النموذج: {}",
|
| 342 |
+
"refresh_models": "🔄 تحديث",
|
| 343 |
+
"custom_model_info": "أضف نماذج مخصصة عن طريق توفير روابط التنزيل. سيتم تنزيل النماذج تلقائيًا عند استخدامها.",
|
| 344 |
+
"select_model_to_delete": "اختر نموذجًا للحذف"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 345 |
}
|
assets/i18n/languages/de_de.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
{
|
| 2 |
"select_language": "Sprache auswählen",
|
| 3 |
"gecekondu_production": "Gecekondu Production",
|
| 4 |
-
"ultimate_audio_separation": "
|
| 5 |
"presented_by": "Präsentiert von Gecekondu Production © 2025",
|
| 6 |
"audio_separation_tab": "🎙️ Audio-Trennung",
|
| 7 |
"input_model": "📥 Eingabe & Modell",
|
|
@@ -13,15 +13,16 @@
|
|
| 13 |
"settings": "⚙ Einstellungen",
|
| 14 |
"format": "Format",
|
| 15 |
"chunk_size": "Chunk-Größe",
|
| 16 |
-
"chunk_size_info": "Nicht ändern, es sei denn,
|
| 17 |
"overlap": "Überlappung",
|
| 18 |
-
"overlap_info": "Empfohlen: 2-10 (
|
| 19 |
-
"tta_boost": "TTA
|
| 20 |
-
"tta_info": "
|
| 21 |
"phase_fix": "Phasenkorrektur",
|
| 22 |
-
"
|
|
|
|
| 23 |
"instrumental": "Instrumental",
|
| 24 |
-
"instrumental_info": "
|
| 25 |
"process": "🚀 Verarbeiten",
|
| 26 |
"reset": "🧹 Zurücksetzen",
|
| 27 |
"status": "Status",
|
|
@@ -37,28 +38,28 @@
|
|
| 37 |
"drums": "Schlagzeug",
|
| 38 |
"bass": "Bass",
|
| 39 |
"effects": "Effekte",
|
| 40 |
-
"advanced_tab": "⚙
|
| 41 |
"phase_remix": "Phasen-Remix",
|
| 42 |
"dry": "Trocken",
|
| 43 |
"music": "Musik",
|
| 44 |
"karaoke": "Karaoke",
|
| 45 |
-
"bleed": "
|
| 46 |
-
"copy_to_drive": "📂 Auf
|
| 47 |
"copy_status": "Kopierstatus",
|
| 48 |
"files_will_be_copied": "Dateien werden hierher kopiert...",
|
| 49 |
-
"processing_tip": "<b>🔈 Verarbeitungstipp:</b> Für verrauschte Ergebnisse
|
| 50 |
"waiting_for_processing": "Warten auf Verarbeitung...",
|
| 51 |
"auto_ensemble_tab": "🤖 Auto-Ensemble",
|
| 52 |
"upload_file": "Datei hochladen",
|
| 53 |
"enter_file_path": "Oder Dateipfad eingeben",
|
| 54 |
"file_path_placeholder": "Vollständigen Pfad zur Audiodatei eingeben",
|
| 55 |
-
"advanced_settings": "⚙️
|
| 56 |
"use_tta": "TTA verwenden",
|
| 57 |
"instrumental_only": "Nur Instrumental",
|
| 58 |
"auto_overlap": "Überlappung",
|
| 59 |
"auto_chunk_size": "Chunk-Größe",
|
| 60 |
"output_format": "Ausgabeformat",
|
| 61 |
-
"model_selection": "🧠
|
| 62 |
"model_category": "Modellkategorie",
|
| 63 |
"select_models": "Modelle aus Kategorie auswählen",
|
| 64 |
"ensemble_settings": "⚡ Ensemble-Einstellungen",
|
|
@@ -68,10 +69,10 @@
|
|
| 68 |
"original_audio_tab": "🔊 Original-Audio",
|
| 69 |
"original_audio": "Original-Audio",
|
| 70 |
"ensemble_result_tab": "🎚️ Ensemble-Ergebnis",
|
| 71 |
-
"output_preview": "
|
| 72 |
"refresh_output": "🔄 Ausgabe aktualisieren",
|
| 73 |
"ensemble_copy_status": "Ensemble-Ausgabe wird hierher kopiert...",
|
| 74 |
-
"guidelines": "<h4>Richtlinien zur
|
| 75 |
"download_sources_tab": "⬇️ Quellen herunterladen",
|
| 76 |
"direct_links": "🌐 Direkte Links",
|
| 77 |
"audio_file_url": "Audio-Datei-URL",
|
|
@@ -80,7 +81,85 @@
|
|
| 80 |
"downloaded_file": "Heruntergeladene Datei",
|
| 81 |
"cookie_management": "🍪 Cookie-Verwaltung",
|
| 82 |
"upload_cookies_txt": "Cookies.txt hochladen",
|
| 83 |
-
"cookie_info": "<div style=\"margin-left:15px; font-size:0.95em\">**📌 Warum
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 84 |
"manual_ensemble_tab": "🎚️ Manuelles Ensemble",
|
| 85 |
"input_sources": "📂 Eingabequellen",
|
| 86 |
"refresh": "🔄 Aktualisieren",
|
|
@@ -99,23 +178,24 @@
|
|
| 99 |
"Vocal Models": "Gesangsmodelle",
|
| 100 |
"Instrumental Models": "Instrumentalmodelle",
|
| 101 |
"4-Stem Models": "4-Stem-Modelle",
|
| 102 |
-
"Denoise Models": "
|
| 103 |
-
"Dereverb Models": "
|
| 104 |
"Other Models": "Andere Modelle",
|
| 105 |
-
"
|
|
|
|
| 106 |
"ensemble_files_help": "Pfad zu allen Audiodateien für das Ensemble",
|
| 107 |
-
"ensemble_type_help": "Einer von avg_wave, median_wave, min_wave, max_wave, avg_fft, median_fft, min_fft, max_fft",
|
| 108 |
-
"ensemble_weights_help": "Gewichte
|
| 109 |
"ensemble_output_help": "Pfad zur WAV-Datei, in der das Ensemble-Ergebnis gespeichert wird",
|
| 110 |
"ensemble_type_print": "Ensemble-Typ: {}",
|
| 111 |
"num_input_files_print": "Anzahl der Eingabedateien: {}",
|
| 112 |
"weights_print": "Gewichte: {}",
|
| 113 |
"output_file_print": "Ausgabedatei: {}",
|
| 114 |
"duration_mismatch_error": "Alle Dateien müssen die gleiche Dauer haben",
|
| 115 |
-
"file_not_found_error": "Fehler. Datei nicht gefunden: {}.
|
| 116 |
"reading_chunk_print": "Lese Chunk aus Datei: {} (Start: {}s, Dauer: {}s)",
|
| 117 |
"chunk_result_shape_print": "Chunk-Ergebnisform: {}",
|
| 118 |
-
"ensemble_completed_print": "Ensemble abgeschlossen. Ausgabe gespeichert
|
| 119 |
"file_deletion_error": "{} konnte nicht gelöscht werden: {}",
|
| 120 |
"directory_not_exist_warning": "⚠️ Verzeichnis existiert nicht: {}",
|
| 121 |
"not_a_directory_warning": "⚠️ Pfad ist kein Verzeichnis: {}",
|
|
@@ -125,42 +205,50 @@
|
|
| 125 |
"error": "🔥 Fehler: {}",
|
| 126 |
"ffmpeg_error": "FFmpeg-Fehler ({}): {}",
|
| 127 |
"file_saved_successfully": "Datei erfolgreich gespeichert: {}",
|
| 128 |
-
"total_files_found": "Gesamtzahl gefundener Dateien: {}.
|
| 129 |
"total_progress": "Gesamtfortschritt",
|
| 130 |
-
"detailed_pbar_enabled": "
|
| 131 |
-
"loaded_audio": "
|
| 132 |
-
"cannot_read_track": "
|
| 133 |
"error_message": "Fehlermeldung: {}",
|
| 134 |
-
"demudding_track": "
|
| 135 |
"elapsed_time": "Verstrichene Zeit: {:.2f} Sekunden.",
|
| 136 |
-
"proc_folder_description": "Verarbeitet Audiodateien in einem Ordner mit einem
|
| 137 |
"model_type_help": "Modelltyp (bandit, bs_roformer, mdx23c, etc.)",
|
| 138 |
"config_path_help": "Pfad zur Konfigurationsdatei",
|
| 139 |
"demud_phaseremix_help": "Phasen-Remix für Instrumental aktivieren",
|
| 140 |
-
"start_checkpoint_help": "
|
| 141 |
-
"input_folder_help": "Ordner mit
|
| 142 |
-
"audio_path_help": "Pfad zu einer einzelnen
|
| 143 |
"store_dir_help": "Pfad zum Speichern der Ergebnisse",
|
| 144 |
"device_ids_help": "Liste der GPU-IDs",
|
| 145 |
"extract_instrumental_help": "Gesang invertieren, um Instrumental zu erhalten, falls vorhanden",
|
| 146 |
-
"disable_detailed_pbar_help": "
|
| 147 |
-
"force_cpu_help": "CPU erzwingen,
|
| 148 |
-
"flac_file_help": "FLAC-Datei
|
| 149 |
"export_format_help": "Exportformat und PCM-Typ",
|
| 150 |
"pcm_type_help": "PCM-Typ für FLAC-Dateien",
|
| 151 |
-
"use_tta_help": "
|
| 152 |
-
"lora_checkpoint_help": "
|
| 153 |
-
"cuda_available": "CUDA ist verfügbar,
|
| 154 |
"using_device": "Verwendetes Gerät: {}",
|
| 155 |
"instruments_print": "Instrumente: {}",
|
| 156 |
"model_load_time": "Modell-Ladezeit: {:.2f} Sek.",
|
| 157 |
"invalid_url": "❌ Ungültige URL",
|
| 158 |
"cookie_file_updated": "✅ Cookie-Datei aktualisiert!",
|
| 159 |
-
"cookie_installation_error": "⚠️
|
| 160 |
-
"
|
| 161 |
-
"
|
|
|
|
| 162 |
"wav_conversion_failed": "WAV-Konvertierung fehlgeschlagen",
|
| 163 |
"download_error": "❌ Download-Fehler: {}",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 164 |
"download_success": "🎉 Erfolgreich heruntergeladen!",
|
| 165 |
"download_failed": "❌ Download fehlgeschlagen",
|
| 166 |
"no_output_files_found": "❌ Keine Ausgabedateien gefunden.",
|
|
@@ -170,19 +258,19 @@
|
|
| 170 |
"processing_audio": "Audio wird verarbeitet",
|
| 171 |
"separating_audio": "Audio wird getrennt... ({:.1f}%)",
|
| 172 |
"separation_complete": "Trennung abgeschlossen!",
|
| 173 |
-
"progress_parsing_error": "
|
| 174 |
"error_occurred": "Ein Fehler ist aufgetreten: {}",
|
| 175 |
"separation_process_completed": "Trennungsprozess abgeschlossen!",
|
| 176 |
-
"no_audio_file_error": "❌ Keine Audiodatei bereitgestellt und keine vorhandene Datei im
|
| 177 |
"no_input_progress_label": "Fehler: Keine Eingabe bereitgestellt -- 0.0%",
|
| 178 |
"processing_audio_print": "Verarbeite Audio von: {} mit Modell: {}",
|
| 179 |
"starting_audio_separation_progress_label": "Audio-Trennung wird gestartet... -- 0.0%",
|
| 180 |
"separating_audio_progress_label": "Audio wird getrennt... -- {}.0%",
|
| 181 |
-
"audio_processing_completed_progress_label": "
|
| 182 |
-
"audio_processing_completed": "✅
|
| 183 |
"error_occurred_progress_label": "Fehler aufgetreten -- 0.0%",
|
| 184 |
"minimum_files_required": "⚠️ Mindestens 2 Dateien erforderlich",
|
| 185 |
-
"valid_files_not_found": "❌
|
| 186 |
"starting_ensemble_process": "Ensemble-Prozess wird gestartet...",
|
| 187 |
"ensembling_progress": "Ensemble wird erstellt... ({:.1f}%)",
|
| 188 |
"finalizing_ensemble_output": "Ensemble-Ausgabe wird finalisiert...",
|
|
@@ -194,87 +282,65 @@
|
|
| 194 |
"no_input_audio_provided": "❌ Kein Eingabe-Audio bereitgestellt",
|
| 195 |
"loading_model": "Lade Modell {}/{}: {}...",
|
| 196 |
"loading_model_progress_label": "Lade Modell {}/{}: {}... -- {}.0%",
|
| 197 |
-
"completed_model": "
|
| 198 |
-
"completed_model_progress_label": "
|
| 199 |
"running_command": "Führe Befehl aus: {}",
|
| 200 |
"model_failed": "Modell {} fehlgeschlagen: {}",
|
| 201 |
"critical_error_with_model": "Kritischer Fehler mit {}: {}",
|
| 202 |
"model_output_failed": "{} konnte keine Ausgabe erzeugen",
|
| 203 |
-
"waiting_for_files": "Warten auf
|
| 204 |
-
"waiting_for_files_progress_label": "Warten auf
|
| 205 |
"performing_ensemble": "Ensemble wird durchgeführt...",
|
| 206 |
"performing_ensemble_progress_label": "Ensemble wird durchgeführt... -- 92.0%",
|
| 207 |
-
"memory_usage_before_ensemble": "
|
| 208 |
-
"memory_usage_after_ensemble": "
|
| 209 |
"finalizing_ensemble_output_progress_label": "Ensemble-Ausgabe wird finalisiert... -- 98.0%",
|
| 210 |
"ensemble_file_creation_failed": "Ensemble-Datei konnte nicht erstellt werden: {}",
|
| 211 |
"ensemble_completed_progress_label": "Ensemble erfolgreich abgeschlossen! -- 100.0%",
|
| 212 |
"success_output_created": "✅ Erfolg! Ausgabedatei erstellt.",
|
| 213 |
-
"
|
| 214 |
-
"
|
| 215 |
-
"
|
| 216 |
-
"
|
| 217 |
-
"
|
| 218 |
-
"
|
| 219 |
-
"
|
| 220 |
-
"
|
| 221 |
-
"
|
| 222 |
-
"
|
| 223 |
-
"
|
| 224 |
-
"
|
| 225 |
-
"
|
| 226 |
-
"
|
| 227 |
-
"
|
| 228 |
-
"
|
| 229 |
-
"
|
| 230 |
-
"
|
| 231 |
-
"
|
| 232 |
-
"
|
| 233 |
-
"
|
| 234 |
-
"
|
| 235 |
-
"
|
| 236 |
-
"
|
| 237 |
-
"
|
| 238 |
-
"
|
| 239 |
-
"
|
| 240 |
-
"
|
| 241 |
-
"
|
| 242 |
-
"
|
| 243 |
-
"
|
| 244 |
-
"
|
| 245 |
-
"
|
| 246 |
-
"
|
| 247 |
-
"
|
| 248 |
-
"
|
| 249 |
-
"
|
| 250 |
-
"
|
| 251 |
-
"
|
| 252 |
-
"
|
| 253 |
-
"
|
| 254 |
-
"
|
| 255 |
-
"
|
| 256 |
-
"
|
| 257 |
-
"
|
| 258 |
-
"
|
| 259 |
-
"mid_side_method": "Mitte/Seite-Methode",
|
| 260 |
-
"apollo_normal_model": "Apollo Normalmodell",
|
| 261 |
-
"apollo_normal_model_info": "Modell für normale Apollo-Verarbeitung",
|
| 262 |
-
"apollo_midside_model": "Apollo Mitte/Seite-Modell",
|
| 263 |
-
"apollo_enhancement_info": "Verbesserung der Klangqualität",
|
| 264 |
-
"selected_models": "Ausgewählte Modelle",
|
| 265 |
-
"save_preset": "Voreinstellung Speichern",
|
| 266 |
-
"delete_preset": "Voreinstellung Löschen",
|
| 267 |
-
"refresh_presets": "Voreinstellungen Aktualisieren",
|
| 268 |
-
"preset_name": "Name der Voreinstellung",
|
| 269 |
-
"select_preset": "Voreinstellung Auswählen",
|
| 270 |
-
"add_favorite": "Zu Favoriten Hinzufügen",
|
| 271 |
-
"apollo_midside_model_info": "Modell für Mitte/Seite-Verarbeitung (optional)",
|
| 272 |
-
"language_changed_message": "Sprache geändert. Bitte starte die Zelle neu.",
|
| 273 |
-
"apply_matchering": "Matchering anwenden",
|
| 274 |
-
"matchering_info": "Matchering anwenden, um Audio-Mastering und -Balance zu verbessern",
|
| 275 |
-
"matchering_passes": "Matchering-Durchgänge",
|
| 276 |
-
"matchering_passes_info": "Anzahl der Matchering-Iterationen (1-5, höhere Werte verbessern die Qualität, dauern aber länger)",
|
| 277 |
-
"matchering_processing": "Matchering wird angewendet... ({:.1f}%)",
|
| 278 |
-
"matchering_completed": "✅ Matchering-Verarbeitung abgeschlossen!",
|
| 279 |
-
"matchering_error": "❌ Fehler während Matchering: {}"
|
| 280 |
-
}
|
|
|
|
| 1 |
{
|
| 2 |
"select_language": "Sprache auswählen",
|
| 3 |
"gecekondu_production": "Gecekondu Production",
|
| 4 |
+
"ultimate_audio_separation": "Ihre ultimative Audio-Trennung",
|
| 5 |
"presented_by": "Präsentiert von Gecekondu Production © 2025",
|
| 6 |
"audio_separation_tab": "🎙️ Audio-Trennung",
|
| 7 |
"input_model": "📥 Eingabe & Modell",
|
|
|
|
| 13 |
"settings": "⚙ Einstellungen",
|
| 14 |
"format": "Format",
|
| 15 |
"chunk_size": "Chunk-Größe",
|
| 16 |
+
"chunk_size_info": "Nicht ändern, es sei denn, Sie haben spezifische Anforderungen",
|
| 17 |
"overlap": "Überlappung",
|
| 18 |
+
"overlap_info": "Empfohlen: 2-10 (höhere Werte erhöhen die Qualität, erfordern aber mehr VRAM)",
|
| 19 |
+
"tta_boost": "TTA-Boost",
|
| 20 |
+
"tta_info": "Erhöht die Qualität, verlangsamt aber die Verarbeitung",
|
| 21 |
"phase_fix": "Phasenkorrektur",
|
| 22 |
+
"use_phase_correction": "Erweiterte Phasenkorrektur anwenden",
|
| 23 |
+
"phase_fix_info": "Erweiterte Phasenkorrektur für Instrumentalspuren",
|
| 24 |
"instrumental": "Instrumental",
|
| 25 |
+
"instrumental_info": "Gibt normalerweise 2 Ausgaben, manchmal nur eine - Sie müssen es ausprobieren! :)",
|
| 26 |
"process": "🚀 Verarbeiten",
|
| 27 |
"reset": "🧹 Zurücksetzen",
|
| 28 |
"status": "Status",
|
|
|
|
| 38 |
"drums": "Schlagzeug",
|
| 39 |
"bass": "Bass",
|
| 40 |
"effects": "Effekte",
|
| 41 |
+
"advanced_tab": "⚙ Erweitert",
|
| 42 |
"phase_remix": "Phasen-Remix",
|
| 43 |
"dry": "Trocken",
|
| 44 |
"music": "Musik",
|
| 45 |
"karaoke": "Karaoke",
|
| 46 |
+
"bleed": "Übersprechen",
|
| 47 |
+
"copy_to_drive": "📂 Auf Drive kopieren",
|
| 48 |
"copy_status": "Kopierstatus",
|
| 49 |
"files_will_be_copied": "Dateien werden hierher kopiert...",
|
| 50 |
+
"processing_tip": "<b>🔈 Verarbeitungstipp:</b> Für verrauschte Ergebnisse verwenden Sie die Modelle <code>bleed_suppressor_v1</code> oder <code>denoisedebleed</code> in der Kategorie <i>\"Entrauschen & Effektentfernung\"</i>, um die Ausgabe zu bereinigen",
|
| 51 |
"waiting_for_processing": "Warten auf Verarbeitung...",
|
| 52 |
"auto_ensemble_tab": "🤖 Auto-Ensemble",
|
| 53 |
"upload_file": "Datei hochladen",
|
| 54 |
"enter_file_path": "Oder Dateipfad eingeben",
|
| 55 |
"file_path_placeholder": "Vollständigen Pfad zur Audiodatei eingeben",
|
| 56 |
+
"advanced_settings": "⚙️ Erweiterte Einstellungen",
|
| 57 |
"use_tta": "TTA verwenden",
|
| 58 |
"instrumental_only": "Nur Instrumental",
|
| 59 |
"auto_overlap": "Überlappung",
|
| 60 |
"auto_chunk_size": "Chunk-Größe",
|
| 61 |
"output_format": "Ausgabeformat",
|
| 62 |
+
"model_selection": "🧠 Modellauswahl",
|
| 63 |
"model_category": "Modellkategorie",
|
| 64 |
"select_models": "Modelle aus Kategorie auswählen",
|
| 65 |
"ensemble_settings": "⚡ Ensemble-Einstellungen",
|
|
|
|
| 69 |
"original_audio_tab": "🔊 Original-Audio",
|
| 70 |
"original_audio": "Original-Audio",
|
| 71 |
"ensemble_result_tab": "🎚️ Ensemble-Ergebnis",
|
| 72 |
+
"output_preview": "Ausgabevorschau",
|
| 73 |
"refresh_output": "🔄 Ausgabe aktualisieren",
|
| 74 |
"ensemble_copy_status": "Ensemble-Ausgabe wird hierher kopiert...",
|
| 75 |
+
"guidelines": "<h4>Richtlinien zur Modellauswahl</h4><ul><li><strong>Kategorien nicht mischen:</strong> Das Kombinieren von Gesangs- und Instrumentalmodellen kann unerwünschte Mischungen erzeugen</li><li><strong>Hinweise zu Spezialmodellen:</strong><ul><li>Dualitätsmodelle (v1/v2) - Geben beide Stems aus</li><li>MDX23C Separator - Hybride Ergebnisse</li></ul></li><li><strong>Best Practice:</strong> Verwenden Sie 3-5 ähnliche Modelle aus derselben Kategorie</li></ul><div>💡 Pro-Tipp: Starten Sie mit der Kombination \"VOCALS-MelBand-Roformer BigBeta5e\" + \"VOCALS-BS-Roformer_1297\"</div>",
|
| 76 |
"download_sources_tab": "⬇️ Quellen herunterladen",
|
| 77 |
"direct_links": "🌐 Direkte Links",
|
| 78 |
"audio_file_url": "Audio-Datei-URL",
|
|
|
|
| 81 |
"downloaded_file": "Heruntergeladene Datei",
|
| 82 |
"cookie_management": "🍪 Cookie-Verwaltung",
|
| 83 |
"upload_cookies_txt": "Cookies.txt hochladen",
|
| 84 |
+
"cookie_info": "<div style=\"margin-left:15px; font-size:0.95em\">**📌 Warum notwendig?**<br>- Zugriff auf altersbeschränkte Inhalte<br>- Herunterladen privater/nicht gelisteter Videos<br>- Umgehung regionaler Einschränkungen<br>- Vermeidung von YouTube-Download-Limits<br><br>**⚠️ Wichtige Hinweise**<br>- Teilen Sie Ihre Cookie-Dateien NIEMALS!<br>- Aktualisieren Sie Cookies, wenn:<br> • \"403 Verboten\"-Fehler auftreten<br> • Downloads plötzlich stoppen<br> • \"Sitzung abgelaufen\"-Meldungen erscheinen<br><br>**🔄 Aktualisierungsschritte**<br>1. Installieren Sie diese <a href=\"https://chromewebstore.google.com/detail/get-cookiestxt-clean/ahmnmhfbokciafffnknlekllgcnafnie\" target=\"_blank\">Chrome-Erweiterung</a><br>2. Melden Sie sich in Chrome bei YouTube an<br>3. Klicken Sie auf das Erweiterungs-Symbol → \"Exportieren\"<br>4. Laden Sie die heruntergeladene Datei hier hoch<br><br>**⏳ Cookie-Lebensdauer**<br>- Normale Sitzungen: 24 Stunden<br>- Sensible Operationen: 1 Stunde<br>- Passwortänderung: Sofortige Aktualisierung</div>",
|
| 85 |
+
"ensemble_output_copied": "✅ Ensemble-Ausgabe nach {} kopiert",
|
| 86 |
+
"error_copying_ensemble_output": "❌ Fehler beim Kopieren der Ensemble-Ausgabe: {}",
|
| 87 |
+
"drive_mounted_copying_ensemble": "Google Drive gemountet. Kopiere Ensemble-Ausgabe...",
|
| 88 |
+
"drive_already_mounted_copying_ensemble": "Google Drive bereits gemountet. Kopiere Ensemble-Ausgabe...",
|
| 89 |
+
"no_ensemble_output_files_found": "❌ Keine Ensemble-Ausgabedateien gefunden.",
|
| 90 |
+
"drive_mounted_copying_files": "Google Drive gemountet. Kopiere Dateien...",
|
| 91 |
+
"drive_already_mounted_copying_files": "Google Drive bereits gemountet. Kopiere Dateien...",
|
| 92 |
+
"files_copied_to_drive": "✅ Dateien nach {} kopiert",
|
| 93 |
+
"error_copying_files": "❌ Fehler beim Kopieren der Dateien: {}",
|
| 94 |
+
"mounting_drive": "Google Drive wird gemountet...",
|
| 95 |
+
"settings_saved": "Einstellungen in config.json gespeichert: Sprache={lang}, Methode={method}, Port={port}",
|
| 96 |
+
"selected_language": "Ausgewählte Sprache: {lang}",
|
| 97 |
+
"available_languages": "Verfügbare Sprachen: {langs}",
|
| 98 |
+
"select_language_prompt": "Wählen Sie eine Sprache (Standard: {default}): ",
|
| 99 |
+
"invalid_language": "Ungültige Sprachauswahl! Verwende Standardsprache ({default}).",
|
| 100 |
+
"available_sharing_methods": "Verfügbare Freigabemethoden: gradio, localtunnel, ngrok",
|
| 101 |
+
"select_sharing_method_prompt": "Wählen Sie eine Freigabemethode (Standard: {default}): ",
|
| 102 |
+
"enter_ngrok_token_prompt": "Geben Sie Ihren Ngrok-Token ein (Standard: {default}): ",
|
| 103 |
+
"ngrok_token_required": "Fehler: Ngrok-Token für ngrok-Methode erforderlich!",
|
| 104 |
+
"enter_port_prompt": "Geben Sie die Portnummer ein (Standard: {default}): ",
|
| 105 |
+
"opening_previous_url": "Öffne vorherige URL: {url}",
|
| 106 |
+
"open_url_manually": "Öffnen Sie die URL manuell in Colab: {url}",
|
| 107 |
+
"invalid_previous_url": "Ungültige vorherige URL: {url}, nicht geöffnet.",
|
| 108 |
+
"starting_method": "Starte {method} auf Port {port}...",
|
| 109 |
+
"process_stopped": "\n🛑 Prozess vom Benutzer gestoppt",
|
| 110 |
+
"selected_port": "Ausgewählter Port: {port}",
|
| 111 |
+
"starting_gradio_with_sharing": "Starte Gradio mit integrierter Freigabe...",
|
| 112 |
+
"starting_localtunnel": "Starte Localtunnel auf Port {port}...",
|
| 113 |
+
"share_link": "Freigabelink: {url}",
|
| 114 |
+
"password_ip": "Passwort-IP: {ip}",
|
| 115 |
+
"starting_ngrok": "Starte Ngrok auf Port {port}...",
|
| 116 |
+
"ngrok_url": "Ngrok-URL: {url}",
|
| 117 |
+
"ngrok_error": "Fehler beim Starten von Ngrok: {error}",
|
| 118 |
+
"auto_apollo_chunk_size": "Apollo-Chunk-Größe",
|
| 119 |
+
"auto_apollo_chunk_size_info": "Chunk-Größe für Apollo-Verbesserung (empfohlen: 19 für Universal-Modell)",
|
| 120 |
+
"auto_apollo_overlap": "Apollo-Überlappung",
|
| 121 |
+
"auto_enhancing_with_apollo": "Verbesserung mit Apollo ({}/{} Dateien)...",
|
| 122 |
+
"auto_apollo_processing_completed": "Apollo-Verarbeitung abgeschlossen!",
|
| 123 |
+
"auto_apollo_overlap_info": "Überlappung für Apollo-Verbesserung (empfohlen: 2)",
|
| 124 |
+
"apollo_chunk_size": "Apollo-Chunk-Größe",
|
| 125 |
+
"apollo_chunk_size_info": "Chunk-Größe für Apollo-Verbesserung (empfohlen: 19 für Universal-Modell)",
|
| 126 |
+
"apollo_overlap": "Apollo-Überlappung",
|
| 127 |
+
"enhance_with_apollo": "Mit Apollo verbessern",
|
| 128 |
+
"apollo_enhancement_info": "Audioqualitätsverbesserung",
|
| 129 |
+
"enhancing_with_apollo": "Verbesserung mit Apollo ({}/{} Dateien)...",
|
| 130 |
+
"apollo_processing_completed": "Apollo-Verarbeitung abgeschlossen!",
|
| 131 |
+
"apollo_overlap_info": "Überlappung für Apollo-Verbesserung (empfohlen: 2)",
|
| 132 |
+
"selected_models": "Ausgewählte Modelle",
|
| 133 |
+
"save_preset": "Preset speichern",
|
| 134 |
+
"delete_preset": "Preset löschen",
|
| 135 |
+
"refresh_presets": "Presets aktualisieren",
|
| 136 |
+
"preset_name": "Preset-Name",
|
| 137 |
+
"select_preset": "Preset auswählen",
|
| 138 |
+
"add_favorite": "Zu Favoriten hinzufügen",
|
| 139 |
+
"apply_matchering": "Matchering anwenden",
|
| 140 |
+
"matchering_info": "Matchering anwenden, um Audio-Mastering und -Balance zu verbessern",
|
| 141 |
+
"matchering_passes": "Matchering-Durchläufe",
|
| 142 |
+
"matchering_passes_info": "Anzahl der Matchering-Iterationen (1-5, höhere Werte verbessern die Qualität, dauern aber länger)",
|
| 143 |
+
"matchering_processing": "Matchering wird angewendet... ({:.1f}%)",
|
| 144 |
+
"matchering_completed": "✅ Matchering-Verarbeitung abgeschlossen!",
|
| 145 |
+
"matchering_error": "❌ Fehler während Matchering: {}",
|
| 146 |
+
"invalid_method": "Fehler: Ungültige Methode! Verwenden Sie 'gradio', 'localtunnel' oder 'ngrok'.",
|
| 147 |
+
"language_changed_message": "Sprache geändert. Bitte starten Sie die Zelle neu.",
|
| 148 |
+
"backend_settings": "🚀 Backend-Einstellungen",
|
| 149 |
+
"inference_backend": "Inference-Backend",
|
| 150 |
+
"optimization_mode": "Optimierungsmodus",
|
| 151 |
+
"optimization_mode_info": "Auswahl des PyTorch-Optimierungsmodus",
|
| 152 |
+
"mixed_precision_amp": "Gemischte Präzision (AMP)",
|
| 153 |
+
"mixed_precision_info": "2x schnellere Inference - empfohlen",
|
| 154 |
+
"tf32_acceleration": "TF32-Beschleunigung",
|
| 155 |
+
"tf32_acceleration_info": "Zusätzlicher Geschwindigkeitsboost für RTX 30xx+",
|
| 156 |
+
"cudnn_benchmark": "cuDNN-Benchmark",
|
| 157 |
+
"cudnn_benchmark_info": "Langsamer beim ersten Lauf, viel schneller bei nachfolgenden Läufen",
|
| 158 |
+
"ultra_optimized_pytorch": "ULTRA-OPTIMIERTES PyTorch-Backend",
|
| 159 |
+
"default_active_max_speed": "Standardmäßig aktiv - Maximale Geschwindigkeitsoptimierung",
|
| 160 |
+
"channels_last_mode": "Schnellstes für RTX-GPUs",
|
| 161 |
+
"compile_mode": "Zusätzliche Geschwindigkeit für PyTorch 2.0+",
|
| 162 |
+
"default_mode": "Standard",
|
| 163 |
"manual_ensemble_tab": "🎚️ Manuelles Ensemble",
|
| 164 |
"input_sources": "📂 Eingabequellen",
|
| 165 |
"refresh": "🔄 Aktualisieren",
|
|
|
|
| 178 |
"Vocal Models": "Gesangsmodelle",
|
| 179 |
"Instrumental Models": "Instrumentalmodelle",
|
| 180 |
"4-Stem Models": "4-Stem-Modelle",
|
| 181 |
+
"Denoise Models": "Entrauschungsmodelle",
|
| 182 |
+
"Dereverb Models": "Nachhallentfernungsmodelle",
|
| 183 |
"Other Models": "Andere Modelle",
|
| 184 |
+
"Other Shapes": "Andere Modelle",
|
| 185 |
+
"ensemble_files_description": "Kombiniert mehrere Audiodateien zu einer einzigen Ausgabe unter Verwendung angegebener Ensemble-Methoden.",
|
| 186 |
"ensemble_files_help": "Pfad zu allen Audiodateien für das Ensemble",
|
| 187 |
+
"ensemble_type_help": "Einer von: avg_wave, median_wave, min_wave, max_wave, avg_fft, median_fft, min_fft, max_fft",
|
| 188 |
+
"ensemble_weights_help": "Gewichte zum Erstellen des Ensembles. Anzahl der Gewichte muss der Anzahl der Dateien entsprechen",
|
| 189 |
"ensemble_output_help": "Pfad zur WAV-Datei, in der das Ensemble-Ergebnis gespeichert wird",
|
| 190 |
"ensemble_type_print": "Ensemble-Typ: {}",
|
| 191 |
"num_input_files_print": "Anzahl der Eingabedateien: {}",
|
| 192 |
"weights_print": "Gewichte: {}",
|
| 193 |
"output_file_print": "Ausgabedatei: {}",
|
| 194 |
"duration_mismatch_error": "Alle Dateien müssen die gleiche Dauer haben",
|
| 195 |
+
"file_not_found_error": "Fehler. Datei nicht gefunden: {}. Überprüfen Sie die Pfade.",
|
| 196 |
"reading_chunk_print": "Lese Chunk aus Datei: {} (Start: {}s, Dauer: {}s)",
|
| 197 |
"chunk_result_shape_print": "Chunk-Ergebnisform: {}",
|
| 198 |
+
"ensemble_completed_print": "Ensemble abgeschlossen. Ausgabe gespeichert in: {}",
|
| 199 |
"file_deletion_error": "{} konnte nicht gelöscht werden: {}",
|
| 200 |
"directory_not_exist_warning": "⚠️ Verzeichnis existiert nicht: {}",
|
| 201 |
"not_a_directory_warning": "⚠️ Pfad ist kein Verzeichnis: {}",
|
|
|
|
| 205 |
"error": "🔥 Fehler: {}",
|
| 206 |
"ffmpeg_error": "FFmpeg-Fehler ({}): {}",
|
| 207 |
"file_saved_successfully": "Datei erfolgreich gespeichert: {}",
|
| 208 |
+
"total_files_found": "Gesamtzahl gefundener Dateien: {}. Verwendete Abtastrate: {}",
|
| 209 |
"total_progress": "Gesamtfortschritt",
|
| 210 |
+
"detailed_pbar_enabled": "Detaillierte Fortschrittsanzeige aktiviert: {}",
|
| 211 |
+
"loaded_audio": "Audio geladen: {}, Form: {}",
|
| 212 |
+
"cannot_read_track": "Track kann nicht gelesen werden: {}",
|
| 213 |
"error_message": "Fehlermeldung: {}",
|
| 214 |
+
"demudding_track": "Track wird bereinigt (Phasen-Remix - Instrumental): {}",
|
| 215 |
"elapsed_time": "Verstrichene Zeit: {:.2f} Sekunden.",
|
| 216 |
+
"proc_folder_description": "Verarbeitet Audiodateien in einem Ordner mit einem angegebenen Modell.",
|
| 217 |
"model_type_help": "Modelltyp (bandit, bs_roformer, mdx23c, etc.)",
|
| 218 |
"config_path_help": "Pfad zur Konfigurationsdatei",
|
| 219 |
"demud_phaseremix_help": "Phasen-Remix für Instrumental aktivieren",
|
| 220 |
+
"start_checkpoint_help": "Anfangs-Checkpoint für gültige Gewichte",
|
| 221 |
+
"input_folder_help": "Ordner mit zu verarbeitenden Mischungen",
|
| 222 |
+
"audio_path_help": "Pfad zu einer einzelnen zu verarbeitenden Audiodatei",
|
| 223 |
"store_dir_help": "Pfad zum Speichern der Ergebnisse",
|
| 224 |
"device_ids_help": "Liste der GPU-IDs",
|
| 225 |
"extract_instrumental_help": "Gesang invertieren, um Instrumental zu erhalten, falls vorhanden",
|
| 226 |
+
"disable_detailed_pbar_help": "Detaillierte Fortschrittsanzeige beim Demix deaktivieren",
|
| 227 |
+
"force_cpu_help": "CPU-Nutzung erzwingen, auch wenn CUDA verfügbar ist",
|
| 228 |
+
"flac_file_help": "FLAC-Datei statt WAV ausgeben",
|
| 229 |
"export_format_help": "Exportformat und PCM-Typ",
|
| 230 |
"pcm_type_help": "PCM-Typ für FLAC-Dateien",
|
| 231 |
+
"use_tta_help": "Test-Time-Augmentation aktivieren",
|
| 232 |
+
"lora_checkpoint_help": "Anfangs-Checkpoint für LoRA-Gewichte",
|
| 233 |
+
"cuda_available": "CUDA ist verfügbar, verwenden Sie --force_cpu zum Deaktivieren.",
|
| 234 |
"using_device": "Verwendetes Gerät: {}",
|
| 235 |
"instruments_print": "Instrumente: {}",
|
| 236 |
"model_load_time": "Modell-Ladezeit: {:.2f} Sek.",
|
| 237 |
"invalid_url": "❌ Ungültige URL",
|
| 238 |
"cookie_file_updated": "✅ Cookie-Datei aktualisiert!",
|
| 239 |
+
"cookie_installation_error": "⚠️ Cookie-Installationsfehler: {}",
|
| 240 |
+
"google_drive_mount_error": "❌ Google Drive-Verbindungsfehler: {}",
|
| 241 |
+
"file_size_zero_error": "Dateigröße Null oder Datei nicht erstellt",
|
| 242 |
+
"google_drive_error": "❌ Google Drive-Download-Fehler: {}",
|
| 243 |
"wav_conversion_failed": "WAV-Konvertierung fehlgeschlagen",
|
| 244 |
"download_error": "❌ Download-Fehler: {}",
|
| 245 |
+
"file_copied_to_drive": "Datei nach Google Drive kopiert: {}",
|
| 246 |
+
"copy_to_drive_error": "❌ Fehler beim Kopieren nach Google Drive: {}",
|
| 247 |
+
"direct_download_failed": "Direkter Download der Datei fehlgeschlagen",
|
| 248 |
+
"direct_download_error": "❌ Direkter Download-Fehler: {}",
|
| 249 |
+
"invalid_google_drive_url": "❌ Ungültige Google Drive-URL",
|
| 250 |
+
"continuing_without_google_drive": "Google Drive-Verbindung fehlgeschlagen, fahre ohne fort...",
|
| 251 |
+
"skipping_drive_copy_no_mount": "⚠️ Google Drive nicht gemountet, überspringe Drive-Kopie...",
|
| 252 |
"download_success": "🎉 Erfolgreich heruntergeladen!",
|
| 253 |
"download_failed": "❌ Download fehlgeschlagen",
|
| 254 |
"no_output_files_found": "❌ Keine Ausgabedateien gefunden.",
|
|
|
|
| 258 |
"processing_audio": "Audio wird verarbeitet",
|
| 259 |
"separating_audio": "Audio wird getrennt... ({:.1f}%)",
|
| 260 |
"separation_complete": "Trennung abgeschlossen!",
|
| 261 |
+
"progress_parsing_error": "Fortschritts-Parsing-Fehler: {}",
|
| 262 |
"error_occurred": "Ein Fehler ist aufgetreten: {}",
|
| 263 |
"separation_process_completed": "Trennungsprozess abgeschlossen!",
|
| 264 |
+
"no_audio_file_error": "❌ Keine Audiodatei bereitgestellt und keine vorhandene Datei im Eingabeverzeichnis.",
|
| 265 |
"no_input_progress_label": "Fehler: Keine Eingabe bereitgestellt -- 0.0%",
|
| 266 |
"processing_audio_print": "Verarbeite Audio von: {} mit Modell: {}",
|
| 267 |
"starting_audio_separation_progress_label": "Audio-Trennung wird gestartet... -- 0.0%",
|
| 268 |
"separating_audio_progress_label": "Audio wird getrennt... -- {}.0%",
|
| 269 |
+
"audio_processing_completed_progress_label": "Audio-Verarbeitung abgeschlossen! -- 100.0%",
|
| 270 |
+
"audio_processing_completed": "✅ Audio-Verarbeitung abgeschlossen!",
|
| 271 |
"error_occurred_progress_label": "Fehler aufgetreten -- 0.0%",
|
| 272 |
"minimum_files_required": "⚠️ Mindestens 2 Dateien erforderlich",
|
| 273 |
+
"valid_files_not_found": "❌ Keine gültigen Dateien gefunden",
|
| 274 |
"starting_ensemble_process": "Ensemble-Prozess wird gestartet...",
|
| 275 |
"ensembling_progress": "Ensemble wird erstellt... ({:.1f}%)",
|
| 276 |
"finalizing_ensemble_output": "Ensemble-Ausgabe wird finalisiert...",
|
|
|
|
| 282 |
"no_input_audio_provided": "❌ Kein Eingabe-Audio bereitgestellt",
|
| 283 |
"loading_model": "Lade Modell {}/{}: {}...",
|
| 284 |
"loading_model_progress_label": "Lade Modell {}/{}: {}... -- {}.0%",
|
| 285 |
+
"completed_model": "Modell abgeschlossen {}/{}: {}",
|
| 286 |
+
"completed_model_progress_label": "Modell abgeschlossen {}/{}: {} -- {}.0%",
|
| 287 |
"running_command": "Führe Befehl aus: {}",
|
| 288 |
"model_failed": "Modell {} fehlgeschlagen: {}",
|
| 289 |
"critical_error_with_model": "Kritischer Fehler mit {}: {}",
|
| 290 |
"model_output_failed": "{} konnte keine Ausgabe erzeugen",
|
| 291 |
+
"waiting_for_files": "Warten auf Bereitstellung aller Dateien...",
|
| 292 |
+
"waiting_for_files_progress_label": "Warten auf Bereitstellung aller Dateien... -- 90.0%",
|
| 293 |
"performing_ensemble": "Ensemble wird durchgeführt...",
|
| 294 |
"performing_ensemble_progress_label": "Ensemble wird durchgeführt... -- 92.0%",
|
| 295 |
+
"memory_usage_before_ensemble": "Speichernutzung vor Ensemble: {}%",
|
| 296 |
+
"memory_usage_after_ensemble": "Speichernutzung nach Ensemble: {}%",
|
| 297 |
"finalizing_ensemble_output_progress_label": "Ensemble-Ausgabe wird finalisiert... -- 98.0%",
|
| 298 |
"ensemble_file_creation_failed": "Ensemble-Datei konnte nicht erstellt werden: {}",
|
| 299 |
"ensemble_completed_progress_label": "Ensemble erfolgreich abgeschlossen! -- 100.0%",
|
| 300 |
"success_output_created": "✅ Erfolg! Ausgabedatei erstellt.",
|
| 301 |
+
"phase_fixer_tab": "🔧 Phasenkorrektor",
|
| 302 |
+
"batch_processing_tab": "📦 Stapelverarbeitung",
|
| 303 |
+
"batch_description": "Mehrere Dateien gleichzeitig mit demselben Modell verarbeiten",
|
| 304 |
+
"batch_input_folder": "Eingabeordner",
|
| 305 |
+
"batch_input_folder_placeholder": "Pfad zum Ordner mit Audiodateien eingeben",
|
| 306 |
+
"batch_output_folder": "Ausgabeordner",
|
| 307 |
+
"batch_output_folder_placeholder": "Pfad zum Ausgabeordner eingeben",
|
| 308 |
+
"batch_file_list": "Zu verarbeitende Dateien",
|
| 309 |
+
"batch_add_files": "Dateien hinzufügen",
|
| 310 |
+
"batch_clear_list": "Liste leeren",
|
| 311 |
+
"batch_start": "🚀 Stapelverarbeitung starten",
|
| 312 |
+
"batch_stop": "⏹️ Stopp",
|
| 313 |
+
"batch_progress": "Stapelverarbeitungsfortschritt",
|
| 314 |
+
"batch_current_file": "Aktuell in Bearbeitung",
|
| 315 |
+
"batch_completed": "✅ Stapelverarbeitung abgeschlossen!",
|
| 316 |
+
"batch_stopped": "⏹️ Stapelverarbeitung gestoppt",
|
| 317 |
+
"batch_no_files": "❌ Keine Dateien für die Verarbeitung ausgewählt",
|
| 318 |
+
"source_model": "Quellmodell (Gesang)",
|
| 319 |
+
"source_model_info": "Modell zur Extraktion von Gesang/Phasendaten",
|
| 320 |
+
"target_model": "Zielmodell (Instrumental)",
|
| 321 |
+
"target_model_info": "Modell zur Anwendung der Phasenkorrektur",
|
| 322 |
+
"custom_models_tab": "⚡ Benutzerdefinierte Modelle",
|
| 323 |
+
"Custom Models": "Benutzerdefinierte Modelle",
|
| 324 |
+
"add_custom_model": "➕ Benutzerdefiniertes Modell hinzufügen",
|
| 325 |
+
"custom_model_name": "Modellname",
|
| 326 |
+
"custom_model_name_placeholder": "Geben Sie einen Namen für Ihr Modell ein",
|
| 327 |
+
"checkpoint_url": "Checkpoint-URL (.ckpt/.pth)",
|
| 328 |
+
"checkpoint_url_placeholder": "https://huggingface.co/.../model.ckpt",
|
| 329 |
+
"config_url": "Konfigurations-URL (.yaml)",
|
| 330 |
+
"config_url_placeholder": "https://huggingface.co/.../config.yaml",
|
| 331 |
+
"custom_py_url": "Benutzerdefinierte Python-Datei URL (optional)",
|
| 332 |
+
"custom_py_url_placeholder": "https://huggingface.co/.../bs_roformer.py",
|
| 333 |
+
"auto_detect_type": "Modelltyp automatisch erkennen",
|
| 334 |
+
"model_type": "Modelltyp",
|
| 335 |
+
"add_model_btn": "➕ Modell hinzufügen",
|
| 336 |
+
"custom_models_list": "📋 Liste benutzerdefinierter Modelle",
|
| 337 |
+
"delete_model": "🗑️ Löschen",
|
| 338 |
+
"no_custom_models": "Noch keine benutzerdefinierten Modelle hinzugefügt",
|
| 339 |
+
"model_added_success": "✅ Modell erfolgreich hinzugefügt!",
|
| 340 |
+
"model_deleted_success": "✅ Modell erfolgreich gelöscht!",
|
| 341 |
+
"model_add_error": "❌ Fehler beim Hinzufügen des Modells: {}",
|
| 342 |
+
"model_delete_error": "❌ Fehler beim Löschen des Modells: {}",
|
| 343 |
+
"refresh_models": "🔄 Aktualisieren",
|
| 344 |
+
"custom_model_info": "Fügen Sie benutzerdefinierte Modelle durch Angabe von Download-Links hinzu. Modelle werden bei Verwendung automatisch heruntergeladen.",
|
| 345 |
+
"select_model_to_delete": "Wählen Sie ein Modell zum Löschen aus",
|
| 346 |
+
"downloading_model_file": "Modell wird heruntergeladen: {}",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
assets/i18n/languages/en_us.json
CHANGED
|
@@ -81,6 +81,27 @@
|
|
| 81 |
"upload_cookies_txt": "Upload Cookies.txt",
|
| 82 |
"cookie_info": "<div style=\"margin-left:15px; font-size:0.95em\">**📌 Why Needed?**<br>- Access age-restricted content<br>- Download private/unlisted videos<br>- Bypass regional restrictions<br>- Avoid YouTube download limits<br><br>**⚠️ Important Notes**<br>- NEVER share your cookie files!<br>- Refresh cookies when:<br> • Getting \"403 Forbidden\" errors<br> • Downloads suddenly stop<br> • Seeing \"Session expired\" messages<br><br>**🔄 Renewal Steps**<br>1. Install this <a href=\"https://chromewebstore.google.com/detail/get-cookiestxt-clean/ahmnmhfbokciafffnknlekllgcnafnie\" target=\"_blank\">Chrome extension</a><br>2. Login to YouTube in Chrome<br>3. Click extension icon → \"Export\"<br>4. Upload the downloaded file here<br><br>**⏳ Cookie Lifespan**<br>- Normal sessions: 24 hours<br>- Sensitive operations: 1 hour<br>- Password changes: Immediate invalidation</div>",
|
| 83 |
"manual_ensemble_tab": "🎚️ Manual Ensemble",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 84 |
"input_sources": "📂 Input Sources",
|
| 85 |
"refresh": "🔄 Refresh",
|
| 86 |
"ensemble_algorithm": "Ensemble Algorithm",
|
|
@@ -165,6 +186,10 @@
|
|
| 165 |
"file_copied_to_drive": "File copied to Google Drive: {}",
|
| 166 |
"copy_to_drive_error": "❌ Error copying to Google Drive: {}",
|
| 167 |
"direct_download_failed": "Failed to download the file directly",
|
|
|
|
|
|
|
|
|
|
|
|
|
| 168 |
"download_success": "🎉 Downloaded successfully!",
|
| 169 |
"download_failed": "❌ Download failed",
|
| 170 |
"no_output_files_found": "❌ No output files found.",
|
|
@@ -275,5 +300,79 @@
|
|
| 275 |
"matchering_completed": "✅ Matchering processing completed!",
|
| 276 |
"matchering_error": "❌ Error during Matchering: {}",
|
| 277 |
"invalid_method": "Error: Invalid method! Use 'gradio', 'localtunnel', or 'ngrok'.",
|
| 278 |
-
"language_changed_message": "Language changed. please restart the cell."
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 279 |
}
|
|
|
|
| 81 |
"upload_cookies_txt": "Upload Cookies.txt",
|
| 82 |
"cookie_info": "<div style=\"margin-left:15px; font-size:0.95em\">**📌 Why Needed?**<br>- Access age-restricted content<br>- Download private/unlisted videos<br>- Bypass regional restrictions<br>- Avoid YouTube download limits<br><br>**⚠️ Important Notes**<br>- NEVER share your cookie files!<br>- Refresh cookies when:<br> • Getting \"403 Forbidden\" errors<br> • Downloads suddenly stop<br> • Seeing \"Session expired\" messages<br><br>**🔄 Renewal Steps**<br>1. Install this <a href=\"https://chromewebstore.google.com/detail/get-cookiestxt-clean/ahmnmhfbokciafffnknlekllgcnafnie\" target=\"_blank\">Chrome extension</a><br>2. Login to YouTube in Chrome<br>3. Click extension icon → \"Export\"<br>4. Upload the downloaded file here<br><br>**⏳ Cookie Lifespan**<br>- Normal sessions: 24 hours<br>- Sensitive operations: 1 hour<br>- Password changes: Immediate invalidation</div>",
|
| 83 |
"manual_ensemble_tab": "🎚️ Manual Ensemble",
|
| 84 |
+
"phase_fixer_tab": "🔧 Phase Fixer",
|
| 85 |
+
"batch_processing_tab": "📦 Batch Processing",
|
| 86 |
+
"batch_description": "Process multiple files at once with the same model",
|
| 87 |
+
"batch_input_folder": "Input Folder",
|
| 88 |
+
"batch_input_folder_placeholder": "Enter folder path containing audio files",
|
| 89 |
+
"batch_output_folder": "Output Folder",
|
| 90 |
+
"batch_output_folder_placeholder": "Enter folder path for output files",
|
| 91 |
+
"batch_file_list": "Files to Process",
|
| 92 |
+
"batch_add_files": "Add Files",
|
| 93 |
+
"batch_clear_list": "Clear List",
|
| 94 |
+
"batch_start": "🚀 Start Batch Processing",
|
| 95 |
+
"batch_stop": "⏹️ Stop",
|
| 96 |
+
"batch_progress": "Batch Progress",
|
| 97 |
+
"batch_current_file": "Currently Processing",
|
| 98 |
+
"batch_completed": "✅ Batch processing completed!",
|
| 99 |
+
"batch_stopped": "⏹️ Batch processing stopped",
|
| 100 |
+
"batch_no_files": "❌ No files selected for processing",
|
| 101 |
+
"source_model": "Source Model (Vocals)",
|
| 102 |
+
"source_model_info": "Model for extracting vocals/phase data",
|
| 103 |
+
"target_model": "Target Model (Instrumental)",
|
| 104 |
+
"target_model_info": "Model to apply phase correction",
|
| 105 |
"input_sources": "📂 Input Sources",
|
| 106 |
"refresh": "🔄 Refresh",
|
| 107 |
"ensemble_algorithm": "Ensemble Algorithm",
|
|
|
|
| 186 |
"file_copied_to_drive": "File copied to Google Drive: {}",
|
| 187 |
"copy_to_drive_error": "❌ Error copying to Google Drive: {}",
|
| 188 |
"direct_download_failed": "Failed to download the file directly",
|
| 189 |
+
"direct_download_error": "❌ Direct download error: {}",
|
| 190 |
+
"invalid_google_drive_url": "❌ Invalid Google Drive URL",
|
| 191 |
+
"continuing_without_google_drive": "Google Drive connection failed, continuing without it...",
|
| 192 |
+
"skipping_drive_copy_no_mount": "⚠️ Google Drive not mounted, skipping Drive copy...",
|
| 193 |
"download_success": "🎉 Downloaded successfully!",
|
| 194 |
"download_failed": "❌ Download failed",
|
| 195 |
"no_output_files_found": "❌ No output files found.",
|
|
|
|
| 300 |
"matchering_completed": "✅ Matchering processing completed!",
|
| 301 |
"matchering_error": "❌ Error during Matchering: {}",
|
| 302 |
"invalid_method": "Error: Invalid method! Use 'gradio', 'localtunnel', or 'ngrok'.",
|
| 303 |
+
"language_changed_message": "Language changed. Please restart the cell.",
|
| 304 |
+
"backend_settings": "🚀 Backend Settings",
|
| 305 |
+
"inference_backend": "Inference Backend",
|
| 306 |
+
"optimization_mode": "Optimization Mode",
|
| 307 |
+
"optimization_mode_info": "PyTorch optimization mode selection",
|
| 308 |
+
"mixed_precision_amp": "Mixed Precision (AMP)",
|
| 309 |
+
"mixed_precision_info": "2x faster inference - recommended",
|
| 310 |
+
"tf32_acceleration": "TF32 Acceleration",
|
| 311 |
+
"tf32_acceleration_info": "Extra speed boost for RTX 30xx+",
|
| 312 |
+
"cudnn_benchmark": "cuDNN Benchmark",
|
| 313 |
+
"cudnn_benchmark_info": "Slower at first run, much faster on subsequent runs",
|
| 314 |
+
"ultra_optimized_pytorch": "ULTRA-OPTIMIZED PyTorch Backend",
|
| 315 |
+
"default_active_max_speed": "Active by default - Maximum speed optimization",
|
| 316 |
+
"channels_last_mode": "Fastest for RTX GPUs",
|
| 317 |
+
"compile_mode": "Extra speed for PyTorch 2.0+",
|
| 318 |
+
"default_mode": "Standard",
|
| 319 |
+
"tta_info": "Increases quality but slows down processing",
|
| 320 |
+
"instrumental_info": "Usually outputs 2 stems, but sometimes only one - you need to try! :)",
|
| 321 |
+
"custom_models_tab": "⚡ Custom Models",
|
| 322 |
+
"Custom Models": "Custom Models",
|
| 323 |
+
"add_custom_model": "➕ Add Custom Model",
|
| 324 |
+
"custom_model_name": "Model Name",
|
| 325 |
+
"custom_model_name_placeholder": "Enter a name for your model",
|
| 326 |
+
"checkpoint_url": "Checkpoint URL (.ckpt/.pth)",
|
| 327 |
+
"checkpoint_url_placeholder": "https://huggingface.co/.../model.ckpt",
|
| 328 |
+
"config_url": "Config URL (.yaml)",
|
| 329 |
+
"config_url_placeholder": "https://huggingface.co/.../config.yaml",
|
| 330 |
+
"custom_py_url": "Custom Python File URL (optional)",
|
| 331 |
+
"custom_py_url_placeholder": "https://huggingface.co/.../bs_roformer.py",
|
| 332 |
+
"auto_detect_type": "Auto-detect model type",
|
| 333 |
+
"model_type": "Model Type",
|
| 334 |
+
"add_model_btn": "➕ Add Model",
|
| 335 |
+
"custom_models_list": "📋 Custom Models List",
|
| 336 |
+
"delete_model": "🗑️ Delete",
|
| 337 |
+
"no_custom_models": "No custom models added yet",
|
| 338 |
+
"model_added_success": "✅ Model added successfully!",
|
| 339 |
+
"model_deleted_success": "✅ Model deleted successfully!",
|
| 340 |
+
"model_add_error": "❌ Error adding model: {}",
|
| 341 |
+
"model_delete_error": "❌ Error deleting model: {}",
|
| 342 |
+
"refresh_models": "🔄 Refresh",
|
| 343 |
+
"custom_model_info": "Add custom models by providing download links. Models will be downloaded automatically when used.",
|
| 344 |
+
"select_model_to_delete": "Select a model to delete",
|
| 345 |
+
"downloading_model_file": "Downloading model: {}",
|
| 346 |
+
"downloading_file_progress": "Downloading: {} - {}%",
|
| 347 |
+
"chunk_size_mode": "Chunk Size Mode",
|
| 348 |
+
"chunk_size_mode_info": "base: standard presets | custom: enter your own value | yaml: read from model YAML after download",
|
| 349 |
+
"chunk_size_custom_label": "Custom Chunk Size",
|
| 350 |
+
"chunk_size_custom_info": "Enter a custom chunk size value",
|
| 351 |
+
"chunk_size_yaml_label": "YAML Chunk Size",
|
| 352 |
+
"chunk_size_yaml_display_info": "Will be read from the model's YAML config after download",
|
| 353 |
+
"chunk_size_yaml_not_downloaded": "⏳ Model YAML not downloaded yet — value will be read during processing",
|
| 354 |
+
"chunk_size_yaml_detected": "✅ From YAML: {}",
|
| 355 |
+
"remove_favorite": "Remove from Favorites",
|
| 356 |
+
"normal_method": "Normal method",
|
| 357 |
+
"mid_side_method": "Mid-side method",
|
| 358 |
+
"apollo_processing_method": "Apollo Processing Method",
|
| 359 |
+
"apollo_normal_model": "Apollo Normal Model",
|
| 360 |
+
"apollo_mid_side_model": "Apollo Mid-Side Model",
|
| 361 |
+
"scale_factor": "Scale Factor",
|
| 362 |
+
"scale_factor_info": "Phase scaling factor (recommended: 1.4)",
|
| 363 |
+
"source_file_label": "Source File (Vocals)",
|
| 364 |
+
"target_file_label": "Target File (Instrumental)",
|
| 365 |
+
"run_phase_fixer": "🔧 Run Phase Fixer",
|
| 366 |
+
"phase_fixed_output": "Phase-Fixed Output",
|
| 367 |
+
"phase_fixer_settings": "Phase Fixer Settings",
|
| 368 |
+
"low_cutoff": "Low Cutoff (Hz)",
|
| 369 |
+
"high_cutoff": "High Cutoff (Hz)",
|
| 370 |
+
"low_cutoff_info": "Low frequency cutoff for phase correction",
|
| 371 |
+
"high_cutoff_info": "High frequency cutoff for phase correction",
|
| 372 |
+
"no_preset_name_provided": "⚠️ No preset name provided",
|
| 373 |
+
"no_models_selected_for_preset": "⚠️ No models selected for preset",
|
| 374 |
+
"preset_saved": "✅ Preset '{}' saved!",
|
| 375 |
+
"please_upload_both_files": "⚠️ Please upload both source and target files",
|
| 376 |
+
"processing_log_placeholder": "Processing log will appear here...",
|
| 377 |
+
"enter_preset_name": "Enter preset name..."
|
| 378 |
}
|
assets/i18n/languages/es_es.json
CHANGED
|
@@ -17,11 +17,10 @@
|
|
| 17 |
"overlap": "Solapamiento",
|
| 18 |
"overlap_info": "Recomendado: 2-10 (valores más altos mejoran la calidad pero requieren más VRAM)",
|
| 19 |
"tta_boost": "Impulso TTA",
|
| 20 |
-
"tta_info": "Mejora la calidad pero ralentiza la velocidad de procesamiento",
|
| 21 |
"phase_fix": "Corrección de fase",
|
|
|
|
| 22 |
"phase_fix_info": "Corrección avanzada de fase para pistas instrumentales",
|
| 23 |
"instrumental": "Instrumental",
|
| 24 |
-
"instrumental_info": "Normalmente da 2 salidas, pero a veces da una sola salida, ¡tienes que probar! :)",
|
| 25 |
"process": "🚀 Procesar",
|
| 26 |
"reset": "🧹 Restablecer",
|
| 27 |
"status": "Estado",
|
|
@@ -80,7 +79,87 @@
|
|
| 80 |
"downloaded_file": "Archivo descargado",
|
| 81 |
"cookie_management": "🍪 Gestión de cookies",
|
| 82 |
"upload_cookies_txt": "Subir Cookies.txt",
|
| 83 |
-
"cookie_info": "<div style=\"margin-left:15px; font-size:0.95em\">**📌 ¿Por qué es necesario?**<br>- Acceso a contenido
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 84 |
"manual_ensemble_tab": "🎚️ Ensamblaje manual",
|
| 85 |
"input_sources": "📂 Fuentes de entrada",
|
| 86 |
"refresh": "🔄 Actualizar",
|
|
@@ -93,19 +172,20 @@
|
|
| 93 |
"result_preview_tab": "🎧 Vista previa del resultado",
|
| 94 |
"ensembled_output": "Salida ensamblada",
|
| 95 |
"processing_log_tab": "📋 Registro de procesamiento",
|
| 96 |
-
"processing_details": "Detalles
|
| 97 |
"process_ensemble": "⚡ Procesar ensamblaje",
|
| 98 |
"languages_tab": "🌐 Idiomas",
|
| 99 |
"Vocal Models": "Modelos vocales",
|
| 100 |
"Instrumental Models": "Modelos instrumentales",
|
| 101 |
-
"4-Stem Models": "Modelos de 4
|
| 102 |
"Denoise Models": "Modelos de eliminación de ruido",
|
| 103 |
"Dereverb Models": "Modelos de eliminación de reverberación",
|
| 104 |
"Other Models": "Otros modelos",
|
| 105 |
-
"
|
| 106 |
-
"
|
| 107 |
-
"
|
| 108 |
-
"
|
|
|
|
| 109 |
"ensemble_output_help": "Ruta al archivo WAV donde se almacenará el resultado del ensamblaje",
|
| 110 |
"ensemble_type_print": "Tipo de ensamblaje: {}",
|
| 111 |
"num_input_files_print": "Número de archivos de entrada: {}",
|
|
@@ -116,7 +196,7 @@
|
|
| 116 |
"reading_chunk_print": "Leyendo fragmento del archivo: {} (inicio: {}s, duración: {}s)",
|
| 117 |
"chunk_result_shape_print": "Forma del resultado del fragmento: {}",
|
| 118 |
"ensemble_completed_print": "Ensamblaje completado. Salida guardada en: {}",
|
| 119 |
-
"file_deletion_error": "{} no pudo
|
| 120 |
"directory_not_exist_warning": "⚠️ El directorio no existe: {}",
|
| 121 |
"not_a_directory_warning": "⚠️ La ruta no es un directorio: {}",
|
| 122 |
"item_deletion_error": "⚠️ Error al eliminar {}: {}",
|
|
@@ -125,67 +205,75 @@
|
|
| 125 |
"error": "🔥 Error: {}",
|
| 126 |
"ffmpeg_error": "Error de FFmpeg ({}): {}",
|
| 127 |
"file_saved_successfully": "Archivo guardado con éxito: {}",
|
| 128 |
-
"total_files_found": "Total de archivos encontrados: {}.
|
| 129 |
"total_progress": "Progreso total",
|
| 130 |
-
"detailed_pbar_enabled": "Barra de progreso detallada
|
| 131 |
"loaded_audio": "Audio cargado: {}, forma: {}",
|
| 132 |
"cannot_read_track": "No se puede leer la pista: {}",
|
| 133 |
"error_message": "Mensaje de error: {}",
|
| 134 |
-
"demudding_track": "
|
| 135 |
"elapsed_time": "Tiempo transcurrido: {:.2f} segundos.",
|
| 136 |
"proc_folder_description": "Procesa archivos de audio en una carpeta usando un modelo especificado.",
|
| 137 |
"model_type_help": "Tipo de modelo (bandit, bs_roformer, mdx23c, etc.)",
|
| 138 |
"config_path_help": "Ruta al archivo de configuración",
|
| 139 |
"demud_phaseremix_help": "Habilitar remezcla de fase para instrumental",
|
| 140 |
"start_checkpoint_help": "Punto de control inicial para pesos válidos",
|
| 141 |
-
"input_folder_help": "Carpeta con mezclas
|
| 142 |
-
"audio_path_help": "Ruta a un solo archivo de audio
|
| 143 |
-
"store_dir_help": "Ruta para almacenar
|
| 144 |
"device_ids_help": "Lista de IDs de GPU",
|
| 145 |
-
"extract_instrumental_help": "Invertir
|
| 146 |
-
"disable_detailed_pbar_help": "
|
| 147 |
"force_cpu_help": "Forzar el uso de CPU incluso si CUDA está disponible",
|
| 148 |
"flac_file_help": "Generar archivo FLAC en lugar de WAV",
|
| 149 |
"export_format_help": "Formato de exportación y tipo PCM",
|
| 150 |
"pcm_type_help": "Tipo PCM para archivos FLAC",
|
| 151 |
"use_tta_help": "Habilitar aumento en tiempo de prueba",
|
| 152 |
"lora_checkpoint_help": "Punto de control inicial para pesos LoRA",
|
| 153 |
-
"cuda_available": "CUDA está disponible,
|
| 154 |
"using_device": "Usando dispositivo: {}",
|
| 155 |
"instruments_print": "Instrumentos: {}",
|
| 156 |
"model_load_time": "Tiempo de carga del modelo: {:.2f} seg",
|
| 157 |
"invalid_url": "❌ URL inválida",
|
| 158 |
"cookie_file_updated": "✅ ¡Archivo de cookies actualizado!",
|
| 159 |
-
"cookie_installation_error": "⚠️ Error
|
| 160 |
-
"
|
|
|
|
| 161 |
"google_drive_error": "❌ Error de descarga de Google Drive: {}",
|
| 162 |
-
"wav_conversion_failed": "
|
| 163 |
"download_error": "❌ Error de descarga: {}",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 164 |
"download_success": "🎉 ¡Descargado con éxito!",
|
| 165 |
-
"download_failed": "❌
|
| 166 |
"no_output_files_found": "❌ No se encontraron archivos de salida.",
|
| 167 |
"output_refreshed_successfully": "✅ ¡Salida actualizada con éxito!",
|
| 168 |
-
"error_refreshing_output": "❌ Error al actualizar
|
| 169 |
"starting_audio_separation": "Iniciando separación de audio...",
|
| 170 |
"processing_audio": "Procesando audio",
|
| 171 |
"separating_audio": "Separando audio... ({:.1f}%)",
|
| 172 |
-
"separation_complete": "¡Separación
|
| 173 |
-
"progress_parsing_error": "Error
|
| 174 |
"error_occurred": "Ocurrió un error: {}",
|
| 175 |
"separation_process_completed": "¡Proceso de separación completado!",
|
| 176 |
"no_audio_file_error": "❌ No se proporcionó archivo de audio y no hay archivo existente en el directorio de entrada.",
|
| 177 |
"no_input_progress_label": "Error: No se proporcionó entrada -- 0.0%",
|
| 178 |
-
"processing_audio_print": "Procesando audio desde: {} usando
|
| 179 |
"starting_audio_separation_progress_label": "Iniciando separación de audio... -- 0.0%",
|
| 180 |
"separating_audio_progress_label": "Separando audio... -- {}.0%",
|
| 181 |
"audio_processing_completed_progress_label": "¡Procesamiento de audio completado! -- 100.0%",
|
| 182 |
"audio_processing_completed": "✅ ¡Procesamiento de audio completado!",
|
| 183 |
"error_occurred_progress_label": "Ocurrió un error -- 0.0%",
|
| 184 |
-
"minimum_files_required": "⚠️ Se requieren
|
| 185 |
"valid_files_not_found": "❌ No se encontraron archivos válidos",
|
| 186 |
"starting_ensemble_process": "Iniciando proceso de ensamblaje...",
|
| 187 |
"ensembling_progress": "Ensamblando... ({:.1f}%)",
|
| 188 |
-
"finalizing_ensemble_output": "Finalizando salida
|
| 189 |
"success_log": "✅ ¡Éxito!\n{}",
|
| 190 |
"error_log": "❌ ¡Error!\n{}",
|
| 191 |
"critical_error": "⛔ Error crítico: {}",
|
|
@@ -197,84 +285,95 @@
|
|
| 197 |
"completed_model": "Modelo completado {}/{}: {}",
|
| 198 |
"completed_model_progress_label": "Modelo completado {}/{}: {} -- {}.0%",
|
| 199 |
"running_command": "Ejecutando comando: {}",
|
| 200 |
-
"model_failed": "
|
| 201 |
"critical_error_with_model": "Error crítico con {}: {}",
|
| 202 |
-
"model_output_failed": "{}
|
| 203 |
-
"waiting_for_files": "Esperando
|
| 204 |
-
"waiting_for_files_progress_label": "Esperando
|
| 205 |
"performing_ensemble": "Realizando ensamblaje...",
|
| 206 |
"performing_ensemble_progress_label": "Realizando ensamblaje... -- 92.0%",
|
| 207 |
"memory_usage_before_ensemble": "Uso de memoria antes del ensamblaje: {}%",
|
| 208 |
"memory_usage_after_ensemble": "Uso de memoria después del ensamblaje: {}%",
|
| 209 |
-
"finalizing_ensemble_output_progress_label": "Finalizando salida
|
| 210 |
"ensemble_file_creation_failed": "No se pudo crear el archivo de ensamblaje: {}",
|
| 211 |
"ensemble_completed_progress_label": "¡Ensamblaje completado con éxito! -- 100.0%",
|
| 212 |
"success_output_created": "✅ ¡Éxito! Archivo de salida creado.",
|
| 213 |
-
"
|
| 214 |
-
"
|
| 215 |
-
"
|
| 216 |
-
"
|
| 217 |
-
"
|
| 218 |
-
"
|
| 219 |
-
"
|
| 220 |
-
"
|
| 221 |
-
"
|
| 222 |
-
"
|
| 223 |
-
"
|
| 224 |
-
"
|
| 225 |
-
"
|
| 226 |
-
"
|
| 227 |
-
"
|
| 228 |
-
"
|
| 229 |
-
"
|
| 230 |
-
"
|
| 231 |
-
"
|
| 232 |
-
"
|
| 233 |
-
"
|
| 234 |
-
"
|
| 235 |
-
"
|
| 236 |
-
"
|
| 237 |
-
"
|
| 238 |
-
"
|
| 239 |
-
"
|
| 240 |
-
"
|
| 241 |
-
"
|
| 242 |
-
"
|
| 243 |
-
"
|
| 244 |
-
"
|
| 245 |
-
"
|
| 246 |
-
"
|
| 247 |
-
"
|
| 248 |
-
"
|
| 249 |
-
"
|
| 250 |
-
"
|
| 251 |
-
"
|
| 252 |
-
"
|
| 253 |
-
"
|
| 254 |
-
"
|
| 255 |
-
"
|
| 256 |
-
"
|
| 257 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 258 |
"normal_method": "Método normal",
|
| 259 |
-
"mid_side_method": "Método
|
| 260 |
-
"
|
| 261 |
-
"
|
| 262 |
-
"
|
| 263 |
-
"
|
| 264 |
-
"
|
| 265 |
-
"
|
| 266 |
-
"
|
| 267 |
-
"
|
| 268 |
-
"
|
| 269 |
-
"
|
| 270 |
-
"
|
| 271 |
-
"
|
| 272 |
-
"
|
| 273 |
-
"
|
| 274 |
-
"
|
| 275 |
-
"
|
| 276 |
-
"
|
| 277 |
-
"
|
| 278 |
-
"
|
| 279 |
-
"
|
| 280 |
}
|
|
|
|
| 17 |
"overlap": "Solapamiento",
|
| 18 |
"overlap_info": "Recomendado: 2-10 (valores más altos mejoran la calidad pero requieren más VRAM)",
|
| 19 |
"tta_boost": "Impulso TTA",
|
|
|
|
| 20 |
"phase_fix": "Corrección de fase",
|
| 21 |
+
"use_phase_correction": "Aplicar Corrección de Fase Avanzada",
|
| 22 |
"phase_fix_info": "Corrección avanzada de fase para pistas instrumentales",
|
| 23 |
"instrumental": "Instrumental",
|
|
|
|
| 24 |
"process": "🚀 Procesar",
|
| 25 |
"reset": "🧹 Restablecer",
|
| 26 |
"status": "Estado",
|
|
|
|
| 79 |
"downloaded_file": "Archivo descargado",
|
| 80 |
"cookie_management": "🍪 Gestión de cookies",
|
| 81 |
"upload_cookies_txt": "Subir Cookies.txt",
|
| 82 |
+
"cookie_info": "<div style=\"margin-left:15px; font-size:0.95em\">**📌 ¿Por qué es necesario?**<br>- Acceso a contenido con restricción de edad<br>- Descarga de videos privados/no listados<br>- Evitar restricciones regionales<br>- Evitar límites de descarga de YouTube<br><br>**⚠️ Notas Importantes**<br>- ¡NUNCA compartas tus archivos de cookies!<br>- Actualiza cookies cuando:<br> • Obtengas errores \"403 Prohibido\"<br> • Las descargas se detengan de repente<br> • Veas mensajes \"Sesión expirada\"<br><br>**🔄 Pasos de Renovación**<br>1. Instala esta <a href=\"https://chromewebstore.google.com/detail/get-cookiestxt-clean/ahmnmhfbokciafffnknlekllgcnafnie\" target=\"_blank\">extensión de Chrome</a><br>2. Inicia sesión en YouTube en Chrome<br>3. Haz clic en el icono de la extensión → \"Exportar\"<br>4. Sube el archivo descargado aquí<br><br>**⏳ Vida Útil de Cookies**<br>- Sesiones normales: 24 horas<br>- Operaciones sensibles: 1 hora<br>- Cambio de contraseña: Invalidación inmediata</div>",
|
| 83 |
+
"ensemble_output_copied": "✅ Salida del ensamblaje copiada a {}",
|
| 84 |
+
"error_copying_ensemble_output": "❌ Error al copiar salida del ensamblaje: {}",
|
| 85 |
+
"drive_mounted_copying_ensemble": "Google Drive montado. Copiando salida del ensamblaje...",
|
| 86 |
+
"drive_already_mounted_copying_ensemble": "Google Drive ya montado. Copiando salida del ensamblaje...",
|
| 87 |
+
"no_ensemble_output_files_found": "❌ No se encontraron archivos de salida del ensamblaje.",
|
| 88 |
+
"drive_mounted_copying_files": "Google Drive montado. Copiando archivos...",
|
| 89 |
+
"drive_already_mounted_copying_files": "Google Drive ya montado. Copiando archivos...",
|
| 90 |
+
"files_copied_to_drive": "✅ Archivos copiados a {}",
|
| 91 |
+
"error_copying_files": "❌ Error al copiar archivos: {}",
|
| 92 |
+
"mounting_drive": "Montando Google Drive...",
|
| 93 |
+
"settings_saved": "Configuración guardada en config.json: Idioma={lang}, Método={method}, Puerto={port}",
|
| 94 |
+
"selected_language": "Idioma seleccionado: {lang}",
|
| 95 |
+
"available_languages": "Idiomas disponibles: {langs}",
|
| 96 |
+
"select_language_prompt": "Selecciona un idioma (predeterminado: {default}): ",
|
| 97 |
+
"invalid_language": "¡Selección de idioma inválida! Usando idioma predeterminado ({default}).",
|
| 98 |
+
"available_sharing_methods": "Métodos de compartición disponibles: gradio, localtunnel, ngrok",
|
| 99 |
+
"select_sharing_method_prompt": "Elige un método de compartición (predeterminado: {default}): ",
|
| 100 |
+
"enter_ngrok_token_prompt": "Ingresa tu token de Ngrok (predeterminado: {default}): ",
|
| 101 |
+
"ngrok_token_required": "Error: ¡Se requiere un token de Ngrok para el método ngrok!",
|
| 102 |
+
"enter_port_prompt": "Ingresa el número de puerto (predeterminado: {default}): ",
|
| 103 |
+
"opening_previous_url": "Abriendo URL anterior: {url}",
|
| 104 |
+
"open_url_manually": "Abre la URL manualmente en Colab: {url}",
|
| 105 |
+
"invalid_previous_url": "URL anterior inválida: {url}, no abierta.",
|
| 106 |
+
"starting_method": "Iniciando {method} en el puerto {port}...",
|
| 107 |
+
"process_stopped": "\n🛑 Proceso detenido por el usuario",
|
| 108 |
+
"selected_port": "Puerto seleccionado: {port}",
|
| 109 |
+
"starting_gradio_with_sharing": "Iniciando Gradio con compartición integrada...",
|
| 110 |
+
"starting_localtunnel": "Iniciando Localtunnel en el puerto {port}...",
|
| 111 |
+
"share_link": "Enlace para compartir: {url}",
|
| 112 |
+
"password_ip": "Contraseña IP: {ip}",
|
| 113 |
+
"starting_ngrok": "Iniciando Ngrok en el puerto {port}...",
|
| 114 |
+
"ngrok_url": "URL de Ngrok: {url}",
|
| 115 |
+
"ngrok_error": "Error al iniciar Ngrok: {error}",
|
| 116 |
+
"auto_apollo_chunk_size": "Tamaño de fragmento Apollo",
|
| 117 |
+
"auto_apollo_chunk_size_info": "Tamaño de fragmento para mejora Apollo (recomendado: 19 para modelo universal)",
|
| 118 |
+
"auto_apollo_overlap": "Solapamiento Apollo",
|
| 119 |
+
"auto_enhancing_with_apollo": "Mejorando con Apollo ({}/{} archivos)...",
|
| 120 |
+
"auto_apollo_processing_completed": "¡Procesamiento Apollo completado!",
|
| 121 |
+
"auto_apollo_overlap_info": "Solapamiento para mejora Apollo (recomendado: 2)",
|
| 122 |
+
"apollo_chunk_size": "Tamaño de fragmento Apollo",
|
| 123 |
+
"apollo_chunk_size_info": "Tamaño de fragmento para mejora Apollo (recomendado: 19 para modelo universal)",
|
| 124 |
+
"apollo_overlap": "Solapamiento Apollo",
|
| 125 |
+
"enhance_with_apollo": "Mejorar con Apollo",
|
| 126 |
+
"apollo_enhancement_info": "Mejora de la calidad del sonido",
|
| 127 |
+
"enhancing_with_apollo": "Mejorando con Apollo ({}/{} archivos)...",
|
| 128 |
+
"apollo_processing_completed": "¡Procesamiento Apollo completado!",
|
| 129 |
+
"apollo_overlap_info": "Solapamiento para mejora Apollo (recomendado: 2)",
|
| 130 |
+
"selected_models": "Modelos seleccionados",
|
| 131 |
+
"save_preset": "Guardar preajuste",
|
| 132 |
+
"delete_preset": "Eliminar preajuste",
|
| 133 |
+
"refresh_presets": "Actualizar preajustes",
|
| 134 |
+
"preset_name": "Nombre del preajuste",
|
| 135 |
+
"select_preset": "Seleccionar preajuste",
|
| 136 |
+
"add_favorite": "Añadir a favoritos",
|
| 137 |
+
"apply_matchering": "Aplicar Matchering",
|
| 138 |
+
"matchering_info": "Aplicar Matchering para mejorar el masterizado y el balance del audio",
|
| 139 |
+
"matchering_passes": "Pasadas de Matchering",
|
| 140 |
+
"matchering_passes_info": "Número de iteraciones de Matchering (1-5, valores más altos mejoran la calidad pero toman más tiempo)",
|
| 141 |
+
"matchering_processing": "Aplicando Matchering... ({:.1f}%)",
|
| 142 |
+
"matchering_completed": "✅ ¡Procesamiento de Matchering completado!",
|
| 143 |
+
"matchering_error": "❌ Error durante Matchering: {}",
|
| 144 |
+
"invalid_method": "Error: ¡Método inválido! Usa 'gradio', 'localtunnel' o 'ngrok'.",
|
| 145 |
+
"language_changed_message": "Idioma cambiado. Por favor, reinicia la celda.",
|
| 146 |
+
"backend_settings": "🚀 Configuración de backend",
|
| 147 |
+
"inference_backend": "Backend de inferencia",
|
| 148 |
+
"optimization_mode": "Modo de optimización",
|
| 149 |
+
"optimization_mode_info": "Selección de modo de optimización PyTorch",
|
| 150 |
+
"mixed_precision_amp": "Precisión mixta (AMP)",
|
| 151 |
+
"mixed_precision_info": "Inferencia 2x más rápida - recomendado",
|
| 152 |
+
"tf32_acceleration": "Aceleración TF32",
|
| 153 |
+
"tf32_acceleration_info": "Aumento de velocidad extra para RTX 30xx+",
|
| 154 |
+
"cudnn_benchmark": "Benchmark cuDNN",
|
| 155 |
+
"cudnn_benchmark_info": "Más lento en la primera ejecución, mucho más rápido en ejecuciones posteriores",
|
| 156 |
+
"ultra_optimized_pytorch": "Backend PyTorch ULTRA-OPTIMIZADO",
|
| 157 |
+
"default_active_max_speed": "Activo por defecto - Optimización de velocidad máxima",
|
| 158 |
+
"channels_last_mode": "El más rápido para GPUs RTX",
|
| 159 |
+
"compile_mode": "Velocidad extra para PyTorch 2.0+",
|
| 160 |
+
"default_mode": "Estándar",
|
| 161 |
+
"tta_info": "Aumenta la calidad pero ralentiza el procesamiento",
|
| 162 |
+
"instrumental_info": "Normalmente produce 2 salidas, pero a veces solo una - ¡tienes que probar! :)",
|
| 163 |
"manual_ensemble_tab": "🎚️ Ensamblaje manual",
|
| 164 |
"input_sources": "📂 Fuentes de entrada",
|
| 165 |
"refresh": "🔄 Actualizar",
|
|
|
|
| 172 |
"result_preview_tab": "🎧 Vista previa del resultado",
|
| 173 |
"ensembled_output": "Salida ensamblada",
|
| 174 |
"processing_log_tab": "📋 Registro de procesamiento",
|
| 175 |
+
"processing_details": "Detalles del procesamiento",
|
| 176 |
"process_ensemble": "⚡ Procesar ensamblaje",
|
| 177 |
"languages_tab": "🌐 Idiomas",
|
| 178 |
"Vocal Models": "Modelos vocales",
|
| 179 |
"Instrumental Models": "Modelos instrumentales",
|
| 180 |
+
"4-Stem Models": "Modelos de 4 pistas",
|
| 181 |
"Denoise Models": "Modelos de eliminación de ruido",
|
| 182 |
"Dereverb Models": "Modelos de eliminación de reverberación",
|
| 183 |
"Other Models": "Otros modelos",
|
| 184 |
+
"Other Shapes": "Otros modelos",
|
| 185 |
+
"ensemble_files_description": "Combina múltiples archivos de audio en una sola salida usando métodos de ensamblaje especificados.",
|
| 186 |
+
"ensemble_files_help": "Ruta a todos los archivos de audio para ensamblaje",
|
| 187 |
+
"ensemble_type_help": "Uno de: avg_wave, median_wave, min_wave, max_wave, avg_fft, median_fft, min_fft, max_fft",
|
| 188 |
+
"ensemble_weights_help": "Pesos para crear el ensamblaje. El número de pesos debe ser igual al número de archivos",
|
| 189 |
"ensemble_output_help": "Ruta al archivo WAV donde se almacenará el resultado del ensamblaje",
|
| 190 |
"ensemble_type_print": "Tipo de ensamblaje: {}",
|
| 191 |
"num_input_files_print": "Número de archivos de entrada: {}",
|
|
|
|
| 196 |
"reading_chunk_print": "Leyendo fragmento del archivo: {} (inicio: {}s, duración: {}s)",
|
| 197 |
"chunk_result_shape_print": "Forma del resultado del fragmento: {}",
|
| 198 |
"ensemble_completed_print": "Ensamblaje completado. Salida guardada en: {}",
|
| 199 |
+
"file_deletion_error": "{} no se pudo eliminar: {}",
|
| 200 |
"directory_not_exist_warning": "⚠️ El directorio no existe: {}",
|
| 201 |
"not_a_directory_warning": "⚠️ La ruta no es un directorio: {}",
|
| 202 |
"item_deletion_error": "⚠️ Error al eliminar {}: {}",
|
|
|
|
| 205 |
"error": "🔥 Error: {}",
|
| 206 |
"ffmpeg_error": "Error de FFmpeg ({}): {}",
|
| 207 |
"file_saved_successfully": "Archivo guardado con éxito: {}",
|
| 208 |
+
"total_files_found": "Total de archivos encontrados: {}. Tasa de muestreo utilizada: {}",
|
| 209 |
"total_progress": "Progreso total",
|
| 210 |
+
"detailed_pbar_enabled": "Barra de progreso detallada habilitada: {}",
|
| 211 |
"loaded_audio": "Audio cargado: {}, forma: {}",
|
| 212 |
"cannot_read_track": "No se puede leer la pista: {}",
|
| 213 |
"error_message": "Mensaje de error: {}",
|
| 214 |
+
"demudding_track": "Limpiando pista (remezcla de fase - instrumental): {}",
|
| 215 |
"elapsed_time": "Tiempo transcurrido: {:.2f} segundos.",
|
| 216 |
"proc_folder_description": "Procesa archivos de audio en una carpeta usando un modelo especificado.",
|
| 217 |
"model_type_help": "Tipo de modelo (bandit, bs_roformer, mdx23c, etc.)",
|
| 218 |
"config_path_help": "Ruta al archivo de configuración",
|
| 219 |
"demud_phaseremix_help": "Habilitar remezcla de fase para instrumental",
|
| 220 |
"start_checkpoint_help": "Punto de control inicial para pesos válidos",
|
| 221 |
+
"input_folder_help": "Carpeta con mezclas a procesar",
|
| 222 |
+
"audio_path_help": "Ruta a un solo archivo de audio a procesar",
|
| 223 |
+
"store_dir_help": "Ruta para almacenar resultados",
|
| 224 |
"device_ids_help": "Lista de IDs de GPU",
|
| 225 |
+
"extract_instrumental_help": "Invertir voces para obtener instrumental si se proporciona",
|
| 226 |
+
"disable_detailed_pbar_help": "Desactivar barra de progreso detallada en demix",
|
| 227 |
"force_cpu_help": "Forzar el uso de CPU incluso si CUDA está disponible",
|
| 228 |
"flac_file_help": "Generar archivo FLAC en lugar de WAV",
|
| 229 |
"export_format_help": "Formato de exportación y tipo PCM",
|
| 230 |
"pcm_type_help": "Tipo PCM para archivos FLAC",
|
| 231 |
"use_tta_help": "Habilitar aumento en tiempo de prueba",
|
| 232 |
"lora_checkpoint_help": "Punto de control inicial para pesos LoRA",
|
| 233 |
+
"cuda_available": "CUDA está disponible, use --force_cpu para desactivarlo.",
|
| 234 |
"using_device": "Usando dispositivo: {}",
|
| 235 |
"instruments_print": "Instrumentos: {}",
|
| 236 |
"model_load_time": "Tiempo de carga del modelo: {:.2f} seg",
|
| 237 |
"invalid_url": "❌ URL inválida",
|
| 238 |
"cookie_file_updated": "✅ ¡Archivo de cookies actualizado!",
|
| 239 |
+
"cookie_installation_error": "⚠️ Error de instalación de cookies: {}",
|
| 240 |
+
"google_drive_mount_error": "❌ Error de conexión de Google Drive: {}",
|
| 241 |
+
"file_size_zero_error": "Tamaño de archivo cero o archivo no creado",
|
| 242 |
"google_drive_error": "❌ Error de descarga de Google Drive: {}",
|
| 243 |
+
"wav_conversion_failed": "Conversión WAV fallida",
|
| 244 |
"download_error": "❌ Error de descarga: {}",
|
| 245 |
+
"file_copied_to_drive": "Archivo copiado a Google Drive: {}",
|
| 246 |
+
"copy_to_drive_error": "❌ Error al copiar a Google Drive: {}",
|
| 247 |
+
"direct_download_failed": "Falló la descarga directa del archivo",
|
| 248 |
+
"direct_download_error": "❌ Error de descarga directa: {}",
|
| 249 |
+
"invalid_google_drive_url": "❌ URL de Google Drive no válida",
|
| 250 |
+
"continuing_without_google_drive": "Falló la conexión con Google Drive, continuando sin él...",
|
| 251 |
+
"skipping_drive_copy_no_mount": "⚠️ Google Drive no montado, omitiendo copia a Drive...",
|
| 252 |
"download_success": "🎉 ¡Descargado con éxito!",
|
| 253 |
+
"download_failed": "❌ Descarga fallida",
|
| 254 |
"no_output_files_found": "❌ No se encontraron archivos de salida.",
|
| 255 |
"output_refreshed_successfully": "✅ ¡Salida actualizada con éxito!",
|
| 256 |
+
"error_refreshing_output": "❌ Error al actualizar salida: {}",
|
| 257 |
"starting_audio_separation": "Iniciando separación de audio...",
|
| 258 |
"processing_audio": "Procesando audio",
|
| 259 |
"separating_audio": "Separando audio... ({:.1f}%)",
|
| 260 |
+
"separation_complete": "¡Separación completa!",
|
| 261 |
+
"progress_parsing_error": "Error de análisis de progreso: {}",
|
| 262 |
"error_occurred": "Ocurrió un error: {}",
|
| 263 |
"separation_process_completed": "¡Proceso de separación completado!",
|
| 264 |
"no_audio_file_error": "❌ No se proporcionó archivo de audio y no hay archivo existente en el directorio de entrada.",
|
| 265 |
"no_input_progress_label": "Error: No se proporcionó entrada -- 0.0%",
|
| 266 |
+
"processing_audio_print": "Procesando audio desde: {} usando modelo: {}",
|
| 267 |
"starting_audio_separation_progress_label": "Iniciando separación de audio... -- 0.0%",
|
| 268 |
"separating_audio_progress_label": "Separando audio... -- {}.0%",
|
| 269 |
"audio_processing_completed_progress_label": "¡Procesamiento de audio completado! -- 100.0%",
|
| 270 |
"audio_processing_completed": "✅ ¡Procesamiento de audio completado!",
|
| 271 |
"error_occurred_progress_label": "Ocurrió un error -- 0.0%",
|
| 272 |
+
"minimum_files_required": "⚠️ Se requieren mínimo 2 archivos",
|
| 273 |
"valid_files_not_found": "❌ No se encontraron archivos válidos",
|
| 274 |
"starting_ensemble_process": "Iniciando proceso de ensamblaje...",
|
| 275 |
"ensembling_progress": "Ensamblando... ({:.1f}%)",
|
| 276 |
+
"finalizing_ensemble_output": "Finalizando salida de ensamblaje...",
|
| 277 |
"success_log": "✅ ¡Éxito!\n{}",
|
| 278 |
"error_log": "❌ ¡Error!\n{}",
|
| 279 |
"critical_error": "⛔ Error crítico: {}",
|
|
|
|
| 285 |
"completed_model": "Modelo completado {}/{}: {}",
|
| 286 |
"completed_model_progress_label": "Modelo completado {}/{}: {} -- {}.0%",
|
| 287 |
"running_command": "Ejecutando comando: {}",
|
| 288 |
+
"model_failed": "Modelo {} falló: {}",
|
| 289 |
"critical_error_with_model": "Error crítico con {}: {}",
|
| 290 |
+
"model_output_failed": "{} no pudo producir salida",
|
| 291 |
+
"waiting_for_files": "Esperando que todos los archivos estén listos...",
|
| 292 |
+
"waiting_for_files_progress_label": "Esperando que todos los archivos estén listos... -- 90.0%",
|
| 293 |
"performing_ensemble": "Realizando ensamblaje...",
|
| 294 |
"performing_ensemble_progress_label": "Realizando ensamblaje... -- 92.0%",
|
| 295 |
"memory_usage_before_ensemble": "Uso de memoria antes del ensamblaje: {}%",
|
| 296 |
"memory_usage_after_ensemble": "Uso de memoria después del ensamblaje: {}%",
|
| 297 |
+
"finalizing_ensemble_output_progress_label": "Finalizando salida de ensamblaje... -- 98.0%",
|
| 298 |
"ensemble_file_creation_failed": "No se pudo crear el archivo de ensamblaje: {}",
|
| 299 |
"ensemble_completed_progress_label": "¡Ensamblaje completado con éxito! -- 100.0%",
|
| 300 |
"success_output_created": "✅ ¡Éxito! Archivo de salida creado.",
|
| 301 |
+
"phase_fixer_tab": "🔧 Corrector de fase",
|
| 302 |
+
"batch_processing_tab": "📦 Procesamiento por lotes",
|
| 303 |
+
"batch_description": "Procesa múltiples archivos a la vez con el mismo modelo",
|
| 304 |
+
"batch_input_folder": "Carpeta de entrada",
|
| 305 |
+
"batch_input_folder_placeholder": "Ingresa la ruta de la carpeta con archivos de audio",
|
| 306 |
+
"batch_output_folder": "Carpeta de salida",
|
| 307 |
+
"batch_output_folder_placeholder": "Ingresa la ruta de la carpeta de salida",
|
| 308 |
+
"batch_file_list": "Archivos a procesar",
|
| 309 |
+
"batch_add_files": "Añadir archivos",
|
| 310 |
+
"batch_clear_list": "Limpiar lista",
|
| 311 |
+
"batch_start": "🚀 Iniciar procesamiento por lotes",
|
| 312 |
+
"batch_stop": "⏹️ Detener",
|
| 313 |
+
"batch_progress": "Progreso del lote",
|
| 314 |
+
"batch_current_file": "Procesando actualmente",
|
| 315 |
+
"batch_completed": "✅ ¡Procesamiento por lotes completado!",
|
| 316 |
+
"batch_stopped": "⏹️ Procesamiento por lotes detenido",
|
| 317 |
+
"batch_no_files": "❌ No se seleccionaron archivos para procesar",
|
| 318 |
+
"source_model": "Modelo fuente (Voces)",
|
| 319 |
+
"source_model_info": "Modelo para extraer voces/datos de fase",
|
| 320 |
+
"target_model": "Modelo objetivo (Instrumental)",
|
| 321 |
+
"target_model_info": "Modelo al que se aplica la corrección de fase",
|
| 322 |
+
"custom_models_tab": "⚡ Modelos personalizados",
|
| 323 |
+
"Custom Models": "Modelos personalizados",
|
| 324 |
+
"add_custom_model": "➕ Añadir modelo personalizado",
|
| 325 |
+
"custom_model_name": "Nombre del modelo",
|
| 326 |
+
"custom_model_name_placeholder": "Ingresa un nombre para tu modelo",
|
| 327 |
+
"checkpoint_url": "URL del Checkpoint (.ckpt/.pth)",
|
| 328 |
+
"checkpoint_url_placeholder": "https://huggingface.co/.../model.ckpt",
|
| 329 |
+
"config_url": "URL de configuración (.yaml)",
|
| 330 |
+
"config_url_placeholder": "https://huggingface.co/.../config.yaml",
|
| 331 |
+
"custom_py_url": "URL de archivo Python personalizado (opcional)",
|
| 332 |
+
"custom_py_url_placeholder": "https://huggingface.co/.../bs_roformer.py",
|
| 333 |
+
"auto_detect_type": "Detectar tipo de modelo automáticamente",
|
| 334 |
+
"model_type": "Tipo de modelo",
|
| 335 |
+
"add_model_btn": "➕ Añadir modelo",
|
| 336 |
+
"custom_models_list": "📋 Lista de modelos personalizados",
|
| 337 |
+
"delete_model": "🗑️ Eliminar",
|
| 338 |
+
"no_custom_models": "Aún no se han añadido modelos personalizados",
|
| 339 |
+
"model_added_success": "✅ ¡Modelo añadido con éxito!",
|
| 340 |
+
"model_deleted_success": "✅ ¡Modelo eliminado con éxito!",
|
| 341 |
+
"model_add_error": "❌ Error al añadir el modelo: {}",
|
| 342 |
+
"model_delete_error": "❌ Error al eliminar el modelo: {}",
|
| 343 |
+
"refresh_models": "🔄 Actualizar",
|
| 344 |
+
"custom_model_info": "Añade modelos personalizados proporcionando enlaces de descarga. Los modelos se descargarán automáticamente cuando se utilicen.",
|
| 345 |
+
"select_model_to_delete": "Selecciona un modelo para eliminar",
|
| 346 |
+
"downloading_model_file": "Descargando modelo: {}",
|
| 347 |
+
"downloading_file_progress": "Descargando: {} - {}%",
|
| 348 |
+
"chunk_size_mode": "Modo de Tamaño de Fragmento",
|
| 349 |
+
"chunk_size_mode_info": "base: presets estándar | custom: introduce tu propio valor | yaml: leer del YAML del modelo",
|
| 350 |
+
"chunk_size_custom_label": "Tamaño de Fragmento Personalizado",
|
| 351 |
+
"chunk_size_custom_info": "Introduce un valor personalizado de tamaño de fragmento",
|
| 352 |
+
"chunk_size_yaml_label": "Tamaño de Fragmento YAML",
|
| 353 |
+
"chunk_size_yaml_display_info": "Se leerá del archivo de configuración YAML del modelo tras la descarga",
|
| 354 |
+
"chunk_size_yaml_not_downloaded": "⏳ YAML del modelo aún no descargado — el valor se leerá durante el procesamiento",
|
| 355 |
+
"chunk_size_yaml_detected": "✅ Del YAML: {}",
|
| 356 |
+
"remove_favorite": "Eliminar de Favoritos",
|
| 357 |
"normal_method": "Método normal",
|
| 358 |
+
"mid_side_method": "Método Mid-Side",
|
| 359 |
+
"apollo_processing_method": "Método de Procesamiento Apollo",
|
| 360 |
+
"apollo_normal_model": "Modelo Normal Apollo",
|
| 361 |
+
"apollo_mid_side_model": "Modelo Mid-Side Apollo",
|
| 362 |
+
"scale_factor": "Factor de Escala",
|
| 363 |
+
"scale_factor_info": "Factor de escala de fase (recomendado: 1.4)",
|
| 364 |
+
"source_file_label": "Archivo Fuente (Voces)",
|
| 365 |
+
"target_file_label": "Archivo Destino (Instrumental)",
|
| 366 |
+
"run_phase_fixer": "🔧 Ejecutar Corrector de Fase",
|
| 367 |
+
"phase_fixed_output": "Salida con Fase Corregida",
|
| 368 |
+
"phase_fixer_settings": "Ajustes del Corrector de Fase",
|
| 369 |
+
"low_cutoff": "Corte Bajo (Hz)",
|
| 370 |
+
"high_cutoff": "Corte Alto (Hz)",
|
| 371 |
+
"low_cutoff_info": "Frecuencia de corte baja para la corrección de fase",
|
| 372 |
+
"high_cutoff_info": "Frecuencia de corte alta para la corrección de fase",
|
| 373 |
+
"no_preset_name_provided": "⚠️ No se proporcionó nombre de preset",
|
| 374 |
+
"no_models_selected_for_preset": "⚠️ No se seleccionaron modelos para el preset",
|
| 375 |
+
"preset_saved": "✅ ¡Preset '{}' guardado!",
|
| 376 |
+
"please_upload_both_files": "⚠️ Por favor sube tanto el archivo fuente como el destino",
|
| 377 |
+
"processing_log_placeholder": "El registro de procesamiento aparecerá aquí...",
|
| 378 |
+
"enter_preset_name": "Introduce el nombre del preset..."
|
| 379 |
}
|
assets/i18n/languages/fr_fr.json
CHANGED
|
@@ -209,7 +209,6 @@
|
|
| 209 |
"finalizing_ensemble_output_progress_label": "Finalisation de la sortie de l'assemblage... -- 98.0%",
|
| 210 |
"ensemble_file_creation_failed": "Le fichier d'assemblage n'a pas pu être créé : {}",
|
| 211 |
"ensemble_completed_progress_label": "Assemblage terminé avec succès ! -- 100.0%",
|
| 212 |
-
"success_output_created": "✅ Succès ! Fichier de sortie créé.",
|
| 213 |
"drive_mounted_copying_ensemble": "Google Drive monté. Copie de la sortie de l'assemblage...",
|
| 214 |
"drive_already_mounted_copying_ensemble": "Google Drive déjà monté. Copie de la sortie de l'assemblage...",
|
| 215 |
"no_ensemble_output_files_found": "❌ Aucun fichier de sortie d'assemblage trouvé.",
|
|
@@ -269,5 +268,90 @@
|
|
| 269 |
"select_preset": "Sélectionner un Préréglage",
|
| 270 |
"add_favorite": "Ajouter aux Favoris",
|
| 271 |
"apollo_midside_model_info": "Modèle à utiliser pour le traitement centre/côté (optionnel)",
|
| 272 |
-
|
| 273 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 209 |
"finalizing_ensemble_output_progress_label": "Finalisation de la sortie de l'assemblage... -- 98.0%",
|
| 210 |
"ensemble_file_creation_failed": "Le fichier d'assemblage n'a pas pu être créé : {}",
|
| 211 |
"ensemble_completed_progress_label": "Assemblage terminé avec succès ! -- 100.0%",
|
|
|
|
| 212 |
"drive_mounted_copying_ensemble": "Google Drive monté. Copie de la sortie de l'assemblage...",
|
| 213 |
"drive_already_mounted_copying_ensemble": "Google Drive déjà monté. Copie de la sortie de l'assemblage...",
|
| 214 |
"no_ensemble_output_files_found": "❌ Aucun fichier de sortie d'assemblage trouvé.",
|
|
|
|
| 268 |
"select_preset": "Sélectionner un Préréglage",
|
| 269 |
"add_favorite": "Ajouter aux Favoris",
|
| 270 |
"apollo_midside_model_info": "Modèle à utiliser pour le traitement centre/côté (optionnel)",
|
| 271 |
+
"language_changed_message": "Langue modifiée. Veuillez redémarrer la cellule.",
|
| 272 |
+
"Other Shapes": "Autres modèles",
|
| 273 |
+
"use_phase_correction": "Appliquer la correction de phase avancée",
|
| 274 |
+
"google_drive_mount_error": "❌ Erreur de liaison Google Drive : {}",
|
| 275 |
+
"file_copied_to_drive": "Fichier copié sur Google Drive : {}",
|
| 276 |
+
"copy_to_drive_error": "❌ Erreur lors de la copie sur Google Drive : {}",
|
| 277 |
+
"direct_download_failed": "Échec du téléchargement direct du fichier",
|
| 278 |
+
"direct_download_error": "❌ Erreur de téléchargement direct : {}",
|
| 279 |
+
"invalid_google_drive_url": "❌ URL Google Drive non valide",
|
| 280 |
+
"continuing_without_google_drive": "La connexion Google Drive a échoué, poursuite sans...",
|
| 281 |
+
"skipping_drive_copy_no_mount": "⚠️ Google Drive non monté, omission de la copie sur Drive...",
|
| 282 |
+
"auto_apollo_chunk_size": "Taille de fragment Apollo",
|
| 283 |
+
"auto_apollo_chunk_size_info": "Taille de fragment pour l'amélioration Apollo (recommandé : 19 pour le modèle universel)",
|
| 284 |
+
"auto_apollo_overlap": "Chevauchement Apollo",
|
| 285 |
+
"auto_enhancing_with_apollo": "Amélioration avec Apollo ({}/{} fichiers)...",
|
| 286 |
+
"auto_apollo_processing_completed": "Traitement Apollo terminé !",
|
| 287 |
+
"auto_apollo_overlap_info": "Chevauchement pour l'amélioration Apollo (recommandé : 2)",
|
| 288 |
+
"apply_matchering": "Appliquer Matchering",
|
| 289 |
+
"matchering_info": "Appliquer Matchering pour améliorer le mastering et l'équilibre audio",
|
| 290 |
+
"matchering_passes": "Passages Matchering",
|
| 291 |
+
"matchering_passes_info": "Nombre d'itérations Matchering (1-5, des valeurs plus élevées améliorent la qualité mais prennent plus de temps)",
|
| 292 |
+
"matchering_processing": "Application de Matchering... ({:.1f}%)",
|
| 293 |
+
"matchering_completed": "✅ Traitement Matchering terminé !",
|
| 294 |
+
"matchering_error": "❌ Erreur pendant Matchering : {}",
|
| 295 |
+
"backend_settings": "🚀 Paramètres Backend",
|
| 296 |
+
"inference_backend": "Backend d'Inférence",
|
| 297 |
+
"optimization_mode": "Mode d'Optimisation",
|
| 298 |
+
"optimization_mode_info": "Sélection du mode d'optimisation PyTorch",
|
| 299 |
+
"mixed_precision_amp": "Précision Mixte (AMP)",
|
| 300 |
+
"mixed_precision_info": "Inférence 2x plus rapide - recommandé",
|
| 301 |
+
"tf32_acceleration": "Accélération TF32",
|
| 302 |
+
"tf32_acceleration_info": "Augmentation de vitesse supplémentaire pour RTX 30xx+",
|
| 303 |
+
"cudnn_benchmark": "Benchmark cuDNN",
|
| 304 |
+
"cudnn_benchmark_info": "Plus lent lors de la première exécution, beaucoup plus rapide lors des exécutions suivantes",
|
| 305 |
+
"ultra_optimized_pytorch": "Backend PyTorch ULTRA-OPTIMISÉ",
|
| 306 |
+
"default_active_max_speed": "Actif par défaut - Optimisation de vitesse maximale",
|
| 307 |
+
"channels_last_mode": "Le plus rapide pour les GPU RTX",
|
| 308 |
+
"compile_mode": "Vitesse supplémentaire pour PyTorch 2.0+",
|
| 309 |
+
"default_mode": "Standard",
|
| 310 |
+
"phase_fixer_tab": "🔧 Correcteur de phase",
|
| 311 |
+
"batch_processing_tab": "📦 Traitement par lots",
|
| 312 |
+
"batch_description": "Traiter plusieurs fichiers à la fois avec le même modèle",
|
| 313 |
+
"batch_input_folder": "Dossier d'entrée",
|
| 314 |
+
"batch_input_folder_placeholder": "Entrez le chemin du dossier contenant les fichiers audio",
|
| 315 |
+
"batch_output_folder": "Dossier de sortie",
|
| 316 |
+
"batch_output_folder_placeholder": "Entrez le chemin du dossier de sortie",
|
| 317 |
+
"batch_file_list": "Fichiers à traiter",
|
| 318 |
+
"batch_add_files": "Ajouter des fichiers",
|
| 319 |
+
"batch_clear_list": "Vider la liste",
|
| 320 |
+
"batch_start": "🚀 Démarrer le traitement par lots",
|
| 321 |
+
"batch_stop": "⏹️ Arrêter",
|
| 322 |
+
"batch_progress": "Progression du lot",
|
| 323 |
+
"batch_current_file": "En cours de traitement",
|
| 324 |
+
"batch_completed": "✅ Traitement par lots terminé !",
|
| 325 |
+
"batch_stopped": "⏹️ Traitement par lots arrêté",
|
| 326 |
+
"batch_no_files": "❌ Aucun fichier sélectionné pour le traitement",
|
| 327 |
+
"source_model": "Modèle source (Voix)",
|
| 328 |
+
"source_model_info": "Modèle pour extraire les voix/données de phase",
|
| 329 |
+
"target_model": "Modèle cible (Instrumental)",
|
| 330 |
+
"target_model_info": "Modèle auquel appliquer la correction de phase",
|
| 331 |
+
"custom_models_tab": "⚡ Modèles personnalisés",
|
| 332 |
+
"Custom Models": "Modèles personnalisés",
|
| 333 |
+
"add_custom_model": "➕ Ajouter un modèle personnalisé",
|
| 334 |
+
"custom_model_name": "Nom du modèle",
|
| 335 |
+
"custom_model_name_placeholder": "Entrez un nom pour votre modèle",
|
| 336 |
+
"checkpoint_url": "URL du Checkpoint (.ckpt/.pth)",
|
| 337 |
+
"checkpoint_url_placeholder": "https://huggingface.co/.../model.ckpt",
|
| 338 |
+
"config_url": "URL de configuration (.yaml)",
|
| 339 |
+
"config_url_placeholder": "https://huggingface.co/.../config.yaml",
|
| 340 |
+
"custom_py_url": "URL du fichier Python personnalisé (optionnel)",
|
| 341 |
+
"custom_py_url_placeholder": "https://huggingface.co/.../bs_roformer.py",
|
| 342 |
+
"auto_detect_type": "Détecter automatiquement le type de modèle",
|
| 343 |
+
"model_type": "Type de modèle",
|
| 344 |
+
"add_model_btn": "➕ Ajouter le modèle",
|
| 345 |
+
"custom_models_list": "📋 Liste des modèles personnalisés",
|
| 346 |
+
"delete_model": "🗑️ Supprimer",
|
| 347 |
+
"no_custom_models": "Aucun modèle personnalisé ajouté pour l'instant",
|
| 348 |
+
"model_added_success": "✅ Modèle ajouté avec succès !",
|
| 349 |
+
"model_deleted_success": "✅ Modèle supprimé avec succès !",
|
| 350 |
+
"model_add_error": "❌ Erreur lors de l'ajout du modèle : {}",
|
| 351 |
+
"model_delete_error": "❌ Erreur lors de la suppression du modèle : {}",
|
| 352 |
+
"refresh_models": "🔄 Actualiser",
|
| 353 |
+
"custom_model_info": "Ajoutez des modèles personnalisés en fournissant des liens de téléchargement. Les modèles seront téléchargés automatiquement lors de leur utilisation.",
|
| 354 |
+
"select_model_to_delete": "Sélectionnez un modèle à supprimer",
|
| 355 |
+
"success_output_created": "✅ Succès ! Fichier de sortie créé.",
|
| 356 |
+
"downloading_model_file": "Téléchargement du modèle : {}",
|
| 357 |
+
"downloading_file_progress": "Téléchargement : {} - {}%",
|
assets/i18n/languages/hi_in.json
CHANGED
|
@@ -17,11 +17,10 @@
|
|
| 17 |
"overlap": "ओवरलैप",
|
| 18 |
"overlap_info": "अनुशंसित: 2-10 (उच्च मान गुणवत्ता बढ़ाते हैं लेकिन अधिक VRAM की आवश्यकता होती है)",
|
| 19 |
"tta_boost": "TTA बूस्ट",
|
| 20 |
-
"tta_info": "गुणवत्ता में सुधार करता है लेकिन प्रसंस्करण गति को धीमा करता है",
|
| 21 |
"phase_fix": "फेज सुधार",
|
|
|
|
| 22 |
"phase_fix_info": "वाद्य ट्रैक के लिए उन्नत फेज सुधार",
|
| 23 |
"instrumental": "वाद्य",
|
| 24 |
-
"instrumental_info": "आमतौर पर 2 आउटपुट देता है, लेकिन कभी-कभी एक ही आउटपुट देता है, आपको कोशिश करनी होगी :)",
|
| 25 |
"process": "🚀 प्रक्रिया करें",
|
| 26 |
"reset": "🧹 रीसेट करें",
|
| 27 |
"status": "स्थिति",
|
|
@@ -73,215 +72,274 @@
|
|
| 73 |
"ensemble_copy_status": "संयोजन आउटपुट यहाँ कॉपी किया जाएगा...",
|
| 74 |
"guidelines": "<h4>मॉडल चयन दिशानिर्देश</h4><ul><li><strong>क्रॉस-श्रेणी मिश्रण से बचें:</strong> स्वर और वाद्य मॉडल का संयोजन अवांछित मिश्रण बना सकता है</li><li><strong>विशेष मॉडल नोट्स:</strong><ul><li>द्वैत मॉडल (v1/v2) - दोनों स्टेम्स आउटपुट करते हैं</li><li>MDX23C सेपरेटर - हाइब्रिड परिणाम</li></ul></li><li><strong>सर्वोत्तम अभ्यास:</strong> एक ही श्रेणी से 3-5 समान मॉडल का उपयोग करें</li></ul><div>💡 प्रो टिप: \"VOCALS-MelBand-Roformer BigBeta5e\" + \"VOCALS-BS-Roformer_1297\" संयोजन से शुरू करें</div>",
|
| 75 |
"download_sources_tab": "⬇️ स्रोत डाउनलोड करें",
|
| 76 |
-
"direct_links": "🌐
|
| 77 |
"audio_file_url": "ऑडियो फाइल URL",
|
| 78 |
"download_from_url": "⬇️ URL से डाउनलोड करें",
|
| 79 |
"download_status": "डाउनलोड स्थिति",
|
| 80 |
"downloaded_file": "डाउनलोड की गई फाइल",
|
| 81 |
"cookie_management": "🍪 कुकी प्रबंधन",
|
| 82 |
-
"upload_cookies_txt": "
|
| 83 |
-
"cookie_info": "<div style=\"margin-left:15px; font-size:0.95em\">**📌 क्यों
|
| 84 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 85 |
"input_sources": "📂 इनपुट स्रोत",
|
| 86 |
-
"refresh": "🔄
|
| 87 |
-
"ensemble_algorithm": "
|
| 88 |
"select_audio_files": "ऑडियो फाइलें चुनें",
|
| 89 |
"available_files": "उपलब्ध फाइलें",
|
| 90 |
-
"custom_weights": "कस्टम व
|
| 91 |
"custom_weights_placeholder": "उदाहरण: 0.8, 1.2, 1.0, ...",
|
| 92 |
-
"custom_weights_info": "समान व
|
| 93 |
"result_preview_tab": "🎧 परिणाम पूर्वावलोकन",
|
| 94 |
-
"ensembled_output": "स
|
| 95 |
-
"processing_log_tab": "📋 प्रस
|
| 96 |
-
"processing_details": "प्रस
|
| 97 |
-
"process_ensemble": "⚡
|
| 98 |
-
"languages_tab": "🌐 भाषाए
|
| 99 |
"Vocal Models": "स्वर मॉडल",
|
| 100 |
"Instrumental Models": "वाद्य मॉडल",
|
| 101 |
"4-Stem Models": "4-स्टेम मॉडल",
|
| 102 |
-
"Denoise Models": "
|
| 103 |
-
"Dereverb Models": "रिवर्ब
|
| 104 |
-
"Other
|
| 105 |
-
"
|
| 106 |
-
"
|
| 107 |
-
"
|
| 108 |
-
"
|
| 109 |
-
"
|
| 110 |
-
"
|
|
|
|
| 111 |
"num_input_files_print": "इनपुट फाइलों की संख्या: {}",
|
| 112 |
-
"weights_print": "व
|
| 113 |
"output_file_print": "आउटपुट फाइल: {}",
|
| 114 |
"duration_mismatch_error": "सभी फाइलों की अवधि समान होनी चाहिए",
|
| 115 |
-
"file_not_found_error": "त्रुटि। फाइल नहीं मिली: {}। पथ जा
|
| 116 |
-
"reading_chunk_print": "फाइल से खंड पढ़ रह
|
| 117 |
-
"chunk_result_shape_print": "खंड परिणाम आकार: {}",
|
| 118 |
-
"ensemble_completed_print": "
|
| 119 |
"file_deletion_error": "{} हटाया नहीं जा सका: {}",
|
| 120 |
-
"directory_not_exist_warning": "⚠️ डायरेक्टरी मौजूद नहीं
|
| 121 |
-
"not_a_directory_warning": "⚠️ पथ डायरेक्टरी नहीं है: {}",
|
| 122 |
"item_deletion_error": "⚠️ {} हटाने में त्रुटि: {}",
|
| 123 |
-
"
|
| 124 |
-
"
|
|
|
|
| 125 |
"error": "🔥 त्रुटि: {}",
|
| 126 |
"ffmpeg_error": "FFmpeg त्रुटि ({}): {}",
|
| 127 |
-
"file_saved_successfully": "फाइल सफलतापूर्वक स
|
| 128 |
-
"total_files_found": "कुल फा
|
| 129 |
"total_progress": "कुल प्रगति",
|
| 130 |
"detailed_pbar_enabled": "विस्तृत प्रगति बार सक्षम: {}",
|
| 131 |
-
"loaded_audio": "
|
| 132 |
-
"cannot_read_track": "ट्रैक
|
| 133 |
"error_message": "त्रुटि संदेश: {}",
|
| 134 |
-
"demudding_track": "ट्रैक
|
| 135 |
"elapsed_time": "बीता हुआ समय: {:.2f} सेकंड।",
|
| 136 |
-
"proc_folder_description": "
|
| 137 |
"model_type_help": "मॉडल प्रकार (bandit, bs_roformer, mdx23c, आदि)",
|
| 138 |
"config_path_help": "कॉन्फ़िग फाइल का पथ",
|
| 139 |
-
"demud_phaseremix_help": "वाद्य के लिए फेज रीमिक्स सक्षम करें",
|
| 140 |
-
"start_checkpoint_help": "
|
| 141 |
-
"input_folder_help": "प्र
|
| 142 |
-
"audio_path_help": "
|
| 143 |
"store_dir_help": "परिणाम संग्रहीत करने का पथ",
|
| 144 |
-
"device_ids_help": "GPU
|
| 145 |
-
"extract_instrumental_help": "यदि प्रदान किया गया हो तो
|
| 146 |
-
"disable_detailed_pbar_help": "ड
|
| 147 |
-
"force_cpu_help": "CUDA उपलब्ध होने पर भी CPU का उपयोग करें",
|
| 148 |
"flac_file_help": "WAV के बजाय FLAC फाइल आउटपुट करें",
|
| 149 |
"export_format_help": "निर्यात प्रारूप और PCM प्रकार",
|
| 150 |
"pcm_type_help": "FLAC फाइलों के लिए PCM प्रकार",
|
| 151 |
-
"use_tta_help": "टेस्ट टाइम ऑगमेंटेशन सक्षम करें",
|
| 152 |
-
"lora_checkpoint_help": "LoRA व
|
| 153 |
"cuda_available": "CUDA उपलब्ध है, इसे अक्षम करने के लिए --force_cpu का उपयोग करें।",
|
| 154 |
-
"using_device": "
|
| 155 |
-
"instruments_print": "वाद्य: {}",
|
| 156 |
"model_load_time": "मॉडल लोड समय: {:.2f} सेकंड",
|
| 157 |
"invalid_url": "❌ अमान्य URL",
|
| 158 |
-
"cookie_file_updated": "✅ कुकी फाइल अपडेट
|
| 159 |
-
"cookie_installation_error": "⚠️ कुकी स्
|
| 160 |
-
"
|
|
|
|
| 161 |
"google_drive_error": "❌ Google Drive डाउनलोड त्रुटि: {}",
|
| 162 |
"wav_conversion_failed": "WAV रूपांतरण विफल",
|
| 163 |
"download_error": "❌ डाउनलोड त्रुटि: {}",
|
| 164 |
-
"
|
|
|
|
|
|
|
| 165 |
"download_failed": "❌ डाउनलोड विफल",
|
| 166 |
-
"no_output_files_found": "❌ कोई आउटपुट फाइल
|
| 167 |
-
"output_refreshed_successfully": "✅ आउटपुट सफलतापूर्वक
|
| 168 |
-
"error_refreshing_output": "❌ आउटपुट
|
| 169 |
"starting_audio_separation": "ऑडियो पृथक्करण शुरू हो रहा है...",
|
| 170 |
-
"processing_audio": "ऑडियो प्रोसेस
|
| 171 |
-
"separating_audio": "ऑडियो अलग
|
| 172 |
-
"separation_complete": "पृथक्करण पूर
|
| 173 |
"progress_parsing_error": "प्रगति पार्सिंग त्रुटि: {}",
|
| 174 |
"error_occurred": "एक त्रुटि हुई: {}",
|
| 175 |
-
"separation_process_completed": "पृथक्करण प्रक्रिया पूर
|
| 176 |
-
"no_audio_file_error": "❌ कोई ऑडियो फाइल
|
| 177 |
-
"no_input_progress_label": "त्रुटि: कोई इनपुट
|
| 178 |
-
"processing_audio_print": "ऑडियो प्रोसेस
|
| 179 |
"starting_audio_separation_progress_label": "ऑडियो पृथक्करण शुरू हो रहा है... -- 0.0%",
|
| 180 |
-
"separating_audio_progress_label": "ऑडियो अलग
|
| 181 |
-
"audio_processing_completed_progress_label": "ऑडियो प्र
|
| 182 |
-
"audio_processing_completed": "✅ ऑडियो प्र
|
| 183 |
"error_occurred_progress_label": "त्रुटि हुई -- 0.0%",
|
| 184 |
"minimum_files_required": "⚠️ न्यूनतम 2 फाइलें आवश्यक",
|
| 185 |
-
"valid_files_not_found": "❌
|
| 186 |
-
"starting_ensemble_process": "
|
| 187 |
-
"ensembling_progress": "स
|
| 188 |
-
"finalizing_ensemble_output": "
|
| 189 |
-
"success_log": "✅ सफल
|
| 190 |
"error_log": "❌ त्रुटि!\n{}",
|
| 191 |
-
"
|
| 192 |
-
"
|
| 193 |
-
"
|
| 194 |
-
"
|
| 195 |
-
"
|
| 196 |
-
"
|
| 197 |
-
"
|
| 198 |
-
"
|
| 199 |
-
"running_command": "कमांड चला रहा है: {}",
|
| 200 |
"model_failed": "मॉडल {} विफल: {}",
|
| 201 |
"critical_error_with_model": "{} के साथ गंभीर त्रुटि: {}",
|
| 202 |
-
"model_output_failed": "{} आउटपुट
|
| 203 |
-
"waiting_for_files": "सभी फाइलों के तैयार होने क
|
| 204 |
-
"waiting_for_files_progress_label": "सभी फाइलों के तैयार होने क
|
| 205 |
-
"performing_ensemble": "
|
| 206 |
-
"performing_ensemble_progress_label": "
|
| 207 |
-
"memory_usage_before_ensemble": "
|
| 208 |
-
"memory_usage_after_ensemble": "
|
| 209 |
-
"finalizing_ensemble_output_progress_label": "
|
| 210 |
-
"ensemble_file_creation_failed": "
|
| 211 |
-
"ensemble_completed_progress_label": "
|
| 212 |
-
"success_output_created": "✅ सफल
|
| 213 |
-
"
|
| 214 |
-
"
|
| 215 |
-
"
|
| 216 |
-
"
|
| 217 |
-
"
|
| 218 |
-
"
|
| 219 |
-
"
|
| 220 |
-
"
|
| 221 |
-
"
|
| 222 |
-
"
|
| 223 |
-
"
|
| 224 |
-
"
|
| 225 |
-
"
|
| 226 |
-
"
|
| 227 |
-
"
|
| 228 |
-
"
|
| 229 |
-
"
|
| 230 |
-
"
|
| 231 |
-
"
|
| 232 |
-
"
|
| 233 |
-
"
|
| 234 |
-
"
|
| 235 |
-
"
|
| 236 |
-
"starting_method": "{method} को पोर्ट {port} पर शुरू कर रहा है...",
|
| 237 |
-
"process_stopped": "\n🛑 उपयोगकर्ता द्वारा प्रक्रिया रोक दी गई",
|
| 238 |
-
"selected_port": "चयनित पोर्ट: {port}",
|
| 239 |
-
"starting_gradio_with_sharing": "Gradio को अंतर्निहित साझाकरण के साथ शुरू कर रहा है...",
|
| 240 |
-
"starting_localtunnel": "Localtunnel को पोर्ट {port} पर शुरू कर रहा है...",
|
| 241 |
-
"share_link": "साझा लिंक: {url}",
|
| 242 |
-
"password_ip": "पासवर्ड IP: {ip}",
|
| 243 |
-
"starting_ngrok": "Ngrok को पोर्ट {port} पर शुरू कर रहा है...",
|
| 244 |
-
"ngrok_url": "Ngrok URL: {url}",
|
| 245 |
-
"ngrok_error": "Ngrok शुरू करने में त्रुटि: {error}",
|
| 246 |
-
"apollo_chunk_size": "Apollo खंड आकार",
|
| 247 |
-
"apollo_chunk_size_info": "Apollo सुधार के लिए खंड आकार (यूनिवर्सल मॉडल के लिए अनुशंसित: 19)",
|
| 248 |
-
"apollo_overlap": "Apollo ओवरलैप",
|
| 249 |
-
"enhancing_with_apollo": "Apollo के साथ सुधार कर रहा है ({}/{} फाइलें)...",
|
| 250 |
-
"apollo_processing_completed": "Apollo प्रसंस्करण पूरा!",
|
| 251 |
-
"apollo_overlap_info": "Apollo सुधार के लिए ओवरलैप (अनुशंसित: 2)",
|
| 252 |
-
"invalid_method": "त्रुटि: अमान्य विधि! 'gradio', 'localtunnel', या 'ngrok' का उपयोग करें।",
|
| 253 |
-
"apollo_enhancement_settings": "Apollo सुधार सेटिंग्स",
|
| 254 |
-
"enhance_with_apollo": "Apollo के साथ सुधार करें",
|
| 255 |
-
"enhance_with_apollo_info": "पृथक्करण के बाद ऑडियो सुधार के लिए Apollo सक्षम करें",
|
| 256 |
-
"apollo_method": "Apollo विधि",
|
| 257 |
-
"apollo_method_info": "Apollo के लिए प्रसंस्करण विधि चुनें",
|
| 258 |
-
"normal_method": "सामान्य विधि",
|
| 259 |
-
"mid_side_method": "मध्य/पार्श्व विधि",
|
| 260 |
-
"apollo_normal_model": "Apollo सामान्य मॉडल",
|
| 261 |
-
"apollo_normal_model_info": "सामान्य Apollo प्रसंस्करण के लिए उपयोग करने वाला मॉडल",
|
| 262 |
-
"apollo_midside_model": "Apollo मध्य/पार्श्व मॉडल",
|
| 263 |
-
"apollo_enhancement_info": "ध्वनि गुणवत्ता सुधार",
|
| 264 |
-
"selected_models": "चयनित मॉडल",
|
| 265 |
-
"save_preset": "प्रीसेट सहेजें",
|
| 266 |
-
"delete_preset": "प्रीसेट हटाएं",
|
| 267 |
-
"refresh_presets": "प्रीसेट रिफ्रेश करें",
|
| 268 |
-
"presets_name": "प्रीसेट का नाम",
|
| 269 |
-
"select_preset": "प्रीसेट चुनें",
|
| 270 |
-
"add_favorite": "पसंदीदा में जोड़ें",
|
| 271 |
-
"apollo_midside_model_info": "मध्य/पार्श्व प्रसंस्करण के लिए उपयोग करने वाला मॉडल (वैकल्पिक)",
|
| 272 |
-
"language_changed_message": "भाषा बदली गई। कृपया सेल को पुनः शुरू करें।"
|
| 273 |
-
"selected_models": "選択されたモデル",
|
| 274 |
-
"save_preset": "プリセットを保存",
|
| 275 |
-
"delete_preset": "プリセットを削除",
|
| 276 |
-
"refresh_presets": "プリセットを更新",
|
| 277 |
-
"preset_name": "プリセット名",
|
| 278 |
-
"select_preset": "プリセットを選択",
|
| 279 |
-
"add_favorite": "お気に入りに追加",
|
| 280 |
-
"apply_matchering": "मैचरिंग लागू करें",
|
| 281 |
-
"matchering_info": "ऑडियो मास्टरिंग और संतुलन को बढ़ाने के लिए मैचरिंग लागू करें",
|
| 282 |
-
"matchering_passes": "मैचरिंग पास",
|
| 283 |
-
"matchering_passes_info": "मैचरिंग की पुनरावृत्तियों की संख्या (1-5, उच्च मान गुणवत्ता को बेहतर बनाते हैं लेकिन अधिक समय लेते हैं)",
|
| 284 |
-
"matchering_processing": "मैचरिंग लागू किया जा रहा है... ({:.1f}%)",
|
| 285 |
-
"matchering_completed": "✅ मैचरिंग प्रक्रिया पूरी हुई!",
|
| 286 |
-
"matchering_error": "❌ मैचरिंग के दौरान त्रुटि: {}"
|
| 287 |
}
|
|
|
|
| 17 |
"overlap": "ओवरलैप",
|
| 18 |
"overlap_info": "अनुशंसित: 2-10 (उच्च मान गुणवत्ता बढ़ाते हैं लेकिन अधिक VRAM की आवश्यकता होती है)",
|
| 19 |
"tta_boost": "TTA बूस्ट",
|
|
|
|
| 20 |
"phase_fix": "फेज सुधार",
|
| 21 |
+
"use_phase_correction": "उन्नत फेज सुधार लागू करें",
|
| 22 |
"phase_fix_info": "वाद्य ट्रैक के लिए उन्नत फेज सुधार",
|
| 23 |
"instrumental": "वाद्य",
|
|
|
|
| 24 |
"process": "🚀 प्रक्रिया करें",
|
| 25 |
"reset": "🧹 रीसेट करें",
|
| 26 |
"status": "स्थिति",
|
|
|
|
| 72 |
"ensemble_copy_status": "संयोजन आउटपुट यहाँ कॉपी किया जाएगा...",
|
| 73 |
"guidelines": "<h4>मॉडल चयन दिशानिर्देश</h4><ul><li><strong>क्रॉस-श्रेणी मिश्रण से बचें:</strong> स्वर और वाद्य मॉडल का संयोजन अवांछित मिश्रण बना सकता है</li><li><strong>विशेष मॉडल नोट्स:</strong><ul><li>द्वैत मॉडल (v1/v2) - दोनों स्टेम्स आउटपुट करते हैं</li><li>MDX23C सेपरेटर - हाइब्रिड परिणाम</li></ul></li><li><strong>सर्वोत्तम अभ्यास:</strong> एक ही श्रेणी से 3-5 समान मॉडल का उपयोग करें</li></ul><div>💡 प्रो टिप: \"VOCALS-MelBand-Roformer BigBeta5e\" + \"VOCALS-BS-Roformer_1297\" संयोजन से शुरू करें</div>",
|
| 74 |
"download_sources_tab": "⬇️ स्रोत डाउनलोड करें",
|
| 75 |
+
"direct_links": "🌐 डायरेक्ट लिंक्स",
|
| 76 |
"audio_file_url": "ऑडियो फाइल URL",
|
| 77 |
"download_from_url": "⬇️ URL से डाउनलोड करें",
|
| 78 |
"download_status": "डाउनलोड स्थिति",
|
| 79 |
"downloaded_file": "डाउनलोड की गई फाइल",
|
| 80 |
"cookie_management": "🍪 कुकी प्रबंधन",
|
| 81 |
+
"upload_cookies_txt": "कुकीज.txt अपलोड करें",
|
| 82 |
+
"cookie_info": "<div style=\"margin-left:15px; font-size:0.95em\">**📌 क्यों आवश्यक?**<br>- आयु-प्रतिबंधित सामग्री तक पहुंच<br>- न��जी/अप्रकाशित वीडियो डाउनलोड<br>- क्षेत्रीय प्रतिबंधों को बायपास करें<br>- यूट्यूब डाउनलोड सीमाओं से बचें<br><br>**⚠️ महत्वपूर्ण नोट्स**<br>- कभी भी अपनी कुकी फाइलें साझा न करें!<br>- जब रिफ्रेश करें:<br> • \"403 निषिद्ध\" त्रुटियां प्राप्त हो रही हों<br> • डाउनलोड अचानक रुक जाएं<br> • \"सत्र समाप्त\" संदेश देखें<br><br>**🔄 नवीनीकरण चरण**<br>1. इस <a href=\"https://chromewebstore.google.com/detail/get-cookiestxt-clean/ahmnmhfbokciafffnknlekllgcnafnie\" target=\"_blank\">क्रोम एक्सटेंशन</a> को इंस्टॉल करें<br>2. क्रोम में यूट्यूब पर लॉगिन करें<br>3. एक्सटेंशन आइकन पर क्लिक करें → \"एक्सपोर्ट\"<br>4. यहां डाउनलोड की गई फाइल अपलोड करें<br><br>**⏳ कुकी जीवनकाल**<br>- सामान्य सत्र: 24 घंटे<br>- संवेदनशील संचालन: 1 घंटा<br>- पासवर्ड परिवर्तन: तत्काल रिफ्रेश</div>",
|
| 83 |
+
"ensemble_output_copied": "✅ संयोजन आउटपुट {} में कॉपी किया गया",
|
| 84 |
+
"error_copying_ensemble_output": "❌ संयोजन आउटपुट कॉपी करने में त्रुटि: {}",
|
| 85 |
+
"drive_mounted_copying_ensemble": "गूगल ड्राइव माउंटेड। संयोजन आउटपुट कॉपी कर रहा है...",
|
| 86 |
+
"drive_already_mounted_copying_ensemble": "गूगल ड्राइव पहले से माउंटेड। संयोजन आउटपुट कॉपी कर रहा है...",
|
| 87 |
+
"no_ensemble_output_files_found": "❌ कोई संयोजन आउटपुट फाइल नहीं मिली।",
|
| 88 |
+
"drive_mounted_copying_files": "गूगल ड्राइव माउंटेड। फाइलें कॉपी कर रहा है...",
|
| 89 |
+
"drive_already_mounted_copying_files": "गूगल ड्राइव पहले से माउंटेड। फाइलें कॉपी कर रहा है...",
|
| 90 |
+
"files_copied_to_drive": "✅ फाइलें {} में कॉपी की गईं",
|
| 91 |
+
"error_copying_files": "❌ फाइलें कॉपी करने में त्रुटि: {}",
|
| 92 |
+
"mounting_drive": "गूगल ड्राइव माउंट कर रहा है...",
|
| 93 |
+
"settings_saved": "सेटिंग्स config.json में सहेजी गईं: भाषा={lang}, विधि={method}, पोर्ट={port}",
|
| 94 |
+
"selected_language": "चयनित भाषा: {lang}",
|
| 95 |
+
"available_languages": "उपलब्ध भाषाएं: {langs}",
|
| 96 |
+
"select_language_prompt": "भाषा चुनें (डिफ़ॉल्ट: {default}): ",
|
| 97 |
+
"invalid_language": "अमान्य भाषा चयन! डिफ़ॉल्ट भाषा ({default}) का उपयोग कर रहा है।",
|
| 98 |
+
"available_sharing_methods": "उपलब्ध साझाकरण विधियां: gradio, localtunnel, ngrok",
|
| 99 |
+
"select_sharing_method_prompt": "साझाकरण विधि चुनें (डिफ़ॉल्ट: {default}): ",
|
| 100 |
+
"enter_ngrok_token_prompt": "अपना Ngrok टोकन दर्ज करें (डिफ़ॉल्ट: {default}): ",
|
| 101 |
+
"ngrok_token_required": "त्रुटि: ngrok विधि के लिए Ngrok टोकन आवश्यक है!",
|
| 102 |
+
"enter_port_prompt": "पोर्ट नंबर दर्ज करें (डिफ़ॉल्ट: {default}): ",
|
| 103 |
+
"opening_previous_url": "पिछला URL खोल रहा है: {url}",
|
| 104 |
+
"open_url_manually": "Colab में URL मैन्युअल रूप से खोलें: {url}",
|
| 105 |
+
"invalid_previous_url": "अमान्य पिछला URL: {url}, नहीं खोला गया।",
|
| 106 |
+
"starting_method": "{method} को पोर्ट {port} पर शुरू कर रहा है...",
|
| 107 |
+
"process_stopped": "\n🛑 उपयोगकर्ता द्वारा प्रक्रिया रोक दी गई",
|
| 108 |
+
"selected_port": "चयनित पोर्ट: {port}",
|
| 109 |
+
"starting_gradio_with_sharing": "Gradio को अंतर्निहित साझाकरण के साथ शुरू कर रहा है...",
|
| 110 |
+
"starting_localtunnel": "Localtunnel को पोर्ट {port} प��� शुरू कर रहा है...",
|
| 111 |
+
"share_link": "साझा लिंक: {url}",
|
| 112 |
+
"password_ip": "पासवर्ड IP: {ip}",
|
| 113 |
+
"starting_ngrok": "Ngrok को पोर्ट {port} पर शुरू कर रहा है...",
|
| 114 |
+
"ngrok_url": "Ngrok URL: {url}",
|
| 115 |
+
"ngrok_error": "Ngrok शुरू करने में त्रुटि: {error}",
|
| 116 |
+
"auto_apollo_chunk_size": "Apollo खंड आकार",
|
| 117 |
+
"auto_apollo_chunk_size_info": "Apollo सुधार के लिए खंड आकार (यूनिवर्सल मॉडल के लिए अनुशंसित: 19)",
|
| 118 |
+
"auto_apollo_overlap": "Apollo ओवरलैप",
|
| 119 |
+
"auto_enhancing_with_apollo": "Apollo के साथ सुधार कर रहा है ({}/{} फाइलें)...",
|
| 120 |
+
"auto_apollo_processing_completed": "Apollo प्रसंस्करण पूरा!",
|
| 121 |
+
"auto_apollo_overlap_info": "Apollo सुधार के लिए ओवरलैप (अनुशंसित: 2)",
|
| 122 |
+
"apollo_chunk_size": "Apollo खंड आकार",
|
| 123 |
+
"apollo_chunk_size_info": "Apollo सुधार के लिए खंड आकार (यूनिवर्सल मॉडल के लिए अनुशंसित: 19)",
|
| 124 |
+
"apollo_overlap": "Apollo ओवरलैप",
|
| 125 |
+
"enhance_with_apollo": "Apollo के साथ सुधार करें",
|
| 126 |
+
"apollo_enhancement_info": "ध्वनि गुणवत्ता सुधार",
|
| 127 |
+
"enhancing_with_apollo": "Apollo के साथ सुधार कर रहा है ({}/{} फाइलें)...",
|
| 128 |
+
"apollo_processing_completed": "Apollo प्रसंस्करण पूरा!",
|
| 129 |
+
"apollo_overlap_info": "Apollo सुधार के लिए ओवरलैप (अनुशंसित: 2)",
|
| 130 |
+
"selected_models": "चयनित मॉडल",
|
| 131 |
+
"save_preset": "प्रीसेट सहेजें",
|
| 132 |
+
"delete_preset": "प्रीसेट हटाएं",
|
| 133 |
+
"refresh_presets": "प्रीसेट रिफ्रेश करें",
|
| 134 |
+
"preset_name": "प्रीसेट का नाम",
|
| 135 |
+
"select_preset": "प्रीसेट चुनें",
|
| 136 |
+
"add_favorite": "पसंदीदा में जोड़ें",
|
| 137 |
+
"apply_matchering": "मैचरिंग लागू करें",
|
| 138 |
+
"matchering_info": "ऑडियो मास्टरिंग और संतुलन को बढ़ाने के लिए मैचरिंग लागू करें",
|
| 139 |
+
"matchering_passes": "मैचरिंग पास",
|
| 140 |
+
"matchering_passes_info": "मैचरिंग की पुनरावृत्तियों की संख्या (1-5, उच्च मान गुणवत्ता को बेहतर बनाते हैं लेकिन अधिक समय लेते हैं)",
|
| 141 |
+
"matchering_processing": "मैचरिंग लागू किया जा रहा है... ({:.1f}%)",
|
| 142 |
+
"matchering_completed": "✅ मैचरिंग प्रक्रिया पूरी हुई!",
|
| 143 |
+
"matchering_error": "❌ मैचरिंग के दौरान त्रुटि: {}",
|
| 144 |
+
"invalid_method": "त्रुटि: अमान्य विधि! 'gradio', 'localtunnel', या 'ngrok' का उपयोग करें।",
|
| 145 |
+
"language_changed_message": "भाषा बदली गई। कृपया सेल को पुनः शुरू करें।",
|
| 146 |
+
"backend_settings": "🚀 बैकएंड सेटिंग्स",
|
| 147 |
+
"inference_backend": "इन्फरेंस बैकएंड",
|
| 148 |
+
"optimization_mode": "ऑप्टिमाइजेशन मोड",
|
| 149 |
+
"optimization_mode_info": "PyTorch ऑप्टिमाइजेशन मोड चयन",
|
| 150 |
+
"mixed_precision_amp": "मिश्रित प्रेसिजन (AMP)",
|
| 151 |
+
"mixed_precision_info": "2x तेज इन्फरेंस - अनुशंसित",
|
| 152 |
+
"tf32_acceleration": "TF32 असेलरेशन",
|
| 153 |
+
"tf32_acceleration_info": "RTX 30xx+ के लिए अतिरिक्त गति बूस्ट",
|
| 154 |
+
"cudnn_benchmark": "cuDNN बेंचमार्क",
|
| 155 |
+
"cudnn_benchmark_info": "पहले रन में धीमा, बाद के रनों में बहुत तेज",
|
| 156 |
+
"ultra_optimized_pytorch": "अल्ट्रा-ऑप्टिमाइज्ड PyTorch बैकएंड",
|
| 157 |
+
"default_active_max_speed": "डिफ़ॉल्ट रूप से सक्रिय - अधिकतम गति ऑप्टिमाइजेशन",
|
| 158 |
+
"channels_last_mode": "RTX GPUs के लिए सबसे तेज",
|
| 159 |
+
"compile_mode": "PyTorch 2.0+ के लिए अतिरिक्त गति",
|
| 160 |
+
"default_mode": "मानक",
|
| 161 |
+
"tta_info": "गुणवत्ता बढ़ाता है लेकिन प्रसंस्करण को धीमा करता है",
|
| 162 |
+
"instrumental_info": "आमतौर पर 2 स���टेम आउटपुट करता है, लेकिन कभी-कभी केवल एक - आपको कोशिश करनी होगी! :)",
|
| 163 |
+
"direct_download_error": "❌ सीधा डाउनलोड त्रुटि: {}",
|
| 164 |
+
"invalid_google_drive_url": "❌ अमान्य Google Drive URL",
|
| 165 |
+
"continuing_without_google_drive": "Google Drive कनेक्शन विफल, इसके बिना जारी रखते हुए...",
|
| 166 |
+
"skipping_drive_copy_no_mount": "⚠️ Google Drive माउंट नहीं है, Drive पर कॉपी छोड़ रहे हैं...",
|
| 167 |
+
"direct_download_failed": "फ़ाइल को सीधे डाउनलोड करना विफल रहा",
|
| 168 |
+
"manual_ensemble_tab": "🎚️ मैनुअल एन्सेम्बल",
|
| 169 |
+
"phase_fixer_tab": "🔧 फेज़ फिक्सर",
|
| 170 |
+
"batch_processing_tab": "📦 बैच प्रोसेसिंग",
|
| 171 |
+
"batch_description": "एक ही मॉडल से एक साथ कई फाइलें प्रोसेस करें",
|
| 172 |
+
"batch_input_folder": "इनपुट फोल्डर",
|
| 173 |
+
"batch_input_folder_placeholder": "ऑडियो फाइलों वाले फोल्डर का पथ दर्ज करें",
|
| 174 |
+
"batch_output_folder": "आउटपुट फोल्डर",
|
| 175 |
+
"batch_output_folder_placeholder": "आउटपुट फाइलों के लिए फोल्डर पथ दर्ज करें",
|
| 176 |
+
"batch_file_list": "प्रोसेस करने के लिए फाइलें",
|
| 177 |
+
"batch_add_files": "फाइलें जोड़ें",
|
| 178 |
+
"batch_clear_list": "सूची साफ करें",
|
| 179 |
+
"batch_start": "🚀 बैच प्रोसेसिंग शुरू करें",
|
| 180 |
+
"batch_stop": "⏹️ रोकें",
|
| 181 |
+
"batch_progress": "बैच प्रगति",
|
| 182 |
+
"batch_current_file": "वर्तमान में प्रक्रिया हो रही है",
|
| 183 |
+
"batch_completed": "✅ बैच प्रोसेसिंग पूर्ण!",
|
| 184 |
+
"batch_stopped": "⏹️ बैच प्रोसेसिंग रोकी गई",
|
| 185 |
+
"batch_no_files": "❌ प्रोसेसिंग के लिए कोई फाइल नहीं चुनी गई",
|
| 186 |
+
"source_model": "स्रोत मॉडल (स्वर)",
|
| 187 |
+
"source_model_info": "स्वर/फेज़ डेटा निकालने के लिए मॉडल",
|
| 188 |
+
"target_model": "लक्ष्य मॉडल (वाद्य)",
|
| 189 |
+
"target_model_info": "फेज़ सुधार लागू करने के लिए मॉडल",
|
| 190 |
"input_sources": "📂 इनपुट स्रोत",
|
| 191 |
+
"refresh": "🔄 रिफ्रेश",
|
| 192 |
+
"ensemble_algorithm": "एन्सेम्बल एल्गोरिदम",
|
| 193 |
"select_audio_files": "ऑडियो फाइलें चुनें",
|
| 194 |
"available_files": "उपलब्ध फाइलें",
|
| 195 |
+
"custom_weights": "कस्टम वज़न (अल्पविराम से अलग)",
|
| 196 |
"custom_weights_placeholder": "उदाहरण: 0.8, 1.2, 1.0, ...",
|
| 197 |
+
"custom_weights_info": "समान वज़न के लिए खाली छोड़ें",
|
| 198 |
"result_preview_tab": "🎧 परिणाम पूर्वावलोकन",
|
| 199 |
+
"ensembled_output": "एन्सेम्बल आउटपुट",
|
| 200 |
+
"processing_log_tab": "📋 प्रोसेसिंग लॉग",
|
| 201 |
+
"processing_details": "प्रोसेसिंग विवरण",
|
| 202 |
+
"process_ensemble": "⚡ एन्सेम्बल प्रोसेस करें",
|
| 203 |
+
"languages_tab": "🌐 भाषाएं",
|
| 204 |
"Vocal Models": "स्वर मॉडल",
|
| 205 |
"Instrumental Models": "वाद्य मॉडल",
|
| 206 |
"4-Stem Models": "4-स्टेम मॉडल",
|
| 207 |
+
"Denoise Models": "डीनॉइज़ मॉडल",
|
| 208 |
+
"Dereverb Models": "डीरिवर्ब मॉडल",
|
| 209 |
+
"Other Shapes": "अन्य मॉडल",
|
| 210 |
+
"Custom Models": "कस्टम मॉडल",
|
| 211 |
+
"ensemble_files_description": "निर्दिष्ट एन्सेम्बल विधियों का उपयोग करके कई ऑडियो फाइलों को एक आउटपुट में जोड़ता है।",
|
| 212 |
+
"ensemble_files_help": "एन्सेम्बल के लिए सभी ऑडियो फाइलों का पथ",
|
| 213 |
+
"ensemble_type_help": "इनमें से एक: avg_wave, median_wave, min_wave, max_wave, avg_fft, median_fft, min_fft, max_fft",
|
| 214 |
+
"ensemble_weights_help": "एन्सेम्बल बनाने के लिए वज़न। वज़न की संख्या फाइलों की संख्या के बराबर होनी चाहिए",
|
| 215 |
+
"ensemble_output_help": "WAV फाइल का पथ जहाँ एन्सेम्बल परिणाम संग्रहीत होगा",
|
| 216 |
+
"ensemble_type_print": "एन्सेम्बल प्रकार: {}",
|
| 217 |
"num_input_files_print": "इनपुट फाइलों की संख्या: {}",
|
| 218 |
+
"weights_print": "वज़न: {}",
|
| 219 |
"output_file_print": "आउटपुट फाइल: {}",
|
| 220 |
"duration_mismatch_error": "सभी फाइलों की अवधि समान होनी चाहिए",
|
| 221 |
+
"file_not_found_error": "त्रुटि। फाइल नहीं मिली: {}। पथ जाँचें।",
|
| 222 |
+
"reading_chunk_print": "फाइल से खंड पढ़ रहे हैं: {} (शुरू: {}s, अवधि: {}s)",
|
| 223 |
+
"chunk_result_shape_print": "खंड परिणाम का आकार: {}",
|
| 224 |
+
"ensemble_completed_print": "एन्सेम्बल पूर्ण। आउटपुट यहाँ सहेजा गया: {}",
|
| 225 |
"file_deletion_error": "{} हटाया नहीं जा सका: {}",
|
| 226 |
+
"directory_not_exist_warning": "⚠️ डायरेक्टरी मौजूद नहीं: {}",
|
| 227 |
+
"not_a_directory_warning": "⚠️ पथ एक डायरेक्टरी नहीं है: {}",
|
| 228 |
"item_deletion_error": "⚠️ {} हटाने में त्रुटि: {}",
|
| 229 |
+
"critical_error": "⛔ गंभीर त्रुटि: {}",
|
| 230 |
+
"old_output_not_exist": "❌ पुराना आउटपुट फोल्डर मौजूद नहीं है",
|
| 231 |
+
"old_outputs_cleared": "✅ पुराने आउटपुट सफलतापूर्वक साफ किए गए!",
|
| 232 |
"error": "🔥 त्रुटि: {}",
|
| 233 |
"ffmpeg_error": "FFmpeg त्रुटि ({}): {}",
|
| 234 |
+
"file_saved_successfully": "फाइल सफलतापूर्वक सहेजी गई: {}",
|
| 235 |
+
"total_files_found": "कुल फा���लें मिलीं: {}। उपयोग की गई सैंपल रेट: {}",
|
| 236 |
"total_progress": "कुल प्रगति",
|
| 237 |
"detailed_pbar_enabled": "विस्तृत प्रगति बार सक्षम: {}",
|
| 238 |
+
"loaded_audio": "ऑडियो लोड हुआ: {}, आकार: {}",
|
| 239 |
+
"cannot_read_track": "ट्रैक नहीं पढ़ सकते: {}",
|
| 240 |
"error_message": "त्रुटि संदेश: {}",
|
| 241 |
+
"demudding_track": "ट्रैक साफ कर रहे हैं (फेज़ रीमिक्स - वाद्य): {}",
|
| 242 |
"elapsed_time": "बीता हुआ समय: {:.2f} सेकंड।",
|
| 243 |
+
"proc_folder_description": "निर्दिष्ट मॉडल का उपयोग करके फोल्डर में ऑडियो फाइलें प्रोसेस करता है।",
|
| 244 |
"model_type_help": "मॉडल प्रकार (bandit, bs_roformer, mdx23c, आदि)",
|
| 245 |
"config_path_help": "कॉन्फ़िग फाइल का पथ",
|
| 246 |
+
"demud_phaseremix_help": "वाद्य के लिए फेज़ रीमिक्स सक्षम करें",
|
| 247 |
+
"start_checkpoint_help": "वैध वज़न के लिए प्रारंभिक चेकपॉइंट",
|
| 248 |
+
"input_folder_help": "प्रक्रिया करने के लिए मिश्रण वाला फोल्डर",
|
| 249 |
+
"audio_path_help": "एकल ऑडियो फाइल का पथ",
|
| 250 |
"store_dir_help": "परिणाम संग्रहीत करने का पथ",
|
| 251 |
+
"device_ids_help": "GPU आईडी की सूची",
|
| 252 |
+
"extract_instrumental_help": "यदि प्रदान किया गया हो तो वाद्य प्राप्त करने के लिए स्वर को उलटें",
|
| 253 |
+
"disable_detailed_pbar_help": "डीमिक्स में विस्तृत प्रगति बार अक्षम करें",
|
| 254 |
+
"force_cpu_help": "CUDA उपलब्ध होने पर भी CPU का उपयोग करने के लिए बाध्य करें",
|
| 255 |
"flac_file_help": "WAV के बजाय FLAC फाइल आउटपुट करें",
|
| 256 |
"export_format_help": "निर्यात प्रारूप और PCM प्रकार",
|
| 257 |
"pcm_type_help": "FLAC फाइलों के लिए PCM प्रकार",
|
| 258 |
+
"use_tta_help": "टेस्ट टाइम ऑग्मेंटेशन सक्षम करें",
|
| 259 |
+
"lora_checkpoint_help": "LoRA वज़न के लिए प्रारंभिक चेकपॉइंट",
|
| 260 |
"cuda_available": "CUDA उपलब्ध है, इसे अक्षम करने के लिए --force_cpu का उपयोग करें।",
|
| 261 |
+
"using_device": "डिवाइस उपयोग: {}",
|
| 262 |
+
"instruments_print": "वाद्ययंत्र: {}",
|
| 263 |
"model_load_time": "मॉडल लोड समय: {:.2f} सेकंड",
|
| 264 |
"invalid_url": "❌ अमान्य URL",
|
| 265 |
+
"cookie_file_updated": "✅ कुकी फाइल अपडेट हुई!",
|
| 266 |
+
"cookie_installation_error": "⚠️ कुकी इंस्टॉलेशन त्रुटि: {}",
|
| 267 |
+
"google_drive_mount_error": "❌ Google Drive लिंक त्रुटि: {}",
|
| 268 |
+
"file_size_zero_error": "फाइल का आकार शून्य है या फाइल नहीं बनाई गई",
|
| 269 |
"google_drive_error": "❌ Google Drive डाउनलोड त्रुटि: {}",
|
| 270 |
"wav_conversion_failed": "WAV रूपांतरण विफल",
|
| 271 |
"download_error": "❌ डाउनलोड त्रुटि: {}",
|
| 272 |
+
"file_copied_to_drive": "फाइल Google Drive पर कॉपी की गई: {}",
|
| 273 |
+
"copy_to_drive_error": "❌ Google Drive पर कॉपी करने में त्रुटि: {}",
|
| 274 |
+
"download_success": "🎉 सफलतापूर्वक डाउनलोड हुआ!",
|
| 275 |
"download_failed": "❌ डाउनलोड विफल",
|
| 276 |
+
"no_output_files_found": "❌ कोई आउटपुट फाइल नहीं मिली।",
|
| 277 |
+
"output_refreshed_successfully": "✅ आउटपुट सफलतापूर्वक रिफ्रेश हुआ!",
|
| 278 |
+
"error_refreshing_output": "❌ आउटपुट रिफ्रेश करने में त्रुटि: {}",
|
| 279 |
"starting_audio_separation": "ऑडियो पृथक्करण शुरू हो रहा है...",
|
| 280 |
+
"processing_audio": "ऑडियो प्रोसेस हो रहा है",
|
| 281 |
+
"separating_audio": "ऑडियो अलग हो रहा है... ({:.1f}%)",
|
| 282 |
+
"separation_complete": "पृथक्करण पूर्ण!",
|
| 283 |
"progress_parsing_error": "प्रगति पार्सिंग त्रुटि: {}",
|
| 284 |
"error_occurred": "एक त्रुटि हुई: {}",
|
| 285 |
+
"separation_process_completed": "पृथक्करण प्रक्रिया पूर्ण!",
|
| 286 |
+
"no_audio_file_error": "❌ कोई ऑडियो फाइल नहीं दी गई और इनपुट डायरेक्टरी में कोई मौजूदा फाइल नहीं है।",
|
| 287 |
+
"no_input_progress_label": "त्रुटि: कोई इनपुट नहीं दिया गया -- 0.0%",
|
| 288 |
+
"processing_audio_print": "ऑडियो प्रोसेस हो रहा है: {} मॉडल: {}",
|
| 289 |
"starting_audio_separation_progress_label": "ऑडियो पृथक्करण शुरू हो रहा है... -- 0.0%",
|
| 290 |
+
"separating_audio_progress_label": "ऑडियो अलग हो रहा है... -- {}.0%",
|
| 291 |
+
"audio_processing_completed_progress_label": "ऑडियो प्रक्रिया पूर्ण! -- 100.0%",
|
| 292 |
+
"audio_processing_completed": "✅ ऑडियो प्रक्रिया पूर्ण!",
|
| 293 |
"error_occurred_progress_label": "त्रुटि ह���ई -- 0.0%",
|
| 294 |
"minimum_files_required": "⚠️ न्यूनतम 2 फाइलें आवश्यक",
|
| 295 |
+
"valid_files_not_found": "❌ कोई वैध फाइल नहीं मिली",
|
| 296 |
+
"starting_ensemble_process": "एन्सेम्बल प्रक्रिया शुरू हो रही है...",
|
| 297 |
+
"ensembling_progress": "एन्सेम्बल हो रहा है... ({:.1f}%)",
|
| 298 |
+
"finalizing_ensemble_output": "एन्सेम्बल आउटपुट अंतिम रूप दे रहे हैं...",
|
| 299 |
+
"success_log": "✅ सफल!\n{}",
|
| 300 |
"error_log": "❌ त्रुटि!\n{}",
|
| 301 |
+
"ensemble_process_completed": "एन्सेम्बल प्रक्रिया पूर्ण!",
|
| 302 |
+
"no_models_selected": "❌ कोई मॉडल नहीं चुना गया",
|
| 303 |
+
"no_input_audio_provided": "❌ कोई इनपुट ऑडियो नहीं दिया गया",
|
| 304 |
+
"loading_model": "मॉडल लोड हो रहा है {}/{}: {}...",
|
| 305 |
+
"loading_model_progress_label": "मॉडल लोड हो रहा है {}/{}: {}... -- {}.0%",
|
| 306 |
+
"completed_model": "मॉडल पूर्ण {}/{}: {}",
|
| 307 |
+
"completed_model_progress_label": "मॉडल पूर्ण {}/{}: {} -- {}.0%",
|
| 308 |
+
"running_command": "कमांड चला रहे हैं: {}",
|
|
|
|
| 309 |
"model_failed": "मॉडल {} विफल: {}",
|
| 310 |
"critical_error_with_model": "{} के साथ गंभीर त्रुटि: {}",
|
| 311 |
+
"model_output_failed": "{} आउटपुट नहीं दे सका",
|
| 312 |
+
"waiting_for_files": "सभी फाइलों के तैयार होने का इंतजार...",
|
| 313 |
+
"waiting_for_files_progress_label": "सभी फाइलों के तैयार होने का इंतजार... -- 90.0%",
|
| 314 |
+
"performing_ensemble": "एन्सेम्बल कर रहे हैं...",
|
| 315 |
+
"performing_ensemble_progress_label": "एन्सेम्बल कर रहे हैं... -- 92.0%",
|
| 316 |
+
"memory_usage_before_ensemble": "एन्सेम्बल से पहले मेमोरी उपयोग: {}%",
|
| 317 |
+
"memory_usage_after_ensemble": "एन्सेम्बल के बाद मेमोरी उपयोग: {}%",
|
| 318 |
+
"finalizing_ensemble_output_progress_label": "एन्सेम्बल आउटपुट अंतिम रूप दे रहे हैं... -- 98.0%",
|
| 319 |
+
"ensemble_file_creation_failed": "एन्सेम्बल फाइल नहीं बनाई जा सकी: {}",
|
| 320 |
+
"ensemble_completed_progress_label": "एन्सेम्बल सफलतापूर्वक पूर्ण! -- 100.0%",
|
| 321 |
+
"success_output_created": "✅ सफल! आउटपुट फाइल बनाई गई।",
|
| 322 |
+
"custom_models_tab": "⚡ कस्टम मॉडल",
|
| 323 |
+
"add_custom_model": "➕ कस्टम मॉडल जोड़ें",
|
| 324 |
+
"custom_model_name": "मॉडल नाम",
|
| 325 |
+
"custom_model_name_placeholder": "अपने मॉडल के लिए एक नाम दर्ज करें",
|
| 326 |
+
"checkpoint_url": "Checkpoint URL (.ckpt/.pth)",
|
| 327 |
+
"checkpoint_url_placeholder": "https://huggingface.co/.../model.ckpt",
|
| 328 |
+
"config_url": "Config URL (.yaml)",
|
| 329 |
+
"config_url_placeholder": "https://huggingface.co/.../config.yaml",
|
| 330 |
+
"custom_py_url": "कस्टम Python फाइल URL (वैकल्पिक)",
|
| 331 |
+
"custom_py_url_placeholder": "https://huggingface.co/.../bs_roformer.py",
|
| 332 |
+
"auto_detect_type": "मॉडल प्रकार स्वचालित रूप से पहचानें",
|
| 333 |
+
"model_type": "मॉडल प्रकार",
|
| 334 |
+
"add_model_btn": "➕ मॉडल जोड़ें",
|
| 335 |
+
"custom_models_list": "📋 कस्टम मॉडल सूची",
|
| 336 |
+
"delete_model": "🗑️ हटाएं",
|
| 337 |
+
"no_custom_models": "अभी तक कोई कस्टम मॉडल नहीं जोड़ा गया",
|
| 338 |
+
"model_added_success": "✅ मॉडल सफलतापूर्वक जोड़ा गया!",
|
| 339 |
+
"model_deleted_success": "✅ मॉडल सफलतापूर्वक हटाया गया!",
|
| 340 |
+
"model_add_error": "❌ मॉडल जोड़ने में त्रुटि: {}",
|
| 341 |
+
"model_delete_error": "❌ मॉडल हटाने में त्रुटि: {}",
|
| 342 |
+
"refresh_models": "🔄 रिफ्रेश",
|
| 343 |
+
"custom_model_info": "डाउनलोड लिंक प्रदान करके कस्टम मॉडल जोड़ें। मॉडल उपयोग होने पर स्वचालित रूप से डाउनलोड होंगे।",
|
| 344 |
+
"select_model_to_delete": "हटाने के लिए मॉडल चु��ें"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 345 |
}
|
assets/i18n/languages/it_it.json
CHANGED
|
@@ -17,8 +17,8 @@
|
|
| 17 |
"overlap": "Sovrapposizione",
|
| 18 |
"overlap_info": "Consigliato: 2-10 (valori più alti migliorano la qualità ma richiedono più VRAM)",
|
| 19 |
"tta_boost": "Boost TTA",
|
| 20 |
-
"tta_info": "Migliora la qualità ma rallenta la velocità di elaborazione",
|
| 21 |
"phase_fix": "Correzione fase",
|
|
|
|
| 22 |
"phase_fix_info": "Correzione avanzata della fase per tracce strumentali",
|
| 23 |
"instrumental": "Strumentale",
|
| 24 |
"instrumental_info": "Di solito produce 2 output, ma a volte solo uno, prova! :)",
|
|
@@ -80,111 +80,199 @@
|
|
| 80 |
"downloaded_file": "File scaricato",
|
| 81 |
"cookie_management": "🍪 Gestione cookie",
|
| 82 |
"upload_cookies_txt": "Carica Cookies.txt",
|
| 83 |
-
"cookie_info": "<div style=\"margin-left:15px; font-size:0.95em\">**📌 Perché è necessario?**<br>- Accesso a contenuti con restrizione d'età<br>- Download di video privati/non elencati<br>- Superamento delle restrizioni regionali<br>- Evitare i limiti di download di YouTube<br><br>**⚠️ Note
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 84 |
"manual_ensemble_tab": "🎚️ Ensemble manuale",
|
| 85 |
-
"input_sources": "📂 Sorgenti
|
| 86 |
"refresh": "🔄 Aggiorna",
|
| 87 |
"ensemble_algorithm": "Algoritmo ensemble",
|
| 88 |
"select_audio_files": "Seleziona file audio",
|
| 89 |
"available_files": "File disponibili",
|
| 90 |
-
"custom_weights": "Pesi personalizzati (separati da
|
| 91 |
"custom_weights_placeholder": "Esempio: 0.8, 1.2, 1.0, ...",
|
| 92 |
"custom_weights_info": "Lascia vuoto per pesi uguali",
|
| 93 |
"result_preview_tab": "🎧 Anteprima risultato",
|
| 94 |
"ensembled_output": "Output ensemble",
|
| 95 |
-
"processing_log_tab": "📋
|
| 96 |
"processing_details": "Dettagli elaborazione",
|
| 97 |
"process_ensemble": "⚡ Elabora ensemble",
|
| 98 |
"languages_tab": "🌐 Lingue",
|
| 99 |
"Vocal Models": "Modelli vocali",
|
| 100 |
"Instrumental Models": "Modelli strumentali",
|
| 101 |
-
"4-Stem Models": "Modelli
|
| 102 |
-
"Denoise Models": "Modelli
|
| 103 |
-
"Dereverb Models": "Modelli
|
| 104 |
"Other Models": "Altri modelli",
|
| 105 |
-
"
|
|
|
|
| 106 |
"ensemble_files_help": "Percorso a tutti i file audio per l'ensemble",
|
| 107 |
-
"ensemble_type_help": "Uno
|
| 108 |
-
"ensemble_weights_help": "Pesi per creare l'ensemble. Il numero di pesi deve
|
| 109 |
-
"ensemble_output_help": "Percorso al file WAV dove verrà
|
| 110 |
"ensemble_type_print": "Tipo ensemble: {}",
|
| 111 |
"num_input_files_print": "Numero file input: {}",
|
| 112 |
"weights_print": "Pesi: {}",
|
| 113 |
"output_file_print": "File output: {}",
|
| 114 |
"duration_mismatch_error": "Tutti i file devono avere la stessa durata",
|
| 115 |
-
"file_not_found_error": "Errore.
|
| 116 |
-
"reading_chunk_print": "Lettura
|
| 117 |
-
"chunk_result_shape_print": "Forma risultato
|
| 118 |
"ensemble_completed_print": "Ensemble completato. Output salvato in: {}",
|
| 119 |
"file_deletion_error": "{} non può essere eliminato: {}",
|
| 120 |
"directory_not_exist_warning": "⚠️ La directory non esiste: {}",
|
| 121 |
"not_a_directory_warning": "⚠️ Il percorso non è una directory: {}",
|
| 122 |
-
"item_deletion_error": "⚠️ Errore
|
| 123 |
"old_output_not_exist": "❌ La cartella output vecchia non esiste",
|
| 124 |
-
"old_outputs_cleared": "✅ Output vecchi
|
| 125 |
"error": "🔥 Errore: {}",
|
| 126 |
"ffmpeg_error": "Errore FFmpeg ({}): {}",
|
| 127 |
"file_saved_successfully": "File salvato con successo: {}",
|
| 128 |
-
"total_files_found": "
|
| 129 |
"total_progress": "Progresso totale",
|
| 130 |
-
"detailed_pbar_enabled": "Barra
|
| 131 |
"loaded_audio": "Audio caricato: {}, forma: {}",
|
| 132 |
-
"cannot_read_track": "Impossibile leggere
|
| 133 |
-
"error_message": "Messaggio
|
| 134 |
-
"demudding_track": "Pulizia traccia (remix
|
| 135 |
"elapsed_time": "Tempo trascorso: {:.2f} secondi.",
|
| 136 |
-
"proc_folder_description": "Elabora
|
| 137 |
-
"model_type_help": "Tipo
|
| 138 |
-
"config_path_help": "Percorso al file
|
| 139 |
-
"demud_phaseremix_help": "Abilita remix
|
| 140 |
-
"start_checkpoint_help": "
|
| 141 |
-
"input_folder_help": "Cartella
|
| 142 |
-
"audio_path_help": "Percorso a
|
| 143 |
-
"store_dir_help": "Percorso per
|
| 144 |
-
"device_ids_help": "Lista
|
| 145 |
-
"extract_instrumental_help": "Inverti
|
| 146 |
-
"disable_detailed_pbar_help": "Disabilita barra
|
| 147 |
-
"force_cpu_help": "Forza
|
| 148 |
-
"flac_file_help": "
|
| 149 |
-
"export_format_help": "Formato
|
| 150 |
"pcm_type_help": "Tipo PCM per file FLAC",
|
| 151 |
-
"use_tta_help": "Abilita
|
| 152 |
-
"lora_checkpoint_help": "
|
| 153 |
-
"cuda_available": "CUDA disponibile, usa --force_cpu per disabilitarlo.",
|
| 154 |
-
"using_device": "
|
| 155 |
"instruments_print": "Strumenti: {}",
|
| 156 |
-
"model_load_time": "Tempo
|
| 157 |
"invalid_url": "❌ URL non valido",
|
| 158 |
"cookie_file_updated": "✅ File cookie aggiornato!",
|
| 159 |
"cookie_installation_error": "⚠️ Errore installazione cookie: {}",
|
|
|
|
| 160 |
"file_size_zero_error": "Dimensione file zero o file non creato",
|
| 161 |
"google_drive_error": "❌ Errore download Google Drive: {}",
|
| 162 |
"wav_conversion_failed": "Conversione WAV fallita",
|
| 163 |
"download_error": "❌ Errore download: {}",
|
| 164 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 165 |
"download_failed": "❌ Download fallito",
|
| 166 |
"no_output_files_found": "❌ Nessun file output trovato.",
|
| 167 |
"output_refreshed_successfully": "✅ Output aggiornato con successo!",
|
| 168 |
-
"error_refreshing_output": "❌ Errore
|
| 169 |
-
"starting_audio_separation": "
|
| 170 |
"processing_audio": "Elaborazione audio",
|
| 171 |
"separating_audio": "Separazione audio... ({:.1f}%)",
|
| 172 |
"separation_complete": "Separazione completata!",
|
| 173 |
-
"progress_parsing_error": "Errore
|
| 174 |
"error_occurred": "Si è verificato un errore: {}",
|
| 175 |
-
"separation_process_completed": "Processo
|
| 176 |
-
"no_audio_file_error": "❌ Nessun file audio fornito e nessun file esistente nella directory
|
| 177 |
"no_input_progress_label": "Errore: Nessun input fornito -- 0.0%",
|
| 178 |
-
"processing_audio_print": "Elaborazione audio da: {} usando
|
| 179 |
-
"starting_audio_separation_progress_label": "
|
| 180 |
"separating_audio_progress_label": "Separazione audio... -- {}.0%",
|
| 181 |
"audio_processing_completed_progress_label": "Elaborazione audio completata! -- 100.0%",
|
| 182 |
"audio_processing_completed": "✅ Elaborazione audio completata!",
|
| 183 |
"error_occurred_progress_label": "Errore verificato -- 0.0%",
|
| 184 |
-
"minimum_files_required": "⚠️
|
| 185 |
"valid_files_not_found": "❌ Nessun file valido trovato",
|
| 186 |
-
"starting_ensemble_process": "
|
| 187 |
-
"ensembling_progress": "
|
| 188 |
"finalizing_ensemble_output": "Finalizzazione output ensemble...",
|
| 189 |
"success_log": "✅ Successo!\n{}",
|
| 190 |
"error_log": "❌ Errore!\n{}",
|
|
@@ -200,81 +288,92 @@
|
|
| 200 |
"model_failed": "Modello {} fallito: {}",
|
| 201 |
"critical_error_with_model": "Errore critico con {}: {}",
|
| 202 |
"model_output_failed": "{} non è riuscito a produrre output",
|
| 203 |
-
"waiting_for_files": "
|
| 204 |
-
"waiting_for_files_progress_label": "
|
| 205 |
"performing_ensemble": "Esecuzione ensemble...",
|
| 206 |
"performing_ensemble_progress_label": "Esecuzione ensemble... -- 92.0%",
|
| 207 |
-
"memory_usage_before_ensemble": "Uso memoria prima
|
| 208 |
-
"memory_usage_after_ensemble": "Uso memoria dopo
|
| 209 |
"finalizing_ensemble_output_progress_label": "Finalizzazione output ensemble... -- 98.0%",
|
| 210 |
-
"ensemble_file_creation_failed": "
|
| 211 |
"ensemble_completed_progress_label": "Ensemble completato con successo! -- 100.0%",
|
| 212 |
"success_output_created": "✅ Successo! File output creato.",
|
| 213 |
-
"
|
| 214 |
-
"
|
| 215 |
-
"
|
| 216 |
-
"
|
| 217 |
-
"
|
| 218 |
-
"
|
| 219 |
-
"
|
| 220 |
-
"
|
| 221 |
-
"
|
| 222 |
-
"
|
| 223 |
-
"
|
| 224 |
-
"
|
| 225 |
-
"
|
| 226 |
-
"
|
| 227 |
-
"
|
| 228 |
-
"
|
| 229 |
-
"
|
| 230 |
-
"
|
| 231 |
-
"
|
| 232 |
-
"
|
| 233 |
-
"
|
| 234 |
-
"
|
| 235 |
-
"
|
| 236 |
-
"
|
| 237 |
-
"
|
| 238 |
-
"
|
| 239 |
-
"
|
| 240 |
-
"
|
| 241 |
-
"
|
| 242 |
-
"
|
| 243 |
-
"
|
| 244 |
-
"
|
| 245 |
-
"
|
| 246 |
-
"
|
| 247 |
-
"
|
| 248 |
-
"
|
| 249 |
-
"
|
| 250 |
-
"
|
| 251 |
-
"
|
| 252 |
-
"
|
| 253 |
-
"
|
| 254 |
-
"
|
| 255 |
-
"
|
| 256 |
-
"
|
| 257 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 258 |
"normal_method": "Metodo normale",
|
| 259 |
-
"mid_side_method": "Metodo
|
| 260 |
-
"
|
| 261 |
-
"
|
| 262 |
-
"
|
| 263 |
-
"
|
| 264 |
-
"
|
| 265 |
-
"
|
| 266 |
-
"
|
| 267 |
-
"
|
| 268 |
-
"
|
| 269 |
-
"
|
| 270 |
-
"
|
| 271 |
-
"
|
| 272 |
-
"
|
| 273 |
-
"
|
| 274 |
-
"
|
| 275 |
-
"
|
| 276 |
-
"
|
| 277 |
-
"
|
| 278 |
-
"
|
| 279 |
-
"
|
| 280 |
}
|
|
|
|
| 17 |
"overlap": "Sovrapposizione",
|
| 18 |
"overlap_info": "Consigliato: 2-10 (valori più alti migliorano la qualità ma richiedono più VRAM)",
|
| 19 |
"tta_boost": "Boost TTA",
|
|
|
|
| 20 |
"phase_fix": "Correzione fase",
|
| 21 |
+
"use_phase_correction": "Applica Correzione Fase Avanzata",
|
| 22 |
"phase_fix_info": "Correzione avanzata della fase per tracce strumentali",
|
| 23 |
"instrumental": "Strumentale",
|
| 24 |
"instrumental_info": "Di solito produce 2 output, ma a volte solo uno, prova! :)",
|
|
|
|
| 80 |
"downloaded_file": "File scaricato",
|
| 81 |
"cookie_management": "🍪 Gestione cookie",
|
| 82 |
"upload_cookies_txt": "Carica Cookies.txt",
|
| 83 |
+
"cookie_info": "<div style=\"margin-left:15px; font-size:0.95em\">**📌 Perché è necessario?**<br>- Accesso a contenuti con restrizione d'età<br>- Download di video privati/non elencati<br>- Superamento delle restrizioni regionali<br>- Evitare i limiti di download di YouTube<br><br>**⚠️ Note Importanti**<br>- NON condividere mai i tuoi file cookie!<br>- Aggiorna i cookie quando:<br> • Ricevi errori \"403 Vietato\"<br> • I download si fermano improvvisamente<br> • Vedi messaggi \"Sessione scaduta\"<br><br>**🔄 Passi per il Rinnovo**<br>1. Installa questa <a href=\"https://chromewebstore.google.com/detail/get-cookiestxt-clean/ahmnmhfbokciafffnknlekllgcnafnie\" target=\"_blank\">estensione Chrome</a><br>2. Accedi a YouTube su Chrome<br>3. Clicca sull'icona dell'estensione → \"Esporta\"<br>4. Carica il file scaricato qui<br><br>**⏳ Durata Cookie**<br>- Sessioni normali: 24 ore<br>- Operazioni sensibili: 1 ora<br>- Cambio password: Rinnovo immediato</div>",
|
| 84 |
+
"ensemble_output_copied": "✅ Output ensemble copiato in {}",
|
| 85 |
+
"error_copying_ensemble_output": "❌ Errore copia output ensemble: {}",
|
| 86 |
+
"drive_mounted_copying_ensemble": "Google Drive montato. Copia output ensemble...",
|
| 87 |
+
"drive_already_mounted_copying_ensemble": "Google Drive già montato. Copia output ensemble...",
|
| 88 |
+
"no_ensemble_output_files_found": "❌ Nessun file output ensemble trovato.",
|
| 89 |
+
"drive_mounted_copying_files": "Google Drive montato. Copia file...",
|
| 90 |
+
"drive_already_mounted_copying_files": "Google Drive già montato. Copia file...",
|
| 91 |
+
"files_copied_to_drive": "✅ File copiati in {}",
|
| 92 |
+
"error_copying_files": "❌ Errore copia file: {}",
|
| 93 |
+
"mounting_drive": "Montaggio Google Drive...",
|
| 94 |
+
"settings_saved": "Impostazioni salvate in config.json: Lingua={lang}, Metodo={method}, Porta={port}",
|
| 95 |
+
"selected_language": "Lingua selezionata: {lang}",
|
| 96 |
+
"available_languages": "Lingue disponibili: {langs}",
|
| 97 |
+
"select_language_prompt": "Seleziona una lingua (default: {default}): ",
|
| 98 |
+
"invalid_language": "Selezione lingua non valida! Uso lingua default ({default}).",
|
| 99 |
+
"available_sharing_methods": "Metodi di condivisione disponibili: gradio, localtunnel, ngrok",
|
| 100 |
+
"select_sharing_method_prompt": "Scegli un metodo di condivisione (default: {default}): ",
|
| 101 |
+
"enter_ngrok_token_prompt": "Inserisci il tuo token Ngrok (default: {default}): ",
|
| 102 |
+
"ngrok_token_required": "Errore: Token Ngrok richiesto per il metodo ngrok!",
|
| 103 |
+
"enter_port_prompt": "Inserisci il numero di porta (default: {default}): ",
|
| 104 |
+
"opening_previous_url": "Apertura URL precedente: {url}",
|
| 105 |
+
"open_url_manually": "Apri l'URL manualmente in Colab: {url}",
|
| 106 |
+
"invalid_previous_url": "URL precedente non valido: {url}, non aperto.",
|
| 107 |
+
"starting_method": "Avvio di {method} sulla porta {port}...",
|
| 108 |
+
"process_stopped": "\n🛑 Processo fermato dall'utente",
|
| 109 |
+
"selected_port": "Porta selezionata: {port}",
|
| 110 |
+
"starting_gradio_with_sharing": "Avvio Gradio con condivisione integrata...",
|
| 111 |
+
"starting_localtunnel": "Avvio Localtunnel sulla porta {port}...",
|
| 112 |
+
"share_link": "Link di condivisione: {url}",
|
| 113 |
+
"password_ip": "Password IP: {ip}",
|
| 114 |
+
"starting_ngrok": "Avvio Ngrok sulla porta {port}...",
|
| 115 |
+
"ngrok_url": "URL Ngrok: {url}",
|
| 116 |
+
"ngrok_error": "Errore avvio Ngrok: {error}",
|
| 117 |
+
"auto_apollo_chunk_size": "Dimensione frammento Apollo",
|
| 118 |
+
"auto_apollo_chunk_size_info": "Dimensione frammento per miglioramento Apollo (consigliato: 19 per modello universale)",
|
| 119 |
+
"auto_apollo_overlap": "Sovrapposizione Apollo",
|
| 120 |
+
"auto_enhancing_with_apollo": "Miglioramento con Apollo ({}/{} file)...",
|
| 121 |
+
"auto_apollo_processing_completed": "Elaborazione Apollo completata!",
|
| 122 |
+
"auto_apollo_overlap_info": "Sovrapposizione per miglioramento Apollo (consigliato: 2)",
|
| 123 |
+
"apollo_chunk_size": "Dimensione frammento Apollo",
|
| 124 |
+
"apollo_chunk_size_info": "Dimensione frammento per miglioramento Apollo (consigliato: 19 per modello universale)",
|
| 125 |
+
"apollo_overlap": "Sovrapposizione Apollo",
|
| 126 |
+
"enhance_with_apollo": "Migliora con Apollo",
|
| 127 |
+
"apollo_enhancement_info": "Miglioramento qualità audio",
|
| 128 |
+
"enhancing_with_apollo": "Miglioramento con Apollo ({}/{} file)...",
|
| 129 |
+
"apollo_processing_completed": "Elaborazione Apollo completata!",
|
| 130 |
+
"apollo_overlap_info": "Sovrapposizione per miglioramento Apollo (consigliato: 2)",
|
| 131 |
+
"selected_models": "Modelli selezionati",
|
| 132 |
+
"save_preset": "Salva preimpostazione",
|
| 133 |
+
"delete_preset": "Elimina preimpostazione",
|
| 134 |
+
"refresh_presets": "Aggiorna preimpostazioni",
|
| 135 |
+
"preset_name": "Nome preimpostazione",
|
| 136 |
+
"select_preset": "Seleziona preimpostazione",
|
| 137 |
+
"add_favorite": "Aggiungi ai preferiti",
|
| 138 |
+
"apply_matchering": "Applica Matchering",
|
| 139 |
+
"matchering_info": "Applica Matchering per migliorare il mastering e il bilanciamento dell'audio",
|
| 140 |
+
"matchering_passes": "Passi Matchering",
|
| 141 |
+
"matchering_passes_info": "Numero di iterazioni Matchering (1-5, valori più alti migliorano la qualità ma richiedono più tempo)",
|
| 142 |
+
"matchering_processing": "Applicazione Matchering in corso... ({:.1f}%)",
|
| 143 |
+
"matchering_completed": "✅ Elaborazione Matchering completata!",
|
| 144 |
+
"matchering_error": "❌ Errore durante Matchering: {}",
|
| 145 |
+
"invalid_method": "Errore: Metodo non valido! Usa 'gradio', 'localtunnel' o 'ngrok'.",
|
| 146 |
+
"language_changed_message": "Lingua cambiata. Riavvia la cella.",
|
| 147 |
+
"backend_settings": "🚀 Impostazioni backend",
|
| 148 |
+
"inference_backend": "Backend inferenza",
|
| 149 |
+
"optimization_mode": "Modalità ottimizzazione",
|
| 150 |
+
"optimization_mode_info": "Selezione modalità ottimizzazione PyTorch",
|
| 151 |
+
"mixed_precision_amp": "Precisione mista (AMP)",
|
| 152 |
+
"mixed_precision_info": "Inferenza 2x più veloce - consigliato",
|
| 153 |
+
"tf32_acceleration": "Accelerazione TF32",
|
| 154 |
+
"tf32_acceleration_info": "Boost di velocità extra per RTX 30xx+",
|
| 155 |
+
"cudnn_benchmark": "Benchmark cuDNN",
|
| 156 |
+
"cudnn_benchmark_info": "Più lento al primo run, molto più veloce nei run successivi",
|
| 157 |
+
"ultra_optimized_pytorch": "Backend PyTorch ULTRA-OTTIMIZZATO",
|
| 158 |
+
"default_active_max_speed": "Attivo per impostazione predefinita - Ottimizzazione velocità massima",
|
| 159 |
+
"channels_last_mode": "Il più veloce per GPU RTX",
|
| 160 |
+
"compile_mode": "Velocità extra per PyTorch 2.0+",
|
| 161 |
+
"default_mode": "Standard",
|
| 162 |
+
"tta_info": "Aumenta la qualità ma rallenta l'elaborazione",
|
| 163 |
"manual_ensemble_tab": "🎚️ Ensemble manuale",
|
| 164 |
+
"input_sources": "📂 Sorgenti input",
|
| 165 |
"refresh": "🔄 Aggiorna",
|
| 166 |
"ensemble_algorithm": "Algoritmo ensemble",
|
| 167 |
"select_audio_files": "Seleziona file audio",
|
| 168 |
"available_files": "File disponibili",
|
| 169 |
+
"custom_weights": "Pesi personalizzati (separati da virgola)",
|
| 170 |
"custom_weights_placeholder": "Esempio: 0.8, 1.2, 1.0, ...",
|
| 171 |
"custom_weights_info": "Lascia vuoto per pesi uguali",
|
| 172 |
"result_preview_tab": "🎧 Anteprima risultato",
|
| 173 |
"ensembled_output": "Output ensemble",
|
| 174 |
+
"processing_log_tab": "📋 Registro elaborazione",
|
| 175 |
"processing_details": "Dettagli elaborazione",
|
| 176 |
"process_ensemble": "⚡ Elabora ensemble",
|
| 177 |
"languages_tab": "🌐 Lingue",
|
| 178 |
"Vocal Models": "Modelli vocali",
|
| 179 |
"Instrumental Models": "Modelli strumentali",
|
| 180 |
+
"4-Stem Models": "Modelli 4-stem",
|
| 181 |
+
"Denoise Models": "Modelli denoising",
|
| 182 |
+
"Dereverb Models": "Modelli dereverb",
|
| 183 |
"Other Models": "Altri modelli",
|
| 184 |
+
"Other Shapes": "Altri modelli",
|
| 185 |
+
"ensemble_files_description": "Combina più file audio in un singolo output usando metodi ensemble specificati.",
|
| 186 |
"ensemble_files_help": "Percorso a tutti i file audio per l'ensemble",
|
| 187 |
+
"ensemble_type_help": "Uno di: avg_wave, median_wave, min_wave, max_wave, avg_fft, median_fft, min_fft, max_fft",
|
| 188 |
+
"ensemble_weights_help": "Pesi per creare l'ensemble. Il numero di pesi deve essere uguale al numero di file",
|
| 189 |
+
"ensemble_output_help": "Percorso al file WAV dove verrà memorizzato il risultato dell'ensemble",
|
| 190 |
"ensemble_type_print": "Tipo ensemble: {}",
|
| 191 |
"num_input_files_print": "Numero file input: {}",
|
| 192 |
"weights_print": "Pesi: {}",
|
| 193 |
"output_file_print": "File output: {}",
|
| 194 |
"duration_mismatch_error": "Tutti i file devono avere la stessa durata",
|
| 195 |
+
"file_not_found_error": "Errore. Impossibile trovare il file: {}. Controlla i percorsi.",
|
| 196 |
+
"reading_chunk_print": "Lettura chunk dal file: {} (inizio: {}s, durata: {}s)",
|
| 197 |
+
"chunk_result_shape_print": "Forma risultato chunk: {}",
|
| 198 |
"ensemble_completed_print": "Ensemble completato. Output salvato in: {}",
|
| 199 |
"file_deletion_error": "{} non può essere eliminato: {}",
|
| 200 |
"directory_not_exist_warning": "⚠️ La directory non esiste: {}",
|
| 201 |
"not_a_directory_warning": "⚠️ Il percorso non è una directory: {}",
|
| 202 |
+
"item_deletion_error": "⚠️ Errore eliminazione {}: {}",
|
| 203 |
"old_output_not_exist": "❌ La cartella output vecchia non esiste",
|
| 204 |
+
"old_outputs_cleared": "✅ Output vecchi cancellati con successo!",
|
| 205 |
"error": "🔥 Errore: {}",
|
| 206 |
"ffmpeg_error": "Errore FFmpeg ({}): {}",
|
| 207 |
"file_saved_successfully": "File salvato con successo: {}",
|
| 208 |
+
"total_files_found": "File totali trovati: {}. Sample rate usato: {}",
|
| 209 |
"total_progress": "Progresso totale",
|
| 210 |
+
"detailed_pbar_enabled": "Barra progresso dettagliata abilitata: {}",
|
| 211 |
"loaded_audio": "Audio caricato: {}, forma: {}",
|
| 212 |
+
"cannot_read_track": "Impossibile leggere traccia: {}",
|
| 213 |
+
"error_message": "Messaggio errore: {}",
|
| 214 |
+
"demudding_track": "Pulizia traccia (phase remix - strumentale): {}",
|
| 215 |
"elapsed_time": "Tempo trascorso: {:.2f} secondi.",
|
| 216 |
+
"proc_folder_description": "Elabora file audio in una cartella usando un modello specificato.",
|
| 217 |
+
"model_type_help": "Tipo modello (bandit, bs_roformer, mdx23c, ecc.)",
|
| 218 |
+
"config_path_help": "Percorso al file configurazione",
|
| 219 |
+
"demud_phaseremix_help": "Abilita phase remix per strumentale",
|
| 220 |
+
"start_checkpoint_help": "Checkpoint iniziale per pesi validi",
|
| 221 |
+
"input_folder_help": "Cartella con mix da elaborare",
|
| 222 |
+
"audio_path_help": "Percorso a singolo file audio da elaborare",
|
| 223 |
+
"store_dir_help": "Percorso per memorizzare risultati",
|
| 224 |
+
"device_ids_help": "Lista ID GPU",
|
| 225 |
+
"extract_instrumental_help": "Inverti vocali per ottenere strumentale se fornito",
|
| 226 |
+
"disable_detailed_pbar_help": "Disabilita barra progresso dettagliata in demix",
|
| 227 |
+
"force_cpu_help": "Forza uso CPU anche se CUDA è disponibile",
|
| 228 |
+
"flac_file_help": "Output file FLAC invece di WAV",
|
| 229 |
+
"export_format_help": "Formato export e tipo PCM",
|
| 230 |
"pcm_type_help": "Tipo PCM per file FLAC",
|
| 231 |
+
"use_tta_help": "Abilita test time augmentation",
|
| 232 |
+
"lora_checkpoint_help": "Checkpoint iniziale per pesi LoRA",
|
| 233 |
+
"cuda_available": "CUDA è disponibile, usa --force_cpu per disabilitarlo.",
|
| 234 |
+
"using_device": "Uso dispositivo: {}",
|
| 235 |
"instruments_print": "Strumenti: {}",
|
| 236 |
+
"model_load_time": "Tempo caricamento modello: {:.2f} sec",
|
| 237 |
"invalid_url": "❌ URL non valido",
|
| 238 |
"cookie_file_updated": "✅ File cookie aggiornato!",
|
| 239 |
"cookie_installation_error": "⚠️ Errore installazione cookie: {}",
|
| 240 |
+
"google_drive_mount_error": "❌ Errore connessione Google Drive: {}",
|
| 241 |
"file_size_zero_error": "Dimensione file zero o file non creato",
|
| 242 |
"google_drive_error": "❌ Errore download Google Drive: {}",
|
| 243 |
"wav_conversion_failed": "Conversione WAV fallita",
|
| 244 |
"download_error": "❌ Errore download: {}",
|
| 245 |
+
"file_copied_to_drive": "File copiato su Google Drive: {}",
|
| 246 |
+
"copy_to_drive_error": "❌ Errore copia su Google Drive: {}",
|
| 247 |
+
"direct_download_failed": "Download diretto file fallito",
|
| 248 |
+
"direct_download_error": "❌ Errore download diretto: {}",
|
| 249 |
+
"invalid_google_drive_url": "❌ URL Google Drive non valido",
|
| 250 |
+
"continuing_without_google_drive": "Connessione Google Drive fallita, continuo senza...",
|
| 251 |
+
"skipping_drive_copy_no_mount": "⚠️ Google Drive non montato, salto copia su Drive...",
|
| 252 |
+
"download_success": "🎉 Scaricato con successo!",
|
| 253 |
"download_failed": "❌ Download fallito",
|
| 254 |
"no_output_files_found": "❌ Nessun file output trovato.",
|
| 255 |
"output_refreshed_successfully": "✅ Output aggiornato con successo!",
|
| 256 |
+
"error_refreshing_output": "❌ Errore aggiornamento output: {}",
|
| 257 |
+
"starting_audio_separation": "Avvio separazione audio...",
|
| 258 |
"processing_audio": "Elaborazione audio",
|
| 259 |
"separating_audio": "Separazione audio... ({:.1f}%)",
|
| 260 |
"separation_complete": "Separazione completata!",
|
| 261 |
+
"progress_parsing_error": "Errore parsing progresso: {}",
|
| 262 |
"error_occurred": "Si è verificato un errore: {}",
|
| 263 |
+
"separation_process_completed": "Processo separazione completato!",
|
| 264 |
+
"no_audio_file_error": "❌ Nessun file audio fornito e nessun file esistente nella directory input.",
|
| 265 |
"no_input_progress_label": "Errore: Nessun input fornito -- 0.0%",
|
| 266 |
+
"processing_audio_print": "Elaborazione audio da: {} usando modello: {}",
|
| 267 |
+
"starting_audio_separation_progress_label": "Avvio separazione audio... -- 0.0%",
|
| 268 |
"separating_audio_progress_label": "Separazione audio... -- {}.0%",
|
| 269 |
"audio_processing_completed_progress_label": "Elaborazione audio completata! -- 100.0%",
|
| 270 |
"audio_processing_completed": "✅ Elaborazione audio completata!",
|
| 271 |
"error_occurred_progress_label": "Errore verificato -- 0.0%",
|
| 272 |
+
"minimum_files_required": "⚠️ Minimo 2 file richiesti",
|
| 273 |
"valid_files_not_found": "❌ Nessun file valido trovato",
|
| 274 |
+
"starting_ensemble_process": "Avvio processo ensemble...",
|
| 275 |
+
"ensembling_progress": "Ensemble in corso... ({:.1f}%)",
|
| 276 |
"finalizing_ensemble_output": "Finalizzazione output ensemble...",
|
| 277 |
"success_log": "✅ Successo!\n{}",
|
| 278 |
"error_log": "❌ Errore!\n{}",
|
|
|
|
| 288 |
"model_failed": "Modello {} fallito: {}",
|
| 289 |
"critical_error_with_model": "Errore critico con {}: {}",
|
| 290 |
"model_output_failed": "{} non è riuscito a produrre output",
|
| 291 |
+
"waiting_for_files": "Attesa che tutti i file siano pronti...",
|
| 292 |
+
"waiting_for_files_progress_label": "Attesa che tutti i file siano pronti... -- 90.0%",
|
| 293 |
"performing_ensemble": "Esecuzione ensemble...",
|
| 294 |
"performing_ensemble_progress_label": "Esecuzione ensemble... -- 92.0%",
|
| 295 |
+
"memory_usage_before_ensemble": "Uso memoria prima ensemble: {}%",
|
| 296 |
+
"memory_usage_after_ensemble": "Uso memoria dopo ensemble: {}%",
|
| 297 |
"finalizing_ensemble_output_progress_label": "Finalizzazione output ensemble... -- 98.0%",
|
| 298 |
+
"ensemble_file_creation_failed": "Impossibile creare file ensemble: {}",
|
| 299 |
"ensemble_completed_progress_label": "Ensemble completato con successo! -- 100.0%",
|
| 300 |
"success_output_created": "✅ Successo! File output creato.",
|
| 301 |
+
"phase_fixer_tab": "🔧 Correttore di fase",
|
| 302 |
+
"batch_processing_tab": "📦 Elaborazione batch",
|
| 303 |
+
"batch_description": "Elabora più file contemporaneamente con lo stesso modello",
|
| 304 |
+
"batch_input_folder": "Cartella di input",
|
| 305 |
+
"batch_input_folder_placeholder": "Inserisci il percorso della cartella contenente i file audio",
|
| 306 |
+
"batch_output_folder": "Cartella di output",
|
| 307 |
+
"batch_output_folder_placeholder": "Inserisci il percorso della cartella di output",
|
| 308 |
+
"batch_file_list": "File da elaborare",
|
| 309 |
+
"batch_add_files": "Aggiungi file",
|
| 310 |
+
"batch_clear_list": "Svuota lista",
|
| 311 |
+
"batch_start": "🚀 Avvia elaborazione batch",
|
| 312 |
+
"batch_stop": "⏹️ Ferma",
|
| 313 |
+
"batch_progress": "Progresso batch",
|
| 314 |
+
"batch_current_file": "In elaborazione",
|
| 315 |
+
"batch_completed": "✅ Elaborazione batch completata!",
|
| 316 |
+
"batch_stopped": "⏹️ Elaborazione batch fermata",
|
| 317 |
+
"batch_no_files": "❌ Nessun file selezionato per l'elaborazione",
|
| 318 |
+
"source_model": "Modello sorgente (Vocali)",
|
| 319 |
+
"source_model_info": "Modello per estrarre vocali/dati di fase",
|
| 320 |
+
"target_model": "Modello destinazione (Strumentale)",
|
| 321 |
+
"target_model_info": "Modello a cui applicare la correzione di fase",
|
| 322 |
+
"custom_models_tab": "⚡ Modelli personalizzati",
|
| 323 |
+
"Custom Models": "Modelli personalizzati",
|
| 324 |
+
"add_custom_model": "➕ Aggiungi modello personalizzato",
|
| 325 |
+
"custom_model_name": "Nome modello",
|
| 326 |
+
"custom_model_name_placeholder": "Inserisci un nome per il tuo modello",
|
| 327 |
+
"checkpoint_url": "URL Checkpoint (.ckpt/.pth)",
|
| 328 |
+
"checkpoint_url_placeholder": "https://huggingface.co/.../model.ckpt",
|
| 329 |
+
"config_url": "URL Configurazione (.yaml)",
|
| 330 |
+
"config_url_placeholder": "https://huggingface.co/.../config.yaml",
|
| 331 |
+
"custom_py_url": "URL file Python personalizzato (opzionale)",
|
| 332 |
+
"custom_py_url_placeholder": "https://huggingface.co/.../bs_roformer.py",
|
| 333 |
+
"auto_detect_type": "Rileva automaticamente il tipo di modello",
|
| 334 |
+
"model_type": "Tipo modello",
|
| 335 |
+
"add_model_btn": "➕ Aggiungi modello",
|
| 336 |
+
"custom_models_list": "📋 Lista modelli personalizzati",
|
| 337 |
+
"delete_model": "🗑️ Elimina",
|
| 338 |
+
"no_custom_models": "Nessun modello personalizzato aggiunto",
|
| 339 |
+
"model_added_success": "✅ Modello aggiunto con successo!",
|
| 340 |
+
"model_deleted_success": "✅ Modello eliminato con successo!",
|
| 341 |
+
"model_add_error": "❌ Errore aggiunta modello: {}",
|
| 342 |
+
"model_delete_error": "❌ Errore eliminazione modello: {}",
|
| 343 |
+
"refresh_models": "🔄 Aggiorna",
|
| 344 |
+
"custom_model_info": "Aggiungi modelli personalizzati fornendo link di download. I modelli verranno scaricati automaticamente quando usati.",
|
| 345 |
+
"select_model_to_delete": "Seleziona un modello da eliminare",
|
| 346 |
+
"downloading_model_file": "Download modello: {}",
|
| 347 |
+
"downloading_file_progress": "Download: {} - {}%",
|
| 348 |
+
"chunk_size_mode": "Modalità Dimensione Frammento",
|
| 349 |
+
"chunk_size_mode_info": "base: preset standard | custom: inserisci il tuo valore | yaml: leggi dallo YAML del modello",
|
| 350 |
+
"chunk_size_custom_label": "Dimensione Frammento Personalizzata",
|
| 351 |
+
"chunk_size_custom_info": "Inserisci un valore personalizzato per la dimensione del frammento",
|
| 352 |
+
"chunk_size_yaml_label": "Dimensione Frammento YAML",
|
| 353 |
+
"chunk_size_yaml_display_info": "Verrà letta dal file di configurazione YAML del modello dopo il download",
|
| 354 |
+
"chunk_size_yaml_not_downloaded": "⏳ YAML del modello non ancora scaricato — il valore verrà letto durante l'elaborazione",
|
| 355 |
+
"chunk_size_yaml_detected": "✅ Da YAML: {}",
|
| 356 |
+
"remove_favorite": "Rimuovi dai Preferiti",
|
| 357 |
"normal_method": "Metodo normale",
|
| 358 |
+
"mid_side_method": "Metodo Mid-Side",
|
| 359 |
+
"apollo_processing_method": "Metodo di Elaborazione Apollo",
|
| 360 |
+
"apollo_normal_model": "Modello Normale Apollo",
|
| 361 |
+
"apollo_mid_side_model": "Modello Mid-Side Apollo",
|
| 362 |
+
"scale_factor": "Fattore di Scala",
|
| 363 |
+
"scale_factor_info": "Fattore di scala della fase (consigliato: 1.4)",
|
| 364 |
+
"source_file_label": "File Sorgente (Voce)",
|
| 365 |
+
"target_file_label": "File di Destinazione (Strumentale)",
|
| 366 |
+
"run_phase_fixer": "🔧 Avvia Correttore di Fase",
|
| 367 |
+
"phase_fixed_output": "Output con Fase Corretta",
|
| 368 |
+
"phase_fixer_settings": "Impostazioni Correttore di Fase",
|
| 369 |
+
"low_cutoff": "Taglio Basso (Hz)",
|
| 370 |
+
"high_cutoff": "Taglio Alto (Hz)",
|
| 371 |
+
"low_cutoff_info": "Frequenza di taglio bassa per la correzione della fase",
|
| 372 |
+
"high_cutoff_info": "Frequenza di taglio alta per la correzione della fase",
|
| 373 |
+
"no_preset_name_provided": "⚠️ Nessun nome preset fornito",
|
| 374 |
+
"no_models_selected_for_preset": "⚠️ Nessun modello selezionato per il preset",
|
| 375 |
+
"preset_saved": "✅ Preset '{}' salvato!",
|
| 376 |
+
"please_upload_both_files": "⚠️ Carica sia il file sorgente che quello di destinazione",
|
| 377 |
+
"processing_log_placeholder": "Il registro di elaborazione apparirà qui...",
|
| 378 |
+
"enter_preset_name": "Inserisci il nome del preset..."
|
| 379 |
}
|
assets/i18n/languages/ja_jp.json
CHANGED
|
@@ -17,11 +17,10 @@
|
|
| 17 |
"overlap": "オーバーラップ",
|
| 18 |
"overlap_info": "推奨: 2-10 (高い値は品質を向上させますが、より多くのVRAMが必要です)",
|
| 19 |
"tta_boost": "TTAブースト",
|
| 20 |
-
"tta_info": "品質を向上させますが、処理速度が遅くなります",
|
| 21 |
"phase_fix": "位相補正",
|
|
|
|
| 22 |
"phase_fix_info": "インストゥルメンタルトラックのための高度な位相補正",
|
| 23 |
"instrumental": "インストゥルメンタル",
|
| 24 |
-
"instrumental_info": "通常2つの出力が得られますが、時には1つの出力だけの場合もあります。試してみてください :)",
|
| 25 |
"process": "🚀 処理",
|
| 26 |
"reset": "🧹 リセット",
|
| 27 |
"status": "ステータス",
|
|
@@ -80,201 +79,267 @@
|
|
| 80 |
"downloaded_file": "ダウンロードされたファイル",
|
| 81 |
"cookie_management": "🍪 クッキー管理",
|
| 82 |
"upload_cookies_txt": "Cookies.txtをアップロード",
|
| 83 |
-
"cookie_info": "<div style=\"margin-left:15px; font-size:0.95em\">**📌 なぜ必要
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 84 |
"manual_ensemble_tab": "🎚️ 手動アンサンブル",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 85 |
"input_sources": "📂 入力ソース",
|
| 86 |
"refresh": "🔄 更新",
|
| 87 |
"ensemble_algorithm": "アンサンブルアルゴリズム",
|
| 88 |
"select_audio_files": "オーディオファイルを選択",
|
| 89 |
"available_files": "利用可能なファイル",
|
| 90 |
-
"custom_weights": "カスタムウェイト
|
| 91 |
"custom_weights_placeholder": "例: 0.8, 1.2, 1.0, ...",
|
| 92 |
-
"custom_weights_info": "
|
| 93 |
"result_preview_tab": "🎧 結果プレビュー",
|
| 94 |
"ensembled_output": "アンサンブル出力",
|
| 95 |
"processing_log_tab": "📋 処理ログ",
|
| 96 |
-
"processing_details": "処理詳細",
|
| 97 |
"process_ensemble": "⚡ アンサンブルを処理",
|
| 98 |
"languages_tab": "🌐 言語",
|
| 99 |
"Vocal Models": "ボーカルモデル",
|
| 100 |
"Instrumental Models": "インストゥルメンタルモデル",
|
| 101 |
"4-Stem Models": "4ステムモデル",
|
| 102 |
"Denoise Models": "ノイズ除去モデル",
|
| 103 |
-
"Dereverb Models": "
|
| 104 |
-
"Other
|
| 105 |
-
"
|
|
|
|
| 106 |
"ensemble_files_help": "アンサンブル用のすべてのオーディオファイルへのパス",
|
| 107 |
-
"ensemble_type_help": "avg_wave, median_wave, min_wave, max_wave, avg_fft, median_fft, min_fft, max_fft
|
| 108 |
-
"ensemble_weights_help": "アンサンブル
|
| 109 |
-
"ensemble_output_help": "アンサンブル結果を保存するWAVファイル
|
| 110 |
"ensemble_type_print": "アンサンブルタイプ: {}",
|
| 111 |
"num_input_files_print": "入力ファイル数: {}",
|
| 112 |
"weights_print": "ウェイト: {}",
|
| 113 |
"output_file_print": "出力ファイル: {}",
|
| 114 |
-
"duration_mismatch_error": "すべてのファイル
|
| 115 |
"file_not_found_error": "エラー。ファイルが見つかりません: {}。パスを確認してください。",
|
| 116 |
-
"reading_chunk_print": "ファイルからチャンクを読み込み: {} (開始: {}
|
| 117 |
"chunk_result_shape_print": "チャンク結果の形状: {}",
|
| 118 |
-
"ensemble_completed_print": "アンサンブル完了。出力
|
| 119 |
"file_deletion_error": "{} を削除できませんでした: {}",
|
| 120 |
"directory_not_exist_warning": "⚠️ ディレクトリが存在しません: {}",
|
| 121 |
-
"not_a_directory_warning": "⚠️ パス
|
| 122 |
"item_deletion_error": "⚠️ {} の削除エラー: {}",
|
|
|
|
| 123 |
"old_output_not_exist": "❌ 古い出力フォルダが存在しません",
|
| 124 |
-
"old_outputs_cleared": "✅ 古い出
|
| 125 |
"error": "🔥 エラー: {}",
|
| 126 |
"ffmpeg_error": "FFmpegエラー ({}): {}",
|
| 127 |
"file_saved_successfully": "ファイルが正常に保存されました: {}",
|
| 128 |
"total_files_found": "見つかったファイルの総数: {}。使用サンプルレート: {}",
|
| 129 |
-
"total_progress": "
|
| 130 |
-
"detailed_pbar_enabled": "詳細プログレスバー有効: {}",
|
| 131 |
-
"loaded_audio": "
|
| 132 |
"cannot_read_track": "トラックを読み込めません: {}",
|
| 133 |
"error_message": "エラーメッセージ: {}",
|
| 134 |
-
"demudding_track": "トラック
|
| 135 |
-
"elapsed_time": "経過時間: {:.2f}
|
| 136 |
"proc_folder_description": "指定されたモデルを使用してフォルダ内のオーディオファイルを処理します。",
|
| 137 |
-
"model_type_help": "モデルタイプ (bandit, bs_roformer, mdx23c
|
| 138 |
"config_path_help": "設定ファイルへのパス",
|
| 139 |
-
"demud_phaseremix_help": "インストゥルメンタルの位相リミックスを有効
|
| 140 |
"start_checkpoint_help": "有効なウェイトの初期チェックポイント",
|
| 141 |
-
"input_folder_help": "処理するミックス
|
| 142 |
-
"audio_path_help": "処理する単一
|
| 143 |
"store_dir_help": "結果を保存するパス",
|
| 144 |
"device_ids_help": "GPU IDのリスト",
|
| 145 |
"extract_instrumental_help": "提供されている場合、ボーカルを反転してインストゥルメンタルを取得",
|
| 146 |
-
"disable_detailed_pbar_help": "デミックス
|
| 147 |
-
"force_cpu_help": "CUDAが利用可能でもCPUを強制使用",
|
| 148 |
"flac_file_help": "WAVの代わりにFLACファイルを出力",
|
| 149 |
-
"export_format_help": "エクスポート
|
| 150 |
"pcm_type_help": "FLACファイルのPCMタイプ",
|
| 151 |
-
"use_tta_help": "テスト時間
|
| 152 |
"lora_checkpoint_help": "LoRAウェイトの初期チェックポイント",
|
| 153 |
-
"cuda_available": "CUDAが利用可能です。無効
|
| 154 |
"using_device": "使用デバイス: {}",
|
| 155 |
"instruments_print": "楽器: {}",
|
| 156 |
-
"model_load_time": "モデル
|
| 157 |
"invalid_url": "❌ 無効なURL",
|
| 158 |
-
"cookie_file_updated": "✅
|
| 159 |
-
"cookie_installation_error": "⚠️
|
|
|
|
| 160 |
"file_size_zero_error": "ファイルサイズがゼロまたはファイルが作成されていません",
|
| 161 |
"google_drive_error": "❌ Google Driveダウンロードエラー: {}",
|
| 162 |
-
"wav_conversion_failed": "WAV変換に失敗",
|
| 163 |
"download_error": "❌ ダウンロードエラー: {}",
|
| 164 |
-
"
|
| 165 |
-
"
|
|
|
|
|
|
|
| 166 |
"no_output_files_found": "❌ 出力ファイルが見つかりません。",
|
| 167 |
-
"output_refreshed_successfully": "✅ 出力が正常に更新されました
|
| 168 |
"error_refreshing_output": "❌ 出力更新エラー: {}",
|
| 169 |
-
"starting_audio_separation": "オーディオ分離を開始...",
|
| 170 |
-
"processing_audio": "オーディオ処理中",
|
| 171 |
-
"separating_audio": "オーディオ分離中... ({:.1f}%)",
|
| 172 |
-
"separation_complete": "分離完了
|
| 173 |
"progress_parsing_error": "進捗解析エラー: {}",
|
| 174 |
"error_occurred": "エラーが発生しました: {}",
|
| 175 |
-
"separation_process_completed": "分離プロセス完了
|
| 176 |
"no_audio_file_error": "❌ オーディオファイルが提供されておらず、入力ディレクトリに既存のファイルもありません。",
|
| 177 |
"no_input_progress_label": "エラー: 入力が提供されていません -- 0.0%",
|
| 178 |
-
"processing_audio_print": "
|
| 179 |
-
"starting_audio_separation_progress_label": "オーディオ分離を開始... -- 0.0%",
|
| 180 |
-
"separating_audio_progress_label": "オーディオ分離中... -- {}.0%",
|
| 181 |
-
"audio_processing_completed_progress_label": "オーディオ処理完了
|
| 182 |
-
"audio_processing_completed": "✅ オーディオ処理完了
|
| 183 |
"error_occurred_progress_label": "エラーが発生しました -- 0.0%",
|
| 184 |
-
"minimum_files_required": "⚠️ 最低2ファイル必要",
|
| 185 |
"valid_files_not_found": "❌ 有効なファイルが見つかりません",
|
| 186 |
-
"starting_ensemble_process": "アンサンブルプロセスを開始...",
|
| 187 |
"ensembling_progress": "アンサンブル中... ({:.1f}%)",
|
| 188 |
-
"finalizing_ensemble_output": "アンサンブル出力を最終
|
| 189 |
-
"success_log": "✅ 成功
|
| 190 |
-
"error_log": "❌ エラー
|
| 191 |
-
"
|
| 192 |
-
"ensemble_process_completed": "アンサンブルプロセス完了!",
|
| 193 |
"no_models_selected": "❌ モデルが選択されていません",
|
| 194 |
"no_input_audio_provided": "❌ 入力オーディオが提供されていません",
|
| 195 |
-
"loading_model": "モデル {}/{}
|
| 196 |
-
"loading_model_progress_label": "モデル {}/{}
|
| 197 |
-
"completed_model": "モデル {}/{}
|
| 198 |
-
"completed_model_progress_label": "モデル {}/{}
|
| 199 |
-
"running_command": "コマンド実行中: {}",
|
| 200 |
-
"model_failed": "モデル {} が失敗: {}",
|
| 201 |
-
"critical_error_with_model": "{} で
|
| 202 |
-
"model_output_failed": "{}
|
| 203 |
-
"waiting_for_files": "すべてのファイル
|
| 204 |
-
"waiting_for_files_progress_label": "すべてのファイル
|
| 205 |
-
"performing_ensemble": "アンサンブル実行中...",
|
| 206 |
-
"performing_ensemble_progress_label": "アンサンブル実行中... -- 92.0%",
|
| 207 |
"memory_usage_before_ensemble": "アンサンブル前のメモリ使用量: {}%",
|
| 208 |
"memory_usage_after_ensemble": "アンサンブル後のメモリ使用量: {}%",
|
| 209 |
-
"finalizing_ensemble_output_progress_label": "アンサンブル出力を最終
|
| 210 |
-
"ensemble_file_creation_failed": "アンサンブルファイル
|
| 211 |
-
"ensemble_completed_progress_label": "アンサンブルが正常に完了しました
|
| 212 |
-
"success_output_created": "✅ 成功
|
| 213 |
-
"
|
| 214 |
-
"
|
| 215 |
-
"
|
| 216 |
-
"
|
| 217 |
-
"
|
| 218 |
-
"
|
| 219 |
-
"
|
| 220 |
-
"
|
| 221 |
-
"
|
| 222 |
-
"
|
| 223 |
-
"
|
| 224 |
-
"
|
| 225 |
-
"
|
| 226 |
-
"
|
| 227 |
-
"
|
| 228 |
-
"
|
| 229 |
-
"
|
| 230 |
-
"
|
| 231 |
-
"
|
| 232 |
-
"
|
| 233 |
-
"
|
| 234 |
-
"
|
| 235 |
-
"
|
| 236 |
-
"starting_method": "{method} をポート {port} で開始...",
|
| 237 |
-
"process_stopped": "\n🛑 ユーザーによってプロセスが停止されました",
|
| 238 |
-
"selected_port": "選択されたポート: {port}",
|
| 239 |
-
"starting_gradio_with_sharing": "Gradioを共有機能付きで開始...",
|
| 240 |
-
"starting_localtunnel": "Localtunnelをポート {port} で開始...",
|
| 241 |
-
"share_link": "共有リンク: {url}",
|
| 242 |
-
"password_ip": "パスワードIP: {ip}",
|
| 243 |
-
"starting_ngrok": "Ngrokをポート {port} で開始...",
|
| 244 |
-
"ngrok_url": "Ngrok URL: {url}",
|
| 245 |
-
"ngrok_error": "Ngrok開始エラー: {error}",
|
| 246 |
-
"apollo_chunk_size": "Apolloチャンクサイズ",
|
| 247 |
-
"apollo_chunk_size_info": "Apollo改善のためのチャンクサイズ (ユニバーサルモデル推奨: 19)",
|
| 248 |
-
"apollo_overlap": "Apolloオーバーラップ",
|
| 249 |
-
"enhancing_with_apollo": "Apolloで改善中 ({}/{} ファイル)...",
|
| 250 |
-
"apollo_processing_completed": "Apollo処理完了!",
|
| 251 |
-
"apollo_overlap_info": "Apollo改善のためのオーバーラップ (推奨: 2)",
|
| 252 |
-
"invalid_method": "エラー: 無効なメソッド! 'gradio', 'localtunnel', または 'ngrok' を使用してください。",
|
| 253 |
-
"apollo_enhancement_settings": "Apollo改善設定",
|
| 254 |
-
"enhance_with_apollo": "Apolloで改善",
|
| 255 |
-
"enhance_with_apollo_info": "分離後のオーディオ改善のためにApolloを有効化",
|
| 256 |
-
"apollo_method": "Apolloメソッド",
|
| 257 |
-
"apollo_method_info": "Apolloの処理メソッドを選択",
|
| 258 |
-
"normal_method": "通常メソッド",
|
| 259 |
-
"mid_side_method": "ミッド/サイドメソッド",
|
| 260 |
-
"apollo_normal_model": "Apollo通常モデル",
|
| 261 |
-
"apollo_normal_model_info": "通常Apollo処理に使用するモデル",
|
| 262 |
-
"apollo_midside_model": "Apolloミッド/サイドモデル",
|
| 263 |
-
"apollo_enhancement_info": "音質の改善",
|
| 264 |
-
"selected_models": "選択されたモデル",
|
| 265 |
-
"save_preset": "プリセットを保存",
|
| 266 |
-
"delete_preset": "プリセットを削除",
|
| 267 |
-
"refresh_presets": "プリセットを更新",
|
| 268 |
-
"preset_name": "プリセット名",
|
| 269 |
-
"select_preset": "プリセットを選択",
|
| 270 |
-
"add_favorite": "お気に入りに追加",
|
| 271 |
-
"apollo_midside_model_info": "ミッド/サイド処理に使用するモデル (オプション)",
|
| 272 |
-
"language_changed_message": "言語が変更されました。セルを再起動してください。",
|
| 273 |
-
"apply_matchering": "Matcheringを適用",
|
| 274 |
-
"matchering_info": "オーディオのマスタリングとバランスを向上させるためにMatcheringを適用",
|
| 275 |
-
"matchering_passes": "Matcheringの反復回数",
|
| 276 |
-
"matchering_passes_info": "Matcheringの反復回数(1-5、高い値は品質を向上させますが時間がかかります)",
|
| 277 |
-
"matchering_processing": "Matcheringを適用中... ({:.1f}%)",
|
| 278 |
-
"matchering_completed": "✅ Matchering処理が完了しました!",
|
| 279 |
-
"matchering_error": "❌ Matchering中にエラーが発生しました:{}"
|
| 280 |
}
|
|
|
|
| 17 |
"overlap": "オーバーラップ",
|
| 18 |
"overlap_info": "推奨: 2-10 (高い値は品質を向上させますが、より多くのVRAMが必要です)",
|
| 19 |
"tta_boost": "TTAブースト",
|
|
|
|
| 20 |
"phase_fix": "位相補正",
|
| 21 |
+
"use_phase_correction": "高度な位相補正を適用",
|
| 22 |
"phase_fix_info": "インストゥルメンタルトラックのための高度な位相補正",
|
| 23 |
"instrumental": "インストゥルメンタル",
|
|
|
|
| 24 |
"process": "🚀 処理",
|
| 25 |
"reset": "🧹 リセット",
|
| 26 |
"status": "ステータス",
|
|
|
|
| 79 |
"downloaded_file": "ダウンロードされたファイル",
|
| 80 |
"cookie_management": "🍪 クッキー管理",
|
| 81 |
"upload_cookies_txt": "Cookies.txtをアップロード",
|
| 82 |
+
"cookie_info": "<div style=\"margin-left:15px; font-size:0.95em\">**📌 なぜ必要?**<br>- 年齢制限コンテンツへのアクセス<br>- プライベート/非公開ビデオのダウンロード<br>- 地域制限の回避<br>- YouTubeダウンロード制限の回避<br><br>**⚠️ 重要な注意**<br>- クッキーファイルを共有しないでください!<br>- 以下の時に更新:<br> • \"403 Forbidden\"エラー<br> • ダウンロードが突然停止<br> • \"セッション期限切れ\"メッセージ<br><br>**🔄 更新手順**<br>1. この<a href=\"https://chromewebstore.google.com/detail/get-cookiestxt-clean/ahmnmhfbokciafffnknlekllgcnafnie\" target=\"_blank\">Chrome拡張</a>をインストール<br>2. ChromeでYouTubeにログイン<br>3. 拡張アイコンをクリック → \"エクスポート\"<br>4. ここにダウンロードしたファイルをアップロード<br><br>**⏳ クッキー寿命**<br>- 通常セッション: 24時間<br>- 敏感な操作: 1時間<br>- パスワード変更: 即時更新</div>",
|
| 83 |
+
"ensemble_output_copied": "✅ アンサンブル出力が {} にコピーされました",
|
| 84 |
+
"error_copying_ensemble_output": "❌ アンサンブル出力のコピーエラー: {}",
|
| 85 |
+
"drive_mounted_copying_ensemble": "Google Driveがマウントされました。アンサンブル出力をコピー中...",
|
| 86 |
+
"drive_already_mounted_copying_ensemble": "Google Driveはすでにマウントされています。アンサンブル出力をコピー中...",
|
| 87 |
+
"no_ensemble_output_files_found": "❌ アンサンブル出力ファイルが見つかりません。",
|
| 88 |
+
"drive_mounted_copying_files": "Google Driveがマウントされました。ファイルをコピー中...",
|
| 89 |
+
"drive_already_mounted_copying_files": "Google Driveはすでにマウントされています。ファイルをコピー中...",
|
| 90 |
+
"files_copied_to_drive": "✅ ファイルが {} にコピーされました",
|
| 91 |
+
"error_copying_files": "❌ ファイルのコピーエラー: {}",
|
| 92 |
+
"mounting_drive": "Google Driveをマウント中...",
|
| 93 |
+
"settings_saved": "設定が config.json に保存されました: 言語={lang}, メソッド={method}, ポート={port}",
|
| 94 |
+
"selected_language": "選択された言語: {lang}",
|
| 95 |
+
"available_languages": "利用可能な言語: {langs}",
|
| 96 |
+
"select_language_prompt": "言語を選択 (デフォルト: {default}): ",
|
| 97 |
+
"invalid_language": "無効な言語選択! デフォルト言語 ({default}) を使用します。",
|
| 98 |
+
"available_sharing_methods": "利用可能な共有方法: gradio, localtunnel, ngrok",
|
| 99 |
+
"select_sharing_method_prompt": "共有方法を選択 (デフォルト: {default}): ",
|
| 100 |
+
"enter_ngrok_token_prompt": "Ngrokトークンを入力 (デフォルト: {default}): ",
|
| 101 |
+
"ngrok_token_required": "エラー: ngrokメソッドにはNgrokトークンが必要です!",
|
| 102 |
+
"enter_port_prompt": "ポート番号を入力 (デフォルト: {default}): ",
|
| 103 |
+
"opening_previous_url": "前のURLを開く: {url}",
|
| 104 |
+
"open_url_manually": "ColabでURLを手動で開く: {url}",
|
| 105 |
+
"invalid_previous_url": "無効な前のURL: {url}, 開かれませんでした。",
|
| 106 |
+
"starting_method": "{method} をポート {port} で開始...",
|
| 107 |
+
"process_stopped": "\n🛑 ユーザーによってプロセスが停止されました",
|
| 108 |
+
"selected_port": "選択されたポート: {port}",
|
| 109 |
+
"starting_gradio_with_sharing": "Gradioを共有機能付きで開始...",
|
| 110 |
+
"starting_localtunnel": "Localtunnelをポート {port} で開始...",
|
| 111 |
+
"share_link": "共有リンク: {url}",
|
| 112 |
+
"password_ip": "パスワードIP: {ip}",
|
| 113 |
+
"starting_ngrok": "Ngrokをポート {port} で開始...",
|
| 114 |
+
"ngrok_url": "Ngrok URL: {url}",
|
| 115 |
+
"ngrok_error": "Ngrok開始エラー: {error}",
|
| 116 |
+
"auto_apollo_chunk_size": "Apolloチャンクサイズ",
|
| 117 |
+
"auto_apollo_chunk_size_info": "Apollo改善のためのチャンクサイズ (ユニバーサルモデル推奨: 19)",
|
| 118 |
+
"auto_apollo_overlap": "Apolloオーバーラップ",
|
| 119 |
+
"auto_enhancing_with_apollo": "Apolloで改善中 ({}/{} ファイル)...",
|
| 120 |
+
"auto_apollo_processing_completed": "Apollo処理完了!",
|
| 121 |
+
"auto_apollo_overlap_info": "Apollo改善のためのオーバーラップ (推奨: 2)",
|
| 122 |
+
"apollo_chunk_size": "Apolloチャンクサイズ",
|
| 123 |
+
"apollo_chunk_size_info": "Apollo改善のためのチャンクサイズ (ユニバーサルモデル推奨: 19)",
|
| 124 |
+
"apollo_overlap": "Apolloオーバーラップ",
|
| 125 |
+
"enhance_with_apollo": "Apolloで改善",
|
| 126 |
+
"apollo_enhancement_info": "音質の改善",
|
| 127 |
+
"enhancing_with_apollo": "Apolloで改善中 ({}/{} ファイル)...",
|
| 128 |
+
"apollo_processing_completed": "Apollo処理完了!",
|
| 129 |
+
"apollo_overlap_info": "Apollo改善のためのオーバーラップ (推奨: 2)",
|
| 130 |
+
"selected_models": "選択されたモデル",
|
| 131 |
+
"save_preset": "プリセットを保存",
|
| 132 |
+
"delete_preset": "プリセットを削除",
|
| 133 |
+
"refresh_presets": "プリセットを更新",
|
| 134 |
+
"preset_name": "プリセット名",
|
| 135 |
+
"select_preset": "プリセットを選択",
|
| 136 |
+
"add_favorite": "お気に入りに追加",
|
| 137 |
+
"apply_matchering": "Matcheringを適用",
|
| 138 |
+
"matchering_info": "オーディオのマスタリングとバランスを向上させるためにMatcheringを適用",
|
| 139 |
+
"matchering_passes": "Matcheringの反復回数",
|
| 140 |
+
"matchering_passes_info": "Matcheringの反復回数(1-5、高い値は品質を向上させますが時間がかかります)",
|
| 141 |
+
"matchering_processing": "Matcheringを適用中... ({:.1f}%)",
|
| 142 |
+
"matchering_completed": "✅ Matchering処理が完了しました!",
|
| 143 |
+
"matchering_error": "❌ Matchering中にエラーが発生しました:{}",
|
| 144 |
+
"invalid_method": "エラー: 無効なメソッド! 'gradio', 'localtunnel', または 'ngrok' を使用してください。",
|
| 145 |
+
"language_changed_message": "言語が変更されました。セルを再起動してください。",
|
| 146 |
+
"backend_settings": "🚀 バックエンド設定",
|
| 147 |
+
"inference_backend": "推論バックエンド",
|
| 148 |
+
"optimization_mode": "最適化モード",
|
| 149 |
+
"optimization_mode_info": "PyTorch最適化モードの選択",
|
| 150 |
+
"mixed_precision_amp": "混合精度 (AMP)",
|
| 151 |
+
"mixed_precision_info": "2x速い推論 - 推奨",
|
| 152 |
+
"tf32_acceleration": "TF32アクセラレーション",
|
| 153 |
+
"tf32_acceleration_info": "RTX 30xx+のための追加速度ブースト",
|
| 154 |
+
"cudnn_benchmark": "cuDNNベンチマーク",
|
| 155 |
+
"cudnn_benchmark_info": "最初の実行で遅い、後続の実行で非常に速い",
|
| 156 |
+
"ultra_optimized_pytorch": "超最適化PyTorchバックエンド",
|
| 157 |
+
"default_active_max_speed": "デフォルトでアクティブ - 最大速度最適化",
|
| 158 |
+
"channels_last_mode": "RTX GPUで最速",
|
| 159 |
+
"compile_mode": "PyTorch 2.0+のための追加速度",
|
| 160 |
+
"default_mode": "標準",
|
| 161 |
+
"tta_info": "品質を向上させますが、処理速度が遅くなります",
|
| 162 |
+
"instrumental_info": "通常2つのステムを出力しますが、時には1つだけ - 試してみてください! :)",
|
| 163 |
+
"direct_download_error": "❌ 直接ダウンロードエラー:{}",
|
| 164 |
+
"invalid_google_drive_url": "❌ 無効な Google Drive URL",
|
| 165 |
+
"continuing_without_google_drive": "Google Drive接続に失敗しました。続行します...",
|
| 166 |
+
"skipping_drive_copy_no_mount": "⚠️ Google Driveがマウントされていません、Driveへのコピーをスキップします...",
|
| 167 |
+
"direct_download_failed": "ファイルの直接ダウンロードに失敗しました",
|
| 168 |
"manual_ensemble_tab": "🎚️ 手動アンサンブル",
|
| 169 |
+
"phase_fixer_tab": "🔧 位相補正ツール",
|
| 170 |
+
"batch_processing_tab": "📦 バッチ処理",
|
| 171 |
+
"batch_description": "同じモデルで複数のファイルを一度に処理",
|
| 172 |
+
"batch_input_folder": "入力フォルダ",
|
| 173 |
+
"batch_input_folder_placeholder": "オーディオファイルを含むフォルダのパスを入力",
|
| 174 |
+
"batch_output_folder": "出力フォルダ",
|
| 175 |
+
"batch_output_folder_placeholder": "出力ファイルのフォルダのパスを入力",
|
| 176 |
+
"batch_file_list": "処理するファイル",
|
| 177 |
+
"batch_add_files": "ファイルを追加",
|
| 178 |
+
"batch_clear_list": "リストをクリア",
|
| 179 |
+
"batch_start": "🚀 バッチ処理を開始",
|
| 180 |
+
"batch_stop": "⏹️ 停止",
|
| 181 |
+
"batch_progress": "バッチ処理の進捗",
|
| 182 |
+
"batch_current_file": "現在処理中",
|
| 183 |
+
"batch_completed": "✅ バッチ処理が完了しました!",
|
| 184 |
+
"batch_stopped": "⏹️ バッチ処理が停止されました",
|
| 185 |
+
"batch_no_files": "❌ 処理するファイルが選択されていません",
|
| 186 |
+
"source_model": "ソースモデル(ボーカル)",
|
| 187 |
+
"source_model_info": "ボーカル/位相データを抽出するモデル",
|
| 188 |
+
"target_model": "ターゲットモデル(インストゥルメンタル)",
|
| 189 |
+
"target_model_info": "位相補正を適用するモデル",
|
| 190 |
"input_sources": "📂 入力ソース",
|
| 191 |
"refresh": "🔄 更新",
|
| 192 |
"ensemble_algorithm": "アンサンブルアルゴリズム",
|
| 193 |
"select_audio_files": "オーディオファイルを選択",
|
| 194 |
"available_files": "利用可能なファイル",
|
| 195 |
+
"custom_weights": "カスタムウェイト(カンマ区切り)",
|
| 196 |
"custom_weights_placeholder": "例: 0.8, 1.2, 1.0, ...",
|
| 197 |
+
"custom_weights_info": "等しいウェイトにするには空白のままにしてください",
|
| 198 |
"result_preview_tab": "🎧 結果プレビュー",
|
| 199 |
"ensembled_output": "アンサンブル出力",
|
| 200 |
"processing_log_tab": "📋 処理ログ",
|
| 201 |
+
"processing_details": "処理の詳細",
|
| 202 |
"process_ensemble": "⚡ アンサンブルを処理",
|
| 203 |
"languages_tab": "🌐 言語",
|
| 204 |
"Vocal Models": "ボーカルモデル",
|
| 205 |
"Instrumental Models": "インストゥルメンタルモデル",
|
| 206 |
"4-Stem Models": "4ステムモデル",
|
| 207 |
"Denoise Models": "ノイズ除去モデル",
|
| 208 |
+
"Dereverb Models": "残響除去モデル",
|
| 209 |
+
"Other Shapes": "その他のモデル",
|
| 210 |
+
"Custom Models": "カスタムモデル",
|
| 211 |
+
"ensemble_files_description": "指定されたアンサンブル方法を使用して複数のオーディオファイルを1つの出力に結合します。",
|
| 212 |
"ensemble_files_help": "アンサンブル用のすべてのオーディオファイルへのパス",
|
| 213 |
+
"ensemble_type_help": "次のいずれか: avg_wave, median_wave, min_wave, max_wave, avg_fft, median_fft, min_fft, max_fft",
|
| 214 |
+
"ensemble_weights_help": "アンサンブル作成のウェイト。ウェイトの数はファイルの数と同じである必要があります",
|
| 215 |
+
"ensemble_output_help": "アンサンブル結果を保存するWAVファイルのパス",
|
| 216 |
"ensemble_type_print": "アンサンブルタイプ: {}",
|
| 217 |
"num_input_files_print": "入力ファイル数: {}",
|
| 218 |
"weights_print": "ウェイト: {}",
|
| 219 |
"output_file_print": "出力ファイル: {}",
|
| 220 |
+
"duration_mismatch_error": "すべてのファイルの長さが同じである必要があります",
|
| 221 |
"file_not_found_error": "エラー。ファイルが見つかりません: {}。パスを確認してください。",
|
| 222 |
+
"reading_chunk_print": "ファイルからチャンクを読み込み中: {} (開始: {}s, 長さ: {}s)",
|
| 223 |
"chunk_result_shape_print": "チャンク結果の形状: {}",
|
| 224 |
+
"ensemble_completed_print": "アンサンブル完了。出力を保存: {}",
|
| 225 |
"file_deletion_error": "{} を削除できませんでした: {}",
|
| 226 |
"directory_not_exist_warning": "⚠️ ディレクトリが存在しません: {}",
|
| 227 |
+
"not_a_directory_warning": "⚠️ パスがディレクトリではありません: {}",
|
| 228 |
"item_deletion_error": "⚠️ {} の削除エラー: {}",
|
| 229 |
+
"critical_error": "⛔ 致命的なエラー: {}",
|
| 230 |
"old_output_not_exist": "❌ 古い出力フォルダが存在しません",
|
| 231 |
+
"old_outputs_cleared": "✅ 古い出力が正常に削除されました!",
|
| 232 |
"error": "🔥 エラー: {}",
|
| 233 |
"ffmpeg_error": "FFmpegエラー ({}): {}",
|
| 234 |
"file_saved_successfully": "ファイルが正常に保存されました: {}",
|
| 235 |
"total_files_found": "見つかったファイルの総数: {}。使用サンプルレート: {}",
|
| 236 |
+
"total_progress": "全体の進捗",
|
| 237 |
+
"detailed_pbar_enabled": "詳細プログレスバーが有効: {}",
|
| 238 |
+
"loaded_audio": "オーディオを読み込みました: {}、形状: {}",
|
| 239 |
"cannot_read_track": "トラックを読み込めません: {}",
|
| 240 |
"error_message": "エラーメッセージ: {}",
|
| 241 |
+
"demudding_track": "トラックをクリーニング中(位相リミックス - インストゥルメンタル): {}",
|
| 242 |
+
"elapsed_time": "経過時間: {:.2f}秒。",
|
| 243 |
"proc_folder_description": "指定されたモデルを使用してフォルダ内のオーディオファイルを処理します。",
|
| 244 |
+
"model_type_help": "モデルタイプ (bandit, bs_roformer, mdx23cなど)",
|
| 245 |
"config_path_help": "設定ファイルへのパス",
|
| 246 |
+
"demud_phaseremix_help": "インストゥルメンタルの位相リミックスを有効にする",
|
| 247 |
"start_checkpoint_help": "有効なウェイトの初期チェックポイント",
|
| 248 |
+
"input_folder_help": "処理するミックスが入ったフォルダ",
|
| 249 |
+
"audio_path_help": "処理する単一オーディオファイルのパス",
|
| 250 |
"store_dir_help": "結果を保存するパス",
|
| 251 |
"device_ids_help": "GPU IDのリスト",
|
| 252 |
"extract_instrumental_help": "提供されている場合、ボーカルを反転してインストゥルメンタルを取得",
|
| 253 |
+
"disable_detailed_pbar_help": "デミックスの詳細プログレスバーを無効にする",
|
| 254 |
+
"force_cpu_help": "CUDAが利用可能でもCPUを強制使用する",
|
| 255 |
"flac_file_help": "WAVの代わりにFLACファイルを出力",
|
| 256 |
+
"export_format_help": "エクスポート形式とPCMタイプ",
|
| 257 |
"pcm_type_help": "FLACファイルのPCMタイプ",
|
| 258 |
+
"use_tta_help": "テスト時間拡張を有効にする",
|
| 259 |
"lora_checkpoint_help": "LoRAウェイトの初期チェックポイント",
|
| 260 |
+
"cuda_available": "CUDAが利用可能です。無効にするには --force_cpu を使用してください。",
|
| 261 |
"using_device": "使用デバイス: {}",
|
| 262 |
"instruments_print": "楽器: {}",
|
| 263 |
+
"model_load_time": "モデル読み込み時間: {:.2f}秒",
|
| 264 |
"invalid_url": "❌ 無効なURL",
|
| 265 |
+
"cookie_file_updated": "✅ Cookieファイルが更新されました!",
|
| 266 |
+
"cookie_installation_error": "⚠️ Cookieインストールエラー: {}",
|
| 267 |
+
"google_drive_mount_error": "❌ Google Drive接続エラー: {}",
|
| 268 |
"file_size_zero_error": "ファイルサイズがゼロまたはファイルが作成されていません",
|
| 269 |
"google_drive_error": "❌ Google Driveダウンロードエラー: {}",
|
| 270 |
+
"wav_conversion_failed": "WAV変換に失敗しました",
|
| 271 |
"download_error": "❌ ダウンロードエラー: {}",
|
| 272 |
+
"file_copied_to_drive": "ファイルがGoogle Driveにコピーされました: {}",
|
| 273 |
+
"copy_to_drive_error": "❌ Google Driveへのコピーエラー: {}",
|
| 274 |
+
"download_success": "🎉 ダウンロードに成功しました!",
|
| 275 |
+
"download_failed": "❌ ダウンロードに失敗しました",
|
| 276 |
"no_output_files_found": "❌ 出力ファイルが見つかりません。",
|
| 277 |
+
"output_refreshed_successfully": "✅ 出力が正常に更新されました!",
|
| 278 |
"error_refreshing_output": "❌ 出力更新エラー: {}",
|
| 279 |
+
"starting_audio_separation": "オーディオ分離を開始しています...",
|
| 280 |
+
"processing_audio": "オーディオを処理中",
|
| 281 |
+
"separating_audio": "オーディオを分離中... ({:.1f}%)",
|
| 282 |
+
"separation_complete": "分離完了!",
|
| 283 |
"progress_parsing_error": "進捗解析エラー: {}",
|
| 284 |
"error_occurred": "エラーが発生しました: {}",
|
| 285 |
+
"separation_process_completed": "分離プロセスが完了しました!",
|
| 286 |
"no_audio_file_error": "❌ オーディオファイルが提供されておらず、入力ディレクトリに既存のファイルもありません。",
|
| 287 |
"no_input_progress_label": "エラー: 入力が提供されていません -- 0.0%",
|
| 288 |
+
"processing_audio_print": "オーディオを処理中: {}、モデル使用: {}",
|
| 289 |
+
"starting_audio_separation_progress_label": "オーディオ分離を開始しています... -- 0.0%",
|
| 290 |
+
"separating_audio_progress_label": "オーディオを分離中... -- {}.0%",
|
| 291 |
+
"audio_processing_completed_progress_label": "オーディオ処理が完了しました! -- 100.0%",
|
| 292 |
+
"audio_processing_completed": "✅ オーディオ処理が完了しました!",
|
| 293 |
"error_occurred_progress_label": "エラーが発生しました -- 0.0%",
|
| 294 |
+
"minimum_files_required": "⚠️ 最低2つのファイルが必要です",
|
| 295 |
"valid_files_not_found": "❌ 有効なファイルが見つかりません",
|
| 296 |
+
"starting_ensemble_process": "アンサンブルプロセスを開始しています...",
|
| 297 |
"ensembling_progress": "アンサンブル中... ({:.1f}%)",
|
| 298 |
+
"finalizing_ensemble_output": "アンサンブル出力を最終化中...",
|
| 299 |
+
"success_log": "✅ 成功!\n{}",
|
| 300 |
+
"error_log": "❌ エラー!\n{}",
|
| 301 |
+
"ensemble_process_completed": "アンサンブルプロセスが完了しました!",
|
|
|
|
| 302 |
"no_models_selected": "❌ モデルが選択されていません",
|
| 303 |
"no_input_audio_provided": "❌ 入力オーディオが提供されていません",
|
| 304 |
+
"loading_model": "モデルを読み込み中 {}/{}: {}...",
|
| 305 |
+
"loading_model_progress_label": "モデルを読み込み中 {}/{}: {}... -- {}.0%",
|
| 306 |
+
"completed_model": "モデル完了 {}/{}: {}",
|
| 307 |
+
"completed_model_progress_label": "モデル完了 {}/{}: {} -- {}.0%",
|
| 308 |
+
"running_command": "コマンドを実行中: {}",
|
| 309 |
+
"model_failed": "モデル {} が失敗しました: {}",
|
| 310 |
+
"critical_error_with_model": "{} で致命的なエラー: {}",
|
| 311 |
+
"model_output_failed": "{} は出力を生成できませんでした",
|
| 312 |
+
"waiting_for_files": "すべてのファイルが準備できるまで待機中...",
|
| 313 |
+
"waiting_for_files_progress_label": "すべてのファイルが準備できるまで待機中... -- 90.0%",
|
| 314 |
+
"performing_ensemble": "アンサンブルを実行中...",
|
| 315 |
+
"performing_ensemble_progress_label": "アンサンブルを実行中... -- 92.0%",
|
| 316 |
"memory_usage_before_ensemble": "アンサンブル前のメモリ使用量: {}%",
|
| 317 |
"memory_usage_after_ensemble": "アンサンブル後のメモリ使用量: {}%",
|
| 318 |
+
"finalizing_ensemble_output_progress_label": "アンサンブル出力を最終化中... -- 98.0%",
|
| 319 |
+
"ensemble_file_creation_failed": "アンサンブルファイルを作成できませんでした: {}",
|
| 320 |
+
"ensemble_completed_progress_label": "アンサンブルが正常に完了しました! -- 100.0%",
|
| 321 |
+
"success_output_created": "✅ 成功!出力ファイルが作成されました。",
|
| 322 |
+
"custom_models_tab": "⚡ カスタムモデル",
|
| 323 |
+
"add_custom_model": "➕ カスタムモデルを追加",
|
| 324 |
+
"custom_model_name": "モデル名",
|
| 325 |
+
"custom_model_name_placeholder": "モデルの名前を入力してください",
|
| 326 |
+
"checkpoint_url": "チェックポイントURL (.ckpt/.pth)",
|
| 327 |
+
"checkpoint_url_placeholder": "https://huggingface.co/.../model.ckpt",
|
| 328 |
+
"config_url": "設定URL (.yaml)",
|
| 329 |
+
"config_url_placeholder": "https://huggingface.co/.../config.yaml",
|
| 330 |
+
"custom_py_url": "カスタムPythonファイルURL (オプション)",
|
| 331 |
+
"custom_py_url_placeholder": "https://huggingface.co/.../bs_roformer.py",
|
| 332 |
+
"auto_detect_type": "モデルタイプを自動検出",
|
| 333 |
+
"model_type": "モデルタイプ",
|
| 334 |
+
"add_model_btn": "➕ モデルを追加",
|
| 335 |
+
"custom_models_list": "📋 カスタムモデルリスト",
|
| 336 |
+
"delete_model": "🗑️ 削除",
|
| 337 |
+
"no_custom_models": "カスタムモデルはまだ追加されていません",
|
| 338 |
+
"model_added_success": "✅ モデルが正常に追加されました!",
|
| 339 |
+
"model_deleted_success": "✅ モデルが正常に削除されました!",
|
| 340 |
+
"model_add_error": "❌ モデル追加エラー: {}",
|
| 341 |
+
"model_delete_error": "❌ モデル削除エラー: {}",
|
| 342 |
+
"refresh_models": "🔄 更新",
|
| 343 |
+
"custom_model_info": "ダウンロードリンクを提供してカスタムモデルを追加します。モデルは使用時に自動的にダウンロードされます。",
|
| 344 |
+
"select_model_to_delete": "削除するモデルを選択してください"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 345 |
}
|
assets/i18n/languages/ru_ru.json
CHANGED
|
@@ -19,6 +19,7 @@
|
|
| 19 |
"tta_boost": "Усиление TTA",
|
| 20 |
"tta_info": "Улучшает качество, но замедляет скорость обработки",
|
| 21 |
"phase_fix": "Коррекция фазы",
|
|
|
|
| 22 |
"phase_fix_info": "Расширенная коррекция фазы для инструментальных треков",
|
| 23 |
"instrumental": "Инструментал",
|
| 24 |
"instrumental_info": "Обычно дает 2 выхода, но иногда только один, нужно экспериментировать :)",
|
|
@@ -46,7 +47,7 @@
|
|
| 46 |
"copy_to_drive": "📂 Копировать на Диск",
|
| 47 |
"copy_status": "Статус копирования",
|
| 48 |
"files_will_be_copied": "Файлы будут скопированы сюда...",
|
| 49 |
-
"processing_tip": "<b>🔈 Совет по обработке:</b> Для шумных результатов используйте модели <code>bleed_suppressor_v1</code> или <code>denoisedebleed</code> в категории <i>
|
| 50 |
"waiting_for_processing": "Ожидание обработки...",
|
| 51 |
"auto_ensemble_tab": "🤖 Автоматический ансамбль",
|
| 52 |
"upload_file": "Загрузить файл",
|
|
@@ -71,68 +72,147 @@
|
|
| 71 |
"output_preview": "Предпросмотр вывода",
|
| 72 |
"refresh_output": "🔄 Обновить вывод",
|
| 73 |
"ensemble_copy_status": "Вывод ансамбля будет скопирован сюда...",
|
| 74 |
-
"guidelines": "<h4>Руководство по выбору модел
|
| 75 |
"download_sources_tab": "⬇️ Скачать источники",
|
| 76 |
"direct_links": "🌐 Прямые ссылки",
|
| 77 |
"audio_file_url": "URL аудиофайла",
|
| 78 |
-
"download_from_url": "⬇️ Скачать
|
| 79 |
"download_status": "Статус загрузки",
|
| 80 |
-
"downloaded_file": "
|
| 81 |
-
"cookie_management": "🍪 Управление
|
| 82 |
"upload_cookies_txt": "Загрузить Cookies.txt",
|
| 83 |
-
"cookie_info": "<div style=\"margin-left:15px; font-size:0.95em\">**📌
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 84 |
"manual_ensemble_tab": "🎚️ Ручной ансамбль",
|
| 85 |
"input_sources": "📂 Источники ввода",
|
| 86 |
"refresh": "🔄 Обновить",
|
| 87 |
"ensemble_algorithm": "Алгоритм ансамбля",
|
| 88 |
-
"select_audio_files": "Выб
|
| 89 |
"available_files": "Доступные файлы",
|
| 90 |
-
"custom_weights": "Пользовательские веса (р
|
| 91 |
"custom_weights_placeholder": "Пример: 0.8, 1.2, 1.0, ...",
|
| 92 |
"custom_weights_info": "Оставьте пустым для равных весов",
|
| 93 |
"result_preview_tab": "🎧 Предпросмотр результата",
|
| 94 |
-
"ensembled_output": "
|
| 95 |
"processing_log_tab": "📋 Журнал обработки",
|
| 96 |
"processing_details": "Детали обработки",
|
| 97 |
"process_ensemble": "⚡ Обработать ансамбль",
|
| 98 |
"languages_tab": "🌐 Языки",
|
| 99 |
"Vocal Models": "Вокальные модели",
|
| 100 |
"Instrumental Models": "Инструментальные модели",
|
| 101 |
-
"4-Stem Models": "
|
| 102 |
"Denoise Models": "Модели шумоподавления",
|
| 103 |
"Dereverb Models": "Модели удаления реверберации",
|
| 104 |
"Other Models": "Другие модели",
|
| 105 |
-
"
|
|
|
|
| 106 |
"ensemble_files_help": "Путь ко всем аудиофайлам для ансамбля",
|
| 107 |
"ensemble_type_help": "Один из: avg_wave, median_wave, min_wave, max_wave, avg_fft, median_fft, min_fft, max_fft",
|
| 108 |
"ensemble_weights_help": "Веса для создания ансамбля. Количество весов должно совпадать с количеством файлов",
|
| 109 |
-
"ensemble_output_help": "Путь к WAV-файлу,
|
| 110 |
"ensemble_type_print": "Тип ансамбля: {}",
|
| 111 |
"num_input_files_print": "Количество входных файлов: {}",
|
| 112 |
"weights_print": "Веса: {}",
|
| 113 |
"output_file_print": "Выходной файл: {}",
|
| 114 |
-
"duration_mismatch_error": "Все файлы должны иметь одинаковую длительность",
|
| 115 |
-
"file_not_found_error": "Ошибка.
|
| 116 |
-
"reading_chunk_print": "Чтение фрагмента из файла: {} (начало: {}
|
| 117 |
"chunk_result_shape_print": "Форма результата фрагмента: {}",
|
| 118 |
-
"ensemble_completed_print": "Ансамбль завершен. Вы
|
| 119 |
-
"file_deletion_error": "
|
| 120 |
-
"directory_not_exist_warning": "⚠️
|
| 121 |
-
"not_a_directory_warning": "⚠️ Путь не является
|
| 122 |
"item_deletion_error": "⚠️ Ошибка удаления {}: {}",
|
| 123 |
"old_output_not_exist": "❌ Старая папка вывода не существует",
|
| 124 |
-
"old_outputs_cleared": "✅ Старые выходные данные успешно
|
| 125 |
"error": "🔥 Ошибка: {}",
|
| 126 |
"ffmpeg_error": "Ошибка FFmpeg ({}): {}",
|
| 127 |
"file_saved_successfully": "Файл успешно сохранен: {}",
|
| 128 |
"total_files_found": "Всего найдено файлов: {}. Используемая частота дискретизации: {}",
|
| 129 |
"total_progress": "Общий прогресс",
|
| 130 |
-
"detailed_pbar_enabled": "Подробная
|
| 131 |
-
"loaded_audio": "Загружен
|
| 132 |
"cannot_read_track": "Не удается прочитать трек: {}",
|
| 133 |
"error_message": "Сообщение об ошибке: {}",
|
| 134 |
"demudding_track": "Очистка трека (ремикс фазы - инструментал): {}",
|
| 135 |
-
"elapsed_time": "Прошедшее время: {:.2f} сек.",
|
| 136 |
"proc_folder_description": "Обрабатывает аудиофайлы в папке с использованием указанной модели.",
|
| 137 |
"model_type_help": "Тип модели (bandit, bs_roformer, mdx23c и т.д.)",
|
| 138 |
"config_path_help": "Путь к файлу конфигурации",
|
|
@@ -140,32 +220,40 @@
|
|
| 140 |
"start_checkpoint_help": "Начальная контрольная точка для действительных весов",
|
| 141 |
"input_folder_help": "Папка с миксами для обработки",
|
| 142 |
"audio_path_help": "Путь к одному аудиофайлу для обработки",
|
| 143 |
-
"store_dir_help": "Путь для хранения результатов",
|
| 144 |
"device_ids_help": "Список идентификаторов GPU",
|
| 145 |
-
"extract_instrumental_help": "Инвертировать вокал для получения инструментала, если предоставлен
|
| 146 |
-
"disable_detailed_pbar_help": "Отключить подробную
|
| 147 |
-
"force_cpu_help": "Принудительно использовать CPU, даже если
|
| 148 |
-
"flac_file_help": "Вывод
|
| 149 |
"export_format_help": "Формат экспорта и тип PCM",
|
| 150 |
-
"pcm_type_help": "Тип PCM для файлов
|
| 151 |
-
"use_tta_help": "Включить
|
| 152 |
"lora_checkpoint_help": "Начальная контрольная точка для весов LoRA",
|
| 153 |
-
"cuda_available": "CUDA доступ
|
| 154 |
"using_device": "Используемое устройство: {}",
|
| 155 |
"instruments_print": "Инструменты: {}",
|
| 156 |
"model_load_time": "Время загрузки модели: {:.2f} сек",
|
| 157 |
-
"invalid_url": "❌ Неве
|
| 158 |
-
"cookie_file_updated": "✅ Файл
|
| 159 |
-
"cookie_installation_error": "⚠️ Ошибка установки
|
| 160 |
-
"
|
| 161 |
-
"
|
| 162 |
-
"
|
|
|
|
| 163 |
"download_error": "❌ Ошибка загрузки: {}",
|
| 164 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 165 |
"download_failed": "❌ Загрузка не удалась",
|
| 166 |
"no_output_files_found": "❌ Выходные файлы не найдены.",
|
| 167 |
"output_refreshed_successfully": "✅ Вывод успешно обновлен!",
|
| 168 |
-
"error_refreshing_output": "❌ Ошибка
|
| 169 |
"starting_audio_separation": "Запуск разделения аудио...",
|
| 170 |
"processing_audio": "Обработка аудио",
|
| 171 |
"separating_audio": "Разделение аудио... ({:.1f}%)",
|
|
@@ -173,7 +261,7 @@
|
|
| 173 |
"progress_parsing_error": "Ошибка разбора прогресса: {}",
|
| 174 |
"error_occurred": "Произошла ошибка: {}",
|
| 175 |
"separation_process_completed": "Процесс разделения завершен!",
|
| 176 |
-
"no_audio_file_error": "❌ Аудиофайл не предоставлен
|
| 177 |
"no_input_progress_label": "Ошибка: Ввод не предоставлен -- 0.0%",
|
| 178 |
"processing_audio_print": "Обработка аудио из: {} с использованием модели: {}",
|
| 179 |
"starting_audio_separation_progress_label": "Запуск разделения аудио... -- 0.0%",
|
|
@@ -182,11 +270,11 @@
|
|
| 182 |
"audio_processing_completed": "✅ Обработка аудио завершена!",
|
| 183 |
"error_occurred_progress_label": "Произошла ошибка -- 0.0%",
|
| 184 |
"minimum_files_required": "⚠️ Требуется минимум 2 файла",
|
| 185 |
-
"valid_files_not_found": "❌
|
| 186 |
"starting_ensemble_process": "Запуск процесса ансамбля...",
|
| 187 |
-
"ensembling_progress": "
|
| 188 |
"finalizing_ensemble_output": "Завершение вывода ансамбля...",
|
| 189 |
-
"success_log": "✅ Успе
|
| 190 |
"error_log": "❌ Ошибка!\n{}",
|
| 191 |
"critical_error": "⛔ Критическая ошибка: {}",
|
| 192 |
"ensemble_process_completed": "Процесс ансамбля завершен!",
|
|
@@ -204,77 +292,55 @@
|
|
| 204 |
"waiting_for_files_progress_label": "Ожидание готовности всех файлов... -- 90.0%",
|
| 205 |
"performing_ensemble": "Выполнение ансамбля...",
|
| 206 |
"performing_ensemble_progress_label": "Выполнение ансамбля... -- 92.0%",
|
| 207 |
-
"memory_usage_before_ensemble": "Использование памяти
|
| 208 |
"memory_usage_after_ensemble": "Использование памяти после ансамбля: {}%",
|
| 209 |
"finalizing_ensemble_output_progress_label": "Завершение вывода ансамбля... -- 98.0%",
|
| 210 |
"ensemble_file_creation_failed": "Не удалось создать файл ансамбля: {}",
|
| 211 |
"ensemble_completed_progress_label": "Ансамбль успешно завершен! -- 100.0%",
|
| 212 |
-
"success_output_created": "✅ Успе
|
| 213 |
-
"
|
| 214 |
-
"
|
| 215 |
-
"
|
| 216 |
-
"
|
| 217 |
-
"
|
| 218 |
-
"
|
| 219 |
-
"
|
| 220 |
-
"
|
| 221 |
-
"
|
| 222 |
-
"
|
| 223 |
-
"
|
| 224 |
-
"
|
| 225 |
-
"
|
| 226 |
-
"
|
| 227 |
-
"
|
| 228 |
-
"
|
| 229 |
-
"
|
| 230 |
-
"
|
| 231 |
-
"
|
| 232 |
-
"
|
| 233 |
-
"
|
| 234 |
-
"
|
| 235 |
-
"
|
| 236 |
-
"
|
| 237 |
-
"
|
| 238 |
-
"
|
| 239 |
-
"
|
| 240 |
-
"
|
| 241 |
-
"
|
| 242 |
-
"
|
| 243 |
-
"
|
| 244 |
-
"
|
| 245 |
-
"
|
| 246 |
-
"
|
| 247 |
-
"
|
| 248 |
-
"
|
| 249 |
-
"
|
| 250 |
-
"
|
| 251 |
-
"
|
| 252 |
-
"
|
| 253 |
-
"
|
| 254 |
-
"
|
| 255 |
-
"
|
| 256 |
-
"
|
| 257 |
-
"
|
| 258 |
-
"normal_method": "Обычный метод",
|
| 259 |
-
"mid_side_method": "Метод середины/сторон",
|
| 260 |
-
"apollo_normal_model": "Обычная модель Apollo",
|
| 261 |
-
"apollo_normal_model_info": "Модель, используемая для обычной обработки Apollo",
|
| 262 |
-
"apollo_midside_model": "Модель Apollo середины/сторон",
|
| 263 |
-
"apollo_enhancement_info": "Улучшение качества звука",
|
| 264 |
-
"selected_models": "Выбранные модели",
|
| 265 |
-
"save_preset": "Сохранить пресет",
|
| 266 |
-
"delete_preset": "Удалить пресет",
|
| 267 |
-
"refresh_presets": "Обновить пресеты",
|
| 268 |
-
"preset_name": "Имя пресета",
|
| 269 |
-
"select_preset": "Выбрать пресет",
|
| 270 |
-
"add_favorite": "Добавить в избранное",
|
| 271 |
-
"apollo_midside_model_info": "Модель, используемая для обработки середины/сторон (опционально)",
|
| 272 |
-
"language_changed_message": "Язык изменен. Пожалуйста, перезапустите ячейку.",
|
| 273 |
-
"apply_matchering": "Применить Matchering",
|
| 274 |
-
"matchering_info": "Применить Matchering для улучшения мастеринга и баланса звука",
|
| 275 |
-
"matchering_passes": "Проходы Matchering",
|
| 276 |
-
"matchering_passes_info": "Количество итераций Matchering (1-5, более высокие значения улучшают качество, но занимают больше времени)",
|
| 277 |
-
"matchering_processing": "Применение Matchering... ({:.1f}%)",
|
| 278 |
-
"matchering_completed": "✅ Обработка Matchering завершена!",
|
| 279 |
-
"matchering_error": "❌ Ошибка во время Matchering: {}"
|
| 280 |
}
|
|
|
|
| 19 |
"tta_boost": "Усиление TTA",
|
| 20 |
"tta_info": "Улучшает качество, но замедляет скорость обработки",
|
| 21 |
"phase_fix": "Коррекция фазы",
|
| 22 |
+
"use_phase_correction": "Применить расширенную коррекцию фазы",
|
| 23 |
"phase_fix_info": "Расширенная коррекция фазы для инструментальных треков",
|
| 24 |
"instrumental": "Инструментал",
|
| 25 |
"instrumental_info": "Обычно дает 2 выхода, но иногда только один, нужно экспериментировать :)",
|
|
|
|
| 47 |
"copy_to_drive": "📂 Копировать на Диск",
|
| 48 |
"copy_status": "Статус копирования",
|
| 49 |
"files_will_be_copied": "Файлы будут скопированы сюда...",
|
| 50 |
+
"processing_tip": "<b>🔈 Совет по обработке:</b> Для шумных результатов используйте модели <code>bleed_suppressor_v1</code> или <code>denoisedebleed</code> в категории <i>Удаление шума и эффектов</i>, чтобы очистить выходные данные",
|
| 51 |
"waiting_for_processing": "Ожидание обработки...",
|
| 52 |
"auto_ensemble_tab": "🤖 Автоматический ансамбль",
|
| 53 |
"upload_file": "Загрузить файл",
|
|
|
|
| 72 |
"output_preview": "Предпросмотр вывода",
|
| 73 |
"refresh_output": "🔄 Обновить вывод",
|
| 74 |
"ensemble_copy_status": "Вывод ансамбля будет скопирован сюда...",
|
| 75 |
+
"guidelines": "<h4>Руководство по выбору модели</h4><ul><li><strong>Избегайте смешивания категорий:</strong> Сочетание вокальных и инструментальных моделей может привести к нежелательным смесям</li><li><strong>Примечания к специальным моделям:</strong><ul><li>Модели двойственности (v1/v2) - Выводят оба стема</li><li>Разделитель MDX23C - Гибридные результаты</li></ul></li><li><strong>Лучшая практика:</strong> Используйте 3-5 похожих моделей из одной категории</li></ul><div>💡 Совет профессионала: Начните с комбинации \"VOCALS-MelBand-Roformer BigBeta5e\" + \"VOCALS-BS-Roformer_1297\"</div>",
|
| 76 |
"download_sources_tab": "⬇️ Скачать источники",
|
| 77 |
"direct_links": "🌐 Прямые ссылки",
|
| 78 |
"audio_file_url": "URL аудиофайла",
|
| 79 |
+
"download_from_url": "⬇️ Скачать с URL",
|
| 80 |
"download_status": "Статус загрузки",
|
| 81 |
+
"downloaded_file": "Загруженный файл",
|
| 82 |
+
"cookie_management": "🍪 Управление куки",
|
| 83 |
"upload_cookies_txt": "Загрузить Cookies.txt",
|
| 84 |
+
"cookie_info": "<div style=\"margin-left:15px; font-size:0.95em\">**📌 Почему необходимо?**<br>- Доступ к контенту с ограничением по возрасту<br>- Скачивание приватных/неопубликованных видео<br>- Обход региональных ограничений<br>- Избежание лимитов скачивания YouTube<br><br>**⚠️ Важные замечания**<br>- НИКОГДА не делитесь своими файлами куки!<br>- Обновляйте куки, если:<br> • Получаете ошибки \"403 Запрещено\"<br> • Загрузки внезапно останавливаются<br> • Видите сообщения \"Сессия истекла\"<br><br>**🔄 Шаги обновления**<br>1. Установите это <a href=\"https://chromewebstore.google.com/detail/get-cookiestxt-clean/ahmnmhfbokciafffnknlekllgcnafnie\" target=\"_blank\">расширение Chrome</a><br>2. Войдите в YouTube в Chrome<br>3. Нажмите на иконку расширения → \"Экспорт\"<br>4. Загрузите скачанный файл сюда<br><br>**⏳ Срок действия куки**<br>- Обычные сессии: 24 часа<br>- Чувствительные операции: 1 час<br>- Смена пароля: Немедленное обновление</div>",
|
| 85 |
+
"ensemble_output_copied": "✅ Вывод ансамбля скопирован в {}",
|
| 86 |
+
"error_copying_ensemble_output": "❌ Ошибка копирования вывода ансамбля: {}",
|
| 87 |
+
"drive_mounted_copying_ensemble": "Google Drive подключен. Копирование вывода ансамбля...",
|
| 88 |
+
"drive_already_mounted_copying_ensemble": "Google Drive уже подключен. Копирование вывода ансамбля...",
|
| 89 |
+
"no_ensemble_output_files_found": "❌ Файлы вывода ансамбля не найдены.",
|
| 90 |
+
"drive_mounted_copying_files": "Google Drive подключен. Копирование файлов...",
|
| 91 |
+
"drive_already_mounted_copying_files": "Google Drive уже подключен. Копирование файлов...",
|
| 92 |
+
"files_copied_to_drive": "✅ Файлы скопированы в {}",
|
| 93 |
+
"error_copying_files": "❌ Ошибка копирования файлов: {}",
|
| 94 |
+
"mounting_drive": "Подключение Google Drive...",
|
| 95 |
+
"settings_saved": "Настройки сохранены в config.json: Язык={lang}, Метод={method}, Порт={port}",
|
| 96 |
+
"selected_language": "Выбранный язык: {lang}",
|
| 97 |
+
"available_languages": "Доступные языки: {langs}",
|
| 98 |
+
"select_language_prompt": "Выберите язык (по умолчанию: {default}): ",
|
| 99 |
+
"invalid_language": "Неверный выбор языка! Используется язык по умолчанию ({default}).",
|
| 100 |
+
"available_sharing_methods": "Доступные методы совместного доступа: gradio, localtunnel, ngrok",
|
| 101 |
+
"select_sharing_method_prompt": "Выберите метод совместного доступа (по умолчанию: {default}): ",
|
| 102 |
+
"enter_ngrok_token_prompt": "Введите ваш токен Ngrok (по умолчанию: {default}): ",
|
| 103 |
+
"ngrok_token_required": "Ошибка: Для метода ngrok требуется токен Ngrok!",
|
| 104 |
+
"enter_port_prompt": "Введите номер порта (по умолчанию: {default}): ",
|
| 105 |
+
"opening_previous_url": "Открытие предыдущего URL: {url}",
|
| 106 |
+
"open_url_manually": "Вручную откройте URL в Colab: {url}",
|
| 107 |
+
"invalid_previous_url": "Неверный предыдущий URL: {url}, не открыт.",
|
| 108 |
+
"starting_method": "Запуск {method} на порту {port}...",
|
| 109 |
+
"process_stopped": "\n🛑 Процесс остановлен пользователем",
|
| 110 |
+
"selected_port": "Выбранный порт: {port}",
|
| 111 |
+
"starting_gradio_with_sharing": "Запуск Gradio с встроенным совместным доступом...",
|
| 112 |
+
"starting_localtunnel": "Запуск Localtunnel на порту {port}...",
|
| 113 |
+
"share_link": "Ссылка для совместного доступа: {url}",
|
| 114 |
+
"password_ip": "Пароль IP: {ip}",
|
| 115 |
+
"starting_ngrok": "Запуск Ngrok на порту {port}...",
|
| 116 |
+
"ngrok_url": "URL Ngrok: {url}",
|
| 117 |
+
"ngrok_error": "Ошибка запуска Ngrok: {error}",
|
| 118 |
+
"auto_apollo_chunk_size": "Размер фрагмента Apollo",
|
| 119 |
+
"auto_apollo_chunk_size_info": "Размер фрагмента для улучшения Apollo (рекомендуется: 19 для универсальной модели)",
|
| 120 |
+
"auto_apollo_overlap": "Перекрытие Apollo",
|
| 121 |
+
"auto_enhancing_with_apollo": "Улучшение с Apollo ({}/{} файлов)...",
|
| 122 |
+
"auto_apollo_processing_completed": "Обработка Apollo завершена!",
|
| 123 |
+
"auto_apollo_overlap_info": "Перекрытие для улучшения Apollo (рекомендуется: 2)",
|
| 124 |
+
"apollo_chunk_size": "Размер фрагмента Apollo",
|
| 125 |
+
"apollo_chunk_size_info": "Размер фрагмента для улучшения Apollo (рекомендуется: 19 для универсальной модели)",
|
| 126 |
+
"apollo_overlap": "Перекрытие Apollo",
|
| 127 |
+
"enhance_with_apollo": "Улучшить с Apollo",
|
| 128 |
+
"apollo_enhancement_info": "Улучшение качества звука",
|
| 129 |
+
"enhancing_with_apollo": "Улучшение с Apollo ({}/{} файлов)...",
|
| 130 |
+
"apollo_processing_completed": "Обработка Apollo завершена!",
|
| 131 |
+
"apollo_overlap_info": "Перекрытие для улучшения Apollo (рекомендуется: 2)",
|
| 132 |
+
"selected_models": "Выбранные модели",
|
| 133 |
+
"save_preset": "Сохранить пресет",
|
| 134 |
+
"delete_preset": "Удалить пресет",
|
| 135 |
+
"refresh_presets": "Обновить пресеты",
|
| 136 |
+
"preset_name": "Имя пресета",
|
| 137 |
+
"select_preset": "Выбрать пресет",
|
| 138 |
+
"add_favorite": "Добавить в избранное",
|
| 139 |
+
"apply_matchering": "Применить Matchering",
|
| 140 |
+
"matchering_info": "Применить Matchering для улучшения мастеринга и баланса звука",
|
| 141 |
+
"matchering_passes": "Проходы Matchering",
|
| 142 |
+
"matchering_passes_info": "Количество итераций Matchering (1-5, более высокие значения улучшают качество, но занимают больше времени)",
|
| 143 |
+
"matchering_processing": "Применение Matchering... ({:.1f}%)",
|
| 144 |
+
"matchering_completed": "✅ Обработка Matchering завершена!",
|
| 145 |
+
"matchering_error": "❌ Ошибка во время Matchering: {}",
|
| 146 |
+
"invalid_method": "Ошибка: Неверный метод! Используйте 'gradio', 'localtunnel' или 'ngrok'.",
|
| 147 |
+
"language_changed_message": "Язык изменен. Пожалуйста, перезапустите ячейку.",
|
| 148 |
+
"backend_settings": "🚀 Настройки Backend",
|
| 149 |
+
"inference_backend": "Backend инференса",
|
| 150 |
+
"optimization_mode": "Режим оптимизации",
|
| 151 |
+
"optimization_mode_info": "Выбор режима оптимизации PyTorch",
|
| 152 |
+
"mixed_precision_amp": "Смешанная точность (AMP)",
|
| 153 |
+
"mixed_precision_info": "Inference в 2 раза быстрее - рекомендуется",
|
| 154 |
+
"tf32_acceleration": "Ускорение TF32",
|
| 155 |
+
"tf32_acceleration_info": "Дополнительный прирост скорости для RTX 30xx+",
|
| 156 |
+
"cudnn_benchmark": "Бенчмарк cuDNN",
|
| 157 |
+
"cudnn_benchmark_info": "Медленнее на первом запуске, гораздо быстрее на последующих",
|
| 158 |
+
"ultra_optimized_pytorch": "УЛЬТРА-ОПТИМИЗИРОВАННЫЙ PyTorch Backend",
|
| 159 |
+
"default_active_max_speed": "Активен по умолчанию - Максимальная оптимизация скорости",
|
| 160 |
+
"channels_last_mode": "Самый быстрый для RTX GPU",
|
| 161 |
+
"compile_mode": "Дополнительная скорость для PyTorch 2.0+",
|
| 162 |
+
"default_mode": "Стандартный",
|
| 163 |
"manual_ensemble_tab": "🎚️ Ручной ансамбль",
|
| 164 |
"input_sources": "📂 Источники ввода",
|
| 165 |
"refresh": "🔄 Обновить",
|
| 166 |
"ensemble_algorithm": "Алгоритм ансамбля",
|
| 167 |
+
"select_audio_files": "Выбрать аудиофайлы",
|
| 168 |
"available_files": "Доступные файлы",
|
| 169 |
+
"custom_weights": "Пользовательские веса (через запятую)",
|
| 170 |
"custom_weights_placeholder": "Пример: 0.8, 1.2, 1.0, ...",
|
| 171 |
"custom_weights_info": "Оставьте пустым для равных весов",
|
| 172 |
"result_preview_tab": "🎧 Предпросмотр результата",
|
| 173 |
+
"ensembled_output": "Вывод ансамбля",
|
| 174 |
"processing_log_tab": "📋 Журнал обработки",
|
| 175 |
"processing_details": "Детали обработки",
|
| 176 |
"process_ensemble": "⚡ Обработать ансамбль",
|
| 177 |
"languages_tab": "🌐 Языки",
|
| 178 |
"Vocal Models": "Вокальные модели",
|
| 179 |
"Instrumental Models": "Инструментальные модели",
|
| 180 |
+
"4-Stem Models": "4-стемовые модели",
|
| 181 |
"Denoise Models": "Модели шумоподавления",
|
| 182 |
"Dereverb Models": "Модели удаления реверберации",
|
| 183 |
"Other Models": "Другие модели",
|
| 184 |
+
"Other Shapes": "Другие модели",
|
| 185 |
+
"ensemble_files_description": "Объединяет несколько аудиофайлов в один выходной файл с использованием указанных методов ансамбля.",
|
| 186 |
"ensemble_files_help": "Путь ко всем аудиофайлам для ансамбля",
|
| 187 |
"ensemble_type_help": "Один из: avg_wave, median_wave, min_wave, max_wave, avg_fft, median_fft, min_fft, max_fft",
|
| 188 |
"ensemble_weights_help": "Веса для создания ансамбля. Количество весов должно совпадать с количеством файлов",
|
| 189 |
+
"ensemble_output_help": "Путь к WAV-файлу, где будет сохранен результат ансамбля",
|
| 190 |
"ensemble_type_print": "Тип ансамбля: {}",
|
| 191 |
"num_input_files_print": "Количество входных файлов: {}",
|
| 192 |
"weights_print": "Веса: {}",
|
| 193 |
"output_file_print": "Выходной файл: {}",
|
| 194 |
+
"duration_mismatch_error": "Все файлы должны иметь одинаковую продолжительность",
|
| 195 |
+
"file_not_found_error": "Ошибка. Не удается найти файл: {}. Проверьте пути.",
|
| 196 |
+
"reading_chunk_print": "Чтение фрагмента из файла: {} (начало: {}с, продолжительность: {}с)",
|
| 197 |
"chunk_result_shape_print": "Форма результата фрагмента: {}",
|
| 198 |
+
"ensemble_completed_print": "Ансамбль завершен. Вывод сохранен в: {}",
|
| 199 |
+
"file_deletion_error": "{} не удалось удалить: {}",
|
| 200 |
+
"directory_not_exist_warning": "⚠️ Каталог не существует: {}",
|
| 201 |
+
"not_a_directory_warning": "⚠️ Путь не является каталогом: {}",
|
| 202 |
"item_deletion_error": "⚠️ Ошибка удаления {}: {}",
|
| 203 |
"old_output_not_exist": "❌ Старая папка вывода не существует",
|
| 204 |
+
"old_outputs_cleared": "✅ Старые выходные данные успешно очищены!",
|
| 205 |
"error": "🔥 Ошибка: {}",
|
| 206 |
"ffmpeg_error": "Ошибка FFmpeg ({}): {}",
|
| 207 |
"file_saved_successfully": "Файл успешно сохранен: {}",
|
| 208 |
"total_files_found": "Всего найдено файлов: {}. Используемая частота дискретизации: {}",
|
| 209 |
"total_progress": "Общий прогресс",
|
| 210 |
+
"detailed_pbar_enabled": "Подробная полоса прогресса включена: {}",
|
| 211 |
+
"loaded_audio": "Загружено аудио: {}, форма: {}",
|
| 212 |
"cannot_read_track": "Не удается прочитать трек: {}",
|
| 213 |
"error_message": "Сообщение об ошибке: {}",
|
| 214 |
"demudding_track": "Очистка трека (ремикс фазы - инструментал): {}",
|
| 215 |
+
"elapsed_time": "Прошедшее время: {:.2f} секунд.",
|
| 216 |
"proc_folder_description": "Обрабатывает аудиофайлы в папке с использованием указанной модели.",
|
| 217 |
"model_type_help": "Тип модели (bandit, bs_roformer, mdx23c и т.д.)",
|
| 218 |
"config_path_help": "Путь к файлу конфигурации",
|
|
|
|
| 220 |
"start_checkpoint_help": "Начальная контрольная точка для действительных весов",
|
| 221 |
"input_folder_help": "Папка с миксами для обработки",
|
| 222 |
"audio_path_help": "Путь к одному аудиофайлу для обработки",
|
| 223 |
+
"store_dir_help": "Путь для сохранения результатов",
|
| 224 |
"device_ids_help": "Список идентификаторов GPU",
|
| 225 |
+
"extract_instrumental_help": "Инвертировать вокал для получения инструментала, если предоставлен",
|
| 226 |
+
"disable_detailed_pbar_help": "Отключить подробную полосу прогресса в демиксе",
|
| 227 |
+
"force_cpu_help": "Принудительно использовать CPU, даже если доступна CUDA",
|
| 228 |
+
"flac_file_help": "Вывод FLAC-файла вместо WAV",
|
| 229 |
"export_format_help": "Формат экспорта и тип PCM",
|
| 230 |
+
"pcm_type_help": "Тип PCM для FLAC-файлов",
|
| 231 |
+
"use_tta_help": "Включить тестовое усиление времени",
|
| 232 |
"lora_checkpoint_help": "Начальная контрольная точка для весов LoRA",
|
| 233 |
+
"cuda_available": "CUDA доступна, используйте --force_cpu для отключения.",
|
| 234 |
"using_device": "Используемое устройство: {}",
|
| 235 |
"instruments_print": "Инструменты: {}",
|
| 236 |
"model_load_time": "Время загрузки модели: {:.2f} сек",
|
| 237 |
+
"invalid_url": "❌ Недействительный URL",
|
| 238 |
+
"cookie_file_updated": "✅ Файл куки обновлен!",
|
| 239 |
+
"cookie_installation_error": "⚠️ Ошибка установки куки: {}",
|
| 240 |
+
"google_drive_mount_error": "❌ Ошибка подключения Google Drive: {}",
|
| 241 |
+
"file_size_zero_error": "Размер файла ноль или файл не создан",
|
| 242 |
+
"google_drive_error": "❌ Ошибка загрузки Google Drive: {}",
|
| 243 |
+
"wav_conversion_failed": "Конвертация WAV не удалась",
|
| 244 |
"download_error": "❌ Ошибка загрузки: {}",
|
| 245 |
+
"file_copied_to_drive": "Файл скопирован в Google Drive: {}",
|
| 246 |
+
"copy_to_drive_error": "❌ Ошибка копирования в Google Drive: {}",
|
| 247 |
+
"direct_download_failed": "Прямая загрузка файла не удалась",
|
| 248 |
+
"direct_download_error": "❌ Ошибка прямой загрузки: {}",
|
| 249 |
+
"invalid_google_drive_url": "❌ Недействительный URL Google Drive",
|
| 250 |
+
"continuing_without_google_drive": "Подключение Google Drive не удалось, продолжаем без него...",
|
| 251 |
+
"skipping_drive_copy_no_mount": "⚠️ Google Drive не подключен, пропуск копирования на Drive...",
|
| 252 |
+
"download_success": "🎉 Успешно загружено!",
|
| 253 |
"download_failed": "❌ Загрузка не удалась",
|
| 254 |
"no_output_files_found": "❌ Выходные файлы не найдены.",
|
| 255 |
"output_refreshed_successfully": "✅ Вывод успешно обновлен!",
|
| 256 |
+
"error_refreshing_output": "❌ Ошибка обновления вывода: {}",
|
| 257 |
"starting_audio_separation": "Запуск разделения аудио...",
|
| 258 |
"processing_audio": "Обработка аудио",
|
| 259 |
"separating_audio": "Разделение аудио... ({:.1f}%)",
|
|
|
|
| 261 |
"progress_parsing_error": "Ошибка разбора прогресса: {}",
|
| 262 |
"error_occurred": "Произошла ошибка: {}",
|
| 263 |
"separation_process_completed": "Процесс разделения завершен!",
|
| 264 |
+
"no_audio_file_error": "❌ Аудиофайл не предоставлен и нет существующего файла в каталоге ввода.",
|
| 265 |
"no_input_progress_label": "Ошибка: Ввод не предоставлен -- 0.0%",
|
| 266 |
"processing_audio_print": "Обработка аудио из: {} с использованием модели: {}",
|
| 267 |
"starting_audio_separation_progress_label": "Запуск разделения аудио... -- 0.0%",
|
|
|
|
| 270 |
"audio_processing_completed": "✅ Обработка аудио завершена!",
|
| 271 |
"error_occurred_progress_label": "Произошла ошибка -- 0.0%",
|
| 272 |
"minimum_files_required": "⚠️ Требуется минимум 2 файла",
|
| 273 |
+
"valid_files_not_found": "❌ Действительные файлы не найдены",
|
| 274 |
"starting_ensemble_process": "Запуск процесса ансамбля...",
|
| 275 |
+
"ensembling_progress": "Ансамблирование... ({:.1f}%)",
|
| 276 |
"finalizing_ensemble_output": "Завершение вывода ансамбля...",
|
| 277 |
+
"success_log": "✅ Успешно!\n{}",
|
| 278 |
"error_log": "❌ Ошибка!\n{}",
|
| 279 |
"critical_error": "⛔ Критическая ошибка: {}",
|
| 280 |
"ensemble_process_completed": "Процесс ансамбля завершен!",
|
|
|
|
| 292 |
"waiting_for_files_progress_label": "Ожидание готовности всех файлов... -- 90.0%",
|
| 293 |
"performing_ensemble": "Выполнение ансамбля...",
|
| 294 |
"performing_ensemble_progress_label": "Выполнение ансамбля... -- 92.0%",
|
| 295 |
+
"memory_usage_before_ensemble": "Использование памяти до ансамбля: {}%",
|
| 296 |
"memory_usage_after_ensemble": "Использование памяти после ансамбля: {}%",
|
| 297 |
"finalizing_ensemble_output_progress_label": "Завершение вывода ансамбля... -- 98.0%",
|
| 298 |
"ensemble_file_creation_failed": "Не удалось создать файл ансамбля: {}",
|
| 299 |
"ensemble_completed_progress_label": "Ансамбль успешно завершен! -- 100.0%",
|
| 300 |
+
"success_output_created": "✅ Успешно! Создан выходной файл.",
|
| 301 |
+
"phase_fixer_tab": "🔧 Корректор фазы",
|
| 302 |
+
"batch_processing_tab": "📦 Пакетная обработка",
|
| 303 |
+
"batch_description": "Обработка нескольких файлов одновременно с одной моделью",
|
| 304 |
+
"batch_input_folder": "Входная папка",
|
| 305 |
+
"batch_input_folder_placeholder": "Введите путь к папке с аудиофайлами",
|
| 306 |
+
"batch_output_folder": "Выходная папка",
|
| 307 |
+
"batch_output_folder_placeholder": "Введите путь к папке для выходных файлов",
|
| 308 |
+
"batch_file_list": "Файлы для обработки",
|
| 309 |
+
"batch_add_files": "Добавить файлы",
|
| 310 |
+
"batch_clear_list": "Очистить список",
|
| 311 |
+
"batch_start": "🚀 Начать пакетную обработку",
|
| 312 |
+
"batch_stop": "⏹️ Стоп",
|
| 313 |
+
"batch_progress": "Прогресс пакетной обработки",
|
| 314 |
+
"batch_current_file": "Сейчас обрабатывается",
|
| 315 |
+
"batch_completed": "✅ Пакетная обработка завершена!",
|
| 316 |
+
"batch_stopped": "⏹️ Пакетная обработка остановлена",
|
| 317 |
+
"batch_no_files": "❌ Файлы для обработки не выбраны",
|
| 318 |
+
"source_model": "Исходная модель (Вокал)",
|
| 319 |
+
"source_model_info": "Модель для извлечения вокала/фазовых данных",
|
| 320 |
+
"target_model": "Целевая модель (Инструментал)",
|
| 321 |
+
"target_model_info": "Модель для применения коррекции фазы",
|
| 322 |
+
"custom_models_tab": "⚡ Пользовательские модели",
|
| 323 |
+
"Custom Models": "Пользовательские модели",
|
| 324 |
+
"add_custom_model": "➕ Добавить пользовательскую модель",
|
| 325 |
+
"custom_model_name": "Название модели",
|
| 326 |
+
"custom_model_name_placeholder": "Введите название для вашей модели",
|
| 327 |
+
"checkpoint_url": "URL чекпоинта (.ckpt/.pth)",
|
| 328 |
+
"checkpoint_url_placeholder": "https://huggingface.co/.../model.ckpt",
|
| 329 |
+
"config_url": "URL конфигурации (.yaml)",
|
| 330 |
+
"config_url_placeholder": "https://huggingface.co/.../config.yaml",
|
| 331 |
+
"custom_py_url": "URL пользовательского Python файла (необязательно)",
|
| 332 |
+
"custom_py_url_placeholder": "https://huggingface.co/.../bs_roformer.py",
|
| 333 |
+
"auto_detect_type": "Автоматически определить тип модели",
|
| 334 |
+
"model_type": "Тип модели",
|
| 335 |
+
"add_model_btn": "➕ Добавить модель",
|
| 336 |
+
"custom_models_list": "📋 Список пользовательских моделей",
|
| 337 |
+
"delete_model": "🗑️ Удалить",
|
| 338 |
+
"no_custom_models": "Пользовательские модели ещё не добавлены",
|
| 339 |
+
"model_added_success": "✅ Модель успешно добавлена!",
|
| 340 |
+
"model_deleted_success": "✅ Модель успешно удалена!",
|
| 341 |
+
"model_add_error": "❌ Ошибка добавления модели: {}",
|
| 342 |
+
"model_delete_error": "❌ Ошибка удаления модели: {}",
|
| 343 |
+
"refresh_models": "🔄 Обновить",
|
| 344 |
+
"custom_model_info": "Добавляйте пользовательские модели, предоставив ссылки для скачивания. Модели будут скачаны автоматически при использовании.",
|
| 345 |
+
"select_model_to_delete": "Выберите модель для удаления"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 346 |
}
|
assets/i18n/languages/tr_tr.json
CHANGED
|
@@ -19,6 +19,7 @@
|
|
| 19 |
"tta_boost": "TTA Güçlendirme",
|
| 20 |
"tta_info": "Kaliteyi artırır ancak işlem hızını yavaşlatır",
|
| 21 |
"phase_fix": "Faz Düzeltme",
|
|
|
|
| 22 |
"phase_fix_info": "Enstrümantal parçalar için gelişmiş faz düzeltme",
|
| 23 |
"instrumental": "Enstrümantal",
|
| 24 |
"instrumental_info": "Genellikle 2 çıktı verir, ama bazen tek çıktı olabilir, denemen lazım! :)",
|
|
@@ -48,7 +49,7 @@
|
|
| 48 |
"files_will_be_copied": "Dosyalar buraya kopyalanacak...",
|
| 49 |
"processing_tip": "<b>🔈 İşleme İpucu:</b> Gürültülü sonuçlar için <i>\"Gürültü ve Efekt Kaldırma\"</i> kategorisindeki <code>bleed_suppressor_v1</code> veya <code>denoisedebleed</code> modellerini kullanarak çıkışı temizleyin",
|
| 50 |
"waiting_for_processing": "İşleme Bekleniyor...",
|
| 51 |
-
"auto_ensemble_tab": "🤖 Otomatik
|
| 52 |
"upload_file": "Dosya Yükle",
|
| 53 |
"enter_file_path": "Veya Dosya Yolunu Gir",
|
| 54 |
"file_path_placeholder": "Ses dosyasının tam yolunu gir",
|
|
@@ -61,16 +62,16 @@
|
|
| 61 |
"model_selection": "🧠 Model Seçimi",
|
| 62 |
"model_category": "Model Kategorisi",
|
| 63 |
"select_models": "Kategoriden Model Seç",
|
| 64 |
-
"ensemble_settings": "⚡
|
| 65 |
"method": "Yöntem",
|
| 66 |
"recommendation": "**Öneri:** avg_wave ve max_fft en iyi sonuçları verir",
|
| 67 |
"start_processing": "🚀 İşlemi Başlat",
|
| 68 |
"original_audio_tab": "🔊 Orijinal Ses",
|
| 69 |
"original_audio": "Orijinal Ses",
|
| 70 |
-
"ensemble_result_tab": "🎚️
|
| 71 |
"output_preview": "Çıktı Önizlemesi",
|
| 72 |
"refresh_output": "🔄 Çıktıyı Yenile",
|
| 73 |
-
"ensemble_copy_status": "
|
| 74 |
"guidelines": "<h4>Model Seçim Kılavuzu</h4><ul><li><strong>Kategorileri Karıştırmaktan Kaçının:</strong> Vokal ve enstrümantal modellerin birleşimi istenmeyen karışımlara yol açabilir</li><li><strong>Özel Modeller Hakkında Notlar:</strong><ul><li>İkili modeller (v1/v2) - Her iki stemi de üretir</li><li>MDX23C Ayırıcı - Hibrit sonuçlar</li></ul></li><li><strong>En İyi Uygulama:</strong> Aynı kategoriden 3-5 benzer model kullanın</li></ul><div>💡 Profesyonel İpucu: \"VOCALS-MelBand-Roformer BigBeta5e\" + \"VOCALS-BS-Roformer_1297\" kombinasyonuyla başlayın</div>",
|
| 75 |
"download_sources_tab": "⬇️ Kaynakları İndir",
|
| 76 |
"direct_links": "🌐 Doğrudan Bağlantılar",
|
|
@@ -79,22 +80,121 @@
|
|
| 79 |
"download_status": "İndirme Durumu",
|
| 80 |
"downloaded_file": "İndirilen Dosya",
|
| 81 |
"cookie_management": "🍪 Çerez Yönetimi",
|
| 82 |
-
"upload_cookies_txt": "
|
| 83 |
-
"cookie_info": "<div style=\"margin-left:15px; font-size:0.95em\">**📌 Neden Gerekli?**<br>- Yaş
|
| 84 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 85 |
"input_sources": "📂 Giriş Kaynakları",
|
| 86 |
"refresh": "🔄 Yenile",
|
| 87 |
-
"ensemble_algorithm": "
|
| 88 |
"select_audio_files": "Ses Dosyalarını Seç",
|
| 89 |
"available_files": "Mevcut Dosyalar",
|
| 90 |
"custom_weights": "Özel Ağırlıklar (virgülle ayrılmış)",
|
| 91 |
"custom_weights_placeholder": "Örnek: 0.8, 1.2, 1.0, ...",
|
| 92 |
"custom_weights_info": "Eşit ağırlıklar için boş bırakın",
|
| 93 |
"result_preview_tab": "🎧 Sonuç Önizlemesi",
|
| 94 |
-
"ensembled_output": "
|
| 95 |
"processing_log_tab": "📋 İşlem Günlüğü",
|
| 96 |
"processing_details": "İşlem Ayrıntıları",
|
| 97 |
-
"process_ensemble": "⚡
|
| 98 |
"languages_tab": "🌐 Diller",
|
| 99 |
"Vocal Models": "Vokal Modeller",
|
| 100 |
"Instrumental Models": "Enstrümantal Modeller",
|
|
@@ -102,12 +202,13 @@
|
|
| 102 |
"Denoise Models": "Gürültü Giderme Modelleri",
|
| 103 |
"Dereverb Models": "Yankı Giderme Modelleri",
|
| 104 |
"Other Models": "Diğer Modeller",
|
|
|
|
| 105 |
"ensemble_files_description": "Belirtilen birleştirme yöntemlerini kullanarak birden fazla ses dosyasını tek bir çıktıda birleştirir.",
|
| 106 |
-
"ensemble_files_help": "
|
| 107 |
"ensemble_type_help": "Şunlardan biri: avg_wave, median_wave, min_wave, max_wave, avg_fft, median_fft, min_fft, max_fft",
|
| 108 |
-
"ensemble_weights_help": "
|
| 109 |
-
"ensemble_output_help": "
|
| 110 |
-
"ensemble_type_print": "
|
| 111 |
"num_input_files_print": "Giriş dosyası sayısı: {}",
|
| 112 |
"weights_print": "Ağırlıklar: {}",
|
| 113 |
"output_file_print": "Çıktı dosyası: {}",
|
|
@@ -115,7 +216,7 @@
|
|
| 115 |
"file_not_found_error": "Hata. Dosya bulunamadı: {}. Yolları kontrol et.",
|
| 116 |
"reading_chunk_print": "Dosyadan parça okunuyor: {} (başlangıç: {}s, süre: {}s)",
|
| 117 |
"chunk_result_shape_print": "Parça sonucu şekli: {}",
|
| 118 |
-
"ensemble_completed_print": "
|
| 119 |
"file_deletion_error": "{} silinemedi: {}",
|
| 120 |
"directory_not_exist_warning": "⚠️ Dizin mevcut değil: {}",
|
| 121 |
"not_a_directory_warning": "⚠️ Yol bir dizin değil: {}",
|
|
@@ -157,10 +258,18 @@
|
|
| 157 |
"invalid_url": "❌ Geçersiz URL",
|
| 158 |
"cookie_file_updated": "✅ Çerez dosyası güncellendi!",
|
| 159 |
"cookie_installation_error": "⚠️ Çerez yükleme hatası: {}",
|
|
|
|
| 160 |
"file_size_zero_error": "Dosya boyutu sıfır veya dosya oluşturulmadı",
|
| 161 |
"google_drive_error": "❌ Google Drive indirme hatası: {}",
|
| 162 |
"wav_conversion_failed": "WAV dönüşümü başarısız",
|
| 163 |
"download_error": "❌ İndirme hatası: {}",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 164 |
"download_success": "🎉 Başarıyla indirildi!",
|
| 165 |
"download_failed": "❌ İndirme başarısız",
|
| 166 |
"no_output_files_found": "❌ Çıktı dosyaları bulunamadı.",
|
|
@@ -183,13 +292,13 @@
|
|
| 183 |
"error_occurred_progress_label": "Hata oluştu -- 0.0%",
|
| 184 |
"minimum_files_required": "⚠️ En az 2 dosya gerekli",
|
| 185 |
"valid_files_not_found": "❌ Geçerli dosya bulunamadı",
|
| 186 |
-
"starting_ensemble_process": "
|
| 187 |
-
"ensembling_progress": "
|
| 188 |
-
"finalizing_ensemble_output": "
|
| 189 |
"success_log": "✅ Başarılı!\n{}",
|
| 190 |
"error_log": "❌ Hata!\n{}",
|
| 191 |
"critical_error": "⛔ Kritik hata: {}",
|
| 192 |
-
"ensemble_process_completed": "
|
| 193 |
"no_models_selected": "❌ Model seçilmedi",
|
| 194 |
"no_input_audio_provided": "❌ Giriş sesi sağlanmadı",
|
| 195 |
"loading_model": "Model yükleniyor {}/{}: {}...",
|
|
@@ -202,100 +311,69 @@
|
|
| 202 |
"model_output_failed": "{} çıktı üretemedi",
|
| 203 |
"waiting_for_files": "Tüm dosyaların hazır olması bekleniyor...",
|
| 204 |
"waiting_for_files_progress_label": "Tüm dosyaların hazır olması bekleniyor... -- 90.0%",
|
| 205 |
-
"performing_ensemble": "
|
| 206 |
-
"performing_ensemble_progress_label": "
|
| 207 |
-
"memory_usage_before_ensemble": "
|
| 208 |
-
"memory_usage_after_ensemble": "
|
| 209 |
-
"finalizing_ensemble_output_progress_label": "
|
| 210 |
-
"ensemble_file_creation_failed": "
|
| 211 |
-
"ensemble_completed_progress_label": "
|
| 212 |
"success_output_created": "✅ Başarılı! Çıktı dosyası oluşturuldu.",
|
| 213 |
-
"
|
| 214 |
-
"
|
| 215 |
-
"
|
| 216 |
-
"
|
| 217 |
-
"
|
| 218 |
-
"
|
| 219 |
-
"
|
| 220 |
-
"
|
| 221 |
-
"
|
| 222 |
-
"
|
| 223 |
-
"
|
| 224 |
-
"
|
| 225 |
-
"
|
| 226 |
-
"
|
| 227 |
-
"
|
| 228 |
-
"
|
| 229 |
-
"
|
| 230 |
-
"
|
| 231 |
-
"
|
| 232 |
-
"
|
| 233 |
-
"
|
| 234 |
-
"
|
| 235 |
-
"
|
| 236 |
-
"
|
| 237 |
-
"
|
| 238 |
-
"
|
| 239 |
-
"
|
| 240 |
-
"
|
| 241 |
-
"
|
| 242 |
-
"
|
| 243 |
-
"
|
| 244 |
-
"
|
| 245 |
-
"
|
| 246 |
-
"
|
| 247 |
-
"
|
| 248 |
-
"
|
| 249 |
-
"
|
| 250 |
-
"
|
| 251 |
-
"
|
| 252 |
-
"
|
| 253 |
-
"
|
| 254 |
-
"
|
| 255 |
-
"
|
| 256 |
-
"
|
| 257 |
-
"
|
| 258 |
-
"
|
| 259 |
-
"
|
| 260 |
-
"
|
| 261 |
-
"
|
| 262 |
-
"
|
| 263 |
-
"
|
| 264 |
-
"
|
| 265 |
-
"
|
| 266 |
-
"
|
| 267 |
-
"
|
| 268 |
-
"
|
| 269 |
-
"
|
| 270 |
-
"apollo_overlap_info": "Apollo iyileştirme için çakışma (önerilen: 2)",
|
| 271 |
-
"invalid_method": "Hata: Geçersiz yöntem! 'gradio', 'localtunnel' veya 'ngrok' kullan.",
|
| 272 |
-
"apollo_enhancement_settings": "Apollo İyileştirme Ayarları",
|
| 273 |
-
"enhance_with_apollo": "Apollo ile İyileştir",
|
| 274 |
-
"enhance_with_apollo_info": "Ayrıştırmadan sonra ses iyileştirme için Apollo'yu etkinleştir",
|
| 275 |
-
"apollo_method": "Apollo Yöntemi",
|
| 276 |
-
"apollo_method_info": "Apollo için işlem yöntemini seç",
|
| 277 |
-
"normal_method": "Normal Yöntem",
|
| 278 |
-
"mid_side_method": "Orta/Yan Yöntem",
|
| 279 |
-
"apollo_normal_model": "Apollo Normal için Model",
|
| 280 |
-
"apollo_normal_model_info": "Normal Apollo işleme için kullanılacak model",
|
| 281 |
-
"apollo_midside_model": "Apollo Orta/Yan Model",
|
| 282 |
-
"apollo_enhancement_info": "Ses kalitesi iyileştirme",
|
| 283 |
-
"apollo_processing_method": "Apollo işlem metodu",
|
| 284 |
-
"apollo_mid_side_model": "Apollo orta/yan için modeller",
|
| 285 |
-
"selected_models": "Seçilecek modeller",
|
| 286 |
-
"save_preset": "Ön ayarı kaydet",
|
| 287 |
-
"delete_preset": "Ön ayyarı sil",
|
| 288 |
-
"refresh_presets": "Ön ayarları güncelle",
|
| 289 |
-
"preset_name": "ön ayar ismi",
|
| 290 |
-
"select_preset": "Ön ayarlar",
|
| 291 |
-
"add_favorite": "favorilere ekle",
|
| 292 |
-
"apollo_midside_model_info": "Orta/yan işleme için kullanılacak model (isteğe bağlı)",
|
| 293 |
-
"language_changed_message": "Dil değiştirildi. Lütfen hücreyi yeniden başlat.",
|
| 294 |
-
"apply_matchering": "Matchering Uygula",
|
| 295 |
-
"matchering_info": "Ses masteringini ve dengesini iyileştirmek için Matchering uygula",
|
| 296 |
-
"matchering_passes": "Matchering Geçişleri",
|
| 297 |
-
"matchering_passes_info": "Matchering yineleme sayısı (1-5, yüksek değerler kaliteyi artırır ancak daha uzun sürer)",
|
| 298 |
-
"matchering_processing": "Matchering uygulanıyor... ({:.1f}%)",
|
| 299 |
-
"matchering_completed": "✅ Matchering işlemi tamamlandı!",
|
| 300 |
-
"matchering_error": "❌ Matchering sırasında hata: {}"
|
| 301 |
}
|
|
|
|
| 19 |
"tta_boost": "TTA Güçlendirme",
|
| 20 |
"tta_info": "Kaliteyi artırır ancak işlem hızını yavaşlatır",
|
| 21 |
"phase_fix": "Faz Düzeltme",
|
| 22 |
+
"use_phase_correction": "Gelişmiş Faz Düzeltme Uygula",
|
| 23 |
"phase_fix_info": "Enstrümantal parçalar için gelişmiş faz düzeltme",
|
| 24 |
"instrumental": "Enstrümantal",
|
| 25 |
"instrumental_info": "Genellikle 2 çıktı verir, ama bazen tek çıktı olabilir, denemen lazım! :)",
|
|
|
|
| 49 |
"files_will_be_copied": "Dosyalar buraya kopyalanacak...",
|
| 50 |
"processing_tip": "<b>🔈 İşleme İpucu:</b> Gürültülü sonuçlar için <i>\"Gürültü ve Efekt Kaldırma\"</i> kategorisindeki <code>bleed_suppressor_v1</code> veya <code>denoisedebleed</code> modellerini kullanarak çıkışı temizleyin",
|
| 51 |
"waiting_for_processing": "İşleme Bekleniyor...",
|
| 52 |
+
"auto_ensemble_tab": "🤖 Otomatik Birleştirme",
|
| 53 |
"upload_file": "Dosya Yükle",
|
| 54 |
"enter_file_path": "Veya Dosya Yolunu Gir",
|
| 55 |
"file_path_placeholder": "Ses dosyasının tam yolunu gir",
|
|
|
|
| 62 |
"model_selection": "🧠 Model Seçimi",
|
| 63 |
"model_category": "Model Kategorisi",
|
| 64 |
"select_models": "Kategoriden Model Seç",
|
| 65 |
+
"ensemble_settings": "⚡ Birleştirme Ayarları",
|
| 66 |
"method": "Yöntem",
|
| 67 |
"recommendation": "**Öneri:** avg_wave ve max_fft en iyi sonuçları verir",
|
| 68 |
"start_processing": "🚀 İşlemi Başlat",
|
| 69 |
"original_audio_tab": "🔊 Orijinal Ses",
|
| 70 |
"original_audio": "Orijinal Ses",
|
| 71 |
+
"ensemble_result_tab": "🎚️ Birleştirme Sonucu",
|
| 72 |
"output_preview": "Çıktı Önizlemesi",
|
| 73 |
"refresh_output": "🔄 Çıktıyı Yenile",
|
| 74 |
+
"ensemble_copy_status": "Birleştirme çıktısı buraya kopyalanacak...",
|
| 75 |
"guidelines": "<h4>Model Seçim Kılavuzu</h4><ul><li><strong>Kategorileri Karıştırmaktan Kaçının:</strong> Vokal ve enstrümantal modellerin birleşimi istenmeyen karışımlara yol açabilir</li><li><strong>Özel Modeller Hakkında Notlar:</strong><ul><li>İkili modeller (v1/v2) - Her iki stemi de üretir</li><li>MDX23C Ayırıcı - Hibrit sonuçlar</li></ul></li><li><strong>En İyi Uygulama:</strong> Aynı kategoriden 3-5 benzer model kullanın</li></ul><div>💡 Profesyonel İpucu: \"VOCALS-MelBand-Roformer BigBeta5e\" + \"VOCALS-BS-Roformer_1297\" kombinasyonuyla başlayın</div>",
|
| 76 |
"download_sources_tab": "⬇️ Kaynakları İndir",
|
| 77 |
"direct_links": "🌐 Doğrudan Bağlantılar",
|
|
|
|
| 80 |
"download_status": "İndirme Durumu",
|
| 81 |
"downloaded_file": "İndirilen Dosya",
|
| 82 |
"cookie_management": "🍪 Çerez Yönetimi",
|
| 83 |
+
"upload_cookies_txt": "Çerezler.txt Yükle",
|
| 84 |
+
"cookie_info": "<div style=\"margin-left:15px; font-size:0.95em\">**📌 Neden Gerekli?**<br>- Yaş kısıtlamalı içeriklere erişim<br>- Özel/listelenmemiş videoları indirme<br>- Bölgesel kısıtlamaları aşma<br>- YouTube indirme limitlerini aşma<br><br>**⚠️ Önemli Notlar**<br>- Çerez dosyalarınızı ASLA paylaşmayın!<br>- Çerezleri yenileyin:<br> • \"403 Yasak\" hataları alıyorsanız<br> • İndirmeler aniden duruyorsa<br> • \"Oturum sona erdi\" mesajı görüyorsanız<br><br>**🔄 Yenileme Adımları**<br>1. Bu <a href=\"https://chromewebstore.google.com/detail/get-cookiestxt-clean/ahmnmhfbokciafffnknlekllgcnafnie\" target=\"_blank\">Chrome eklentisini</a> yükleyin<br>2. Chrome'da YouTube'a giriş yapın<br>3. Eklenti simgesine tıklayın → \"Dışa Aktar\"<br>4. İndirilen dosyayı buraya yükleyin<br><br>**⏳ Çerez Ömrü**<br>- Normal oturumlar: 24 saat<br>- Hassas işlemler: 1 saat<br>- Şifre değişikliği: Anında yenileme</div>",
|
| 85 |
+
"ensemble_output_copied": "✅ Birleştirme çıktısı {}'ya kopyalandı",
|
| 86 |
+
"error_copying_ensemble_output": "��� Birleştirme çıktısı kopyalama hatası: {}",
|
| 87 |
+
"drive_mounted_copying_ensemble": "Google Drive bağlandı. Birleştirme çıktısını kopyalıyor...",
|
| 88 |
+
"drive_already_mounted_copying_ensemble": "Google Drive zaten bağlı. Birleştirme çıktısını kopyalıyor...",
|
| 89 |
+
"no_ensemble_output_files_found": "❌ Birleştirme çıktı dosyası bulunamadı.",
|
| 90 |
+
"drive_mounted_copying_files": "Google Drive bağlandı. Dosyaları kopyalıyor...",
|
| 91 |
+
"drive_already_mounted_copying_files": "Google Drive zaten bağlı. Dosyaları kopyalıyor...",
|
| 92 |
+
"files_copied_to_drive": "✅ Dosyalar {}'ya kopyalandı",
|
| 93 |
+
"error_copying_files": "❌ Dosya kopyalama hatası: {}",
|
| 94 |
+
"mounting_drive": "Google Drive bağlanıyor...",
|
| 95 |
+
"settings_saved": "Ayarlar config.json'a kaydedildi: Dil={lang}, Yöntem={method}, Port={port}",
|
| 96 |
+
"selected_language": "Seçilen dil: {lang}",
|
| 97 |
+
"available_languages": "Kullanılabilir diller: {langs}",
|
| 98 |
+
"select_language_prompt": "Bir dil seçin (varsayılan: {default}): ",
|
| 99 |
+
"invalid_language": "Geçersiz dil seçimi! Varsayılan dil ({default}) kullanılıyor.",
|
| 100 |
+
"available_sharing_methods": "Kullanılabilir paylaşım yöntemleri: gradio, localtunnel, ngrok",
|
| 101 |
+
"select_sharing_method_prompt": "Bir paylaşım yöntemi seçin (varsayılan: {default}): ",
|
| 102 |
+
"enter_ngrok_token_prompt": "Ngrok token'ınızı girin (varsayılan: {default}): ",
|
| 103 |
+
"ngrok_token_required": "Hata: ngrok yöntemi için Ngrok token'ı gerekli!",
|
| 104 |
+
"enter_port_prompt": "Port numarasını girin (varsayılan: {default}): ",
|
| 105 |
+
"opening_previous_url": "Önceki URL açılıyor: {url}",
|
| 106 |
+
"open_url_manually": "Colab'da URL'yi manuel açın: {url}",
|
| 107 |
+
"invalid_previous_url": "Geçersiz önceki URL: {url}, açılmadı.",
|
| 108 |
+
"starting_method": "{method} port {port}'ta başlatılıyor...",
|
| 109 |
+
"process_stopped": "\n🛑 Kullanıcı tarafından işlem durduruldu",
|
| 110 |
+
"selected_port": "Seçilen port: {port}",
|
| 111 |
+
"starting_gradio_with_sharing": "Dahili paylaşım ile Gradio başlatılıyor...",
|
| 112 |
+
"starting_localtunnel": "Localtunnel port {port}'ta başlatılıyor...",
|
| 113 |
+
"share_link": "Paylaşım bağlantısı: {url}",
|
| 114 |
+
"password_ip": "Şifre IP: {ip}",
|
| 115 |
+
"starting_ngrok": "Ngrok port {port}'ta başlatılıyor...",
|
| 116 |
+
"ngrok_url": "Ngrok URL: {url}",
|
| 117 |
+
"ngrok_error": "Ngrok başlatma hatası: {error}",
|
| 118 |
+
"auto_apollo_chunk_size": "Apollo Parça Boyutu",
|
| 119 |
+
"auto_apollo_chunk_size_info": "Apollo iyileştirme için parça boyutu (evrensel model için önerilen: 19)",
|
| 120 |
+
"auto_apollo_overlap": "Apollo Çakışması",
|
| 121 |
+
"auto_enhancing_with_apollo": "Apollo ile iyileştiriliyor ({}/{} dosya)...",
|
| 122 |
+
"auto_apollo_processing_completed": "Apollo işleme tamamlandı!",
|
| 123 |
+
"auto_apollo_overlap_info": "Apollo iyileştirme için çakışma (önerilen: 2)",
|
| 124 |
+
"apollo_chunk_size": "Apollo Parça Boyutu",
|
| 125 |
+
"apollo_chunk_size_info": "Apollo iyileştirme için parça boyutu (evrensel model için önerilen: 19)",
|
| 126 |
+
"apollo_overlap": "Apollo Çakışması",
|
| 127 |
+
"enhance_with_apollo": "Apollo ile İyileştir",
|
| 128 |
+
"apollo_enhancement_info": "Ses kalitesi iyileştirme",
|
| 129 |
+
"enhancing_with_apollo": "Apollo ile iyileştiriliyor ({}/{} dosya)...",
|
| 130 |
+
"apollo_processing_completed": "Apollo işleme tamamlandı!",
|
| 131 |
+
"apollo_overlap_info": "Apollo iyileştirme için çakışma (önerilen: 2)",
|
| 132 |
+
"selected_models": "Seçilen Modeller",
|
| 133 |
+
"save_preset": "Ön Ayar Kaydet",
|
| 134 |
+
"delete_preset": "Ön Ayar Sil",
|
| 135 |
+
"refresh_presets": "Ön Ayarları Güncelle",
|
| 136 |
+
"preset_name": "Ön Ayar Adı",
|
| 137 |
+
"select_preset": "Ön Ayar Seç",
|
| 138 |
+
"add_favorite": "Favorilere Ekle",
|
| 139 |
+
"apply_matchering": "Matchering Uygula",
|
| 140 |
+
"matchering_info": "Ses masteringini ve dengesini iyileştirmek için Matchering uygula",
|
| 141 |
+
"matchering_passes": "Matchering Geçişleri",
|
| 142 |
+
"matchering_passes_info": "Matchering yineleme sayısı (1-5, yüksek değerler kaliteyi artırır ancak daha uzun sürer)",
|
| 143 |
+
"matchering_processing": "Matchering uygulanıyor... ({:.1f}%)",
|
| 144 |
+
"matchering_completed": "✅ Matchering işlemi tamamlandı!",
|
| 145 |
+
"matchering_error": "❌ Matchering sırasında hata: {}",
|
| 146 |
+
"invalid_method": "Hata: Geçersiz yöntem! 'gradio', 'localtunnel' veya 'ngrok' kullan.",
|
| 147 |
+
"language_changed_message": "Dil değiştirildi. Lütfen hücreyi yeniden başlat.",
|
| 148 |
+
"backend_settings": "🚀 Backend Ayarları",
|
| 149 |
+
"inference_backend": "Inference Backend",
|
| 150 |
+
"optimization_mode": "Optimizasyon Modu",
|
| 151 |
+
"optimization_mode_info": "PyTorch optimizasyon modu seçimi",
|
| 152 |
+
"mixed_precision_amp": "Karışık Hassasiyet (AMP)",
|
| 153 |
+
"mixed_precision_info": "2x daha hızlı inference - önerilir",
|
| 154 |
+
"tf32_acceleration": "TF32 Hızlandırma",
|
| 155 |
+
"tf32_acceleration_info": "RTX 30xx+ için ekstra hız artışı",
|
| 156 |
+
"cudnn_benchmark": "cuDNN Benchmark",
|
| 157 |
+
"cudnn_benchmark_info": "İlk çalışmada yavaş, sonraki çalışmalarda çok hızlı",
|
| 158 |
+
"ultra_optimized_pytorch": "ULTRA-OPTİMİZE PyTorch Backend",
|
| 159 |
+
"default_active_max_speed": "Varsayılan olarak aktif - Maximum hız optimizasyonu",
|
| 160 |
+
"channels_last_mode": "RTX GPUs için en hızlı",
|
| 161 |
+
"compile_mode": "PyTorch 2.0+ için ekstra hız",
|
| 162 |
+
"default_mode": "Standart",
|
| 163 |
+
"manual_ensemble_tab": "🎚️ Manuel Birleştirme",
|
| 164 |
+
"phase_fixer_tab": "🔧 Faz Düzeltici",
|
| 165 |
+
"batch_processing_tab": "📦 Toplu İşleme",
|
| 166 |
+
"batch_description": "Aynı model ile birden fazla dosyayı aynı anda işle",
|
| 167 |
+
"batch_input_folder": "Giriş Klasörü",
|
| 168 |
+
"batch_input_folder_placeholder": "Ses dosyalarını içeren klasör yolunu girin",
|
| 169 |
+
"batch_output_folder": "Çıkış Klasörü",
|
| 170 |
+
"batch_output_folder_placeholder": "Çıkış dosyaları için klasör yolunu girin",
|
| 171 |
+
"batch_file_list": "İşlenecek Dosyalar",
|
| 172 |
+
"batch_add_files": "Dosya Ekle",
|
| 173 |
+
"batch_clear_list": "Listeyi Temizle",
|
| 174 |
+
"batch_start": "🚀 Toplu İşlemeyi Başlat",
|
| 175 |
+
"batch_stop": "⏹️ Durdur",
|
| 176 |
+
"batch_progress": "Toplu İşleme İlerlemesi",
|
| 177 |
+
"batch_current_file": "Şu An İşlenen",
|
| 178 |
+
"batch_completed": "✅ Toplu işleme tamamlandı!",
|
| 179 |
+
"batch_stopped": "⏹️ Toplu işleme durduruldu",
|
| 180 |
+
"batch_no_files": "❌ İşlenecek dosya seçilmedi",
|
| 181 |
+
"source_model": "Kaynak Model (Vokal)",
|
| 182 |
+
"source_model_info": "Vokal/faz verisi çıkarmak için model",
|
| 183 |
+
"target_model": "Hedef Model (Enstrümantal)",
|
| 184 |
+
"target_model_info": "Faz düzeltmesi uygulanacak model",
|
| 185 |
"input_sources": "📂 Giriş Kaynakları",
|
| 186 |
"refresh": "🔄 Yenile",
|
| 187 |
+
"ensemble_algorithm": "Birleştirme Algoritması",
|
| 188 |
"select_audio_files": "Ses Dosyalarını Seç",
|
| 189 |
"available_files": "Mevcut Dosyalar",
|
| 190 |
"custom_weights": "Özel Ağırlıklar (virgülle ayrılmış)",
|
| 191 |
"custom_weights_placeholder": "Örnek: 0.8, 1.2, 1.0, ...",
|
| 192 |
"custom_weights_info": "Eşit ağırlıklar için boş bırakın",
|
| 193 |
"result_preview_tab": "🎧 Sonuç Önizlemesi",
|
| 194 |
+
"ensembled_output": "Birleştirme Çıktısı",
|
| 195 |
"processing_log_tab": "📋 İşlem Günlüğü",
|
| 196 |
"processing_details": "İşlem Ayrıntıları",
|
| 197 |
+
"process_ensemble": "⚡ Birleştirmeyi İşle",
|
| 198 |
"languages_tab": "🌐 Diller",
|
| 199 |
"Vocal Models": "Vokal Modeller",
|
| 200 |
"Instrumental Models": "Enstrümantal Modeller",
|
|
|
|
| 202 |
"Denoise Models": "Gürültü Giderme Modelleri",
|
| 203 |
"Dereverb Models": "Yankı Giderme Modelleri",
|
| 204 |
"Other Models": "Diğer Modeller",
|
| 205 |
+
"Other Shapes": "Diğer Modeller",
|
| 206 |
"ensemble_files_description": "Belirtilen birleştirme yöntemlerini kullanarak birden fazla ses dosyasını tek bir çıktıda birleştirir.",
|
| 207 |
+
"ensemble_files_help": "Birleştirme için tüm ses dosyalarına giden yol",
|
| 208 |
"ensemble_type_help": "Şunlardan biri: avg_wave, median_wave, min_wave, max_wave, avg_fft, median_fft, min_fft, max_fft",
|
| 209 |
+
"ensemble_weights_help": "Birleştirme oluşturmak için ağırlıklar. Ağırlık sayısı dosya sayısına eşit olmalı",
|
| 210 |
+
"ensemble_output_help": "Birleştirme sonucunun kaydedileceği WAV dosyasının yolu",
|
| 211 |
+
"ensemble_type_print": "Birleştirme türü: {}",
|
| 212 |
"num_input_files_print": "Giriş dosyası sayısı: {}",
|
| 213 |
"weights_print": "Ağırlıklar: {}",
|
| 214 |
"output_file_print": "Çıktı dosyası: {}",
|
|
|
|
| 216 |
"file_not_found_error": "Hata. Dosya bulunamadı: {}. Yolları kontrol et.",
|
| 217 |
"reading_chunk_print": "Dosyadan parça okunuyor: {} (başlangıç: {}s, süre: {}s)",
|
| 218 |
"chunk_result_shape_print": "Parça sonucu şekli: {}",
|
| 219 |
+
"ensemble_completed_print": "Birleştirme tamamlandı. Çıktı şuraya kaydedildi: {}",
|
| 220 |
"file_deletion_error": "{} silinemedi: {}",
|
| 221 |
"directory_not_exist_warning": "⚠️ Dizin mevcut değil: {}",
|
| 222 |
"not_a_directory_warning": "⚠️ Yol bir dizin değil: {}",
|
|
|
|
| 258 |
"invalid_url": "❌ Geçersiz URL",
|
| 259 |
"cookie_file_updated": "✅ Çerez dosyası güncellendi!",
|
| 260 |
"cookie_installation_error": "⚠️ Çerez yükleme hatası: {}",
|
| 261 |
+
"google_drive_mount_error": "❌ Google Drive bağlantı hatası: {}",
|
| 262 |
"file_size_zero_error": "Dosya boyutu sıfır veya dosya oluşturulmadı",
|
| 263 |
"google_drive_error": "❌ Google Drive indirme hatası: {}",
|
| 264 |
"wav_conversion_failed": "WAV dönüşümü başarısız",
|
| 265 |
"download_error": "❌ İndirme hatası: {}",
|
| 266 |
+
"file_copied_to_drive": "Dosya Google Drive'a kopyalandı: {}",
|
| 267 |
+
"copy_to_drive_error": "❌ Google Drive'a kopyalama hatası: {}",
|
| 268 |
+
"direct_download_failed": "Dosyanın doğrudan indirilmesi başarısız",
|
| 269 |
+
"direct_download_error": "❌ Doğrudan indirme hatası: {}",
|
| 270 |
+
"invalid_google_drive_url": "❌ Geçersiz Google Drive URL'si",
|
| 271 |
+
"continuing_without_google_drive": "Google Drive bağlantısı yapılamadı, işlem devam ediyor...",
|
| 272 |
+
"skipping_drive_copy_no_mount": "⚠️ Google Drive bağlı değil, Drive'a kopyalama atlanıyor...",
|
| 273 |
"download_success": "🎉 Başarıyla indirildi!",
|
| 274 |
"download_failed": "❌ İndirme başarısız",
|
| 275 |
"no_output_files_found": "❌ Çıktı dosyaları bulunamadı.",
|
|
|
|
| 292 |
"error_occurred_progress_label": "Hata oluştu -- 0.0%",
|
| 293 |
"minimum_files_required": "⚠️ En az 2 dosya gerekli",
|
| 294 |
"valid_files_not_found": "❌ Geçerli dosya bulunamadı",
|
| 295 |
+
"starting_ensemble_process": "Birleştirme işlemi başlıyor...",
|
| 296 |
+
"ensembling_progress": "Birleştirme yapılıyor... ({:.1f}%)",
|
| 297 |
+
"finalizing_ensemble_output": "Birleştirme çıktısı tamamlanıyor...",
|
| 298 |
"success_log": "✅ Başarılı!\n{}",
|
| 299 |
"error_log": "❌ Hata!\n{}",
|
| 300 |
"critical_error": "⛔ Kritik hata: {}",
|
| 301 |
+
"ensemble_process_completed": "Birleştirme işlemi tamamlandı!",
|
| 302 |
"no_models_selected": "❌ Model seçilmedi",
|
| 303 |
"no_input_audio_provided": "❌ Giriş sesi sağlanmadı",
|
| 304 |
"loading_model": "Model yükleniyor {}/{}: {}...",
|
|
|
|
| 311 |
"model_output_failed": "{} çıktı üretemedi",
|
| 312 |
"waiting_for_files": "Tüm dosyaların hazır olması bekleniyor...",
|
| 313 |
"waiting_for_files_progress_label": "Tüm dosyaların hazır olması bekleniyor... -- 90.0%",
|
| 314 |
+
"performing_ensemble": "Birleştirme gerçekleştiriliyor...",
|
| 315 |
+
"performing_ensemble_progress_label": "Birleştirme gerçekleştiriliyor... -- 92.0%",
|
| 316 |
+
"memory_usage_before_ensemble": "Birleştirme öncesi bellek kullanımı: {}%",
|
| 317 |
+
"memory_usage_after_ensemble": "Birleştirme sonrası bellek kullanımı: {}%",
|
| 318 |
+
"finalizing_ensemble_output_progress_label": "Birleştirme çıktısı tamamlanıyor... -- 98.0%",
|
| 319 |
+
"ensemble_file_creation_failed": "Birleştirme dosyası oluşturulamadı: {}",
|
| 320 |
+
"ensemble_completed_progress_label": "Birleştirme başarıyla tamamlandı! -- 100.0%",
|
| 321 |
"success_output_created": "✅ Başarılı! Çıktı dosyası oluşturuldu.",
|
| 322 |
+
"custom_models_tab": "⚡ Özel Modeller",
|
| 323 |
+
"Custom Models": "Özel Modeller",
|
| 324 |
+
"add_custom_model": "➕ Özel Model Ekle",
|
| 325 |
+
"custom_model_name": "Model Adı",
|
| 326 |
+
"custom_model_name_placeholder": "Modeliniz için bir isim girin",
|
| 327 |
+
"checkpoint_url": "Checkpoint URL (.ckpt/.pth)",
|
| 328 |
+
"checkpoint_url_placeholder": "https://huggingface.co/.../model.ckpt",
|
| 329 |
+
"config_url": "Config URL (.yaml)",
|
| 330 |
+
"config_url_placeholder": "https://huggingface.co/.../config.yaml",
|
| 331 |
+
"custom_py_url": "Özel Python Dosyası URL (isteğe bağlı)",
|
| 332 |
+
"custom_py_url_placeholder": "https://huggingface.co/.../bs_roformer.py",
|
| 333 |
+
"auto_detect_type": "Model tipini otomatik algıla",
|
| 334 |
+
"model_type": "Model Tipi",
|
| 335 |
+
"add_model_btn": "➕ Model Ekle",
|
| 336 |
+
"custom_models_list": "📋 Özel Modeller Listesi",
|
| 337 |
+
"delete_model": "🗑️ Sil",
|
| 338 |
+
"no_custom_models": "Henüz özel model eklenmedi",
|
| 339 |
+
"model_added_success": "✅ Model başarıyla eklendi!",
|
| 340 |
+
"model_deleted_success": "��� Model başarıyla silindi!",
|
| 341 |
+
"model_add_error": "❌ Model ekleme hatası: {}",
|
| 342 |
+
"model_delete_error": "❌ Model silme hatası: {}",
|
| 343 |
+
"refresh_models": "🔄 Yenile",
|
| 344 |
+
"custom_model_info": "İndirme linkleri sağlayarak özel model ekleyin. Modeller kullanıldığında otomatik olarak indirilecektir.",
|
| 345 |
+
"select_model_to_delete": "Silmek için bir model seçin",
|
| 346 |
+
"downloading_model_file": "Model indiriliyor: {}",
|
| 347 |
+
"downloading_file_progress": "İndiriliyor: {} - {}%",
|
| 348 |
+
"chunk_size_mode": "Parça Boyutu Modu",
|
| 349 |
+
"chunk_size_mode_info": "base: standart ön ayarlar | custom: kendi değerinizi girin | yaml: model YAML'ından oku",
|
| 350 |
+
"chunk_size_custom_label": "Özel Parça Boyutu",
|
| 351 |
+
"chunk_size_custom_info": "Özel bir parça boyutu değeri girin",
|
| 352 |
+
"chunk_size_yaml_label": "YAML Parça Boyutu",
|
| 353 |
+
"chunk_size_yaml_display_info": "İndirmeden sonra modelin YAML yapılandırmasından okunacak",
|
| 354 |
+
"chunk_size_yaml_not_downloaded": "⏳ Model YAML'ı henüz indirilmedi — işlem sırasında değer okunacak",
|
| 355 |
+
"chunk_size_yaml_detected": "✅ YAML'dan: {}",
|
| 356 |
+
"remove_favorite": "Favorilerden Çıkar",
|
| 357 |
+
"normal_method": "Normal yöntem",
|
| 358 |
+
"mid_side_method": "Orta-Yan yöntemi",
|
| 359 |
+
"apollo_processing_method": "Apollo İşlem Yöntemi",
|
| 360 |
+
"apollo_normal_model": "Apollo Normal Model",
|
| 361 |
+
"apollo_mid_side_model": "Apollo Orta-Yan Model",
|
| 362 |
+
"scale_factor": "Ölçek Faktörü",
|
| 363 |
+
"scale_factor_info": "Faz ölçeklendirme faktörü (önerilen: 1.4)",
|
| 364 |
+
"source_file_label": "Kaynak Dosya (Vokal)",
|
| 365 |
+
"target_file_label": "Hedef Dosya (Enstrümantal)",
|
| 366 |
+
"run_phase_fixer": "🔧 Faz Düzelticiyi Çalıştır",
|
| 367 |
+
"phase_fixed_output": "Faz Düzeltilmiş Çıktı",
|
| 368 |
+
"phase_fixer_settings": "Faz Düzeltici Ayarları",
|
| 369 |
+
"low_cutoff": "Düşük Kesme Frekansı (Hz)",
|
| 370 |
+
"high_cutoff": "Yüksek Kesme Frekansı (Hz)",
|
| 371 |
+
"low_cutoff_info": "Faz düzeltmesi için düşük frekans kesme",
|
| 372 |
+
"high_cutoff_info": "Faz düzeltmesi için yüksek frekans kesme",
|
| 373 |
+
"no_preset_name_provided": "⚠️ Ön ayar adı girilmedi",
|
| 374 |
+
"no_models_selected_for_preset": "⚠️ Ön ayar için model seçilmedi",
|
| 375 |
+
"preset_saved": "✅ '{}' ön ayarı kaydedildi!",
|
| 376 |
+
"please_upload_both_files": "⚠️ Lütfen hem kaynak hem de hedef dosyaları yükleyin",
|
| 377 |
+
"processing_log_placeholder": "İşlem günlüğü burada görünecek...",
|
| 378 |
+
"enter_preset_name": "Ön ayar adını girin..."
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 379 |
}
|
assets/i18n/languages/zn_cn.json
CHANGED
|
@@ -19,6 +19,7 @@
|
|
| 19 |
"tta_boost": "TTA增强",
|
| 20 |
"tta_info": "提高质量,但会减慢处理速度",
|
| 21 |
"phase_fix": "相位修正",
|
|
|
|
| 22 |
"phase_fix_info": "用于乐器轨的高级相位修正",
|
| 23 |
"instrumental": "乐器",
|
| 24 |
"instrumental_info": "通常输出2个结果,但有时只有一个,试试看吧!:)",
|
|
@@ -46,7 +47,7 @@
|
|
| 46 |
"copy_to_drive": "📂 复制到云盘",
|
| 47 |
"copy_status": "复制状态",
|
| 48 |
"files_will_be_copied": "文件将复制到此处...",
|
| 49 |
-
"processing_tip": "<b>🔈 处理提示:</b> 对于有噪声的结果,使用<i>
|
| 50 |
"waiting_for_processing": "等待处理...",
|
| 51 |
"auto_ensemble_tab": "🤖 自动集成",
|
| 52 |
"upload_file": "上传文件",
|
|
@@ -80,16 +81,94 @@
|
|
| 80 |
"downloaded_file": "已下载文件",
|
| 81 |
"cookie_management": "🍪 Cookie管理",
|
| 82 |
"upload_cookies_txt": "上传Cookies.txt",
|
| 83 |
-
"cookie_info": "<div style=\"margin-left:15px; font-size:0.95em\">**📌 为什么
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 84 |
"manual_ensemble_tab": "🎚️ 手动集成",
|
| 85 |
-
"input_sources": "📂 输入
|
| 86 |
"refresh": "🔄 刷新",
|
| 87 |
"ensemble_algorithm": "集成算法",
|
| 88 |
"select_audio_files": "选择音频文件",
|
| 89 |
"available_files": "可用文件",
|
| 90 |
-
"custom_weights": "自定义权重(
|
| 91 |
-
"custom_weights_placeholder": "
|
| 92 |
-
"custom_weights_info": "留空
|
| 93 |
"result_preview_tab": "🎧 结果预览",
|
| 94 |
"ensembled_output": "集成输出",
|
| 95 |
"processing_log_tab": "📋 处理日志",
|
|
@@ -99,32 +178,33 @@
|
|
| 99 |
"Vocal Models": "人声模型",
|
| 100 |
"Instrumental Models": "乐器模型",
|
| 101 |
"4-Stem Models": "4声道模型",
|
| 102 |
-
"Denoise Models": "
|
| 103 |
"Dereverb Models": "去混响模型",
|
| 104 |
"Other Models": "其他模型",
|
| 105 |
-
"
|
| 106 |
-
"
|
| 107 |
-
"
|
| 108 |
-
"
|
| 109 |
-
"
|
|
|
|
| 110 |
"ensemble_type_print": "集成类型:{}",
|
| 111 |
-
"num_input_files_print": "输入文件数
|
| 112 |
"weights_print": "权重:{}",
|
| 113 |
"output_file_print": "输出文件:{}",
|
| 114 |
-
"duration_mismatch_error": "所有文件必须具有相同的时
|
| 115 |
-
"file_not_found_error": "错误。找不到文件:{}。
|
| 116 |
-
"reading_chunk_print": "
|
| 117 |
-
"chunk_result_shape_print": "
|
| 118 |
-
"ensemble_completed_print": "集成完成。输出保存
|
| 119 |
-
"file_deletion_error": "
|
| 120 |
"directory_not_exist_warning": "⚠️ 目录不存在:{}",
|
| 121 |
"not_a_directory_warning": "⚠️ 路径不是目录:{}",
|
| 122 |
-
"item_deletion_error": "⚠️ 删除{}时出错:{}",
|
| 123 |
"old_output_not_exist": "❌ 旧输出文件夹不存在",
|
| 124 |
"old_outputs_cleared": "✅ 旧输出已成功清除!",
|
| 125 |
"error": "🔥 错误:{}",
|
| 126 |
-
"ffmpeg_error": "FFmpeg错误
|
| 127 |
-
"file_saved_successfully": "文件
|
| 128 |
"total_files_found": "找到的文件总数:{}。使用的采样率:{}",
|
| 129 |
"total_progress": "总进度",
|
| 130 |
"detailed_pbar_enabled": "详细进度条已启用:{}",
|
|
@@ -132,149 +212,135 @@
|
|
| 132 |
"cannot_read_track": "无法读取轨道:{}",
|
| 133 |
"error_message": "错误消息:{}",
|
| 134 |
"demudding_track": "清理轨道(相位混音 - 乐器):{}",
|
| 135 |
-
"elapsed_time": "
|
| 136 |
"proc_folder_description": "使用指定模型处理文件夹中的音频文件。",
|
| 137 |
-
"model_type_help": "模型类型(bandit
|
| 138 |
"config_path_help": "配置文件路径",
|
| 139 |
"demud_phaseremix_help": "为乐器启用相位混音",
|
| 140 |
"start_checkpoint_help": "有效权重的初始检查点",
|
| 141 |
-
"input_folder_help": "包含
|
| 142 |
-
"audio_path_help": "
|
| 143 |
"store_dir_help": "存储结果的路径",
|
| 144 |
"device_ids_help": "GPU ID列表",
|
| 145 |
-
"extract_instrumental_help": "如提供,
|
| 146 |
-
"disable_detailed_pbar_help": "在
|
| 147 |
-
"force_cpu_help": "即使CUDA可用
|
| 148 |
-
"flac_file_help": "
|
| 149 |
"export_format_help": "导出格式和PCM类型",
|
| 150 |
"pcm_type_help": "FLAC文件的PCM类型",
|
| 151 |
"use_tta_help": "启用测试时间增强",
|
| 152 |
"lora_checkpoint_help": "LoRA权重的初始检查点",
|
| 153 |
-
"cuda_available": "CUDA可用,使用 --force_cpu 禁用。",
|
| 154 |
-
"using_device": "使用
|
| 155 |
"instruments_print": "乐器:{}",
|
| 156 |
-
"model_load_time": "模型加载时间:{:.2f}秒",
|
| 157 |
-
"invalid_url": "❌ 无效URL",
|
| 158 |
"cookie_file_updated": "✅ Cookie文件已更新!",
|
| 159 |
"cookie_installation_error": "⚠️ Cookie安装错误:{}",
|
| 160 |
-
"
|
|
|
|
| 161 |
"google_drive_error": "❌ Google Drive下载错误:{}",
|
| 162 |
"wav_conversion_failed": "WAV转换失败",
|
| 163 |
"download_error": "❌ 下载错误:{}",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 164 |
"download_success": "🎉 下载成功!",
|
| 165 |
"download_failed": "❌ 下载失败",
|
| 166 |
"no_output_files_found": "❌ 未找到输出文件。",
|
| 167 |
-
"output_refreshed_successfully": "✅ 输出
|
| 168 |
"error_refreshing_output": "❌ 刷新输出时出错:{}",
|
| 169 |
"starting_audio_separation": "开始音频分离...",
|
| 170 |
-
"processing_audio": "处理音频",
|
| 171 |
-
"separating_audio": "分离音频... ({:.1f}%)",
|
| 172 |
"separation_complete": "分离完成!",
|
| 173 |
"progress_parsing_error": "进度解析错误:{}",
|
| 174 |
"error_occurred": "发生错误:{}",
|
| 175 |
"separation_process_completed": "分离过程完成!",
|
| 176 |
-
"no_audio_file_error": "❌ 未提供音频文件
|
| 177 |
"no_input_progress_label": "错误:未提供输入 -- 0.0%",
|
| 178 |
-
"processing_audio_print": "正在处理音频:{}
|
| 179 |
"starting_audio_separation_progress_label": "开始音频分离... -- 0.0%",
|
| 180 |
-
"separating_audio_progress_label": "分离音频... -- {}.0%",
|
| 181 |
-
"audio_processing_completed_progress_label": "音频处理完成!-- 100.0%",
|
| 182 |
"audio_processing_completed": "✅ 音频处理完成!",
|
| 183 |
"error_occurred_progress_label": "发生错误 -- 0.0%",
|
| 184 |
"minimum_files_required": "⚠️ 至少需要2个文件",
|
| 185 |
"valid_files_not_found": "❌ 未找到有效文件",
|
| 186 |
"starting_ensemble_process": "开始集成过程...",
|
| 187 |
-
"ensembling_progress": "集成
|
| 188 |
-
"finalizing_ensemble_output": "完成集成输出...",
|
| 189 |
"success_log": "✅ 成功!\n{}",
|
| 190 |
"error_log": "❌ 错误!\n{}",
|
| 191 |
"critical_error": "⛔ 严重错误:{}",
|
| 192 |
"ensemble_process_completed": "集成过程完成!",
|
| 193 |
"no_models_selected": "❌ 未选择模型",
|
| 194 |
"no_input_audio_provided": "❌ 未提供输入音频",
|
| 195 |
-
"loading_model": "加载模型 {}/{}:{}...",
|
| 196 |
-
"loading_model_progress_label": "加载模型 {}/{}:{}... -- {}.0%",
|
| 197 |
"completed_model": "模型完成 {}/{}:{}",
|
| 198 |
"completed_model_progress_label": "模型完成 {}/{}:{} -- {}.0%",
|
| 199 |
-
"running_command": "
|
| 200 |
"model_failed": "模型 {} 失败:{}",
|
| 201 |
-
"critical_error_with_model": "{}
|
| 202 |
-
"model_output_failed": "{}
|
| 203 |
"waiting_for_files": "等待所有文件准备就绪...",
|
| 204 |
"waiting_for_files_progress_label": "等待所有文件准备就绪... -- 90.0%",
|
| 205 |
-
"performing_ensemble": "执行集成...",
|
| 206 |
-
"performing_ensemble_progress_label": "执行集成... -- 92.0%",
|
| 207 |
"memory_usage_before_ensemble": "集成前内存使用:{}%",
|
| 208 |
"memory_usage_after_ensemble": "集成后内存使用:{}%",
|
| 209 |
-
"finalizing_ensemble_output_progress_label": "完成集成输出... -- 98.0%",
|
| 210 |
-
"ensemble_file_creation_failed": "集成文件
|
| 211 |
-
"ensemble_completed_progress_label": "集成成功完成!-- 100.0%",
|
| 212 |
"success_output_created": "✅ 成功!输出文件已创建。",
|
| 213 |
-
"
|
| 214 |
-
"
|
| 215 |
-
"
|
| 216 |
-
"
|
| 217 |
-
"
|
| 218 |
-
"
|
| 219 |
-
"
|
| 220 |
-
"
|
| 221 |
-
"
|
| 222 |
-
"
|
| 223 |
-
"
|
| 224 |
-
"
|
| 225 |
-
"
|
| 226 |
-
"
|
| 227 |
-
"
|
| 228 |
-
"
|
| 229 |
-
"
|
| 230 |
-
"
|
| 231 |
-
"
|
| 232 |
-
"
|
| 233 |
-
"
|
| 234 |
-
"
|
| 235 |
-
"
|
| 236 |
-
"
|
| 237 |
-
"
|
| 238 |
-
"
|
| 239 |
-
"
|
| 240 |
-
"
|
| 241 |
-
"
|
| 242 |
-
"
|
| 243 |
-
"
|
| 244 |
-
"
|
| 245 |
-
"
|
| 246 |
-
"
|
| 247 |
-
"
|
| 248 |
-
"
|
| 249 |
-
"
|
| 250 |
-
"
|
| 251 |
-
"
|
| 252 |
-
"
|
| 253 |
-
"
|
| 254 |
-
"
|
| 255 |
-
"
|
| 256 |
-
"
|
| 257 |
-
"
|
| 258 |
-
"normal_method": "普通方法",
|
| 259 |
-
"mid_side_method": "中/侧方法",
|
| 260 |
-
"apollo_normal_model": "Apollo普通模型",
|
| 261 |
-
"apollo_normal_model_info": "用于普通Apollo处理的模型",
|
| 262 |
-
"apollo_midside_model": "Apollo中/侧模型",
|
| 263 |
-
"apollo_enhancement_info": "音质增强",
|
| 264 |
-
"selected_models": "选定模型",
|
| 265 |
-
"save_preset": "保存预设",
|
| 266 |
-
"delete_preset": "删除预设",
|
| 267 |
-
"refresh_presets": "刷新预设",
|
| 268 |
-
"preset_name": "预设名称",
|
| 269 |
-
"select_preset": "选择预设",
|
| 270 |
-
"add_favorite": "添加到收藏",
|
| 271 |
-
"apollo_midside_model_info": "用于中/侧处理的模型(可选)",
|
| 272 |
-
"language_changed_message": "语言已更改。请重新启动单元。",
|
| 273 |
-
"apply_matchering": "应用 Matchering",
|
| 274 |
-
"matchering_info": "应用 Matchering 以提升音频母带处理和平衡",
|
| 275 |
-
"matchering_passes": "Matchering 迭代次数",
|
| 276 |
-
"matchering_passes_info": "Matchering 迭代次数(1-5,值越高质量越好,但耗时更长)",
|
| 277 |
-
"matchering_processing": "正在应用 Matchering... ({:.1f}%)",
|
| 278 |
-
"matchering_completed": "✅ Matchering 处理完成!",
|
| 279 |
-
"matchering_error": "❌ Matchering 过程中发生错误:{}"
|
| 280 |
}
|
|
|
|
| 19 |
"tta_boost": "TTA增强",
|
| 20 |
"tta_info": "提高质量,但会减慢处理速度",
|
| 21 |
"phase_fix": "相位修正",
|
| 22 |
+
"use_phase_correction": "应用高级相位修正",
|
| 23 |
"phase_fix_info": "用于乐器轨的高级相位修正",
|
| 24 |
"instrumental": "乐器",
|
| 25 |
"instrumental_info": "通常输出2个结果,但有时只有一个,试试看吧!:)",
|
|
|
|
| 47 |
"copy_to_drive": "📂 复制到云盘",
|
| 48 |
"copy_status": "复制状态",
|
| 49 |
"files_will_be_copied": "文件将复制到此处...",
|
| 50 |
+
"processing_tip": "<b>🔈 处理提示:</b> 对于有噪声的结果,使用<i>去除噪声和效果</i>类别中的<code>bleed_suppressor_v1</code>或<code>denoisedebleed</code>模型来清理输出",
|
| 51 |
"waiting_for_processing": "等待处理...",
|
| 52 |
"auto_ensemble_tab": "🤖 自动集成",
|
| 53 |
"upload_file": "上传文件",
|
|
|
|
| 81 |
"downloaded_file": "已下载文件",
|
| 82 |
"cookie_management": "🍪 Cookie管理",
|
| 83 |
"upload_cookies_txt": "上传Cookies.txt",
|
| 84 |
+
"cookie_info": "<div style=\"margin-left:15px; font-size:0.95em\">**📌 为什么必要?**<br>- 访问年龄限制内容<br>- 下载私人/未列出视频<br>- 绕过区域限制<br>- 避免YouTube下载限制<br><br>**⚠️ 重要说明**<br>- 切勿分享您的Cookie文件!<br>- 更新Cookie时:<br> • 收到“403禁止”错误<br> • 下载突然停止<br> • 看到“会话过期”消息<br><br>**🔄 更新步骤**<br>1. 安装此<a href=\"https://chromewebstore.google.com/detail/get-cookiestxt-clean/ahmnmhfbokciafffnknlekllgcnafnie\" target=\"_blank\">Chrome扩展</a><br>2. 在Chrome中登录YouTube<br>3. 点击扩展图标 → “导出”<br>4. 在此处上传下载的文件<br><br>**⏳ Cookie寿命**<br>- 正常会话:24小时<br>- 敏感操作:1小时<br>- 密码更改:立即更新</div>",
|
| 85 |
+
"ensemble_output_copied": "✅ 集成输出已复制到 {}",
|
| 86 |
+
"error_copying_ensemble_output": "❌ 复制集成输出时出错:{}",
|
| 87 |
+
"drive_mounted_copying_ensemble": "Google Drive已挂载。正在复制集成输出...",
|
| 88 |
+
"drive_already_mounted_copying_ensemble": "Google Drive已挂载。正在复制集成输出...",
|
| 89 |
+
"no_ensemble_output_files_found": "❌ 未找到集成输出文件。",
|
| 90 |
+
"drive_mounted_copying_files": "Google Drive已挂载。正在复制文件...",
|
| 91 |
+
"drive_already_mounted_copying_files": "Google Drive已挂载。正在复制文件...",
|
| 92 |
+
"files_copied_to_drive": "✅ 文件已复制到 {}",
|
| 93 |
+
"error_copying_files": "❌ 复制文件时出错:{}",
|
| 94 |
+
"mounting_drive": "正在挂载Google Drive...",
|
| 95 |
+
"settings_saved": "设置已保存到config.json:语言={lang},方法={method},端口={port}",
|
| 96 |
+
"selected_language": "已选择语言:{lang}",
|
| 97 |
+
"available_languages": "可用语言:{langs}",
|
| 98 |
+
"select_language_prompt": "选择语言(默认:{default}):",
|
| 99 |
+
"invalid_language": "无效语言选择!使用默认语言({default})。",
|
| 100 |
+
"available_sharing_methods": "可用共享方法:gradio, localtunnel, ngrok",
|
| 101 |
+
"select_sharing_method_prompt": "选择共享方法(默认:{default}):",
|
| 102 |
+
"enter_ngrok_token_prompt": "输入您的Ngrok令牌(默认:{default}):",
|
| 103 |
+
"ngrok_token_required": "错误:ngrok方法需要Ngrok令牌!",
|
| 104 |
+
"enter_port_prompt": "输入端口号(默认:{default}):",
|
| 105 |
+
"opening_previous_url": "打开上一个URL:{url}",
|
| 106 |
+
"open_url_manually": "在Colab中手动打开URL:{url}",
|
| 107 |
+
"invalid_previous_url": "无效的上一个URL:{url},未打开。",
|
| 108 |
+
"starting_method": "在端口 {port} 上启动 {method}...",
|
| 109 |
+
"process_stopped": "\n🛑 用户停止了进程",
|
| 110 |
+
"selected_port": "所选端口:{port}",
|
| 111 |
+
"starting_gradio_with_sharing": "启动带有内置共享的Gradio...",
|
| 112 |
+
"starting_localtunnel": "在端口 {port} 上启动Localtunnel...",
|
| 113 |
+
"share_link": "共享链接:{url}",
|
| 114 |
+
"password_ip": "密码IP:{ip}",
|
| 115 |
+
"starting_ngrok": "在端口 {port} 上启动Ngrok...",
|
| 116 |
+
"ngrok_url": "Ngrok URL:{url}",
|
| 117 |
+
"ngrok_error": "启动Ngrok时出错:{error}",
|
| 118 |
+
"auto_apollo_chunk_size": "Apollo分块大小",
|
| 119 |
+
"auto_apollo_chunk_size_info": "Apollo增强的分块大小(通用模型推荐:19)",
|
| 120 |
+
"auto_apollo_overlap": "Apollo重叠",
|
| 121 |
+
"auto_enhancing_with_apollo": "使用Apollo增强({}/{} 文件)...",
|
| 122 |
+
"auto_apollo_processing_completed": "Apollo处理完成!",
|
| 123 |
+
"auto_apollo_overlap_info": "Apollo增强的重叠(推荐:2)",
|
| 124 |
+
"apollo_chunk_size": "Apollo分块大小",
|
| 125 |
+
"apollo_chunk_size_info": "Apollo增强的分块大小(通用模型推荐:19)",
|
| 126 |
+
"apollo_overlap": "Apollo重叠",
|
| 127 |
+
"enhance_with_apollo": "使用Apollo增强",
|
| 128 |
+
"apollo_enhancement_info": "音质增强",
|
| 129 |
+
"enhancing_with_apollo": "使用Apollo增强({}/{} 文件)...",
|
| 130 |
+
"apollo_processing_completed": "Apollo处理完成!",
|
| 131 |
+
"apollo_overlap_info": "Apollo增强的重叠(推荐:2)",
|
| 132 |
+
"selected_models": "选定模型",
|
| 133 |
+
"save_preset": "保存预设",
|
| 134 |
+
"delete_preset": "删除预设",
|
| 135 |
+
"refresh_presets": "刷新预设",
|
| 136 |
+
"preset_name": "预设名称",
|
| 137 |
+
"select_preset": "选择预设",
|
| 138 |
+
"add_favorite": "添加到收藏",
|
| 139 |
+
"apply_matchering": "应用 Matchering",
|
| 140 |
+
"matchering_info": "应用 Matchering 以提升音频母带处理和平衡",
|
| 141 |
+
"matchering_passes": "Matchering 迭代次数",
|
| 142 |
+
"matchering_passes_info": "Matchering 迭代次数(1-5,值越高质量越好,但耗时更长)",
|
| 143 |
+
"matchering_processing": "正在应用 Matchering... ({:.1f}%)",
|
| 144 |
+
"matchering_completed": "✅ Matchering 处理完成!",
|
| 145 |
+
"matchering_error": "❌ Matchering 过程中发生错误:{}",
|
| 146 |
+
"invalid_method": "错误:无效方法!使用 'gradio'、'localtunnel' 或 'ngrok'。",
|
| 147 |
+
"language_changed_message": "语言已更改。请重新启动单元。",
|
| 148 |
+
"backend_settings": "🚀 后端设置",
|
| 149 |
+
"inference_backend": "推理后端",
|
| 150 |
+
"optimization_mode": "优化模式",
|
| 151 |
+
"optimization_mode_info": "PyTorch 优化模式选择",
|
| 152 |
+
"mixed_precision_amp": "混合精度 (AMP)",
|
| 153 |
+
"mixed_precision_info": "2x 更快推理 - 推荐",
|
| 154 |
+
"tf32_acceleration": "TF32 加速",
|
| 155 |
+
"tf32_acceleration_info": "RTX 30xx+ 的额外速度提升",
|
| 156 |
+
"cudnn_benchmark": "cuDNN 基准测试",
|
| 157 |
+
"cudnn_benchmark_info": "首次运行较慢,后续运行更快",
|
| 158 |
+
"ultra_optimized_pytorch": "超优化 PyTorch 后端",
|
| 159 |
+
"default_active_max_speed": "默认激活 - 最大速度优化",
|
| 160 |
+
"channels_last_mode": "RTX GPU 最快",
|
| 161 |
+
"compile_mode": "PyTorch 2.0+ 额外速度",
|
| 162 |
+
"default_mode": "标准",
|
| 163 |
"manual_ensemble_tab": "🎚️ 手动集成",
|
| 164 |
+
"input_sources": "📂 输入源",
|
| 165 |
"refresh": "🔄 刷新",
|
| 166 |
"ensemble_algorithm": "集成算法",
|
| 167 |
"select_audio_files": "选择音频文件",
|
| 168 |
"available_files": "可用文件",
|
| 169 |
+
"custom_weights": "自定义权重(逗号分隔)",
|
| 170 |
+
"custom_weights_placeholder": "例如:0.8, 1.2, 1.0, ...",
|
| 171 |
+
"custom_weights_info": "留空以使用相等权重",
|
| 172 |
"result_preview_tab": "🎧 结果预览",
|
| 173 |
"ensembled_output": "集成输出",
|
| 174 |
"processing_log_tab": "📋 处理日志",
|
|
|
|
| 178 |
"Vocal Models": "人声模型",
|
| 179 |
"Instrumental Models": "乐器模型",
|
| 180 |
"4-Stem Models": "4声道模型",
|
| 181 |
+
"Denoise Models": "降噪模型",
|
| 182 |
"Dereverb Models": "去混响模型",
|
| 183 |
"Other Models": "其他模型",
|
| 184 |
+
"Other Shapes": "其他模型",
|
| 185 |
+
"ensemble_files_description": "使用指定的集成方法将多个音频文件合并为单个输出。",
|
| 186 |
+
"ensemble_files_help": "所有音频文件的路径用于集成",
|
| 187 |
+
"ensemble_type_help": "其中之一:avg_wave, median_wave, min_wave, max_wave, avg_fft, median_fft, min_fft, max_fft",
|
| 188 |
+
"ensemble_weights_help": "创建集成的权重。权重数量必须等于文件数量",
|
| 189 |
+
"ensemble_output_help": "存储集成结果的WAV文件路径",
|
| 190 |
"ensemble_type_print": "集成类型:{}",
|
| 191 |
+
"num_input_files_print": "输入文件数:{}",
|
| 192 |
"weights_print": "权重:{}",
|
| 193 |
"output_file_print": "输出文件:{}",
|
| 194 |
+
"duration_mismatch_error": "所有文件必须具有相同的持续时间",
|
| 195 |
+
"file_not_found_error": "错误。找不到文件:{}。检查路径。",
|
| 196 |
+
"reading_chunk_print": "从文件读取块:{}(开始:{}秒,持续时间:{}秒)",
|
| 197 |
+
"chunk_result_shape_print": "块结果形状:{}",
|
| 198 |
+
"ensemble_completed_print": "集成完成。输出保存到:{}",
|
| 199 |
+
"file_deletion_error": "无法删除 {}:{}",
|
| 200 |
"directory_not_exist_warning": "⚠️ 目录不存在:{}",
|
| 201 |
"not_a_directory_warning": "⚠️ 路径不是目录:{}",
|
| 202 |
+
"item_deletion_error": "⚠️ 删除 {} 时出错:{}",
|
| 203 |
"old_output_not_exist": "❌ 旧输出文件夹不存在",
|
| 204 |
"old_outputs_cleared": "✅ 旧输出已成功清除!",
|
| 205 |
"error": "🔥 错误:{}",
|
| 206 |
+
"ffmpeg_error": "FFmpeg错误 ({}):{}",
|
| 207 |
+
"file_saved_successfully": "文件成功保存:{}",
|
| 208 |
"total_files_found": "找到的文件总数:{}。使用的采样率:{}",
|
| 209 |
"total_progress": "总进度",
|
| 210 |
"detailed_pbar_enabled": "详细进度条已启用:{}",
|
|
|
|
| 212 |
"cannot_read_track": "无法读取轨道:{}",
|
| 213 |
"error_message": "错误消息:{}",
|
| 214 |
"demudding_track": "清理轨道(相位混音 - 乐器):{}",
|
| 215 |
+
"elapsed_time": "耗时:{:.2f} 秒。",
|
| 216 |
"proc_folder_description": "使用指定模型处理文件夹中的音频文件。",
|
| 217 |
+
"model_type_help": "模型类型(bandit、bs_roformer、mdx23c等)",
|
| 218 |
"config_path_help": "配置文件路径",
|
| 219 |
"demud_phaseremix_help": "为乐器启用相位混音",
|
| 220 |
"start_checkpoint_help": "有效权重的初始检查点",
|
| 221 |
+
"input_folder_help": "包含要处理的混音的文件夹",
|
| 222 |
+
"audio_path_help": "要处理的单个音频文件的路径",
|
| 223 |
"store_dir_help": "存储结果的路径",
|
| 224 |
"device_ids_help": "GPU ID列表",
|
| 225 |
+
"extract_instrumental_help": "如果提供,反转人声以获得乐器",
|
| 226 |
+
"disable_detailed_pbar_help": "在demix中禁用详细进度条",
|
| 227 |
+
"force_cpu_help": "即使CUDA可用也强制使用CPU",
|
| 228 |
+
"flac_file_help": "输出FLAC文件而不是WAV",
|
| 229 |
"export_format_help": "导出格式和PCM类型",
|
| 230 |
"pcm_type_help": "FLAC文件的PCM类型",
|
| 231 |
"use_tta_help": "启用测试时间增强",
|
| 232 |
"lora_checkpoint_help": "LoRA权重的初始检查点",
|
| 233 |
+
"cuda_available": "CUDA可用,使用 --force_cpu 禁用它。",
|
| 234 |
+
"using_device": "使用设备:{}",
|
| 235 |
"instruments_print": "乐器:{}",
|
| 236 |
+
"model_load_time": "模型加载时间:{:.2f} 秒",
|
| 237 |
+
"invalid_url": "❌ 无效的URL",
|
| 238 |
"cookie_file_updated": "✅ Cookie文件已更新!",
|
| 239 |
"cookie_installation_error": "⚠️ Cookie安装错误:{}",
|
| 240 |
+
"google_drive_mount_error": "❌ Google Drive连接错误:{}",
|
| 241 |
+
"file_size_zero_error": "文件大小为零或未创建文件",
|
| 242 |
"google_drive_error": "❌ Google Drive下载错误:{}",
|
| 243 |
"wav_conversion_failed": "WAV转换失败",
|
| 244 |
"download_error": "❌ 下载错误:{}",
|
| 245 |
+
"file_copied_to_drive": "文件已复制到Google Drive:{}",
|
| 246 |
+
"copy_to_drive_error": "❌ 复制到Google Drive时出错:{}",
|
| 247 |
+
"direct_download_failed": "直接下载文件失败",
|
| 248 |
+
"direct_download_error": "❌ 直接下载错误:{}",
|
| 249 |
+
"invalid_google_drive_url": "❌ 无效的 Google Drive URL",
|
| 250 |
+
"continuing_without_google_drive": "Google Drive 连接失败,继续不使用它...",
|
| 251 |
+
"skipping_drive_copy_no_mount": "⚠️ Google Drive 未挂载,跳过复制到 Drive...",
|
| 252 |
"download_success": "🎉 下载成功!",
|
| 253 |
"download_failed": "❌ 下载失败",
|
| 254 |
"no_output_files_found": "❌ 未找到输出文件。",
|
| 255 |
+
"output_refreshed_successfully": "✅ 输出已成功刷新!",
|
| 256 |
"error_refreshing_output": "❌ 刷新输出时出错:{}",
|
| 257 |
"starting_audio_separation": "开始音频分离...",
|
| 258 |
+
"processing_audio": "正在处理音频",
|
| 259 |
+
"separating_audio": "正在分离音频... ({:.1f}%)",
|
| 260 |
"separation_complete": "分离完成!",
|
| 261 |
"progress_parsing_error": "进度解析错误:{}",
|
| 262 |
"error_occurred": "发生错误:{}",
|
| 263 |
"separation_process_completed": "分离过程完成!",
|
| 264 |
+
"no_audio_file_error": "❌ 未提供音频文件且输入目录中没有现有文件。",
|
| 265 |
"no_input_progress_label": "错误:未提供输入 -- 0.0%",
|
| 266 |
+
"processing_audio_print": "正在处理音频来自:{} 使用模型:{}",
|
| 267 |
"starting_audio_separation_progress_label": "开始音频分离... -- 0.0%",
|
| 268 |
+
"separating_audio_progress_label": "正在分离音频... -- {}.0%",
|
| 269 |
+
"audio_processing_completed_progress_label": "音频处理完成! -- 100.0%",
|
| 270 |
"audio_processing_completed": "✅ 音频处理完成!",
|
| 271 |
"error_occurred_progress_label": "发生错误 -- 0.0%",
|
| 272 |
"minimum_files_required": "⚠️ 至少需要2个文件",
|
| 273 |
"valid_files_not_found": "❌ 未找到有效文件",
|
| 274 |
"starting_ensemble_process": "开始集成过程...",
|
| 275 |
+
"ensembling_progress": "正在集成... ({:.1f}%)",
|
| 276 |
+
"finalizing_ensemble_output": "正在完成集成输出...",
|
| 277 |
"success_log": "✅ 成功!\n{}",
|
| 278 |
"error_log": "❌ 错误!\n{}",
|
| 279 |
"critical_error": "⛔ 严重错误:{}",
|
| 280 |
"ensemble_process_completed": "集成过程完成!",
|
| 281 |
"no_models_selected": "❌ 未选择模型",
|
| 282 |
"no_input_audio_provided": "❌ 未提供输入音频",
|
| 283 |
+
"loading_model": "正在加载模型 {}/{}:{}...",
|
| 284 |
+
"loading_model_progress_label": "正在加载模型 {}/{}:{}... -- {}.0%",
|
| 285 |
"completed_model": "模型完成 {}/{}:{}",
|
| 286 |
"completed_model_progress_label": "模型完成 {}/{}:{} -- {}.0%",
|
| 287 |
+
"running_command": "正在运行命令:{}",
|
| 288 |
"model_failed": "模型 {} 失败:{}",
|
| 289 |
+
"critical_error_with_model": "{} 的严重错误:{}",
|
| 290 |
+
"model_output_failed": "{} 无法产生输出",
|
| 291 |
"waiting_for_files": "等待所有文件准备就绪...",
|
| 292 |
"waiting_for_files_progress_label": "等待所有文件准备就绪... -- 90.0%",
|
| 293 |
+
"performing_ensemble": "正在执行集成...",
|
| 294 |
+
"performing_ensemble_progress_label": "正在执行集成... -- 92.0%",
|
| 295 |
"memory_usage_before_ensemble": "集成前内存使用:{}%",
|
| 296 |
"memory_usage_after_ensemble": "集成后内存使用:{}%",
|
| 297 |
+
"finalizing_ensemble_output_progress_label": "正在完成集成输出... -- 98.0%",
|
| 298 |
+
"ensemble_file_creation_failed": "无法创建集成文件:{}",
|
| 299 |
+
"ensemble_completed_progress_label": "集成成功完成! -- 100.0%",
|
| 300 |
"success_output_created": "✅ 成功!输出文件已创建。",
|
| 301 |
+
"phase_fixer_tab": "🔧 相位修复器",
|
| 302 |
+
"batch_processing_tab": "📦 批量处理",
|
| 303 |
+
"batch_description": "使用相同模型同时处理多个文件",
|
| 304 |
+
"batch_input_folder": "输入文件夹",
|
| 305 |
+
"batch_input_folder_placeholder": "输入包含音频文件的文件夹路径",
|
| 306 |
+
"batch_output_folder": "输出文件夹",
|
| 307 |
+
"batch_output_folder_placeholder": "输入输出文件的文件夹路径",
|
| 308 |
+
"batch_file_list": "待处理文件",
|
| 309 |
+
"batch_add_files": "添加文件",
|
| 310 |
+
"batch_clear_list": "清空列表",
|
| 311 |
+
"batch_start": "🚀 开始批量处理",
|
| 312 |
+
"batch_stop": "⏹️ 停止",
|
| 313 |
+
"batch_progress": "批量处理进度",
|
| 314 |
+
"batch_current_file": "当前处理中",
|
| 315 |
+
"batch_completed": "✅ 批量处理完成!",
|
| 316 |
+
"batch_stopped": "⏹️ 批量处理已停止",
|
| 317 |
+
"batch_no_files": "❌ 未选择要处理的文件",
|
| 318 |
+
"source_model": "源模型(人声)",
|
| 319 |
+
"source_model_info": "用于提取人声/相位数据的模型",
|
| 320 |
+
"target_model": "目标模型(乐器)",
|
| 321 |
+
"target_model_info": "应用相位校正的模型",
|
| 322 |
+
"custom_models_tab": "⚡ 自定义模型",
|
| 323 |
+
"Custom Models": "自定义模型",
|
| 324 |
+
"add_custom_model": "➕ 添加自定义模型",
|
| 325 |
+
"custom_model_name": "模型名称",
|
| 326 |
+
"custom_model_name_placeholder": "为您的模型输入名称",
|
| 327 |
+
"checkpoint_url": "检查点 URL (.ckpt/.pth)",
|
| 328 |
+
"checkpoint_url_placeholder": "https://huggingface.co/.../model.ckpt",
|
| 329 |
+
"config_url": "配置 URL (.yaml)",
|
| 330 |
+
"config_url_placeholder": "https://huggingface.co/.../config.yaml",
|
| 331 |
+
"custom_py_url": "自定义 Python 文件 URL(可选)",
|
| 332 |
+
"custom_py_url_placeholder": "https://huggingface.co/.../bs_roformer.py",
|
| 333 |
+
"auto_detect_type": "自动检测模型类型",
|
| 334 |
+
"model_type": "模型类型",
|
| 335 |
+
"add_model_btn": "➕ 添加模型",
|
| 336 |
+
"custom_models_list": "📋 自定义模型列表",
|
| 337 |
+
"delete_model": "🗑️ 删除",
|
| 338 |
+
"no_custom_models": "尚未添加自定义模型",
|
| 339 |
+
"model_added_success": "✅ 模型添加成功!",
|
| 340 |
+
"model_deleted_success": "✅ 模型删除成功!",
|
| 341 |
+
"model_add_error": "❌ 添加模型错误:{}",
|
| 342 |
+
"model_delete_error": "❌ 删除模型错误:{}",
|
| 343 |
+
"refresh_models": "🔄 刷新",
|
| 344 |
+
"custom_model_info": "通过提供下载链接添加自定义模型。模型在使用时将自动下载。",
|
| 345 |
+
"select_model_to_delete": "选择要删除的模型"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 346 |
}
|
benchmark_pytorch.py
ADDED
|
@@ -0,0 +1,252 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# coding: utf-8
|
| 2 |
+
__author__ = 'PyTorch Optimization Benchmark Tool'
|
| 3 |
+
|
| 4 |
+
import argparse
|
| 5 |
+
import time
|
| 6 |
+
import torch
|
| 7 |
+
import numpy as np
|
| 8 |
+
from utils import get_model_from_config
|
| 9 |
+
from pytorch_backend import (
|
| 10 |
+
PyTorchBackend,
|
| 11 |
+
PyTorchOptimizer,
|
| 12 |
+
benchmark_pytorch_optimizations,
|
| 13 |
+
get_model_info
|
| 14 |
+
)
|
| 15 |
+
import sys
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
def load_checkpoint(checkpoint_path: str, model, device: str):
    """Load weights from a checkpoint file into ``model`` for inference.

    The checkpoint may be either a bare state dict or a dict that wraps the
    state dict under one of the keys ``'state_dict'``, ``'model'`` or
    ``'state'`` (checked in that order).

    Args:
        checkpoint_path: Path of the checkpoint file passed to ``torch.load``.
        model: Module whose ``load_state_dict`` receives the weights.
        device: Device string used both as ``map_location`` and as the
            target of ``model.to``.

    Returns:
        The model switched to eval mode and moved to ``device``.
    """
    print(f"Loading checkpoint from: {checkpoint_path}")

    # NOTE(security): weights_only=False unpickles arbitrary objects, so only
    # checkpoints from trusted sources should be loaded through this helper.
    checkpoint = torch.load(checkpoint_path, map_location=device, weights_only=False)

    # Handle different checkpoint formats: unwrap the first known container
    # key, otherwise treat the loaded object itself as the state dict.
    state_dict = checkpoint
    if isinstance(checkpoint, dict):
        for wrapper_key in ('state_dict', 'model', 'state'):
            if wrapper_key in checkpoint:
                state_dict = checkpoint[wrapper_key]
                break

    # strict=False tolerates key mismatches; surface them instead of silently
    # benchmarking a model that is partly (or entirely) uninitialised.
    load_result = model.load_state_dict(state_dict, strict=False)
    missing_keys = list(getattr(load_result, 'missing_keys', None) or [])
    unexpected_keys = list(getattr(load_result, 'unexpected_keys', None) or [])
    if missing_keys:
        print(f"⚠ Missing keys in checkpoint ({len(missing_keys)}): {missing_keys[:5]}")
    if unexpected_keys:
        print(f"⚠ Unexpected keys in checkpoint ({len(unexpected_keys)}): {unexpected_keys[:5]}")

    model = model.eval().to(device)

    print("✓ Checkpoint loaded successfully")
    return model
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
def benchmark_optimization_modes(args):
    """
    Benchmark different PyTorch optimization modes.

    Parses CLI-style arguments, loads the model and its checkpoint, runs
    ``benchmark_pytorch_optimizations`` on a ``(batch, 2, chunk_size)`` input
    shape and prints a per-mode timing report plus recommendations.

    Args:
        args: List of argument strings to parse, or ``None`` to read the
            arguments from ``sys.argv``.

    Returns:
        None. Everything is reported via ``print``. Returns early when a CUDA
        device is requested but CUDA is not available.
    """
    parser = argparse.ArgumentParser(description="Benchmark PyTorch Optimization Modes")
    parser.add_argument("--model_type", type=str, required=True, help="Model type")
    parser.add_argument("--config_path", type=str, required=True, help="Config path")
    parser.add_argument("--start_check_point", type=str, required=True, help="Checkpoint path (.ckpt)")
    parser.add_argument("--device", type=str, default='cuda:0', help="Device")
    parser.add_argument("--num_iterations", type=int, default=100, help="Number of benchmark iterations")
    parser.add_argument("--warmup_iterations", type=int, default=10, help="Number of warmup iterations")
    parser.add_argument("--chunk_size", type=int, default=None, help="Override chunk size (optional)")
    parser.add_argument("--batch_size", type=int, default=1, help="Batch size")

    # parse_args(None) already falls back to sys.argv[1:], so no branch is needed.
    args = parser.parse_args(args)

    # Check device
    if args.device.startswith('cuda') and not torch.cuda.is_available():
        print("❌ CUDA is not available!")
        return

    print("="*60)
    print("PyTorch Optimization Benchmark Tool")
    print("="*60)
    print(f"Model Type: {args.model_type}")
    print(f"Checkpoint: {args.start_check_point}")
    print(f"Device: {args.device}")
    print(f"Iterations: {args.num_iterations}")
    print("="*60)

    # Load model
    print("\n📦 Loading model...")
    model, config = get_model_from_config(args.model_type, args.config_path)
    model = load_checkpoint(args.start_check_point, model, args.device)

    # Get model info
    model_info = get_model_info(model)
    print("\n📊 Model Information:")
    print(f" Total Parameters: {model_info['total_parameters']:,}")
    print(f" Trainable Parameters: {model_info['trainable_parameters']:,}")
    print(f" Model Size: {model_info['model_size_mb']:.2f} MB")
    print(f" Device: {model_info['device']}")
    print(f" Dtype: {model_info['dtype']}")

    # Get chunk size: an explicit (non-zero) override wins over the config value.
    chunk_size = args.chunk_size or config.audio.chunk_size

    num_channels = 2
    input_shape = (args.batch_size, num_channels, chunk_size)

    print("\n📊 Test Configuration:")
    print(f" Batch Size: {args.batch_size}")
    print(f" Channels: {num_channels}")
    print(f" Chunk Size: {chunk_size}")
    print(f" Input Shape: {input_shape}")

    # Benchmark different optimization modes
    print("\n" + "="*60)
    print("Benchmarking Optimization Modes")
    print("="*60)

    # NOTE(review): treated below as a mapping of mode name -> average time in
    # ms, with None for modes that produced no timing — confirm against
    # pytorch_backend.benchmark_pytorch_optimizations.
    results = benchmark_pytorch_optimizations(
        model=model,
        input_shape=input_shape,
        device=args.device,
        num_iterations=args.num_iterations,
        warmup_iterations=args.warmup_iterations
    )

    # Display results
    print("\n" + "="*60)
    print("📈 Benchmark Results")
    print("="*60)

    # The first mode with a timing becomes the reference for speedup figures.
    baseline = None
    for mode, time_ms in results.items():
        if time_ms is None:
            continue
        if baseline is None:
            baseline = time_ms
        speedup = baseline / time_ms if time_ms > 0 else 0
        improvement = ((baseline - time_ms) / baseline) * 100 if baseline > 0 else 0

        print(f"\n{mode.upper()}:")
        print(f" Average Time: {time_ms:.2f} ms")
        print(f" Speedup: {speedup:.2f}x")
        print(f" Improvement: {improvement:.1f}%")

    print("\n" + "="*60)

    # Recommendations
    print("\n💡 Recommendations:")

    # The default timing may be missing or None (failed run); guard the
    # comparison instead of raising KeyError/TypeError after the report.
    default_time = results.get('default')

    def _beats_default(mode):
        """Return True when ``mode`` has a timing that is better than default."""
        mode_time = results.get(mode)
        if not mode_time:
            return False
        return default_time is None or mode_time < default_time

    if _beats_default('compile'):
        print(" ✓ Use 'compile' mode for best performance (PyTorch 2.0+)")
    elif _beats_default('channels_last'):
        print(" ✓ Use 'channels_last' mode for better performance")
    elif default_time is not None:
        print(" ✓ Default mode is optimal for your configuration")
    else:
        print(" ⚠ No optimization mode produced a timing; nothing to recommend")

    if args.device.startswith('cuda'):
        print(" ✓ Enable TF32 for Ampere GPUs (RTX 30xx+)")
        print(" ✓ Enable cuDNN benchmark for consistent input sizes")

    print("\n✅ Benchmark completed!")
|
| 154 |
+
|
| 155 |
+
|
| 156 |
+
def test_optimization_modes(args):
    """
    Test different optimization modes with verification.

    Loads the model and checkpoint, runs one forward pass on a random
    ``(1, 2, chunk_size)`` input through a ``PyTorchBackend`` for each of the
    modes ``default``, ``compile`` and ``channels_last``, then compares every
    successful output against the ``default`` output.

    Args:
        args: List of argument strings to parse, or ``None`` to read the
            arguments from ``sys.argv``.

    Returns:
        None. Everything is reported via ``print``. Returns early when a CUDA
        device is requested but CUDA is not available.
    """
    parser = argparse.ArgumentParser(description="Test PyTorch Optimization Modes")
    parser.add_argument("--model_type", type=str, required=True, help="Model type")
    parser.add_argument("--config_path", type=str, required=True, help="Config path")
    parser.add_argument("--start_check_point", type=str, required=True, help="Checkpoint path (.ckpt)")
    parser.add_argument("--device", type=str, default='cuda:0', help="Device")

    # parse_args(None) already falls back to sys.argv[1:], so no branch is needed.
    args = parser.parse_args(args)

    # Same guard as the benchmark entry point: the default device is cuda:0,
    # so fail with a clear message instead of an error from `.to(device)`.
    if args.device.startswith('cuda') and not torch.cuda.is_available():
        print("❌ CUDA is not available!")
        return

    print("="*60)
    print("PyTorch Optimization Mode Test")
    print("="*60)

    # Load model
    print("\n📦 Loading model...")
    model, config = get_model_from_config(args.model_type, args.config_path)
    model = load_checkpoint(args.start_check_point, model, args.device)

    chunk_size = config.audio.chunk_size
    input_shape = (1, 2, chunk_size)
    dummy_input = torch.randn(*input_shape).to(args.device)

    # Test each optimization mode
    modes = ['default', 'compile', 'channels_last']
    outputs = {}

    for mode in modes:
        print(f"\n{'='*60}")
        print(f"Testing: {mode}")
        print('='*60)

        try:
            backend = PyTorchBackend(device=args.device, optimize_mode=mode)

            # A former 'jit' special case was unreachable ('jit' is not in
            # `modes`), so every mode goes through the same call.
            backend.optimize_model(
                model,
                use_amp=True,
                use_channels_last=(mode == 'channels_last')
            )

            # Run inference
            with torch.no_grad():
                output = backend(dummy_input)

            outputs[mode] = output
            print(f"✓ {mode} successful")
            print(f" Output shape: {output.shape}")
            print(f" Output range: [{output.min().item():.6f}, {output.max().item():.6f}]")

        except Exception as e:
            # Best-effort by design: one failing mode must not stop the others.
            print(f"✗ {mode} failed: {e}")
            outputs[mode] = None

    # Verify outputs match
    print("\n" + "="*60)
    print("🔍 Output Verification")
    print("="*60)

    baseline_key = 'default'
    baseline_output = outputs.get(baseline_key)
    if baseline_output is None:
        # Without a reference output there is nothing to compare against.
        print(f"\n⚠ Baseline mode '{baseline_key}' failed; skipping output comparison")
    else:
        for mode, output in outputs.items():
            if mode == baseline_key or output is None:
                continue
            diff = torch.abs(baseline_output - output)
            max_diff = torch.max(diff).item()
            mean_diff = torch.mean(diff).item()

            print(f"\n{mode} vs {baseline_key}:")
            print(f" Max difference: {max_diff:.6f}")
            print(f" Mean difference: {mean_diff:.6f}")

            if max_diff < 1e-3:
                print(" ✓ Outputs match within tolerance")
            else:
                print(" ⚠ Warning: Large difference detected!")

    print("\n✅ Test completed!")
|
| 243 |
+
|
| 244 |
+
|
| 245 |
+
if __name__ == "__main__":
    # `sys` is already imported at the top of the module, so it is not
    # re-imported here. A leading `test` argument selects the verification
    # pass; anything else runs the timing benchmark. The remaining arguments
    # are passed explicitly rather than by mutating the global sys.argv.
    cli_args = sys.argv[1:]
    if cli_args and cli_args[0] == 'test':
        test_optimization_modes(cli_args[1:])
    else:
        benchmark_optimization_modes(cli_args)
|
config_manager.py
CHANGED
|
@@ -3,7 +3,7 @@ import json
|
|
| 3 |
from pathlib import Path
|
| 4 |
|
| 5 |
# Define config directory in Google Drive
|
| 6 |
-
CONFIG_DIR = "/
|
| 7 |
CONFIG_FILE = os.path.join(CONFIG_DIR, "config.json")
|
| 8 |
|
| 9 |
def load_config():
|
|
@@ -14,6 +14,10 @@ def load_config():
|
|
| 14 |
"chunk_size": 352800,
|
| 15 |
"overlap": 2,
|
| 16 |
"export_format": "wav FLOAT",
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
"auto_use_tta": False,
|
| 18 |
"use_tta": False,
|
| 19 |
"use_demud_phaseremix_inst": False,
|
|
|
|
| 3 |
from pathlib import Path
|
| 4 |
|
| 5 |
# Define config directory in Google Drive
|
| 6 |
+
CONFIG_DIR = "/content/drive/MyDrive/SESA-Config"
|
| 7 |
CONFIG_FILE = os.path.join(CONFIG_DIR, "config.json")
|
| 8 |
|
| 9 |
def load_config():
|
|
|
|
| 14 |
"chunk_size": 352800,
|
| 15 |
"overlap": 2,
|
| 16 |
"export_format": "wav FLOAT",
|
| 17 |
+
"optimize_mode": "channels_last",
|
| 18 |
+
"enable_amp": True,
|
| 19 |
+
"enable_tf32": True,
|
| 20 |
+
"enable_cudnn_benchmark": True,
|
| 21 |
"auto_use_tta": False,
|
| 22 |
"use_tta": False,
|
| 23 |
"use_demud_phaseremix_inst": False,
|
download.py
CHANGED
|
@@ -53,8 +53,16 @@ def download_callback(url, download_type='direct', cookie_file=None):
|
|
| 53 |
# Mount Google Drive (optional)
|
| 54 |
if drive is not None:
|
| 55 |
try:
|
| 56 |
-
|
| 57 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
except Exception as e:
|
| 59 |
print(i18n("google_drive_mount_error").format(str(e)))
|
| 60 |
print(i18n("continuing_without_google_drive"))
|
|
@@ -113,34 +121,95 @@ def download_callback(url, download_type='direct', cookie_file=None):
|
|
| 113 |
|
| 114 |
# 3. YouTube and other media links
|
| 115 |
else:
|
| 116 |
-
|
| 117 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 118 |
'outtmpl': os.path.join(INPUT_DIR, '%(title)s.%(ext)s'),
|
| 119 |
'postprocessors': [{
|
| 120 |
'key': 'FFmpegExtractAudio',
|
| 121 |
'preferredcodec': 'wav',
|
| 122 |
'preferredquality': '0'
|
| 123 |
}],
|
| 124 |
-
'cookiefile': COOKIE_PATH
|
| 125 |
'nocheckcertificate': True,
|
| 126 |
-
'ignoreerrors':
|
| 127 |
-
'retries': 3
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 128 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 129 |
try:
|
| 130 |
-
with yt_dlp.YoutubeDL(
|
| 131 |
info_dict = ydl.extract_info(url, download=True)
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 142 |
except Exception as e:
|
| 143 |
error_msg = i18n("download_error").format(str(e))
|
|
|
|
|
|
|
|
|
|
| 144 |
print(error_msg)
|
| 145 |
return None, error_msg, None, None, None, None
|
| 146 |
|
|
|
|
| 53 |
# Mount Google Drive (optional)
|
| 54 |
if drive is not None:
|
| 55 |
try:
|
| 56 |
+
# Check if already mounted first
|
| 57 |
+
if os.path.exists('/content/drive/MyDrive'):
|
| 58 |
+
drive_mounted = True
|
| 59 |
+
else:
|
| 60 |
+
drive.mount('/content/drive', force_remount=True)
|
| 61 |
+
drive_mounted = True
|
| 62 |
+
except AttributeError as ae:
|
| 63 |
+
# Handle 'NoneType' object has no attribute 'kernel' error
|
| 64 |
+
print(f"Warning: Google Drive mount skipped (Colab kernel issue): {str(ae)}")
|
| 65 |
+
print(i18n("continuing_without_google_drive"))
|
| 66 |
except Exception as e:
|
| 67 |
print(i18n("google_drive_mount_error").format(str(e)))
|
| 68 |
print(i18n("continuing_without_google_drive"))
|
|
|
|
| 121 |
|
| 122 |
# 3. YouTube and other media links
|
| 123 |
else:
|
| 124 |
+
# First try: iOS/Android without cookies (best for bot protection bypass)
|
| 125 |
+
ydl_opts_nocookie = {
|
| 126 |
+
'format': 'ba[ext=m4a]/ba[ext=webm]/ba/b',
|
| 127 |
+
'outtmpl': os.path.join(INPUT_DIR, '%(title)s.%(ext)s'),
|
| 128 |
+
'postprocessors': [{
|
| 129 |
+
'key': 'FFmpegExtractAudio',
|
| 130 |
+
'preferredcodec': 'wav',
|
| 131 |
+
'preferredquality': '0'
|
| 132 |
+
}],
|
| 133 |
+
'nocheckcertificate': True,
|
| 134 |
+
'ignoreerrors': False,
|
| 135 |
+
'retries': 3,
|
| 136 |
+
'extractor_retries': 3,
|
| 137 |
+
'extractor_args': {
|
| 138 |
+
'youtube': {
|
| 139 |
+
'player_client': ['ios', 'android'],
|
| 140 |
+
'player_skip': ['webpage', 'configs']
|
| 141 |
+
}
|
| 142 |
+
},
|
| 143 |
+
'http_headers': {
|
| 144 |
+
'User-Agent': 'com.google.ios.youtube/19.09.3 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)',
|
| 145 |
+
'Accept-Language': 'en-US,en;q=0.9'
|
| 146 |
+
}
|
| 147 |
+
}
|
| 148 |
+
|
| 149 |
+
# Second try: web client with cookies if available
|
| 150 |
+
ydl_opts_cookie = {
|
| 151 |
+
'format': 'ba[ext=m4a]/ba[ext=webm]/ba/b',
|
| 152 |
'outtmpl': os.path.join(INPUT_DIR, '%(title)s.%(ext)s'),
|
| 153 |
'postprocessors': [{
|
| 154 |
'key': 'FFmpegExtractAudio',
|
| 155 |
'preferredcodec': 'wav',
|
| 156 |
'preferredquality': '0'
|
| 157 |
}],
|
| 158 |
+
'cookiefile': COOKIE_PATH,
|
| 159 |
'nocheckcertificate': True,
|
| 160 |
+
'ignoreerrors': False,
|
| 161 |
+
'retries': 3,
|
| 162 |
+
'extractor_retries': 3,
|
| 163 |
+
'extractor_args': {
|
| 164 |
+
'youtube': {
|
| 165 |
+
'player_client': ['web', 'tv_embedded'],
|
| 166 |
+
'player_skip': ['configs']
|
| 167 |
+
}
|
| 168 |
+
}
|
| 169 |
}
|
| 170 |
+
|
| 171 |
+
# Try without cookies first
|
| 172 |
+
info_dict = None
|
| 173 |
+
temp_path = None
|
| 174 |
+
|
| 175 |
try:
|
| 176 |
+
with yt_dlp.YoutubeDL(ydl_opts_nocookie) as ydl:
|
| 177 |
info_dict = ydl.extract_info(url, download=True)
|
| 178 |
+
if info_dict:
|
| 179 |
+
temp_path = ydl.prepare_filename(info_dict)
|
| 180 |
+
except Exception as e:
|
| 181 |
+
# If no cookies available or first method failed, try with cookies
|
| 182 |
+
if os.path.exists(COOKIE_PATH):
|
| 183 |
+
print(f"First attempt failed, trying with cookies...")
|
| 184 |
+
try:
|
| 185 |
+
with yt_dlp.YoutubeDL(ydl_opts_cookie) as ydl:
|
| 186 |
+
info_dict = ydl.extract_info(url, download=True)
|
| 187 |
+
if info_dict:
|
| 188 |
+
temp_path = ydl.prepare_filename(info_dict)
|
| 189 |
+
except Exception as e2:
|
| 190 |
+
raise e2
|
| 191 |
+
else:
|
| 192 |
+
raise e
|
| 193 |
+
|
| 194 |
+
try:
|
| 195 |
+
# Check if extraction was successful
|
| 196 |
+
if info_dict is None:
|
| 197 |
+
raise Exception(i18n("youtube_extraction_failed") if "youtube_extraction_failed" in dir(i18n) else "YouTube extraction failed. Please try updating yt-dlp: pip install -U yt-dlp")
|
| 198 |
+
|
| 199 |
+
base_name = os.path.splitext(os.path.basename(temp_path))[0]
|
| 200 |
+
sanitized_base_name = sanitize_filename(base_name)
|
| 201 |
+
wav_path = os.path.join(INPUT_DIR, f"{sanitized_base_name}.wav")
|
| 202 |
+
temp_wav = os.path.splitext(temp_path)[0] + '.wav'
|
| 203 |
+
if os.path.exists(temp_wav):
|
| 204 |
+
os.rename(temp_wav, wav_path)
|
| 205 |
+
download_success = True
|
| 206 |
+
else:
|
| 207 |
+
raise Exception(i18n("wav_conversion_failed"))
|
| 208 |
except Exception as e:
|
| 209 |
error_msg = i18n("download_error").format(str(e))
|
| 210 |
+
# Add hint for yt-dlp update if it's a YouTube issue
|
| 211 |
+
if 'youtube' in url.lower() or 'youtu.be' in url.lower():
|
| 212 |
+
error_msg += "\n\nTry: pip install -U yt-dlp"
|
| 213 |
print(error_msg)
|
| 214 |
return None, error_msg, None, None, None, None
|
| 215 |
|
ensemble.py
CHANGED
|
@@ -1,281 +1,442 @@
|
|
|
|
|
| 1 |
# coding: utf-8
|
| 2 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
|
| 4 |
import os
|
| 5 |
-
import
|
| 6 |
-
import torchaudio
|
| 7 |
-
import soundfile as sf
|
| 8 |
import argparse
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
from pathlib import Path
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
:param n_fft: FFT size
|
| 17 |
-
:param hop_length: hop length
|
| 18 |
-
:return: complex spectrogram of shape (channels, freq_bins, time_frames)
|
| 19 |
-
"""
|
| 20 |
-
# Use torchaudio's STFT which handles multi-channel audio efficiently
|
| 21 |
-
spec = torch.stft(
|
| 22 |
-
wave,
|
| 23 |
-
n_fft=n_fft,
|
| 24 |
-
hop_length=hop_length,
|
| 25 |
-
window=torch.hann_window(n_fft, device=wave.device),
|
| 26 |
-
return_complex=True
|
| 27 |
-
)
|
| 28 |
-
return spec
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
def istft(spec, hop_length, length):
|
| 32 |
-
"""
|
| 33 |
-
Compute inverse STFT for stereo audio using PyTorch
|
| 34 |
-
:param spec: complex spectrogram of shape (channels, freq_bins, time_frames)
|
| 35 |
-
:param hop_length: hop length
|
| 36 |
-
:param length: target length
|
| 37 |
-
:return: waveform of shape (channels, length)
|
| 38 |
-
"""
|
| 39 |
-
# Use torchaudio's ISTFT which handles multi-channel audio efficiently
|
| 40 |
-
wave = torch.istft(
|
| 41 |
-
spec,
|
| 42 |
-
n_fft=(spec.shape[1] - 1) * 2,
|
| 43 |
-
hop_length=hop_length,
|
| 44 |
-
window=torch.hann_window((spec.shape[1] - 1) * 2, device=spec.device),
|
| 45 |
-
length=length
|
| 46 |
-
)
|
| 47 |
-
return wave
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
def absmax(a, *, dim):
|
| 51 |
-
"""Find values with maximum absolute value along dimension"""
|
| 52 |
-
abs_a = torch.abs(a)
|
| 53 |
-
indices = torch.argmax(abs_a, dim=dim, keepdim=True)
|
| 54 |
-
return torch.gather(a, dim, indices).squeeze(dim)
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
def absmin(a, *, dim):
|
| 58 |
-
"""Find values with minimum absolute value along dimension"""
|
| 59 |
-
abs_a = torch.abs(a)
|
| 60 |
-
indices = torch.argmin(abs_a, dim=dim, keepdim=True)
|
| 61 |
-
return torch.gather(a, dim, indices).squeeze(dim)
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
def lambda_max(arr, dim=None, key=None):
|
| 65 |
-
"""Find elements with maximum key value along dimension"""
|
| 66 |
-
if key is None:
|
| 67 |
-
key = lambda x: x
|
| 68 |
-
key_values = key(arr)
|
| 69 |
-
indices = torch.argmax(key_values, dim=dim, keepdim=True)
|
| 70 |
-
return torch.gather(arr, dim, indices).squeeze(dim)
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
def lambda_min(arr, dim=None, key=None):
|
| 74 |
-
"""Find elements with minimum key value along dimension"""
|
| 75 |
-
if key is None:
|
| 76 |
-
key = lambda x: x
|
| 77 |
-
key_values = key(arr)
|
| 78 |
-
indices = torch.argmin(key_values, dim=dim, keepdim=True)
|
| 79 |
-
return torch.gather(arr, dim, indices).squeeze(dim)
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
def match_tensor_shapes(tensor_1, tensor_2):
|
| 83 |
-
"""Match the time dimension of two tensors by padding or trimming"""
|
| 84 |
-
target_length = tensor_2.shape[-1]
|
| 85 |
-
current_length = tensor_1.shape[-1]
|
| 86 |
-
|
| 87 |
-
if current_length > target_length:
|
| 88 |
-
tensor_1 = tensor_1[..., :target_length]
|
| 89 |
-
elif current_length < target_length:
|
| 90 |
-
padding = target_length - current_length
|
| 91 |
-
tensor_1 = torch.nn.functional.pad(tensor_1, (0, padding), 'constant', 0)
|
| 92 |
-
|
| 93 |
-
return tensor_1
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
def average_waveforms(pred_track, weights, algorithm, n_fft, hop_length):
|
| 97 |
-
"""
|
| 98 |
-
:param pred_track: tensor of shape (num, channels, length)
|
| 99 |
-
:param weights: tensor of shape (num,)
|
| 100 |
-
:param algorithm: One of avg_wave, median_wave, min_wave, max_wave, avg_fft, median_fft, min_fft, max_fft
|
| 101 |
-
:param n_fft: FFT size for STFT operations
|
| 102 |
-
:param hop_length: hop length for STFT operations
|
| 103 |
-
:return: averaged waveform in shape (channels, length)
|
| 104 |
-
"""
|
| 105 |
-
pred_track = torch.stack(pred_track)
|
| 106 |
-
final_length = pred_track.shape[-1]
|
| 107 |
-
weights = torch.tensor(weights, dtype=pred_track.dtype, device=pred_track.device)
|
| 108 |
-
|
| 109 |
-
if algorithm in ['avg_wave', 'median_wave', 'min_wave', 'max_wave']:
|
| 110 |
-
# Waveform domain operations
|
| 111 |
-
if algorithm == 'avg_wave':
|
| 112 |
-
# Weighted average
|
| 113 |
-
weighted_tracks = pred_track * weights.view(-1, 1, 1)
|
| 114 |
-
result = torch.sum(weighted_tracks, dim=0) / torch.sum(weights)
|
| 115 |
-
elif algorithm == 'median_wave':
|
| 116 |
-
result = torch.median(pred_track, dim=0)[0]
|
| 117 |
-
elif algorithm == 'min_wave':
|
| 118 |
-
result = lambda_min(pred_track, dim=0, key=torch.abs)
|
| 119 |
-
elif algorithm == 'max_wave':
|
| 120 |
-
result = lambda_max(pred_track, dim=0, key=torch.abs)
|
| 121 |
-
|
| 122 |
-
elif algorithm in ['avg_fft', 'median_fft', 'min_fft', 'max_fft']:
|
| 123 |
-
# Frequency domain operations
|
| 124 |
-
# Convert all tracks to spectrograms
|
| 125 |
-
spec_tracks = []
|
| 126 |
-
for i in range(pred_track.shape[0]):
|
| 127 |
-
spec = stft(pred_track[i], n_fft, hop_length)
|
| 128 |
-
spec_tracks.append(spec)
|
| 129 |
|
| 130 |
-
|
|
|
|
|
|
|
|
|
|
| 131 |
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
avg_spec = torch.sum(weighted_specs, dim=0) / torch.sum(weights)
|
| 136 |
-
result = istft(avg_spec, hop_length, final_length)
|
| 137 |
-
elif algorithm == 'median_fft':
|
| 138 |
-
# Median in frequency domain (using magnitude and phase separately)
|
| 139 |
-
median_spec = torch.median(spec_tracks, dim=0)[0]
|
| 140 |
-
result = istft(median_spec, hop_length, final_length)
|
| 141 |
-
elif algorithm == 'min_fft':
|
| 142 |
-
min_spec = lambda_min(spec_tracks, dim=0, key=torch.abs)
|
| 143 |
-
result = istft(min_spec, hop_length, final_length)
|
| 144 |
-
elif algorithm == 'max_fft':
|
| 145 |
-
max_spec = absmax(spec_tracks, dim=0)
|
| 146 |
-
result = istft(max_spec, hop_length, final_length)
|
| 147 |
-
|
| 148 |
-
return result
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
def save_audio(waveform, sample_rate, output_path):
|
| 152 |
-
"""Save audio with support for different formats and bit depths using soundfile"""
|
| 153 |
-
output_path = Path(output_path)
|
| 154 |
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 158 |
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
|
|
|
| 162 |
|
| 163 |
-
|
| 164 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 165 |
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
audio_np = audio_np / max_val.numpy()
|
| 174 |
|
| 175 |
-
#
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 216 |
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 223 |
|
| 224 |
-
|
| 225 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 226 |
|
| 227 |
-
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
|
| 231 |
-
|
| 232 |
-
|
| 233 |
-
|
| 234 |
-
|
| 235 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 236 |
|
| 237 |
-
|
| 238 |
-
|
| 239 |
-
|
| 240 |
-
if sample_rate is None:
|
| 241 |
-
sample_rate = sr
|
| 242 |
-
elif sample_rate != sr:
|
| 243 |
-
print(f'Warning: Sample rate mismatch. Expected {sample_rate}, got {sr}. Resampling...')
|
| 244 |
-
resampler = torchaudio.transforms.Resample(sr, sample_rate)
|
| 245 |
-
wav = resampler(wav)
|
| 246 |
-
|
| 247 |
-
# Ensure stereo (2 channels)
|
| 248 |
-
if wav.shape[0] == 1:
|
| 249 |
-
print("Mono detected. Converting to stereo by duplication.")
|
| 250 |
-
wav = wav.repeat(2, 1)
|
| 251 |
-
elif wav.shape[0] > 2:
|
| 252 |
-
print(f"Multi-channel audio detected ({wav.shape[0]} channels). Using first 2 channels.")
|
| 253 |
-
wav = wav[:2]
|
| 254 |
|
| 255 |
-
|
| 256 |
-
|
| 257 |
|
| 258 |
-
|
| 259 |
-
|
| 260 |
-
|
| 261 |
-
|
| 262 |
-
|
| 263 |
-
|
| 264 |
-
|
| 265 |
-
|
| 266 |
-
|
| 267 |
-
|
| 268 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 269 |
|
| 270 |
-
|
| 271 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 272 |
|
| 273 |
-
|
| 274 |
|
| 275 |
-
|
| 276 |
-
|
| 277 |
-
|
| 278 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 279 |
|
| 280 |
if __name__ == "__main__":
|
| 281 |
-
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
# coding: utf-8
|
| 3 |
+
"""
|
| 4 |
+
Ultimate Audio Ensemble Processor v4.0
|
| 5 |
+
- Supports all ensemble methods (avg_wave, median_wave, max_wave, min_wave, max_fft, min_fft, median_fft)
|
| 6 |
+
- Supports long file paths and paths containing special characters
|
| 7 |
+
- Processes large files efficiently
|
| 8 |
+
- Detailed error handling and logging
|
| 9 |
+
"""
|
| 10 |
|
| 11 |
import os
|
| 12 |
+
import sys
|
|
|
|
|
|
|
| 13 |
import argparse
|
| 14 |
+
import numpy as np
|
| 15 |
+
import soundfile as sf
|
| 16 |
+
import librosa
|
| 17 |
+
import psutil
|
| 18 |
+
import gc
|
| 19 |
+
import traceback
|
| 20 |
+
from scipy.signal import stft, istft
|
| 21 |
from pathlib import Path
|
| 22 |
+
import tempfile
|
| 23 |
+
import shutil
|
| 24 |
+
import json
|
| 25 |
+
from tqdm import tqdm
|
| 26 |
+
import time
|
| 27 |
+
import torch
|
| 28 |
|
| 29 |
+
# PyTorch optimizations
|
| 30 |
+
# Runs at import time: report the device and, when CUDA is present,
# switch on the cuDNN benchmark flag.
if not torch.cuda.is_available():
    print("Using CPU for ensemble")
else:
    torch.backends.cudnn.benchmark = True
    print("✓ Using CUDA acceleration for ensemble")
|
| 35 |
|
| 36 |
+
class AudioEnsembleEngine:
|
| 37 |
+
def __init__(self):
|
| 38 |
+
self.temp_dir = None
|
| 39 |
+
self.log_file = "ensemble_processor.log"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
|
| 41 |
+
def __enter__(self):
    """Enter the context: create the scratch directory, start the log, return self."""
    scratch_dir = tempfile.mkdtemp(prefix='audio_ensemble_')
    self.temp_dir = scratch_dir
    self.setup_logging()
    return self
|
| 45 |
|
| 46 |
+
def __exit__(self, exc_type, exc_val, exc_tb):
|
| 47 |
+
if self.temp_dir and os.path.exists(self.temp_dir):
|
| 48 |
+
shutil.rmtree(self.temp_dir, ignore_errors=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
|
| 50 |
+
def setup_logging(self):
    """Start a fresh log file holding a banner, total system memory and the Python version."""
    # Mode 'w' truncates whatever an earlier run left in the log file.
    with open(self.log_file, 'w') as log:
        total_gb = psutil.virtual_memory().total/(1024**3)
        log.writelines([
            "Audio Ensemble Processor Log\n",
            "="*50 + "\n",
            f"System Memory: {total_gb:.2f} GB\n",
            f"Python Version: {sys.version}\n\n",
        ])
|
| 57 |
|
| 58 |
+
def log_message(self, message):
    """Append ``message`` to the log file, prefixed with a local timestamp.

    Args:
        message: Text to record. It is written after a
            ``[YYYY-MM-DD HH:MM:SS]`` prefix and followed by a newline.
    """
    stamp = time.strftime('%Y-%m-%d %H:%M:%S')
    # Explicit UTF-8: logged messages can contain user-supplied file paths,
    # which the platform's default encoding may be unable to represent.
    with open(self.log_file, 'a', encoding='utf-8') as log:
        log.write(f"[{stamp}] {message}\n")
|
| 62 |
|
| 63 |
+
def normalize_path(self, path):
    """Return an absolute path for ``path``, or a scratch copy with a safe name.

    The path is made absolute and resolved. If the result contains any of
    the characters ``[]()|&; `` (space included), the audio is loaded and
    re-written into ``self.temp_dir`` under a name built from ``hash()`` of
    the path without its extension, and the copy's path is returned.

    Any exception is logged via ``log_message`` and the path (resolved, if
    resolution had already succeeded) is returned instead.
    """
    try:
        path = str(Path(path).absolute().resolve())

        # Nothing to do when none of the problematic characters occur.
        if not set(path) & set('[]()|&; '):
            return path

        stem, suffix = os.path.splitext(path)
        # NOTE(review): str hashes normally differ between interpreter runs,
        # so this name is presumably only meant to be stable within one run.
        scratch_copy = os.path.join(self.temp_dir, f"{hash(stem)}{suffix}")

        if not os.path.exists(scratch_copy):
            audio, rate = librosa.load(path, sr=None, mono=False)
            sf.write(scratch_copy, audio.T, rate)

        return scratch_copy
    except Exception as err:
        self.log_message(f"Path normalization failed: {str(err)}")
        return path
|
| 85 |
|
| 86 |
+
def validate_inputs(self, files, method, output_path):
    """Check the method name, every input file and the output location.

    All problems are collected before failing, so a single error reports
    everything that is wrong.

    Args:
        files: Iterable of input audio paths; each is passed through
            ``normalize_path`` before being inspected.
        method: Ensemble method name; must be one of the seven listed below.
        output_path: Destination path. Its directory is created if missing
            and must be writable.

    Returns:
        ``(valid_files, target_sr, min_duration)``: the normalized paths of
        the accepted files, their sample rate, and the shortest duration in
        seconds.

    Raises:
        ValueError: If any check failed; the message joins all problems
            with newlines and is also written to the log.
    """
    valid_methods = [
        'avg_wave', 'median_wave', 'max_wave', 'min_wave',
        'max_fft', 'min_fft', 'median_fft'
    ]
    problems = []

    if method not in valid_methods:
        problems.append(f"Invalid method '{method}'. Available: {valid_methods}")

    accepted = []
    durations = []
    rates_seen = set()
    channel_counts = set()

    for candidate in files:
        try:
            local_path = self.normalize_path(candidate)

            # Existence and size are checked before the file is opened as audio.
            if not os.path.exists(local_path):
                problems.append(f"File not found: {local_path}")
                continue
            if os.path.getsize(local_path) == 0:
                problems.append(f"Empty file: {local_path}")
                continue

            try:
                with sf.SoundFile(local_path) as handle:
                    rate, n_frames, n_channels = (
                        handle.samplerate, handle.frames, handle.channels
                    )
            except Exception as exc:
                problems.append(f"Invalid audio file {local_path}: {str(exc)}")
                continue

            # Only two-channel files are accepted.
            if n_channels != 2:
                problems.append(
                    f"File must be stereo (has {n_channels} channels): {local_path}"
                )
                continue

            rates_seen.add(rate)
            durations.append(n_frames / rate)
            channel_counts.add(n_channels)
            accepted.append(local_path)

        except Exception as exc:
            problems.append(f"Error processing {candidate}: {str(exc)}")

    # Checks across the whole set of accepted files.
    if len(accepted) < 2:
        problems.append("At least 2 valid files required")
    if len(rates_seen) > 1:
        problems.append(f"Sample rate mismatch: {rates_seen}")
    if len(channel_counts) > 1:
        problems.append(f"Channel count mismatch: {channel_counts}")

    # Output location: create the directory if needed, then test writability.
    try:
        output_path = self.normalize_path(output_path)
        target_dir = os.path.dirname(output_path) or '.'
        if not os.path.exists(target_dir):
            os.makedirs(target_dir, exist_ok=True)
        if not os.access(target_dir, os.W_OK):
            problems.append(f"No write permission for output directory: {target_dir}")
    except Exception as exc:
        problems.append(f"Output path error: {str(exc)}")

    if problems:
        report = "\n".join(problems)
        self.log_message(f"Validation failed:\n{report}")
        raise ValueError(report)

    target_sr = rates_seen.pop() if rates_seen else 44100
    shortest = min(durations) if durations else None
    return accepted, target_sr, shortest
|
| 171 |
|
| 172 |
+
def process_waveform(self, chunks, method, weights=None):
    """Combine aligned waveform chunks sample by sample.

    Args:
        chunks: Sequence (or array) of equally shaped sample arrays, one per
            input; they are combined along a new leading axis.
        method: One of 'avg_wave', 'median_wave', 'max_wave', 'min_wave'.
        weights: Optional per-input weights; only used by 'avg_wave'.

    Returns:
        An array shaped like a single chunk, or None when ``method`` is not
        one of the waveform methods.
    """
    if method == 'avg_wave':
        if weights is not None:
            return np.average(chunks, axis=0, weights=weights)
        return np.mean(chunks, axis=0)

    if method == 'median_wave':
        return np.median(chunks, axis=0)

    if method in ('max_wave', 'min_wave'):
        # Samples are signed, so "largest"/"smallest" is decided on the
        # absolute value and the chosen sample keeps its sign. A plain signed
        # max/min would skew the output towards positive/negative values.
        stacked = np.asarray(chunks)
        magnitude = np.abs(stacked)
        if method == 'max_wave':
            pick = np.argmax(magnitude, axis=0)
        else:
            pick = np.argmin(magnitude, axis=0)
        return np.take_along_axis(stacked, pick[np.newaxis, ...], axis=0)[0]

    return None
|
| 184 |
|
| 185 |
+
def process_spectral(self, chunks, method):
    """Combine chunks in the STFT domain and return the resynthesised audio.

    Every chunk is truncated to the shortest chunk's length, each channel is
    transformed with a Hann-window STFT, the magnitudes are reduced across
    inputs, and the result is inverted using the phase of the first input.

    Args:
        chunks: Sequence of 2-D arrays indexed ``[channel, sample]``.
        method: 'max_fft', 'min_fft' or 'median_fft'.

    Returns:
        Array of shape ``(channels, chunks[0].shape[1])``, zero-padded or
        truncated to that length; or None when there are no chunks, the
        shortest chunk has fewer than 256 samples, or an STFT/ISTFT call
        fails (the reason is logged).

    Raises:
        ValueError: If ``method`` is not a spectral method. This is checked
            up front instead of failing after all transforms are computed.
    """
    reducers = {'max_fft': np.max, 'min_fft': np.min, 'median_fft': np.median}
    if method not in reducers:
        raise ValueError(
            f"Unsupported spectral method '{method}'. Available: {sorted(reducers)}"
        )

    if len(chunks) == 0:
        self.log_message("No valid STFTs computed.")
        return None

    min_samples = min(chunk.shape[1] for chunk in chunks)
    nperseg = min(1024, min_samples)  # Segment length must fit the shortest chunk
    noverlap = nperseg // 2
    self.log_message(f"STFT parameters: nperseg={nperseg}, noverlap={noverlap}, min_samples={min_samples}")

    # Every chunk is truncated to min_samples, so one length check covers all.
    if min_samples < 256:  # Minimum reasonable length for STFT
        self.log_message(f"Warning: Chunk too short ({min_samples} samples) for STFT. Skipping.")
        return None

    specs = []
    for chunk in chunks:
        trimmed = chunk[:, :min_samples]
        try:
            # stft returns (freqs, times, Zxx); only the spectrogram is kept.
            channel_specs = [
                stft(trimmed[ch], nperseg=nperseg, noverlap=noverlap, window='hann')[2]
                for ch in range(trimmed.shape[0])
            ]
        except Exception as e:
            self.log_message(f"STFT failed for channel: {str(e)}")
            return None
        specs.append(np.array(channel_specs))

    specs = np.array(specs)
    self.log_message(f"STFT shapes: {[spec.shape for spec in specs]}")

    combined_mag = reducers[method](np.abs(specs), axis=0)

    # Use phase from first file
    combined_spec = combined_mag * np.exp(1j * np.angle(specs[0]))

    target_len = chunks[0].shape[1]
    reconstructed = np.zeros((combined_spec.shape[0], target_len))
    for channel in range(combined_spec.shape[0]):
        try:
            _, xrec = istft(
                combined_spec[channel],
                nperseg=nperseg,
                noverlap=noverlap,
                window='hann'
            )
            # Pad or truncate to the first chunk's original length.
            if xrec.shape[0] < target_len:
                xrec = np.pad(xrec, (0, target_len - xrec.shape[0]), mode='constant')
            reconstructed[channel] = xrec[:target_len]
        except Exception as e:
            self.log_message(f"ISTFT failed for channel: {str(e)}")
            return None

    return reconstructed
|
| 257 |
+
|
| 258 |
+
def run_ensemble(self, files, method, output_path, weights=None, buffer_size=32768):
    """Combine several audio files into a single ensemble output file.

    The inputs are read in aligned chunks of ``buffer_size`` frames, each
    chunk is merged with ``process_spectral`` (methods ending in ``_fft``)
    or ``process_waveform`` (all other methods), and the merged chunk is
    appended to a 2-channel ``PCM_24`` file at ``output_path``.  Progress is
    reported on stdout as ``[SESA_PROGRESS]<percent>`` lines.

    Args:
        files: Input audio file paths; passed to ``self.validate_inputs``.
        method: Ensemble method name (for example ``'avg_wave'`` or
            ``'max_fft'``).
        output_path: Destination file; its directory is created if missing.
        weights: Optional per-file weights.  They are normalised to sum to 1
            and only used when their count matches the number of valid
            files and their sum is finite and non-zero; otherwise the
            ensemble runs unweighted.
        buffer_size: Number of frames processed per chunk.

    Returns:
        ``True`` when the output file was written, ``False`` when any step
        failed (the error is logged and echoed on stderr instead of being
        raised).
    """
    # Local import: only the write-permission probe below needs it.
    import tempfile

    try:
        # Validate and prepare inputs
        valid_files, target_sr, duration = self.validate_inputs(files, method, output_path)
        output_path = self.normalize_path(output_path)

        self.log_message(f"Starting ensemble with method: {method}")
        self.log_message(f"Input files: {json.dumps(valid_files, indent=2)}")
        self.log_message(f"Target sample rate: {target_sr}Hz")
        self.log_message(f"Duration: {duration:.2f} seconds")
        self.log_message(f"Output path: {output_path}")

        # Ensure output directory exists
        output_dir = os.path.dirname(output_path) or '.'
        os.makedirs(output_dir, exist_ok=True)
        self.log_message(f"Output directory created/verified: {output_dir}")

        # Verify write permissions with an anonymous temporary file so that
        # the probe can never overwrite or delete an existing user file.
        try:
            with tempfile.TemporaryFile(dir=output_dir) as probe:
                probe.write(b"Test")
            self.log_message(f"Write permissions verified for: {output_dir}")
        except Exception as e:
            self.log_message(f"Write permission error for {output_dir}: {str(e)}")
            raise ValueError(f"Cannot write to output directory {output_dir}: {str(e)}") from e

        # Prepare weights
        if weights and len(weights) == len(valid_files):
            weights = np.array(weights, dtype=np.float32)
            weight_total = weights.sum()
            if np.isfinite(weight_total) and weight_total != 0:
                weights /= weight_total  # Normalize
                self.log_message(f"Using weights: {weights}")
            else:
                # Dividing by a zero/non-finite sum would fill the output
                # with NaN/inf samples; fall back to the unweighted path.
                self.log_message("Weights sum to zero or are not finite; ignoring weights")
                weights = None
        else:
            weights = None

        # Open all files and verify exact alignment
        readers = []
        try:
            # Open one file at a time so that handles opened before a
            # failing open are still closed by the finally block.
            for path in valid_files:
                readers.append(sf.SoundFile(path))

            # Get exact frame counts from each file
            frame_counts = [r.frames for r in readers]
            self.log_message(f"Frame counts: {frame_counts}")

            # Use the shortest to avoid reading past file end
            shortest_frames = min(frame_counts)
            self.log_message(f"Using shortest frame count: {shortest_frames}")

            # Prepare output
            self.log_message(f"Opening output file for writing: {output_path}")
            print("Loading audio files...", flush=True)
            with sf.SoundFile(output_path, 'w', target_sr, 2, 'PCM_24') as outfile:
                # Process in chunks (progress via print for GUI capture)
                total_chunks = (shortest_frames + buffer_size - 1) // buffer_size
                chunk_count = 0
                last_reported_percent = -1
                print("Processing ensemble...", flush=True)

                for pos in range(0, shortest_frames, buffer_size):
                    chunk_size = min(buffer_size, shortest_frames - pos)

                    # Read perfectly aligned chunks from all files
                    chunks = []
                    for i, r in enumerate(readers):
                        # Ensure we're at the exact position
                        r.seek(pos)
                        current_pos = r.tell()

                        if current_pos != pos:
                            self.log_message(f"Warning: File {i} seek mismatch. Expected {pos}, got {current_pos}")
                            r.seek(pos)

                        # Read exact chunk size
                        data = r.read(chunk_size)

                        # Verify chunk size
                        if data.shape[0] != chunk_size:
                            self.log_message(f"Warning: File {i} chunk size mismatch. Expected {chunk_size}, got {data.shape[0]}")
                            # Pad or truncate to match
                            if data.shape[0] < chunk_size:
                                data = np.pad(data, ((0, chunk_size - data.shape[0]), (0, 0)), mode='constant')
                            else:
                                data = data[:chunk_size]

                        chunks.append(data.T)  # Transpose to (channels, samples)

                    chunks = np.array(chunks)

                    if pos % (10 * buffer_size) == 0:  # Log every 10 chunks
                        self.log_message(f"Processing chunk at pos={pos}, shape={chunks.shape}")

                    # Process based on method type
                    if method.endswith('_fft'):
                        result = self.process_spectral(chunks, method)
                        if result is None:
                            self.log_message("Spectral processing failed, falling back to avg_wave")
                            result = self.process_waveform(chunks, 'avg_wave', weights)
                    else:
                        result = self.process_waveform(chunks, method, weights)

                    # Verify result shape (the output file is opened with 2 channels)
                    expected_shape = (2, chunk_size)
                    if result.shape != expected_shape:
                        self.log_message(f"Warning: Result shape {result.shape} != expected {expected_shape}")
                        # Adjust result to match expected shape
                        if result.shape[1] < chunk_size:
                            result = np.pad(result, ((0, 0), (0, chunk_size - result.shape[1])), mode='constant')
                        elif result.shape[1] > chunk_size:
                            result = result[:, :chunk_size]

                    # Write output
                    outfile.write(result.T)  # Transpose back to (samples, channels)

                    # Clean up and update progress
                    del chunks, result
                    chunk_count += 1

                    # Report real progress percentage with unique prefix
                    current_percent = int((chunk_count / total_chunks) * 100)
                    if current_percent > last_reported_percent:
                        last_reported_percent = current_percent
                        print(f"[SESA_PROGRESS]{current_percent}", flush=True)

                    if pos % (5 * buffer_size) == 0:
                        gc.collect()

                print("Saving ensemble output...", flush=True)

            self.log_message(f"Successfully created output: {output_path}")
            print(f"\nEnsemble completed successfully: {output_path}")
            return True

        except Exception as e:
            self.log_message(f"Processing error: {str(e)}\n{traceback.format_exc()}")
            raise
        finally:
            # Best-effort close: one failing handle must not keep the
            # remaining readers open, but the failure is no longer hidden.
            for r in readers:
                try:
                    r.close()
                except Exception as close_error:
                    self.log_message(f"Failed to close input file: {str(close_error)}")

    except Exception as e:
        self.log_message(f"Fatal error: {str(e)}\n{traceback.format_exc()}")
        print(f"\nError during processing: {str(e)}", file=sys.stderr)
        return False
|
| 408 |
|
| 409 |
+
def main():
    """Command-line entry point: parse the options and run one ensemble job.

    The process exits with status 0 when ``run_ensemble`` reports success
    and with status 1 otherwise.
    """
    cli = argparse.ArgumentParser(
        description='Ultimate Audio Ensemble Processor - Supports all ensemble methods',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )

    # Option table, registered in order so the generated help text keeps
    # the same layout.
    option_table = (
        ('--files', dict(nargs='+', required=True,
                         help='Input audio files (supports special characters)')),
        ('--type', dict(required=True,
                        choices=['avg_wave', 'median_wave', 'max_wave', 'min_wave',
                                 'max_fft', 'min_fft', 'median_fft'],
                        help='Ensemble method to use')),
        ('--weights', dict(nargs='+', type=float,
                           help='Relative weights for each input file')),
        ('--output', dict(required=True,
                          help='Output file path')),
        ('--buffer', dict(type=int, default=32768,
                          help='Buffer size in samples (larger=faster but uses more memory)')),
    )
    for flag, settings in option_table:
        cli.add_argument(flag, **settings)

    opts = cli.parse_args()

    with AudioEnsembleEngine() as engine:
        succeeded = engine.run_ensemble(
            files=opts.files,
            method=opts.type,
            output_path=opts.output,
            weights=opts.weights,
            buffer_size=opts.buffer
        )

    if succeeded:
        sys.exit(0)
    sys.exit(1)
|
| 439 |
|
| 440 |
if __name__ == "__main__":
|
| 441 |
+
import time
|
| 442 |
+
main()
|
gui.py
CHANGED
|
@@ -8,44 +8,15 @@ import json
|
|
| 8 |
import sys
|
| 9 |
import time
|
| 10 |
import random
|
| 11 |
-
from helpers import update_model_dropdown, handle_file_upload, clear_old_output, save_uploaded_file, update_file_list, clean_model
|
| 12 |
from download import download_callback
|
| 13 |
-
from model import get_model_config, MODEL_CONFIGS
|
| 14 |
from processing import process_audio, auto_ensemble_process, ensemble_audio_fn, refresh_auto_output
|
| 15 |
from assets.i18n.i18n import I18nAuto
|
| 16 |
from config_manager import load_config, save_config, update_favorites, save_preset, delete_preset
|
|
|
|
| 17 |
import logging
|
| 18 |
-
|
| 19 |
-
from inference import proc_folder
|
| 20 |
-
|
| 21 |
-
# Set up logging for the patch
|
| 22 |
-
logging.basicConfig(filename='gradio_schema.log', level=logging.DEBUG)
|
| 23 |
-
logger = logging.getLogger('gradio_schema')
|
| 24 |
-
|
| 25 |
-
# Patch gradio_client.utils._json_schema_to_python_type
|
| 26 |
-
original_json_schema_to_python_type = utils._json_schema_to_python_type
|
| 27 |
-
|
| 28 |
-
def patched_json_schema_to_python_type(schema: any, defs: dict | None = None) -> str:
|
| 29 |
-
logger.debug(f"Parsing schema: {schema}")
|
| 30 |
-
if isinstance(schema, bool):
|
| 31 |
-
logger.info(f"Found boolean schema: {schema}, returning 'boolean'")
|
| 32 |
-
return "boolean"
|
| 33 |
-
if not isinstance(schema, dict):
|
| 34 |
-
logger.warning(f"Unexpected schema type: {type(schema)}, returning 'Any'")
|
| 35 |
-
return "Any"
|
| 36 |
-
if "enum" in schema and schema.get("type") == "string":
|
| 37 |
-
logger.info(f"Handling enum schema: {schema['enum']}")
|
| 38 |
-
return f"Literal[{', '.join(repr(e) for e in schema['enum'])}]"
|
| 39 |
-
try:
|
| 40 |
-
return original_json_schema_to_python_type(schema, defs)
|
| 41 |
-
except utils.APIInfoParseError as e:
|
| 42 |
-
logger.error(f"Failed to parse schema {schema}: {e}")
|
| 43 |
-
return "str"
|
| 44 |
-
|
| 45 |
-
utils._json_schema_to_python_type = patched_json_schema_to_python_type
|
| 46 |
-
|
| 47 |
-
# General logging setup
|
| 48 |
-
logging.basicConfig(filename='sesa_gui.log', level=logging.DEBUG)
|
| 49 |
|
| 50 |
# BASE_DIR tanımı
|
| 51 |
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
|
|
@@ -195,11 +166,19 @@ def create_interface():
|
|
| 195 |
}
|
| 196 |
#progress-bar {
|
| 197 |
height: 20px;
|
| 198 |
-
background: linear-gradient(
|
|
|
|
| 199 |
border-radius: 5px;
|
| 200 |
-
transition: width 0.
|
| 201 |
max-width: 100% !important;
|
| 202 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 203 |
.gr-accordion {
|
| 204 |
background: rgba(128, 0, 0, 0.5) !important;
|
| 205 |
border-radius: 10px !important;
|
|
@@ -264,7 +243,7 @@ def create_interface():
|
|
| 264 |
with gr.Row():
|
| 265 |
model_category = gr.Dropdown(
|
| 266 |
label=i18n("category"),
|
| 267 |
-
choices=[i18n(cat) for cat in
|
| 268 |
value=i18n(initial_settings["model_category"])
|
| 269 |
)
|
| 270 |
favorite_button = gr.Button(i18n("add_favorite"), variant="secondary", scale=0)
|
|
@@ -284,11 +263,33 @@ def create_interface():
|
|
| 284 |
value=initial_settings["export_format"]
|
| 285 |
)
|
| 286 |
with gr.Column(scale=1):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 287 |
chunk_size = gr.Dropdown(
|
| 288 |
label=i18n("chunk_size"),
|
| 289 |
choices=[352800, 485100],
|
| 290 |
value=initial_settings["chunk_size"],
|
| 291 |
-
info=i18n("chunk_size_info")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 292 |
)
|
| 293 |
|
| 294 |
with gr.Row():
|
|
@@ -302,6 +303,35 @@ def create_interface():
|
|
| 302 |
info=i18n("overlap_info")
|
| 303 |
)
|
| 304 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 305 |
with gr.Row():
|
| 306 |
with gr.Column(scale=1):
|
| 307 |
use_tta = gr.Checkbox(
|
|
@@ -393,7 +423,7 @@ def create_interface():
|
|
| 393 |
maximum=5,
|
| 394 |
step=1,
|
| 395 |
value=initial_settings.get("matchering_passes", 1),
|
| 396 |
-
|
| 397 |
interactive=True
|
| 398 |
)
|
| 399 |
|
|
@@ -402,11 +432,23 @@ def create_interface():
|
|
| 402 |
clear_old_output_btn = gr.Button(i18n("reset"), variant="secondary")
|
| 403 |
clear_old_output_status = gr.Textbox(label=i18n("status"), interactive=False)
|
| 404 |
|
| 405 |
-
# Favorite handler
|
| 406 |
-
def update_favorite_button(model, favorites):
|
| 407 |
cleaned_model = clean_model(model) if model else None
|
| 408 |
is_favorited = cleaned_model in favorites if cleaned_model else False
|
| 409 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 410 |
|
| 411 |
def toggle_favorite(model, favorites):
|
| 412 |
if not model:
|
|
@@ -422,10 +464,29 @@ def create_interface():
|
|
| 422 |
gr.update(value=i18n("add_favorite") if is_favorited else i18n("remove_favorite"))
|
| 423 |
)
|
| 424 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 425 |
model_dropdown.change(
|
| 426 |
fn=update_favorite_button,
|
| 427 |
-
inputs=[model_dropdown, favorites_state],
|
| 428 |
-
outputs=favorite_button
|
| 429 |
)
|
| 430 |
|
| 431 |
favorite_button.click(
|
|
@@ -462,31 +523,34 @@ def create_interface():
|
|
| 462 |
with gr.Column():
|
| 463 |
original_audio = gr.Audio(label=i18n("original"), interactive=False)
|
| 464 |
with gr.Row():
|
| 465 |
-
vocals_audio = gr.Audio(label=i18n("vocals")
|
| 466 |
-
instrumental_audio = gr.Audio(label=i18n("instrumental_output")
|
| 467 |
-
other_audio = gr.Audio(label=i18n("other")
|
| 468 |
|
| 469 |
with gr.Tab(i18n("details_tab")) as details_tab:
|
| 470 |
with gr.Column():
|
| 471 |
with gr.Row():
|
| 472 |
-
male_audio = gr.Audio(label=i18n("male")
|
| 473 |
-
female_audio = gr.Audio(label=i18n("female")
|
| 474 |
-
speech_audio = gr.Audio(label=i18n("speech")
|
| 475 |
with gr.Row():
|
| 476 |
-
drum_audio = gr.Audio(label=i18n("drums")
|
| 477 |
-
bass_audio = gr.Audio(label=i18n("bass")
|
| 478 |
with gr.Row():
|
| 479 |
-
effects_audio = gr.Audio(label=i18n("effects")
|
| 480 |
|
| 481 |
with gr.Tab(i18n("advanced_tab")) as advanced_tab:
|
| 482 |
with gr.Column():
|
| 483 |
with gr.Row():
|
| 484 |
-
phaseremix_audio = gr.Audio(label=i18n("phase_remix")
|
| 485 |
-
dry_audio = gr.Audio(label=i18n("dry")
|
| 486 |
with gr.Row():
|
| 487 |
-
music_audio = gr.Audio(label=i18n("music")
|
| 488 |
-
karaoke_audio = gr.Audio(label=i18n("karaoke")
|
| 489 |
-
bleed_audio = gr.Audio(label=i18n("bleed")
|
|
|
|
|
|
|
|
|
|
| 490 |
|
| 491 |
separation_progress_html = gr.HTML(
|
| 492 |
value=f"""
|
|
@@ -605,7 +669,7 @@ def create_interface():
|
|
| 605 |
info=i18n("matchering_info")
|
| 606 |
)
|
| 607 |
|
| 608 |
-
with gr.Group(visible=
|
| 609 |
auto_matchering_passes = gr.Slider(
|
| 610 |
label=i18n("matchering_passes"),
|
| 611 |
minimum=1,
|
|
@@ -621,13 +685,13 @@ def create_interface():
|
|
| 621 |
with gr.Row():
|
| 622 |
auto_category_dropdown = gr.Dropdown(
|
| 623 |
label=i18n("model_category"),
|
| 624 |
-
choices=[i18n(cat) for cat in
|
| 625 |
value=i18n("Vocal Models")
|
| 626 |
)
|
| 627 |
selected_models = gr.Dropdown(
|
| 628 |
label=i18n("selected_models"),
|
| 629 |
choices=update_model_dropdown(i18n(initial_settings["auto_category"]), favorites=initial_favorites)["choices"],
|
| 630 |
-
value=initial_settings["selected_models"]
|
| 631 |
multiselect=True
|
| 632 |
)
|
| 633 |
|
|
@@ -666,19 +730,21 @@ def create_interface():
|
|
| 666 |
def load_preset(preset_name, presets, category, favorites):
|
| 667 |
if preset_name and preset_name in presets:
|
| 668 |
preset = presets[preset_name]
|
|
|
|
| 669 |
favorite_models = [f"{model} ⭐" if model in favorites else model for model in preset["models"]]
|
|
|
|
| 670 |
preset_category = preset.get("auto_category_dropdown", category)
|
|
|
|
| 671 |
model_choices = update_model_dropdown(preset_category, favorites=favorites)["choices"]
|
| 672 |
-
logger.debug(f"Preset '{preset_name}' loaded with models: {favorite_models}, category: {preset_category}")
|
| 673 |
return (
|
| 674 |
-
gr.update(value=preset_category),
|
| 675 |
-
gr.update(choices=model_choices, value=favorite_models),
|
| 676 |
-
gr.update(value=preset["ensemble_method"])
|
| 677 |
)
|
| 678 |
-
logger.debug(f"Preset '{preset_name}' not found.")
|
| 679 |
return gr.update(), gr.update(), gr.update()
|
| 680 |
|
| 681 |
def sync_presets():
|
|
|
|
| 682 |
config = load_config()
|
| 683 |
return config["presets"], gr.update(choices=list(config["presets"].keys()), value=None)
|
| 684 |
|
|
@@ -694,14 +760,13 @@ def create_interface():
|
|
| 694 |
if not models and not favorites:
|
| 695 |
return gr.update(), presets, i18n("no_models_selected_for_preset")
|
| 696 |
new_presets = save_preset(
|
| 697 |
-
presets,
|
| 698 |
-
preset_name,
|
| 699 |
-
models,
|
| 700 |
ensemble_method,
|
| 701 |
-
auto_category_dropdown=auto_category_dropdown
|
| 702 |
)
|
| 703 |
save_config(favorites, load_config()["settings"], new_presets)
|
| 704 |
-
logger.debug(f"Preset dropdown updated with choices: {list(new_presets.keys())}")
|
| 705 |
return gr.update(choices=list(new_presets.keys()), value=None), new_presets, i18n("preset_saved").format(preset_name)
|
| 706 |
|
| 707 |
save_preset_btn.click(
|
|
@@ -758,13 +823,14 @@ def create_interface():
|
|
| 758 |
label=i18n("original_audio"),
|
| 759 |
interactive=False,
|
| 760 |
every=1,
|
| 761 |
-
elem_id="original_audio_player"
|
|
|
|
| 762 |
)
|
| 763 |
with gr.Tab(i18n("ensemble_result_tab")) as ensemble_result_tab:
|
| 764 |
auto_output_audio = gr.Audio(
|
| 765 |
label=i18n("output_preview"),
|
| 766 |
-
|
| 767 |
-
|
| 768 |
)
|
| 769 |
refresh_output_btn = gr.Button(i18n("refresh_output"), variant="secondary")
|
| 770 |
|
|
@@ -784,7 +850,7 @@ def create_interface():
|
|
| 784 |
placeholder=i18n("waiting_for_processing"),
|
| 785 |
visible=False
|
| 786 |
)
|
| 787 |
-
|
| 788 |
with gr.Tab(i18n("download_sources_tab"), id="download_tab"):
|
| 789 |
with gr.Row():
|
| 790 |
with gr.Column():
|
|
@@ -839,8 +905,8 @@ def create_interface():
|
|
| 839 |
ensemble_output_audio = gr.Audio(
|
| 840 |
label=i18n("ensembled_output"),
|
| 841 |
interactive=False,
|
| 842 |
-
|
| 843 |
-
|
| 844 |
)
|
| 845 |
with gr.Tab(i18n("processing_log_tab")) as processing_log_tab:
|
| 846 |
with gr.Accordion(i18n("processing_details"), open=True, elem_id="log-accordion"):
|
|
@@ -858,55 +924,522 @@ def create_interface():
|
|
| 858 |
variant="primary",
|
| 859 |
size="sm",
|
| 860 |
elem_id="process-btn"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 861 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 862 |
|
| 863 |
def save_settings_on_process(*args):
|
| 864 |
-
|
|
|
|
| 865 |
backend_apollo_method = "mid_side_method" if apollo_method_value == i18n("mid_side_method") else "normal_method"
|
| 866 |
cleaned_model = clean_model(args[1]) if args[1] else None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 867 |
settings = {
|
| 868 |
-
"chunk_size":
|
|
|
|
|
|
|
| 869 |
"overlap": args[3],
|
| 870 |
"export_format": args[4],
|
| 871 |
-
"
|
| 872 |
-
"
|
| 873 |
-
"
|
| 874 |
-
"
|
| 875 |
-
"
|
| 876 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 877 |
"apollo_method": backend_apollo_method,
|
| 878 |
-
"apollo_normal_model": args[
|
| 879 |
-
"apollo_midside_model": args[
|
| 880 |
-
"use_matchering": args[
|
| 881 |
-
"matchering_passes": args[
|
| 882 |
-
"model_category": args[
|
| 883 |
"selected_model": cleaned_model,
|
| 884 |
-
"auto_ensemble_type": args[
|
| 885 |
}
|
| 886 |
save_config(load_config()["favorites"], settings, load_config()["presets"])
|
| 887 |
-
|
|
|
|
| 888 |
modified_args[1] = cleaned_model
|
| 889 |
-
modified_args[
|
| 890 |
-
|
|
|
|
|
|
|
|
|
|
| 891 |
|
| 892 |
def save_auto_ensemble_settings(*args):
|
|
|
|
| 893 |
settings = load_config()["settings"]
|
| 894 |
settings["auto_ensemble_type"] = args[7]
|
| 895 |
settings["use_matchering"] = args[14]
|
| 896 |
settings["matchering_passes"] = args[15]
|
| 897 |
save_config(load_config()["favorites"], settings, load_config()["presets"])
|
| 898 |
-
|
| 899 |
for update in auto_ensemble_process(*args):
|
| 900 |
if isinstance(update, tuple) and len(update) == 3:
|
| 901 |
-
|
| 902 |
-
return output_audio, status, progress_html
|
| 903 |
|
| 904 |
def update_category_dropdowns(cat):
|
| 905 |
-
|
| 906 |
-
eng_cat = next((k for k in
|
| 907 |
-
logging.debug(f"Using English category: {eng_cat}")
|
| 908 |
choices = update_model_dropdown(eng_cat, favorites=load_config()["favorites"])["choices"]
|
| 909 |
-
logging.debug(f"Model choices: {choices}")
|
| 910 |
return gr.update(choices=choices), gr.update(choices=choices)
|
| 911 |
|
| 912 |
model_category.change(
|
|
@@ -940,39 +1473,38 @@ def create_interface():
|
|
| 940 |
)
|
| 941 |
|
| 942 |
auto_category_dropdown.change(
|
| 943 |
-
fn=lambda cat: gr.update(choices=update_model_dropdown(next((k for k in
|
| 944 |
inputs=auto_category_dropdown,
|
| 945 |
outputs=selected_models
|
| 946 |
)
|
| 947 |
|
| 948 |
-
def
|
| 949 |
-
input_names = [
|
| 950 |
-
"input_audio_file", "model_dropdown", "chunk_size", "overlap", "export_format",
|
| 951 |
-
"use_tta", "use_demud_phaseremix_inst", "extract_instrumental",
|
| 952 |
-
"use_apollo", "apollo_chunk_size", "apollo_overlap",
|
| 953 |
-
"apollo_method", "apollo_normal_model", "apollo_midside_model",
|
| 954 |
-
"use_matchering", "matchering_passes", "model_category", "selected_model"
|
| 955 |
-
]
|
| 956 |
cleaned_args = list(args)
|
| 957 |
cleaned_args[1] = clean_model(cleaned_args[1]) if cleaned_args[1] else None
|
| 958 |
-
cleaned_args[
|
| 959 |
-
|
| 960 |
-
|
| 961 |
-
|
|
|
|
|
|
|
|
|
|
| 962 |
|
| 963 |
process_btn.click(
|
| 964 |
-
fn=
|
| 965 |
inputs=[
|
| 966 |
input_audio_file, model_dropdown, chunk_size, overlap, export_format,
|
|
|
|
| 967 |
use_tta, use_demud_phaseremix_inst, extract_instrumental,
|
| 968 |
use_apollo, apollo_chunk_size, apollo_overlap,
|
| 969 |
apollo_method, apollo_normal_model, apollo_midside_model,
|
| 970 |
-
use_matchering, matchering_passes, model_category, model_dropdown
|
|
|
|
| 971 |
],
|
| 972 |
outputs=[
|
| 973 |
vocals_audio, instrumental_audio, phaseremix_audio, drum_audio, karaoke_audio,
|
| 974 |
other_audio, bass_audio, effects_audio, speech_audio, bleed_audio, music_audio,
|
| 975 |
dry_audio, male_audio, female_audio,
|
|
|
|
| 976 |
separation_process_status, separation_progress_html
|
| 977 |
]
|
| 978 |
)
|
|
@@ -1016,4 +1548,4 @@ def create_interface():
|
|
| 1016 |
refresh_btn.click(fn=update_file_list, outputs=file_dropdown)
|
| 1017 |
ensemble_process_btn.click(fn=ensemble_audio_fn, inputs=[file_dropdown, ensemble_type, weights_input], outputs=[ensemble_output_audio, ensemble_status])
|
| 1018 |
|
| 1019 |
-
return demo
|
|
|
|
| 8 |
import sys
|
| 9 |
import time
|
| 10 |
import random
|
| 11 |
+
from helpers import update_model_dropdown, handle_file_upload, clear_old_output, save_uploaded_file, update_file_list, clean_model, get_model_categories
|
| 12 |
from download import download_callback
|
| 13 |
+
from model import get_model_config, MODEL_CONFIGS, get_all_model_configs_with_custom, add_custom_model, delete_custom_model, get_custom_models_list, SUPPORTED_MODEL_TYPES, load_custom_models, get_model_chunk_size
|
| 14 |
from processing import process_audio, auto_ensemble_process, ensemble_audio_fn, refresh_auto_output
|
| 15 |
from assets.i18n.i18n import I18nAuto
|
| 16 |
from config_manager import load_config, save_config, update_favorites, save_preset, delete_preset
|
| 17 |
+
from phase_fixer import SOURCE_MODELS, TARGET_MODELS
|
| 18 |
import logging
|
| 19 |
+
logging.basicConfig(filename='sesa_gui.log', level=logging.WARNING)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
|
| 21 |
# BASE_DIR tanımı
|
| 22 |
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
|
|
|
|
| 166 |
}
|
| 167 |
#progress-bar {
|
| 168 |
height: 20px;
|
| 169 |
+
background: linear-gradient(90deg, #6e8efb, #a855f7, #ff4040);
|
| 170 |
+
background-size: 200% 100%;
|
| 171 |
border-radius: 5px;
|
| 172 |
+
transition: width 0.4s cubic-bezier(0.4, 0, 0.2, 1);
|
| 173 |
max-width: 100% !important;
|
| 174 |
}
|
| 175 |
+
@keyframes progress-shimmer {
|
| 176 |
+
0% { background-position: 200% 0; }
|
| 177 |
+
100% { background-position: -200% 0; }
|
| 178 |
+
}
|
| 179 |
+
#progress-bar[data-active="true"] {
|
| 180 |
+
animation: progress-shimmer 2s linear infinite;
|
| 181 |
+
}
|
| 182 |
.gr-accordion {
|
| 183 |
background: rgba(128, 0, 0, 0.5) !important;
|
| 184 |
border-radius: 10px !important;
|
|
|
|
| 243 |
with gr.Row():
|
| 244 |
model_category = gr.Dropdown(
|
| 245 |
label=i18n("category"),
|
| 246 |
+
choices=[i18n(cat) for cat in get_all_model_configs_with_custom().keys()],
|
| 247 |
value=i18n(initial_settings["model_category"])
|
| 248 |
)
|
| 249 |
favorite_button = gr.Button(i18n("add_favorite"), variant="secondary", scale=0)
|
|
|
|
| 263 |
value=initial_settings["export_format"]
|
| 264 |
)
|
| 265 |
with gr.Column(scale=1):
|
| 266 |
+
_init_cs_mode = initial_settings.get("chunk_size_mode", "base")
|
| 267 |
+
chunk_size_mode = gr.Radio(
|
| 268 |
+
label=i18n("chunk_size_mode"),
|
| 269 |
+
choices=["base", "custom", "yaml"],
|
| 270 |
+
value=_init_cs_mode,
|
| 271 |
+
info=i18n("chunk_size_mode_info")
|
| 272 |
+
)
|
| 273 |
chunk_size = gr.Dropdown(
|
| 274 |
label=i18n("chunk_size"),
|
| 275 |
choices=[352800, 485100],
|
| 276 |
value=initial_settings["chunk_size"],
|
| 277 |
+
info=i18n("chunk_size_info"),
|
| 278 |
+
visible=(_init_cs_mode == "base")
|
| 279 |
+
)
|
| 280 |
+
chunk_size_custom = gr.Number(
|
| 281 |
+
label=i18n("chunk_size_custom_label"),
|
| 282 |
+
value=initial_settings.get("chunk_size_custom", 352800),
|
| 283 |
+
precision=0,
|
| 284 |
+
info=i18n("chunk_size_custom_info"),
|
| 285 |
+
visible=(_init_cs_mode == "custom")
|
| 286 |
+
)
|
| 287 |
+
chunk_size_yaml_display = gr.Textbox(
|
| 288 |
+
label=i18n("chunk_size_yaml_label"),
|
| 289 |
+
value=i18n("chunk_size_yaml_not_downloaded"),
|
| 290 |
+
interactive=False,
|
| 291 |
+
info=i18n("chunk_size_yaml_display_info"),
|
| 292 |
+
visible=(_init_cs_mode == "yaml")
|
| 293 |
)
|
| 294 |
|
| 295 |
with gr.Row():
|
|
|
|
| 303 |
info=i18n("overlap_info")
|
| 304 |
)
|
| 305 |
|
| 306 |
+
with gr.Accordion(i18n("backend_settings"), open=True) as backend_settings_accordion:
|
| 307 |
+
gr.Markdown(f"### {i18n('inference_backend')} - {i18n('ultra_optimized_pytorch')}")
|
| 308 |
+
gr.Markdown(f"**{i18n('default_active_max_speed')}**")
|
| 309 |
+
|
| 310 |
+
with gr.Row():
|
| 311 |
+
optimize_mode = gr.Dropdown(
|
| 312 |
+
label=i18n("optimization_mode"),
|
| 313 |
+
choices=['channels_last', 'compile', 'default'],
|
| 314 |
+
value=initial_settings.get("optimize_mode", "channels_last"),
|
| 315 |
+
info=f"channels_last: {i18n('channels_last_mode')} | compile: {i18n('compile_mode')} | default: {i18n('default_mode')}"
|
| 316 |
+
)
|
| 317 |
+
|
| 318 |
+
with gr.Row():
|
| 319 |
+
enable_amp = gr.Checkbox(
|
| 320 |
+
label=i18n("mixed_precision_amp"),
|
| 321 |
+
value=initial_settings.get("enable_amp", True),
|
| 322 |
+
info=i18n("mixed_precision_info")
|
| 323 |
+
)
|
| 324 |
+
enable_tf32 = gr.Checkbox(
|
| 325 |
+
label=i18n("tf32_acceleration"),
|
| 326 |
+
value=initial_settings.get("enable_tf32", True),
|
| 327 |
+
info=i18n("tf32_acceleration_info")
|
| 328 |
+
)
|
| 329 |
+
enable_cudnn_benchmark = gr.Checkbox(
|
| 330 |
+
label=i18n("cudnn_benchmark"),
|
| 331 |
+
value=initial_settings.get("enable_cudnn_benchmark", True),
|
| 332 |
+
info=i18n("cudnn_benchmark_info")
|
| 333 |
+
)
|
| 334 |
+
|
| 335 |
with gr.Row():
|
| 336 |
with gr.Column(scale=1):
|
| 337 |
use_tta = gr.Checkbox(
|
|
|
|
| 423 |
maximum=5,
|
| 424 |
step=1,
|
| 425 |
value=initial_settings.get("matchering_passes", 1),
|
| 426 |
+
info=i18n("matchering_passes_info"),
|
| 427 |
interactive=True
|
| 428 |
)
|
| 429 |
|
|
|
|
| 432 |
clear_old_output_btn = gr.Button(i18n("reset"), variant="secondary")
|
| 433 |
clear_old_output_status = gr.Textbox(label=i18n("status"), interactive=False)
|
| 434 |
|
| 435 |
+
# Favorite handler + chunk size auto-update
|
| 436 |
+
def update_favorite_button(model, favorites, cs_mode):
    """Refresh the favorite button and chunk-size widgets for a new model.

    Returns three ``gr.update`` objects, in the order they are wired to the
    outputs of ``model_dropdown.change``: the favorite button label, the
    base chunk-size dropdown, and the read-only YAML chunk-size textbox.
    The last two are no-op updates unless the selected model and the
    current chunk-size mode call for a change.
    """
    name = clean_model(model) if model else None
    starred = bool(name) and name in favorites

    label_key = "remove_favorite" if starred else "add_favorite"
    button_update = gr.update(value=i18n(label_key))
    size_update = gr.update()
    display_update = gr.update()

    # Nothing selected: only the button label is refreshed.
    if not name:
        return button_update, size_update, display_update

    detected = get_model_chunk_size(name)
    if cs_mode == "base":
        # The dropdown only offers these two sizes, so ignore any other value.
        if detected and detected in [352800, 485100]:
            size_update = gr.update(value=detected)
    elif cs_mode == "yaml":
        if detected:
            message = i18n("chunk_size_yaml_detected").format(detected)
        else:
            message = i18n("chunk_size_yaml_not_downloaded")
        display_update = gr.update(value=message)

    return button_update, size_update, display_update
|
| 452 |
|
| 453 |
def toggle_favorite(model, favorites):
|
| 454 |
if not model:
|
|
|
|
| 464 |
gr.update(value=i18n("add_favorite") if is_favorited else i18n("remove_favorite"))
|
| 465 |
)
|
| 466 |
|
| 467 |
+
def on_chunk_size_mode_change(mode, model):
    """Show only the chunk-size widget that belongs to the selected mode.

    Returns updates for the base dropdown (visible in "base" mode), the
    custom input (visible in "custom" mode) and the YAML display (visible
    in "yaml" mode). The YAML display text is always refreshed with the
    chunk size found for `model`, or a "not downloaded" message.
    """
    native_chunk = None
    if model:
        cleaned = clean_model(model)
        if cleaned:
            native_chunk = get_model_chunk_size(cleaned)

    if native_chunk:
        yaml_text = i18n("chunk_size_yaml_detected").format(native_chunk)
    else:
        yaml_text = i18n("chunk_size_yaml_not_downloaded")

    base_update = gr.update(visible=(mode == "base"))
    custom_update = gr.update(visible=(mode == "custom"))
    yaml_update = gr.update(visible=(mode == "yaml"), value=yaml_text)
    return (base_update, custom_update, yaml_update)
|
| 479 |
+
|
| 480 |
+
chunk_size_mode.change(
|
| 481 |
+
fn=on_chunk_size_mode_change,
|
| 482 |
+
inputs=[chunk_size_mode, model_dropdown],
|
| 483 |
+
outputs=[chunk_size, chunk_size_custom, chunk_size_yaml_display]
|
| 484 |
+
)
|
| 485 |
+
|
| 486 |
model_dropdown.change(
|
| 487 |
fn=update_favorite_button,
|
| 488 |
+
inputs=[model_dropdown, favorites_state, chunk_size_mode],
|
| 489 |
+
outputs=[favorite_button, chunk_size, chunk_size_yaml_display]
|
| 490 |
)
|
| 491 |
|
| 492 |
favorite_button.click(
|
|
|
|
| 523 |
with gr.Column():
|
| 524 |
original_audio = gr.Audio(label=i18n("original"), interactive=False)
|
| 525 |
with gr.Row():
|
| 526 |
+
vocals_audio = gr.Audio(label=i18n("vocals"))
|
| 527 |
+
instrumental_audio = gr.Audio(label=i18n("instrumental_output"))
|
| 528 |
+
other_audio = gr.Audio(label=i18n("other"))
|
| 529 |
|
| 530 |
with gr.Tab(i18n("details_tab")) as details_tab:
|
| 531 |
with gr.Column():
|
| 532 |
with gr.Row():
|
| 533 |
+
male_audio = gr.Audio(label=i18n("male"))
|
| 534 |
+
female_audio = gr.Audio(label=i18n("female"))
|
| 535 |
+
speech_audio = gr.Audio(label=i18n("speech"))
|
| 536 |
with gr.Row():
|
| 537 |
+
drum_audio = gr.Audio(label=i18n("drums"))
|
| 538 |
+
bass_audio = gr.Audio(label=i18n("bass"))
|
| 539 |
with gr.Row():
|
| 540 |
+
effects_audio = gr.Audio(label=i18n("effects"))
|
| 541 |
|
| 542 |
with gr.Tab(i18n("advanced_tab")) as advanced_tab:
|
| 543 |
with gr.Column():
|
| 544 |
with gr.Row():
|
| 545 |
+
phaseremix_audio = gr.Audio(label=i18n("phase_remix"))
|
| 546 |
+
dry_audio = gr.Audio(label=i18n("dry"))
|
| 547 |
with gr.Row():
|
| 548 |
+
music_audio = gr.Audio(label=i18n("music"))
|
| 549 |
+
karaoke_audio = gr.Audio(label=i18n("karaoke"))
|
| 550 |
+
bleed_audio = gr.Audio(label=i18n("bleed"))
|
| 551 |
+
with gr.Row():
|
| 552 |
+
mid_audio = gr.Audio(label="Mid")
|
| 553 |
+
side_audio = gr.Audio(label="Side")
|
| 554 |
|
| 555 |
separation_progress_html = gr.HTML(
|
| 556 |
value=f"""
|
|
|
|
| 669 |
info=i18n("matchering_info")
|
| 670 |
)
|
| 671 |
|
| 672 |
+
with gr.Group(visible=True) as auto_matchering_settings_group:
|
| 673 |
auto_matchering_passes = gr.Slider(
|
| 674 |
label=i18n("matchering_passes"),
|
| 675 |
minimum=1,
|
|
|
|
| 685 |
with gr.Row():
|
| 686 |
auto_category_dropdown = gr.Dropdown(
|
| 687 |
label=i18n("model_category"),
|
| 688 |
+
choices=[i18n(cat) for cat in get_all_model_configs_with_custom().keys()],
|
| 689 |
value=i18n("Vocal Models")
|
| 690 |
)
|
| 691 |
selected_models = gr.Dropdown(
|
| 692 |
label=i18n("selected_models"),
|
| 693 |
choices=update_model_dropdown(i18n(initial_settings["auto_category"]), favorites=initial_favorites)["choices"],
|
| 694 |
+
value=initial_settings["selected_models"],
|
| 695 |
multiselect=True
|
| 696 |
)
|
| 697 |
|
|
|
|
| 730 |
def load_preset(preset_name, presets, category, favorites):
    """Apply a saved preset to the auto-ensemble controls.

    Returns updates for the category dropdown, the selected-models
    dropdown and the ensemble-type control. When `preset_name` is empty or
    unknown, three no-op updates are returned instead.
    """
    if not (preset_name and preset_name in presets):
        return gr.update(), gr.update(), gr.update()

    preset = presets[preset_name]

    # Favorited models carry a trailing star in the dropdown.
    starred_models = []
    for model in preset["models"]:
        starred_models.append(f"{model} ⭐" if model in favorites else model)

    # A preset saved without a category keeps the currently selected one.
    preset_category = preset.get("auto_category_dropdown", category)

    # The model choices have to match the preset's category.
    model_choices = update_model_dropdown(preset_category, favorites=favorites)["choices"]

    category_update = gr.update(value=preset_category)
    models_update = gr.update(choices=model_choices, value=starred_models)
    method_update = gr.update(value=preset["ensemble_method"])
    return (category_update, models_update, method_update)
|
| 745 |
|
| 746 |
def sync_presets():
    """Reload presets from the config and reset the preset dropdown.

    Returns the presets mapping and a dropdown update listing the preset
    names with the selection cleared.
    """
    presets = load_config()["presets"]
    preset_names = list(presets.keys())
    return presets, gr.update(choices=preset_names, value=None)
|
| 750 |
|
|
|
|
| 760 |
if not models and not favorites:
|
| 761 |
return gr.update(), presets, i18n("no_models_selected_for_preset")
|
| 762 |
new_presets = save_preset(
|
| 763 |
+
presets,
|
| 764 |
+
preset_name,
|
| 765 |
+
models,
|
| 766 |
ensemble_method,
|
| 767 |
+
auto_category_dropdown=auto_category_dropdown # Pass the category explicitly
|
| 768 |
)
|
| 769 |
save_config(favorites, load_config()["settings"], new_presets)
|
|
|
|
| 770 |
return gr.update(choices=list(new_presets.keys()), value=None), new_presets, i18n("preset_saved").format(preset_name)
|
| 771 |
|
| 772 |
save_preset_btn.click(
|
|
|
|
| 823 |
label=i18n("original_audio"),
|
| 824 |
interactive=False,
|
| 825 |
every=1,
|
| 826 |
+
elem_id="original_audio_player",
|
| 827 |
+
streaming=True
|
| 828 |
)
|
| 829 |
with gr.Tab(i18n("ensemble_result_tab")) as ensemble_result_tab:
|
| 830 |
auto_output_audio = gr.Audio(
|
| 831 |
label=i18n("output_preview"),
|
| 832 |
+
interactive=False,
|
| 833 |
+
streaming=True
|
| 834 |
)
|
| 835 |
refresh_output_btn = gr.Button(i18n("refresh_output"), variant="secondary")
|
| 836 |
|
|
|
|
| 850 |
placeholder=i18n("waiting_for_processing"),
|
| 851 |
visible=False
|
| 852 |
)
|
| 853 |
+
|
| 854 |
with gr.Tab(i18n("download_sources_tab"), id="download_tab"):
|
| 855 |
with gr.Row():
|
| 856 |
with gr.Column():
|
|
|
|
| 905 |
ensemble_output_audio = gr.Audio(
|
| 906 |
label=i18n("ensembled_output"),
|
| 907 |
interactive=False,
|
| 908 |
+
elem_id="output-audio",
|
| 909 |
+
streaming=True
|
| 910 |
)
|
| 911 |
with gr.Tab(i18n("processing_log_tab")) as processing_log_tab:
|
| 912 |
with gr.Accordion(i18n("processing_details"), open=True, elem_id="log-accordion"):
|
|
|
|
| 924 |
variant="primary",
|
| 925 |
size="sm",
|
| 926 |
elem_id="process-btn"
|
| 927 |
+
)
|
| 928 |
+
|
| 929 |
+
with gr.Tab(i18n("phase_fixer_tab"), id="phase_fixer_tab"):
|
| 930 |
+
with gr.Row(equal_height=True):
|
| 931 |
+
with gr.Column(scale=1, min_width=350):
|
| 932 |
+
with gr.Group():
|
| 933 |
+
with gr.Row():
|
| 934 |
+
pf_source_file = gr.File(
|
| 935 |
+
file_types=[".wav", ".flac", ".mp3"],
|
| 936 |
+
label=i18n("source_file_label")
|
| 937 |
+
)
|
| 938 |
+
pf_target_file = gr.File(
|
| 939 |
+
file_types=[".wav", ".flac", ".mp3"],
|
| 940 |
+
label=i18n("target_file_label")
|
| 941 |
+
)
|
| 942 |
+
|
| 943 |
+
with gr.Group():
|
| 944 |
+
with gr.Row():
|
| 945 |
+
pf_source_model = gr.Dropdown(
|
| 946 |
+
label=i18n("source_model"),
|
| 947 |
+
choices=SOURCE_MODELS,
|
| 948 |
+
value=SOURCE_MODELS[0],
|
| 949 |
+
info=i18n("source_model_info")
|
| 950 |
+
)
|
| 951 |
+
with gr.Row():
|
| 952 |
+
pf_target_model = gr.Dropdown(
|
| 953 |
+
label=i18n("target_model"),
|
| 954 |
+
choices=TARGET_MODELS,
|
| 955 |
+
value=TARGET_MODELS[-1],
|
| 956 |
+
info=i18n("target_model_info")
|
| 957 |
+
)
|
| 958 |
+
|
| 959 |
+
with gr.Accordion(i18n("phase_fixer_settings"), open=False):
|
| 960 |
+
with gr.Row():
|
| 961 |
+
pf_scale_factor = gr.Slider(
|
| 962 |
+
label=i18n("scale_factor"),
|
| 963 |
+
minimum=0.5,
|
| 964 |
+
maximum=3.0,
|
| 965 |
+
step=0.05,
|
| 966 |
+
value=1.4,
|
| 967 |
+
info=i18n("scale_factor_info")
|
| 968 |
+
)
|
| 969 |
+
pf_output_format = gr.Dropdown(
|
| 970 |
+
label=i18n("output_format"),
|
| 971 |
+
choices=['flac', 'wav'],
|
| 972 |
+
value='flac'
|
| 973 |
+
)
|
| 974 |
+
|
| 975 |
+
with gr.Row():
|
| 976 |
+
pf_low_cutoff = gr.Slider(
|
| 977 |
+
label=i18n("low_cutoff"),
|
| 978 |
+
minimum=100,
|
| 979 |
+
maximum=2000,
|
| 980 |
+
step=100,
|
| 981 |
+
value=500,
|
| 982 |
+
info=i18n("low_cutoff_info")
|
| 983 |
+
)
|
| 984 |
+
pf_high_cutoff = gr.Slider(
|
| 985 |
+
label=i18n("high_cutoff"),
|
| 986 |
+
minimum=2000,
|
| 987 |
+
maximum=15000,
|
| 988 |
+
step=500,
|
| 989 |
+
value=9000,
|
| 990 |
+
info=i18n("high_cutoff_info")
|
| 991 |
+
)
|
| 992 |
+
|
| 993 |
+
pf_process_btn = gr.Button(i18n("run_phase_fixer"), variant="primary")
|
| 994 |
+
|
| 995 |
+
with gr.Column(scale=2, min_width=600):
|
| 996 |
+
pf_output_audio = gr.Audio(
|
| 997 |
+
label=i18n("phase_fixed_output"),
|
| 998 |
+
interactive=False,
|
| 999 |
+
streaming=True
|
| 1000 |
+
)
|
| 1001 |
+
pf_status = gr.Textbox(
|
| 1002 |
+
label=i18n("status"),
|
| 1003 |
+
interactive=False,
|
| 1004 |
+
placeholder=i18n("waiting_for_processing"),
|
| 1005 |
+
lines=2
|
| 1006 |
+
)
|
| 1007 |
+
|
| 1008 |
+
from phase_fixer import process_phase_fix
|
| 1009 |
+
|
| 1010 |
+
def run_phase_fixer(source_file, target_file, source_model, target_model, scale_factor, low_cutoff, high_cutoff, output_format):
    """Run the phase fixer on an uploaded source/target pair.

    Returns `(output_file, status)` as produced by `process_phase_fix`, or
    `(None, message)` when either upload is missing. `source_model` and
    `target_model` are accepted from the UI but not used here.
    """
    if any(upload is None for upload in (source_file, target_file)):
        return None, i18n("please_upload_both_files")

    def _to_path(upload):
        # Uploads may arrive as file-like objects or as plain paths.
        return upload.name if hasattr(upload, 'name') else upload

    phase_fix_kwargs = {
        "source_file": _to_path(source_file),
        "target_file": _to_path(target_file),
        "output_folder": os.path.join(BASE_DIR, 'phase_fixer_output'),
        "low_cutoff": int(low_cutoff),
        "high_cutoff": int(high_cutoff),
        "scale_factor": float(scale_factor),
        "output_format": output_format,
    }
    output_file, status = process_phase_fix(**phase_fix_kwargs)
    return output_file, status
|
| 1030 |
+
|
| 1031 |
+
pf_process_btn.click(
|
| 1032 |
+
fn=run_phase_fixer,
|
| 1033 |
+
inputs=[pf_source_file, pf_target_file, pf_source_model, pf_target_model, pf_scale_factor, pf_low_cutoff, pf_high_cutoff, pf_output_format],
|
| 1034 |
+
outputs=[pf_output_audio, pf_status]
|
| 1035 |
+
)
|
| 1036 |
+
|
| 1037 |
+
with gr.Tab(i18n("batch_processing_tab"), id="batch_processing_tab"):
|
| 1038 |
+
with gr.Row(equal_height=True):
|
| 1039 |
+
with gr.Column(scale=1, min_width=350):
|
| 1040 |
+
gr.Markdown(f"### {i18n('batch_description')}")
|
| 1041 |
+
|
| 1042 |
+
with gr.Group():
|
| 1043 |
+
batch_input_files = gr.File(
|
| 1044 |
+
file_types=[".wav", ".mp3", ".m4a", ".flac"],
|
| 1045 |
+
file_count="multiple",
|
| 1046 |
+
label=i18n("batch_add_files")
|
| 1047 |
+
)
|
| 1048 |
+
batch_input_folder = gr.Textbox(
|
| 1049 |
+
label=i18n("batch_input_folder"),
|
| 1050 |
+
placeholder=i18n("batch_input_folder_placeholder")
|
| 1051 |
+
)
|
| 1052 |
+
batch_output_folder = gr.Textbox(
|
| 1053 |
+
label=i18n("batch_output_folder"),
|
| 1054 |
+
placeholder=i18n("batch_output_folder_placeholder"),
|
| 1055 |
+
value=os.path.join(BASE_DIR, "batch_output")
|
| 1056 |
+
)
|
| 1057 |
+
|
| 1058 |
+
with gr.Group():
|
| 1059 |
+
batch_model_category = gr.Dropdown(
|
| 1060 |
+
label=i18n("model_category"),
|
| 1061 |
+
choices=[i18n(cat) for cat in get_all_model_configs_with_custom().keys()],
|
| 1062 |
+
value=i18n("Vocal Models")
|
| 1063 |
)
|
| 1064 |
+
batch_model_dropdown = gr.Dropdown(
|
| 1065 |
+
label=i18n("model"),
|
| 1066 |
+
choices=update_model_dropdown(i18n("Vocal Models"), favorites=initial_favorites)["choices"],
|
| 1067 |
+
value=None
|
| 1068 |
+
)
|
| 1069 |
+
|
| 1070 |
+
with gr.Accordion(i18n("settings"), open=False):
|
| 1071 |
+
with gr.Row():
|
| 1072 |
+
batch_chunk_size = gr.Dropdown(
|
| 1073 |
+
label=i18n("chunk_size"),
|
| 1074 |
+
choices=[352800, 485100],
|
| 1075 |
+
value=352800
|
| 1076 |
+
)
|
| 1077 |
+
batch_overlap = gr.Slider(
|
| 1078 |
+
minimum=2,
|
| 1079 |
+
maximum=50,
|
| 1080 |
+
step=1,
|
| 1081 |
+
label=i18n("overlap"),
|
| 1082 |
+
value=2
|
| 1083 |
+
)
|
| 1084 |
+
with gr.Row():
|
| 1085 |
+
batch_export_format = gr.Dropdown(
|
| 1086 |
+
label=i18n("format"),
|
| 1087 |
+
choices=['wav FLOAT', 'flac PCM_16', 'flac PCM_24'],
|
| 1088 |
+
value='wav FLOAT'
|
| 1089 |
+
)
|
| 1090 |
+
batch_extract_instrumental = gr.Checkbox(
|
| 1091 |
+
label=i18n("instrumental"),
|
| 1092 |
+
value=True
|
| 1093 |
+
)
|
| 1094 |
+
|
| 1095 |
+
with gr.Row():
|
| 1096 |
+
batch_start_btn = gr.Button(i18n("batch_start"), variant="primary")
|
| 1097 |
+
batch_stop_btn = gr.Button(i18n("batch_stop"), variant="secondary")
|
| 1098 |
+
|
| 1099 |
+
with gr.Column(scale=2, min_width=600):
|
| 1100 |
+
batch_file_list = gr.Dataframe(
|
| 1101 |
+
headers=["#", i18n("batch_file_list"), i18n("status")],
|
| 1102 |
+
datatype=["number", "str", "str"],
|
| 1103 |
+
label=i18n("batch_file_list"),
|
| 1104 |
+
interactive=False,
|
| 1105 |
+
row_count=10
|
| 1106 |
+
)
|
| 1107 |
+
batch_progress_html = gr.HTML(
|
| 1108 |
+
value=f"""
|
| 1109 |
+
<div id="batch-progress" style="margin-top: 10px;">
|
| 1110 |
+
<div style="font-size: 1rem; color: #C0C0C0; margin-bottom: 5px;">{i18n("waiting_for_processing")}</div>
|
| 1111 |
+
<div style="width: 100%; background-color: #444; border-radius: 5px; overflow: hidden;">
|
| 1112 |
+
<div style="width: 0%; height: 20px; background-color: #6e8efb; transition: width 0.3s;"></div>
|
| 1113 |
+
</div>
|
| 1114 |
+
</div>
|
| 1115 |
+
"""
|
| 1116 |
+
)
|
| 1117 |
+
batch_status = gr.Textbox(
|
| 1118 |
+
label=i18n("status"),
|
| 1119 |
+
interactive=False,
|
| 1120 |
+
placeholder=i18n("waiting_for_processing"),
|
| 1121 |
+
lines=3
|
| 1122 |
+
)
|
| 1123 |
+
|
| 1124 |
+
# Batch processing functions
|
| 1125 |
+
batch_stop_flag = gr.State(value=False)
|
| 1126 |
+
|
| 1127 |
+
def update_batch_file_list(files, folder_path):
    """Build the rows shown in the batch file table.

    Args:
        files: Uploaded files (objects with a `.name` path, or plain
            paths); may be empty or None.
        folder_path: Optional directory whose audio files are appended
            after the uploads; ignored unless it is an existing directory.

    Returns:
        A list of `[index, file name, status]` rows numbered consecutively
        from 1, or a single placeholder row when nothing was found.
    """
    audio_extensions = ('.wav', '.mp3', '.m4a', '.flac')
    names = []

    for f in files or []:
        fname = f.name if hasattr(f, 'name') else str(f)
        names.append(os.path.basename(fname))

    if folder_path and os.path.isdir(folder_path):
        names.extend(
            fname for fname in os.listdir(folder_path)
            if fname.lower().endswith(audio_extensions)
        )

    if not names:
        return [[0, i18n("batch_no_files"), ""]]

    # Number the rows after filtering so skipped non-audio directory
    # entries do not leave gaps in the "#" column.
    return [[i, name, "⏳ Pending"] for i, name in enumerate(names, 1)]
|
| 1139 |
+
|
| 1140 |
+
def run_batch_processing(files, folder_path, output_folder, model, chunk_size, overlap, export_format, extract_inst, stop_flag):
    """Walk the batch queue and report per-file status plus a progress bar.

    Args:
        files: Uploaded files (objects with a `.name` path, or plain paths).
        folder_path: Optional directory whose audio files are added to the queue.
        output_folder: Directory created (if needed) for the results.
        model, chunk_size, overlap, export_format, extract_inst: Separation
            settings passed in from the UI; not consumed by this function yet.
        stop_flag: Truthy when a stop was requested. It is a plain value
            received at call time, so it does not change while the loop runs.

    Returns:
        `(rows, status_text, progress_html)` for the file table, the status
        box and the progress bar.

    NOTE(review): the per-file step is still a placeholder -- each file is
    marked "Done" without `process_audio` being called.
    """
    from processing import process_audio  # imported by the original; not called below yet

    all_files = []
    if files:
        all_files.extend([f.name if hasattr(f, 'name') else str(f) for f in files])
    if folder_path and os.path.isdir(folder_path):
        for fname in os.listdir(folder_path):
            if fname.lower().endswith(('.wav', '.mp3', '.m4a', '.flac')):
                all_files.append(os.path.join(folder_path, fname))

    if not all_files:
        return [[0, i18n("batch_no_files"), ""]], i18n("batch_no_files"), batch_progress_html.value

    os.makedirs(output_folder, exist_ok=True)
    results = []
    total = len(all_files)

    # Start from the idle progress bar: when stop_flag is already set every
    # iteration below skips the assignment, and the final return would
    # otherwise raise UnboundLocalError.
    progress_html = batch_progress_html.value

    for idx, file_path in enumerate(all_files, 1):
        if stop_flag:
            results.append([idx, os.path.basename(file_path), "Stopped"])
            continue

        results.append([idx, os.path.basename(file_path), "🔄 Processing..."])
        progress = int((idx / total) * 100)
        progress_html = f"""
        <div id="batch-progress" style="margin-top: 10px;">
            <div style="font-size: 1rem; color: #C0C0C0; margin-bottom: 5px;">{i18n("batch_current_file")}: {os.path.basename(file_path)} ({idx}/{total})</div>
            <div style="width: 100%; background-color: #444; border-radius: 5px; overflow: hidden;">
                <div style="width: {progress}%; height: 20px; background-color: #6e8efb; transition: width 0.3s;"></div>
            </div>
        </div>
        """

        try:
            # Process file using inference
            results[-1][2] = "Done"
        except Exception as e:
            results[-1][2] = f"Error: {str(e)[:30]}"

    final_status = i18n("batch_stopped") if stop_flag else i18n("batch_completed")
    return results, final_status, progress_html
|
| 1182 |
+
|
| 1183 |
+
batch_input_files.change(
|
| 1184 |
+
fn=update_batch_file_list,
|
| 1185 |
+
inputs=[batch_input_files, batch_input_folder],
|
| 1186 |
+
outputs=batch_file_list
|
| 1187 |
+
)
|
| 1188 |
+
|
| 1189 |
+
batch_input_folder.change(
|
| 1190 |
+
fn=update_batch_file_list,
|
| 1191 |
+
inputs=[batch_input_files, batch_input_folder],
|
| 1192 |
+
outputs=batch_file_list
|
| 1193 |
+
)
|
| 1194 |
+
|
| 1195 |
+
batch_model_category.change(
|
| 1196 |
+
fn=lambda cat: gr.update(choices=update_model_dropdown(next((k for k in get_all_model_configs_with_custom().keys() if i18n(k) == cat), list(get_all_model_configs_with_custom().keys())[0]), favorites=load_config()["favorites"])["choices"]),
|
| 1197 |
+
inputs=batch_model_category,
|
| 1198 |
+
outputs=batch_model_dropdown
|
| 1199 |
+
)
|
| 1200 |
+
|
| 1201 |
+
batch_start_btn.click(
|
| 1202 |
+
fn=run_batch_processing,
|
| 1203 |
+
inputs=[batch_input_files, batch_input_folder, batch_output_folder, batch_model_dropdown,
|
| 1204 |
+
batch_chunk_size, batch_overlap, batch_export_format, batch_extract_instrumental, batch_stop_flag],
|
| 1205 |
+
outputs=[batch_file_list, batch_status, batch_progress_html]
|
| 1206 |
+
)
|
| 1207 |
+
|
| 1208 |
+
batch_stop_btn.click(
|
| 1209 |
+
fn=lambda: True,
|
| 1210 |
+
outputs=batch_stop_flag
|
| 1211 |
+
)
|
| 1212 |
+
|
| 1213 |
+
with gr.Tab(i18n("custom_models_tab"), id="custom_models_tab"):
|
| 1214 |
+
with gr.Row(equal_height=True):
|
| 1215 |
+
with gr.Column(scale=1, min_width=400):
|
| 1216 |
+
gr.Markdown(f"### {i18n('add_custom_model')}")
|
| 1217 |
+
gr.Markdown(i18n("custom_model_info"))
|
| 1218 |
+
|
| 1219 |
+
with gr.Group():
|
| 1220 |
+
custom_model_name_input = gr.Textbox(
|
| 1221 |
+
label=i18n("custom_model_name"),
|
| 1222 |
+
placeholder=i18n("custom_model_name_placeholder"),
|
| 1223 |
+
interactive=True
|
| 1224 |
+
)
|
| 1225 |
+
custom_checkpoint_url = gr.Textbox(
|
| 1226 |
+
label=i18n("checkpoint_url"),
|
| 1227 |
+
placeholder=i18n("checkpoint_url_placeholder"),
|
| 1228 |
+
interactive=True
|
| 1229 |
+
)
|
| 1230 |
+
custom_config_url = gr.Textbox(
|
| 1231 |
+
label=i18n("config_url"),
|
| 1232 |
+
placeholder=i18n("config_url_placeholder"),
|
| 1233 |
+
interactive=True
|
| 1234 |
+
)
|
| 1235 |
+
custom_py_url = gr.Textbox(
|
| 1236 |
+
label=i18n("custom_py_url"),
|
| 1237 |
+
placeholder=i18n("custom_py_url_placeholder"),
|
| 1238 |
+
interactive=True
|
| 1239 |
+
)
|
| 1240 |
+
|
| 1241 |
+
with gr.Row():
|
| 1242 |
+
auto_detect_checkbox = gr.Checkbox(
|
| 1243 |
+
label=i18n("auto_detect_type"),
|
| 1244 |
+
value=True,
|
| 1245 |
+
interactive=True
|
| 1246 |
+
)
|
| 1247 |
+
custom_model_type = gr.Dropdown(
|
| 1248 |
+
label=i18n("model_type"),
|
| 1249 |
+
choices=SUPPORTED_MODEL_TYPES,
|
| 1250 |
+
value="bs_roformer",
|
| 1251 |
+
interactive=True,
|
| 1252 |
+
visible=False
|
| 1253 |
+
)
|
| 1254 |
+
|
| 1255 |
+
add_model_btn = gr.Button(i18n("add_model_btn"), variant="primary")
|
| 1256 |
+
add_model_status = gr.Textbox(label=i18n("status"), interactive=False)
|
| 1257 |
+
|
| 1258 |
+
with gr.Column(scale=1, min_width=400):
|
| 1259 |
+
gr.Markdown(f"### {i18n('custom_models_list')}")
|
| 1260 |
+
|
| 1261 |
+
custom_models_list_display = gr.Dataframe(
|
| 1262 |
+
headers=[i18n("custom_model_name"), i18n("model_type")],
|
| 1263 |
+
datatype=["str", "str"],
|
| 1264 |
+
label="",
|
| 1265 |
+
interactive=False,
|
| 1266 |
+
row_count=10
|
| 1267 |
+
)
|
| 1268 |
+
|
| 1269 |
+
with gr.Row():
|
| 1270 |
+
delete_model_dropdown = gr.Dropdown(
|
| 1271 |
+
label=i18n("select_model_to_delete"),
|
| 1272 |
+
choices=[],
|
| 1273 |
+
interactive=True
|
| 1274 |
+
)
|
| 1275 |
+
delete_model_btn = gr.Button(i18n("delete_model"), variant="secondary")
|
| 1276 |
+
|
| 1277 |
+
refresh_custom_models_btn = gr.Button(i18n("refresh_models"), variant="secondary")
|
| 1278 |
+
delete_model_status = gr.Textbox(label=i18n("status"), interactive=False)
|
| 1279 |
+
|
| 1280 |
+
# Custom Models tab functions
|
| 1281 |
+
def toggle_model_type_visibility(auto_detect):
    """Show the manual model-type dropdown only while auto-detect is off."""
    show_manual_type = not auto_detect
    return gr.update(visible=show_manual_type)
|
| 1283 |
+
|
| 1284 |
+
def refresh_custom_models_display():
    """Rebuild the custom-models table and the delete dropdown.

    Returns the table rows (`[name, type]` per model, or a single
    placeholder row when there are none) and a dropdown update carrying
    the model names.
    """
    models_list = get_custom_models_list()
    if models_list:
        rows = []
        names = []
        for name, mtype in models_list:
            rows.append([name, mtype])
            names.append(name)
        return rows, gr.update(choices=names)

    return [[i18n("no_custom_models"), ""]], gr.update(choices=[])
|
| 1291 |
+
|
| 1292 |
+
def add_model_handler(name, checkpoint_url, config_url, py_url, auto_detect, model_type):
    """Register a custom model and refresh every widget that lists models.

    Always returns ten values: the status text, the custom-models table,
    the delete dropdown, three category-dropdown updates and the four
    input text fields. On success the inputs are cleared; on failure
    everything except the status is left untouched.
    """
    selected_type = "auto" if auto_detect else model_type
    success, message = add_custom_model(name, selected_type, checkpoint_url, config_url, py_url, auto_detect)

    if not success:
        untouched = tuple(gr.update() for _ in range(9))
        return (i18n("model_add_error").format(message),) + untouched

    # Refresh the table and the delete dropdown.
    models_list = get_custom_models_list()
    if models_list:
        data = [[n, t] for n, t in models_list]
        choices = [n for n, _ in models_list]
    else:
        data = [[i18n("no_custom_models"), ""]]
        choices = []

    # The category list may have changed now that a custom model exists.
    all_configs = get_all_model_configs_with_custom()
    category_choices = [i18n(cat) for cat in all_configs.keys()]

    return (
        i18n("model_added_success"),
        data,
        gr.update(choices=choices),
        gr.update(choices=category_choices),
        gr.update(choices=category_choices),
        gr.update(choices=category_choices),
        "", "", "", "",  # clear the four input fields
    )
|
| 1321 |
+
|
| 1322 |
+
def delete_model_handler(model_name):
    """Delete a custom model and refresh every widget that lists models.

    Always returns six values, one per component wired to the delete
    button: the status text, the custom-models table, the delete dropdown
    and three category-dropdown updates. When nothing is selected or the
    deletion fails, only the status text changes.
    """
    if not model_name:
        # Return all six outputs here as well; the original guard returned
        # only three, which does not match the handler's output list.
        return (
            i18n("select_model_to_delete"),
            gr.update(), gr.update(), gr.update(), gr.update(), gr.update(),
        )

    success, message = delete_custom_model(model_name)
    if not success:
        return (
            i18n("model_delete_error").format(message),
            gr.update(), gr.update(), gr.update(), gr.update(), gr.update(),
        )

    models_list = get_custom_models_list()
    data = [[n, t] for n, t in models_list] if models_list else [[i18n("no_custom_models"), ""]]
    choices = [n for n, _ in models_list] if models_list else []

    # The category list may have changed now that a model is gone.
    all_configs = get_all_model_configs_with_custom()
    category_choices = [i18n(cat) for cat in all_configs.keys()]

    return (
        i18n("model_deleted_success"),
        data,
        gr.update(choices=choices, value=None),
        gr.update(choices=category_choices),
        gr.update(choices=category_choices),
        gr.update(choices=category_choices),
    )
|
| 1342 |
+
|
| 1343 |
+
# Event handlers
|
| 1344 |
+
auto_detect_checkbox.change(
|
| 1345 |
+
fn=toggle_model_type_visibility,
|
| 1346 |
+
inputs=auto_detect_checkbox,
|
| 1347 |
+
outputs=custom_model_type
|
| 1348 |
+
)
|
| 1349 |
+
|
| 1350 |
+
add_model_btn.click(
|
| 1351 |
+
fn=add_model_handler,
|
| 1352 |
+
inputs=[custom_model_name_input, custom_checkpoint_url, custom_config_url, custom_py_url, auto_detect_checkbox, custom_model_type],
|
| 1353 |
+
outputs=[add_model_status, custom_models_list_display, delete_model_dropdown, model_category, auto_category_dropdown, batch_model_category, custom_model_name_input, custom_checkpoint_url, custom_config_url, custom_py_url]
|
| 1354 |
+
)
|
| 1355 |
+
|
| 1356 |
+
delete_model_btn.click(
|
| 1357 |
+
fn=delete_model_handler,
|
| 1358 |
+
inputs=delete_model_dropdown,
|
| 1359 |
+
outputs=[delete_model_status, custom_models_list_display, delete_model_dropdown, model_category, auto_category_dropdown, batch_model_category]
|
| 1360 |
+
)
|
| 1361 |
+
|
| 1362 |
+
refresh_custom_models_btn.click(
|
| 1363 |
+
fn=refresh_custom_models_display,
|
| 1364 |
+
outputs=[custom_models_list_display, delete_model_dropdown]
|
| 1365 |
+
)
|
| 1366 |
+
|
| 1367 |
+
# Initialize custom models display on load
|
| 1368 |
+
demo.load(
|
| 1369 |
+
fn=refresh_custom_models_display,
|
| 1370 |
+
outputs=[custom_models_list_display, delete_model_dropdown]
|
| 1371 |
+
)
|
| 1372 |
|
| 1373 |
def save_settings_on_process(*args):
    """Persist the separation settings, then forward progress from process_audio.

    `args` follows the order of the process button's inputs: audio file,
    model, base chunk size, overlap, export format, optimize mode, AMP,
    TF32, cuDNN benchmark, TTA, demud phase-remix, extract instrumental,
    the Apollo options (12-17), matchering (18-19), model category, model
    again (21), then chunk-size mode (22) and the custom chunk size (23).

    Yields every update produced by `process_audio`, which receives only
    the first 22 arguments with the cleaned model name and the effective
    chunk size substituted.
    """
    apollo_method_value = args[15]
    backend_apollo_method = "mid_side_method" if apollo_method_value == i18n("mid_side_method") else "normal_method"
    cleaned_model = clean_model(args[1]) if args[1] else None

    # Work out the chunk size that is actually used, based on the mode.
    cs_mode = args[22] if len(args) > 22 else "base"
    cs_custom_val = args[23] if len(args) > 23 else 352800
    cs_base_val = args[2]  # base dropdown value

    if cs_mode == "custom":
        effective_chunk = int(cs_custom_val) if cs_custom_val else 352800
    elif cs_mode == "yaml":
        effective_chunk = "yaml"  # signal processing.py to read from YAML
    else:
        effective_chunk = int(cs_base_val) if cs_base_val else 352800

    # Read the config once; favorites and presets are written back unchanged.
    config = load_config()
    saved_settings = config["settings"]

    # args[11] is the extract-instrumental checkbox, not an ensemble type,
    # so keep the ensemble type that is already saved. The old value is
    # used only when nothing was saved yet, so the key is always written.
    if "auto_ensemble_type" in saved_settings:
        auto_ensemble_type = saved_settings["auto_ensemble_type"]
    else:
        auto_ensemble_type = args[11]

    settings = {
        "chunk_size": cs_base_val,
        "chunk_size_mode": cs_mode,
        "chunk_size_custom": cs_custom_val,
        "overlap": args[3],
        "export_format": args[4],
        "optimize_mode": args[5],
        "enable_amp": args[6],
        "enable_tf32": args[7],
        "enable_cudnn_benchmark": args[8],
        "use_tta": args[9],
        "use_demud_phaseremix_inst": args[10],
        "extract_instrumental": args[11],
        "use_apollo": args[12],
        "apollo_chunk_size": args[13],
        "apollo_overlap": args[14],
        "apollo_method": backend_apollo_method,
        "apollo_normal_model": args[16],
        "apollo_midside_model": args[17],
        "use_matchering": args[18],
        "matchering_passes": args[19],
        "model_category": args[20],
        "selected_model": cleaned_model,
        "auto_ensemble_type": auto_ensemble_type,
    }
    save_config(config["favorites"], settings, config["presets"])

    # process_audio takes indices 0-21 only, with the effective chunk size at [2].
    modified_args = list(args[:22])
    modified_args[1] = cleaned_model
    modified_args[2] = effective_chunk
    modified_args[21] = cleaned_model

    # Forward every yield so the UI receives progress updates as they happen.
    for update in process_audio(*modified_args):
        yield update
|
| 1426 |
|
| 1427 |
def save_auto_ensemble_settings(*args):
    """Persist the auto-ensemble options, then forward progress from auto_ensemble_process.

    Stores `args[7]`, `args[14]` and `args[15]` as the ensemble type,
    matchering flag and matchering passes. Only updates that are 3-tuples
    are yielded; anything else from `auto_ensemble_process` is dropped.
    """
    settings = load_config()["settings"]
    for key, position in (
        ("auto_ensemble_type", 7),
        ("use_matchering", 14),
        ("matchering_passes", 15),
    ):
        settings[key] = args[position]

    favorites = load_config()["favorites"]
    presets = load_config()["presets"]
    save_config(favorites, settings, presets)

    # Forward progress updates as they are produced.
    for update in auto_ensemble_process(*args):
        if not isinstance(update, tuple) or len(update) != 3:
            continue
        yield update
|
|
|
|
| 1438 |
|
| 1439 |
def update_category_dropdowns(cat):
    """Return model-choice updates for two dropdowns after a category change.

    `cat` is the translated category label. It is mapped back to its
    original key, falling back to the first known category when no label
    matches.
    """
    all_configs = get_all_model_configs_with_custom()
    category_keys = list(all_configs.keys())

    eng_cat = category_keys[0]
    for key in category_keys:
        if i18n(key) == cat:
            eng_cat = key
            break

    favorites = load_config()["favorites"]
    choices = update_model_dropdown(eng_cat, favorites=favorites)["choices"]
    return gr.update(choices=choices), gr.update(choices=choices)
|
| 1444 |
|
| 1445 |
model_category.change(
|
|
|
|
| 1473 |
)
|
| 1474 |
|
| 1475 |
auto_category_dropdown.change(
|
| 1476 |
+
fn=lambda cat: gr.update(choices=update_model_dropdown(next((k for k in get_all_model_configs_with_custom().keys() if i18n(k) == cat), list(get_all_model_configs_with_custom().keys())[0]), favorites=load_config()["favorites"])["choices"]),
|
| 1477 |
inputs=auto_category_dropdown,
|
| 1478 |
outputs=selected_models
|
| 1479 |
)
|
| 1480 |
|
| 1481 |
+
def clean_inputs(*args):
    """Return the arguments as a list with both model-name slots cleaned.

    Positions 1 and 21 hold the selected model; each is passed through
    `clean_model`, or replaced by None when empty.
    """
    cleaned_args = list(args)
    for position in (1, 21):
        raw_model = cleaned_args[position]
        cleaned_args[position] = clean_model(raw_model) if raw_model else None
    return cleaned_args
|
| 1486 |
+
|
| 1487 |
+
def process_wrapper(*args):
    """Clean the raw UI inputs, then relay every update from save_settings_on_process."""
    cleaned = clean_inputs(*args)
    yield from save_settings_on_process(*cleaned)
|
| 1491 |
|
| 1492 |
process_btn.click(
|
| 1493 |
+
fn=process_wrapper,
|
| 1494 |
inputs=[
|
| 1495 |
input_audio_file, model_dropdown, chunk_size, overlap, export_format,
|
| 1496 |
+
optimize_mode, enable_amp, enable_tf32, enable_cudnn_benchmark,
|
| 1497 |
use_tta, use_demud_phaseremix_inst, extract_instrumental,
|
| 1498 |
use_apollo, apollo_chunk_size, apollo_overlap,
|
| 1499 |
apollo_method, apollo_normal_model, apollo_midside_model,
|
| 1500 |
+
use_matchering, matchering_passes, model_category, model_dropdown,
|
| 1501 |
+
chunk_size_mode, chunk_size_custom
|
| 1502 |
],
|
| 1503 |
outputs=[
|
| 1504 |
vocals_audio, instrumental_audio, phaseremix_audio, drum_audio, karaoke_audio,
|
| 1505 |
other_audio, bass_audio, effects_audio, speech_audio, bleed_audio, music_audio,
|
| 1506 |
dry_audio, male_audio, female_audio,
|
| 1507 |
+
mid_audio, side_audio,
|
| 1508 |
separation_process_status, separation_progress_html
|
| 1509 |
]
|
| 1510 |
)
|
|
|
|
| 1548 |
refresh_btn.click(fn=update_file_list, outputs=file_dropdown)
|
| 1549 |
ensemble_process_btn.click(fn=ensemble_audio_fn, inputs=[file_dropdown, ensemble_type, weights_input], outputs=[ensemble_output_audio, ensemble_status])
|
| 1550 |
|
| 1551 |
+
return demo
|
helpers.py
CHANGED
|
@@ -20,8 +20,19 @@ from datetime import datetime
|
|
| 20 |
import yt_dlp
|
| 21 |
import validators
|
| 22 |
from pytube import YouTube
|
| 23 |
-
|
| 24 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
import io
|
| 26 |
import math
|
| 27 |
import hashlib
|
|
@@ -29,13 +40,12 @@ import gc
|
|
| 29 |
import psutil
|
| 30 |
import concurrent.futures
|
| 31 |
from tqdm import tqdm
|
| 32 |
-
from google.oauth2.credentials import Credentials
|
| 33 |
import tempfile
|
| 34 |
from urllib.parse import urlparse, quote
|
| 35 |
import argparse
|
| 36 |
from tqdm.auto import tqdm
|
| 37 |
import torch.nn as nn
|
| 38 |
-
from model import get_model_config, MODEL_CONFIGS
|
| 39 |
from assets.i18n.i18n import I18nAuto
|
| 40 |
import matchering as mg
|
| 41 |
from scipy.signal import find_peaks
|
|
@@ -59,7 +69,6 @@ def extract_model_name_from_checkpoint(checkpoint_path):
|
|
| 59 |
return "Unknown"
|
| 60 |
base_name = os.path.basename(checkpoint_path)
|
| 61 |
model_name = os.path.splitext(base_name)[0]
|
| 62 |
-
print(f"Original checkpoint path: {checkpoint_path}, extracted model_name: {model_name}")
|
| 63 |
return model_name.strip()
|
| 64 |
|
| 65 |
for directory in [BASE_DIR, INPUT_DIR, OUTPUT_DIR, OLD_OUTPUT_DIR, AUTO_ENSEMBLE_TEMP, AUTO_ENSEMBLE_OUTPUT, VIDEO_TEMP, ENSEMBLE_DIR]:
|
|
@@ -95,7 +104,8 @@ def clean_model(model):
|
|
| 95 |
return cleaned
|
| 96 |
|
| 97 |
def get_original_category(translated_category):
|
| 98 |
-
|
|
|
|
| 99 |
if i18n(original_cat) == translated_category:
|
| 100 |
return original_cat
|
| 101 |
return None
|
|
@@ -109,9 +119,11 @@ def clamp_percentage(value):
|
|
| 109 |
return 0
|
| 110 |
|
| 111 |
def update_model_dropdown(category, favorites=None):
|
|
|
|
|
|
|
| 112 |
# Map translated category back to English
|
| 113 |
-
eng_cat = next((k for k in
|
| 114 |
-
models =
|
| 115 |
choices = []
|
| 116 |
favorite_models = []
|
| 117 |
non_favorite_models = []
|
|
@@ -126,6 +138,11 @@ def update_model_dropdown(category, favorites=None):
|
|
| 126 |
choices = favorite_models + non_favorite_models
|
| 127 |
return {"choices": choices}
|
| 128 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 129 |
def handle_file_upload(uploaded_file, file_path, is_auto_ensemble=False):
|
| 130 |
clear_temp_folder("/tmp", exclude_items=["gradio", "config.json"])
|
| 131 |
clear_directory(INPUT_DIR)
|
|
|
|
| 20 |
import yt_dlp
|
| 21 |
import validators
|
| 22 |
from pytube import YouTube
|
| 23 |
+
|
| 24 |
+
# Google API imports (optional - for Colab/Google Drive support)
|
| 25 |
+
try:
|
| 26 |
+
from googleapiclient.discovery import build
|
| 27 |
+
from googleapiclient.http import MediaIoBaseDownload
|
| 28 |
+
from google.oauth2.credentials import Credentials
|
| 29 |
+
GOOGLE_API_AVAILABLE = True
|
| 30 |
+
except ImportError:
|
| 31 |
+
GOOGLE_API_AVAILABLE = False
|
| 32 |
+
build = None
|
| 33 |
+
MediaIoBaseDownload = None
|
| 34 |
+
Credentials = None
|
| 35 |
+
|
| 36 |
import io
|
| 37 |
import math
|
| 38 |
import hashlib
|
|
|
|
| 40 |
import psutil
|
| 41 |
import concurrent.futures
|
| 42 |
from tqdm import tqdm
|
|
|
|
| 43 |
import tempfile
|
| 44 |
from urllib.parse import urlparse, quote
|
| 45 |
import argparse
|
| 46 |
from tqdm.auto import tqdm
|
| 47 |
import torch.nn as nn
|
| 48 |
+
from model import get_model_config, MODEL_CONFIGS, get_all_model_configs_with_custom, load_custom_models
|
| 49 |
from assets.i18n.i18n import I18nAuto
|
| 50 |
import matchering as mg
|
| 51 |
from scipy.signal import find_peaks
|
|
|
|
| 69 |
return "Unknown"
|
| 70 |
base_name = os.path.basename(checkpoint_path)
|
| 71 |
model_name = os.path.splitext(base_name)[0]
|
|
|
|
| 72 |
return model_name.strip()
|
| 73 |
|
| 74 |
for directory in [BASE_DIR, INPUT_DIR, OUTPUT_DIR, OLD_OUTPUT_DIR, AUTO_ENSEMBLE_TEMP, AUTO_ENSEMBLE_OUTPUT, VIDEO_TEMP, ENSEMBLE_DIR]:
|
|
|
|
| 104 |
return cleaned
|
| 105 |
|
| 106 |
def get_original_category(translated_category):
    """Map a translated category label back to its original config key.

    Walks the keys of get_all_model_configs_with_custom() in order and
    returns the first one whose i18n() translation equals
    ``translated_category``; returns None when no key matches.
    """
    candidates = (
        category
        for category in get_all_model_configs_with_custom()
        if i18n(category) == translated_category
    )
    return next(candidates, None)
|
|
|
|
| 119 |
return 0
|
| 120 |
|
| 121 |
def update_model_dropdown(category, favorites=None):
|
| 122 |
+
# Get all configs including custom models
|
| 123 |
+
all_configs = get_all_model_configs_with_custom()
|
| 124 |
# Map translated category back to English
|
| 125 |
+
eng_cat = next((k for k in all_configs.keys() if i18n(k) == category), list(all_configs.keys())[0])
|
| 126 |
+
models = all_configs.get(eng_cat, {})
|
| 127 |
choices = []
|
| 128 |
favorite_models = []
|
| 129 |
non_favorite_models = []
|
|
|
|
| 138 |
choices = favorite_models + non_favorite_models
|
| 139 |
return {"choices": choices}
|
| 140 |
|
| 141 |
+
def get_model_categories():
    """Return the category names of all model configs (built-in plus custom) as a list."""
    return [*get_all_model_configs_with_custom().keys()]
|
| 145 |
+
|
| 146 |
def handle_file_upload(uploaded_file, file_path, is_auto_ensemble=False):
|
| 147 |
clear_temp_folder("/tmp", exclude_items=["gradio", "config.json"])
|
| 148 |
clear_directory(INPUT_DIR)
|
inference.py
CHANGED
|
@@ -3,21 +3,16 @@ __author__ = 'Roman Solovyev (ZFTurbo): https://github.com/ZFTurbo/'
|
|
| 3 |
|
| 4 |
import argparse
|
| 5 |
import time
|
| 6 |
-
import logging
|
| 7 |
import librosa
|
|
|
|
| 8 |
import sys
|
| 9 |
import os
|
| 10 |
import glob
|
| 11 |
import torch
|
|
|
|
| 12 |
import torch.nn as nn
|
| 13 |
import numpy as np
|
| 14 |
-
|
| 15 |
-
import spaces
|
| 16 |
-
import warnings
|
| 17 |
-
warnings.filterwarnings("ignore")
|
| 18 |
-
|
| 19 |
-
# Loglama ayarları
|
| 20 |
-
logging.basicConfig(level=logging.DEBUG, filename='utils.log', format='%(asctime)s - %(levelname)s - %(message)s')
|
| 21 |
|
| 22 |
# Colab kontrolü
|
| 23 |
try:
|
|
@@ -26,22 +21,26 @@ try:
|
|
| 26 |
except ImportError:
|
| 27 |
IS_COLAB = False
|
| 28 |
|
| 29 |
-
# i18n yer tutucu
|
| 30 |
-
class I18nAuto:
|
| 31 |
-
def __call__(self, message):
|
| 32 |
-
return message
|
| 33 |
-
def format(self, message, *args):
|
| 34 |
-
return message.format(*args)
|
| 35 |
-
|
| 36 |
i18n = I18nAuto()
|
| 37 |
|
| 38 |
current_dir = os.path.dirname(os.path.abspath(__file__))
|
| 39 |
sys.path.append(current_dir)
|
| 40 |
|
| 41 |
from utils import demix, get_model_from_config, normalize_audio, denormalize_audio
|
| 42 |
-
from utils import prefer_target_instrument, apply_tta, load_start_checkpoint
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
|
| 44 |
def shorten_filename(filename, max_length=30):
|
|
|
|
| 45 |
base, ext = os.path.splitext(filename)
|
| 46 |
if len(base) <= max_length:
|
| 47 |
return filename
|
|
@@ -49,217 +48,192 @@ def shorten_filename(filename, max_length=30):
|
|
| 49 |
return shortened
|
| 50 |
|
| 51 |
def get_soundfile_subtype(pcm_type, is_float=False):
|
| 52 |
-
|
|
|
|
| 53 |
return 'FLOAT'
|
| 54 |
-
subtype_map = {
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
return subtype_map.get(pcm_type, 'FLOAT')
|
| 56 |
|
| 57 |
-
def
|
| 58 |
-
progress_percent = min(max(round(progress_percent), 0), 100)
|
| 59 |
-
return f"""
|
| 60 |
-
<div id="custom-progress" style="margin-top: 10px;">
|
| 61 |
-
<div style="font-size: 1rem; color: #C0C0C0; margin-bottom: 5px;" id="progress-label">{progress_label}</div>
|
| 62 |
-
<div style="width: 100%; background-color: #444; border-radius: 5px; overflow: hidden;">
|
| 63 |
-
<div id="progress-bar" style="width: {progress_percent}%; height: 20px; background-color: #6e8efb; transition: width 0.3s; max-width: 100%;"></div>
|
| 64 |
-
</div>
|
| 65 |
-
</div>
|
| 66 |
-
"""
|
| 67 |
-
|
| 68 |
-
def run_folder(model, args, config, device, verbose: bool = False, progress=None):
|
| 69 |
start_time = time.time()
|
| 70 |
model.eval()
|
| 71 |
|
| 72 |
mixture_paths = sorted(glob.glob(os.path.join(args.input_folder, '*.*')))
|
| 73 |
sample_rate = getattr(config.audio, 'sample_rate', 44100)
|
| 74 |
|
| 75 |
-
logging.info(f"Total files found: {len(mixture_paths)} with sample rate: {sample_rate}")
|
| 76 |
print(i18n("total_files_found").format(len(mixture_paths), sample_rate))
|
| 77 |
|
| 78 |
instruments = prefer_target_instrument(config)[:]
|
|
|
|
|
|
|
| 79 |
store_dir = args.store_dir
|
| 80 |
os.makedirs(store_dir, exist_ok=True)
|
| 81 |
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 85 |
|
| 86 |
for path in mixture_paths:
|
| 87 |
try:
|
| 88 |
mix, sr = librosa.load(path, sr=sample_rate, mono=False)
|
| 89 |
-
logging.info(f"Loaded audio: {path}, shape: {mix.shape}")
|
| 90 |
print(i18n("loaded_audio").format(path, mix.shape))
|
|
|
|
|
|
|
|
|
|
|
|
|
| 91 |
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
if
|
| 95 |
-
progress(base_progress / 100, desc=i18n("processing_file").format(processed_files, total_files))
|
| 96 |
-
update_progress_html(i18n("processing_file").format(processed_files, total_files), base_progress)
|
| 97 |
-
|
| 98 |
-
mix_orig = mix.copy()
|
| 99 |
-
if 'normalize' in config.inference and config.inference.get('normalize', False):
|
| 100 |
mix, norm_params = normalize_audio(mix)
|
| 101 |
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
)
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
progress=lambda p, desc: progress((base_progress + 70 + p * 15) / 100, desc=desc) if progress else None
|
| 137 |
-
)
|
| 138 |
-
if args.use_tta:
|
| 139 |
-
waveforms_modified = apply_tta(
|
| 140 |
-
config, model, mix_modified, waveforms_orig, device, args.model_type,
|
| 141 |
-
progress=lambda p, desc: progress((base_progress + 85 + p * 10) / 100, desc=desc) if progress else None
|
| 142 |
-
)
|
| 143 |
-
waveforms_orig['instrumental_phaseremix'] = mix_orig + mix_modified_ - waveforms_modified[instr]
|
| 144 |
-
|
| 145 |
-
if args.extract_instrumental:
|
| 146 |
-
instr = 'vocals' if 'vocals' in instruments else instruments[0]
|
| 147 |
-
waveforms_orig['instrumental'] = mix_orig - waveforms_orig[instr]
|
| 148 |
-
if 'instrumental' not in instruments:
|
| 149 |
-
instruments.append('instrumental')
|
| 150 |
-
|
| 151 |
-
for i, instr in enumerate(instruments):
|
| 152 |
-
estimates = waveforms_orig[instr]
|
| 153 |
-
if 'normalize' in config.inference and config.inference.get('normalize', False):
|
| 154 |
estimates = denormalize_audio(estimates, norm_params)
|
| 155 |
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
|
|
|
|
|
|
|
|
|
| 159 |
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
|
| 165 |
-
|
| 166 |
-
if progress is not None and callable(getattr('progress', '__call__', None)):
|
| 167 |
-
progress(save_progress / 100, desc=i18n("saving_output").format(instr, processed_files, total_files))
|
| 168 |
-
update_progress_html(i18n("saving_output").format(instr, processed_files, total_files), save_progress)
|
| 169 |
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
args = parser.parse_args(args if args else [])
|
| 211 |
-
except Exception as e:
|
| 212 |
-
logging.error(f"Argument parsing failed: {str(e)}")
|
| 213 |
-
raise ValueError(f"Invalid command-line arguments: {str(e)}")
|
| 214 |
|
| 215 |
device = "cpu"
|
| 216 |
if args.force_cpu:
|
| 217 |
-
|
| 218 |
elif torch.cuda.is_available():
|
| 219 |
-
logging.info("CUDA available")
|
| 220 |
print(i18n("cuda_available"))
|
| 221 |
-
device = f'cuda:{args.device_ids[0]}'
|
| 222 |
elif torch.backends.mps.is_available():
|
| 223 |
-
|
| 224 |
|
| 225 |
-
logging.info(f"Using device: {device}")
|
| 226 |
print(i18n("using_device").format(device))
|
| 227 |
|
| 228 |
model_load_start_time = time.time()
|
| 229 |
torch.backends.cudnn.benchmark = True
|
| 230 |
|
| 231 |
-
|
| 232 |
-
model, config = get_model_from_config(args.model_type, args.config_path)
|
| 233 |
-
except Exception as e:
|
| 234 |
-
logging.error(f"Failed to load model: {str(e)}")
|
| 235 |
-
raise
|
| 236 |
|
| 237 |
-
if args.start_check_point:
|
| 238 |
-
|
| 239 |
-
load_start_checkpoint(args, model, type_='inference')
|
| 240 |
-
except Exception as e:
|
| 241 |
-
logging.error(f"Failed to load checkpoint: {str(e)}")
|
| 242 |
-
raise
|
| 243 |
|
| 244 |
-
logging.info(f"Instruments: {config.training.instruments}")
|
| 245 |
print(i18n("instruments_print").format(config.training.instruments))
|
| 246 |
|
| 247 |
-
if len(args.device_ids) > 1 and not args.force_cpu:
|
| 248 |
model = nn.DataParallel(model, device_ids=args.device_ids)
|
| 249 |
-
logging.info(f"Using DataParallel with devices: {args.device_ids}")
|
| 250 |
|
| 251 |
model = model.to(device)
|
| 252 |
|
| 253 |
-
|
| 254 |
-
|
| 255 |
-
|
| 256 |
-
|
| 257 |
-
|
| 258 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 259 |
|
| 260 |
if __name__ == "__main__":
|
| 261 |
-
|
| 262 |
-
proc_folder(None)
|
| 263 |
-
except Exception as e:
|
| 264 |
-
logging.error(f"Main execution failed: {str(e)}")
|
| 265 |
-
raise
|
|
|
|
| 3 |
|
| 4 |
import argparse
|
| 5 |
import time
|
|
|
|
| 6 |
import librosa
|
| 7 |
+
from tqdm.auto import tqdm
|
| 8 |
import sys
|
| 9 |
import os
|
| 10 |
import glob
|
| 11 |
import torch
|
| 12 |
+
import soundfile as sf
|
| 13 |
import torch.nn as nn
|
| 14 |
import numpy as np
|
| 15 |
+
from assets.i18n.i18n import I18nAuto
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
|
| 17 |
# Colab kontrolü
|
| 18 |
try:
|
|
|
|
| 21 |
except ImportError:
|
| 22 |
IS_COLAB = False
|
| 23 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
i18n = I18nAuto()
|
| 25 |
|
| 26 |
current_dir = os.path.dirname(os.path.abspath(__file__))
|
| 27 |
sys.path.append(current_dir)
|
| 28 |
|
| 29 |
from utils import demix, get_model_from_config, normalize_audio, denormalize_audio
|
| 30 |
+
from utils import prefer_target_instrument, apply_tta, load_start_checkpoint, load_lora_weights
|
| 31 |
+
|
| 32 |
+
# PyTorch optimized backend (always available)
|
| 33 |
+
try:
|
| 34 |
+
from pytorch_backend import PyTorchBackend
|
| 35 |
+
PYTORCH_OPTIMIZED_AVAILABLE = True
|
| 36 |
+
except ImportError:
|
| 37 |
+
PYTORCH_OPTIMIZED_AVAILABLE = False
|
| 38 |
+
|
| 39 |
+
import warnings
|
| 40 |
+
warnings.filterwarnings("ignore")
|
| 41 |
|
| 42 |
def shorten_filename(filename, max_length=30):
|
| 43 |
+
"""Dosya adını belirtilen maksimum uzunluğa kısaltır."""
|
| 44 |
base, ext = os.path.splitext(filename)
|
| 45 |
if len(base) <= max_length:
|
| 46 |
return filename
|
|
|
|
| 48 |
return shortened
|
| 49 |
|
| 50 |
def get_soundfile_subtype(pcm_type, is_float=False):
    """Pick the soundfile subtype string for the requested PCM type.

    ``is_float`` forces 'FLOAT'. Otherwise 'PCM_16', 'PCM_24' and 'FLOAT'
    map to themselves and any other value falls back to 'FLOAT'.
    """
    supported = {name: name for name in ('PCM_16', 'PCM_24', 'FLOAT')}
    return 'FLOAT' if is_float else supported.get(pcm_type, 'FLOAT')
|
| 60 |
|
| 61 |
+
def run_folder(model, args, config, device, verbose: bool = False):
    """Separate every file matching ``*.*`` in ``args.input_folder`` and write the stems.

    For each input file the mixture is loaded with librosa at the configured
    sample rate, optionally normalized, separated with ``demix`` (plus
    ``apply_tta`` when ``args.use_tta`` is set), and each stem is written to
    ``args.store_dir`` as ``<shortened input name>_<stem>.<codec>``.

    Parameters:
    ----------
    model : the separation model; ``model.eval()`` is called before processing.
    args : parsed arguments. Reads ``input_folder``, ``store_dir``,
        ``disable_detailed_pbar``, ``use_tta``, ``demud_phaseremix_inst``,
        ``extract_instrumental``, ``model_type`` and ``pcm_type``;
        ``export_format`` and ``flac_file`` are read with ``getattr`` defaults.
    config : model configuration; reads ``config.audio.sample_rate`` and
        ``config.inference['normalize']``.
    device : device handed to ``demix`` / ``apply_tta``.
    verbose : when False, the file loop is wrapped in a tqdm progress bar.

    Files that cannot be loaded are reported and skipped.
    """
    start_time = time.time()
    model.eval()

    mixture_paths = sorted(glob.glob(os.path.join(args.input_folder, '*.*')))
    sample_rate = getattr(config.audio, 'sample_rate', 44100)

    print(i18n("total_files_found").format(len(mixture_paths), sample_rate))

    # Stems the model itself produces. Kept pristine: each file works on its
    # own copy so that stems appended for one file (instrumental,
    # instrumental_phaseremix) neither pile up as duplicates nor change which
    # phase-remix branch later files take.
    base_instruments = prefer_target_instrument(config)[:]

    # Use the output folder (set by processing.py)
    store_dir = args.store_dir
    os.makedirs(store_dir, exist_ok=True)

    if not verbose:
        mixture_paths = tqdm(mixture_paths, desc=i18n("total_progress"))

    detailed_pbar = not args.disable_detailed_pbar
    print(i18n("detailed_pbar_enabled").format(detailed_pbar))

    normalize = 'normalize' in config.inference and config.inference['normalize'] is True

    for path in mixture_paths:
        try:
            mix, sr = librosa.load(path, sr=sample_rate, mono=False)
            print(i18n("loaded_audio").format(path, mix.shape))
        except Exception as e:
            print(i18n("cannot_read_track").format(path))
            print(i18n("error_message").format(str(e)))
            continue

        instruments = base_instruments[:]

        mix_orig = mix.copy()
        norm_params = None
        if normalize:
            mix, norm_params = normalize_audio(mix)

        waveforms_orig = demix(config, model, mix, device, model_type=args.model_type, pbar=detailed_pbar)

        if args.use_tta:
            waveforms_orig = apply_tta(config, model, mix, waveforms_orig, device, args.model_type)

        if args.demud_phaseremix_inst:
            print(i18n("demudding_track").format(path))
            instr = 'vocals' if 'vocals' in instruments else instruments[0]
            instruments.append('instrumental_phaseremix')
            if 'instrumental' not in instruments and 'Instrumental' not in instruments:
                mix_modified = mix_orig - 2*waveforms_orig[instr]
                waveforms_modified = demix(config, model, mix_modified, device, model_type=args.model_type, pbar=detailed_pbar)
                if args.use_tta:
                    waveforms_modified = apply_tta(config, model, mix_modified, waveforms_modified, device, args.model_type)
                waveforms_orig['instrumental_phaseremix'] = mix_orig + waveforms_modified[instr]
            else:
                mix_modified = 2*waveforms_orig[instr] - mix_orig
                # Keep an untouched copy for the final sum below.
                mix_modified_ = mix_modified.copy()
                waveforms_modified = demix(config, model, mix_modified, device, model_type=args.model_type, pbar=detailed_pbar)
                if args.use_tta:
                    # TTA must refine the estimates of the modified mix, not
                    # those of the original mix.
                    waveforms_modified = apply_tta(config, model, mix_modified, waveforms_modified, device, args.model_type)
                waveforms_orig['instrumental_phaseremix'] = mix_orig + mix_modified_ - waveforms_modified[instr]

        if args.extract_instrumental:
            instr = 'vocals' if 'vocals' in instruments else instruments[0]
            waveforms_orig['instrumental'] = mix_orig - waveforms_orig[instr]
            if 'instrumental' not in instruments:
                instruments.append('instrumental')

        for instr in instruments:
            estimates = waveforms_orig[instr]
            if normalize:
                estimates = denormalize_audio(estimates, norm_params)

            is_float = getattr(args, 'export_format', '').startswith('wav FLOAT')
            codec = 'flac' if getattr(args, 'flac_file', False) else 'wav'
            if codec == 'flac':
                subtype = get_soundfile_subtype(args.pcm_type, is_float)
            else:
                subtype = get_soundfile_subtype('FLOAT', is_float)

            shortened_filename = shorten_filename(os.path.basename(path))
            output_filename = f"{shortened_filename}_{instr}.{codec}"
            output_path = os.path.join(store_dir, output_filename)
            sf.write(output_path, estimates.T, sr, subtype=subtype)

    print(i18n("elapsed_time").format(time.time() - start_time))
|
|
|
|
|
|
|
|
|
|
| 147 |
|
| 148 |
+
def proc_folder(args, use_tensorrt=False):
    """
    Parse inference arguments, load the model and separate a folder of audio.

    When the optimized PyTorch backend could be imported, the parsed
    arguments are re-serialized into ``sys.argv`` and handed to
    ``proc_folder_pytorch_optimized``; otherwise ``run_folder`` is used.

    Parameters:
    ----------
    args : list or None
        Command line arguments; ``None`` means parse ``sys.argv``.
    use_tensorrt : bool
        Accepted for interface compatibility; not read by this function.
    """
    parser = argparse.ArgumentParser(description=i18n("proc_folder_description"))
    parser.add_argument("--model_type", type=str, default='mdx23c', help=i18n("model_type_help"))
    parser.add_argument("--config_path", type=str, help=i18n("config_path_help"))
    parser.add_argument("--demud_phaseremix_inst", action='store_true', help=i18n("demud_phaseremix_help"))
    parser.add_argument("--start_check_point", type=str, default='', help=i18n("start_checkpoint_help"))
    parser.add_argument("--input_folder", type=str, help=i18n("input_folder_help"))
    parser.add_argument("--audio_path", type=str, help=i18n("audio_path_help"))
    parser.add_argument("--store_dir", type=str, default="", help=i18n("store_dir_help"))
    parser.add_argument("--device_ids", nargs='+', type=int, default=0, help=i18n("device_ids_help"))
    parser.add_argument("--extract_instrumental", action='store_true', help=i18n("extract_instrumental_help"))
    parser.add_argument("--disable_detailed_pbar", action='store_true', help=i18n("disable_detailed_pbar_help"))
    parser.add_argument("--force_cpu", action='store_true', help=i18n("force_cpu_help"))
    parser.add_argument("--flac_file", action='store_true', help=i18n("flac_file_help"))
    parser.add_argument("--export_format", type=str, choices=['wav FLOAT', 'flac PCM_16', 'flac PCM_24'], default='flac PCM_24', help=i18n("export_format_help"))
    parser.add_argument("--pcm_type", type=str, choices=['PCM_16', 'PCM_24'], default='PCM_24', help=i18n("pcm_type_help"))
    parser.add_argument("--use_tta", action='store_true', help=i18n("use_tta_help"))
    parser.add_argument("--lora_checkpoint", type=str, default='', help=i18n("lora_checkpoint_help"))
    parser.add_argument("--chunk_size", type=int, default=1000000, help="Inference chunk size")
    parser.add_argument("--overlap", type=int, default=4, help="Inference overlap factor")
    parser.add_argument("--optimize_mode", type=str, choices=['default', 'compile', 'jit', 'channels_last'], default='channels_last', help="PyTorch optimization mode (always enabled)")
    # NOTE: store_true with default=True means these three flags are always
    # True; they cannot be switched off from the command line.
    parser.add_argument("--enable_amp", action='store_true', default=True, help="Enable automatic mixed precision")
    parser.add_argument("--enable_tf32", action='store_true', default=True, help="Enable TF32 (Ampere GPUs)")
    parser.add_argument("--enable_cudnn_benchmark", action='store_true', default=True, help="Enable cuDNN benchmark")

    if args is None:
        args = parser.parse_args()
    else:
        args = parser.parse_args(args)

    # --device_ids is a list when given on the command line but the bare
    # int default (0) otherwise, so both shapes are handled below.
    device_ids_is_list = isinstance(args.device_ids, list)

    device = "cpu"
    if not args.force_cpu:
        if torch.cuda.is_available():
            print(i18n("cuda_available"))
            device = f'cuda:{args.device_ids[0]}' if device_ids_is_list else f'cuda:{args.device_ids}'
        elif torch.backends.mps.is_available():
            device = "mps"

    print(i18n("using_device").format(device))

    model_load_start_time = time.time()
    torch.backends.cudnn.benchmark = True

    model, config = get_model_from_config(args.model_type, args.config_path)

    if args.start_check_point != '':
        load_start_checkpoint(args, model, type_='inference')

    print(i18n("instruments_print").format(config.training.instruments))

    if device_ids_is_list and len(args.device_ids) > 1 and not args.force_cpu:
        model = nn.DataParallel(model, device_ids=args.device_ids)

    model = model.to(device)

    print(i18n("model_load_time").format(time.time() - model_load_start_time))

    # Always use optimized PyTorch backend if available
    if PYTORCH_OPTIMIZED_AVAILABLE:
        print(f"Using optimized PyTorch backend")
        print(f"  Mode: {args.optimize_mode}")
        print(f"  AMP: {args.enable_amp} | TF32: {args.enable_tf32} | cuDNN: {args.enable_cudnn_benchmark}")
        from inference_pytorch import proc_folder_pytorch_optimized

        # Recreate args for optimized PyTorch inference: None/False values
        # are dropped, True becomes a bare flag, lists are expanded.
        forwarded = []
        for key, value in vars(args).items():
            if value is None or value is False:
                continue
            if isinstance(value, bool):
                forwarded.append(f"--{key}")
            elif isinstance(value, list):
                forwarded.append(f"--{key}")
                forwarded.extend(map(str, value))
            else:
                forwarded.extend([f"--{key}", str(value)])

        # The optimized entry point is called with None, so it receives the
        # arguments through sys.argv. Restore the caller's argv afterwards so
        # the rewrite does not leak into the rest of the process.
        saved_argv = sys.argv
        sys.argv = sys.argv[:1] + forwarded  # Keep only script name
        try:
            proc_folder_pytorch_optimized(None)
        finally:
            sys.argv = saved_argv
    else:
        print("Warning: PyTorch optimized backend not available, using standard inference")
        run_folder(model, args, config, device, verbose=False)
|
| 237 |
|
| 238 |
if __name__ == "__main__":
|
| 239 |
+
proc_folder(None)
|
|
|
|
|
|
|
|
|
|
|
|
inference_pytorch.py
ADDED
|
@@ -0,0 +1,390 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# coding: utf-8
|
| 2 |
+
__author__ = 'PyTorch Optimized Inference Implementation'
|
| 3 |
+
|
| 4 |
+
import argparse
|
| 5 |
+
import time
|
| 6 |
+
import librosa
|
| 7 |
+
from tqdm.auto import tqdm
|
| 8 |
+
import sys
|
| 9 |
+
import os
|
| 10 |
+
import glob
|
| 11 |
+
import torch
|
| 12 |
+
import soundfile as sf
|
| 13 |
+
import torch.nn as nn
|
| 14 |
+
import numpy as np
|
| 15 |
+
import pickle
|
| 16 |
+
from assets.i18n.i18n import I18nAuto
|
| 17 |
+
|
| 18 |
+
# Set inference path for compatibility
|
| 19 |
+
INFERENCE_PATH = os.path.abspath(__file__)
|
| 20 |
+
|
| 21 |
+
i18n = I18nAuto()
|
| 22 |
+
|
| 23 |
+
current_dir = os.path.dirname(os.path.abspath(__file__))
|
| 24 |
+
sys.path.append(current_dir)
|
| 25 |
+
|
| 26 |
+
from utils import get_model_from_config, normalize_audio, denormalize_audio
|
| 27 |
+
from utils import prefer_target_instrument, load_start_checkpoint, apply_tta, demix
|
| 28 |
+
from pytorch_backend import PyTorchBackend, PyTorchOptimizer, create_inference_session
|
| 29 |
+
|
| 30 |
+
import warnings
|
| 31 |
+
warnings.filterwarnings("ignore")
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
def shorten_filename(filename, max_length=30):
    """Abbreviate a file name whose stem exceeds ``max_length`` characters.

    Names within the limit are returned untouched. Longer ones become the
    first 15 stem characters, "...", the last 10 stem characters, and the
    original extension.
    """
    stem, suffix = os.path.splitext(filename)
    if len(stem) > max_length:
        return f"{stem[:15]}...{stem[-10:]}{suffix}"
    return filename
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
def get_soundfile_subtype(pcm_type, is_float=False):
    """Resolve the soundfile subtype for a PCM type name.

    Returns 'FLOAT' when ``is_float`` is set or ``pcm_type`` is not one of
    'PCM_16', 'PCM_24', 'FLOAT'; otherwise returns the matching subtype.
    """
    recognised = dict(PCM_16='PCM_16', PCM_24='PCM_24', FLOAT='FLOAT')
    if not is_float:
        try:
            return recognised[pcm_type]
        except KeyError:
            pass
    return 'FLOAT'
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
def demix_pytorch_optimized(
    config,
    backend: PyTorchBackend,
    mix: np.ndarray,
    device: torch.device,
    pbar: bool = False
) -> dict:
    """
    Separate a mixture into sources with the optimized PyTorch backend.

    The mixture is cut into chunks of ``config.audio.chunk_size`` samples
    taken every ``chunk_size // num_overlap`` samples. Each batch of chunks is
    passed to ``backend``; the outputs are weighted by a fade-in/fade-out
    window, summed, and divided by the accumulated window weight.

    Parameters:
    ----------
    config : ConfigDict
        Reads ``audio.chunk_size``, ``inference.num_overlap``,
        ``inference.batch_size`` and, optionally, ``training.use_amp``.
    backend : PyTorchBackend
        Callable that maps a stacked batch of chunks to separated chunks.
    mix : np.ndarray
        Input audio; indexed as 2-D with time on the last axis
        (presumably channels x samples - confirm against callers).
    device : torch.device
        Device each chunk is moved to before inference.
    pbar : bool
        Accepted for interface compatibility; not read here. Progress is
        printed as ``[SESA_PROGRESS]<percent>`` lines instead.

    Returns:
    -------
    dict
        Maps each name from ``prefer_target_instrument(config)`` to its
        estimated source as a numpy array.
    """
    mix = torch.tensor(mix, dtype=torch.float32)

    chunk_size = config.audio.chunk_size
    instruments = prefer_target_instrument(config)
    num_instruments = len(instruments)
    num_overlap = config.inference.num_overlap

    fade_size = chunk_size // 10
    step = chunk_size // num_overlap
    border = chunk_size - step
    length_init = mix.shape[-1]

    # Windowing array: linear fade-in and fade-out at the chunk edges.
    fadein = torch.linspace(0, 1, fade_size)
    fadeout = torch.linspace(1, 0, fade_size)
    windowing_array = torch.ones(chunk_size)
    windowing_array[-fade_size:] = fadeout
    windowing_array[:fade_size] = fadein

    # The first chunk must not fade in and the last must not fade out,
    # otherwise the signal edges get (near-)zero weight.
    first_window = windowing_array.clone()
    first_window[:fade_size] = 1
    last_window = windowing_array.clone()
    last_window[-fade_size:] = 1

    # Add padding
    padded = length_init > 2 * border and border > 0
    if padded:
        mix = nn.functional.pad(mix, (border, border), mode="reflect")

    batch_size = config.inference.batch_size
    use_amp = getattr(config.training, 'use_amp', True)

    with torch.cuda.amp.autocast(enabled=use_amp):
        with torch.inference_mode():
            # Initialize result and counter tensors
            req_shape = (num_instruments,) + mix.shape
            result = torch.zeros(req_shape, dtype=torch.float32)
            counter = torch.zeros(req_shape, dtype=torch.float32)

            i = 0
            batch_data = []
            batch_locations = []

            # Progress reporting for GUI (no terminal tqdm)
            total_samples = mix.shape[1]
            last_reported_percent = -1

            while i < total_samples:
                # Extract chunk
                part = mix[:, i:i + chunk_size].to(device)
                chunk_len = part.shape[-1]

                # Reflect padding needs enough real samples to mirror;
                # short tail chunks are zero-padded instead.
                pad_mode = "reflect" if chunk_len > chunk_size // 2 else "constant"

                part = nn.functional.pad(
                    part,
                    (0, chunk_size - chunk_len),
                    mode=pad_mode,
                    value=0
                )

                batch_data.append(part)
                batch_locations.append((i, chunk_len))
                i += step

                # Process batch
                if len(batch_data) >= batch_size or i >= total_samples:
                    arr = torch.stack(batch_data, dim=0)

                    # Use optimized PyTorch backend for inference
                    x = backend(arr)

                    for j, (start, seg_len) in enumerate(batch_locations):
                        # Choose the window per chunk, not per batch: with
                        # batch_size > 1 a per-batch choice leaves the first
                        # chunk faded in and strips the fade-out from every
                        # chunk in the final batch.
                        if start == 0:  # First chunk
                            window = first_window
                        elif start + step >= total_samples:  # Last chunk
                            window = last_window
                        else:
                            window = windowing_array

                        result[..., start:start + seg_len] += x[j, ..., :seg_len].cpu() * window[..., :seg_len]
                        counter[..., start:start + seg_len] += window[..., :seg_len]

                    batch_data.clear()
                    batch_locations.clear()

                    # Report real progress percentage for GUI capture (every 1% for smooth updates)
                    # Use unique prefix [SESA_PROGRESS] to avoid confusion with other log messages
                    current_percent = int((i / total_samples) * 100)
                    if current_percent > last_reported_percent:
                        last_reported_percent = current_percent
                        print(f"[SESA_PROGRESS]{current_percent}", flush=True)

            print("[SESA_PROGRESS]100", flush=True)

            # Compute final estimated sources; positions with zero
            # accumulated weight give 0/0 = NaN and are zeroed below.
            estimated_sources = result / counter
            estimated_sources = estimated_sources.cpu().numpy()
            np.nan_to_num(estimated_sources, copy=False, nan=0.0)

            # Remove padding
            if padded:
                estimated_sources = estimated_sources[..., border:-border]

    # Return as dictionary
    ret_data = {k: v for k, v in zip(instruments, estimated_sources)}

    return ret_data
|
| 187 |
+
|
| 188 |
+
|
| 189 |
+
def run_folder_pytorch_optimized(backend, args, config, device, model=None, verbose: bool = False):
    """
    Separate every file found in ``args.input_folder`` using the PyTorch backend.

    For each input file the mixture is loaded, optionally normalized, separated
    with ``demix_pytorch_optimized`` and each resulting stem is written to
    ``args.store_dir`` as ``<shortened name>_<instrument>.<codec>``.

    Args:
        backend: Inference backend forwarded to ``demix_pytorch_optimized``.
        args: Parsed CLI namespace (input_folder, store_dir, use_tta,
            demud_phaseremix_inst, extract_instrumental, flac_file, ...).
        config: Model configuration; ``config.audio`` and ``config.inference``
            are read here.
        device: Device identifier forwarded to the demix / TTA helpers.
        model: Raw model, required for TTA and DemudPhaseRemix; both features
            are skipped when it is ``None``.
        verbose: Accepted for interface compatibility; not used in this function.
    """
    start_time = time.time()

    mixture_paths = sorted(glob.glob(os.path.join(args.input_folder, '*.*')))
    sample_rate = getattr(config.audio, 'sample_rate', 44100)

    print(f"PyTorch Backend | {len(mixture_paths)} dosya | SR: {sample_rate}")

    # Stems produced by the model itself; copied so the config list is never mutated.
    base_instruments = prefer_target_instrument(config)[:]

    # Use the requested output folder
    store_dir = args.store_dir
    os.makedirs(store_dir, exist_ok=True)

    # Progress is reported via print statements for GUI capture (no terminal tqdm)
    detailed_pbar = not args.disable_detailed_pbar
    print(i18n("detailed_pbar_enabled").format(detailed_pbar))

    # The config does not change between files, so evaluate the flag once.
    normalize = 'normalize' in config.inference and config.inference['normalize'] is True

    for path in mixture_paths:
        try:
            mix, sr = librosa.load(path, sr=sample_rate, mono=False)
            print(i18n("loaded_audio").format(path, mix.shape))
        except Exception as e:
            # Best effort: an unreadable file must not abort the whole folder.
            print(i18n("cannot_read_track").format(path))
            print(i18n("error_message").format(str(e)))
            continue

        # Per-file stem list. Extending one shared list inside the loop made
        # 'instrumental_phaseremix' accumulate once per file and changed which
        # DemudPhaseRemix branch ran for every file after the first.
        instruments = list(base_instruments)

        mix_orig = mix.copy()
        if normalize:
            mix, norm_params = normalize_audio(mix)

        # Use optimized PyTorch backend
        waveforms_orig = demix_pytorch_optimized(config, backend, mix, device, pbar=detailed_pbar)

        if args.use_tta and model is not None:
            waveforms_orig = apply_tta(config, model, mix, waveforms_orig, device, args.model_type)

        if args.demud_phaseremix_inst and model is not None:
            print(f"DemudPhaseRemix: {path}")
            instr = 'vocals' if 'vocals' in instruments else instruments[0]
            instruments.append('instrumental_phaseremix')
            if 'instrumental' not in instruments and 'Instrumental' not in instruments:
                mix_modified = mix_orig - 2 * waveforms_orig[instr]
                waveforms_modified = demix(config, model, mix_modified, device, model_type=args.model_type)
                if args.use_tta:
                    waveforms_modified = apply_tta(config, model, mix_modified, waveforms_modified, device, args.model_type)
                waveforms_orig['instrumental_phaseremix'] = mix_orig + waveforms_modified[instr]
            else:
                mix_modified = 2 * waveforms_orig[instr] - mix_orig
                # Keep a copy: the value is needed after demix has consumed mix_modified.
                mix_modified_ = mix_modified.copy()
                waveforms_modified = demix(config, model, mix_modified, device, model_type=args.model_type)
                if args.use_tta:
                    # TTA must refine the estimates of the *modified* mix, as the
                    # sibling branch does; passing waveforms_orig here mixed the
                    # two result sets together.
                    waveforms_modified = apply_tta(config, model, mix_modified, waveforms_modified, device, args.model_type)
                waveforms_orig['instrumental_phaseremix'] = mix_orig + mix_modified_ - waveforms_modified[instr]

        if args.extract_instrumental:
            instr = 'vocals' if 'vocals' in instruments else instruments[0]
            waveforms_orig['instrumental'] = mix_orig - waveforms_orig[instr]
            if 'instrumental' not in instruments:
                instruments.append('instrumental')

        # Output format depends only on args, so resolve it once per file.
        is_float = getattr(args, 'export_format', '').startswith('wav FLOAT')
        codec = 'flac' if getattr(args, 'flac_file', False) else 'wav'
        if codec == 'flac':
            subtype = get_soundfile_subtype(args.pcm_type, is_float)
        else:
            subtype = get_soundfile_subtype('FLOAT', is_float)
        shortened_filename = shorten_filename(os.path.basename(path))

        for instr in instruments:
            estimates = waveforms_orig[instr]
            if normalize:
                estimates = denormalize_audio(estimates, norm_params)

            output_filename = f"{shortened_filename}_{instr}.{codec}"
            output_path = os.path.join(store_dir, output_filename)
            sf.write(output_path, estimates.T, sr, subtype=subtype)

    print(i18n("elapsed_time").format(time.time() - start_time))
|
| 275 |
+
|
| 276 |
+
|
| 277 |
+
def proc_folder_pytorch_optimized(args):
    """
    Parse CLI arguments, load the model and run folder inference with PyTorch.

    Args:
        args: List of command-line tokens to parse, or ``None`` to parse
            ``sys.argv``.

    Exits the process with status 1 when the checkpoint file cannot be
    deserialized (after printing troubleshooting hints).
    """
    parser = argparse.ArgumentParser(description="PyTorch Inference for Music Source Separation")
    parser.add_argument("--model_type", type=str, default='mdx23c', help="Model type")
    parser.add_argument("--config_path", type=str, help="Config path")
    parser.add_argument("--start_check_point", type=str, default='', help="Checkpoint path (.ckpt)")
    parser.add_argument("--input_folder", type=str, help="Input folder path")
    parser.add_argument("--store_dir", type=str, default="", help="Output directory")
    parser.add_argument("--device_ids", nargs='+', type=int, default=0, help="Device IDs")
    parser.add_argument("--extract_instrumental", action='store_true', help="Extract instrumental")
    parser.add_argument("--disable_detailed_pbar", action='store_true', help="Disable detailed progress bar")
    parser.add_argument("--flac_file", action='store_true', help="Output as FLAC")
    parser.add_argument("--export_format", type=str, choices=['wav FLOAT', 'flac PCM_16', 'flac PCM_24'],
                        default='flac PCM_24', help="Export format")
    parser.add_argument("--pcm_type", type=str, choices=['PCM_16', 'PCM_24'], default='PCM_24', help="PCM type")
    parser.add_argument("--chunk_size", type=int, default=1000000, help="Inference chunk size")
    parser.add_argument("--overlap", type=int, default=4, help="Inference overlap factor")
    parser.add_argument("--optimize_mode", type=str, choices=['channels_last', 'compile', 'jit', 'default'],
                        default='channels_last', help="PyTorch optimization mode (channels_last recommended)")
    parser.add_argument("--enable_amp", action='store_true', help="Enable automatic mixed precision (2x faster)")
    parser.add_argument("--enable_tf32", action='store_true', help="Enable TF32 for RTX 30xx+ (faster)")
    parser.add_argument("--enable_cudnn_benchmark", action='store_true', help="Enable cuDNN benchmark (faster after warmup)")
    parser.add_argument("--lora_checkpoint", type=str, default='', help="Initial checkpoint to LoRA weights")
    parser.add_argument("--use_tta", action='store_true', help="Test Time Augmentation (flips + polarity)")
    parser.add_argument("--demud_phaseremix_inst", action='store_true', help="DemudPhaseRemix instrumental extraction")

    # parse_args(None) falls back to sys.argv, so one call covers both cases.
    args = parser.parse_args(args)

    # Device setup
    device = "cpu"
    if torch.cuda.is_available():
        print(i18n("cuda_available"))
        # --device_ids is a list when given on the CLI but a bare int by default.
        device_ids = args.device_ids
        first_id = device_ids[0] if isinstance(device_ids, (list, tuple)) else device_ids
        device = f'cuda:{first_id}'
    elif torch.backends.mps.is_available():
        device = "mps"
        print("Using MPS (Metal) backend")

    print(i18n("using_device").format(device))

    # Load model
    model_load_start_time = time.time()

    model, config = get_model_from_config(args.model_type, args.config_path)

    if args.start_check_point != '':
        try:
            # SECURITY NOTE: weights_only=False unpickles arbitrary objects, which
            # can execute code. Only load checkpoints from sources you trust.
            checkpoint = torch.load(args.start_check_point, map_location=device, weights_only=False)
        except (pickle.UnpicklingError, RuntimeError, EOFError) as e:
            error_details = f"""
CHECKPOINT FILE CORRUPTED

Error: {str(e)}

The checkpoint file appears to be corrupted or was not downloaded correctly.
File: {args.start_check_point}

Common causes:
- File is an HTML page (wrong download URL, e.g., HuggingFace /blob/ instead of /resolve/)
- Incomplete or interrupted download
- Network issues during download
- File system corruption

Solution:
1. Delete the corrupted checkpoint file:
{args.start_check_point}
2. Re-run the application - it will automatically re-download the model
3. If the problem persists, check that your model URL uses /resolve/ not /blob/
Example: https://huggingface.co/user/repo/resolve/main/model.ckpt
"""
            print(error_details)
            import sys
            sys.exit(1)

        # Handle different checkpoint formats: some files wrap the weights
        # under a 'state_dict', 'model' or 'state' key.
        state_dict = checkpoint
        if isinstance(checkpoint, dict):
            for wrapper_key in ('state_dict', 'model', 'state'):
                if wrapper_key in checkpoint:
                    state_dict = checkpoint[wrapper_key]
                    break

        model.load_state_dict(state_dict, strict=False)

    # Always switch to eval mode on the target device, even when no checkpoint
    # was supplied, so the backend never receives a training-mode model.
    model = model.eval().to(device)

    print(i18n("instruments_print").format(config.training.instruments))

    # Create optimized PyTorch backend
    backend = create_inference_session(
        model=model,
        device=device,
        optimize_mode=args.optimize_mode,
        enable_amp=args.enable_amp,
        enable_tf32=args.enable_tf32,
        enable_cudnn_benchmark=args.enable_cudnn_benchmark
    )

    print(i18n("model_load_time").format(time.time() - model_load_start_time))

    # Run inference (pass raw model for TTA/demud support)
    run_folder_pytorch_optimized(backend, args, config, device, model=model, verbose=False)
|
| 387 |
+
|
| 388 |
+
|
| 389 |
+
if __name__ == "__main__":
    # Script entry point: passing None makes the parser read sys.argv.
    proc_folder_pytorch_optimized(args=None)
|
main.py
CHANGED
|
@@ -14,7 +14,15 @@ from datetime import datetime
|
|
| 14 |
import numpy as np
|
| 15 |
import shutil
|
| 16 |
from gui import create_interface
|
| 17 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
from assets.i18n.i18n import I18nAuto # I18nAuto'yu içe aktar
|
| 19 |
|
| 20 |
import warnings
|
|
@@ -56,6 +64,9 @@ def start_localtunnel(port, i18n):
|
|
| 56 |
|
| 57 |
def start_ngrok(port, ngrok_token, i18n):
|
| 58 |
"""Starts the Gradio interface with ngrok sharing."""
|
|
|
|
|
|
|
|
|
|
| 59 |
print(i18n("starting_ngrok").format(port=port))
|
| 60 |
try:
|
| 61 |
ngrok.set_auth_token(ngrok_token)
|
|
|
|
| 14 |
import numpy as np
|
| 15 |
import shutil
|
| 16 |
from gui import create_interface
|
| 17 |
+
|
| 18 |
+
# pyngrok import (optional - only needed for ngrok sharing)
|
| 19 |
+
try:
|
| 20 |
+
from pyngrok import ngrok
|
| 21 |
+
NGROK_AVAILABLE = True
|
| 22 |
+
except ImportError:
|
| 23 |
+
NGROK_AVAILABLE = False
|
| 24 |
+
ngrok = None
|
| 25 |
+
|
| 26 |
from assets.i18n.i18n import I18nAuto # I18nAuto'yu içe aktar
|
| 27 |
|
| 28 |
import warnings
|
|
|
|
| 64 |
|
| 65 |
def start_ngrok(port, ngrok_token, i18n):
|
| 66 |
"""Starts the Gradio interface with ngrok sharing."""
|
| 67 |
+
if not NGROK_AVAILABLE:
|
| 68 |
+
print("pyngrok modülü yüklü değil. 'pip install pyngrok' ile yükleyin.")
|
| 69 |
+
sys.exit(1)
|
| 70 |
print(i18n("starting_ngrok").format(port=port))
|
| 71 |
try:
|
| 72 |
ngrok.set_auth_token(ngrok_token)
|
model.py
CHANGED
|
@@ -1,58 +1,600 @@
|
|
| 1 |
import os
|
| 2 |
import yaml
|
| 3 |
-
|
|
|
|
|
|
|
|
|
|
| 4 |
from pathlib import Path
|
| 5 |
|
| 6 |
# Temel dizin ve checkpoint dizini sabit olarak tanımlanıyor
|
| 7 |
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
|
| 8 |
CHECKPOINT_DIR = os.path.join(BASE_DIR, 'ckpts')
|
|
|
|
| 9 |
|
| 10 |
-
|
| 11 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
full_config_path = os.path.join(CHECKPOINT_DIR, os.path.basename(config_path))
|
| 13 |
if not os.path.exists(full_config_path):
|
| 14 |
raise FileNotFoundError(f"Configuration file not found: {full_config_path}")
|
| 15 |
|
| 16 |
-
|
| 17 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
|
| 19 |
-
|
| 20 |
-
data['training']['use_amp'] = True
|
| 21 |
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
|
|
|
| 26 |
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
|
| 31 |
-
|
| 32 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
import requests
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
encoded_url = quote(url, safe=':/')
|
| 35 |
-
path
|
|
|
|
| 36 |
os.makedirs(path, exist_ok=True)
|
| 37 |
-
filename
|
|
|
|
| 38 |
file_path = os.path.join(path, filename)
|
| 39 |
if os.path.exists(file_path):
|
| 40 |
print(f"File '{filename}' already exists at '{path}'.")
|
| 41 |
return
|
| 42 |
try:
|
| 43 |
-
response = requests.get(url)
|
| 44 |
if response.status_code == 200:
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
else:
|
| 49 |
print(f"Error downloading '{filename}': Status code {response.status_code}")
|
| 50 |
except Exception as e:
|
| 51 |
print(f"Error downloading file '{filename}' from '{url}': {e}")
|
|
|
|
| 52 |
|
| 53 |
# Model konfigurasyonlarını kategorize bir sözlükte tut
|
| 54 |
MODEL_CONFIGS = {
|
| 55 |
"Vocal Models": {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
'VOCALS-big_beta6X (by Unwa)': {
|
| 57 |
'model_type': 'mel_band_roformer',
|
| 58 |
'config_path': os.path.join(CHECKPOINT_DIR, 'big_beta6x.yaml'),
|
|
@@ -272,9 +814,204 @@ MODEL_CONFIGS = {
|
|
| 272 |
'https://huggingface.co/Aname-Tommy/MelBandRoformers/blob/main/FullnessVocalModel.ckpt'
|
| 273 |
],
|
| 274 |
'needs_conf_edit': True
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 275 |
}
|
| 276 |
},
|
| 277 |
"Instrumental Models": {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 278 |
'Inst_GaboxFv8 (by Gabox)': {
|
| 279 |
'model_type': 'mel_band_roformer',
|
| 280 |
'config_path': os.path.join(CHECKPOINT_DIR, 'inst_gabox.yaml'),
|
|
@@ -504,6 +1241,86 @@ MODEL_CONFIGS = {
|
|
| 504 |
'https://huggingface.co/GaboxR67/MelBandRoformers/resolve/main/melbandroformers/instrumental/Inst_GaboxV7.ckpt'
|
| 505 |
],
|
| 506 |
'needs_conf_edit': True
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 507 |
}
|
| 508 |
},
|
| 509 |
"4-Stem Models": {
|
|
@@ -660,6 +1477,56 @@ MODEL_CONFIGS = {
|
|
| 660 |
'https://huggingface.co/anvuew/dereverb_mel_band_roformer/resolve/main/dereverb_mel_band_roformer_mono_anvuew_sdr_20.4029.ckpt'
|
| 661 |
],
|
| 662 |
'needs_conf_edit': True
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 663 |
}
|
| 664 |
},
|
| 665 |
"Karaoke": {
|
|
@@ -673,13 +1540,43 @@ MODEL_CONFIGS = {
|
|
| 673 |
],
|
| 674 |
'needs_conf_edit': True
|
| 675 |
},
|
| 676 |
-
'KaraokeGabox': {
|
| 677 |
'model_type': 'mel_band_roformer',
|
| 678 |
-
'config_path': os.path.join(CHECKPOINT_DIR, '
|
| 679 |
-
'start_check_point': os.path.join(CHECKPOINT_DIR, '
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 680 |
'download_urls': [
|
| 681 |
-
'https://
|
| 682 |
-
'https://huggingface.co/
|
| 683 |
],
|
| 684 |
'needs_conf_edit': True
|
| 685 |
}
|
|
@@ -764,23 +1661,220 @@ MODEL_CONFIGS = {
|
|
| 764 |
'https://huggingface.co/SYH99999/MelBandRoformerSYHFTB1/resolve/main/model3.ckpt'
|
| 765 |
],
|
| 766 |
'needs_conf_edit': True
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 767 |
}
|
| 768 |
}
|
| 769 |
}
|
| 770 |
|
| 771 |
def get_model_config(clean_model=None, chunk_size=None, overlap=None):
|
| 772 |
-
"""Returns model type, config path, and checkpoint path for a given model name, downloading files if needed.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 773 |
if clean_model is None:
|
| 774 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 775 |
|
|
|
|
| 776 |
for category in MODEL_CONFIGS.values():
|
| 777 |
if clean_model in category:
|
| 778 |
config = category[clean_model]
|
| 779 |
-
for
|
| 780 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 781 |
if config['needs_conf_edit'] and chunk_size is not None and overlap is not None:
|
| 782 |
conf_edit(config['config_path'], chunk_size, overlap)
|
| 783 |
return config['model_type'], config['config_path'], config['start_check_point']
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 784 |
return "", "", ""
|
| 785 |
|
| 786 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import os
|
| 2 |
import yaml
|
| 3 |
+
import json
|
| 4 |
+
import re
|
| 5 |
+
import shutil
|
| 6 |
+
from urllib.parse import quote, urlparse
|
| 7 |
from pathlib import Path
|
| 8 |
|
| 9 |
# Temel dizin ve checkpoint dizini sabit olarak tanımlanıyor
|
| 10 |
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
|
| 11 |
CHECKPOINT_DIR = os.path.join(BASE_DIR, 'ckpts')
|
| 12 |
+
CUSTOM_MODELS_FILE = os.path.join(BASE_DIR, 'assets', 'custom_models.json')
|
| 13 |
|
| 14 |
+
|
| 15 |
+
def fix_huggingface_url(url):
    """Convert a Hugging Face ``/blob/`` URL into a downloadable ``/resolve/`` URL.

    Hugging Face has two URL formats:
    - /blob/ URLs show the web page (HTML) - WRONG for downloading
    - /resolve/ URLs provide the raw file content - CORRECT for downloading

    Only the first ``/blob/`` segment (the one between the repository and the
    revision) is rewritten, so a file that lives in a directory called
    ``blob`` keeps its path intact.

    Args:
        url: The URL to fix. Falsy values (``None``, ``""``) are returned as-is.

    Returns:
        The corrected URL (or the original if it is not a HF blob URL).
    """
    if not url:
        return url

    if 'huggingface.co' in url and '/blob/' in url:
        # count=1: rewrite the view-type segment only, never the file path.
        return url.replace('/blob/', '/resolve/', 1)

    return url
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
def validate_yaml_content(content, filepath=None):
    """Check that downloaded content is not an HTML page masquerading as YAML.

    Args:
        content: File content as ``str`` or ``bytes``.
        filepath: Optional path, mentioned in the error message when given.

    Returns:
        tuple: ``(True, None)`` when no HTML marker is found, otherwise
        ``(False, error_message)``.
    """
    # Markers are matched case-insensitively anywhere in the content.
    html_markers = (
        '<!DOCTYPE',
        '<html',
        '<head>',
        '<body>',
        '<script>',
        '<link rel=',
        'text/html',
    )

    if isinstance(content, str):
        haystack = content.lower()
    else:
        haystack = content.decode('utf-8', errors='ignore').lower()

    looks_like_html = any(marker.lower() in haystack for marker in html_markers)
    if not looks_like_html:
        return True, None

    file_line = "File: " + filepath if filepath else ""
    error_msg = f"""
The downloaded file appears to be an HTML page, not a YAML config file.
{file_line}

This usually happens when using a Hugging Face '/blob/' URL instead of a '/resolve/' URL.

To fix this:
1. Use the raw file URL with '/resolve/' instead of '/blob/'
Example: https://huggingface.co/user/repo/resolve/main/file.yaml

2. Or copy the raw URL from Hugging Face:
- Go to the file on Hugging Face
- Click "Download" or right-click and "Copy link address"
"""
    return False, error_msg
|
| 83 |
+
|
| 84 |
+
# Model types accepted for custom models, both for auto-detection and for
# manual selection.
SUPPORTED_MODEL_TYPES = [
    'bs_roformer', 'bs_roformer_custom', 'mel_band_roformer', 'mdx23c',
    'bandit_v2', 'scnet', 'htdemucs', 'torchseg',
]
|
| 95 |
+
|
| 96 |
+
def detect_model_type_from_url(checkpoint_url, config_url=None):
    """Guess the model type from name patterns in the checkpoint/config URLs.

    Args:
        checkpoint_url: URL of the checkpoint file (may be ``None`` or empty).
        config_url: Optional URL of the config file.

    Returns:
        The first matching model type string, or ``None`` when nothing matches.
    """
    urls_to_check = [url for url in (checkpoint_url, config_url) if url]
    if not urls_to_check:
        return None

    combined_text = ' '.join(urls_to_check).lower()

    # Order matters: more specific names must be tested before their prefixes.
    patterns = [
        (r'bs[-_]?roformer[-_]?custom|hyperace', 'bs_roformer_custom'),
        (r'bs[-_]?roformer|bsroformer', 'bs_roformer'),
        # 'mbr' only counts as a standalone token; a bare substring match also
        # fired inside ordinary words such as "timbre".
        (r'mel[-_]?band[-_]?roformer|melbandroformer|(?<![a-z])mbr(?![a-z])', 'mel_band_roformer'),
        (r'mdx23c', 'mdx23c'),
        (r'bandit[-_]?v?2?', 'bandit_v2'),
        (r'scnet', 'scnet'),
        (r'htdemucs|demucs', 'htdemucs'),
        (r'torchseg', 'torchseg'),
    ]

    for pattern, model_type in patterns:
        if re.search(pattern, combined_text):
            return model_type
    return None
|
| 119 |
+
|
| 120 |
+
def detect_model_type_from_config(config_url):
    """Try to detect the model type by downloading and parsing the config YAML.

    Looks for a top-level ``model_type`` key first, then ``model.model_type``.
    Detection is best effort: any download or parse problem yields ``None``.

    Args:
        config_url: URL of the YAML configuration file.

    Returns:
        The ``model_type`` value found in the config, or ``None``.
    """
    try:
        import requests
    except ImportError:
        return None

    try:
        response = requests.get(config_url, timeout=10)
        if response.status_code != 200:
            return None
        config_data = yaml.safe_load(response.text)
    except (requests.RequestException, yaml.YAMLError):
        return None

    # safe_load returns None / str / list for non-mapping documents (for
    # example an HTML error page); only a mapping can carry a model type.
    if not isinstance(config_data, dict):
        return None

    if 'model_type' in config_data:
        return config_data['model_type']

    model_section = config_data.get('model')
    if isinstance(model_section, dict) and 'model_type' in model_section:
        return model_section['model_type']

    return None
|
| 134 |
+
|
| 135 |
+
def load_custom_models():
    """Load the custom model registry from ``CUSTOM_MODELS_FILE``.

    Returns:
        dict: Mapping of model name to its stored configuration. An empty
        dict is returned when the file is missing, unreadable, not valid
        JSON, or does not contain a JSON object.
    """
    try:
        with open(CUSTOM_MODELS_FILE, 'r', encoding='utf-8') as f:
            models = json.load(f)
    except (OSError, ValueError):
        # OSError: missing/unreadable file. ValueError: invalid JSON
        # (JSONDecodeError) or undecodable bytes (UnicodeDecodeError).
        return {}

    # Callers use the result as a dict (membership tests, .items()), so a
    # file holding a list or scalar is treated as an empty registry.
    return models if isinstance(models, dict) else {}
|
| 144 |
+
|
| 145 |
+
def save_custom_models(models):
    """Write the custom model registry to ``CUSTOM_MODELS_FILE`` as JSON.

    The parent directory is created when it does not exist yet.

    Args:
        models: JSON-serializable mapping of model name to configuration.
    """
    registry_dir = os.path.dirname(CUSTOM_MODELS_FILE)
    os.makedirs(registry_dir, exist_ok=True)

    with open(CUSTOM_MODELS_FILE, 'w', encoding='utf-8') as registry_file:
        json.dump(models, registry_file, indent=2, ensure_ascii=False)
|
| 150 |
+
|
| 151 |
+
def add_custom_model(model_name, model_type, checkpoint_url, config_url, custom_model_url=None, auto_detect=True):
    """Register a new custom model in the custom model registry.

    Args:
        model_name: Display name; also the registry key.
        model_type: One of ``SUPPORTED_MODEL_TYPES``, or empty/``"auto"`` to
            request auto-detection.
        checkpoint_url: URL of the checkpoint file.
        config_url: URL of the YAML config file.
        custom_model_url: Optional extra URL stored with the entry.
        auto_detect: When true and no concrete ``model_type`` is given, the
            type is detected from the URLs and then from the config content.

    Returns:
        tuple: ``(success: bool, message: str)``.
    """
    if not model_name or not model_name.strip():
        return False, "Model name is required"
    if not checkpoint_url or not checkpoint_url.strip():
        return False, "Checkpoint URL is required"
    if not config_url or not config_url.strip():
        return False, "Config URL is required"

    model_name = model_name.strip()
    checkpoint_url = checkpoint_url.strip()
    config_url = config_url.strip()
    custom_model_url = custom_model_url.strip() if custom_model_url else None

    # Auto-fix Hugging Face URLs
    checkpoint_url = fix_huggingface_url(checkpoint_url)
    config_url = fix_huggingface_url(config_url)
    if custom_model_url:
        custom_model_url = fix_huggingface_url(custom_model_url)

    if auto_detect and (not model_type or model_type == "auto"):
        detected_type = detect_model_type_from_url(checkpoint_url, config_url)
        if not detected_type:
            detected_type = detect_model_type_from_config(config_url)
        if detected_type:
            model_type = detected_type
        else:
            return False, "Could not auto-detect model type. Please select manually."

    if model_type not in SUPPORTED_MODEL_TYPES:
        return False, f"Unsupported model type: {model_type}"

    checkpoint_filename = os.path.basename(checkpoint_url.split('?')[0])
    if not checkpoint_filename:
        # e.g. a URL ending in '/': there would be no file name to store.
        return False, "Checkpoint URL must point to a file"

    # The model name becomes part of a file name, so path separators and other
    # unsafe characters are collapsed to '_' to keep the file in its folder.
    safe_name = re.sub(r'[^\w.\-]+', '_', model_name.replace(' ', '_').lower())
    config_filename = f"config_{safe_name}.yaml"

    models = load_custom_models()
    if model_name in models:
        return False, f"Model '{model_name}' already exists"

    models[model_name] = {
        'model_type': model_type,
        'checkpoint_url': checkpoint_url,
        'config_url': config_url,
        'custom_model_url': custom_model_url,
        'checkpoint_filename': checkpoint_filename,
        'config_filename': config_filename,
        'needs_conf_edit': True
    }
    save_custom_models(models)
    return True, f"Model '{model_name}' added successfully"
|
| 201 |
+
|
| 202 |
+
def delete_custom_model(model_name):
    """Remove a custom model from the registry and delete its downloaded files.

    File removal is best effort: a file that cannot be deleted does not
    prevent the registry entry (or the other file) from being removed.

    Args:
        model_name: Registry key of the model to delete.

    Returns:
        tuple: ``(success: bool, message: str)``.
    """
    models = load_custom_models()
    if model_name not in models:
        return False, f"Model '{model_name}' not found"

    model_config = models[model_name]

    for filename_key in ('checkpoint_filename', 'config_filename'):
        # basename(): stored names come from a user-editable JSON file and must
        # never point outside CHECKPOINT_DIR. A missing/empty name is skipped
        # so the join can never resolve to the directory itself.
        filename = os.path.basename(str(model_config.get(filename_key) or ''))
        if not filename:
            continue
        file_path = os.path.join(CHECKPOINT_DIR, filename)
        try:
            if os.path.isfile(file_path):
                os.remove(file_path)
        except OSError:
            # Handled per file so one failure does not skip the other removal.
            pass

    del models[model_name]
    save_custom_models(models)
    return True, f"Model '{model_name}' deleted successfully"
|
| 223 |
+
|
| 224 |
+
def get_custom_models_list():
    """Return ``(name, model_type)`` pairs for every registered custom model.

    Entries without a ``model_type`` are reported as ``'unknown'``.
    """
    entries = []
    for name, config in load_custom_models().items():
        model_type = config.get('model_type', 'unknown')
        entries.append((name, model_type))
    return entries
|
| 228 |
+
|
| 229 |
+
# "<indent><key>: <value>" -- the value is everything after the first ": ".
_YAML_KEY_VALUE_RE = re.compile(r'^(\s*)([^:#]+?):\s+(.+)$')
# In a plain (unquoted) scalar, whitespace followed by '#' starts a comment.
_YAML_INLINE_COMMENT_RE = re.compile(r'\s+#')
# Values starting with one of these are not plain scalars (already quoted,
# flow collection, anchor/alias/tag, block scalar, comment) and must be left
# exactly as written.
_YAML_NON_PLAIN_STARTS = ('"', "'", '{', '[', '&', '*', '!', '|', '>', '#')


def _quote_problematic_yaml_value(line):
    """Return *line* with its plain scalar value single-quoted if it needs it."""
    stripped = line.strip()
    if not stripped or stripped.startswith('#'):
        return line

    match = _YAML_KEY_VALUE_RE.match(line)
    if not match:
        return line

    indent, key, value = match.groups()
    if value.startswith(_YAML_NON_PLAIN_STARTS):
        return line

    # Separate the scalar from a trailing comment / trailing whitespace
    # (e.g. the '\r' of a CRLF file) so neither ends up inside the quotes.
    comment = _YAML_INLINE_COMMENT_RE.search(value)
    if comment:
        scalar, trailing = value[:comment.start()], value[comment.start():]
    else:
        scalar = value.rstrip()
        trailing = value[len(scalar):]

    if ':' not in scalar and '\\' not in scalar:
        return line

    # Single quotes keep backslashes and double quotes literal; the only
    # character that needs escaping is the single quote itself (doubled).
    escaped = scalar.replace("'", "''")
    return f"{indent}{key}: '{escaped}'{trailing}"


def preprocess_yaml_content(content):
    """Pre-process YAML content to fix common issues before parsing.

    Fixes:
    - Replaces tabs with spaces
    - Quotes unquoted plain values containing a colon (URLs) or a backslash
      (Windows paths)

    Values are wrapped in single quotes so that backslashes stay literal.
    Already-quoted values, flow collections, tagged/anchored values and
    inline comments are left untouched.

    Args:
        content: Raw YAML text.

    Returns:
        The pre-processed YAML text.
    """
    # Tabs are not valid YAML indentation.
    if '\t' in content:
        content = content.replace('\t', ' ')

    return '\n'.join(
        _quote_problematic_yaml_value(line) for line in content.split('\n')
    )
|
| 270 |
+
|
| 271 |
+
|
| 272 |
+
def get_yaml_error_context(content, line_num, column=None):
    """Get context around a YAML error for better debugging.

    Shows up to two lines before and one line after the failing line. The
    failing line is prefixed with ``>>> `` and every other line with an
    equally wide blank gutter so the text stays aligned.

    Args:
        content: The YAML text that failed to parse.
        line_num: 1-based number of the failing line.
        column: Optional 1-based column of the error; when given, a ``^``
            marker is placed under that column of the failing line.

    Returns:
        The formatted context, or a fixed message when *line_num* is outside
        the content.
    """
    lines = content.split('\n')
    if line_num < 1 or line_num > len(lines):
        return "Could not extract error context"

    error_marker = ">>> "
    plain_gutter = " " * len(error_marker)
    error_index = line_num - 1

    context_lines = []
    start = max(0, line_num - 3)
    end = min(len(lines), line_num + 2)

    for i in range(start, end):
        is_error_line = i == error_index
        gutter = error_marker if is_error_line else plain_gutter
        context_lines.append(f"{gutter}{i + 1}: {lines[i]}")

        # Add column indicator for the error line
        if is_error_line and column:
            # Offset = gutter (4) + line number + ": " (2) + zero-based column.
            pointer = " " * (len(str(i + 1)) + 6 + column - 1) + "^"
            context_lines.append(pointer)

    return '\n'.join(context_lines)
|
| 292 |
+
|
| 293 |
+
|
| 294 |
+
def _ensure_config_section(data, key):
    """Return ``data[key]``, creating an empty mapping when it is missing or null."""
    section = data.get(key)
    if section is None:
        section = {}
        data[key] = section
    return section


def _format_yaml_error(config_file, error, content):
    """Build the multi-line, user-facing message for a YAML parsing failure."""
    line_num = None
    column = None
    mark = getattr(error, 'problem_mark', None)
    if mark:
        # PyYAML marks are zero-based; report them one-based.
        line_num = mark.line + 1
        column = mark.column + 1

    context = get_yaml_error_context(content, line_num, column) if line_num else ""
    context_block = f"Error Context:\n{context}" if context else ""

    return "\n".join([
        "",
        f"YAML Parsing Error in config file: {config_file}",
        "",
        f"Error: {error}",
        "",
        context_block,
        "",
        "Possible causes:",
        "1. Unquoted string containing a colon (e.g., URLs like https://...)",
        "2. Unquoted Windows path with backslashes (e.g., C:\\path\\to\\file)",
        "3. Malformed YAML structure",
        "4. File corruption from previous processing",
        "",
        "Suggested fixes:",
        f"1. Delete the config file and let it re-download: {config_file}",
        "2. Manually edit the file to quote problematic values",
        "3. Check if the source config URL provides valid YAML",
        "",
    ])


def _restore_config_backup(backup_path, target_path):
    """Copy the backup over *target_path* and delete the backup (best effort)."""
    if not os.path.exists(backup_path):
        return
    try:
        shutil.copy2(backup_path, target_path)
        os.remove(backup_path)
    except OSError:
        pass


def conf_edit(config_path, chunk_size, overlap, model_name=None):
    """Edits the configuration file overlap and training flags.

    The model's native audio.chunk_size from the YAML is preserved unchanged.
    The file is backed up first and restored if anything goes wrong.

    Args:
        config_path: Path to the config file (only its basename is used; the
            file is looked up in CHECKPOINT_DIR)
        chunk_size: Unused – kept for API compatibility (native YAML value is used instead)
        overlap: Overlap between chunks
        model_name: Optional model name for re-downloading config on error
            (not used by this function itself)

    Raises:
        FileNotFoundError: The config file does not exist.
        ValueError: The file failed content validation or is not a YAML mapping.
        yaml.YAMLError: The file could not be parsed; the message contains
            the error context and suggested fixes.
    """
    full_config_path = os.path.join(CHECKPOINT_DIR, os.path.basename(config_path))
    if not os.path.exists(full_config_path):
        raise FileNotFoundError(f"Configuration file not found: {full_config_path}")

    # Create backup before modifying (best effort).
    backup_path = full_config_path + '.backup'
    try:
        shutil.copy2(full_config_path, backup_path)
    except OSError:
        pass

    try:
        with open(full_config_path, 'r', encoding='utf-8') as f:
            original_content = f.read()

        # Check if file is HTML (wrong URL was used)
        is_valid, html_error = validate_yaml_content(original_content, full_config_path)
        if not is_valid:
            raise ValueError(html_error)

        # Parse the pre-processed text directly; the file on disk is only
        # rewritten once, with the final result, so a parse failure never
        # leaves a half-processed config behind.
        content = preprocess_yaml_content(original_content)
        try:
            data = yaml.load(content, Loader=yaml.SafeLoader)
        except yaml.YAMLError as e:
            raise yaml.YAMLError(
                _format_yaml_error(full_config_path, e, content)
            ) from e

        if not isinstance(data, dict):
            raise ValueError(f"Config file does not contain a valid YAML dictionary: {full_config_path}")

        # NOTE(review): this tests the top-level mapping, not the 'training'
        # section, so training.use_amp is set to True for every config that
        # has no top-level 'use_amp' key. Behaviour kept as-is; confirm intent.
        if 'use_amp' not in data:
            _ensure_config_section(data, 'training')['use_amp'] = True

        # Do NOT overwrite audio.chunk_size; keep the model's native value from the YAML.
        _ensure_config_section(data, 'audio')

        inference = _ensure_config_section(data, 'inference')
        inference['num_overlap'] = overlap
        if inference.get('batch_size', 1) == 1:
            inference['batch_size'] = 2

        # Write updated config
        with open(full_config_path, 'w', encoding='utf-8') as f:
            yaml.dump(data, f, default_flow_style=False, sort_keys=False, Dumper=yaml.Dumper)
    except Exception:
        # Restore backup on any error, then let the caller see the failure.
        _restore_config_backup(backup_path, full_config_path)
        raise
    else:
        # Remove backup on success
        if os.path.exists(backup_path):
            try:
                os.remove(backup_path)
            except OSError:
                pass
|
| 422 |
+
|
| 423 |
+
|
| 424 |
+
def redownload_config(model_name):
    """Re-download a corrupted config file for a custom model.

    The existing config file is deleted first, then fetched again from the
    model's registered config URL.

    Args:
        model_name: Name of the custom model

    Returns:
        tuple: (success: bool, message: str)
    """
    custom_models = load_custom_models()
    if model_name not in custom_models:
        return False, f"Model '{model_name}' not found in custom models"

    config = custom_models[model_name]
    config_url = config.get('config_url')
    config_filename = config.get('config_filename')

    if not config_url or not config_filename:
        return False, f"Config URL or filename not found for model '{model_name}'"

    config_path = os.path.join(CHECKPOINT_DIR, config_filename)

    # Auto-fix URL before re-downloading
    config_url = fix_huggingface_url(config_url)

    # Delete existing config; download_file skips files that already exist.
    if os.path.exists(config_path):
        try:
            os.remove(config_path)
        except OSError as e:
            return False, f"Could not delete config file: {e}"

    # Re-download with validation
    try:
        download_file(config_url, target_filename=config_filename, validate_yaml=True)
    except Exception as e:
        return False, f"Failed to re-download config: {e}"

    # download_file only prints on a non-200 response, so confirm the file
    # was actually written before reporting success.
    if not os.path.exists(config_path):
        return False, f"Failed to re-download config: file was not created: {config_filename}"

    return True, f"Config file re-downloaded successfully: {config_filename}"
|
| 462 |
+
|
| 463 |
+
def download_file(url, path=None, target_filename=None, validate_yaml=True):
    """Downloads a file from a URL with progress reporting.

    The file is written to a temporary ``.part`` file and renamed into place
    only once it is complete, so an interrupted download never leaves a
    truncated file that later calls would mistake for a finished one.

    Progress is printed as ``[SESA_DOWNLOAD]START:<name>``,
    ``[SESA_DOWNLOAD]<name>:<percent>`` and ``[SESA_DOWNLOAD]END:<name>``.

    Args:
        url: The URL to download from.
        path: The directory to save the file to. Defaults to CHECKPOINT_DIR.
        target_filename: Optional custom filename to save as. If None, uses filename from URL.
        validate_yaml: If True and file is .yaml/.yml, validate it's not HTML

    Returns:
        None. A non-200 response is only reported via print.

    Raises:
        ValueError: A YAML download failed content validation.
        Exception: Any network or file error is printed and re-raised.
    """
    import requests

    # Auto-fix Hugging Face URLs
    url = fix_huggingface_url(url)

    if path is None:
        path = CHECKPOINT_DIR
    os.makedirs(path, exist_ok=True)

    # Use custom target filename if provided, otherwise extract from URL.
    # The query string is dropped first so "model.ckpt?download=true" is
    # saved as "model.ckpt".
    if target_filename:
        filename = target_filename
    else:
        filename = os.path.basename(quote(url.split('?')[0], safe=':/'))
    file_path = os.path.join(path, filename)
    if os.path.exists(file_path):
        print(f"File '{filename}' already exists at '{path}'.")
        return

    partial_path = file_path + '.part'
    try:
        # (connect, read) timeouts: the read timeout applies between received
        # chunks, not to the whole transfer, so large files are unaffected.
        with requests.get(url, stream=True, timeout=(10, 60)) as response:
            if response.status_code != 200:
                print(f"Error downloading '{filename}': Status code {response.status_code}")
                return

            # Get total file size for progress reporting
            total_size = int(response.headers.get('content-length', 0))

            # For YAML files, download to memory first and validate
            is_yaml_file = filename.lower().endswith(('.yaml', '.yml'))

            if is_yaml_file and validate_yaml:
                content = response.content
                is_valid, error_msg = validate_yaml_content(content, file_path)
                if not is_valid:
                    print("ERROR: Downloaded file is not valid YAML!")
                    print(error_msg)
                    raise ValueError(f"Downloaded file is HTML, not YAML. URL may be incorrect: {url}")

                with open(partial_path, 'wb') as f:
                    f.write(content)
                os.replace(partial_path, file_path)
            else:
                # Download with progress reporting
                downloaded_size = 0
                last_percent = -1
                print(f"[SESA_DOWNLOAD]START:{filename}", flush=True)

                with open(partial_path, 'wb') as f:
                    for chunk in response.iter_content(chunk_size=8192):
                        f.write(chunk)
                        downloaded_size += len(chunk)

                        # Report download progress (once per whole percent)
                        if total_size > 0:
                            percent = int((downloaded_size / total_size) * 100)
                            if percent != last_percent:
                                last_percent = percent
                                # Format: [SESA_DOWNLOAD]filename:percent
                                print(f"[SESA_DOWNLOAD]{filename}:{percent}", flush=True)

                os.replace(partial_path, file_path)
                print(f"[SESA_DOWNLOAD]END:{filename}", flush=True)
    except Exception as e:
        print(f"Error downloading file '{filename}' from '{url}': {e}")
        # Drop any incomplete temporary file before propagating the error.
        try:
            os.remove(partial_path)
        except OSError:
            pass
        raise
|
| 531 |
|
| 532 |
# Keep the model configurations in a categorized dictionary
|
| 533 |
MODEL_CONFIGS = {
|
| 534 |
"Vocal Models": {
|
| 535 |
+
# === NEW MODELS (at the top) ===
|
| 536 |
+
'bs_roformer_voc_hyperacev2 (by unwa)': {
|
| 537 |
+
'model_type': 'bs_roformer_custom',
|
| 538 |
+
'config_path': os.path.join(CHECKPOINT_DIR, 'config_hyperacev2_voc.yaml'),
|
| 539 |
+
'start_check_point': os.path.join(CHECKPOINT_DIR, 'bs_roformer_voc_hyperacev2.ckpt'),
|
| 540 |
+
'download_urls': [
|
| 541 |
+
('https://huggingface.co/pcunwa/BS-Roformer-HyperACE/resolve/main/v2_voc/config.yaml', 'config_hyperacev2_voc.yaml'),
|
| 542 |
+
'https://huggingface.co/pcunwa/BS-Roformer-HyperACE/resolve/main/v2_voc/bs_roformer_voc_hyperacev2.ckpt'
|
| 543 |
+
],
|
| 544 |
+
'custom_model_url': 'https://huggingface.co/pcunwa/BS-Roformer-HyperACE/resolve/main/v2_voc/bs_roformer.py',
|
| 545 |
+
'needs_conf_edit': True
|
| 546 |
+
},
|
| 547 |
+
'BS-Roformer-Resurrection (by unwa)': {
|
| 548 |
+
'model_type': 'bs_roformer',
|
| 549 |
+
'config_path': os.path.join(CHECKPOINT_DIR, 'BS-Roformer-Resurrection-Config.yaml'),
|
| 550 |
+
'start_check_point': os.path.join(CHECKPOINT_DIR, 'BS-Roformer-Resurrection.ckpt'),
|
| 551 |
+
'download_urls': [
|
| 552 |
+
'https://huggingface.co/pcunwa/BS-Roformer-Resurrection/resolve/main/BS-Roformer-Resurrection-Config.yaml',
|
| 553 |
+
'https://huggingface.co/pcunwa/BS-Roformer-Resurrection/resolve/main/BS-Roformer-Resurrection.ckpt'
|
| 554 |
+
],
|
| 555 |
+
'needs_conf_edit': True
|
| 556 |
+
},
|
| 557 |
+
'bs_roformer_revive3e (by unwa)': {
|
| 558 |
+
'model_type': 'bs_roformer',
|
| 559 |
+
'config_path': os.path.join(CHECKPOINT_DIR, 'config_revive.yaml'),
|
| 560 |
+
'start_check_point': os.path.join(CHECKPOINT_DIR, 'bs_roformer_revive3e.ckpt'),
|
| 561 |
+
'download_urls': [
|
| 562 |
+
('https://huggingface.co/pcunwa/BS-Roformer-Revive/resolve/main/config.yaml', 'config_revive.yaml'),
|
| 563 |
+
'https://huggingface.co/pcunwa/BS-Roformer-Revive/resolve/main/bs_roformer_revive3e.ckpt'
|
| 564 |
+
],
|
| 565 |
+
'needs_conf_edit': True
|
| 566 |
+
},
|
| 567 |
+
'bs_roformer_revive2 (by unwa)': {
|
| 568 |
+
'model_type': 'bs_roformer',
|
| 569 |
+
'config_path': os.path.join(CHECKPOINT_DIR, 'config_revive.yaml'),
|
| 570 |
+
'start_check_point': os.path.join(CHECKPOINT_DIR, 'bs_roformer_revive2.ckpt'),
|
| 571 |
+
'download_urls': [
|
| 572 |
+
('https://huggingface.co/pcunwa/BS-Roformer-Revive/resolve/main/config.yaml', 'config_revive.yaml'),
|
| 573 |
+
'https://huggingface.co/pcunwa/BS-Roformer-Revive/resolve/main/bs_roformer_revive2.ckpt'
|
| 574 |
+
],
|
| 575 |
+
'needs_conf_edit': True
|
| 576 |
+
},
|
| 577 |
+
'bs_roformer_revive (by unwa)': {
|
| 578 |
+
'model_type': 'bs_roformer',
|
| 579 |
+
'config_path': os.path.join(CHECKPOINT_DIR, 'config_revive.yaml'),
|
| 580 |
+
'start_check_point': os.path.join(CHECKPOINT_DIR, 'bs_roformer_revive.ckpt'),
|
| 581 |
+
'download_urls': [
|
| 582 |
+
('https://huggingface.co/pcunwa/BS-Roformer-Revive/resolve/main/config.yaml', 'config_revive.yaml'),
|
| 583 |
+
'https://huggingface.co/pcunwa/BS-Roformer-Revive/resolve/main/bs_roformer_revive.ckpt'
|
| 584 |
+
],
|
| 585 |
+
'needs_conf_edit': True
|
| 586 |
+
},
|
| 587 |
+
'karaoke_bs_roformer_anvuew (by anvuew)': {
|
| 588 |
+
'model_type': 'bs_roformer',
|
| 589 |
+
'config_path': os.path.join(CHECKPOINT_DIR, 'karaoke_bs_roformer_anvuew.yaml'),
|
| 590 |
+
'start_check_point': os.path.join(CHECKPOINT_DIR, 'karaoke_bs_roformer_anvuew.ckpt'),
|
| 591 |
+
'download_urls': [
|
| 592 |
+
'https://huggingface.co/anvuew/karaoke_bs_roformer/resolve/main/karaoke_bs_roformer_anvuew.yaml',
|
| 593 |
+
'https://huggingface.co/anvuew/karaoke_bs_roformer/resolve/main/karaoke_bs_roformer_anvuew.ckpt'
|
| 594 |
+
],
|
| 595 |
+
'needs_conf_edit': True
|
| 596 |
+
},
|
| 597 |
+
# === EXISTING MODELS ===
|
| 598 |
'VOCALS-big_beta6X (by Unwa)': {
|
| 599 |
'model_type': 'mel_band_roformer',
|
| 600 |
'config_path': os.path.join(CHECKPOINT_DIR, 'big_beta6x.yaml'),
|
|
|
|
| 814 |
'https://huggingface.co/Aname-Tommy/MelBandRoformers/blob/main/FullnessVocalModel.ckpt'
|
| 815 |
],
|
| 816 |
'needs_conf_edit': True
|
| 817 |
+
},
|
| 818 |
+
'voc_fv4 (by Gabox)': {
|
| 819 |
+
'model_type': 'mel_band_roformer',
|
| 820 |
+
'config_path': os.path.join(CHECKPOINT_DIR, 'voc_gabox.yaml'),
|
| 821 |
+
'start_check_point': os.path.join(CHECKPOINT_DIR, 'voc_fv4.ckpt'),
|
| 822 |
+
'download_urls': [
|
| 823 |
+
'https://huggingface.co/GaboxR67/MelBandRoformers/resolve/main/melbandroformers/vocals/voc_gabox.yaml',
|
| 824 |
+
'https://huggingface.co/GaboxR67/MelBandRoformers/resolve/main/melbandroformers/vocals/voc_fv4.ckpt'
|
| 825 |
+
],
|
| 826 |
+
'needs_conf_edit': True
|
| 827 |
+
},
|
| 828 |
+
'voc_fv5 (by Gabox)': {
|
| 829 |
+
'model_type': 'mel_band_roformer',
|
| 830 |
+
'config_path': os.path.join(CHECKPOINT_DIR, 'voc_gabox.yaml'),
|
| 831 |
+
'start_check_point': os.path.join(CHECKPOINT_DIR, 'voc_fv5.ckpt'),
|
| 832 |
+
'download_urls': [
|
| 833 |
+
'https://huggingface.co/GaboxR67/MelBandRoformers/resolve/main/melbandroformers/vocals/voc_gabox.yaml',
|
| 834 |
+
'https://huggingface.co/GaboxR67/MelBandRoformers/resolve/main/melbandroformers/vocals/voc_fv5.ckpt'
|
| 835 |
+
],
|
| 836 |
+
'needs_conf_edit': True
|
| 837 |
+
},
|
| 838 |
+
'voc_fv6 (by Gabox)': {
|
| 839 |
+
'model_type': 'mel_band_roformer',
|
| 840 |
+
'config_path': os.path.join(CHECKPOINT_DIR, 'voc_gabox.yaml'),
|
| 841 |
+
'start_check_point': os.path.join(CHECKPOINT_DIR, 'voc_fv6.ckpt'),
|
| 842 |
+
'download_urls': [
|
| 843 |
+
'https://huggingface.co/GaboxR67/MelBandRoformers/resolve/main/melbandroformers/vocals/voc_gabox.yaml',
|
| 844 |
+
'https://huggingface.co/GaboxR67/MelBandRoformers/resolve/main/melbandroformers/vocals/voc_fv6.ckpt'
|
| 845 |
+
],
|
| 846 |
+
'needs_conf_edit': True
|
| 847 |
+
},
|
| 848 |
+
'voc_fv7 (by Gabox)': {
|
| 849 |
+
'model_type': 'mel_band_roformer',
|
| 850 |
+
'config_path': os.path.join(CHECKPOINT_DIR, 'v7.yaml'),
|
| 851 |
+
'start_check_point': os.path.join(CHECKPOINT_DIR, 'voc_fv7.ckpt'),
|
| 852 |
+
'download_urls': [
|
| 853 |
+
'https://huggingface.co/GaboxR67/MelBandRoformers/resolve/main/melbandroformers/vocals/v7.yaml',
|
| 854 |
+
'https://huggingface.co/GaboxR67/MelBandRoformers/resolve/main/melbandroformers/vocals/voc_fv7.ckpt'
|
| 855 |
+
],
|
| 856 |
+
'needs_conf_edit': True
|
| 857 |
+
},
|
| 858 |
+
'vocfv7beta1 (by Gabox)': {
|
| 859 |
+
'model_type': 'mel_band_roformer',
|
| 860 |
+
'config_path': os.path.join(CHECKPOINT_DIR, 'voc_gabox.yaml'),
|
| 861 |
+
'start_check_point': os.path.join(CHECKPOINT_DIR, 'vocfv7beta1.ckpt'),
|
| 862 |
+
'download_urls': [
|
| 863 |
+
'https://huggingface.co/GaboxR67/MelBandRoformers/resolve/main/melbandroformers/vocals/voc_gabox.yaml',
|
| 864 |
+
'https://huggingface.co/GaboxR67/MelBandRoformers/resolve/main/melbandroformers/experimental/vocfv7beta1.ckpt'
|
| 865 |
+
],
|
| 866 |
+
'needs_conf_edit': True
|
| 867 |
+
},
|
| 868 |
+
'vocfv7beta2 (by Gabox)': {
|
| 869 |
+
'model_type': 'mel_band_roformer',
|
| 870 |
+
'config_path': os.path.join(CHECKPOINT_DIR, 'voc_gabox.yaml'),
|
| 871 |
+
'start_check_point': os.path.join(CHECKPOINT_DIR, 'vocfv7beta2.ckpt'),
|
| 872 |
+
'download_urls': [
|
| 873 |
+
'https://huggingface.co/GaboxR67/MelBandRoformers/resolve/main/melbandroformers/vocals/voc_gabox.yaml',
|
| 874 |
+
'https://huggingface.co/GaboxR67/MelBandRoformers/resolve/main/melbandroformers/experimental/vocfv7beta2.ckpt'
|
| 875 |
+
],
|
| 876 |
+
'needs_conf_edit': True
|
| 877 |
+
},
|
| 878 |
+
'vocfv7beta3 (by Gabox)': {
|
| 879 |
+
'model_type': 'mel_band_roformer',
|
| 880 |
+
'config_path': os.path.join(CHECKPOINT_DIR, 'voc_gabox.yaml'),
|
| 881 |
+
'start_check_point': os.path.join(CHECKPOINT_DIR, 'vocfv7beta3.ckpt'),
|
| 882 |
+
'download_urls': [
|
| 883 |
+
'https://huggingface.co/GaboxR67/MelBandRoformers/resolve/main/melbandroformers/vocals/voc_gabox.yaml',
|
| 884 |
+
'https://huggingface.co/GaboxR67/MelBandRoformers/resolve/main/melbandroformers/experimental/vocfv7beta3.ckpt'
|
| 885 |
+
],
|
| 886 |
+
'needs_conf_edit': True
|
| 887 |
+
},
|
| 888 |
+
'MelBandRoformerSYHFTV3Epsilon (by SYH99999)': {
|
| 889 |
+
'model_type': 'mel_band_roformer',
|
| 890 |
+
'config_path': os.path.join(CHECKPOINT_DIR, 'config_vocals_mel_band_roformer_ft.yaml'),
|
| 891 |
+
'start_check_point': os.path.join(CHECKPOINT_DIR, 'MelBandRoformerSYHFTV3Epsilon.ckpt'),
|
| 892 |
+
'download_urls': [
|
| 893 |
+
'https://huggingface.co/SYH99999/MelBandRoformerSYHFT/resolve/main/config_vocals_mel_band_roformer_ft.yaml',
|
| 894 |
+
'https://huggingface.co/SYH99999/MelBandRoformerSYHFTV3Epsilon/resolve/main/MelBandRoformerSYHFTV3Epsilon.ckpt'
|
| 895 |
+
],
|
| 896 |
+
'needs_conf_edit': True
|
| 897 |
+
},
|
| 898 |
+
'MelBandRoformerBigSYHFTV1 (by SYH99999)': {
|
| 899 |
+
'model_type': 'mel_band_roformer',
|
| 900 |
+
'config_path': os.path.join(CHECKPOINT_DIR, 'config_big_syhft.yaml'),
|
| 901 |
+
'start_check_point': os.path.join(CHECKPOINT_DIR, 'MelBandRoformerBigSYHFTV1.ckpt'),
|
| 902 |
+
'download_urls': [
|
| 903 |
+
'https://huggingface.co/SYH99999/MelBandRoformerBigSYHFTV1Fast/resolve/main/config.yaml',
|
| 904 |
+
'https://huggingface.co/SYH99999/MelBandRoformerBigSYHFTV1Fast/resolve/main/MelBandRoformerBigSYHFTV1.ckpt'
|
| 905 |
+
],
|
| 906 |
+
'needs_conf_edit': True
|
| 907 |
+
},
|
| 908 |
+
'model_chorus_bs_roformer_ep_146 (by Sucial)': {
|
| 909 |
+
'model_type': 'bs_roformer',
|
| 910 |
+
'config_path': os.path.join(CHECKPOINT_DIR, 'config_chorus_male_female_bs_roformer.yaml'),
|
| 911 |
+
'start_check_point': os.path.join(CHECKPOINT_DIR, 'model_chorus_bs_roformer_ep_146_sdr_23.8613.ckpt'),
|
| 912 |
+
'download_urls': [
|
| 913 |
+
'https://huggingface.co/Sucial/Chorus_Male_Female_BS_Roformer/resolve/main/config_chorus_male_female_bs_roformer.yaml',
|
| 914 |
+
'https://huggingface.co/Sucial/Chorus_Male_Female_BS_Roformer/resolve/main/model_chorus_bs_roformer_ep_146_sdr_23.8613.ckpt'
|
| 915 |
+
],
|
| 916 |
+
'needs_conf_edit': True
|
| 917 |
+
},
|
| 918 |
+
'model_chorus_bs_roformer_ep_267 (by Sucial)': {
|
| 919 |
+
'model_type': 'bs_roformer',
|
| 920 |
+
'config_path': os.path.join(CHECKPOINT_DIR, 'config_chorus_male_female_bs_roformer.yaml'),
|
| 921 |
+
'start_check_point': os.path.join(CHECKPOINT_DIR, 'model_chorus_bs_roformer_ep_267_sdr_24.1275.ckpt'),
|
| 922 |
+
'download_urls': [
|
| 923 |
+
'https://huggingface.co/Sucial/Chorus_Male_Female_BS_Roformer/resolve/main/config_chorus_male_female_bs_roformer.yaml',
|
| 924 |
+
'https://huggingface.co/Sucial/Chorus_Male_Female_BS_Roformer/resolve/main/model_chorus_bs_roformer_ep_267_sdr_24.1275.ckpt'
|
| 925 |
+
],
|
| 926 |
+
'needs_conf_edit': True
|
| 927 |
+
},
|
| 928 |
+
'BS-Rofo-SW-Fixed (by jarredou)': {
|
| 929 |
+
'model_type': 'bs_roformer',
|
| 930 |
+
'config_path': os.path.join(CHECKPOINT_DIR, 'BS-Rofo-SW-Fixed.yaml'),
|
| 931 |
+
'start_check_point': os.path.join(CHECKPOINT_DIR, 'BS-Rofo-SW-Fixed.ckpt'),
|
| 932 |
+
'download_urls': [
|
| 933 |
+
'https://huggingface.co/jarredou/BS-ROFO-SW-Fixed/resolve/main/BS-Rofo-SW-Fixed.yaml',
|
| 934 |
+
'https://huggingface.co/jarredou/BS-ROFO-SW-Fixed/resolve/main/BS-Rofo-SW-Fixed.ckpt'
|
| 935 |
+
],
|
| 936 |
+
'needs_conf_edit': True
|
| 937 |
+
},
|
| 938 |
+
'BS_ResurrectioN (by Gabox)': {
|
| 939 |
+
'model_type': 'bs_roformer',
|
| 940 |
+
'config_path': os.path.join(CHECKPOINT_DIR, 'BS-Roformer-Resurrection-Inst-Config.yaml'),
|
| 941 |
+
'start_check_point': os.path.join(CHECKPOINT_DIR, 'BS_ResurrectioN.ckpt'),
|
| 942 |
+
'download_urls': [
|
| 943 |
+
'https://huggingface.co/pcunwa/BS-Roformer-Resurrection/resolve/main/BS-Roformer-Resurrection-Inst-Config.yaml',
|
| 944 |
+
'https://huggingface.co/GaboxR67/MelBandRoformers/resolve/main/melbandroformers/experimental/BS_ResurrectioN.ckpt'
|
| 945 |
+
],
|
| 946 |
+
'needs_conf_edit': True
|
| 947 |
}
|
| 948 |
},
|
| 949 |
"Instrumental Models": {
|
| 950 |
+
# === NEW MODELS (at the top) ===
|
| 951 |
+
'Neo_InstVFX (by natanworkspace)': {
|
| 952 |
+
'model_type': 'mel_band_roformer',
|
| 953 |
+
'config_path': os.path.join(CHECKPOINT_DIR, 'config_neo_inst.yaml'),
|
| 954 |
+
'start_check_point': os.path.join(CHECKPOINT_DIR, 'Neo_InstVFX.ckpt'),
|
| 955 |
+
'download_urls': [
|
| 956 |
+
'https://huggingface.co/natanworkspace/melband_roformer/resolve/main/config_neo_inst.yaml',
|
| 957 |
+
'https://huggingface.co/natanworkspace/melband_roformer/resolve/main/Neo_InstVFX.ckpt'
|
| 958 |
+
],
|
| 959 |
+
'needs_conf_edit': True
|
| 960 |
+
},
|
| 961 |
+
'BS-Roformer-Resurrection-Inst (by unwa)': {
|
| 962 |
+
'model_type': 'bs_roformer',
|
| 963 |
+
'config_path': os.path.join(CHECKPOINT_DIR, 'BS-Roformer-Resurrection-Inst-Config.yaml'),
|
| 964 |
+
'start_check_point': os.path.join(CHECKPOINT_DIR, 'BS-Roformer-Resurrection-Inst.ckpt'),
|
| 965 |
+
'download_urls': [
|
| 966 |
+
'https://huggingface.co/pcunwa/BS-Roformer-Resurrection/resolve/main/BS-Roformer-Resurrection-Inst-Config.yaml',
|
| 967 |
+
'https://huggingface.co/pcunwa/BS-Roformer-Resurrection/resolve/main/BS-Roformer-Resurrection-Inst.ckpt'
|
| 968 |
+
],
|
| 969 |
+
'needs_conf_edit': True
|
| 970 |
+
},
|
| 971 |
+
'bs_roformer_inst_hyperacev2 (by unwa)': {
|
| 972 |
+
'model_type': 'bs_roformer_custom',
|
| 973 |
+
'config_path': os.path.join(CHECKPOINT_DIR, 'config_hyperacev2_inst.yaml'),
|
| 974 |
+
'start_check_point': os.path.join(CHECKPOINT_DIR, 'bs_roformer_inst_hyperacev2.ckpt'),
|
| 975 |
+
'download_urls': [
|
| 976 |
+
('https://huggingface.co/pcunwa/BS-Roformer-HyperACE/resolve/main/v2_inst/config.yaml', 'config_hyperacev2_inst.yaml'),
|
| 977 |
+
'https://huggingface.co/pcunwa/BS-Roformer-HyperACE/resolve/main/v2_inst/bs_roformer_inst_hyperacev2.ckpt'
|
| 978 |
+
],
|
| 979 |
+
'custom_model_url': 'https://huggingface.co/pcunwa/BS-Roformer-HyperACE/resolve/main/v2_inst/bs_roformer.py',
|
| 980 |
+
'needs_conf_edit': True
|
| 981 |
+
},
|
| 982 |
+
'BS-Roformer-Large-Inst (by unwa)': {
|
| 983 |
+
'model_type': 'bs_roformer_custom',
|
| 984 |
+
'config_path': os.path.join(CHECKPOINT_DIR, 'config_bs_large_inst.yaml'),
|
| 985 |
+
'start_check_point': os.path.join(CHECKPOINT_DIR, 'bs_large_v2_inst.ckpt'),
|
| 986 |
+
'download_urls': [
|
| 987 |
+
('https://huggingface.co/pcunwa/BS-Roformer-Large-Inst/resolve/main/config.yaml', 'config_bs_large_inst.yaml'),
|
| 988 |
+
'https://huggingface.co/pcunwa/BS-Roformer-Large-Inst/resolve/main/bs_large_v2_inst.ckpt'
|
| 989 |
+
],
|
| 990 |
+
'custom_model_url': 'https://huggingface.co/pcunwa/BS-Roformer-Large-Inst/resolve/main/bs_roformer.py',
|
| 991 |
+
'needs_conf_edit': True
|
| 992 |
+
},
|
| 993 |
+
'bs_roformer_fno (by unwa)': {
|
| 994 |
+
'model_type': 'bs_roformer_custom',
|
| 995 |
+
'config_path': os.path.join(CHECKPOINT_DIR, 'bsrofo_fno.yaml'),
|
| 996 |
+
'start_check_point': os.path.join(CHECKPOINT_DIR, 'bs_roformer_fno.ckpt'),
|
| 997 |
+
'download_urls': [
|
| 998 |
+
'https://huggingface.co/pcunwa/BS-Roformer-Inst-FNO/resolve/main/bsrofo_fno.yaml',
|
| 999 |
+
'https://huggingface.co/pcunwa/BS-Roformer-Inst-FNO/resolve/main/bs_roformer_fno.ckpt'
|
| 1000 |
+
],
|
| 1001 |
+
'custom_model_url': 'https://huggingface.co/listra92/MyModels/resolve/main/misc/bs_roformer.py',
|
| 1002 |
+
'needs_conf_edit': True
|
| 1003 |
+
},
|
| 1004 |
+
'Rifforge_final_sdr_14.24 (by meskvlla33)': {
|
| 1005 |
+
'model_type': 'mel_band_roformer',
|
| 1006 |
+
'config_path': os.path.join(CHECKPOINT_DIR, 'config_rifforge_full_mesk.yaml'),
|
| 1007 |
+
'start_check_point': os.path.join(CHECKPOINT_DIR, 'rifforge_full_sdr_14.2436.ckpt'),
|
| 1008 |
+
'download_urls': [
|
| 1009 |
+
'https://huggingface.co/meskvlla33/rifforge/resolve/main/config_rifforge_full_mesk.yaml',
|
| 1010 |
+
'https://huggingface.co/meskvlla33/rifforge/resolve/main/rifforge_full_sdr_14.2436.ckpt'
|
| 1011 |
+
],
|
| 1012 |
+
'needs_conf_edit': True
|
| 1013 |
+
},
|
| 1014 |
+
# === EXISTING MODELS ===
|
| 1015 |
'Inst_GaboxFv8 (by Gabox)': {
|
| 1016 |
'model_type': 'mel_band_roformer',
|
| 1017 |
'config_path': os.path.join(CHECKPOINT_DIR, 'inst_gabox.yaml'),
|
|
|
|
| 1241 |
'https://huggingface.co/GaboxR67/MelBandRoformers/resolve/main/melbandroformers/instrumental/Inst_GaboxV7.ckpt'
|
| 1242 |
],
|
| 1243 |
'needs_conf_edit': True
|
| 1244 |
+
},
|
| 1245 |
+
'inst_Fv4 (by Gabox)': {
|
| 1246 |
+
'model_type': 'mel_band_roformer',
|
| 1247 |
+
'config_path': os.path.join(CHECKPOINT_DIR, 'inst_gabox.yaml'),
|
| 1248 |
+
'start_check_point': os.path.join(CHECKPOINT_DIR, 'inst_Fv4.ckpt'),
|
| 1249 |
+
'download_urls': [
|
| 1250 |
+
'https://huggingface.co/GaboxR67/MelBandRoformers/resolve/main/melbandroformers/instrumental/inst_gabox.yaml',
|
| 1251 |
+
'https://huggingface.co/GaboxR67/MelBandRoformers/resolve/main/melbandroformers/instrumental/inst_Fv4.ckpt'
|
| 1252 |
+
],
|
| 1253 |
+
'needs_conf_edit': True
|
| 1254 |
+
},
|
| 1255 |
+
'INSTV7N (by Gabox)': {
|
| 1256 |
+
'model_type': 'mel_band_roformer',
|
| 1257 |
+
'config_path': os.path.join(CHECKPOINT_DIR, 'inst_gabox.yaml'),
|
| 1258 |
+
'start_check_point': os.path.join(CHECKPOINT_DIR, 'INSTV7N.ckpt'),
|
| 1259 |
+
'download_urls': [
|
| 1260 |
+
'https://huggingface.co/GaboxR67/MelBandRoformers/resolve/main/melbandroformers/instrumental/inst_gabox.yaml',
|
| 1261 |
+
'https://huggingface.co/GaboxR67/MelBandRoformers/resolve/main/melbandroformers/instrumental/INSTV7N.ckpt'
|
| 1262 |
+
],
|
| 1263 |
+
'needs_conf_edit': True
|
| 1264 |
+
},
|
| 1265 |
+
'inst_fv7b (by Gabox)': {
|
| 1266 |
+
'model_type': 'mel_band_roformer',
|
| 1267 |
+
'config_path': os.path.join(CHECKPOINT_DIR, 'inst_gabox.yaml'),
|
| 1268 |
+
'start_check_point': os.path.join(CHECKPOINT_DIR, 'inst_fv7b.ckpt'),
|
| 1269 |
+
'download_urls': [
|
| 1270 |
+
'https://huggingface.co/GaboxR67/MelBandRoformers/resolve/main/melbandroformers/instrumental/inst_gabox.yaml',
|
| 1271 |
+
'https://huggingface.co/GaboxR67/MelBandRoformers/resolve/main/melbandroformers/experimental/inst_fv7b.ckpt'
|
| 1272 |
+
],
|
| 1273 |
+
'needs_conf_edit': True
|
| 1274 |
+
},
|
| 1275 |
+
'inst_fv7z (by Gabox)': {
|
| 1276 |
+
'model_type': 'mel_band_roformer',
|
| 1277 |
+
'config_path': os.path.join(CHECKPOINT_DIR, 'inst_gabox.yaml'),
|
| 1278 |
+
'start_check_point': os.path.join(CHECKPOINT_DIR, 'Inst_GaboxFv7z.ckpt'),
|
| 1279 |
+
'download_urls': [
|
| 1280 |
+
'https://huggingface.co/GaboxR67/MelBandRoformers/resolve/main/melbandroformers/instrumental/inst_gabox.yaml',
|
| 1281 |
+
'https://huggingface.co/GaboxR67/MelBandRoformers/resolve/main/melbandroformers/instrumental/Inst_GaboxFv7z.ckpt'
|
| 1282 |
+
],
|
| 1283 |
+
'needs_conf_edit': True
|
| 1284 |
+
},
|
| 1285 |
+
'Inst_GaboxFv9 (by Gabox)': {
|
| 1286 |
+
'model_type': 'mel_band_roformer',
|
| 1287 |
+
'config_path': os.path.join(CHECKPOINT_DIR, 'inst_gabox.yaml'),
|
| 1288 |
+
'start_check_point': os.path.join(CHECKPOINT_DIR, 'Inst_GaboxFv9.ckpt'),
|
| 1289 |
+
'download_urls': [
|
| 1290 |
+
'https://huggingface.co/GaboxR67/MelBandRoformers/resolve/main/melbandroformers/instrumental/inst_gabox.yaml',
|
| 1291 |
+
'https://huggingface.co/GaboxR67/MelBandRoformers/resolve/main/melbandroformers/instrumental/Inst_GaboxFv9.ckpt'
|
| 1292 |
+
],
|
| 1293 |
+
'needs_conf_edit': True
|
| 1294 |
+
},
|
| 1295 |
+
'inst_gaboxFlowersV10 (by Gabox)': {
|
| 1296 |
+
'model_type': 'mel_band_roformer',
|
| 1297 |
+
'config_path': os.path.join(CHECKPOINT_DIR, 'v10.yaml'),
|
| 1298 |
+
'start_check_point': os.path.join(CHECKPOINT_DIR, 'inst_gaboxFlowersV10.ckpt'),
|
| 1299 |
+
'download_urls': [
|
| 1300 |
+
'https://huggingface.co/GaboxR67/MelBandRoformers/resolve/main/melbandroformers/instrumental/v10.yaml',
|
| 1301 |
+
'https://huggingface.co/GaboxR67/MelBandRoformers/resolve/main/melbandroformers/instrumental/inst_gaboxFlowersV10.ckpt'
|
| 1302 |
+
],
|
| 1303 |
+
'needs_conf_edit': True
|
| 1304 |
+
},
|
| 1305 |
+
'Inst_FV8b (by Gabox)': {
|
| 1306 |
+
'model_type': 'mel_band_roformer',
|
| 1307 |
+
'config_path': os.path.join(CHECKPOINT_DIR, 'inst_gabox.yaml'),
|
| 1308 |
+
'start_check_point': os.path.join(CHECKPOINT_DIR, 'Inst_FV8b.ckpt'),
|
| 1309 |
+
'download_urls': [
|
| 1310 |
+
'https://huggingface.co/GaboxR67/MelBandRoformers/resolve/main/melbandroformers/instrumental/inst_gabox.yaml',
|
| 1311 |
+
'https://huggingface.co/GaboxR67/MelBandRoformers/resolve/main/melbandroformers/experimental/Inst_FV8b.ckpt'
|
| 1312 |
+
],
|
| 1313 |
+
'needs_conf_edit': True
|
| 1314 |
+
},
|
| 1315 |
+
'Inst_Fv8 (by Gabox)': {
|
| 1316 |
+
'model_type': 'mel_band_roformer',
|
| 1317 |
+
'config_path': os.path.join(CHECKPOINT_DIR, 'inst_gabox.yaml'),
|
| 1318 |
+
'start_check_point': os.path.join(CHECKPOINT_DIR, 'Inst_Fv8.ckpt'),
|
| 1319 |
+
'download_urls': [
|
| 1320 |
+
'https://huggingface.co/GaboxR67/MelBandRoformers/resolve/main/melbandroformers/instrumental/inst_gabox.yaml',
|
| 1321 |
+
'https://huggingface.co/GaboxR67/MelBandRoformers/resolve/main/melbandroformers/experimental/Inst_Fv8.ckpt'
|
| 1322 |
+
],
|
| 1323 |
+
'needs_conf_edit': True
|
| 1324 |
}
|
| 1325 |
},
|
| 1326 |
"4-Stem Models": {
|
|
|
|
| 1477 |
'https://huggingface.co/anvuew/dereverb_mel_band_roformer/resolve/main/dereverb_mel_band_roformer_mono_anvuew_sdr_20.4029.ckpt'
|
| 1478 |
],
|
| 1479 |
'needs_conf_edit': True
|
| 1480 |
+
},
|
| 1481 |
+
'dereverb-echo_128_4_4 (by Sucial)': {
|
| 1482 |
+
'model_type': 'mel_band_roformer',
|
| 1483 |
+
'config_path': os.path.join(CHECKPOINT_DIR, 'config_dereverb-echo_128_4_4_mel_band_roformer.yaml'),
|
| 1484 |
+
'start_check_point': os.path.join(CHECKPOINT_DIR, 'dereverb-echo_128_4_4_mel_band_roformer_sdr_dry_12.4235.ckpt'),
|
| 1485 |
+
'download_urls': [
|
| 1486 |
+
'https://huggingface.co/Sucial/Dereverb-Echo_Mel_Band_Roformer/resolve/main/config_dereverb-echo_128_4_4_mel_band_roformer.yaml',
|
| 1487 |
+
'https://huggingface.co/Sucial/Dereverb-Echo_Mel_Band_Roformer/resolve/main/dereverb-echo_128_4_4_mel_band_roformer_sdr_dry_12.4235.ckpt'
|
| 1488 |
+
],
|
| 1489 |
+
'needs_conf_edit': True
|
| 1490 |
+
},
|
| 1491 |
+
'dereverb_echo_mbr_v2 (by Sucial)': {
|
| 1492 |
+
'model_type': 'mel_band_roformer',
|
| 1493 |
+
'config_path': os.path.join(CHECKPOINT_DIR, 'config_dereverb_echo_mbr_v2.yaml'),
|
| 1494 |
+
'start_check_point': os.path.join(CHECKPOINT_DIR, 'dereverb_echo_mbr_v2_sdr_dry_13.4843.ckpt'),
|
| 1495 |
+
'download_urls': [
|
| 1496 |
+
'https://huggingface.co/Sucial/Dereverb-Echo_Mel_Band_Roformer/resolve/main/config_dereverb_echo_mbr_v2.yaml',
|
| 1497 |
+
'https://huggingface.co/Sucial/Dereverb-Echo_Mel_Band_Roformer/resolve/main/dereverb_echo_mbr_v2_sdr_dry_13.4843.ckpt'
|
| 1498 |
+
],
|
| 1499 |
+
'needs_conf_edit': True
|
| 1500 |
+
},
|
| 1501 |
+
'de_big_reverb_mbr_ep_362 (by Sucial)': {
|
| 1502 |
+
'model_type': 'mel_band_roformer',
|
| 1503 |
+
'config_path': os.path.join(CHECKPOINT_DIR, 'config_dereverb_echo_mbr_v2.yaml'),
|
| 1504 |
+
'start_check_point': os.path.join(CHECKPOINT_DIR, 'de_big_reverb_mbr_ep_362.ckpt'),
|
| 1505 |
+
'download_urls': [
|
| 1506 |
+
'https://huggingface.co/Sucial/Dereverb-Echo_Mel_Band_Roformer/resolve/main/config_dereverb_echo_mbr_v2.yaml',
|
| 1507 |
+
'https://huggingface.co/Sucial/Dereverb-Echo_Mel_Band_Roformer/resolve/main/de_big_reverb_mbr_ep_362.ckpt'
|
| 1508 |
+
],
|
| 1509 |
+
'needs_conf_edit': True
|
| 1510 |
+
},
|
| 1511 |
+
'de_super_big_reverb_mbr_ep_346 (by Sucial)': {
|
| 1512 |
+
'model_type': 'mel_band_roformer',
|
| 1513 |
+
'config_path': os.path.join(CHECKPOINT_DIR, 'config_dereverb_echo_mbr_v2.yaml'),
|
| 1514 |
+
'start_check_point': os.path.join(CHECKPOINT_DIR, 'de_super_big_reverb_mbr_ep_346.ckpt'),
|
| 1515 |
+
'download_urls': [
|
| 1516 |
+
'https://huggingface.co/Sucial/Dereverb-Echo_Mel_Band_Roformer/resolve/main/config_dereverb_echo_mbr_v2.yaml',
|
| 1517 |
+
'https://huggingface.co/Sucial/Dereverb-Echo_Mel_Band_Roformer/resolve/main/de_super_big_reverb_mbr_ep_346.ckpt'
|
| 1518 |
+
],
|
| 1519 |
+
'needs_conf_edit': True
|
| 1520 |
+
},
|
| 1521 |
+
'dereverb_room (by anvuew)': {
|
| 1522 |
+
'model_type': 'bs_roformer',
|
| 1523 |
+
'config_path': os.path.join(CHECKPOINT_DIR, 'dereverb_room_anvuew.yaml'),
|
| 1524 |
+
'start_check_point': os.path.join(CHECKPOINT_DIR, 'dereverb_room_anvuew_sdr_13.7432.ckpt'),
|
| 1525 |
+
'download_urls': [
|
| 1526 |
+
'https://huggingface.co/anvuew/dereverb_room/resolve/main/dereverb_room_anvuew.yaml',
|
| 1527 |
+
'https://huggingface.co/anvuew/dereverb_room/resolve/main/dereverb_room_anvuew_sdr_13.7432.ckpt'
|
| 1528 |
+
],
|
| 1529 |
+
'needs_conf_edit': True
|
| 1530 |
}
|
| 1531 |
},
|
| 1532 |
"Karaoke": {
|
|
|
|
| 1540 |
],
|
| 1541 |
'needs_conf_edit': True
|
| 1542 |
},
|
| 1543 |
+
'KaraokeGabox (by Gabox)': {
|
| 1544 |
'model_type': 'mel_band_roformer',
|
| 1545 |
+
'config_path': os.path.join(CHECKPOINT_DIR, 'karaokegabox_1750911344.yaml'),
|
| 1546 |
+
'start_check_point': os.path.join(CHECKPOINT_DIR, 'Karaoke_GaboxV1.ckpt'),
|
| 1547 |
+
'download_urls': [
|
| 1548 |
+
'https://huggingface.co/GaboxR67/MelBandRoformers/resolve/main/melbandroformers/karaoke/karaokegabox_1750911344.yaml',
|
| 1549 |
+
'https://huggingface.co/GaboxR67/MelBandRoformers/resolve/main/melbandroformers/karaoke/Karaoke_GaboxV1.ckpt'
|
| 1550 |
+
],
|
| 1551 |
+
'needs_conf_edit': True
|
| 1552 |
+
},
|
| 1553 |
+
'bs_karaoke_gabox_IS (by Gabox)': {
|
| 1554 |
+
'model_type': 'bs_roformer',
|
| 1555 |
+
'config_path': os.path.join(CHECKPOINT_DIR, 'karaoke_bs_roformer.yaml'),
|
| 1556 |
+
'start_check_point': os.path.join(CHECKPOINT_DIR, 'bs_karaoke_gabox_IS.ckpt'),
|
| 1557 |
+
'download_urls': [
|
| 1558 |
+
'https://huggingface.co/GaboxR67/MelBandRoformers/resolve/main/bsroformers/karaoke_bs_roformer.yaml',
|
| 1559 |
+
'https://huggingface.co/GaboxR67/MelBandRoformers/resolve/main/bsroformers/bs_karaoke_gabox_IS.ckpt'
|
| 1560 |
+
],
|
| 1561 |
+
'needs_conf_edit': True
|
| 1562 |
+
},
|
| 1563 |
+
'bs_roformer_karaoke_frazer_becruily': {
|
| 1564 |
+
'model_type': 'bs_roformer',
|
| 1565 |
+
'config_path': os.path.join(CHECKPOINT_DIR, 'config_karaoke_frazer_becruily.yaml'),
|
| 1566 |
+
'start_check_point': os.path.join(CHECKPOINT_DIR, 'bs_roformer_karaoke_frazer_becruily.ckpt'),
|
| 1567 |
+
'download_urls': [
|
| 1568 |
+
'https://huggingface.co/becruily/bs-roformer-karaoke/resolve/main/config_karaoke_frazer_becruily.yaml',
|
| 1569 |
+
'https://huggingface.co/becruily/bs-roformer-karaoke/resolve/main/bs_roformer_karaoke_frazer_becruily.ckpt'
|
| 1570 |
+
],
|
| 1571 |
+
'needs_conf_edit': True
|
| 1572 |
+
},
|
| 1573 |
+
'mel_band_roformer_karaoke_becruily': {
|
| 1574 |
+
'model_type': 'mel_band_roformer',
|
| 1575 |
+
'config_path': os.path.join(CHECKPOINT_DIR, 'config_karaoke_becruily.yaml'),
|
| 1576 |
+
'start_check_point': os.path.join(CHECKPOINT_DIR, 'mel_band_roformer_karaoke_becruily.ckpt'),
|
| 1577 |
'download_urls': [
|
| 1578 |
+
'https://huggingface.co/becruily/mel-band-roformer-karaoke/resolve/main/config_karaoke_becruily.yaml',
|
| 1579 |
+
'https://huggingface.co/becruily/mel-band-roformer-karaoke/resolve/main/mel_band_roformer_karaoke_becruily.ckpt'
|
| 1580 |
],
|
| 1581 |
'needs_conf_edit': True
|
| 1582 |
}
|
|
|
|
| 1661 |
'https://huggingface.co/SYH99999/MelBandRoformerSYHFTB1/resolve/main/model3.ckpt'
|
| 1662 |
],
|
| 1663 |
'needs_conf_edit': True
|
| 1664 |
+
},
|
| 1665 |
+
'bs_hyperace (by unwa)': {
|
| 1666 |
+
'model_type': 'bs_roformer_custom',
|
| 1667 |
+
'config_path': os.path.join(CHECKPOINT_DIR, 'config_hyperace.yaml'),
|
| 1668 |
+
'start_check_point': os.path.join(CHECKPOINT_DIR, 'bs_hyperace.ckpt'),
|
| 1669 |
+
'download_urls': [
|
| 1670 |
+
('https://huggingface.co/pcunwa/BS-Roformer-HyperACE/resolve/main/config.yaml', 'config_hyperace.yaml'),
|
| 1671 |
+
'https://huggingface.co/pcunwa/BS-Roformer-HyperACE/resolve/main/bs_hyperace.ckpt'
|
| 1672 |
+
],
|
| 1673 |
+
'custom_model_url': 'https://huggingface.co/pcunwa/BS-Roformer-HyperACE/resolve/main/bs_roformer.py',
|
| 1674 |
+
'needs_conf_edit': True
|
| 1675 |
+
},
|
| 1676 |
+
'becruily_deux (by becruily)': {
|
| 1677 |
+
'model_type': 'mel_band_roformer',
|
| 1678 |
+
'config_path': os.path.join(CHECKPOINT_DIR, 'config_deux_becruily.yaml'),
|
| 1679 |
+
'start_check_point': os.path.join(CHECKPOINT_DIR, 'becruily_deux.ckpt'),
|
| 1680 |
+
'download_urls': [
|
| 1681 |
+
'https://huggingface.co/becruily/mel-band-roformer-deux/resolve/main/config_deux_becruily.yaml',
|
| 1682 |
+
'https://huggingface.co/becruily/mel-band-roformer-deux/resolve/main/becruily_deux.ckpt'
|
| 1683 |
+
],
|
| 1684 |
+
'needs_conf_edit': True
|
| 1685 |
+
},
|
| 1686 |
+
'becruily_guitar (by becruily)': {
|
| 1687 |
+
'model_type': 'mel_band_roformer',
|
| 1688 |
+
'config_path': os.path.join(CHECKPOINT_DIR, 'config_guitar_becruily.yaml'),
|
| 1689 |
+
'start_check_point': os.path.join(CHECKPOINT_DIR, 'becruily_guitar.ckpt'),
|
| 1690 |
+
'download_urls': [
|
| 1691 |
+
'https://huggingface.co/becruily/mel-band-roformer-guitar/resolve/main/config_guitar_becruily.yaml',
|
| 1692 |
+
'https://huggingface.co/becruily/mel-band-roformer-guitar/resolve/main/becruily_guitar.ckpt'
|
| 1693 |
+
],
|
| 1694 |
+
'needs_conf_edit': True
|
| 1695 |
+
},
|
| 1696 |
+
'aspiration_mel_band_roformer (by Sucial)': {
|
| 1697 |
+
'model_type': 'mel_band_roformer',
|
| 1698 |
+
'config_path': os.path.join(CHECKPOINT_DIR, 'config_aspiration_mel_band_roformer.yaml'),
|
| 1699 |
+
'start_check_point': os.path.join(CHECKPOINT_DIR, 'aspiration_mel_band_roformer_sdr_18.9845.ckpt'),
|
| 1700 |
+
'download_urls': [
|
| 1701 |
+
'https://huggingface.co/Sucial/Aspiration_Mel_Band_Roformer/resolve/main/config_aspiration_mel_band_roformer.yaml',
|
| 1702 |
+
'https://huggingface.co/Sucial/Aspiration_Mel_Band_Roformer/resolve/main/aspiration_mel_band_roformer_sdr_18.9845.ckpt'
|
| 1703 |
+
],
|
| 1704 |
+
'needs_conf_edit': True
|
| 1705 |
+
},
|
| 1706 |
+
'dereverb_echo_mbr_v2 (by Sucial)': {
|
| 1707 |
+
'model_type': 'mel_band_roformer',
|
| 1708 |
+
'config_path': os.path.join(CHECKPOINT_DIR, 'config_dereverb_echo_mbr_v2.yaml'),
|
| 1709 |
+
'start_check_point': os.path.join(CHECKPOINT_DIR, 'dereverb_echo_mbr_v2_sdr_dry_13.4843.ckpt'),
|
| 1710 |
+
'download_urls': [
|
| 1711 |
+
'https://huggingface.co/Sucial/Dereverb-Echo_Mel_Band_Roformer/resolve/main/config_dereverb_echo_mbr_v2.yaml',
|
| 1712 |
+
'https://huggingface.co/Sucial/Dereverb-Echo_Mel_Band_Roformer/resolve/main/dereverb_echo_mbr_v2_sdr_dry_13.4843.ckpt'
|
| 1713 |
+
],
|
| 1714 |
+
'needs_conf_edit': True
|
| 1715 |
+
},
|
| 1716 |
+
'mdx23c_similarity (by ZFTurbo)': {
|
| 1717 |
+
'model_type': 'mdx23c',
|
| 1718 |
+
'config_path': os.path.join(CHECKPOINT_DIR, 'config_mdx23c_similarity.yaml'),
|
| 1719 |
+
'start_check_point': os.path.join(CHECKPOINT_DIR, 'model_mdx23c_ep_271_l1_freq_72.2383.ckpt'),
|
| 1720 |
+
'download_urls': [
|
| 1721 |
+
'https://github.com/ZFTurbo/Music-Source-Separation-Training/releases/download/v1.0.10/config_mdx23c_similarity.yaml',
|
| 1722 |
+
'https://github.com/ZFTurbo/Music-Source-Separation-Training/releases/download/v1.0.10/model_mdx23c_ep_271_l1_freq_72.2383.ckpt'
|
| 1723 |
+
],
|
| 1724 |
+
'needs_conf_edit': False
|
| 1725 |
+
},
|
| 1726 |
+
'mel_band_roformer_Lead_Rhythm_Guitar (by listra92)': {
|
| 1727 |
+
'model_type': 'mel_band_roformer',
|
| 1728 |
+
'config_path': os.path.join(CHECKPOINT_DIR, 'config_mel_band_roformer_Lead_Rhythm_Guitar.yaml'),
|
| 1729 |
+
'start_check_point': os.path.join(CHECKPOINT_DIR, 'model_mel_band_roformer_ep_72_sdr_3.2232.ckpt'),
|
| 1730 |
+
'download_urls': [
|
| 1731 |
+
'https://huggingface.co/listra92/MyModels/resolve/main/misc/config_mel_band_roformer_Lead_Rhythm_Guitar.yaml',
|
| 1732 |
+
'https://huggingface.co/listra92/MyModels/resolve/main/misc/model_mel_band_roformer_ep_72_sdr_3.2232.ckpt'
|
| 1733 |
+
],
|
| 1734 |
+
'needs_conf_edit': True
|
| 1735 |
+
},
|
| 1736 |
+
'last_bs_roformer_4stem (by Amane)': {
|
| 1737 |
+
'model_type': 'bs_roformer',
|
| 1738 |
+
'config_path': os.path.join(CHECKPOINT_DIR, 'config_last_bs.yaml'),
|
| 1739 |
+
'start_check_point': os.path.join(CHECKPOINT_DIR, 'last_bs_roformer.ckpt'),
|
| 1740 |
+
'download_urls': [
|
| 1741 |
+
'https://huggingface.co/listra92/MyModels/resolve/main/misc/config.yaml',
|
| 1742 |
+
'https://huggingface.co/listra92/MyModels/resolve/main/misc/last_bs_roformer.ckpt'
|
| 1743 |
+
],
|
| 1744 |
+
'needs_conf_edit': True
|
| 1745 |
+
},
|
| 1746 |
+
'bs_roformer_4stems_ft (by SYH99999)': {
|
| 1747 |
+
'model_type': 'bs_roformer',
|
| 1748 |
+
'config_path': os.path.join(CHECKPOINT_DIR, 'config_bs_4stems_ft.yaml'),
|
| 1749 |
+
'start_check_point': os.path.join(CHECKPOINT_DIR, 'bs_roformer_4stems_ft.pth'),
|
| 1750 |
+
'download_urls': [
|
| 1751 |
+
'https://huggingface.co/SYH99999/bs_roformer_4stems_ft/resolve/main/config.yaml',
|
| 1752 |
+
'https://huggingface.co/SYH99999/bs_roformer_4stems_ft/resolve/main/bs_roformer_4stems_ft.pth'
|
| 1753 |
+
],
|
| 1754 |
+
'needs_conf_edit': True
|
| 1755 |
+
},
|
| 1756 |
+
'CINEMATIC-BandIt_v2_Eng (by kwatcharasupat)': {
|
| 1757 |
+
'model_type': 'bandit_v2',
|
| 1758 |
+
'config_path': os.path.join(CHECKPOINT_DIR, 'config_dnr_bandit_v2_mus64.yaml'),
|
| 1759 |
+
'start_check_point': os.path.join(CHECKPOINT_DIR, 'checkpoint-eng_state_dict.ckpt'),
|
| 1760 |
+
'download_urls': [
|
| 1761 |
+
'https://huggingface.co/jarredou/banditv2_state_dicts_only/resolve/main/config_dnr_bandit_v2_mus64.yaml',
|
| 1762 |
+
'https://huggingface.co/jarredou/banditv2_state_dicts_only/resolve/main/checkpoint-eng_state_dict.ckpt'
|
| 1763 |
+
],
|
| 1764 |
+
'needs_conf_edit': True
|
| 1765 |
}
|
| 1766 |
}
|
| 1767 |
}
|
| 1768 |
|
| 1769 |
def get_model_config(clean_model=None, chunk_size=None, overlap=None):
    """Resolve a model name to its type, config path and checkpoint path.

    Any files the model needs are requested through ``download_file`` before
    the paths are returned.

    Entries of a built-in model's ``download_urls`` may be:
    - a plain string ``'url'``, downloaded under the file name taken from the URL
    - a tuple ``('url', 'target_filename')``, downloaded under a custom file name

    Models registered in custom_models.json (via ``load_custom_models``) are
    consulted after the built-in ``MODEL_CONFIGS`` table.

    Args:
        clean_model: Model name to look up. When ``None`` the function returns
            the set of all known model names instead of a path triple.
        chunk_size: Chunk size forwarded to ``conf_edit``.
        overlap: Overlap forwarded to ``conf_edit``. The config is only edited
            when both ``chunk_size`` and ``overlap`` are given and the model
            asks for it through ``needs_conf_edit``.

    Returns:
        ``(model_type, config_path, checkpoint_path)`` for a known model,
        ``("", "", "")`` for an unknown one, or a set of model names when
        ``clean_model`` is ``None``.
    """
    if clean_model is None:
        known_names = {name for models in MODEL_CONFIGS.values() for name in models.keys()}
        # Custom models are listed alongside the built-in ones.
        known_names.update(load_custom_models().keys())
        return known_names

    edit_requested = chunk_size is not None and overlap is not None

    # Built-in models take precedence over custom ones.
    for models in MODEL_CONFIGS.values():
        if clean_model not in models:
            continue
        entry = models[clean_model]
        for source in entry['download_urls']:
            if isinstance(source, tuple):
                # (url, target_filename): store under an explicit local name.
                url, target_filename = source
                download_file(url, target_filename=target_filename)
            else:
                download_file(source)
        if entry.get('custom_model_url'):
            _fetch_custom_bs_roformer_module(entry['custom_model_url'])
        if entry['needs_conf_edit'] and edit_requested:
            conf_edit(entry['config_path'], chunk_size, overlap)
        return entry['model_type'], entry['config_path'], entry['start_check_point']

    # Fall back to user-registered custom models.
    custom_models = load_custom_models()
    if clean_model in custom_models:
        entry = custom_models[clean_model]
        checkpoint_path = os.path.join(CHECKPOINT_DIR, entry['checkpoint_filename'])
        config_path = os.path.join(CHECKPOINT_DIR, entry['config_filename'])

        # Checkpoint first, then config; both are stored under the custom names.
        download_file(entry['checkpoint_url'], target_filename=entry['checkpoint_filename'])
        download_file(entry['config_url'], target_filename=entry['config_filename'])

        if entry.get('custom_model_url'):
            _fetch_custom_bs_roformer_module(entry['custom_model_url'])

        # Custom models are edited unless they explicitly opt out.
        if entry.get('needs_conf_edit', True) and edit_requested:
            conf_edit(config_path, chunk_size, overlap, model_name=clean_model)

        return entry['model_type'], config_path, checkpoint_path

    return "", "", ""


def _fetch_custom_bs_roformer_module(module_url):
    """Download a model-specific module into the ``bs_roformer_custom`` package directory."""
    package_dir = os.path.join(BASE_DIR, 'models', 'bs_roformer', 'bs_roformer_custom')
    os.makedirs(package_dir, exist_ok=True)
    # An __init__.py is created so the directory can be imported as a package.
    init_file = os.path.join(package_dir, '__init__.py')
    if not os.path.exists(init_file):
        with open(init_file, 'w') as f:
            f.write('# Auto-generated for custom BSRoformer models\n')
    download_file(module_url, path=package_dir)
|
| 1838 |
|
| 1839 |
+
def get_all_model_configs_with_custom():
    """Return a shallow copy of MODEL_CONFIGS, plus a "Custom Models" category when any exist.

    Each custom entry is reshaped into the same keys the built-in entries use
    (``model_type``, ``config_path``, ``start_check_point``, ``download_urls``,
    ``custom_model_url``, ``needs_conf_edit``).
    """
    merged = dict(MODEL_CONFIGS)
    user_models = load_custom_models()
    if not user_models:
        return merged

    custom_category = {}
    for model_name, spec in user_models.items():
        custom_category[model_name] = {
            'model_type': spec['model_type'],
            'config_path': os.path.join(CHECKPOINT_DIR, spec['config_filename']),
            'start_check_point': os.path.join(CHECKPOINT_DIR, spec['checkpoint_filename']),
            'download_urls': [spec['checkpoint_url'], spec['config_url']],
            'custom_model_url': spec.get('custom_model_url'),
            # Custom models default to having their config edited.
            'needs_conf_edit': spec.get('needs_conf_edit', True),
        }
    merged["Custom Models"] = custom_category
    return merged
|
| 1856 |
+
|
| 1857 |
+
# Attach a ``keys()`` callable to the function object: it returns the set of every
# known model name, built-in and custom, evaluated freshly on each call.
get_model_config.keys = lambda: {
    name
    for models in MODEL_CONFIGS.values()
    for name in models.keys()
}.union(load_custom_models().keys())
|
| 1858 |
+
|
| 1859 |
+
|
| 1860 |
+
def get_model_chunk_size(model_name):
    """Look up ``audio.chunk_size`` in a model's YAML config if the file is already on disk.

    Nothing is downloaded. Every category returned by
    ``get_all_model_configs_with_custom`` that contains ``model_name`` is tried
    in turn.

    Returns:
        The chunk size as an ``int``, or ``None`` when no local config yields a
        truthy ``audio.chunk_size`` value (missing file, unreadable or
        unparsable YAML, missing key).
    """
    for models in get_all_model_configs_with_custom().values():
        if model_name not in models:
            continue

        yaml_path = models[model_name].get('config_path', '')
        if not yaml_path or not os.path.exists(yaml_path):
            continue

        try:
            with open(yaml_path, 'r', encoding='utf-8') as handle:
                parsed = yaml.safe_load(handle)
            if isinstance(parsed, dict):
                native_size = parsed.get('audio', {}).get('chunk_size')
                if native_size:
                    return int(native_size)
        except Exception:
            # Best-effort lookup: any read/parse/convert failure just means
            # "unknown" for this category, so move on to the next one.
            pass

    return None
|
models/bs_roformer/attend_sage.py
ADDED
|
@@ -0,0 +1,145 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from functools import wraps
|
| 2 |
+
from packaging import version
|
| 3 |
+
from collections import namedtuple
|
| 4 |
+
|
| 5 |
+
import os
|
| 6 |
+
import torch
|
| 7 |
+
from torch import nn, einsum
|
| 8 |
+
import torch.nn.functional as F
|
| 9 |
+
|
| 10 |
+
from einops import rearrange, reduce
|
| 11 |
+
|
| 12 |
+
def _print_once(msg):
|
| 13 |
+
printed = False
|
| 14 |
+
@wraps(print)
|
| 15 |
+
def inner():
|
| 16 |
+
nonlocal printed
|
| 17 |
+
if not printed:
|
| 18 |
+
print(msg)
|
| 19 |
+
printed = True
|
| 20 |
+
return inner
|
| 21 |
+
|
| 22 |
+
try:
|
| 23 |
+
from sageattention import sageattn
|
| 24 |
+
_has_sage_attention = True
|
| 25 |
+
# _print_sage_found = _print_once("SageAttention found. Will be used when flash=True.")
|
| 26 |
+
# _print_sage_found()
|
| 27 |
+
except ImportError:
|
| 28 |
+
_has_sage_attention = False
|
| 29 |
+
_print_sage_not_found = _print_once("SageAttention not found. Will fall back to PyTorch SDPA (if available) or manual einsum.")
|
| 30 |
+
_print_sage_not_found()
|
| 31 |
+
|
| 32 |
+
# helpers
|
| 33 |
+
def exists(val):
    """Return True for any value other than ``None`` (falsy values such as 0 count as existing)."""
    return not (val is None)
|
| 35 |
+
|
| 36 |
+
def default(v, d):
    """Return ``v`` unless it is ``None``, in which case return the fallback ``d``."""
    if exists(v):
        return v
    return d
|
| 38 |
+
|
| 39 |
+
# main class
|
| 40 |
+
class Attend(nn.Module):
    """Non-causal dot-product attention with a three-level backend fallback.

    Backends are tried in this order:
    1. SageAttention (``sageattn``), when ``flash=True`` and the package imported.
    2. PyTorch ``F.scaled_dot_product_attention``, when ``flash=True`` and
       torch >= 2.0.
    3. A manual einsum implementation.

    A backend that raises during ``forward`` is disabled for the rest of the
    module's lifetime and the next one is used instead.

    Args:
        dropout: Attention dropout probability. Applied by the SDPA and einsum
            paths; the SageAttention call is made without dropout.
        flash: If True, try SageAttention or PyTorch SDPA before einsum.
        scale: Optional softmax scale for the einsum path; defaults to
            ``dim_head ** -0.5``.
    """

    def __init__(
        self,
        dropout = 0.,
        flash = False, # If True, attempts to use SageAttention or PyTorch SDPA
        scale = None
    ):
        super().__init__()
        # NOTE(review): scale is only applied on the einsum path; the
        # SageAttention and SDPA calls below are made without a scale argument.
        self.scale = scale
        self.dropout = dropout

        # Determine which attention mechanism to try first.
        self.use_sage = flash and _has_sage_attention
        self.use_pytorch_sdpa = False
        self._sdpa_checked = False  # the torch version is checked only once

        if flash and not self.use_sage:
            # Only consider PyTorch SDPA if Sage isn't available/chosen.
            self._select_sdpa_backend(
                "Using PyTorch SDPA backend (FlashAttention-2, Memory-Efficient, or Math).",
                "Flash attention requested but Pytorch < 2.0 and SageAttention not found. Falling back to einsum.",
            )

        # Dropout layer for the manual einsum implementation only.
        self.attn_dropout = nn.Dropout(dropout)

    def _select_sdpa_backend(self, sdpa_msg, einsum_msg):
        """Enable the SDPA path if torch >= 2.0; report the outcome; run at most once."""
        if self._sdpa_checked:
            return
        if version.parse(torch.__version__) >= version.parse('2.0.0'):
            self.use_pytorch_sdpa = True
            _print_once(sdpa_msg)()
        else:
            _print_once(einsum_msg)()
        self._sdpa_checked = True

    def forward(self, q, k, v):
        """
        einstein notation
        b - batch
        h - heads
        n, i, j - sequence length (base sequence length, source, target)
        d - feature dimension

        Input tensors q, k, v expected in shape: (batch, heads, seq_len, dim_head) -> HND layout
        """
        # --- Priority 1: SageAttention ---
        if self.use_sage:
            # Dropout is not passed to sageattn; is_causal=False matches how
            # this module is used for non-causal attention.
            try:
                return sageattn(q, k, v, tensor_layout='HND', is_causal=False)
            except Exception as e:
                # Deliberately broad: any kernel/dtype/device failure should
                # degrade to the next backend instead of aborting inference.
                print(f"SageAttention failed with error: {e}. Falling back.")
                self.use_sage = False  # don't try Sage again if it failed once
                self._select_sdpa_backend(
                    "Falling back to PyTorch SDPA.",
                    "Falling back to einsum.",
                )

        # --- Priority 2: PyTorch SDPA ---
        if self.use_pytorch_sdpa:
            try:
                # Let PyTorch choose among the flash, memory-efficient and math kernels.
                # NOTE(review): torch.backends.cuda.sdp_kernel is deprecated in
                # recent PyTorch releases in favour of torch.nn.attention.sdpa_kernel.
                with torch.backends.cuda.sdp_kernel(enable_flash=True, enable_math=True, enable_mem_efficient=True):
                    out = F.scaled_dot_product_attention(
                        q, k, v,
                        attn_mask=None,
                        dropout_p = self.dropout if self.training else 0.,
                        is_causal=False
                    )
                return out
            except Exception as e:
                print(f"PyTorch SDPA failed with error: {e}. Falling back to einsum.")
                self.use_pytorch_sdpa = False  # fall back to einsum from now on

        # --- Priority 3: manual einsum attention ---
        scale = self.scale if self.scale is not None else q.shape[-1] ** -0.5

        # similarity
        sim = einsum("b h i d, b h j d -> b h i j", q, k) * scale

        # attention
        attn = sim.softmax(dim=-1)
        attn = self.attn_dropout(attn)  # dropout module is used only on this path

        # aggregate values
        out = einsum("b h i j, b h j d -> b h i d", attn, v)

        return out
|
models/bs_roformer/bs_roformer_experimental.py
ADDED
|
@@ -0,0 +1,686 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from functools import partial
|
| 2 |
+
|
| 3 |
+
import torch
|
| 4 |
+
from torch import nn, einsum, Tensor
|
| 5 |
+
from torch.nn import Module, ModuleList
|
| 6 |
+
import torch.nn.functional as F
|
| 7 |
+
|
| 8 |
+
from models.bs_roformer.attend import Attend
|
| 9 |
+
from torch.utils.checkpoint import checkpoint
|
| 10 |
+
|
| 11 |
+
from beartype.typing import Tuple, Optional, List, Callable
|
| 12 |
+
from beartype import beartype
|
| 13 |
+
|
| 14 |
+
from rotary_embedding_torch import RotaryEmbedding
|
| 15 |
+
|
| 16 |
+
from einops import rearrange, pack, unpack
|
| 17 |
+
from einops.layers.torch import Rearrange
|
| 18 |
+
|
| 19 |
+
from hyper_connections import get_init_and_expand_reduce_stream_functions
|
| 20 |
+
|
| 21 |
+
# helper functions
|
| 22 |
+
|
| 23 |
+
def exists(val):
    """Return True when *val* is anything other than ``None``."""
    return not (val is None)
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
def default(v, d):
    """Return *v* unless it is ``None``, in which case return the fallback *d*."""
    if exists(v):
        return v
    return d
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
def pack_one(t, pattern):
    """Pack the single tensor *t* with einops ``pack`` and return ``pack``'s result unchanged."""
    singleton = [t]
    return pack(singleton, pattern)
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
def unpack_one(t, ps, pattern):
    """Undo ``pack_one``: run einops ``unpack`` and return the first unpacked tensor."""
    pieces = unpack(t, ps, pattern)
    return pieces[0]
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
# norm
|
| 40 |
+
|
| 41 |
+
def l2norm(t):
    """Scale *t* to unit Euclidean (p=2) norm along its last dimension."""
    return F.normalize(t, p=2, dim=-1)
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
class RMSNorm(Module):
    """Normalisation layer: unit-normalise the last dimension, then rescale.

    The output is ``F.normalize(x, dim=-1) * sqrt(dim) * gamma`` where
    ``gamma`` is a learnable per-feature gain initialised to ones.
    """

    def __init__(self, dim):
        super().__init__()
        # Fixed sqrt(dim) factor applied after the unit-norm projection.
        self.scale = dim ** 0.5
        # Learnable per-feature gain.
        self.gamma = nn.Parameter(torch.ones(dim))

    def forward(self, x):
        unit = F.normalize(x, dim=-1)
        return unit * self.scale * self.gamma
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
# attention
|
| 56 |
+
|
| 57 |
+
class FeedForward(Module):
    """Pre-normalised two-layer MLP: RMSNorm -> Linear -> GELU -> Dropout -> Linear -> Dropout.

    Args:
        dim: input and output feature width.
        mult: hidden width multiplier; the hidden width is ``int(dim * mult)``.
        dropout: dropout probability used after the activation and after the
            output projection.
    """

    def __init__(
            self,
            dim,
            mult=4,
            dropout=0.
    ):
        super().__init__()
        hidden_width = int(dim * mult)
        stages = [
            RMSNorm(dim),
            nn.Linear(dim, hidden_width),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_width, dim),
            nn.Dropout(dropout),
        ]
        self.net = nn.Sequential(*stages)

    def forward(self, x):
        # No residual is added here; callers are responsible for it.
        return self.net(x)
|
| 77 |
+
|
| 78 |
+
|
| 79 |
+
class Attention(Module):
    """Multi-head self-attention with per-head sigmoid output gates.

    The input is RMS-normalised, projected to q/k/v, optionally rotated by a
    rotary embedding, attended via ``Attend`` and gated per head before the
    output projection.  When ``learned_value_residual_mix`` is set, the values
    are interpolated towards a caller-supplied ``value_residual`` using a
    learned, per-token, per-head sigmoid weight.

    ``forward`` returns ``(output, values)`` where ``values`` are this layer's
    own value tensor *before* any residual mixing.
    """

    def __init__(
            self,
            dim,
            heads=8,
            dim_head=64,
            dropout=0.,
            rotary_embed=None,
            flash=True,
            learned_value_residual_mix=False,
    ):
        super().__init__()
        inner_width = heads * dim_head

        self.heads = heads
        self.scale = dim_head ** -0.5

        self.rotary_embed = rotary_embed

        self.attend = Attend(flash=flash, dropout=dropout)

        self.norm = RMSNorm(dim)
        self.to_qkv = nn.Linear(dim, inner_width * 3, bias=False)

        # Only present when value-residual mixing is enabled for this layer.
        if learned_value_residual_mix:
            self.to_value_residual_mix = nn.Linear(dim, heads)
        else:
            self.to_value_residual_mix = None

        self.to_gates = nn.Linear(dim, heads)

        out_proj = nn.Linear(inner_width, dim, bias=False)
        self.to_out = nn.Sequential(out_proj, nn.Dropout(dropout))

    def forward(self, x, value_residual=None):
        normed = self.norm(x)

        qkv = self.to_qkv(normed)
        q, k, v = rearrange(qkv, 'b n (qkv h d) -> qkv b h n d', qkv=3, h=self.heads)

        # Returned to the caller untouched so it can serve as a value residual.
        orig_v = v

        mixer = self.to_value_residual_mix
        if exists(mixer):
            mix = rearrange(mixer(normed), 'b n h -> b h n 1').sigmoid()

            assert exists(value_residual)
            v = v.lerp(value_residual, mix)

        rotary = self.rotary_embed
        if exists(rotary):
            q = rotary.rotate_queries_or_keys(q)
            k = rotary.rotate_queries_or_keys(k)

        attended = self.attend(q, k, v)

        # One sigmoid gate per token and head, broadcast over the head features.
        gate = rearrange(self.to_gates(normed), 'b n h -> b h n 1').sigmoid()
        gated = attended * gate

        merged = rearrange(gated, 'b h n d -> b n (h d)')
        return self.to_out(merged), orig_v
|
| 136 |
+
|
| 137 |
+
|
| 138 |
+
class LinearAttention(Module):
    """
    this flavor of linear attention proposed in https://arxiv.org/abs/2106.09681 by El-Nouby et al.

    q, k and v are laid out as ``b h d n`` (head features before sequence
    positions); q and k are l2-normalised and q is scaled by the exponential
    of a learned per-head temperature before ``Attend`` is applied.
    ``forward`` takes only ``x`` and returns a single tensor.
    """

    @beartype
    def __init__(
            self,
            *,
            dim,
            dim_head=32,
            heads=8,
            scale=8,
            flash=False,
            dropout=0.
    ):
        super().__init__()
        inner_width = dim_head * heads
        self.norm = RMSNorm(dim)

        qkv_proj = nn.Linear(dim, inner_width * 3, bias=False)
        split_heads = Rearrange('b n (qkv h d) -> qkv b h d n', qkv=3, h=heads)
        self.to_qkv = nn.Sequential(qkv_proj, split_heads)

        self.temperature = nn.Parameter(torch.ones(heads, 1, 1))

        self.attend = Attend(
            scale=scale,
            dropout=dropout,
            flash=flash
        )

        merge_heads = Rearrange('b h d n -> b n (h d)')
        out_proj = nn.Linear(inner_width, dim, bias=False)
        self.to_out = nn.Sequential(merge_heads, out_proj)

    def forward(
            self,
            x
    ):
        q, k, v = self.to_qkv(self.norm(x))

        q = l2norm(q)
        k = l2norm(k)
        q = q * self.temperature.exp()

        attended = self.attend(q, k, v)
        return self.to_out(attended)
|
| 190 |
+
|
| 191 |
+
|
| 192 |
+
class Transformer(Module):
    """Stack of ``depth`` (attention, feed-forward) pairs followed by an optional RMSNorm.

    Args:
        dim: feature width of the token stream.
        depth: number of (attention, feed-forward) pairs.
        dim_head, heads, attn_dropout: forwarded to the attention layers.
        ff_dropout, ff_mult: forwarded to the feed-forward layers.
        norm_output: apply a final ``RMSNorm`` when true, identity otherwise.
        rotary_embed: rotary embedding handed to ``Attention`` layers.
        flash_attn: forwarded as ``flash`` to the attention layers.
        linear_attn: build ``LinearAttention`` layers instead of ``Attention``.
        add_value_residual: enable learned value-residual mixing in ``Attention``.
        num_residual_streams: when not 1, ``Attention`` and ``FeedForward`` are
            wrapped with the hyper-connection initialiser; ``LinearAttention``
            is never wrapped.

    ``forward`` returns ``(output, first_values)`` where ``first_values`` is the
    value tensor reported by the first ``Attention`` layer (``None`` when the
    stack consists of ``LinearAttention`` layers, which report no values).
    """

    def __init__(
            self,
            *,
            dim,
            depth,
            dim_head=64,
            heads=8,
            attn_dropout=0.,
            ff_dropout=0.,
            ff_mult=4,
            norm_output=True,
            rotary_embed=None,
            flash_attn=True,
            linear_attn=False,
            add_value_residual=False,
            num_residual_streams=1,
    ):
        super().__init__()
        self.layers = ModuleList([])

        init_hyper_conn, *_ = get_init_and_expand_reduce_stream_functions(num_residual_streams, disable=num_residual_streams == 1)

        for _ in range(depth):
            if linear_attn:
                attn = LinearAttention(dim=dim, dim_head=dim_head, heads=heads, dropout=attn_dropout, flash=flash_attn)
            else:
                if num_residual_streams != 1:
                    attn = init_hyper_conn(dim=dim, branch=Attention(dim=dim, dim_head=dim_head, heads=heads, dropout=attn_dropout,
                                                                     rotary_embed=rotary_embed, flash=flash_attn, learned_value_residual_mix=add_value_residual))
                else:
                    attn = Attention(
                        dim=dim, dim_head=dim_head, heads=heads, dropout=attn_dropout,
                        rotary_embed=rotary_embed, flash=flash_attn, learned_value_residual_mix=add_value_residual
                    )
            if num_residual_streams != 1:
                ff = init_hyper_conn(dim=dim, branch=FeedForward(dim=dim, mult=ff_mult, dropout=ff_dropout))
            else:
                ff = FeedForward(dim=dim, mult=ff_mult, dropout=ff_dropout)

            self.layers.append(ModuleList([
                attn,
                ff
            ]))

        self.norm = RMSNorm(dim) if norm_output else nn.Identity()

    def forward(self, x, value_residual=None):

        first_values = None

        # Compatibility with old weights: when no value residual is supplied the
        # residual connections are added explicitly around each sub-layer.
        # NOTE(review): residual handling is keyed on whether ``value_residual``
        # was passed, not on ``num_residual_streams``.  Layers built without
        # hyper connections therefore get no residual add once a value residual
        # is supplied -- confirm this is intended before changing it, since
        # existing checkpoints may depend on it.
        add_residual_here = value_residual is None

        for attn, ff in self.layers:
            if isinstance(attn, LinearAttention):
                # LinearAttention.forward accepts only ``x`` and returns a single
                # tensor, so it must not receive ``value_residual`` and has no
                # values to report.
                attn_out, next_values = attn(x), None
            else:
                attn_out, next_values = attn(x, value_residual=value_residual)

            first_values = default(first_values, next_values)

            if add_residual_here:
                x = attn_out + x
                x = ff(x) + x
            else:
                x = attn_out
                x = ff(x)

        return self.norm(x), first_values
|
| 256 |
+
|
| 257 |
+
|
| 258 |
+
# bandsplit module
|
| 259 |
+
|
| 260 |
+
class BandSplit(Module):
    """Split the last dimension into bands and project each band to ``dim`` features.

    Args:
        dim: output feature width of every band.
        dim_inputs: width of each band along the last input dimension; the
            input is split with exactly these sizes.

    ``forward`` stacks the per-band projections on a new second-to-last axis.
    """

    @beartype
    def __init__(
            self,
            dim,
            dim_inputs: Tuple[int, ...]
    ):
        super().__init__()
        self.dim_inputs = dim_inputs
        # One RMSNorm + Linear projection per band, in band order.
        self.to_features = ModuleList([
            nn.Sequential(RMSNorm(band_width), nn.Linear(band_width, dim))
            for band_width in dim_inputs
        ])

    def forward(self, x):
        bands = x.split(self.dim_inputs, dim=-1)
        projected = [project(band) for band, project in zip(bands, self.to_features)]
        return torch.stack(projected, dim=-2)
|
| 288 |
+
|
| 289 |
+
|
| 290 |
+
def MLP(
        dim_in,
        dim_out,
        dim_hidden=None,
        depth=1,
        activation=nn.Tanh
):
    """Build an ``nn.Sequential`` of ``depth`` linear layers with activations in between.

    Layer widths run ``dim_in -> dim_hidden (depth - 1 times) -> dim_out``;
    ``dim_hidden`` falls back to ``dim_in`` when not given.  A fresh
    ``activation()`` follows every linear layer except the last.
    """
    dim_hidden = default(dim_hidden, dim_in)

    widths = (dim_in, *((dim_hidden,) * (depth - 1)), dim_out)
    transitions = list(zip(widths, widths[1:]))
    final_index = len(transitions) - 1

    layers = []
    for index, (width_in, width_out) in enumerate(transitions):
        layers.append(nn.Linear(width_in, width_out))
        if index != final_index:
            layers.append(activation())

    return nn.Sequential(*layers)
|
| 313 |
+
|
| 314 |
+
|
| 315 |
+
class MaskEstimator(Module):
    """Per-band MLP heads that map band features back to frequency-domain values.

    Args:
        dim: feature width of each band on input.
        dim_inputs: output width of each band; every head emits twice that
            width and halves it again through a GLU.
        depth: number of linear layers in each head's MLP.
        mlp_expansion_factor: hidden width of each MLP is ``dim`` times this.

    ``forward`` unbinds the second-to-last axis into bands, runs each band
    through its own head and concatenates the results on the last axis.
    """

    @beartype
    def __init__(
            self,
            dim,
            dim_inputs: Tuple[int, ...],
            depth,
            mlp_expansion_factor=4
    ):
        super().__init__()
        self.dim_inputs = dim_inputs
        self.to_freqs = ModuleList([])
        hidden_width = dim * mlp_expansion_factor

        for band_width in dim_inputs:
            head = nn.Sequential(
                MLP(dim, band_width * 2, dim_hidden=hidden_width, depth=depth),
                nn.GLU(dim=-1)
            )
            self.to_freqs.append(head)

    def forward(self, x):
        per_band = x.unbind(dim=-2)
        estimates = [head(features) for features, head in zip(per_band, self.to_freqs)]
        return torch.cat(estimates, dim=-1)
|
| 349 |
+
|
| 350 |
+
|
| 351 |
+
# main class
|
| 352 |
+
|
| 353 |
+
# Default band layout: number of frequency bins in each consecutive band,
# written as (bin count,) * number of bands.  62 bands, 1025 bins in total.
DEFAULT_FREQS_PER_BANDS = (
    (2,) * 24
    + (4,) * 12
    + (12,) * 8
    + (24,) * 8
    + (48,) * 8
    + (128, 129)
)
|
| 363 |
+
|
| 364 |
+
|
| 365 |
+
class BSRoformer(Module):
    """Band-split transformer that estimates complex STFT masks for ``num_stems`` stems.

    Pipeline of ``forward``: STFT of the input audio -> ``BandSplit`` ->
    ``depth`` blocks of (optional linear, time, frequency) transformers ->
    final RMSNorm -> one ``MaskEstimator`` per stem -> complex multiplication of
    the mask with the input STFT -> inverse STFT.

    Without ``target`` the reconstructed audio is returned.  With ``target`` an
    L1 waveform loss plus a weighted multi-resolution STFT L1 loss is returned.
    """

    @beartype
    def __init__(
            self,
            dim,
            *,
            depth,
            stereo=False,
            num_stems=1,
            time_transformer_depth=2,
            freq_transformer_depth=2,
            linear_transformer_depth=0,
            freqs_per_bands: Tuple[int, ...] = DEFAULT_FREQS_PER_BANDS,
            # in the paper, they divide into ~60 bands, test with 1 for starters
            dim_head=64,
            heads=8,
            attn_dropout=0.,
            ff_dropout=0.,
            flash_attn=True,
            dim_freqs_in=1025,  # not read anywhere in this class
            stft_n_fft=2048,
            stft_hop_length=512,
            # 10ms at 44100Hz, from sections 4.1, 4.4 in the paper - @faroit recommends // 2 or // 4 for better reconstruction
            stft_win_length=2048,
            stft_normalized=False,
            stft_window_fn: Optional[Callable] = None,
            mask_estimator_depth=2,
            multi_stft_resolution_loss_weight=1.,
            multi_stft_resolutions_window_sizes: Tuple[int, ...] = (4096, 2048, 1024, 512, 256),
            multi_stft_hop_size=147,
            multi_stft_normalized=False,
            multi_stft_window_fn: Callable = torch.hann_window,
            mlp_expansion_factor=4,
            use_torch_checkpoint=False,
            skip_connection=False,
            use_value_residual_learning=False,
            num_residual_streams=1,  # set to 1. to disable hyper connections (Default in original is 4)
    ):
        super().__init__()

        self.stereo = stereo
        self.audio_channels = 2 if stereo else 1
        self.num_stems = num_stems
        self.use_torch_checkpoint = use_torch_checkpoint
        self.skip_connection = skip_connection
        self.num_residual_streams = num_residual_streams

        _, self.expand_stream, self.reduce_stream = get_init_and_expand_reduce_stream_functions(num_residual_streams, disable=num_residual_streams == 1)

        self.layers = ModuleList([])

        transformer_kwargs = dict(
            dim=dim,
            heads=heads,
            dim_head=dim_head,
            attn_dropout=attn_dropout,
            ff_dropout=ff_dropout,
            flash_attn=flash_attn,
            norm_output=False,
            num_residual_streams=num_residual_streams,
        )

        # One rotary embedding per axis, shared by every block along that axis.
        time_rotary_embed = RotaryEmbedding(dim=dim_head)
        freq_rotary_embed = RotaryEmbedding(dim=dim_head)

        for layer_index in range(depth):
            # Value-residual mixing is only enabled from the second block on,
            # and only when value residual learning is requested.
            if use_value_residual_learning:
                is_first = layer_index == 0
            else:
                is_first = True

            tran_modules = []
            if linear_transformer_depth > 0:
                tran_modules.append(Transformer(depth=linear_transformer_depth, linear_attn=True, **transformer_kwargs))
            tran_modules.append(
                Transformer(depth=time_transformer_depth, rotary_embed=time_rotary_embed, add_value_residual=not is_first, **transformer_kwargs)
            )
            tran_modules.append(
                Transformer(depth=freq_transformer_depth, rotary_embed=freq_rotary_embed, add_value_residual=not is_first, **transformer_kwargs)
            )
            self.layers.append(nn.ModuleList(tran_modules))

        self.final_norm = RMSNorm(dim)

        self.stft_kwargs = dict(
            n_fft=stft_n_fft,
            hop_length=stft_hop_length,
            win_length=stft_win_length,
            normalized=stft_normalized
        )

        self.stft_window_fn = partial(default(stft_window_fn, torch.hann_window), stft_win_length)

        # Probe the STFT once to learn how many frequency bins these settings produce.
        freqs = torch.stft(torch.randn(1, 4096), **self.stft_kwargs, window=torch.ones(stft_win_length), return_complex=True).shape[1]

        assert len(freqs_per_bands) > 1
        assert sum(
            freqs_per_bands) == freqs, f'the number of freqs in the bands must equal {freqs} based on the STFT settings, but got {sum(freqs_per_bands)}'

        # Each bin contributes a real and an imaginary value per audio channel.
        freqs_per_bands_with_complex = tuple(2 * f * self.audio_channels for f in freqs_per_bands)

        self.band_split = BandSplit(
            dim=dim,
            dim_inputs=freqs_per_bands_with_complex
        )

        self.mask_estimators = nn.ModuleList([])

        for _ in range(num_stems):
            mask_estimator = MaskEstimator(
                dim=dim,
                dim_inputs=freqs_per_bands_with_complex,
                depth=mask_estimator_depth,
                mlp_expansion_factor=mlp_expansion_factor,
            )

            self.mask_estimators.append(mask_estimator)

        # for the multi-resolution stft loss

        self.multi_stft_resolution_loss_weight = multi_stft_resolution_loss_weight
        self.multi_stft_resolutions_window_sizes = multi_stft_resolutions_window_sizes
        self.multi_stft_n_fft = stft_n_fft
        self.multi_stft_window_fn = multi_stft_window_fn

        self.multi_stft_kwargs = dict(
            hop_length=multi_stft_hop_size,
            normalized=multi_stft_normalized
        )

    def forward(
            self,
            raw_audio,
            target=None,
            return_loss_breakdown=False
    ):
        """
        Separate ``raw_audio`` and, when ``target`` is given, compute the training loss.

        Returns the reconstructed audio when ``target`` is None; otherwise the
        total loss, or ``(total_loss, (l1_loss, multi_stft_resolution_loss))``
        when ``return_loss_breakdown`` is true.

        einops

        b - batch
        f - freq
        t - time
        s - audio channel (1 for mono, 2 for stereo)
        n - number of 'stems'
        c - complex (2)
        d - feature dimension
        """

        device = raw_audio.device

        # defining whether model is loaded on MPS (MacOS GPU accelerator)
        x_is_mps = device.type == "mps"

        if raw_audio.ndim == 2:
            raw_audio = rearrange(raw_audio, 'b t -> b 1 t')

        channels = raw_audio.shape[1]
        assert (not self.stereo and channels == 1) or (self.stereo and channels == 2), 'stereo needs to be set to True if passing in audio signal that is stereo (channel dimension of 2). also need to be False if mono (channel dimension of 1)'

        # to stft

        raw_audio, batch_audio_channel_packed_shape = pack_one(raw_audio, '* t')

        stft_window = self.stft_window_fn(device=device)

        # RuntimeError: FFT operations are only supported on MacOS 14+
        # Since it's tedious to define whether we're on correct MacOS version - simple try-catch is used
        try:
            stft_repr = torch.stft(raw_audio, **self.stft_kwargs, window=stft_window, return_complex=True)
        except Exception:
            # The CPU fallback only exists for MPS; elsewhere surface the error
            # instead of re-running the same failing call.
            if not x_is_mps:
                raise
            stft_repr = torch.stft(raw_audio.cpu(), **self.stft_kwargs,
                                   window=stft_window.cpu(), return_complex=True).to(device)
        stft_repr = torch.view_as_real(stft_repr)

        stft_repr = unpack_one(stft_repr, batch_audio_channel_packed_shape, '* f t c')

        # merge stereo / mono into the frequency, with frequency leading dimension, for band splitting
        stft_repr = rearrange(stft_repr, 'b s f t c -> b (f s) t c')

        x = rearrange(stft_repr, 'b f t c -> b t (f c)')

        if self.use_torch_checkpoint:
            x = checkpoint(self.band_split, x, use_reentrant=False)
        else:
            x = self.band_split(x)

        # value residuals

        time_v_residual = None
        freq_v_residual = None

        # maybe expand residual streams
        if self.num_residual_streams != 1:
            x = self.expand_stream(x)

        # axial / hierarchical attention

        store = [None] * len(self.layers)
        for i, transformer_block in enumerate(self.layers):

            if len(transformer_block) == 3:
                linear_transformer, time_transformer, freq_transformer = transformer_block

                x, ft_ps = pack([x], 'b * d')
                # Transformer.forward returns (output, first_values); only the
                # output tensor may be handed to unpack below.
                if self.use_torch_checkpoint:
                    x, _ = checkpoint(linear_transformer, x, use_reentrant=False)
                else:
                    x, _ = linear_transformer(x)
                x, = unpack(x, ft_ps, 'b * d')
            else:
                time_transformer, freq_transformer = transformer_block

            if self.skip_connection:
                # Sum all previous
                for j in range(i):
                    x = x + store[j]

            x = rearrange(x, 'b t f d -> b f t d')
            x, ps = pack([x], '* t d')

            if self.use_torch_checkpoint:
                x, next_time_v_residual = checkpoint(time_transformer, x, time_v_residual, use_reentrant=False)
            else:
                x, next_time_v_residual = time_transformer(x, value_residual=time_v_residual)
            # Keep the values from the first block; later blocks do not replace them.
            time_v_residual = default(time_v_residual, next_time_v_residual)

            x, = unpack(x, ps, '* t d')
            x = rearrange(x, 'b f t d -> b t f d')
            x, ps = pack([x], '* f d')

            if self.use_torch_checkpoint:
                x, next_freq_v_residual = checkpoint(freq_transformer, x, freq_v_residual, use_reentrant=False)
            else:
                x, next_freq_v_residual = freq_transformer(x, value_residual=freq_v_residual)
            freq_v_residual = default(freq_v_residual, next_freq_v_residual)

            x, = unpack(x, ps, '* f d')

            if self.skip_connection:
                store[i] = x

        # maybe reduce residual streams
        if self.num_residual_streams != 1:
            x = self.reduce_stream(x)

        x = self.final_norm(x)

        num_stems = len(self.mask_estimators)

        if self.use_torch_checkpoint:
            mask = torch.stack([checkpoint(fn, x, use_reentrant=False) for fn in self.mask_estimators], dim=1)
        else:
            mask = torch.stack([fn(x) for fn in self.mask_estimators], dim=1)
        mask = rearrange(mask, 'b n t (f c) -> b n f t c', c=2)

        # modulate frequency representation

        stft_repr = rearrange(stft_repr, 'b f t c -> b 1 f t c')

        # complex number multiplication

        stft_repr = torch.view_as_complex(stft_repr)
        mask = torch.view_as_complex(mask)

        stft_repr = stft_repr * mask

        # istft

        stft_repr = rearrange(stft_repr, 'b n (f s) t -> (b n s) f t', s=self.audio_channels)

        # same as torch.stft() fix for MacOS MPS above
        try:
            recon_audio = torch.istft(stft_repr, **self.stft_kwargs, window=stft_window, return_complex=False, length=raw_audio.shape[-1])
        except Exception:
            if not x_is_mps:
                raise
            recon_audio = torch.istft(stft_repr.cpu(), **self.stft_kwargs, window=stft_window.cpu(), return_complex=False, length=raw_audio.shape[-1]).to(device)

        recon_audio = rearrange(recon_audio, '(b n s) t -> b n s t', s=self.audio_channels, n=num_stems)

        if num_stems == 1:
            recon_audio = rearrange(recon_audio, 'b 1 s t -> b s t')

        # if a target is passed in, calculate loss for learning

        if not exists(target):
            return recon_audio

        if self.num_stems > 1:
            assert target.ndim == 4 and target.shape[1] == self.num_stems

        if target.ndim == 2:
            target = rearrange(target, '... t -> ... 1 t')

        target = target[..., :recon_audio.shape[-1]]  # protect against lost length on istft

        loss = F.l1_loss(recon_audio, target)

        multi_stft_resolution_loss = 0.

        for window_size in self.multi_stft_resolutions_window_sizes:
            res_stft_kwargs = dict(
                n_fft=max(window_size, self.multi_stft_n_fft),  # not sure what n_fft is across multi resolution stft
                win_length=window_size,
                return_complex=True,
                window=self.multi_stft_window_fn(window_size, device=device),
                **self.multi_stft_kwargs,
            )

            recon_Y = torch.stft(rearrange(recon_audio, '... s t -> (... s) t'), **res_stft_kwargs)
            target_Y = torch.stft(rearrange(target, '... s t -> (... s) t'), **res_stft_kwargs)

            multi_stft_resolution_loss = multi_stft_resolution_loss + F.l1_loss(recon_Y, target_Y)

        weighted_multi_resolution_loss = multi_stft_resolution_loss * self.multi_stft_resolution_loss_weight

        total_loss = loss + weighted_multi_resolution_loss

        if not return_loss_breakdown:
            return total_loss

        return total_loss, (loss, multi_stft_resolution_loss)
|
models/bs_roformer/mel_band_roformer_experimental.py
ADDED
|
@@ -0,0 +1,733 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from functools import partial
|
| 2 |
+
|
| 3 |
+
import torch
|
| 4 |
+
from torch import nn, einsum, Tensor
|
| 5 |
+
from torch.nn import Module, ModuleList
|
| 6 |
+
import torch.nn.functional as F
|
| 7 |
+
|
| 8 |
+
from models.bs_roformer.attend import Attend
|
| 9 |
+
from torch.utils.checkpoint import checkpoint
|
| 10 |
+
|
| 11 |
+
from beartype.typing import Tuple, Optional, List, Callable
|
| 12 |
+
from beartype import beartype
|
| 13 |
+
|
| 14 |
+
from rotary_embedding_torch import RotaryEmbedding
|
| 15 |
+
|
| 16 |
+
from einops import rearrange, pack, unpack, reduce, repeat
|
| 17 |
+
from einops.layers.torch import Rearrange
|
| 18 |
+
|
| 19 |
+
from hyper_connections import get_init_and_expand_reduce_stream_functions
|
| 20 |
+
|
| 21 |
+
from librosa import filters
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
# helper functions
|
| 25 |
+
|
| 26 |
+
def exists(val):
    """Tell whether *val* holds a value, i.e. is not ``None``."""
    is_missing = val is None
    return not is_missing
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
def default(v, d):
    """Pick *v* when it is set (not ``None``); otherwise fall back to *d*."""
    if not exists(v):
        return d
    return v
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
def pack_one(t, pattern):
    """Run einops ``pack`` on a one-element list holding *t* and return its result as-is."""
    tensors = [t]
    return pack(tensors, pattern)
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
def unpack_one(t, ps, pattern):
    """Run einops ``unpack`` and hand back only the first resulting tensor."""
    unpacked = unpack(t, ps, pattern)
    return unpacked[0]
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
def pad_at_dim(t, pad, dim=-1, value=0.):
    """Pad tensor *t* along one dimension.

    Args:
        t: tensor to pad.
        pad: ``(before, after)`` amounts for dimension *dim*.
        dim: dimension to pad; negative values count from the end.
        value: fill value for the padded entries.
    """
    # F.pad lists (before, after) pairs starting from the last dimension, so
    # every dimension to the right of ``dim`` needs an explicit (0, 0) pair.
    if dim < 0:
        trailing_dims = -dim - 1
    else:
        trailing_dims = t.ndim - dim - 1

    pad_spec = (0, 0) * trailing_dims + tuple(pad)
    return F.pad(t, pad_spec, value=value)
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
def l2norm(t):
    """Return *t* rescaled to unit L2 norm along the last dimension."""
    normalized = F.normalize(t, p=2, dim=-1)
    return normalized
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
# norm
|
| 53 |
+
|
| 54 |
+
class RMSNorm(Module):
    """Unit-normalise the last dimension, then scale by ``sqrt(dim)`` and a learned gain.

    ``gamma`` is a learnable vector of length ``dim`` initialised to ones.
    """

    def __init__(self, dim):
        super().__init__()
        self.scale = dim ** 0.5  # constant sqrt(dim) factor
        self.gamma = nn.Parameter(torch.ones(dim))  # learnable per-feature gain

    def forward(self, x):
        direction = F.normalize(x, dim=-1)
        rescaled = direction * self.scale
        return rescaled * self.gamma
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
# attention
|
| 65 |
+
|
| 66 |
+
class FeedForward(Module):
    """Position-wise MLP with a leading RMSNorm.

    Layer order: RMSNorm, Linear(dim -> int(dim * mult)), GELU, Dropout,
    Linear(int(dim * mult) -> dim), Dropout.  No residual is added here.
    """

    def __init__(
            self,
            dim,
            mult=4,
            dropout=0.
    ):
        super().__init__()
        expanded = int(dim * mult)

        pre_norm = RMSNorm(dim)
        expand = nn.Linear(dim, expanded)
        activation = nn.GELU()
        inner_dropout = nn.Dropout(dropout)
        contract = nn.Linear(expanded, dim)
        outer_dropout = nn.Dropout(dropout)

        self.net = nn.Sequential(pre_norm, expand, activation, inner_dropout, contract, outer_dropout)

    def forward(self, x):
        return self.net(x)
|
| 86 |
+
|
| 87 |
+
|
| 88 |
+
class Attention(Module):
    """Gated multi-head self-attention with optional value-residual mixing.

    Steps in ``forward``: RMSNorm the input, project to q/k/v, optionally lerp
    the values towards ``value_residual`` with a learned sigmoid weight,
    optionally apply the rotary embedding to q and k, attend, multiply by a
    per-head sigmoid gate and project back to ``dim``.

    Returns ``(output, values)``; ``values`` is this layer's value tensor as
    projected, before any mixing with ``value_residual``.
    """

    def __init__(
            self,
            dim,
            heads=8,
            dim_head=64,
            dropout=0.,
            rotary_embed=None,
            flash=True,
            learned_value_residual_mix=False,
    ):
        super().__init__()
        self.heads = heads
        self.scale = dim_head ** -0.5
        total_head_dim = heads * dim_head

        self.rotary_embed = rotary_embed

        self.attend = Attend(flash=flash, dropout=dropout)

        self.norm = RMSNorm(dim)
        self.to_qkv = nn.Linear(dim, total_head_dim * 3, bias=False)

        # The mixing projection exists only when value-residual learning is on.
        self.to_value_residual_mix = None
        if learned_value_residual_mix:
            self.to_value_residual_mix = nn.Linear(dim, heads)

        self.to_gates = nn.Linear(dim, heads)

        self.to_out = nn.Sequential(
            nn.Linear(total_head_dim, dim, bias=False),
            nn.Dropout(dropout),
        )

    def forward(self, x, value_residual=None):
        tokens = self.norm(x)

        projected = self.to_qkv(tokens)
        q, k, v = rearrange(projected, 'b n (qkv h d) -> qkv b h n d', qkv=3, h=self.heads)

        # Handed back to the caller unmodified.
        orig_v = v

        if exists(self.to_value_residual_mix):
            mix_logits = self.to_value_residual_mix(tokens)
            mix = rearrange(mix_logits, 'b n h -> b h n 1').sigmoid()

            assert exists(value_residual)
            v = v.lerp(value_residual, mix)

        if exists(self.rotary_embed):
            rotate = self.rotary_embed.rotate_queries_or_keys
            q = rotate(q)
            k = rotate(k)

        context = self.attend(q, k, v)

        # Per-token, per-head gate broadcast across the head feature axis.
        gate_logits = self.to_gates(tokens)
        context = context * rearrange(gate_logits, 'b n h -> b h n 1').sigmoid()

        flattened = rearrange(context, 'b h n d -> b n (h d)')
        return self.to_out(flattened), orig_v
|
| 145 |
+
|
| 146 |
+
|
| 147 |
+
class LinearAttention(Module):
    """
    Linear attention in the flavour proposed in https://arxiv.org/abs/2106.09681 by El-Nouby et al.

    Queries, keys and values are laid out as ``b h d n``. Queries and keys are
    passed through ``l2norm`` and the queries are scaled by a learnable
    per-head ``exp(temperature)`` before being handed to ``Attend``.
    ``forward`` returns a single tensor and adds no residual connection.
    """

    @beartype
    def __init__(
            self,
            *,
            dim,
            dim_head=32,
            heads=8,
            scale=8,
            flash=False,
            dropout=0.
    ):
        super().__init__()
        inner_width = dim_head * heads
        self.norm = RMSNorm(dim)

        qkv_projection = nn.Linear(dim, inner_width * 3, bias=False)
        split_heads = Rearrange('b n (qkv h d) -> qkv b h d n', qkv=3, h=heads)
        self.to_qkv = nn.Sequential(qkv_projection, split_heads)

        # initialised to zero, i.e. a multiplicative factor of exp(0) = 1
        self.temperature = nn.Parameter(torch.zeros(heads, 1, 1))

        self.attend = Attend(scale=scale, dropout=dropout, flash=flash)

        merge_heads = Rearrange('b h d n -> b n (h d)')
        self.to_out = nn.Sequential(
            merge_heads,
            nn.Linear(inner_width, dim, bias=False),
        )

    def forward(
            self,
            x
    ):
        q, k, v = self.to_qkv(self.norm(x))

        q = l2norm(q) * self.temperature.exp()
        k = l2norm(k)

        return self.to_out(self.attend(q, k, v))
|
| 199 |
+
|
| 200 |
+
|
| 201 |
+
class Transformer(Module):
    """Stack of ``depth`` (attention, feed-forward) layer pairs.

    Each pair uses either ``Attention`` (default) or ``LinearAttention``
    (``linear_attn=True``). When ``num_residual_streams != 1`` the
    ``Attention`` and ``FeedForward`` branches are wrapped with the
    hyper-connection initialiser returned by
    ``get_init_and_expand_reduce_stream_functions``; ``LinearAttention`` is
    never wrapped.

    ``forward`` returns ``(output, first_values)`` where ``first_values`` is
    the value tensor reported by the first ``Attention`` layer, or ``None``
    when the stack consists of linear-attention layers only.
    """

    def __init__(
            self,
            *,
            dim,
            depth,
            dim_head=64,
            heads=8,
            attn_dropout=0.,
            ff_dropout=0.,
            ff_mult=4,
            norm_output=True,
            rotary_embed=None,
            flash_attn=True,
            linear_attn=False,
            add_value_residual=False,
            num_residual_streams=1,
    ):
        super().__init__()
        self.layers = ModuleList([])

        use_hyper_conn = num_residual_streams != 1
        init_hyper_conn, *_ = get_init_and_expand_reduce_stream_functions(
            num_residual_streams, disable=num_residual_streams == 1)

        for _ in range(depth):
            if linear_attn:
                attn = LinearAttention(dim=dim, dim_head=dim_head, heads=heads, dropout=attn_dropout, flash=flash_attn)
            else:
                attn = Attention(
                    dim=dim, dim_head=dim_head, heads=heads, dropout=attn_dropout,
                    rotary_embed=rotary_embed, flash=flash_attn, learned_value_residual_mix=add_value_residual
                )
                if use_hyper_conn:
                    attn = init_hyper_conn(dim=dim, branch=attn)

            ff = FeedForward(dim=dim, mult=ff_mult, dropout=ff_dropout)
            if use_hyper_conn:
                ff = init_hyper_conn(dim=dim, branch=ff)

            self.layers.append(ModuleList([attn, ff]))

        self.norm = RMSNorm(dim) if norm_output else nn.Identity()

    def forward(self, x, value_residual=None):
        """Run all layer pairs and return ``(normed_output, first_values)``.

        Args:
            x: input tensor handed to the first layer.
            value_residual: value tensor from an earlier transformer, or
                ``None``. It is forwarded to every ``Attention`` layer.
        """
        first_values = None

        # "Compatibility with old weights": when no value residual is supplied
        # the residual additions are done explicitly here; otherwise the layers
        # are called without an explicit residual.
        # NOTE(review): the choice is keyed on `value_residual`, not on whether
        # the layers are hyper-connection wrapped — confirm this is intended.
        explicit_residual = value_residual is None

        for attn, ff in self.layers:
            if isinstance(attn, LinearAttention):
                # LinearAttention.forward takes only `x`, returns a single tensor
                # and is never hyper-connection wrapped, so call it accordingly
                # and add the residual here. (Previously this path raised a
                # TypeError because of the unsupported `value_residual` kwarg.)
                x = attn(x) + x
                next_values = None
            elif explicit_residual:
                attn_out, next_values = attn(x, value_residual=None)
                x = attn_out + x
            else:
                x, next_values = attn(x, value_residual=value_residual)

            first_values = default(first_values, next_values)

            x = ff(x) + x if explicit_residual else ff(x)

        return self.norm(x), first_values
|
| 267 |
+
|
| 268 |
+
|
| 269 |
+
# bandsplit module
|
| 270 |
+
|
| 271 |
+
class BandSplit(Module):
    """Project each band of the last axis to a common feature width.

    The last axis of the input is split into consecutive chunks of sizes
    ``dim_inputs``; every chunk goes through its own RMSNorm + Linear and the
    results are stacked on a new second-to-last axis.
    """

    @beartype
    def __init__(
            self,
            dim,
            dim_inputs: Tuple[int, ...]
    ):
        super().__init__()
        self.dim_inputs = dim_inputs
        self.to_features = ModuleList([
            nn.Sequential(RMSNorm(band_width), nn.Linear(band_width, dim))
            for band_width in dim_inputs
        ])

    def forward(self, x):
        chunks = x.split(self.dim_inputs, dim=-1)
        projected = [project(chunk) for chunk, project in zip(chunks, self.to_features)]
        return torch.stack(projected, dim=-2)
|
| 299 |
+
|
| 300 |
+
|
| 301 |
+
def MLP(
        dim_in,
        dim_out,
        dim_hidden=None,
        depth=1,
        activation=nn.Tanh
):
    """Build an ``nn.Sequential`` of ``depth + 1`` linear layers.

    Layer widths are ``dim_in -> dim_hidden (x depth) -> dim_out``; an
    ``activation()`` instance follows every linear layer except the last.
    ``dim_hidden`` falls back to ``dim_in`` via ``default``.
    """
    dim_hidden = default(dim_hidden, dim_in)

    widths = [dim_in] + [dim_hidden] * depth + [dim_out]
    final_index = len(widths) - 2

    modules = []
    for index, (fan_in, fan_out) in enumerate(zip(widths, widths[1:])):
        modules.append(nn.Linear(fan_in, fan_out))
        # the output projection is left without a non-linearity
        if index != final_index:
            modules.append(activation())

    return nn.Sequential(*modules)
|
| 324 |
+
|
| 325 |
+
|
| 326 |
+
class MaskEstimator(Module):
    """Per-band heads that turn band features back into per-frequency values.

    For each entry ``w`` of ``dim_inputs`` an ``MLP`` maps ``dim`` features to
    ``2 * w`` values, and a GLU over the last axis halves that to ``w``.
    ``forward`` applies head ``i`` to slice ``i`` of the second-to-last axis
    and concatenates all outputs along the last axis.
    """

    @beartype
    def __init__(
            self,
            dim,
            dim_inputs: Tuple[int, ...],
            depth,
            mlp_expansion_factor=4
    ):
        super().__init__()
        self.dim_inputs = dim_inputs
        hidden_width = dim * mlp_expansion_factor

        self.to_freqs = ModuleList([
            nn.Sequential(
                MLP(dim, band_width * 2, dim_hidden=hidden_width, depth=depth),
                nn.GLU(dim=-1),
            )
            for band_width in dim_inputs
        ])

    def forward(self, x):
        per_band = x.unbind(dim=-2)
        estimates = [head(features) for features, head in zip(per_band, self.to_freqs)]
        return torch.cat(estimates, dim=-1)
|
| 360 |
+
|
| 361 |
+
|
| 362 |
+
# main class
|
| 363 |
+
|
| 364 |
+
class MelBandRoformer(Module):
    """Mel-band-split transformer that separates stems by masking the mixture STFT.

    Pipeline of ``forward``: STFT -> gather the frequency bins of every mel
    band -> ``BandSplit`` -> ``depth`` blocks of (optional linear, time,
    frequency) ``Transformer`` -> one ``MaskEstimator`` per stem -> masks
    averaged over overlapping bands -> complex multiplication with the mixture
    STFT -> inverse STFT. When a ``target`` is given, an L1 waveform loss plus
    a weighted multi-resolution STFT L1 loss is returned instead of audio.

    Notes:
        * ``dim_freqs_in`` is accepted but not used by this implementation.
        * ``num_residual_streams=1`` disables hyper connections.
    """

    @beartype
    def __init__(
            self,
            dim,
            *,
            depth,
            stereo=False,
            num_stems=1,
            time_transformer_depth=2,
            freq_transformer_depth=2,
            linear_transformer_depth=0,
            num_bands=60,
            dim_head=64,
            heads=8,
            attn_dropout=0.1,
            ff_dropout=0.1,
            flash_attn=True,
            dim_freqs_in=1025,
            sample_rate=44100,  # needed for mel filter bank from librosa
            stft_n_fft=2048,
            stft_hop_length=512,
            # 10ms at 44100Hz, from sections 4.1, 4.4 in the paper - @faroit recommends // 2 or // 4 for better reconstruction
            stft_win_length=2048,
            stft_normalized=False,
            stft_window_fn: Optional[Callable] = None,
            mask_estimator_depth=1,
            multi_stft_resolution_loss_weight=1.,
            multi_stft_resolutions_window_sizes: Tuple[int, ...] = (4096, 2048, 1024, 512, 256),
            multi_stft_hop_size=147,
            multi_stft_normalized=False,
            multi_stft_window_fn: Callable = torch.hann_window,
            match_input_audio_length=False,  # if True, pad output tensor to match length of input tensor
            mlp_expansion_factor=4,
            use_torch_checkpoint=False,
            skip_connection=False,
            use_value_residual_learning=False,
            num_residual_streams=1,  # set to 1. to disable hyper connections (Default in original is 4)
    ):
        super().__init__()

        self.stereo = stereo
        self.audio_channels = 2 if stereo else 1
        self.num_stems = num_stems
        self.use_torch_checkpoint = use_torch_checkpoint
        self.skip_connection = skip_connection
        self.num_residual_streams = num_residual_streams

        _, self.expand_stream, self.reduce_stream = get_init_and_expand_reduce_stream_functions(
            num_residual_streams, disable=num_residual_streams == 1)

        self.layers = ModuleList([])

        transformer_kwargs = dict(
            dim=dim,
            heads=heads,
            dim_head=dim_head,
            attn_dropout=attn_dropout,
            ff_dropout=ff_dropout,
            flash_attn=flash_attn,
            num_residual_streams=num_residual_streams,
        )

        # one rotary embedding shared by all time transformers, one by all frequency transformers
        time_rotary_embed = RotaryEmbedding(dim=dim_head)
        freq_rotary_embed = RotaryEmbedding(dim=dim_head)

        for layer_index in range(depth):
            # only blocks after the first learn a value-residual mix, and only when enabled
            add_value_residual = bool(use_value_residual_learning and layer_index != 0)

            tran_modules = []
            if linear_transformer_depth > 0:
                tran_modules.append(Transformer(depth=linear_transformer_depth, linear_attn=True, **transformer_kwargs))
            tran_modules.append(
                Transformer(depth=time_transformer_depth, rotary_embed=time_rotary_embed,
                            add_value_residual=add_value_residual, **transformer_kwargs)
            )
            tran_modules.append(
                Transformer(depth=freq_transformer_depth, rotary_embed=freq_rotary_embed,
                            add_value_residual=add_value_residual, **transformer_kwargs)
            )
            self.layers.append(nn.ModuleList(tran_modules))

        self.stft_window_fn = partial(default(stft_window_fn, torch.hann_window), stft_win_length)

        self.stft_kwargs = dict(
            n_fft=stft_n_fft,
            hop_length=stft_hop_length,
            win_length=stft_win_length,
            normalized=stft_normalized
        )

        # Probe STFT, used only to read off the number of frequency bins.
        # The window must have `win_length` samples (torch.stft rejects any
        # other size), so size it with stft_win_length rather than stft_n_fft;
        # the two coincide for the defaults.
        freqs = torch.stft(torch.randn(1, 4096), **self.stft_kwargs, window=torch.ones(stft_win_length),
                           return_complex=True).shape[1]

        # create mel filter bank
        # with librosa.filters.mel as in section 2 of paper

        mel_filter_bank_numpy = filters.mel(sr=sample_rate, n_fft=stft_n_fft, n_mels=num_bands)

        mel_filter_bank = torch.from_numpy(mel_filter_bank_numpy)

        # for some reason, it doesn't include the first freq? just force a value for now

        mel_filter_bank[0][0] = 1.

        # In some systems/envs we get 0.0 instead of ~1.9e-18 in the last position,
        # so let's force a positive value

        mel_filter_bank[-1, -1] = 1.

        # binary as in paper (then estimated masks are averaged for overlapping regions)

        freqs_per_band = mel_filter_bank > 0
        assert freqs_per_band.any(dim=0).all(), 'all frequencies need to be covered by all bands for now'

        # flat list of frequency-bin indices, band after band (bins shared by bands appear repeatedly)
        repeated_freq_indices = repeat(torch.arange(freqs), 'f -> b f', b=num_bands)
        freq_indices = repeated_freq_indices[freqs_per_band]

        if stereo:
            # forward() interleaves channels as (f s), so bin f of channel s sits at index 2 * f + s
            freq_indices = repeat(freq_indices, 'f -> f s', s=2)
            freq_indices = freq_indices * 2 + torch.arange(2)
            freq_indices = rearrange(freq_indices, 'f s -> (f s)')

        self.register_buffer('freq_indices', freq_indices, persistent=False)
        self.register_buffer('freqs_per_band', freqs_per_band, persistent=False)

        num_freqs_per_band = reduce(freqs_per_band, 'b f -> b', 'sum')
        num_bands_per_freq = reduce(freqs_per_band, 'b f -> f', 'sum')

        self.register_buffer('num_freqs_per_band', num_freqs_per_band, persistent=False)
        self.register_buffer('num_bands_per_freq', num_bands_per_freq, persistent=False)

        # band split and mask estimator

        # each bin contributes real + imaginary parts for every audio channel
        freqs_per_bands_with_complex = tuple(2 * f * self.audio_channels for f in num_freqs_per_band.tolist())

        self.band_split = BandSplit(
            dim=dim,
            dim_inputs=freqs_per_bands_with_complex
        )

        self.mask_estimators = nn.ModuleList([])

        for _ in range(num_stems):
            mask_estimator = MaskEstimator(
                dim=dim,
                dim_inputs=freqs_per_bands_with_complex,
                depth=mask_estimator_depth,
                mlp_expansion_factor=mlp_expansion_factor,
            )

            self.mask_estimators.append(mask_estimator)

        # for the multi-resolution stft loss

        self.multi_stft_resolution_loss_weight = multi_stft_resolution_loss_weight
        self.multi_stft_resolutions_window_sizes = multi_stft_resolutions_window_sizes
        self.multi_stft_n_fft = stft_n_fft
        self.multi_stft_window_fn = multi_stft_window_fn

        self.multi_stft_kwargs = dict(
            hop_length=multi_stft_hop_size,
            normalized=multi_stft_normalized
        )

        self.match_input_audio_length = match_input_audio_length

    def forward(
            self,
            raw_audio,
            target=None,
            return_loss_breakdown=False
    ):
        """
        Separate ``raw_audio`` or, when ``target`` is given, compute the training loss.

        Returns the reconstructed audio when ``target`` is None; otherwise the
        total loss, or ``(total_loss, (l1_loss, multi_stft_resolution_loss))``
        when ``return_loss_breakdown`` is True.

        einops

        b - batch
        f - freq
        t - time
        s - audio channel (1 for mono, 2 for stereo)
        n - number of 'stems'
        c - complex (2)
        d - feature dimension
        """

        device = raw_audio.device

        if raw_audio.ndim == 2:
            raw_audio = rearrange(raw_audio, 'b t -> b 1 t')

        batch, channels, raw_audio_length = raw_audio.shape

        istft_length = raw_audio_length if self.match_input_audio_length else None

        assert (not self.stereo and channels == 1) or (
                self.stereo and channels == 2), 'stereo needs to be set to True if passing in audio signal that is stereo (channel dimension of 2). also need to be False if mono (channel dimension of 1)'

        # to stft

        raw_audio, batch_audio_channel_packed_shape = pack_one(raw_audio, '* t')

        stft_window = self.stft_window_fn(device=device)

        stft_repr = torch.stft(raw_audio, **self.stft_kwargs, window=stft_window, return_complex=True)
        stft_repr = torch.view_as_real(stft_repr)

        stft_repr = unpack_one(stft_repr, batch_audio_channel_packed_shape, '* f t c')

        # merge stereo / mono into the frequency, with frequency leading dimension, for band splitting
        stft_repr = rearrange(stft_repr, 'b s f t c -> b (f s) t c')

        # index out all frequencies for all frequency ranges across bands ascending in one go

        batch_arange = torch.arange(batch, device=device)[..., None]

        # account for stereo

        x = stft_repr[batch_arange, self.freq_indices]

        # fold the complex (real and imag) into the frequencies dimension

        x = rearrange(x, 'b f t c -> b t (f c)')

        if self.use_torch_checkpoint:
            x = checkpoint(self.band_split, x, use_reentrant=False)
        else:
            x = self.band_split(x)

        # value residuals
        time_v_residual = None
        freq_v_residual = None

        # maybe expand residual streams
        if self.num_residual_streams != 1:
            x = self.expand_stream(x)

        # axial / hierarchical attention

        store = [None] * len(self.layers)
        for i, transformer_block in enumerate(self.layers):

            if len(transformer_block) == 3:
                linear_transformer, time_transformer, freq_transformer = transformer_block

                x, ft_ps = pack([x], 'b * d')
                # Transformer.forward returns an (output, first_values) pair;
                # only the output tensor is carried on (the pair itself used to
                # be assigned to x, which broke the unpack below).
                if self.use_torch_checkpoint:
                    x, _ = checkpoint(linear_transformer, x, use_reentrant=False)
                else:
                    x, _ = linear_transformer(x)
                x, = unpack(x, ft_ps, 'b * d')
            else:
                time_transformer, freq_transformer = transformer_block

            if self.skip_connection:
                # Sum all previous
                for j in range(i):
                    x = x + store[j]

            # attend along time: every (batch, band) pair becomes one sequence
            x = rearrange(x, 'b t f d -> b f t d')
            x, ps = pack([x], '* t d')

            if self.use_torch_checkpoint:
                x, next_time_v_residual = checkpoint(time_transformer, x, time_v_residual, use_reentrant=False)
            else:
                x, next_time_v_residual = time_transformer(x, time_v_residual)
            # keep the values of the first block only
            time_v_residual = default(time_v_residual, next_time_v_residual)

            # attend along bands: every (batch, frame) pair becomes one sequence
            x, = unpack(x, ps, '* t d')
            x = rearrange(x, 'b f t d -> b t f d')
            x, ps = pack([x], '* f d')

            if self.use_torch_checkpoint:
                x, next_freq_v_residual = checkpoint(freq_transformer, x, freq_v_residual, use_reentrant=False)
            else:
                x, next_freq_v_residual = freq_transformer(x, value_residual=freq_v_residual)
            freq_v_residual = default(freq_v_residual, next_freq_v_residual)

            x, = unpack(x, ps, '* f d')

            if self.skip_connection:
                store[i] = x

        # maybe reduce residual streams
        if self.num_residual_streams != 1:
            x = self.reduce_stream(x)

        num_stems = len(self.mask_estimators)
        if self.use_torch_checkpoint:
            masks = torch.stack([checkpoint(fn, x, use_reentrant=False) for fn in self.mask_estimators], dim=1)
        else:
            masks = torch.stack([fn(x) for fn in self.mask_estimators], dim=1)
        masks = rearrange(masks, 'b n t (f c) -> b n f t c', c=2)

        # modulate frequency representation

        stft_repr = rearrange(stft_repr, 'b f t c -> b 1 f t c')

        # complex number multiplication

        stft_repr = torch.view_as_complex(stft_repr)
        masks = torch.view_as_complex(masks)

        masks = masks.type(stft_repr.dtype)

        # need to average the estimated mask for the overlapped frequencies

        scatter_indices = repeat(self.freq_indices, 'f -> b n f t', b=batch, n=num_stems, t=stft_repr.shape[-1])

        stft_repr_expanded_stems = repeat(stft_repr, 'b 1 ... -> b n ...', n=num_stems)
        masks_summed = torch.zeros_like(stft_repr_expanded_stems).scatter_add_(2, scatter_indices, masks)

        # number of bands covering each bin, repeated per channel to match the (f s) layout
        denom = repeat(self.num_bands_per_freq, 'f -> (f r) 1', r=channels)

        masks_averaged = masks_summed / denom.clamp(min=1e-8)

        # modulate stft repr with estimated mask

        stft_repr = stft_repr * masks_averaged

        # istft

        stft_repr = rearrange(stft_repr, 'b n (f s) t -> (b n s) f t', s=self.audio_channels)

        recon_audio = torch.istft(stft_repr, **self.stft_kwargs, window=stft_window, return_complex=False,
                                  length=istft_length)

        recon_audio = rearrange(recon_audio, '(b n s) t -> b n s t', b=batch, s=self.audio_channels, n=num_stems)

        if num_stems == 1:
            recon_audio = rearrange(recon_audio, 'b 1 s t -> b s t')

        # if a target is passed in, calculate loss for learning

        if not exists(target):
            return recon_audio

        if self.num_stems > 1:
            assert target.ndim == 4 and target.shape[1] == self.num_stems

        if target.ndim == 2:
            target = rearrange(target, '... t -> ... 1 t')

        target = target[..., :recon_audio.shape[-1]]  # protect against lost length on istft

        loss = F.l1_loss(recon_audio, target)

        multi_stft_resolution_loss = 0.

        for window_size in self.multi_stft_resolutions_window_sizes:
            res_stft_kwargs = dict(
                n_fft=max(window_size, self.multi_stft_n_fft),  # not sure what n_fft is across multi resolution stft
                win_length=window_size,
                return_complex=True,
                window=self.multi_stft_window_fn(window_size, device=device),
                **self.multi_stft_kwargs,
            )

            recon_Y = torch.stft(rearrange(recon_audio, '... s t -> (... s) t'), **res_stft_kwargs)
            target_Y = torch.stft(rearrange(target, '... s t -> (... s) t'), **res_stft_kwargs)

            multi_stft_resolution_loss = multi_stft_resolution_loss + F.l1_loss(recon_Y, target_Y)

        weighted_multi_resolution_loss = multi_stft_resolution_loss * self.multi_stft_resolution_loss_weight

        total_loss = loss + weighted_multi_resolution_loss

        if not return_loss_breakdown:
            return total_loss

        return total_loss, (loss, multi_stft_resolution_loss)
|
models/conformer_model.py
ADDED
|
@@ -0,0 +1,188 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import torch.nn as nn
|
| 3 |
+
import torch.nn.functional as F
|
| 4 |
+
from typing import Optional
|
| 5 |
+
from conformer import Conformer
|
| 6 |
+
|
| 7 |
+
class NeuralModel(nn.Module):
    """
    Takes the STFT magnitude |X| of shape (B, C, F, T_spec) and predicts complex
    masks in folded form: (B, 2 * (sources*channels), F, T_spec),
    where the factor 2 stands for [real, imag].
    """

    def __init__(
            self,
            in_channels: int = 2,
            sources: int = 2,
            freq_bins: int = 2049,
            embed_dim: int = 512,
            depth: int = 8,
            dim_head: int = 64,
            heads: int = 8,
            ff_mult: int = 4,
            conv_expansion_factor: int = 2,
            conv_kernel_size: int = 31,
            attn_dropout: float = 0.1,
            ff_dropout: float = 0.1,
            conv_dropout: float = 0.1,
    ):
        super().__init__()
        self.freq_bins = freq_bins
        self.in_channels = in_channels
        self.sources = sources
        # one mask per (source, channel) pair
        self.out_masks = sources * in_channels
        self.embed_dim = embed_dim

        self.input_proj_stft = nn.Linear(freq_bins * in_channels, embed_dim)

        conformer_kwargs = dict(
            dim=embed_dim,
            depth=depth,
            dim_head=dim_head,
            heads=heads,
            ff_mult=ff_mult,
            conv_expansion_factor=conv_expansion_factor,
            conv_kernel_size=conv_kernel_size,
            attn_dropout=attn_dropout,
            ff_dropout=ff_dropout,
            conv_dropout=conv_dropout,
        )
        self.model = Conformer(**conformer_kwargs)

        # trailing factor 2 = [real, imag]
        self.output_proj = nn.Linear(embed_dim, freq_bins * self.out_masks * 2)

    def forward(self, x_stft_mag: torch.Tensor) -> torch.Tensor:
        """
        x_stft_mag: (B, C, F, T_spec)
        returns: (B, 2 * (sources*channels), F, T_spec) — real/imag parts of the masks
        """
        assert x_stft_mag.dim() == 4, f"Expected (B,C,F,T), got {tuple(x_stft_mag.shape)}"
        batch, chans, n_freq, n_frames = x_stft_mag.shape

        # one token per STFT frame: (B, T_spec, C*F)
        frames = x_stft_mag.permute(0, 3, 1, 2).reshape(batch, n_frames, chans * n_freq)

        hidden = self.input_proj_stft(frames)  # (B, T_spec, E)
        hidden = self.model(hidden)            # (B, T_spec, E)
        hidden = torch.tanh(hidden)            # bound activations for stability
        flat_masks = self.output_proj(hidden)  # (B, T_spec, F * out_masks * 2)

        # back to (B, 2*out_masks, F, T_spec)
        masks = flat_masks.reshape(batch, n_frames, self.out_masks * 2, n_freq)
        return masks.permute(0, 2, 3, 1).contiguous()
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
class ConformerMSS(nn.Module):
    """
    Waveform-in / waveform-out wrapper, compatible with the training loop:
        forward(x: (B, C, T)) -> y_hat: (B, S, C, T)
    where S is the number of sources.
    Internally: STFT -> NeuralModel -> complex masks -> iSTFT.
    """

    def __init__(
            self,
            core: NeuralModel,
            n_fft: int = 4096,
            hop_length: int = 1024,
            win_length: Optional[int] = None,
            center: bool = True,
    ):
        super().__init__()
        self.core = core
        self.n_fft = n_fft
        self.hop_length = hop_length
        self.win_length = n_fft if win_length is None else win_length
        self.center = center

        # the window is a (non-persistent) buffer so that it follows .to(device)
        self.register_buffer("window", torch.hann_window(self.win_length), persistent=False)

        # sanity check: the core's freq_bins must equal n_fft//2 + 1
        expected_bins = n_fft // 2 + 1
        assert core.freq_bins == expected_bins, (
            f"NeuralModel.freq_bins={core.freq_bins} != n_fft//2+1={expected_bins}. "
            f"Поставь freq_bins={expected_bins} при создании core."
        )

    def _stft(self, x: torch.Tensor) -> torch.Tensor:
        """
        x: (B, C, T) -> spec: complex (B, C, F, TT)
        """
        assert x.dim() == 3, f"Expected (B,C,T), got {tuple(x.shape)}"
        batch, chans, n_samples = x.shape

        # batch and channel axes are folded together for the transform
        spec = torch.stft(
            x.reshape(batch * chans, n_samples),
            n_fft=self.n_fft,
            hop_length=self.hop_length,
            win_length=self.win_length,
            window=self.window.to(x.device),
            center=self.center,
            return_complex=True,
        )  # (B*C, F, TT)

        n_bins, n_frames = spec.shape[-2:]
        return spec.reshape(batch, chans, n_bins, n_frames)

    def _istft(self, spec: torch.Tensor, length: int) -> torch.Tensor:
        """
        spec: complex (B, C, F, TT) -> audio: (B, C, T)
        """
        batch, chans, n_bins, n_frames = spec.shape

        audio = torch.istft(
            spec.reshape(batch * chans, n_bins, n_frames),
            n_fft=self.n_fft,
            hop_length=self.hop_length,
            win_length=self.win_length,
            window=self.window.to(spec.device),
            center=self.center,
            length=length,
        )
        return audio.reshape(batch, chans, -1)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        x: (B, C, T) (mixture waveform)
        returns y_hat: (B, S, C, T) — predicted source waveforms
        """
        B, C, T = x.shape

        # 1) STFT of the mixture; the core only sees the magnitude
        mix_spec = self._stft(x)  # (B, C, F, TT)

        # 2) Run the core -> real/imag parts of the masks
        mask_ri = self.core(mix_spec.abs())  # (B, 2*(S*C), F, TT2)
        _, two_sc, F, TT2 = mask_ri.shape

        S = self.core.sources
        assert two_sc == 2 * (S * C), (
            f"core вернул {two_sc} каналов масок, ожидалось {2*(S*C)} "
            f"(2*[real/imag]*[sources*channels]). Проверь in_channels/sources."
        )

        # 3) Align the time axes (in case TT != TT2): crop both to the shorter one
        TT = min(mix_spec.shape[-1], TT2)
        mix_spec = mix_spec[..., :TT]
        mask_ri = mask_ri[..., :TT]

        # 4) Unfold to (B, 2, S, C, F, TT) and build complex masks
        mask_ri = mask_ri.view(B, 2, S, C, F, TT).contiguous()
        masks_c = torch.complex(mask_ri[:, 0], mask_ri[:, 1])  # (B, S, C, F, TT)

        # 5) Apply the masks to the complex mixture spectrum
        est_specs = masks_c * mix_spec.unsqueeze(1)  # (B, S, C, F, TT)

        # 6) iSTFT for every source
        per_source = [self._istft(est_specs[:, s], length=T) for s in range(S)]  # each (B, C, T)
        return torch.stack(per_source, dim=1)  # (B, S, C, T)
|
models/mel_band_conformer.py
ADDED
|
@@ -0,0 +1,424 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from functools import partial
|
| 2 |
+
|
| 3 |
+
import torch
|
| 4 |
+
from torch import nn
|
| 5 |
+
import torch.nn.functional as F
|
| 6 |
+
from conformer import Conformer
|
| 7 |
+
from torch.nn import Module, ModuleList
|
| 8 |
+
from librosa import filters
|
| 9 |
+
from beartype.typing import Tuple, Optional, List, Callable
|
| 10 |
+
from beartype import beartype
|
| 11 |
+
from einops import rearrange, pack, unpack, reduce, repeat
|
| 12 |
+
|
| 13 |
+
# helper functions
|
| 14 |
+
|
| 15 |
+
def exists(val):
    """Return True when ``val`` is anything other than ``None``."""
    if val is None:
        return False
    return True
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
def default(v, d):
    """Return ``v`` unless it is ``None``, in which case return the fallback ``d``."""
    if v is None:
        return d
    return v
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
class RMSNorm(Module):
    """
    Normalization layer with a learned per-feature gain.

    The last axis is L2-normalized, multiplied by ``sqrt(dim)`` and then by
    the learnable vector ``gamma`` (initialised to ones).
    """

    def __init__(self, dim):
        super().__init__()
        # Constant factor applied after the unit-length normalization.
        self.scale = dim ** 0.5
        self.gamma = nn.Parameter(torch.ones(dim))

    def forward(self, x):
        unit = F.normalize(x, dim=-1)
        return unit * self.scale * self.gamma
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
# attention
|
| 34 |
+
|
| 35 |
+
def MLP(
        dim_in,
        dim_out,
        dim_hidden=None,
        depth=1,
        activation=nn.Tanh
):
    """
    Build a fully connected stack as an ``nn.Sequential``.

    The stack has ``depth`` hidden layers of width ``dim_hidden`` (falling
    back to ``dim_in`` when it is ``None``) followed by a final projection to
    ``dim_out``. An ``activation()`` instance follows every linear layer
    except the last one.
    """
    if dim_hidden is None:
        dim_hidden = dim_in

    widths = [dim_in] + [dim_hidden] * depth + [dim_out]

    # Consecutive width pairs; the last pair is the un-activated output layer.
    *inner_pairs, output_pair = zip(widths, widths[1:])

    modules = []
    for fan_in, fan_out in inner_pairs:
        modules.append(nn.Linear(fan_in, fan_out))
        modules.append(activation())
    modules.append(nn.Linear(*output_pair))

    return nn.Sequential(*modules)
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
class MaskEstimator(Module):
    """
    Per-band mask head.

    Holds one ``MLP`` + ``nn.GLU`` stack per entry of ``dim_inputs``. Each
    stack maps ``dim`` features to ``dim_in`` outputs: the MLP emits
    ``2 * dim_in`` values and the GLU halves the last axis.

    Args:
        dim: feature size of every band embedding.
        dim_inputs: output width of each band, one entry per band.
        depth: number of hidden layers in each band MLP.
        mlp_expansion_factor: hidden width of the MLPs as a multiple of ``dim``.
    """

    @beartype
    def __init__(
            self,
            dim,
            dim_inputs: Tuple[int, ...],
            depth,
            mlp_expansion_factor=4
    ):
        super().__init__()
        self.dim_inputs = dim_inputs
        self.to_freqs = ModuleList([])
        dim_hidden = dim * mlp_expansion_factor

        for dim_in in dim_inputs:
            mlp = nn.Sequential(
                MLP(dim, dim_in * 2, dim_hidden=dim_hidden, depth=depth),
                nn.GLU(dim=-1)
            )

            self.to_freqs.append(mlp)

    def forward(self, x):
        """
        Run every band through its own MLP and concatenate the results.

        ``x`` is split along its second-to-last axis (one slice per band);
        the per-band outputs are concatenated along the last axis.
        """
        bands = x.unbind(dim=-2)
        outs = [mlp(band_features) for band_features, mlp in zip(bands, self.to_freqs)]
        return torch.cat(outs, dim=-1)
|
| 95 |
+
|
| 96 |
+
|
| 97 |
+
class BandSplit(Module):
    """
    Project variable-width frequency bands to a common feature size.

    For every width in ``dim_inputs`` the module owns an ``RMSNorm`` followed
    by a linear projection to ``dim``.
    """

    @beartype
    def __init__(
            self,
            dim,
            dim_inputs: Tuple[int, ...]
    ):
        super().__init__()
        self.dim_inputs = dim_inputs
        self.to_features = ModuleList([])

        for band_width in dim_inputs:
            self.to_features.append(
                nn.Sequential(
                    RMSNorm(band_width),
                    nn.Linear(band_width, dim)
                )
            )

    def forward(self, x):
        """
        Cut the last axis of ``x`` into the configured band widths, project
        each chunk, and stack the projections on a new second-to-last axis.
        """
        chunks = x.split(self.dim_inputs, dim=-1)
        projected = [proj(chunk) for chunk, proj in zip(chunks, self.to_features)]
        return torch.stack(projected, dim=-2)
|
| 127 |
+
|
| 128 |
+
|
| 129 |
+
class MelBandConformer(nn.Module):
    """
    Mel-band source-separation model built from axial Conformer blocks.

    Pipeline implemented by ``forward``:

    1. STFT of the input waveform.
    2. Frequency bins are gathered into ``num_bands`` mel bands (a bin may
       belong to more than one band) and each band is projected to ``dim``
       features by ``BandSplit``.
    3. ``depth`` blocks, each a Conformer over time followed by a Conformer
       over bands.
    4. One ``MaskEstimator`` per stem predicts a complex mask per band bin;
       masks of bins shared by several bands are averaged.
    5. The masked mixture spectrogram is inverted with iSTFT.

    When a ``target`` is passed to ``forward`` the model returns an L1
    waveform loss plus a weighted multi-resolution STFT L1 loss instead of
    audio.

    Notes:
        * ``dim_freqs_in`` is accepted but not used by this implementation.
        * NOTE(review): with ``skip_connection=True`` block outputs are
          collected in a local list that is never read back, so the flag
          currently has no effect on the output — confirm the intended wiring.
    """

    def __init__(
            self,
            dim: int,
            *,
            depth: int,
            stereo: bool = False,
            num_stems: int = 1,
            time_conformer_depth: int = 2,
            freq_conformer_depth: int = 2,
            num_bands: int = 60,
            dim_head: int = 64,
            heads: int = 8,
            # Conformer params
            ff_mult: int = 4,
            conv_expansion_factor: int = 2,
            conv_kernel_size: int = 31,
            attn_dropout: float = 0.0,
            ff_dropout: float = 0.0,
            conv_dropout: float = 0.0,
            # STFT
            dim_freqs_in: int = 1025,
            sample_rate: int = 44100,
            stft_n_fft: int = 2048,
            stft_hop_length: int = 512,
            stft_win_length: int = 2048,
            stft_normalized: bool = False,
            stft_window_fn: Optional[Callable] = None,
            # Loss
            mask_estimator_depth: int = 1,
            multi_stft_resolution_loss_weight: float = 1.0,
            multi_stft_resolutions_window_sizes: Tuple[int, ...] = (4096, 2048, 1024, 512, 256),
            multi_stft_hop_size: int = 147,
            multi_stft_normalized: bool = False,
            multi_stft_window_fn: Callable = torch.hann_window,
            match_input_audio_length: bool = False,

            use_torch_checkpoint: bool = False,
            skip_connection: bool = False,
    ):
        super().__init__()

        self.stereo = stereo
        self.audio_channels = 2 if stereo else 1
        self.num_stems = num_stems
        self.use_torch_checkpoint = use_torch_checkpoint
        self.skip_connection = skip_connection

        self.layers = nn.ModuleList([])

        # Layers per block: [ time-Conformer, freq-Conformer ]
        conformer_kwargs = dict(
            dim=dim,
            dim_head=dim_head,
            heads=heads,
            ff_mult=ff_mult,
            conv_expansion_factor=conv_expansion_factor,
            conv_kernel_size=conv_kernel_size,
            attn_dropout=attn_dropout,
            ff_dropout=ff_dropout,
            conv_dropout=conv_dropout,
        )

        for _ in range(depth):
            time_block = Conformer(depth=time_conformer_depth, **conformer_kwargs)
            freq_block = Conformer(depth=freq_conformer_depth, **conformer_kwargs)
            self.layers.append(nn.ModuleList([time_block, freq_block]))

        # The window is created lazily in forward() so it lands on the input's device.
        self.stft_window_fn = partial(stft_window_fn or torch.hann_window, stft_win_length)

        self.stft_kwargs = dict(
            n_fft=stft_n_fft,
            hop_length=stft_hop_length,
            win_length=stft_win_length,
            normalized=stft_normalized
        )

        # Number of frequency bins of the one-sided STFT of a real signal.
        # Computed directly instead of running a probe STFT: the probe used a
        # window of length n_fft, which torch.stft rejects whenever
        # win_length != n_fft.
        freqs = stft_n_fft // 2 + 1

        # build mel filter bank to define band grouping
        mel_filter_bank_numpy = filters.mel(sr=sample_rate, n_fft=stft_n_fft, n_mels=num_bands)
        mel_filter_bank = torch.from_numpy(mel_filter_bank_numpy)
        # ensure coverage at the boundaries: force the first bin into the
        # first band and the last bin into the last band
        mel_filter_bank[0][0] = 1.0
        mel_filter_bank[-1, -1] = 1.0

        # Boolean (bands, freqs) membership matrix: a bin belongs to a band
        # wherever the mel filter has non-zero weight.
        freqs_per_band = mel_filter_bank > 0
        assert freqs_per_band.any(dim=0).all(), 'all frequency bins must be covered by bands'

        # Flat list of bin indices, band after band (bins shared by several
        # bands appear once per band).
        repeated_freq_indices = repeat(torch.arange(freqs), 'f -> b f', b=num_bands)
        freq_indices = repeated_freq_indices[freqs_per_band]

        if stereo:
            # duplicate indices for stereo by interleaving channels along the freq axis
            freq_indices = repeat(freq_indices, 'f -> f s', s=2)
            freq_indices = freq_indices * 2 + torch.arange(2)
            freq_indices = rearrange(freq_indices, 'f s -> (f s)')

        self.register_buffer('freq_indices', freq_indices, persistent=False)
        self.register_buffer('freqs_per_band', freqs_per_band, persistent=False)

        num_freqs_per_band = reduce(freqs_per_band, 'b f -> b', 'sum')
        num_bands_per_freq = reduce(freqs_per_band, 'b f -> f', 'sum')

        self.register_buffer('num_freqs_per_band', num_freqs_per_band, persistent=False)
        self.register_buffer('num_bands_per_freq', num_bands_per_freq, persistent=False)

        # Per-band feature width: bins * audio channels * 2 (real and imaginary parts).
        freqs_per_bands_with_complex = tuple(2 * f * self.audio_channels for f in num_freqs_per_band.tolist())

        self.band_split = BandSplit(
            dim=dim,
            dim_inputs=freqs_per_bands_with_complex
        )

        self.mask_estimators = nn.ModuleList([
            MaskEstimator(
                dim=dim,
                dim_inputs=freqs_per_bands_with_complex,
                depth=mask_estimator_depth,
                mlp_expansion_factor=4,  # could be exposed as a parameter
            )
            for _ in range(num_stems)
        ])

        # multi-resolution STFT loss setup
        self.multi_stft_resolution_loss_weight = multi_stft_resolution_loss_weight
        self.multi_stft_resolutions_window_sizes = multi_stft_resolutions_window_sizes
        self.multi_stft_n_fft = stft_n_fft
        self.multi_stft_window_fn = multi_stft_window_fn

        self.multi_stft_kwargs = dict(
            hop_length=multi_stft_hop_size,
            normalized=multi_stft_normalized
        )

        self.match_input_audio_length = match_input_audio_length

    def _run(self, module, x):
        """Call ``module(x)``, through activation checkpointing when enabled."""
        if self.use_torch_checkpoint:
            # Local import so the submodule is loaded regardless of what the
            # rest of the program has imported.
            from torch.utils.checkpoint import checkpoint
            return checkpoint(module, x, use_reentrant=False)
        return module(x)

    def forward(
            self,
            raw_audio: torch.Tensor,
            target: Optional[torch.Tensor] = None,
            return_loss_breakdown: bool = False
    ):
        """
        Separate ``raw_audio`` or, when ``target`` is given, compute the loss.

        Axis legend used in the einops patterns below:

        b - batch
        f - freq
        t - time
        s - audio channel (1 mono / 2 stereo)
        n - stems
        c - complex (2)
        d - feature dim

        Args:
            raw_audio: waveform of shape (b, t) or (b, s, t).
            target: optional reference audio; when ``None`` the separated
                audio is returned.
            return_loss_breakdown: also return the individual loss terms.

        Returns:
            * ``target is None``: audio of shape (b, n, s, t), or (b, s, t)
              when ``num_stems == 1``.
            * otherwise the total loss, or ``(total, (l1, multi_stft))`` when
              ``return_loss_breakdown`` is true.
        """
        device = raw_audio.device

        if raw_audio.ndim == 2:
            raw_audio = rearrange(raw_audio, 'b t -> b 1 t')

        batch, channels, raw_audio_length = raw_audio.shape
        istft_length = raw_audio_length if self.match_input_audio_length else None

        assert (not self.stereo and channels == 1) or (self.stereo and channels == 2), \
            'set stereo=True for stereo input (C=2), stereo=False for mono (C=1)'

        # --- STFT ---
        raw_audio_flat, packed_shape = raw_audio.reshape(-1, raw_audio.shape[-1]), raw_audio.shape[:2]
        stft_window = self.stft_window_fn(device=device)

        stft_repr = torch.stft(raw_audio_flat, **self.stft_kwargs, window=stft_window, return_complex=True)
        stft_repr = torch.view_as_real(stft_repr)  # (B*C, F, T, 2)
        stft_repr = stft_repr.view(*packed_shape, *stft_repr.shape[1:])  # (b, s, f, t, c)

        # fold channel into frequency axis (channels interleaved per bin)
        stft_repr_fs = rearrange(stft_repr, 'b s f t c -> b (f s) t c')

        # index frequencies by mel bands
        b_idx = torch.arange(batch, device=device)[..., None]
        x = stft_repr_fs[b_idx, self.freq_indices]  # (b, sum(freqs_in_bands), t, c)
        x = rearrange(x, 'b f t c -> b t (f c)')  # flatten complex axis into features

        # --- BandSplit -> (b, t, bands, dim) ---
        x = self._run(self.band_split, x)

        # --- Axial Conformer (time, then freq) ---
        store = [None] * len(self.layers)

        for i, (time_conf, freq_conf) in enumerate(self.layers):
            bsz, tlen, bands, _ = x.shape

            # Time axis: (b, t, bands, d) -> ((b*bands), t, d)
            x_time = rearrange(x, 'b t f d -> (b f) t d')
            x_time = self._run(time_conf, x_time)
            x = rearrange(x_time, '(b f) t d -> b t f d', b=bsz, f=bands)

            # Freq axis: (b, t, f, d) -> ((b*t), f, d)
            x_freq = rearrange(x, 'b t f d -> (b t) f d')
            x_freq = self._run(freq_conf, x_freq)
            x = rearrange(x_freq, '(b t) f d -> b t f d', b=bsz, t=tlen)

            if self.skip_connection:
                # NOTE(review): `store` is written here but never read again.
                store[i] = x if store[i] is None else store[i] + x

        # --- Mask estimation ---
        # (b, t, f_bands, d) -> per-stem MLP over bands
        masks = torch.stack([self._run(fn, x) for fn in self.mask_estimators], dim=1)
        masks = rearrange(masks, 'b n t (f c) -> b n f t c', c=2)

        # --- Complex modulation ---
        stft_repr_c = rearrange(stft_repr, 'b s f t c -> b 1 (f s) t c')
        stft_repr_c = torch.view_as_complex(stft_repr_c)  # (b, 1, F*S, T)
        masks_c = torch.view_as_complex(masks)  # (b, n, F*S, T)

        masks_c = masks_c.type(stft_repr_c.dtype)

        # Sum the masks of every band back onto the bins they came from ...
        scatter_idx = repeat(self.freq_indices, 'f -> b n f t', b=batch, n=self.num_stems, t=stft_repr_c.shape[-1])
        stft_repr_expanded = repeat(stft_repr_c, 'b 1 ... -> b n ...', n=self.num_stems)

        masks_summed = torch.zeros_like(stft_repr_expanded).scatter_add_(2, scatter_idx, masks_c)

        # ... and divide by how many bands contributed to each bin.
        denom = repeat(self.num_bands_per_freq, 'f -> (f r) 1', r=self.audio_channels)

        masks_averaged = masks_summed / denom.clamp(min=1e-8)
        stft_mod = stft_repr_c * masks_averaged

        # --- iSTFT ---
        stft_mod = rearrange(stft_mod, 'b n (f s) t -> (b n s) f t', s=self.audio_channels)

        recon_audio = torch.istft(
            stft_mod,
            **self.stft_kwargs,
            window=stft_window,
            return_complex=False,
            length=istft_length
        )
        recon_audio = rearrange(recon_audio, '(b n s) t -> b n s t', b=batch, s=self.audio_channels, n=self.num_stems)

        if self.num_stems == 1:
            recon_audio = rearrange(recon_audio, 'b 1 s t -> b s t')

        # --- Loss ---
        if target is None:
            return recon_audio

        if self.num_stems > 1:
            assert target.ndim == 4 and target.shape[1] == self.num_stems

        if target.ndim == 2:
            target = rearrange(target, '... t -> ... 1 t')

        # The reconstruction may be shorter than the target; compare the overlap.
        target = target[..., :recon_audio.shape[-1]]

        loss = F.l1_loss(recon_audio, target)

        multi_stft_resolution_loss = 0.0
        for window_size in self.multi_stft_resolutions_window_sizes:
            res_stft_kwargs = dict(
                n_fft=max(window_size, self.multi_stft_n_fft),
                win_length=window_size,
                return_complex=True,
                window=self.multi_stft_window_fn(window_size, device=device),
                **self.multi_stft_kwargs,
            )

            recon_Y = torch.stft(rearrange(recon_audio, '... s t -> (... s) t'), **res_stft_kwargs)
            target_Y = torch.stft(rearrange(target, '... s t -> (... s) t'), **res_stft_kwargs)

            multi_stft_resolution_loss += F.l1_loss(recon_Y, target_Y)

        total_loss = loss + self.multi_stft_resolution_loss_weight * multi_stft_resolution_loss

        if not return_loss_breakdown:
            return total_loss

        return total_loss, (loss, multi_stft_resolution_loss)
|
models/scnet/scnet_masked.py
ADDED
|
@@ -0,0 +1,419 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import torch.nn as nn
|
| 3 |
+
import torch.nn.functional as F
|
| 4 |
+
from collections import deque
|
| 5 |
+
from .separation import SeparationNet
|
| 6 |
+
import typing as tp
|
| 7 |
+
import math
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
class Swish(nn.Module):
    """Element-wise activation ``x * sigmoid(x)``."""

    def forward(self, x):
        gate = torch.sigmoid(x)
        return x * gate
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
class ConvolutionModule(nn.Module):
    """
    Convolution Module in SD block.

    A stack of residual 1-D convolution layers. Every layer is
    GroupNorm -> Conv1d (to ``2 * hidden``) -> GLU -> depthwise Conv1d ->
    GroupNorm -> Swish -> 1x1 Conv1d back to ``channels``, and its output is
    added to its input.

    Args:
        channels (int): input/output channels.
        depth (int): number of residual layers (the absolute value is used).
        compress (float): amount of channel compression; the hidden width is
            ``int(channels / compress)``.
        kernel (int): kernel size for the convolutions; must be odd.
    """

    def __init__(self, channels, depth=2, compress=4, kernel=3):
        super().__init__()
        assert kernel % 2 == 1
        self.depth = abs(depth)
        hidden_size = int(channels / compress)
        # "Same" padding for an odd kernel keeps the sequence length unchanged.
        padding = kernel // 2

        def make_layer():
            return nn.Sequential(
                nn.GroupNorm(1, channels),
                nn.Conv1d(channels, hidden_size * 2, kernel, padding=padding),
                nn.GLU(1),
                nn.Conv1d(hidden_size, hidden_size, kernel, padding=padding, groups=hidden_size),
                nn.GroupNorm(1, hidden_size),
                Swish(),
                nn.Conv1d(hidden_size, channels, 1),
            )

        self.layers = nn.ModuleList([make_layer() for _ in range(self.depth)])

    def forward(self, x):
        """Apply every layer as a residual update ``x = x + layer(x)``."""
        for residual_branch in self.layers:
            x = x + residual_branch(x)
        return x
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
class FusionLayer(nn.Module):
    """
    A FusionLayer within the decoder.

    Optionally adds a skip tensor to the input, duplicates the result along
    the channel axis, applies a 2-D convolution and gates it with GLU, so the
    output has ``channels`` channels again.

    Args:
        - channels (int): Number of input channels.
        - kernel_size (int, optional): Kernel size for the convolutional layer, defaults to 3.
        - stride (int, optional): Stride for the convolutional layer, defaults to 1.
        - padding (int, optional): Padding for the convolutional layer, defaults to 1.
    """

    def __init__(self, channels, kernel_size=3, stride=1, padding=1):
        super().__init__()
        self.conv = nn.Conv2d(channels * 2, channels * 2, kernel_size, stride=stride, padding=padding)

    def forward(self, x, skip=None):
        """
        Fuse ``x`` with an optional ``skip`` tensor.

        The addition is done out of place so the caller's ``x`` is not
        modified (the previous ``x += skip`` overwrote it).
        """
        if skip is not None:
            x = x + skip
        # Duplicate channels so the GLU below can halve them back to `channels`.
        x = x.repeat(1, 2, 1, 1)
        x = self.conv(x)
        return F.glu(x, dim=1)
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
class SDlayer(nn.Module):
    """
    Implements a Sparse Down-sample Layer for processing different frequency bands separately.

    The frequency axis is cut into three consecutive bands and each band is
    down-sampled by its own strided convolution.

    Args:
        - channels_in (int): Input channel count.
        - channels_out (int): Output channel count.
        - band_configs (dict): A dictionary containing configuration for each frequency band.
                               Keys are 'low', 'mid', 'high' for each band, and values are
                               dictionaries with keys 'SR', 'stride', and 'kernel' for proportion,
                               stride, and kernel size, respectively.
    """

    def __init__(self, channels_in, channels_out, band_configs):
        super(SDlayer, self).__init__()

        # One convolution (acting along the frequency axis only) per band,
        # in the iteration order of `band_configs`.
        self.convs = nn.ModuleList()
        self.strides = []
        self.kernels = []
        for band_cfg in band_configs.values():
            band_conv = nn.Conv2d(
                channels_in, channels_out, (band_cfg['kernel'], 1), (band_cfg['stride'], 1), (0, 0)
            )
            self.convs.append(band_conv)
            self.strides.append(band_cfg['stride'])
            self.kernels.append(band_cfg['kernel'])

        # Band proportions used to place the split points; the high band
        # takes whatever is left.
        self.SR_low = band_configs['low']['SR']
        self.SR_mid = band_configs['mid']['SR']

    def forward(self, x):
        """
        Split ``x`` (unpacked as (B, C, Fr, T)) along Fr and down-sample each band.

        Returns:
            ``(outputs, original_lengths)``: the list of per-band tensors and
            the list of band lengths before padding and convolution.
        """
        _, _, Fr, _ = x.shape

        low_end = math.ceil(Fr * self.SR_low)
        mid_end = math.ceil(Fr * (self.SR_low + self.SR_mid))
        bounds = ((0, low_end), (low_end, mid_end), (mid_end, Fr))

        outputs = []
        original_lengths = []
        for conv, stride, kernel, (lo, hi) in zip(self.convs, self.strides, self.kernels, bounds):
            band = x[:, :, lo:hi, :]
            original_lengths.append(hi - lo)

            if stride == 1:
                # Keep the band length unchanged after the convolution.
                pad_total = kernel - stride
            else:
                # Pad up to the next multiple of the stride.
                pad_total = -band.shape[2] % stride
            pad_front = pad_total // 2
            pad_back = pad_total - pad_front

            # F.pad pads the last axis first: (time_l, time_r, freq_l, freq_r).
            outputs.append(conv(F.pad(band, (0, 0, pad_front, pad_back))))

        return outputs, original_lengths
|
| 138 |
+
|
| 139 |
+
|
| 140 |
+
class SUlayer(nn.Module):
    """
    Implements a Sparse Up-sample Layer in decoder.

    Each of the three frequency bands is up-sampled by its own transposed
    convolution and trimmed to its original length before the bands are
    concatenated again.

    Args:
        - channels_in: The number of input channels.
        - channels_out: The number of output channels.
        - band_configs: Dictionary containing the configurations for transposed convolutions
                        ('kernel' and 'stride' are read for every band).
    """

    def __init__(self, channels_in, channels_out, band_configs):
        super(SUlayer, self).__init__()

        # One transposed convolution per band, acting along the frequency axis only.
        self.convtrs = nn.ModuleList([
            nn.ConvTranspose2d(channels_in, channels_out, [band_cfg['kernel'], 1], [band_cfg['stride'], 1])
            for band_cfg in band_configs.values()
        ])

    def forward(self, x, lengths, origin_lengths):
        """
        Up-sample the bands of ``x`` (unpacked as (B, C, Fr, T)).

        Args:
            x: concatenated down-sampled bands.
            lengths: current length of each band inside ``x``.
            origin_lengths: length each band must have after up-sampling.

        Returns:
            The up-sampled bands concatenated along the frequency axis.
        """
        B, C, Fr, T = x.shape

        low_end = lengths[0]
        mid_end = lengths[0] + lengths[1]
        bounds = ((0, low_end), (low_end, mid_end), (mid_end, None))

        restored = []
        for band_idx, (convtr, (lo, hi)) in enumerate(zip(self.convtrs, bounds)):
            upsampled = convtr(x[:, :, lo:hi, :])
            wanted = origin_lengths[band_idx]

            # Drop the surplus bins roughly evenly from both ends.
            offset = abs(wanted - upsampled.shape[2]) // 2
            restored.append(upsampled[:, :, offset:offset + wanted, :])

        return torch.cat(restored, dim=2)
|
| 184 |
+
|
| 185 |
+
|
| 186 |
+
class SDblock(nn.Module):
    """
    Implements a simplified Sparse Down-sample block in encoder.

    The block down-samples the input per frequency band with an ``SDlayer``,
    refines every band with its own ``ConvolutionModule`` followed by GELU,
    concatenates the bands and applies a 2-D convolution over the result.

    Args:
        - channels_in (int): Number of input channels.
        - channels_out (int): Number of output channels.
        - band_configs (dict): Configuration for the SDlayer specifying band splits and convolutions.
        - conv_config (dict): Keyword arguments forwarded to every ConvolutionModule.
        - depths (sequence of int): Convolution depths for the low, mid, and high frequency bands.
        - kernel_size (int): Kernel size of the final 2-D convolution; should be odd.
    """

    def __init__(self, channels_in, channels_out, band_configs=None, conv_config=None, depths=(3, 2, 1),
                 kernel_size=3):
        super().__init__()
        # None sentinels instead of mutable default arguments; the effective
        # defaults are still an empty dict each.
        band_configs = {} if band_configs is None else band_configs
        conv_config = {} if conv_config is None else conv_config

        self.SDlayer = SDlayer(channels_in, channels_out, band_configs)

        # Dynamically create convolution modules for each band based on depths
        self.conv_modules = nn.ModuleList([
            ConvolutionModule(channels_out, depth, **conv_config) for depth in depths
        ])
        # Set the kernel_size to an odd number.
        self.globalconv = nn.Conv2d(channels_out, channels_out, kernel_size, 1, (kernel_size - 1) // 2)

    def forward(self, x):
        """
        Returns:
            ``(output, skip, lengths, original_lengths)``: the convolved
            full-band tensor, the full-band tensor before that convolution,
            the per-band lengths after down-sampling, and the per-band
            lengths before down-sampling.
        """
        bands, original_lengths = self.SDlayer(x)

        processed = []
        for conv, band in zip(self.conv_modules, bands):
            b, c, f, t = band.shape
            # The 1-D convolution module runs over time, so every frequency
            # row is treated as an independent batch item.
            rows = band.permute(0, 2, 1, 3).reshape(-1, c, t)
            refined = conv(rows).view(b, f, c, t).permute(0, 2, 1, 3)
            processed.append(F.gelu(refined))

        lengths = [band.size(-2) for band in processed]
        full_band = torch.cat(processed, dim=2)
        skip = full_band

        output = self.globalconv(full_band)

        return output, skip, lengths, original_lengths
|
| 228 |
+
|
| 229 |
+
|
| 230 |
+
class SCNet(nn.Module):
    """
    The implementation of SCNet: Sparse Compression Network for Music Source Separation. Paper: https://arxiv.org/abs/2401.13276.pdf

    This variant predicts a bounded (tanh) complex mask per source and applies it to the
    mixture spectrogram before the inverse STFT.

    Args:
    - sources (List[str]): List of sources to be separated.
    - audio_channels (int): Number of audio channels.
    - nfft (int): Number of FFTs to determine the frequency dimension of the input.
    - hop_size (int): Hop size for the STFT.
    - win_size (int): Window size for STFT (also the length of the Hann window).
    - normalized (bool): Whether to normalize the STFT.
    - dims (List[int]): List of channel dimensions for each block. dims[0] must equal
      2 * audio_channels (real and imaginary part per audio channel).
    - band_SR (List[float]): The proportion of each frequency band.
    - band_stride (List[int]): The down-sampling ratio of each frequency band.
    - band_kernel (List[int]): The kernel sizes for down-sampling convolution in each frequency band
    - conv_depths (List[int]): List specifying the number of convolution modules in each SD block.
    - compress (int): Compression factor for convolution module.
    - conv_kernel (int): Kernel size for convolution layer in convolution module.
    - num_dplayer (int): Number of dual-path layers.
    - expand (int): Expansion factor in the dual-path RNN, default is 1.

    """

    # NOTE: the list defaults below are only read, never mutated, so sharing them
    # between instances is harmless; they are kept as lists for interface stability.
    def __init__(self,
                 sources=['drums', 'bass', 'other', 'vocals'],
                 audio_channels=2,
                 # Main structure
                 dims=[4, 32, 64, 128],  # dims = [4, 64, 128, 256] in SCNet-large
                 # STFT
                 nfft=4096,
                 hop_size=1024,
                 win_size=4096,
                 normalized=True,
                 # SD/SU layer
                 band_SR=[0.175, 0.392, 0.433],
                 band_stride=[1, 4, 16],
                 band_kernel=[3, 4, 16],
                 # Convolution Module
                 conv_depths=[3, 2, 1],
                 compress=4,
                 conv_kernel=3,
                 # Dual-path RNN
                 num_dplayer=6,
                 expand=1,
                 ):
        super().__init__()
        self.sources = sources
        self.audio_channels = audio_channels
        self.dims = dims
        band_keys = ['low', 'mid', 'high']
        self.band_configs = {
            key: {'SR': band_SR[i], 'stride': band_stride[i], 'kernel': band_kernel[i]}
            for i, key in enumerate(band_keys)
        }
        self.hop_length = hop_size
        self.conv_config = {
            'compress': compress,
            'kernel': conv_kernel,
        }

        # Learned positional embedding along the frequency axis of the spectrogram.
        self.embed_dim = dims[0]
        self.max_f = nfft // 2 + 1
        self.pos_embed_f = nn.Parameter(torch.zeros(1, self.embed_dim, self.max_f, 1))
        nn.init.trunc_normal_(self.pos_embed_f, std=.02)

        # torch.stft / torch.istft require the window to have exactly `win_length`
        # samples, so the window is sized from win_size rather than nfft.
        window = torch.hann_window(window_length=win_size, periodic=True)
        self.register_buffer('window', window, persistent=False)

        self.stft_config = {
            'n_fft': nfft,
            'hop_length': hop_size,
            'win_length': win_size,
            'center': True,
            'normalized': normalized
        }

        self.encoder = nn.ModuleList()
        self.decoder = nn.ModuleList()

        for index in range(len(dims) - 1):
            enc = SDblock(
                channels_in=dims[index],
                channels_out=dims[index + 1],
                band_configs=self.band_configs,
                conv_config=self.conv_config,
                depths=conv_depths
            )
            self.encoder.append(enc)

            dec = nn.Sequential(
                FusionLayer(channels=dims[index + 1]),
                SUlayer(
                    channels_in=dims[index + 1],
                    # The outermost decoder stage emits one group of channels per source.
                    channels_out=dims[index] if index != 0 else dims[index] * len(sources),
                    band_configs=self.band_configs,
                )
            )
            # Decoder stages run in the reverse order of the encoder stages.
            self.decoder.insert(0, dec)

        self.separation_net = SeparationNet(
            channels=dims[-1],
            expand=expand,
            num_layers=num_dplayer,
        )

        # The mask head consumes and produces exactly what the last decoder stage
        # emits: dims[0] channels per source (4 * len(sources) with the defaults).
        mask_channels = dims[0] * len(self.sources)
        self.mask_layer = nn.Sequential(
            nn.Conv2d(
                mask_channels,
                64,
                kernel_size=3,
                padding="same"
            ),
            nn.GELU(),
            nn.Conv2d(
                64,
                mask_channels,
                kernel_size=1,
                padding="same",
            ),
            nn.Tanh()
        )

    def forward(self, x):
        """
        Separate a batch of waveforms.

        Args:
        - x (Tensor): Mixture waveform; the last dimension is time and the leading
          dimensions flatten to batch * audio_channels rows.

        Returns:
        - Tensor of shape (batch, len(sources), audio_channels, time).
        """
        # In the initial padding, ensure that the number of frames after the STFT (the length of the T dimension) is even,
        # so that the RFFT operation can be used in the separation network.
        padding = self.hop_length - x.shape[-1] % self.hop_length
        if (x.shape[-1] + padding) // self.hop_length % 2 == 0:
            padding += self.hop_length
        x = F.pad(x, (0, padding))

        # STFT
        L = x.shape[-1]
        x = x.reshape(-1, L)
        stft_opts = {**self.stft_config, 'window': self.window.to(x.device)}
        x = torch.stft(x, **stft_opts, return_complex=True)
        x = torch.view_as_real(x)
        # Fold (real, imag) and the audio channels into a single channel axis.
        x = x.permute(0, 3, 1, 2).reshape(x.shape[0] // self.audio_channels, x.shape[3] * self.audio_channels, x.shape[1], x.shape[2])

        B, C, Fr, T = x.shape

        assert C == self.embed_dim, f"Input channel dimension {C} after STFT/reshape doesn't match self.embed_dim {self.embed_dim}"
        # One copy of the mixture spectrogram per source, to be multiplied by the masks.
        mixture = x.repeat(1, len(self.sources), 1, 1)

        if Fr > self.max_f:
            print(f"Warning: Input frequency dim {Fr} > max_f {self.max_f}. Positional embedding will be truncated/repeated.")
            repeats = math.ceil(Fr / self.max_f)
            pos_f = self.pos_embed_f.repeat(1, 1, repeats, 1)[:, :, :Fr, :]
        else:
            pos_f = self.pos_embed_f[:, :, :Fr, :]
        x = x + pos_f

        # Stacks (LIFO) of per-stage encoder state consumed by the decoder.
        save_skip = deque()
        save_lengths = deque()
        save_original_lengths = deque()
        # encoder
        for sd_layer in self.encoder:
            x, skip, lengths, original_lengths = sd_layer(x)
            save_skip.append(skip)
            save_lengths.append(lengths)
            save_original_lengths.append(original_lengths)

        # separation
        x = self.separation_net(x)

        # decoder
        for fusion_layer, su_layer in self.decoder:
            x = fusion_layer(x, save_skip.pop())
            x = su_layer(x, save_lengths.pop(), save_original_lengths.pop())

        mask = self.mask_layer(x)

        # output
        n = self.dims[0]

        # Regroup channels into (real, imag) pairs and view them as complex numbers.
        mixture = mixture.view(B, n, -1, Fr, T)
        mixture = mixture.reshape(-1, 2, Fr, T).permute(0, 2, 3, 1)
        mixture = torch.view_as_complex(mixture.contiguous())

        mask = mask.view(B, n, -1, Fr, T)
        mask = mask.reshape(-1, 2, Fr, T).permute(0, 2, 3, 1)
        mask = torch.view_as_complex(mask.contiguous())

        x = mixture * mask

        x = torch.istft(x, **stft_opts)
        x = x.reshape(B, len(self.sources), self.audio_channels, -1)

        # Drop the samples added by the initial padding (padding is always > 0).
        x = x[:, :, :, :-padding]

        return x
|
models/scnet/scnet_tran.py
ADDED
|
@@ -0,0 +1,668 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
from collections import deque
|
| 3 |
+
import typing as tp
|
| 4 |
+
import math
|
| 5 |
+
import torch
|
| 6 |
+
import torch.nn as nn
|
| 7 |
+
from torch.nn.modules.rnn import LSTM
|
| 8 |
+
from torch.nn import Module, ModuleList
|
| 9 |
+
from einops import rearrange, pack, unpack, reduce, repeat
|
| 10 |
+
from einops.layers.torch import Rearrange
|
| 11 |
+
import torch.nn.functional as F
|
| 12 |
+
from models.bs_roformer.attend import Attend
|
| 13 |
+
from rotary_embedding_torch import RotaryEmbedding
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
# helper functions
|
| 17 |
+
|
| 18 |
+
def exists(val):
    """Return True when ``val`` is anything other than ``None``."""
    is_missing = val is None
    return not is_missing
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
def default(v, d):
    """Return ``v`` unless it is ``None``, in which case return the fallback ``d``."""
    if exists(v):
        return v
    return d
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
def pack_one(t, pattern):
    """Pack the single tensor ``t`` with einops ``pack``; returns ``pack``'s result unchanged."""
    packed_result = pack([t], pattern)
    return packed_result
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
def unpack_one(t, ps, pattern):
    """Undo ``pack_one``: unpack ``t`` with einops ``unpack`` and return the first piece."""
    pieces = unpack(t, ps, pattern)
    return pieces[0]
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
def pad_at_dim(t, pad, dim=-1, value=0.):
    """
    Pad tensor ``t`` along a single dimension.

    Args:
    - t (Tensor): Tensor to pad.
    - pad (tuple): (before, after) padding amounts for the chosen dimension.
    - dim (int): Dimension to pad; negative values count from the end.
    - value (float): Fill value for the padded entries.
    """
    # F.pad lists its padding pairs starting from the last dimension, so every
    # dimension to the right of `dim` needs an explicit (0, 0) pair first.
    if dim < 0:
        trailing_dims = -dim - 1
    else:
        trailing_dims = t.ndim - dim - 1
    untouched = (0, 0) * trailing_dims
    return F.pad(t, (*untouched, *pad), value=value)
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
def l2norm(t):
    """Scale ``t`` to unit L2 norm along its last dimension."""
    unit = F.normalize(t, p=2, dim=-1)
    return unit
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
# norm
|
| 45 |
+
|
| 46 |
+
class RMSNorm(Module):
    """
    RMS-style normalisation over the last dimension with a learned per-feature gain.

    The input is scaled to unit L2 norm along the last dimension, multiplied by
    sqrt(dim) and then by the learnable ``gamma`` vector (initialised to ones).

    Args:
    - dim (int): Size of the last (feature) dimension.
    """

    def __init__(self, dim):
        super().__init__()
        self.scale = dim ** 0.5
        self.gamma = nn.Parameter(torch.ones(dim))

    def forward(self, x):
        unit = F.normalize(x, dim=-1)
        return unit * self.scale * self.gamma
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
# attention
|
| 57 |
+
|
| 58 |
+
class FeedForward(Module):
    """
    Pre-normalised two-layer MLP: RMSNorm -> Linear -> GELU -> Dropout -> Linear -> Dropout.

    Args:
    - dim (int): Input and output feature size.
    - mult (int): Hidden size multiplier; the hidden width is int(dim * mult).
    - dropout (float): Dropout probability applied after the activation and after the output projection.
    """

    def __init__(
            self,
            dim,
            mult=4,
            dropout=0.
    ):
        super().__init__()
        hidden_dim = int(dim * mult)
        # Stage order is kept fixed so the sub-module indices (net.0 ... net.5) stay stable.
        stages = [
            RMSNorm(dim),
            nn.Linear(dim, hidden_dim),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, dim),
            nn.Dropout(dropout),
        ]
        self.net = nn.Sequential(*stages)

    def forward(self, x):
        out = self.net(x)
        return out
|
| 78 |
+
|
| 79 |
+
|
| 80 |
+
class Attention(Module):
    """
    Pre-normalised multi-head self-attention with optional rotary embeddings and
    a learned sigmoid gate per head.

    Args:
    - dim (int): Feature size of the input and output.
    - heads (int): Number of attention heads.
    - dim_head (int): Feature size of each head.
    - dropout (float): Dropout probability passed to ``Attend`` and applied after the output projection.
    - rotary_embed: Optional object exposing ``rotate_queries_or_keys``; applied to queries and keys when given.
    - flash (bool): Forwarded to ``Attend`` as its ``flash`` argument.
    """

    def __init__(
            self,
            dim,
            heads=8,
            dim_head=64,
            dropout=0.,
            rotary_embed=None,
            flash=True
    ):
        super().__init__()
        self.heads = heads
        self.scale = dim_head ** -0.5
        inner_width = heads * dim_head

        self.rotary_embed = rotary_embed

        self.attend = Attend(flash=flash, dropout=dropout)

        self.norm = RMSNorm(dim)
        # A single projection produces queries, keys and values at once.
        self.to_qkv = nn.Linear(dim, inner_width * 3, bias=False)

        # One gate logit per head and position.
        self.to_gates = nn.Linear(dim, heads)

        self.to_out = nn.Sequential(
            nn.Linear(inner_width, dim, bias=False),
            nn.Dropout(dropout)
        )

    def forward(self, x):
        normed = self.norm(x)

        qkv = self.to_qkv(normed)
        q, k, v = rearrange(qkv, 'b n (qkv h d) -> qkv b h n d', qkv=3, h=self.heads)

        rotary = self.rotary_embed
        if rotary is not None:
            q = rotary.rotate_queries_or_keys(q)
            k = rotary.rotate_queries_or_keys(k)

        attended = self.attend(q, k, v)

        # Gates are computed from the normalised input and broadcast over the head features.
        gate_logits = self.to_gates(normed)
        attended = attended * rearrange(gate_logits, 'b n h -> b h n 1').sigmoid()

        merged = rearrange(attended, 'b h n d -> b n (h d)')
        return self.to_out(merged)
|
| 125 |
+
|
| 126 |
+
|
| 127 |
+
class Transformer(Module):
    """
    Stack of residual (Attention, FeedForward) pairs with an optional final RMSNorm.

    Args (keyword-only):
    - dim (int): Feature size.
    - depth (int): Number of (Attention, FeedForward) pairs.
    - dim_head (int): Feature size of each attention head.
    - heads (int): Number of attention heads.
    - attn_dropout (float): Dropout used inside the attention blocks.
    - ff_dropout (float): Dropout used inside the feed-forward blocks.
    - ff_mult (int): Hidden size multiplier of the feed-forward blocks.
    - norm_output (bool): Apply a final RMSNorm when True, identity otherwise.
    - rotary_embed: Optional rotary embedding shared by all attention blocks.
    - flash_attn (bool): Forwarded to each Attention as ``flash``.
    - linear_attn (bool): Accepted but not used by this implementation.
    """

    def __init__(
            self,
            *,
            dim,
            depth,
            dim_head=64,
            heads=8,
            attn_dropout=0.,
            ff_dropout=0.,
            ff_mult=4,
            norm_output=True,
            rotary_embed=None,
            flash_attn=True,
            linear_attn=False
    ):
        super().__init__()
        self.layers = ModuleList([])

        for _ in range(depth):
            # Attention is built before the feed-forward block in every pair.
            attention = Attention(
                dim=dim,
                dim_head=dim_head,
                heads=heads,
                dropout=attn_dropout,
                rotary_embed=rotary_embed,
                flash=flash_attn,
            )
            feed_forward = FeedForward(dim=dim, mult=ff_mult, dropout=ff_dropout)
            self.layers.append(ModuleList([attention, feed_forward]))

        if norm_output:
            self.norm = RMSNorm(dim)
        else:
            self.norm = nn.Identity()

    def forward(self, x):
        hidden = x
        for attention, feed_forward in self.layers:
            hidden = attention(hidden) + hidden
            hidden = feed_forward(hidden) + hidden

        return self.norm(hidden)
|
| 163 |
+
|
| 164 |
+
|
| 165 |
+
class FeatureConversion(nn.Module):
    """
    Converts features along the last axis between the real domain and a stacked
    (real, imaginary) spectral representation. Integrates into the adjacent Dual-Path layer.

    Args:
        channels (int): Number of channels of the stacked (real + imaginary) representation.
        inverse (bool): If True, splits the channels into real/imaginary halves and applies
            irfft; otherwise applies rfft and concatenates real and imaginary parts on the channel axis.
    """

    def __init__(self, channels, inverse):
        super().__init__()
        self.inverse = inverse
        self.channels = channels

    def forward(self, x):
        # B, C, F, T = x.shape
        # FFT routines are run in float32 regardless of the incoming dtype.
        x = x.float()
        if not self.inverse:
            spectrum = torch.fft.rfft(x, dim=3, norm="ortho")
            return torch.cat([spectrum.real, spectrum.imag], dim=1)

        # First half of the channels holds the real part, second half the imaginary part.
        half = self.channels // 2
        spectrum = torch.complex(x[:, :half, :, :], x[:, half:, :, :])
        return torch.fft.irfft(spectrum, dim=3, norm="ortho")
|
| 194 |
+
|
| 195 |
+
|
| 196 |
+
class DualPathTran(nn.Module):
    """
    Dual-Path Transformer in Separation Network.

    Runs one transformer along the frequency axis and then one along the time axis,
    each preceded by a GroupNorm and wrapped in a residual connection.

    Args:
        d_model (int): The number of expected features (channels) in the input.
        time_rotary_embed: Rotary embedding handed to the time-path transformer.
        freq_rotary_embed: Rotary embedding handed to the frequency-path transformer.
        tran_params (dict): Transformer settings; the keys 'heads', 'dim_head', 'attn_dropout',
            'ff_dropout', 'flash_attn' and 'depth' are read.
    """

    def __init__(self, d_model, time_rotary_embed, freq_rotary_embed, tran_params):
        super(DualPathTran, self).__init__()

        self.d_model = d_model

        shared_kwargs = {
            'dim': d_model,
            'heads': tran_params['heads'],
            'dim_head': tran_params['dim_head'],
            'attn_dropout': tran_params['attn_dropout'],
            'ff_dropout': tran_params['ff_dropout'],
            'flash_attn': tran_params['flash_attn'],
        }
        depth = tran_params['depth']
        # norm_layers[0] precedes the frequency path, norm_layers[1] the time path.
        self.norm_layers = nn.ModuleList([nn.GroupNorm(1, d_model) for _ in range(2)])
        self.time_layer = Transformer(depth=depth, rotary_embed=time_rotary_embed, **shared_kwargs)
        self.freq_layer = Transformer(depth=depth, rotary_embed=freq_rotary_embed, **shared_kwargs)

    def forward(self, x):
        B, C, Fr, T = x.shape

        # Frequency-path: every (batch, frame) pair becomes a sequence over frequency bins.
        residual = x
        seq = self.norm_layers[0](x)
        seq = seq.transpose(1, 3).contiguous().view(B * T, Fr, C)
        seq = self.freq_layer(seq)
        x = seq.view(B, T, Fr, C).transpose(1, 3) + residual

        # Time-path: every (batch, frequency bin) pair becomes a sequence over frames.
        residual = x
        seq = self.norm_layers[1](x)
        seq = seq.transpose(1, 2).contiguous().view(B * Fr, C, T).transpose(1, 2)
        seq = self.time_layer(seq)
        seq = seq.transpose(1, 2).contiguous().view(B, Fr, C, T).transpose(1, 2)

        return seq + residual
|
| 246 |
+
|
| 247 |
+
|
| 248 |
+
class SeparationNetTran(nn.Module):
    """
    Separation network: a chain of dual-path transformer layers, each followed by a
    feature conversion that alternates between rfft (even layers) and irfft (odd layers).

    Args:
    - channels (int): Number input channels.
    - expand (int): Accepted for interface compatibility; not used by this implementation.
    - num_layers (int): Number of dual-path layers.
    - tran_params (dict): Transformer settings; 'rotary_embedding_dim' is read here and the
      dict is forwarded to every DualPathTran. Must be provided (the default None is not usable).
    """

    def __init__(self, channels, expand=1, num_layers=6, tran_params=None):
        super(SeparationNetTran, self).__init__()

        self.num_layers = num_layers

        # The same two rotary embedding objects are shared by all dual-path layers.
        rotary_dim = tran_params['rotary_embedding_dim']
        time_rotary_embed = RotaryEmbedding(dim=rotary_dim)
        freq_rotary_embed = RotaryEmbedding(dim=rotary_dim)

        # Odd layers run on the rfft output, which has twice as many channels.
        dual_paths = [
            DualPathTran(
                channels if layer_idx % 2 == 0 else channels * 2,
                time_rotary_embed,
                freq_rotary_embed,
                tran_params,
            )
            for layer_idx in range(num_layers)
        ]
        self.dp_modules = nn.ModuleList(dual_paths)

        converters = [
            FeatureConversion(channels * 2, inverse=(layer_idx % 2 == 1))
            for layer_idx in range(num_layers)
        ]
        self.feature_conversion = nn.ModuleList(converters)

    def forward(self, x):
        for layer_idx in range(self.num_layers):
            dual_path = self.dp_modules[layer_idx]
            convert = self.feature_conversion[layer_idx]
            x = convert(dual_path(x))
        return x
|
| 281 |
+
|
| 282 |
+
|
| 283 |
+
class Swish(nn.Module):
    """Swish activation: the input multiplied element-wise by its own sigmoid."""

    def forward(self, x):
        gate = x.sigmoid()
        return x * gate
|
| 286 |
+
|
| 287 |
+
|
| 288 |
+
class ConvolutionModule(nn.Module):
    """
    Convolution Module in SD block: a stack of residual 1-D convolution layers.

    Each layer is GroupNorm -> Conv1d -> GLU -> depthwise Conv1d -> GroupNorm -> Swish -> 1x1 Conv1d,
    and its output is added to its input.

    Args:
        channels (int): input/output channels.
        depth (int): number of layers in the residual branch (the absolute value is used).
        compress (float): amount of channel compression; the hidden width is int(channels / compress).
        kernel (int): kernel size for the convolutions; must be odd.
    """

    def __init__(self, channels, depth=2, compress=4, kernel=3):
        super().__init__()
        assert kernel % 2 == 1
        self.depth = abs(depth)
        hidden_size = int(channels / compress)
        # With an odd kernel this padding keeps the sequence length unchanged.
        padding = kernel // 2

        def norm(num_channels):
            return nn.GroupNorm(1, num_channels)

        self.layers = nn.ModuleList([])
        for _ in range(self.depth):
            self.layers.append(nn.Sequential(
                norm(channels),
                # Twice the hidden width because GLU halves the channel count.
                nn.Conv1d(channels, hidden_size * 2, kernel, padding=padding),
                nn.GLU(1),
                nn.Conv1d(hidden_size, hidden_size, kernel, padding=padding, groups=hidden_size),
                norm(hidden_size),
                Swish(),
                nn.Conv1d(hidden_size, channels, 1),
            ))

    def forward(self, x):
        out = x
        for residual_branch in self.layers:
            out = out + residual_branch(out)
        return out
|
| 324 |
+
|
| 325 |
+
|
| 326 |
+
class FusionLayer(nn.Module):
    """
    A FusionLayer within the decoder.

    Adds the skip connection to the input, duplicates the channels, applies a
    convolution and gates the result with a GLU so the channel count is unchanged.

    Args:
    - channels (int): Number of input channels.
    - kernel_size (int, optional): Kernel size for the convolutional layer, defaults to 3.
    - stride (int, optional): Stride for the convolutional layer, defaults to 1.
    - padding (int, optional): Padding for the convolutional layer, defaults to 1.
    """

    def __init__(self, channels, kernel_size=3, stride=1, padding=1):
        super(FusionLayer, self).__init__()
        self.conv = nn.Conv2d(channels * 2, channels * 2, kernel_size, stride=stride, padding=padding)

    def forward(self, x, skip=None):
        """
        Args:
        - x (Tensor): Decoder features; the channel axis is dim 1.
        - skip (Tensor, optional): Encoder skip features added to ``x`` when given.

        Returns:
        - Tensor with the same number of channels as ``x``.
        """
        if skip is not None:
            # Out-of-place add: the caller's tensor must not be modified as a side effect.
            x = x + skip
        # Duplicate the channels so that the GLU below (which halves them) restores the count.
        x = x.repeat(1, 2, 1, 1)
        x = self.conv(x)
        x = F.glu(x, dim=1)
        return x
|
| 348 |
+
|
| 349 |
+
|
| 350 |
+
class SDlayer(nn.Module):
    """
    Implements a Sparse Down-sample Layer for processing different frequency bands separately.

    The frequency axis is cut into three consecutive bands; each band is padded and passed
    through its own strided convolution along frequency.

    Args:
    - channels_in (int): Input channel count.
    - channels_out (int): Output channel count.
    - band_configs (dict): A dictionary containing configuration for each frequency band.
                           Keys are 'low', 'mid', 'high' for each band, and values are
                           dictionaries with keys 'SR', 'stride', and 'kernel' for proportion,
                           stride, and kernel size, respectively.
    """

    def __init__(self, channels_in, channels_out, band_configs):
        super(SDlayer, self).__init__()

        # One convolution per band, acting only along the frequency axis (kernel width 1 in time).
        self.convs = nn.ModuleList()
        self.strides = []
        self.kernels = []
        for band in band_configs.values():
            band_kernel = band['kernel']
            band_stride = band['stride']
            self.convs.append(
                nn.Conv2d(
                    channels_in,
                    channels_out,
                    kernel_size=(band_kernel, 1),
                    stride=(band_stride, 1),
                    padding=(0, 0),
                )
            )
            self.strides.append(band_stride)
            self.kernels.append(band_kernel)

        # Proportions of the low and mid bands; the high band takes whatever remains.
        self.SR_low = band_configs['low']['SR']
        self.SR_mid = band_configs['mid']['SR']

    def forward(self, x):
        """
        Args:
        - x (Tensor): Input of shape (B, C, Fr, T).

        Returns:
        - (outputs, original_lengths): the list of per-band convolution outputs and the
          list of per-band lengths along the frequency axis before padding.
        """
        B, C, Fr, T = x.shape
        low_end = math.ceil(Fr * self.SR_low)
        mid_end = math.ceil(Fr * (self.SR_low + self.SR_mid))
        boundaries = [(0, low_end), (low_end, mid_end), (mid_end, Fr)]

        outputs = []
        original_lengths = []
        for conv, stride, kernel, (start, end) in zip(self.convs, self.strides, self.kernels, boundaries):
            band = x[:, :, start:end, :]
            original_lengths.append(end - start)
            band_length = band.shape[2]

            if stride == 1:
                # Keeps the band length unchanged after the convolution.
                total_padding = kernel - stride
            else:
                # Smallest padding that makes the band length a multiple of the stride.
                total_padding = (-band_length) % stride
            pad_left = total_padding // 2
            pad_right = total_padding - pad_left

            # F.pad pairs start at the last axis: (time_left, time_right, freq_left, freq_right).
            outputs.append(conv(F.pad(band, (0, 0, pad_left, pad_right))))

        return outputs, original_lengths
|
| 411 |
+
|
| 412 |
+
|
| 413 |
+
class SUlayer(nn.Module):
    """
    Implements a Sparse Up-sample Layer in decoder.

    Each band is up-sampled along frequency with its own transposed convolution, cropped
    back to its original length, and the bands are concatenated along the frequency axis.

    Args:
    - channels_in: The number of input channels.
    - channels_out: The number of output channels.
    - band_configs: Dictionary whose values provide the 'kernel' and 'stride' of the
      transposed convolution for each band.
    """

    def __init__(self, channels_in, channels_out, band_configs):
        super(SUlayer, self).__init__()

        # One transposed convolution per band, acting only along the frequency axis.
        self.convtrs = nn.ModuleList([
            nn.ConvTranspose2d(channels_in, channels_out, (band['kernel'], 1), (band['stride'], 1))
            for band in band_configs.values()
        ])

    def forward(self, x, lengths, origin_lengths):
        """
        Args:
        - x (Tensor): Input of shape (B, C, Fr, T).
        - lengths: Per-band lengths of ``x`` along the frequency axis (first two are used for splitting).
        - origin_lengths: Per-band target lengths along the frequency axis after up-sampling.
        """
        B, C, Fr, T = x.shape
        low_end = lengths[0]
        mid_end = lengths[0] + lengths[1]
        # The last band runs to the end of the frequency axis.
        bounds = [(0, low_end), (low_end, mid_end), (mid_end, None)]

        restored = []
        for idx, (convtr, (start, end)) in enumerate(zip(self.convtrs, bounds)):
            upsampled = convtr(x[:, :, start:end, :])
            target = origin_lengths[idx]
            # Centre the crop: drop half of the length difference from the front.
            offset = abs(target - upsampled.shape[2]) // 2
            restored.append(upsampled[:, :, offset:offset + target, :])

        return torch.cat(restored, dim=2)
|
| 457 |
+
|
| 458 |
+
|
| 459 |
+
class SDblock(nn.Module):
    """
    Implements a simplified Sparse Down-sample block in encoder.

    Splits and down-samples the input per band (SDlayer), refines each band with its own
    ConvolutionModule, concatenates the bands and applies a global 2-D convolution.

    Args:
    - channels_in (int): Number of input channels.
    - channels_out (int): Number of output channels.
    - band_configs (dict): Configuration for the SDlayer specifying band splits and convolutions.
    - conv_config (dict): Configuration for convolution modules applied to each band.
    - depths (list of int): List specifying the convolution depths for low, mid, and high frequency bands.
    - kernel_size (int): Kernel size of the global convolution; expected to be odd.
    """

    # NOTE: the mutable defaults are only read, never modified.
    def __init__(self, channels_in, channels_out, band_configs={}, conv_config={}, depths=[3, 2, 1], kernel_size=3):
        super(SDblock, self).__init__()
        self.SDlayer = SDlayer(channels_in, channels_out, band_configs)

        # One convolution module per band, with a band-specific depth.
        per_band_modules = [ConvolutionModule(channels_out, depth, **conv_config) for depth in depths]
        self.conv_modules = nn.ModuleList(per_band_modules)
        # Set the kernel_size to an odd number.
        same_padding = (kernel_size - 1) // 2
        self.globalconv = nn.Conv2d(channels_out, channels_out, kernel_size, 1, same_padding)

    def forward(self, x):
        """
        Returns:
        - (output, skip, lengths, original_lengths): the globally convolved features, the
          concatenated bands before the global convolution, the per-band lengths after
          down-sampling and the per-band lengths before down-sampling.
        """
        bands, original_lengths = self.SDlayer(x)

        refined = []
        for conv, band in zip(self.conv_modules, bands):
            batch, chans, freq, frames = band.shape
            # Fold frequency into the batch axis so the 1-D convolution module runs along time.
            rows = band.permute(0, 2, 1, 3).reshape(-1, chans, frames)
            rows = conv(rows)
            restored = rows.view(batch, freq, chans, frames).permute(0, 2, 1, 3)
            refined.append(F.gelu(restored))

        lengths = [band.size(-2) for band in refined]
        full_band = torch.cat(refined, dim=2)
        skip = full_band

        output = self.globalconv(full_band)

        return output, skip, lengths, original_lengths
|
| 501 |
+
|
| 502 |
+
|
| 503 |
+
class SCNet_Tran(nn.Module):
    """
    SCNet variant whose separation stage uses transformer layers instead of LSTM layers.

    Based on "SCNet: Sparse Compression Network for Music Source Separation"
    (https://arxiv.org/abs/2401.13276.pdf).

    Args:
    - sources (Sequence[str]): Names of the sources to be separated.
    - audio_channels (int): Number of audio channels.
    - dims (Sequence[int]): Channel dimensions for each block.
    - nfft (int): Number of FFTs, which determines the frequency dimension of the input.
    - hop_size (int): Hop size for the STFT.
    - win_size (int): Window size for the STFT.
    - normalized (bool): Whether to normalize the STFT.
    - band_SR (Sequence[float]): The proportion of each frequency band (low, mid, high).
    - band_stride (Sequence[int]): The down-sampling ratio of each frequency band.
    - band_kernel (Sequence[int]): Kernel sizes of the down-sampling convolution in each frequency band.
    - conv_depths (Sequence[int]): Number of convolution modules in each SD block.
    - compress (int): Compression factor for the convolution module.
    - conv_kernel (int): Kernel size of the convolution layer in the convolution module.
    - num_dplayer (int): Number of dual-path layers.
    - expand (int): Expansion factor handed to the separation network, default is 1.
    - tran_rotary_embedding_dim, tran_depth, tran_heads, tran_dim_head, tran_attn_dropout,
      tran_ff_dropout, tran_flash_attn: Collected into ``self.tran_params`` and passed
      unchanged to ``SeparationNetTran``.
    """

    def __init__(
            self,
            sources=('drums', 'bass', 'other', 'vocals'),
            audio_channels=2,
            # Main structure
            dims=(4, 32, 64, 128),  # dims = [4, 64, 128, 256] in SCNet-large
            # STFT
            nfft=4096,
            hop_size=1024,
            win_size=4096,
            normalized=True,
            # SD/SU layer
            band_SR=(0.175, 0.392, 0.433),
            band_stride=(1, 4, 16),
            band_kernel=(3, 4, 16),
            # Convolution Module
            conv_depths=(3, 2, 1),
            compress=4,
            conv_kernel=3,
            # Separation network (transformer replacement of the dual-path RNN)
            num_dplayer=6,
            expand=1,
            tran_rotary_embedding_dim=64,
            tran_depth=1,
            tran_heads=8,
            tran_dim_head=64,
            tran_attn_dropout=0.0,
            tran_ff_dropout=0.0,
            tran_flash_attn=False,
    ):
        super().__init__()
        self.sources = sources
        self.audio_channels = audio_channels
        self.dims = dims

        # One settings entry per frequency band, keyed low -> mid -> high.
        band_keys = ['low', 'mid', 'high']
        self.band_configs = {
            key: {'SR': band_SR[pos], 'stride': band_stride[pos], 'kernel': band_kernel[pos]}
            for pos, key in enumerate(band_keys)
        }
        self.hop_length = hop_size
        self.conv_config = dict(compress=compress, kernel=conv_kernel)
        self.tran_params = dict(
            rotary_embedding_dim=tran_rotary_embedding_dim,
            depth=tran_depth,
            heads=tran_heads,
            dim_head=tran_dim_head,
            attn_dropout=tran_attn_dropout,
            ff_dropout=tran_ff_dropout,
            flash_attn=tran_flash_attn,
        )

        # Shared by torch.stft and torch.istft in forward().
        self.stft_config = dict(
            n_fft=nfft,
            hop_length=hop_size,
            win_length=win_size,
            center=True,
            normalized=normalized,
        )

        # NOTE: registered as a submodule but not called by forward() below.
        self.first_conv = nn.Conv2d(
            in_channels=dims[0], out_channels=dims[0], kernel_size=1, stride=1, padding=0, bias=False
        )

        self.encoder = nn.ModuleList()
        self.decoder = nn.ModuleList()

        for stage in range(len(dims) - 1):
            ch_in = dims[stage]
            ch_out = dims[stage + 1]

            self.encoder.append(
                SDblock(
                    channels_in=ch_in,
                    channels_out=ch_out,
                    band_configs=self.band_configs,
                    conv_config=self.conv_config,
                    depths=conv_depths,
                )
            )

            # The outermost decoder stage emits one group of channels per source.
            if stage == 0:
                su_out = ch_in * len(sources)
            else:
                su_out = ch_in

            # Decoder stages are stored in reverse order of the encoder stages.
            self.decoder.insert(
                0,
                nn.Sequential(
                    FusionLayer(channels=ch_out),
                    SUlayer(
                        channels_in=ch_out,
                        channels_out=su_out,
                        band_configs=self.band_configs,
                    ),
                ),
            )

        self.separation_net = SeparationNetTran(
            channels=dims[-1],
            expand=expand,
            num_layers=num_dplayer,
            tran_params=self.tran_params,
        )

    def forward(self, x):
        """
        Separate a batch of waveforms into ``len(self.sources)`` waveforms each.

        The input's last dimension is the sample axis. The result is reshaped to
        (batch, len(sources), audio_channels, samples) with the internal padding removed.
        """
        hop = self.hop_length
        n_samples = x.shape[-1]

        # Pad so that the number of STFT frames (the T dimension) is even,
        # which the separation network relies on.
        padding = hop - n_samples % hop
        if (n_samples + padding) // hop % 2 == 0:
            padding += hop
        x = F.pad(x, (0, padding))

        # STFT: flatten every leading dimension into rows, then split real/imag parts.
        spec = torch.stft(x.reshape(-1, x.shape[-1]), **self.stft_config, return_complex=True)
        spec = torch.view_as_real(spec)
        rows, n_freq, n_frames, n_parts = spec.shape
        x = spec.permute(0, 3, 1, 2).reshape(
            rows // self.audio_channels, n_parts * self.audio_channels, n_freq, n_frames
        )

        B, _, Fr, T = x.shape

        # encoder: remember what each stage produced so the decoder can consume it in reverse
        saved = []
        for sd_layer in self.encoder:
            x, skip, lengths, original_lengths = sd_layer(x)
            saved.append((skip, lengths, original_lengths))

        # separation
        x = self.separation_net(x)

        # decoder
        for fusion_layer, su_layer in self.decoder:
            skip, lengths, original_lengths = saved.pop()
            x = fusion_layer(x, skip)
            x = su_layer(x, lengths, original_lengths)

        # output: regroup channels into (real, imag) pairs and invert the STFT
        x = x.view(B, self.dims[0], -1, Fr, T)
        x = x.reshape(-1, 2, Fr, T).permute(0, 2, 3, 1)
        x = torch.view_as_complex(x.contiguous())
        x = torch.istft(x, **self.stft_config)
        x = x.reshape(B, len(self.sources), self.audio_channels, -1)

        # Drop the samples that were appended by the initial padding.
        return x[:, :, :, :-padding]
|
phase_fixer.py
ADDED
|
@@ -0,0 +1,139 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import torchaudio
|
| 3 |
+
import os
|
| 4 |
+
import gc
|
| 5 |
+
|
| 6 |
+
def frequency_blend_phases(phase1, phase2, freq_bins, low_cutoff=500, high_cutoff=5000, base_factor=0.25, scale_factor=1.85):
    """Blend two phase tensors with a weight that depends on frequency.

    Every frequency bin ``f`` receives a blend factor ``w(f)``:

    * ``base_factor`` for ``f < low_cutoff``;
    * ``base_factor + scale_factor`` for ``f > high_cutoff``;
    * a linear ramp between those two values for
      ``low_cutoff <= f <= high_cutoff``.

    The blended phase is ``(1 - w) * phase1 + w * phase2``, wrapped back into
    ``[-pi, pi)``.

    Args:
        phase1: Phase tensor whose first dimension indexes frequency bins.
        phase2: Phase tensor with exactly the same shape as ``phase1``.
        freq_bins: Frequency of each bin (tensor or sequence); its length must
            equal ``phase1.shape[0]``. Any numeric dtype is accepted.
        low_cutoff: Frequency below which the factor is ``base_factor``.
        high_cutoff: Frequency above which the factor is
            ``base_factor + scale_factor``.
        base_factor: Blend factor at and below ``low_cutoff``.
        scale_factor: Amount added to ``base_factor`` across the ramp.

    Returns:
        Tensor shaped like ``phase1`` holding the wrapped blended phase.

    Raises:
        ValueError: If the phase shapes differ, ``freq_bins`` has the wrong
            length, or ``low_cutoff >= high_cutoff``.
    """
    if phase1.shape != phase2.shape:
        raise ValueError("phase1 and phase2 must have the same shape.")
    if len(freq_bins) != phase1.shape[0]:
        raise ValueError("freq_bins must have the same length as the number of frequency bins in phase1 and phase2.")
    if low_cutoff >= high_cutoff:
        raise ValueError("low_cutoff must be less than high_cutoff.")

    # Work in the phase tensor's dtype/device. Deriving the factor buffer from
    # an integer ``freq_bins`` would truncate every fractional factor to 0.
    freqs = torch.as_tensor(freq_bins, dtype=phase1.dtype, device=phase1.device)

    blend_factors = torch.zeros_like(freqs)
    blend_factors[freqs < low_cutoff] = base_factor
    blend_factors[freqs > high_cutoff] = base_factor + scale_factor

    in_range_mask = (freqs >= low_cutoff) & (freqs <= high_cutoff)
    blend_factors[in_range_mask] = base_factor + scale_factor * (
        (freqs[in_range_mask] - low_cutoff) / (high_cutoff - low_cutoff)
    )

    # Broadcast the per-bin factor over all trailing dimensions instead of
    # looping over frequency rows in Python.
    weights = blend_factors.reshape(-1, *([1] * (phase1.dim() - 1)))
    blended_phase = (1 - weights) * phase1 + weights * phase2

    # Wrap into [-pi, pi).
    return torch.remainder(blended_phase + torch.pi, 2 * torch.pi) - torch.pi
|
| 31 |
+
|
| 32 |
+
def transfer_magnitude_phase(source_file, target_file, output_folder, transfer_magnitude=False, transfer_phase=True,
                             low_cutoff=500, high_cutoff=9000, scale_factor=1.4, output_format='flac'):
    """Rebuild the target audio using magnitude and/or phase taken from the source.

    Both files are loaded with torchaudio and converted to STFTs (n_fft=2048,
    hop=512, Hann window). Each channel of the target STFT is then modified and
    the result is written to ``output_folder`` as
    ``"<target stem> (Fixed Instrumental)<ext>"``.

    Args:
        source_file: Path of the audio whose magnitude/phase is borrowed.
        target_file: Path of the audio being corrected; also determines the
            output file name.
        output_folder: Directory the result is written to.
        transfer_magnitude: If True, replace the target magnitude with the
            source magnitude (target phase is kept at this step).
        transfer_phase: If True, replace the phase with a frequency-dependent
            blend of target and source phase (see ``frequency_blend_phases``).
        low_cutoff: Lower cutoff forwarded to ``frequency_blend_phases``.
        high_cutoff: Upper cutoff forwarded to ``frequency_blend_phases``.
        scale_factor: Scale factor forwarded to ``frequency_blend_phases``.
        output_format: ``'flac'`` writes 16-bit FLAC; any other value writes
            a ``.wav`` file with torchaudio's defaults.

    Returns:
        Path of the written output file.

    Raises:
        ValueError: If the two files have different sample rates.
    """
    target_name = os.path.splitext(os.path.basename(target_file))[0]

    # Strip stem-type suffixes so the output name is based on the track name only.
    target_name = target_name.replace("_other", "").replace("_vocals", "").replace("_instrumental", "")
    target_name = target_name.replace("_Other", "").replace("_Vocals", "").replace("_Instrumental", "").strip()

    ext = '.flac' if output_format == 'flac' else '.wav'
    output_file = os.path.join(output_folder, f"{target_name} (Fixed Instrumental){ext}")

    print(f"Phase Fixing: {os.path.basename(target_file)}...")
    source_waveform, source_sr = torchaudio.load(source_file)
    target_waveform, target_sr = torchaudio.load(target_file)

    if source_sr != target_sr:
        raise ValueError("Sample rates of source and target audio files must match.")

    n_fft = 2048
    hop_length = 512
    window = torch.hann_window(n_fft)

    source_stfts = torch.stft(source_waveform, n_fft=n_fft, hop_length=hop_length, window=window, return_complex=True, pad_mode="reflect")
    target_stfts = torch.stft(target_waveform, n_fft=n_fft, hop_length=hop_length, window=window, return_complex=True, pad_mode="reflect")

    # Frequency of each STFT bin from 0 Hz up to Nyquist. True division keeps
    # the exact Nyquist frequency even when the sample rate is odd.
    freqs = torch.linspace(0, source_sr / 2, steps=n_fft // 2 + 1)

    modified_stfts = []
    # Iterates channel by channel; zip stops at the shorter channel count.
    for source_stft, target_stft in zip(source_stfts, target_stfts):
        modified_stft = target_stft.clone()

        if transfer_magnitude:
            # Source magnitude combined with the (still unmodified) target phase.
            modified_stft = torch.abs(source_stft) * torch.exp(1j * torch.angle(modified_stft))

        if transfer_phase:
            blended_phase = frequency_blend_phases(
                torch.angle(target_stft), torch.angle(source_stft), freqs,
                low_cutoff, high_cutoff, scale_factor=scale_factor
            )
            modified_stft = torch.abs(modified_stft) * torch.exp(1j * blended_phase)

        modified_stfts.append(modified_stft)

    # The reconstructed signal is fitted to the source waveform's sample count.
    modified_waveform = torch.istft(
        torch.stack(modified_stfts),
        n_fft=n_fft,
        hop_length=hop_length,
        window=window,
        length=source_waveform.size(1)
    )

    if output_format == 'flac':
        torchaudio.save(output_file, modified_waveform, target_sr, format="flac", bits_per_sample=16)
    else:
        torchaudio.save(output_file, modified_waveform, target_sr)

    print(f"Saved: {output_file}")
    return output_file
|
| 88 |
+
|
| 89 |
+
def process_phase_fix(source_file, target_file, output_folder, low_cutoff=500, high_cutoff=9000,
                      scale_factor=1.4, output_format='flac'):
    """Run the phase-only fix and report the outcome instead of raising.

    Creates ``output_folder`` if needed, then calls ``transfer_magnitude_phase``
    with ``transfer_magnitude=False`` and ``transfer_phase=True``.

    Args:
        source_file: Path of the audio providing the reference phase.
        target_file: Path of the audio to be corrected.
        output_folder: Directory for the result (created if missing).
        low_cutoff: Lower cutoff forwarded to ``transfer_magnitude_phase``.
        high_cutoff: Upper cutoff forwarded to ``transfer_magnitude_phase``.
        scale_factor: Scale factor forwarded to ``transfer_magnitude_phase``.
        output_format: Output format forwarded to ``transfer_magnitude_phase``.

    Returns:
        ``(output_file, message)`` on success, or ``(None, message)`` when any
        exception occurs during processing.
    """
    os.makedirs(output_folder, exist_ok=True)

    try:
        output_file = transfer_magnitude_phase(
            source_file=source_file,
            target_file=target_file,
            output_folder=output_folder,
            transfer_magnitude=False,
            transfer_phase=True,
            low_cutoff=low_cutoff,
            high_cutoff=high_cutoff,
            scale_factor=scale_factor,
            output_format=output_format
        )
        return output_file, "Phase fix completed successfully!"
    except Exception as e:
        # Best-effort API: callers receive the failure as a message.
        return None, f"Error during phase fix: {str(e)}"
    finally:
        # Release memory on both the success and the failure path.
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
|
| 110 |
+
|
| 111 |
+
# Display names of the vocal-separation models grouped as phase "source" models.
# NOTE(review): presumably shown as selectable choices by the UI; confirm against callers.
SOURCE_MODELS = [
    "VOCALS-MelBand-Roformer (by Becruily)",
    "VOCALS-Mel-Roformer big beta 4 (by unwa)",
    "VOCALS-Melband-Roformer BigBeta5e (by unwa)",
    "VOCALS-big_beta6 (by Unwa)",
    "VOCALS-big_beta6X (by Unwa)",
    "VOCALS-MelBand-Roformer (by KimberleyJSN)",
    "VOCALS-MelBand-Roformer Kim FT (by Unwa)",
    "VOCALS-MelBand-Roformer Kim FT 2 (by Unwa)",
    "VOCALS-MelBand-Roformer Kim FT 2 Blendless (by unwa)",
    "VOCALS-Mel-Roformer FT 3 Preview (by unwa)",
    "VOCALS-BS-Roformer_1296 (by viperx)",
    "VOCALS-BS-Roformer_1297 (by viperx)",
    "VOCALS-BS-RoformerLargev1 (by unwa)",
    "bs_roformer_revive (by unwa)",
]
|
| 127 |
+
|
| 128 |
+
# Display names of the instrumental models grouped as phase-fix "target" models.
# NOTE(review): presumably shown as selectable choices by the UI; confirm against callers.
TARGET_MODELS = [
    "INST-MelBand-Roformer (by Becruily)",
    "INST-Mel-Roformer v1 (by unwa)",
    "INST-Mel-Roformer v2 (by unwa)",
    "inst_v1e (by unwa)",
    "INST-Mel-Roformer v1e+ (by unwa)",
    "Inst_GaboxV7 (by Gabox)",
    "INST-VOC-Mel-Roformer a.k.a. duality (by unwa)",
    "INST-VOC-Mel-Roformer a.k.a. duality v2 (by unwa)",
    "inst_gabox (by Gabox)",
    "inst_gaboxFlowersV10 (by Gabox)",
]
|
processing.py
CHANGED
|
@@ -13,9 +13,16 @@ sys.path.append(current_dir)
|
|
| 13 |
|
| 14 |
from datetime import datetime
|
| 15 |
from helpers import INPUT_DIR, OLD_OUTPUT_DIR, ENSEMBLE_DIR, AUTO_ENSEMBLE_TEMP, move_old_files, clear_directory, BASE_DIR, clean_model, extract_model_name_from_checkpoint, sanitize_filename, find_clear_segment, save_segment, run_matchering, clamp_percentage
|
| 16 |
-
from model import get_model_config
|
| 17 |
from apollo_processing import process_with_apollo # Import Apollo processing
|
| 18 |
import torch
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
import yaml
|
| 20 |
import gradio as gr
|
| 21 |
import threading
|
|
@@ -29,15 +36,25 @@ import locale
|
|
| 29 |
import re
|
| 30 |
import psutil
|
| 31 |
import concurrent.futures
|
| 32 |
-
from
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
import tempfile
|
| 34 |
-
import traceback
|
| 35 |
from urllib.parse import urlparse, quote
|
| 36 |
try:
|
| 37 |
from google.colab import drive
|
|
|
|
| 38 |
IS_COLAB = True
|
| 39 |
except ImportError:
|
| 40 |
IS_COLAB = False
|
|
|
|
| 41 |
import matchering as mg
|
| 42 |
|
| 43 |
import warnings
|
|
@@ -61,14 +78,24 @@ def setup_directories():
|
|
| 61 |
"""Create necessary directories and check Google Drive access."""
|
| 62 |
if IS_COLAB:
|
| 63 |
try:
|
| 64 |
-
if
|
|
|
|
|
|
|
|
|
|
| 65 |
print("Mounting Google Drive...")
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
except Exception as e:
|
| 71 |
-
|
|
|
|
| 72 |
os.makedirs(OUTPUT_DIR, exist_ok=True)
|
| 73 |
os.makedirs(INPUT_DIR, exist_ok=True)
|
| 74 |
os.makedirs(OLD_OUTPUT_DIR, exist_ok=True)
|
|
@@ -85,25 +112,47 @@ def refresh_auto_output():
|
|
| 85 |
except Exception as e:
|
| 86 |
return None, f"Error refreshing output: {str(e)}"
|
| 87 |
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
)
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 101 |
return f"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 102 |
<div id="custom-progress" style="margin-top: 10px;">
|
| 103 |
<div style="font-size: 1rem; color: #C0C0C0; margin-bottom: 5px;" id="progress-label">{progress_label}</div>
|
| 104 |
<div style="width: 100%; background-color: #444; border-radius: 5px; overflow: hidden;">
|
| 105 |
-
<div id="progress-bar" style="width: {progress_percent}%; height: 20px; background
|
| 106 |
</div>
|
|
|
|
| 107 |
</div>
|
| 108 |
"""
|
| 109 |
|
|
@@ -112,18 +161,38 @@ def extract_model_name_from_checkpoint(checkpoint_path):
|
|
| 112 |
return "Unknown"
|
| 113 |
base_name = os.path.basename(checkpoint_path)
|
| 114 |
model_name = os.path.splitext(base_name)[0]
|
| 115 |
-
print(f"Original checkpoint path: {checkpoint_path}, extracted model_name: {model_name}")
|
| 116 |
return model_name.strip()
|
| 117 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 118 |
def run_command_and_process_files(
|
| 119 |
model_type,
|
| 120 |
config_path,
|
| 121 |
start_check_point,
|
| 122 |
INPUT_DIR,
|
| 123 |
OUTPUT_DIR,
|
| 124 |
-
extract_instrumental
|
| 125 |
-
use_tta
|
| 126 |
-
demud_phaseremix_inst
|
| 127 |
progress=None,
|
| 128 |
use_apollo=True,
|
| 129 |
apollo_normal_model="Apollo Universal Model",
|
|
@@ -133,52 +202,113 @@ def run_command_and_process_files(
|
|
| 133 |
apollo_overlap=2,
|
| 134 |
apollo_method="normal_method",
|
| 135 |
apollo_midside_model=None,
|
| 136 |
-
output_format="wav"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 137 |
):
|
|
|
|
|
|
|
|
|
|
|
|
|
| 138 |
try:
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 151 |
|
| 152 |
-
os.makedirs(OUTPUT_DIR, exist_ok=True)
|
| 153 |
|
| 154 |
-
# Komut oluştur
|
| 155 |
-
INFERENCE_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), "inference.py")
|
| 156 |
cmd_parts = [
|
| 157 |
-
|
| 158 |
-
"--model_type",
|
| 159 |
-
"--config_path",
|
| 160 |
-
"--start_check_point",
|
| 161 |
-
"--input_folder",
|
| 162 |
-
"--store_dir",
|
| 163 |
"--chunk_size", str(inference_chunk_size),
|
| 164 |
"--overlap", str(inference_overlap),
|
| 165 |
"--export_format", f"{output_format} FLOAT"
|
| 166 |
]
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 179 |
process = subprocess.Popen(
|
| 180 |
cmd_parts,
|
| 181 |
-
cwd=
|
| 182 |
stdout=subprocess.PIPE,
|
| 183 |
stderr=subprocess.PIPE,
|
| 184 |
text=True,
|
|
@@ -186,156 +316,188 @@ def run_command_and_process_files(
|
|
| 186 |
universal_newlines=True
|
| 187 |
)
|
| 188 |
|
| 189 |
-
# İlerleme takibi
|
| 190 |
-
mixture_paths = sorted(glob.glob(os.path.join(INPUT_DIR, '*.*')))
|
| 191 |
-
total_files = len(mixture_paths)
|
| 192 |
-
processed_files = 0
|
| 193 |
-
base_progress_per_file = 80 / total_files if total_files > 0 else 80 # 0-80% ayrıştırma
|
| 194 |
-
|
| 195 |
stderr_output = ""
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 222 |
if process.returncode != 0:
|
| 223 |
-
raise
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
logging.info(f"Subprocess completed in {elapsed_time:.2f} seconds")
|
| 227 |
|
| 228 |
-
#
|
| 229 |
filename_model = extract_model_name_from_checkpoint(start_check_point)
|
| 230 |
-
output_files =
|
| 231 |
if not output_files:
|
| 232 |
-
raise FileNotFoundError(
|
| 233 |
-
|
| 234 |
-
|
| 235 |
-
|
| 236 |
-
|
| 237 |
-
|
| 238 |
-
|
| 239 |
-
|
| 240 |
-
|
| 241 |
-
|
| 242 |
-
|
| 243 |
-
|
| 244 |
-
|
| 245 |
-
|
| 246 |
-
|
| 247 |
-
|
| 248 |
-
|
| 249 |
-
|
| 250 |
-
|
| 251 |
-
|
| 252 |
-
|
| 253 |
-
|
| 254 |
-
|
| 255 |
-
if progress is not None and callable(getattr(progress, '__call__', None)):
|
| 256 |
-
progress(progress_value / 100, desc=i18n("renaming_files").format(renamed_files, total_output_files))
|
| 257 |
-
update_progress_html(i18n("renaming_files").format(renamed_files, total_output_files), progress_value)
|
| 258 |
-
except Exception as e:
|
| 259 |
-
logging.error(f"Could not rename {file_path} to {new_file_path}: {e}")
|
| 260 |
|
| 261 |
-
|
|
|
|
|
|
|
| 262 |
if not output_files:
|
| 263 |
-
raise FileNotFoundError(
|
| 264 |
|
| 265 |
-
|
| 266 |
-
|
|
|
|
|
|
|
| 267 |
matching_files = [
|
| 268 |
-
os.path.join(OUTPUT_DIR, f) for f in output_files
|
| 269 |
-
if
|
| 270 |
]
|
| 271 |
return matching_files[0] if matching_files else None
|
| 272 |
|
| 273 |
output_list = [
|
| 274 |
-
find_file('vocals'), find_file('instrumental'), find_file('phaseremix'),
|
| 275 |
find_file('drum'), find_file('bass'), find_file('other'), find_file('effects'),
|
| 276 |
find_file('speech'), find_file('music'), find_file('dry'), find_file('male'),
|
| 277 |
-
find_file('female'), find_file('bleed'), find_file('karaoke')
|
|
|
|
| 278 |
]
|
| 279 |
|
| 280 |
-
# Normalizasyon: 90-95%
|
| 281 |
normalized_outputs = []
|
| 282 |
-
for
|
| 283 |
if output_file and os.path.exists(output_file):
|
| 284 |
normalized_file = os.path.join(OUTPUT_DIR, f"{sanitize_filename(os.path.splitext(os.path.basename(output_file))[0])}.{output_format}")
|
| 285 |
-
|
| 286 |
-
|
| 287 |
-
|
| 288 |
-
|
| 289 |
-
|
| 290 |
-
|
| 291 |
-
|
| 292 |
-
|
| 293 |
-
normalized_outputs.append(normalized_file)
|
| 294 |
-
|
| 295 |
-
progress_value = round(90 + (i + 1) / len(output_list) * 5)
|
| 296 |
-
if progress is not None and callable(getattr(progress, '__call__', None)):
|
| 297 |
-
progress(progress_value / 100, desc=i18n("normalizing_output").format(i + 1, len(output_list)))
|
| 298 |
-
update_progress_html(i18n("normalizing_output").format(i + 1, len(output_list), progress_value))
|
| 299 |
-
except Exception as e:
|
| 300 |
-
logging.error(f"Normalization failed for {output_file}: {e}")
|
| 301 |
-
normalized_outputs.append(None)
|
| 302 |
else:
|
| 303 |
-
normalized_outputs.append(
|
| 304 |
|
| 305 |
-
# Apollo
|
| 306 |
if use_apollo:
|
| 307 |
-
|
| 308 |
-
|
| 309 |
-
|
| 310 |
-
|
| 311 |
-
|
| 312 |
-
|
| 313 |
-
|
| 314 |
-
|
| 315 |
-
|
| 316 |
-
|
| 317 |
-
|
| 318 |
-
|
| 319 |
-
|
| 320 |
-
|
| 321 |
-
)
|
| 322 |
-
except ImportError:
|
| 323 |
-
logging.warning("apollo_processing module not found, skipping Apollo processing")
|
| 324 |
-
except Exception as e:
|
| 325 |
-
logging.error(f"Apollo processing failed: {e}")
|
| 326 |
-
|
| 327 |
-
# Tamamlandı
|
| 328 |
-
if progress is not None and callable(getattr(progress, '__call__', None)):
|
| 329 |
-
progress(1.0, desc=i18n("separation_complete"))
|
| 330 |
-
update_progress_html(i18n("separation_complete"), 100)
|
| 331 |
|
| 332 |
-
|
| 333 |
-
|
| 334 |
|
|
|
|
|
|
|
|
|
|
| 335 |
except Exception as e:
|
| 336 |
-
|
| 337 |
-
traceback
|
| 338 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 339 |
|
| 340 |
def process_audio(
|
| 341 |
input_audio_file,
|
|
@@ -343,6 +505,10 @@ def process_audio(
|
|
| 343 |
chunk_size,
|
| 344 |
overlap,
|
| 345 |
export_format,
|
|
|
|
|
|
|
|
|
|
|
|
|
| 346 |
use_tta,
|
| 347 |
demud_phaseremix_inst,
|
| 348 |
extract_instrumental,
|
|
@@ -352,12 +518,16 @@ def process_audio(
|
|
| 352 |
apollo_method,
|
| 353 |
apollo_normal_model,
|
| 354 |
apollo_midside_model,
|
| 355 |
-
use_matchering
|
| 356 |
-
matchering_passes
|
| 357 |
progress=gr.Progress(track_tqdm=True),
|
| 358 |
*args,
|
| 359 |
**kwargs
|
| 360 |
):
|
|
|
|
|
|
|
|
|
|
|
|
|
| 361 |
try:
|
| 362 |
# Check Google Drive connection
|
| 363 |
setup_directories()
|
|
@@ -365,29 +535,29 @@ def process_audio(
|
|
| 365 |
if input_audio_file is not None:
|
| 366 |
audio_path = input_audio_file.name if hasattr(input_audio_file, 'name') else input_audio_file
|
| 367 |
else:
|
| 368 |
-
|
| 369 |
-
None, None, None, None, None, None, None, None, None, None, None, None, None, None,
|
| 370 |
"No audio file provided",
|
| 371 |
update_progress_html("No input provided", 0)
|
| 372 |
)
|
|
|
|
| 373 |
|
| 374 |
os.makedirs(OUTPUT_DIR, exist_ok=True)
|
| 375 |
os.makedirs(OLD_OUTPUT_DIR, exist_ok=True)
|
| 376 |
move_old_files(OUTPUT_DIR)
|
| 377 |
|
| 378 |
-
print(f"process_audio: model parameter received: {model}")
|
| 379 |
# Clean model name, remove ⭐ and other unwanted characters
|
| 380 |
clean_model_name = clean_model(model) if not model.startswith("/") else extract_model_name_from_checkpoint(model)
|
| 381 |
-
print(f"Processing
|
| 382 |
-
|
| 383 |
-
print(f"Raw UI inputs - chunk_size: {chunk_size}, overlap: {overlap}, apollo_chunk_size: {apollo_chunk_size}, apollo_overlap: {apollo_overlap}, apollo_method: {apollo_method}")
|
| 384 |
|
| 385 |
# Validate inference parameters
|
|
|
|
| 386 |
try:
|
| 387 |
-
inference_chunk_size = int(chunk_size)
|
| 388 |
except (TypeError, ValueError):
|
| 389 |
print(f"Invalid chunk_size: {chunk_size}. Defaulting to: 352800.")
|
| 390 |
inference_chunk_size = 352800
|
|
|
|
| 391 |
|
| 392 |
try:
|
| 393 |
inference_overlap = int(overlap)
|
|
@@ -416,21 +586,32 @@ def process_audio(
|
|
| 416 |
else:
|
| 417 |
print(f"Invalid apollo_method: {apollo_method}. Defaulting to: normal_method.")
|
| 418 |
apollo_method = "normal_method"
|
| 419 |
-
print(f"Parsed apollo_method: {apollo_method}")
|
| 420 |
-
|
| 421 |
-
print(f"Corrected values - inference_chunk_size: {inference_chunk_size}, inference_overlap: {inference_overlap}, apollo_chunk_size: {apollo_chunk_size}, apollo_overlap: {apollo_overlap}")
|
| 422 |
-
|
| 423 |
# Copy input file to INPUT_DIR
|
| 424 |
input_filename = os.path.basename(audio_path)
|
| 425 |
dest_path = os.path.join(INPUT_DIR, input_filename)
|
| 426 |
shutil.copy(audio_path, dest_path)
|
| 427 |
-
print(f"Input file copied: {dest_path}")
|
| 428 |
|
| 429 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 430 |
model_type, config_path, start_check_point = get_model_config(clean_model_name, inference_chunk_size, inference_overlap)
|
| 431 |
-
print(f"Model configuration: model_type={model_type}, config_path={config_path}, start_check_point={start_check_point}")
|
| 432 |
|
| 433 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 434 |
model_type=model_type,
|
| 435 |
config_path=config_path,
|
| 436 |
start_check_point=start_check_point,
|
|
@@ -448,17 +629,32 @@ def process_audio(
|
|
| 448 |
apollo_overlap=apollo_overlap,
|
| 449 |
apollo_method=apollo_method,
|
| 450 |
apollo_midside_model=apollo_midside_model,
|
| 451 |
-
output_format=export_format.split()[0].lower()
|
| 452 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 453 |
|
| 454 |
if outputs is None or all(output is None for output in outputs):
|
| 455 |
raise ValueError("run_command_and_process_files returned None or all None outputs")
|
| 456 |
|
| 457 |
# Apply Matchering (if enabled)
|
| 458 |
if use_matchering:
|
| 459 |
-
#
|
| 460 |
-
|
| 461 |
-
|
|
|
|
|
|
|
|
|
|
| 462 |
|
| 463 |
# Find clean segment from original audio
|
| 464 |
segment_start, segment_end, segment_audio = find_clear_segment(audio_path)
|
|
@@ -488,12 +684,11 @@ def process_audio(
|
|
| 488 |
|
| 489 |
outputs = tuple(mastered_outputs)
|
| 490 |
|
| 491 |
-
|
| 492 |
-
|
| 493 |
-
|
| 494 |
-
return (
|
| 495 |
outputs[0], outputs[1], outputs[2], outputs[3], outputs[4], outputs[5], outputs[6],
|
| 496 |
outputs[7], outputs[8], outputs[9], outputs[10], outputs[11], outputs[12], outputs[13],
|
|
|
|
| 497 |
"Audio processing completed",
|
| 498 |
update_progress_html("Audio processing completed", 100)
|
| 499 |
)
|
|
@@ -502,8 +697,8 @@ def process_audio(
|
|
| 502 |
print(f"process_audio error: {str(e)}")
|
| 503 |
import traceback
|
| 504 |
traceback.print_exc()
|
| 505 |
-
|
| 506 |
-
None, None, None, None, None, None, None, None, None, None, None, None, None, None,
|
| 507 |
f"Error occurred: {str(e)}",
|
| 508 |
update_progress_html("Error occurred", 0)
|
| 509 |
)
|
|
@@ -534,23 +729,63 @@ def ensemble_audio_fn(files, method, weights, progress=gr.Progress()):
|
|
| 534 |
ensemble_args += ["--weights", *weights_list]
|
| 535 |
|
| 536 |
progress(0, desc="Starting ensemble process", total=100)
|
| 537 |
-
|
|
|
|
|
|
|
| 538 |
["python", "ensemble.py"] + ensemble_args,
|
| 539 |
-
|
| 540 |
-
|
|
|
|
|
|
|
|
|
|
| 541 |
)
|
| 542 |
|
| 543 |
-
|
| 544 |
-
|
| 545 |
-
|
| 546 |
-
|
| 547 |
-
|
| 548 |
-
|
| 549 |
-
|
| 550 |
-
|
| 551 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 552 |
|
| 553 |
-
|
|
|
|
|
|
|
|
|
|
| 554 |
log = f"Success: {result.stdout}" if not result.stderr else f"Error: {result.stderr}"
|
| 555 |
return output_path, log
|
| 556 |
|
|
@@ -602,18 +837,15 @@ def auto_ensemble_process(
|
|
| 602 |
input_filename = os.path.basename(audio_path)
|
| 603 |
dest_path = os.path.join(INPUT_DIR, input_filename)
|
| 604 |
shutil.copy(audio_path, dest_path)
|
| 605 |
-
print(f"Input file copied: {dest_path}")
|
| 606 |
|
| 607 |
# Parse apollo method
|
| 608 |
if auto_apollo_method in ["2", 2]:
|
| 609 |
auto_apollo_method = "mid_side_method"
|
| 610 |
elif auto_apollo_method in ["1", 1]:
|
| 611 |
auto_apollo_method = "normal_method"
|
| 612 |
-
print(f"Parsed auto_apollo_method: {auto_apollo_method}")
|
| 613 |
|
| 614 |
corrected_auto_chunk_size = int(auto_apollo_chunk_size)
|
| 615 |
corrected_auto_overlap = int(auto_apollo_overlap)
|
| 616 |
-
print(f"Corrected values - auto_apollo_chunk_size: {corrected_auto_chunk_size}, auto_apollo_overlap: {corrected_auto_overlap}")
|
| 617 |
|
| 618 |
# Setup temporary directories
|
| 619 |
auto_ensemble_temp = os.path.join(BASE_DIR, "auto_ensemble_temp")
|
|
@@ -627,7 +859,6 @@ def auto_ensemble_process(
|
|
| 627 |
|
| 628 |
for i, model in enumerate(selected_models):
|
| 629 |
clean_model_name = clean_model(model)
|
| 630 |
-
print(f"Processing model {i+1}/{total_models}: Original={model}, Cleaned={clean_model_name}")
|
| 631 |
model_output_dir = os.path.join(auto_ensemble_temp, clean_model_name)
|
| 632 |
os.makedirs(model_output_dir, exist_ok=True)
|
| 633 |
|
|
@@ -639,7 +870,12 @@ def auto_ensemble_process(
|
|
| 639 |
)
|
| 640 |
|
| 641 |
model_type, config_path, start_check_point = get_model_config(clean_model_name, auto_chunk_size, auto_overlap)
|
| 642 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 643 |
|
| 644 |
cmd = [
|
| 645 |
"python", INFERENCE_PATH,
|
|
@@ -648,7 +884,7 @@ def auto_ensemble_process(
|
|
| 648 |
"--start_check_point", start_check_point,
|
| 649 |
"--input_folder", INPUT_DIR,
|
| 650 |
"--store_dir", model_output_dir,
|
| 651 |
-
"--chunk_size", str(
|
| 652 |
"--overlap", str(auto_overlap),
|
| 653 |
"--export_format", f"{export_format.split()[0].lower()} FLOAT"
|
| 654 |
]
|
|
@@ -668,20 +904,83 @@ def auto_ensemble_process(
|
|
| 668 |
)
|
| 669 |
|
| 670 |
stderr_output = ""
|
|
|
|
|
|
|
|
|
|
| 671 |
for line in process.stdout:
|
| 672 |
-
|
| 673 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 674 |
try:
|
| 675 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 676 |
model_percentage = (percentage / 100) * model_progress_per_step
|
| 677 |
current_progress = (i * model_progress_per_step) + model_percentage
|
| 678 |
current_progress = clamp_percentage(current_progress)
|
| 679 |
-
|
| 680 |
-
|
| 681 |
-
|
| 682 |
-
|
| 683 |
-
|
| 684 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 685 |
|
| 686 |
for line in process.stderr:
|
| 687 |
stderr_output += line
|
|
@@ -868,6 +1167,8 @@ def auto_ensemble_process(
|
|
| 868 |
print(f"File not found in Google Drive, copying from local path: {output_path}")
|
| 869 |
shutil.copy(output_path, drive_output_path)
|
| 870 |
print(f"Copied to Google Drive: {drive_output_path}")
|
|
|
|
|
|
|
| 871 |
|
| 872 |
yield output_path, i18n("success_output_created") + f" Saved to {drive_output_path if IS_COLAB else output_path}", update_progress_html(
|
| 873 |
i18n("ensemble_completed"), 100
|
|
|
|
| 13 |
|
| 14 |
from datetime import datetime
|
| 15 |
from helpers import INPUT_DIR, OLD_OUTPUT_DIR, ENSEMBLE_DIR, AUTO_ENSEMBLE_TEMP, move_old_files, clear_directory, BASE_DIR, clean_model, extract_model_name_from_checkpoint, sanitize_filename, find_clear_segment, save_segment, run_matchering, clamp_percentage
|
| 16 |
+
from model import get_model_config, get_model_chunk_size
|
| 17 |
from apollo_processing import process_with_apollo # Import Apollo processing
|
| 18 |
import torch
|
| 19 |
+
|
| 20 |
+
# PyTorch optimized backend (always available)
|
| 21 |
+
try:
|
| 22 |
+
from pytorch_backend import PyTorchBackend
|
| 23 |
+
PYTORCH_OPTIMIZED_AVAILABLE = True
|
| 24 |
+
except ImportError:
|
| 25 |
+
PYTORCH_OPTIMIZED_AVAILABLE = False
|
| 26 |
import yaml
|
| 27 |
import gradio as gr
|
| 28 |
import threading
|
|
|
|
| 36 |
import re
|
| 37 |
import psutil
|
| 38 |
import concurrent.futures
|
| 39 |
+
from tqdm import tqdm
|
| 40 |
+
|
| 41 |
+
# Google OAuth imports (optional - for Colab/Google Drive support)
|
| 42 |
+
try:
|
| 43 |
+
from google.oauth2.credentials import Credentials
|
| 44 |
+
GOOGLE_OAUTH_AVAILABLE = True
|
| 45 |
+
except ImportError:
|
| 46 |
+
GOOGLE_OAUTH_AVAILABLE = False
|
| 47 |
+
Credentials = None
|
| 48 |
+
|
| 49 |
import tempfile
|
|
|
|
| 50 |
from urllib.parse import urlparse, quote
|
| 51 |
try:
|
| 52 |
from google.colab import drive
|
| 53 |
+
# Verify we're actually in a working Colab environment
|
| 54 |
IS_COLAB = True
|
| 55 |
except ImportError:
|
| 56 |
IS_COLAB = False
|
| 57 |
+
drive = None
|
| 58 |
import matchering as mg
|
| 59 |
|
| 60 |
import warnings
|
|
|
|
| 78 |
"""Create necessary directories and check Google Drive access."""
|
| 79 |
if IS_COLAB:
|
| 80 |
try:
|
| 81 |
+
# Check if Google Drive is already mounted
|
| 82 |
+
if os.path.exists('/content/drive/MyDrive'):
|
| 83 |
+
pass # Already mounted, no action needed
|
| 84 |
+
else:
|
| 85 |
print("Mounting Google Drive...")
|
| 86 |
+
try:
|
| 87 |
+
from google.colab import drive
|
| 88 |
+
drive.mount('/content/drive', force_remount=True)
|
| 89 |
+
except AttributeError as ae:
|
| 90 |
+
# Handle 'NoneType' object has no attribute 'kernel' error
|
| 91 |
+
print(f"Warning: Google Drive mount skipped (Colab kernel issue): {str(ae)}")
|
| 92 |
+
print("Continuing with local storage...")
|
| 93 |
+
except Exception as mount_error:
|
| 94 |
+
print(f"Warning: Google Drive mount failed: {str(mount_error)}")
|
| 95 |
+
print("Continuing with local storage...")
|
| 96 |
except Exception as e:
|
| 97 |
+
print(f"Warning: Google Drive setup error: {str(e)}")
|
| 98 |
+
print("Continuing without Google Drive...")
|
| 99 |
os.makedirs(OUTPUT_DIR, exist_ok=True)
|
| 100 |
os.makedirs(INPUT_DIR, exist_ok=True)
|
| 101 |
os.makedirs(OLD_OUTPUT_DIR, exist_ok=True)
|
|
|
|
| 112 |
except Exception as e:
|
| 113 |
return None, f"Error refreshing output: {str(e)}"
|
| 114 |
|
| 115 |
+
def update_progress_html(progress_label, progress_percent, download_info=None):
    """Generate progress HTML with smooth animations and optional download percentage.

    The label and the download file name are HTML-escaped before being
    embedded, so text containing markup characters (for example ``<module>``
    inside an error message) is displayed literally instead of being parsed
    as a tag.

    Args:
        progress_label: Text label to show above the progress bar.
        progress_percent: Overall progress percentage; normalised with
            ``clamp_percentage`` before use.
        download_info: Optional dict with 'filename' and 'percent' for download
            progress. Any falsy or non-dict value disables the download sub-bar.

    Returns:
        str: An HTML fragment containing the keyframes style block, the label,
        the main progress bar and, when requested, the download sub-bar.
    """
    # Function-scope import keeps this block self-contained.
    from html import escape

    progress_percent = clamp_percentage(progress_percent)
    safe_label = escape(str(progress_label))

    # Determine if processing is active for pulse animation
    # (only strictly between 0 and 100).
    is_active = 0 < progress_percent < 100
    pulse_style = "animation: progress-pulse 1.5s ease-in-out infinite;" if is_active else ""

    # Build download sub-bar if downloading
    download_html = ""
    if download_info and isinstance(download_info, dict):
        dl_filename = escape(str(download_info.get('filename', '')))
        dl_percent = clamp_percentage(download_info.get('percent', 0))
        download_html = f"""
        <div style="margin-top: 8px; padding: 8px; background: rgba(0,0,0,0.3); border-radius: 5px;">
            <div style="font-size: 0.85rem; color: #a0a0a0; margin-bottom: 4px;">{dl_filename} - %{int(dl_percent)}</div>
            <div style="width: 100%; background-color: #333; border-radius: 4px; overflow: hidden;">
                <div style="width: {dl_percent}%; height: 14px; background: linear-gradient(90deg, #4ade80, #22d3ee); transition: width 0.3s ease-out; border-radius: 4px;"></div>
            </div>
        </div>
        """

    return f"""
    <style>
        @keyframes progress-pulse {{
            0%, 100% {{ opacity: 1; }}
            50% {{ opacity: 0.85; }}
        }}
    </style>
    <div id="custom-progress" style="margin-top: 10px;">
        <div style="font-size: 1rem; color: #C0C0C0; margin-bottom: 5px;" id="progress-label">{safe_label}</div>
        <div style="width: 100%; background-color: #444; border-radius: 5px; overflow: hidden;">
            <div id="progress-bar" style="width: {progress_percent}%; height: 20px; background: linear-gradient(90deg, #6e8efb, #a855f7); transition: width 0.5s ease-out; max-width: 100%; {pulse_style}"></div>
        </div>
        {download_html}
    </div>
    """
|
| 158 |
|
|
|
|
| 161 |
return "Unknown"
|
| 162 |
base_name = os.path.basename(checkpoint_path)
|
| 163 |
model_name = os.path.splitext(base_name)[0]
|
|
|
|
| 164 |
return model_name.strip()
|
| 165 |
|
| 166 |
+
|
| 167 |
+
|
| 168 |
+
|
| 169 |
+
|
| 170 |
+
|
| 171 |
+
|
| 172 |
+
|
| 173 |
+
|
| 174 |
+
|
| 175 |
+
|
| 176 |
+
|
| 177 |
+
|
| 178 |
+
|
| 179 |
+
|
| 180 |
+
|
| 181 |
+
|
| 182 |
+
|
| 183 |
+
|
| 184 |
+
|
| 185 |
+
|
| 186 |
+
|
| 187 |
def run_command_and_process_files(
|
| 188 |
model_type,
|
| 189 |
config_path,
|
| 190 |
start_check_point,
|
| 191 |
INPUT_DIR,
|
| 192 |
OUTPUT_DIR,
|
| 193 |
+
extract_instrumental,
|
| 194 |
+
use_tta,
|
| 195 |
+
demud_phaseremix_inst,
|
| 196 |
progress=None,
|
| 197 |
use_apollo=True,
|
| 198 |
apollo_normal_model="Apollo Universal Model",
|
|
|
|
| 202 |
apollo_overlap=2,
|
| 203 |
apollo_method="normal_method",
|
| 204 |
apollo_midside_model=None,
|
| 205 |
+
output_format="wav",
|
| 206 |
+
optimize_mode='channels_last',
|
| 207 |
+
enable_amp=True,
|
| 208 |
+
enable_tf32=True,
|
| 209 |
+
enable_cudnn_benchmark=True
|
| 210 |
):
|
| 211 |
+
"""
|
| 212 |
+
Run inference.py with specified parameters and process output files.
|
| 213 |
+
This is a generator function that yields progress updates for real-time UI feedback.
|
| 214 |
+
"""
|
| 215 |
try:
|
| 216 |
+
# Create directories and check Google Drive access
|
| 217 |
+
setup_directories()
|
| 218 |
+
|
| 219 |
+
if not config_path:
|
| 220 |
+
raise ValueError(f"Configuration path is empty: model_type: {model_type}")
|
| 221 |
+
if not os.path.exists(config_path):
|
| 222 |
+
raise FileNotFoundError(f"Configuration file not found: {config_path}")
|
| 223 |
+
if not start_check_point or not os.path.exists(start_check_point):
|
| 224 |
+
raise FileNotFoundError(f"Checkpoint file not found: {start_check_point}")
|
| 225 |
+
|
| 226 |
+
# Validate inference parameters
|
| 227 |
+
try:
|
| 228 |
+
inference_chunk_size = int(inference_chunk_size)
|
| 229 |
+
inference_overlap = int(inference_overlap)
|
| 230 |
+
except (TypeError, ValueError) as e:
|
| 231 |
+
print(f"Invalid inference_chunk_size or inference_overlap: {e}. Defaulting to: inference_chunk_size=352800, inference_overlap=2")
|
| 232 |
+
inference_chunk_size = 352800
|
| 233 |
+
inference_overlap = 2
|
| 234 |
+
|
| 235 |
+
# Validate Apollo parameters
|
| 236 |
+
try:
|
| 237 |
+
apollo_chunk_size = int(apollo_chunk_size)
|
| 238 |
+
apollo_overlap = int(apollo_overlap)
|
| 239 |
+
except (TypeError, ValueError) as e:
|
| 240 |
+
print(f"Invalid apollo_chunk_size or apollo_overlap: {e}. Defaulting to: apollo_chunk_size=19, apollo_overlap=2")
|
| 241 |
+
apollo_chunk_size = 19
|
| 242 |
+
apollo_overlap = 2
|
| 243 |
+
|
| 244 |
+
# Initial progress yield
|
| 245 |
+
yield {"progress": 0, "status": "Starting audio separation...", "outputs": None}
|
| 246 |
+
|
| 247 |
+
# Always use optimized PyTorch backend
|
| 248 |
+
python_exe = "python"
|
| 249 |
+
|
| 250 |
+
if PYTORCH_OPTIMIZED_AVAILABLE:
|
| 251 |
+
from inference_pytorch import INFERENCE_PATH as PYTORCH_INFERENCE_PATH
|
| 252 |
+
inference_script = PYTORCH_INFERENCE_PATH if os.path.exists(PYTORCH_INFERENCE_PATH) else INFERENCE_PATH
|
| 253 |
+
print(f"Using PyTorch backend (mode: {optimize_mode})")
|
| 254 |
+
print(f" AMP: {enable_amp} | TF32: {enable_tf32} | cuDNN: {enable_cudnn_benchmark}")
|
| 255 |
+
else:
|
| 256 |
+
inference_script = INFERENCE_PATH
|
| 257 |
+
print("Warning: PyTorch optimized backend not available, using standard inference")
|
| 258 |
+
|
| 259 |
+
|
| 260 |
+
|
| 261 |
+
|
| 262 |
+
|
| 263 |
+
|
| 264 |
|
|
|
|
| 265 |
|
|
|
|
|
|
|
| 266 |
cmd_parts = [
|
| 267 |
+
python_exe, inference_script,
|
| 268 |
+
"--model_type", model_type,
|
| 269 |
+
"--config_path", config_path,
|
| 270 |
+
"--start_check_point", start_check_point,
|
| 271 |
+
"--input_folder", INPUT_DIR,
|
| 272 |
+
"--store_dir", OUTPUT_DIR,
|
| 273 |
"--chunk_size", str(inference_chunk_size),
|
| 274 |
"--overlap", str(inference_overlap),
|
| 275 |
"--export_format", f"{output_format} FLOAT"
|
| 276 |
]
|
| 277 |
+
|
| 278 |
+
|
| 279 |
+
|
| 280 |
+
|
| 281 |
+
|
| 282 |
+
|
| 283 |
+
|
| 284 |
+
|
| 285 |
+
|
| 286 |
+
|
| 287 |
+
# Add optimized backend arguments (always enabled)
|
| 288 |
+
if PYTORCH_OPTIMIZED_AVAILABLE:
|
| 289 |
+
cmd_parts.extend([
|
| 290 |
+
"--optimize_mode", optimize_mode
|
| 291 |
+
])
|
| 292 |
+
if enable_amp:
|
| 293 |
+
cmd_parts.append("--enable_amp")
|
| 294 |
+
if enable_tf32:
|
| 295 |
+
cmd_parts.append("--enable_tf32")
|
| 296 |
+
if enable_cudnn_benchmark:
|
| 297 |
+
cmd_parts.append("--enable_cudnn_benchmark")
|
| 298 |
+
|
| 299 |
+
if extract_instrumental:
|
| 300 |
+
cmd_parts.append("--extract_instrumental")
|
| 301 |
+
if use_tta:
|
| 302 |
+
cmd_parts.append("--use_tta")
|
| 303 |
+
if demud_phaseremix_inst:
|
| 304 |
+
cmd_parts.append("--demud_phaseremix_inst")
|
| 305 |
+
|
| 306 |
+
print(f"Running command: {' '.join(cmd_parts)}")
|
| 307 |
+
|
| 308 |
+
# Use subprocess.Popen for real-time progress capture
|
| 309 |
process = subprocess.Popen(
|
| 310 |
cmd_parts,
|
| 311 |
+
cwd=BASE_DIR,
|
| 312 |
stdout=subprocess.PIPE,
|
| 313 |
stderr=subprocess.PIPE,
|
| 314 |
text=True,
|
|
|
|
| 316 |
universal_newlines=True
|
| 317 |
)
|
| 318 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 319 |
stderr_output = ""
|
| 320 |
+
last_yield_percent = -1
|
| 321 |
+
downloading_file = None
|
| 322 |
+
|
| 323 |
+
# Read stdout line-by-line for real-time progress updates
|
| 324 |
+
for line in process.stdout:
|
| 325 |
+
line_stripped = line.strip()
|
| 326 |
+
|
| 327 |
+
# Check for download progress [SESA_DOWNLOAD]
|
| 328 |
+
if line_stripped.startswith("[SESA_DOWNLOAD]"):
|
| 329 |
+
try:
|
| 330 |
+
dl_info = line_stripped.replace("[SESA_DOWNLOAD]", "")
|
| 331 |
+
if dl_info.startswith("START:"):
|
| 332 |
+
downloading_file = dl_info.replace("START:", "")
|
| 333 |
+
yield {"progress": 0, "status": i18n("downloading_model_file").format(downloading_file), "outputs": None}
|
| 334 |
+
elif dl_info.startswith("END:"):
|
| 335 |
+
downloading_file = None
|
| 336 |
+
elif ":" in dl_info:
|
| 337 |
+
parts = dl_info.rsplit(":", 1)
|
| 338 |
+
if len(parts) == 2:
|
| 339 |
+
filename, percent_str = parts
|
| 340 |
+
download_percent = int(percent_str)
|
| 341 |
+
yield {"progress": 0, "status": i18n("downloading_file_progress").format(filename, download_percent), "outputs": None}
|
| 342 |
+
except (ValueError, TypeError):
|
| 343 |
+
pass
|
| 344 |
+
# Check for [SESA_PROGRESS] prefix from inference script
|
| 345 |
+
elif line_stripped.startswith("[SESA_PROGRESS]"):
|
| 346 |
+
try:
|
| 347 |
+
percentage_str = line_stripped.replace("[SESA_PROGRESS]", "").strip()
|
| 348 |
+
percentage = float(percentage_str) if percentage_str else 0
|
| 349 |
+
percentage = min(max(percentage, 0), 100)
|
| 350 |
+
|
| 351 |
+
# Scale progress to 0-80% range (saving 80-100% for Apollo)
|
| 352 |
+
scaled_progress = int(percentage * 0.8)
|
| 353 |
+
|
| 354 |
+
# Yield on every percent change for smooth updates
|
| 355 |
+
if int(percentage) != last_yield_percent:
|
| 356 |
+
last_yield_percent = int(percentage)
|
| 357 |
+
yield {"progress": scaled_progress, "status": f"Separating audio... {int(percentage)}%", "outputs": None}
|
| 358 |
+
except (ValueError, TypeError):
|
| 359 |
+
pass
|
| 360 |
+
else:
|
| 361 |
+
# Only print important non-progress lines (errors, warnings, key info)
|
| 362 |
+
if line_stripped and not line_stripped.startswith((" ", " ")):
|
| 363 |
+
print(line_stripped)
|
| 364 |
+
|
| 365 |
+
# Capture stderr (only print errors)
|
| 366 |
+
for line in process.stderr:
|
| 367 |
+
stderr_output += line
|
| 368 |
+
line_s = line.strip()
|
| 369 |
+
if line_s and ("error" in line_s.lower() or "warning" in line_s.lower() or "traceback" in line_s.lower()):
|
| 370 |
+
print(f"Warning: {line_s}")
|
| 371 |
+
|
| 372 |
+
process.wait()
|
| 373 |
+
|
| 374 |
if process.returncode != 0:
|
| 375 |
+
raise subprocess.CalledProcessError(process.returncode, cmd_parts, stderr=stderr_output)
|
| 376 |
+
|
| 377 |
+
yield {"progress": 80, "status": "Separation complete, processing outputs...", "outputs": None}
|
|
|
|
| 378 |
|
| 379 |
+
# Check if output files were created
|
| 380 |
filename_model = extract_model_name_from_checkpoint(start_check_point)
|
| 381 |
+
output_files = os.listdir(OUTPUT_DIR)
|
| 382 |
if not output_files:
|
| 383 |
+
raise FileNotFoundError("No output files created in OUTPUT_DIR")
|
| 384 |
+
|
| 385 |
+
def rename_files_with_model(folder, filename_model):
|
| 386 |
+
timestamp = datetime.now().strftime("%d-%m-%Y_%H-%M")
|
| 387 |
+
for filename in sorted(os.listdir(folder)):
|
| 388 |
+
file_path = os.path.join(folder, filename)
|
| 389 |
+
if not any(filename.lower().endswith(ext) for ext in ['.mp3', '.wav', '.flac', '.aac', '.ogg', '.m4a']):
|
| 390 |
+
continue
|
| 391 |
+
base, ext = os.path.splitext(filename)
|
| 392 |
+
detected_type = None
|
| 393 |
+
for type_key in ['vocals', 'instrumental', 'instrument', 'phaseremix', 'drum', 'bass', 'other', 'effects', 'speech', 'music', 'dry', 'male', 'female', 'bleed', 'karaoke', 'mid', 'side']:
|
| 394 |
+
if type_key in base.lower():
|
| 395 |
+
detected_type = type_key
|
| 396 |
+
break
|
| 397 |
+
# Normalize 'instrument' to 'Instrumental' for consistency
|
| 398 |
+
type_suffix = 'Instrumental' if detected_type == 'instrument' else (detected_type.capitalize() if detected_type else "Processed")
|
| 399 |
+
clean_base = sanitize_filename(base.split('_')[0]).rsplit('.', 1)[0]
|
| 400 |
+
new_filename = f"{timestamp}_{clean_base}_{type_suffix}_{filename_model}{ext}"
|
| 401 |
+
new_file_path = os.path.join(folder, new_filename)
|
| 402 |
+
try:
|
| 403 |
+
os.rename(file_path, new_file_path)
|
| 404 |
+
except Exception as e:
|
| 405 |
+
print(f"Could not rename file: {os.path.basename(file_path)} -> {os.path.basename(new_file_path)}: {str(e)}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 406 |
|
| 407 |
+
rename_files_with_model(OUTPUT_DIR, filename_model)
|
| 408 |
+
|
| 409 |
+
output_files = os.listdir(OUTPUT_DIR)
|
| 410 |
if not output_files:
|
| 411 |
+
raise FileNotFoundError("No output files in OUTPUT_DIR after renaming")
|
| 412 |
|
| 413 |
+
def find_file(keywords):
|
| 414 |
+
"""Find file matching any of the keywords (can be single keyword or list)."""
|
| 415 |
+
if isinstance(keywords, str):
|
| 416 |
+
keywords = [keywords]
|
| 417 |
matching_files = [
|
| 418 |
+
os.path.join(OUTPUT_DIR, f) for f in output_files
|
| 419 |
+
if any(kw in f.lower() for kw in keywords)
|
| 420 |
]
|
| 421 |
return matching_files[0] if matching_files else None
|
| 422 |
|
| 423 |
output_list = [
|
| 424 |
+
find_file('vocals'), find_file(['instrumental', 'instrument']), find_file('phaseremix'),
|
| 425 |
find_file('drum'), find_file('bass'), find_file('other'), find_file('effects'),
|
| 426 |
find_file('speech'), find_file('music'), find_file('dry'), find_file('male'),
|
| 427 |
+
find_file('female'), find_file('bleed'), find_file('karaoke'),
|
| 428 |
+
find_file('mid'), find_file('side')
|
| 429 |
]
|
| 430 |
|
|
|
|
| 431 |
normalized_outputs = []
|
| 432 |
+
for output_file in output_list:
|
| 433 |
if output_file and os.path.exists(output_file):
|
| 434 |
normalized_file = os.path.join(OUTPUT_DIR, f"{sanitize_filename(os.path.splitext(os.path.basename(output_file))[0])}.{output_format}")
|
| 435 |
+
if output_file.endswith(f".{output_format}") and output_file != normalized_file:
|
| 436 |
+
shutil.copy(output_file, normalized_file)
|
| 437 |
+
elif output_file != normalized_file:
|
| 438 |
+
audio, sr = librosa.load(output_file, sr=None, mono=False)
|
| 439 |
+
sf.write(normalized_file, audio.T if audio.ndim > 1 else audio, sr)
|
| 440 |
+
else:
|
| 441 |
+
normalized_file = output_file
|
| 442 |
+
normalized_outputs.append(normalized_file)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 443 |
else:
|
| 444 |
+
normalized_outputs.append(output_file)
|
| 445 |
|
| 446 |
+
# Apollo processing
|
| 447 |
if use_apollo:
|
| 448 |
+
yield {"progress": 80, "status": "Enhancing with Apollo...", "outputs": None}
|
| 449 |
+
normalized_outputs = process_with_apollo(
|
| 450 |
+
output_files=normalized_outputs,
|
| 451 |
+
output_dir=OUTPUT_DIR,
|
| 452 |
+
apollo_chunk_size=apollo_chunk_size,
|
| 453 |
+
apollo_overlap=apollo_overlap,
|
| 454 |
+
apollo_method=apollo_method,
|
| 455 |
+
apollo_normal_model=apollo_normal_model,
|
| 456 |
+
apollo_midside_model=apollo_midside_model,
|
| 457 |
+
output_format=output_format,
|
| 458 |
+
progress=progress,
|
| 459 |
+
total_progress_start=80,
|
| 460 |
+
total_progress_end=100
|
| 461 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 462 |
|
| 463 |
+
# Final yield with outputs
|
| 464 |
+
yield {"progress": 100, "status": "Separation complete", "outputs": tuple(normalized_outputs)}
|
| 465 |
|
| 466 |
+
except subprocess.CalledProcessError as e:
|
| 467 |
+
print(f"Subprocess failed, code: {e.returncode}: {e.stderr}")
|
| 468 |
+
yield {"progress": 0, "status": f"Error: {e.stderr}", "outputs": (None,) * 16}
|
| 469 |
except Exception as e:
|
| 470 |
+
print(f"run_command_and_process_files error: {str(e)}")
|
| 471 |
+
import traceback
|
| 472 |
+
traceback.print_exc()
|
| 473 |
+
yield {"progress": 0, "status": f"Error: {str(e)}", "outputs": (None,) * 16}
|
| 474 |
+
|
| 475 |
+
|
| 476 |
+
|
| 477 |
+
|
| 478 |
+
|
| 479 |
+
|
| 480 |
+
|
| 481 |
+
|
| 482 |
+
|
| 483 |
+
|
| 484 |
+
|
| 485 |
+
|
| 486 |
+
|
| 487 |
+
|
| 488 |
+
|
| 489 |
+
|
| 490 |
+
|
| 491 |
+
|
| 492 |
+
|
| 493 |
+
|
| 494 |
+
|
| 495 |
+
|
| 496 |
+
|
| 497 |
+
|
| 498 |
+
|
| 499 |
+
|
| 500 |
+
|
| 501 |
|
| 502 |
def process_audio(
|
| 503 |
input_audio_file,
|
|
|
|
| 505 |
chunk_size,
|
| 506 |
overlap,
|
| 507 |
export_format,
|
| 508 |
+
optimize_mode,
|
| 509 |
+
enable_amp,
|
| 510 |
+
enable_tf32,
|
| 511 |
+
enable_cudnn_benchmark,
|
| 512 |
use_tta,
|
| 513 |
demud_phaseremix_inst,
|
| 514 |
extract_instrumental,
|
|
|
|
| 518 |
apollo_method,
|
| 519 |
apollo_normal_model,
|
| 520 |
apollo_midside_model,
|
| 521 |
+
use_matchering,
|
| 522 |
+
matchering_passes,
|
| 523 |
progress=gr.Progress(track_tqdm=True),
|
| 524 |
*args,
|
| 525 |
**kwargs
|
| 526 |
):
|
| 527 |
+
"""
|
| 528 |
+
Process audio with the selected model. This is a generator function that yields
|
| 529 |
+
progress updates for real-time UI feedback.
|
| 530 |
+
"""
|
| 531 |
try:
|
| 532 |
# Check Google Drive connection
|
| 533 |
setup_directories()
|
|
|
|
| 535 |
if input_audio_file is not None:
|
| 536 |
audio_path = input_audio_file.name if hasattr(input_audio_file, 'name') else input_audio_file
|
| 537 |
else:
|
| 538 |
+
yield (
|
| 539 |
+
None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None,
|
| 540 |
"No audio file provided",
|
| 541 |
update_progress_html("No input provided", 0)
|
| 542 |
)
|
| 543 |
+
return
|
| 544 |
|
| 545 |
os.makedirs(OUTPUT_DIR, exist_ok=True)
|
| 546 |
os.makedirs(OLD_OUTPUT_DIR, exist_ok=True)
|
| 547 |
move_old_files(OUTPUT_DIR)
|
| 548 |
|
|
|
|
| 549 |
# Clean model name, remove ⭐ and other unwanted characters
|
| 550 |
clean_model_name = clean_model(model) if not model.startswith("/") else extract_model_name_from_checkpoint(model)
|
| 551 |
+
print(f"Processing: {os.path.basename(audio_path)} | Model: {clean_model_name}")
|
|
|
|
|
|
|
| 552 |
|
| 553 |
# Validate inference parameters
|
| 554 |
+
_use_yaml_chunk = (chunk_size == "yaml")
|
| 555 |
try:
|
| 556 |
+
inference_chunk_size = 352800 if _use_yaml_chunk else int(chunk_size)
|
| 557 |
except (TypeError, ValueError):
|
| 558 |
print(f"Invalid chunk_size: {chunk_size}. Defaulting to: 352800.")
|
| 559 |
inference_chunk_size = 352800
|
| 560 |
+
_use_yaml_chunk = True # fallback: read from YAML
|
| 561 |
|
| 562 |
try:
|
| 563 |
inference_overlap = int(overlap)
|
|
|
|
| 586 |
else:
|
| 587 |
print(f"Invalid apollo_method: {apollo_method}. Defaulting to: normal_method.")
|
| 588 |
apollo_method = "normal_method"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 589 |
# Copy input file to INPUT_DIR
|
| 590 |
input_filename = os.path.basename(audio_path)
|
| 591 |
dest_path = os.path.join(INPUT_DIR, input_filename)
|
| 592 |
shutil.copy(audio_path, dest_path)
|
|
|
|
| 593 |
|
| 594 |
+
# Yield status for model loading
|
| 595 |
+
yield (
|
| 596 |
+
None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None,
|
| 597 |
+
f"Loading model: {clean_model_name}...",
|
| 598 |
+
update_progress_html(f"Loading model: {clean_model_name}", 0)
|
| 599 |
+
)
|
| 600 |
+
|
| 601 |
+
# Get model configuration with cleaned model name (downloads if needed)
|
| 602 |
model_type, config_path, start_check_point = get_model_config(clean_model_name, inference_chunk_size, inference_overlap)
|
|
|
|
| 603 |
|
| 604 |
+
# Read the model's native chunk_size from its YAML (now guaranteed to be downloaded)
|
| 605 |
+
native_chunk = get_model_chunk_size(clean_model_name)
|
| 606 |
+
if _use_yaml_chunk and native_chunk:
|
| 607 |
+
print(f"Using model's native chunk_size from YAML: {native_chunk}")
|
| 608 |
+
inference_chunk_size = native_chunk
|
| 609 |
+
elif not _use_yaml_chunk:
|
| 610 |
+
print(f"Using user-selected chunk_size: {inference_chunk_size}")
|
| 611 |
+
|
| 612 |
+
# Iterate over the generator and yield progress updates
|
| 613 |
+
outputs = None
|
| 614 |
+
for update in run_command_and_process_files(
|
| 615 |
model_type=model_type,
|
| 616 |
config_path=config_path,
|
| 617 |
start_check_point=start_check_point,
|
|
|
|
| 629 |
apollo_overlap=apollo_overlap,
|
| 630 |
apollo_method=apollo_method,
|
| 631 |
apollo_midside_model=apollo_midside_model,
|
| 632 |
+
output_format=export_format.split()[0].lower(),
|
| 633 |
+
optimize_mode=optimize_mode,
|
| 634 |
+
enable_amp=enable_amp,
|
| 635 |
+
enable_tf32=enable_tf32,
|
| 636 |
+
enable_cudnn_benchmark=enable_cudnn_benchmark
|
| 637 |
+
):
|
| 638 |
+
if update.get("outputs") is not None:
|
| 639 |
+
outputs = update["outputs"]
|
| 640 |
+
# Yield progress update to Gradio
|
| 641 |
+
yield (
|
| 642 |
+
None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None,
|
| 643 |
+
update["status"],
|
| 644 |
+
update_progress_html(update["status"], update["progress"])
|
| 645 |
+
)
|
| 646 |
|
| 647 |
if outputs is None or all(output is None for output in outputs):
|
| 648 |
raise ValueError("run_command_and_process_files returned None or all None outputs")
|
| 649 |
|
| 650 |
# Apply Matchering (if enabled)
|
| 651 |
if use_matchering:
|
| 652 |
+
# Yield progress update for Matchering
|
| 653 |
+
yield (
|
| 654 |
+
None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None,
|
| 655 |
+
"Applying Matchering...",
|
| 656 |
+
update_progress_html("Applying Matchering...", 90)
|
| 657 |
+
)
|
| 658 |
|
| 659 |
# Find clean segment from original audio
|
| 660 |
segment_start, segment_end, segment_audio = find_clear_segment(audio_path)
|
|
|
|
| 684 |
|
| 685 |
outputs = tuple(mastered_outputs)
|
| 686 |
|
| 687 |
+
# Final yield with all outputs
|
| 688 |
+
yield (
|
|
|
|
|
|
|
| 689 |
outputs[0], outputs[1], outputs[2], outputs[3], outputs[4], outputs[5], outputs[6],
|
| 690 |
outputs[7], outputs[8], outputs[9], outputs[10], outputs[11], outputs[12], outputs[13],
|
| 691 |
+
outputs[14], outputs[15],
|
| 692 |
"Audio processing completed",
|
| 693 |
update_progress_html("Audio processing completed", 100)
|
| 694 |
)
|
|
|
|
| 697 |
print(f"process_audio error: {str(e)}")
|
| 698 |
import traceback
|
| 699 |
traceback.print_exc()
|
| 700 |
+
yield (
|
| 701 |
+
None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None,
|
| 702 |
f"Error occurred: {str(e)}",
|
| 703 |
update_progress_html("Error occurred", 0)
|
| 704 |
)
|
|
|
|
| 729 |
ensemble_args += ["--weights", *weights_list]
|
| 730 |
|
| 731 |
progress(0, desc="Starting ensemble process", total=100)
|
| 732 |
+
|
| 733 |
+
# Run ensemble subprocess with real-time output capture
|
| 734 |
+
process = subprocess.Popen(
|
| 735 |
["python", "ensemble.py"] + ensemble_args,
|
| 736 |
+
stdout=subprocess.PIPE,
|
| 737 |
+
stderr=subprocess.PIPE,
|
| 738 |
+
text=True,
|
| 739 |
+
bufsize=1,
|
| 740 |
+
universal_newlines=True
|
| 741 |
)
|
| 742 |
|
| 743 |
+
stdout_output = ""
|
| 744 |
+
stderr_output = ""
|
| 745 |
+
|
| 746 |
+
# Read output in real-time and capture actual progress
|
| 747 |
+
for line in process.stdout:
|
| 748 |
+
stdout_output += line
|
| 749 |
+
line_stripped = line.strip()
|
| 750 |
+
|
| 751 |
+
# Capture real progress percentage from ensemble.py with new format
|
| 752 |
+
if line_stripped.startswith("[SESA_PROGRESS]"):
|
| 753 |
+
try:
|
| 754 |
+
percent_str = line_stripped.replace("[SESA_PROGRESS]", "").strip()
|
| 755 |
+
percent = int(float(percent_str)) if percent_str else 0
|
| 756 |
+
percent = min(max(percent, 0), 100)
|
| 757 |
+
progress(percent, desc=f"Ensemble progress: {percent}%")
|
| 758 |
+
except (ValueError, TypeError):
|
| 759 |
+
pass
|
| 760 |
+
# Legacy format support
|
| 761 |
+
elif line_stripped.startswith("Progress:"):
|
| 762 |
+
try:
|
| 763 |
+
percent = int(line_stripped.split(":")[1].strip().replace("%", ""))
|
| 764 |
+
percent = min(max(percent, 0), 100)
|
| 765 |
+
progress(percent, desc=f"Ensemble progress: {percent}%")
|
| 766 |
+
except (ValueError, IndexError):
|
| 767 |
+
pass
|
| 768 |
+
elif "loading" in line.lower():
|
| 769 |
+
print(f"Ensemble: {line_stripped}")
|
| 770 |
+
progress(5, desc="Loading audio files for ensemble...")
|
| 771 |
+
elif "processing ensemble" in line.lower():
|
| 772 |
+
print(f"Ensemble: {line_stripped}")
|
| 773 |
+
progress(10, desc="Starting ensemble processing...")
|
| 774 |
+
elif "saving" in line.lower():
|
| 775 |
+
print(f"Ensemble: {line_stripped}")
|
| 776 |
+
progress(95, desc="Saving ensemble output...")
|
| 777 |
+
elif line_stripped and not line_stripped.startswith("[SESA_PROGRESS]") and not line_stripped.startswith("Progress:"):
|
| 778 |
+
# Only print non-progress messages
|
| 779 |
+
print(f"Ensemble: {line_stripped}")
|
| 780 |
+
|
| 781 |
+
for line in process.stderr:
|
| 782 |
+
stderr_output += line
|
| 783 |
+
print(f"Ensemble stderr: {line.strip()}")
|
| 784 |
|
| 785 |
+
process.wait()
|
| 786 |
+
result = type('Result', (), {'stdout': stdout_output, 'stderr': stderr_output, 'returncode': process.returncode})()
|
| 787 |
+
|
| 788 |
+
progress(100, desc="Ensemble complete")
|
| 789 |
log = f"Success: {result.stdout}" if not result.stderr else f"Error: {result.stderr}"
|
| 790 |
return output_path, log
|
| 791 |
|
|
|
|
| 837 |
input_filename = os.path.basename(audio_path)
|
| 838 |
dest_path = os.path.join(INPUT_DIR, input_filename)
|
| 839 |
shutil.copy(audio_path, dest_path)
|
|
|
|
| 840 |
|
| 841 |
# Parse apollo method
|
| 842 |
if auto_apollo_method in ["2", 2]:
|
| 843 |
auto_apollo_method = "mid_side_method"
|
| 844 |
elif auto_apollo_method in ["1", 1]:
|
| 845 |
auto_apollo_method = "normal_method"
|
|
|
|
| 846 |
|
| 847 |
corrected_auto_chunk_size = int(auto_apollo_chunk_size)
|
| 848 |
corrected_auto_overlap = int(auto_apollo_overlap)
|
|
|
|
| 849 |
|
| 850 |
# Setup temporary directories
|
| 851 |
auto_ensemble_temp = os.path.join(BASE_DIR, "auto_ensemble_temp")
|
|
|
|
| 859 |
|
| 860 |
for i, model in enumerate(selected_models):
|
| 861 |
clean_model_name = clean_model(model)
|
|
|
|
| 862 |
model_output_dir = os.path.join(auto_ensemble_temp, clean_model_name)
|
| 863 |
os.makedirs(model_output_dir, exist_ok=True)
|
| 864 |
|
|
|
|
| 870 |
)
|
| 871 |
|
| 872 |
model_type, config_path, start_check_point = get_model_config(clean_model_name, auto_chunk_size, auto_overlap)
|
| 873 |
+
|
| 874 |
+
# Read the model's native chunk_size from its YAML after download
|
| 875 |
+
native_chunk = get_model_chunk_size(clean_model_name)
|
| 876 |
+
effective_chunk_size = native_chunk if native_chunk else auto_chunk_size
|
| 877 |
+
if native_chunk:
|
| 878 |
+
print(f"Using model's native chunk_size from YAML: {native_chunk} (UI value was: {auto_chunk_size})")
|
| 879 |
|
| 880 |
cmd = [
|
| 881 |
"python", INFERENCE_PATH,
|
|
|
|
| 884 |
"--start_check_point", start_check_point,
|
| 885 |
"--input_folder", INPUT_DIR,
|
| 886 |
"--store_dir", model_output_dir,
|
| 887 |
+
"--chunk_size", str(effective_chunk_size),
|
| 888 |
"--overlap", str(auto_overlap),
|
| 889 |
"--export_format", f"{export_format.split()[0].lower()} FLOAT"
|
| 890 |
]
|
|
|
|
| 904 |
)
|
| 905 |
|
| 906 |
stderr_output = ""
|
| 907 |
+
last_yield_percent = -1
|
| 908 |
+
downloading_file = None
|
| 909 |
+
|
| 910 |
for line in process.stdout:
|
| 911 |
+
line_stripped = line.strip()
|
| 912 |
+
|
| 913 |
+
# Check for download progress [SESA_DOWNLOAD]
|
| 914 |
+
if line_stripped.startswith("[SESA_DOWNLOAD]"):
|
| 915 |
+
try:
|
| 916 |
+
dl_info = line_stripped.replace("[SESA_DOWNLOAD]", "")
|
| 917 |
+
if dl_info.startswith("START:"):
|
| 918 |
+
downloading_file = dl_info.replace("START:", "")
|
| 919 |
+
yield None, i18n("downloading_model_file").format(downloading_file), update_progress_html(
|
| 920 |
+
i18n("downloading_model_file").format(downloading_file),
|
| 921 |
+
i * model_progress_per_step,
|
| 922 |
+
download_info={"filename": downloading_file, "percent": 0}
|
| 923 |
+
)
|
| 924 |
+
elif dl_info.startswith("END:"):
|
| 925 |
+
downloading_file = None
|
| 926 |
+
elif ":" in dl_info:
|
| 927 |
+
parts = dl_info.rsplit(":", 1)
|
| 928 |
+
if len(parts) == 2:
|
| 929 |
+
filename, percent_str = parts
|
| 930 |
+
download_percent = int(percent_str)
|
| 931 |
+
yield None, i18n("downloading_file_progress").format(filename, download_percent), update_progress_html(
|
| 932 |
+
i18n("downloading_model_file").format(filename),
|
| 933 |
+
i * model_progress_per_step,
|
| 934 |
+
download_info={"filename": filename, "percent": download_percent}
|
| 935 |
+
)
|
| 936 |
+
except (ValueError, TypeError):
|
| 937 |
+
pass
|
| 938 |
+
# Check for unique progress prefix [SESA_PROGRESS]
|
| 939 |
+
elif line_stripped.startswith("[SESA_PROGRESS]"):
|
| 940 |
try:
|
| 941 |
+
# Extract percentage from [SESA_PROGRESS]XX format
|
| 942 |
+
percentage_str = line_stripped.replace("[SESA_PROGRESS]", "").strip()
|
| 943 |
+
percentage = float(percentage_str) if percentage_str else 0
|
| 944 |
+
percentage = min(max(percentage, 0), 100) # Clamp to 0-100
|
| 945 |
+
|
| 946 |
model_percentage = (percentage / 100) * model_progress_per_step
|
| 947 |
current_progress = (i * model_progress_per_step) + model_percentage
|
| 948 |
current_progress = clamp_percentage(current_progress)
|
| 949 |
+
|
| 950 |
+
# Yield on every percent change for smooth updates
|
| 951 |
+
if int(percentage) != last_yield_percent:
|
| 952 |
+
last_yield_percent = int(percentage)
|
| 953 |
+
yield None, i18n("loading_model_progress_label").format(i+1, total_models, clean_model_name, int(percentage)), update_progress_html(
|
| 954 |
+
f"Model {i+1}/{total_models}: {clean_model_name} - {int(percentage)}%",
|
| 955 |
+
current_progress
|
| 956 |
+
)
|
| 957 |
+
except (ValueError, TypeError):
|
| 958 |
+
# Silently ignore parsing errors for progress lines
|
| 959 |
+
pass
|
| 960 |
+
# Also support legacy "Progress: XX%" format for backwards compatibility
|
| 961 |
+
elif line_stripped.startswith("Progress:"):
|
| 962 |
+
try:
|
| 963 |
+
match = re.search(r"Progress:\s*(\d+(?:\.\d+)?)%?", line_stripped)
|
| 964 |
+
if match:
|
| 965 |
+
percentage = float(match.group(1))
|
| 966 |
+
percentage = min(max(percentage, 0), 100)
|
| 967 |
+
|
| 968 |
+
model_percentage = (percentage / 100) * model_progress_per_step
|
| 969 |
+
current_progress = (i * model_progress_per_step) + model_percentage
|
| 970 |
+
current_progress = clamp_percentage(current_progress)
|
| 971 |
+
|
| 972 |
+
if int(percentage) != last_yield_percent:
|
| 973 |
+
last_yield_percent = int(percentage)
|
| 974 |
+
yield None, i18n("loading_model_progress_label").format(i+1, total_models, clean_model_name, int(percentage)), update_progress_html(
|
| 975 |
+
f"Model {i+1}/{total_models}: {clean_model_name} - {int(percentage)}%",
|
| 976 |
+
current_progress
|
| 977 |
+
)
|
| 978 |
+
except (ValueError, TypeError):
|
| 979 |
+
pass
|
| 980 |
+
else:
|
| 981 |
+
# Print non-progress lines
|
| 982 |
+
if line_stripped:
|
| 983 |
+
print(line_stripped)
|
| 984 |
|
| 985 |
for line in process.stderr:
|
| 986 |
stderr_output += line
|
|
|
|
| 1167 |
print(f"File not found in Google Drive, copying from local path: {output_path}")
|
| 1168 |
shutil.copy(output_path, drive_output_path)
|
| 1169 |
print(f"Copied to Google Drive: {drive_output_path}")
|
| 1170 |
+
else:
|
| 1171 |
+
drive_output_path = output_path
|
| 1172 |
|
| 1173 |
yield output_path, i18n("success_output_created") + f" Saved to {drive_output_path if IS_COLAB else output_path}", update_progress_html(
|
| 1174 |
i18n("ensemble_completed"), 100
|
pytorch_backend.py
ADDED
|
@@ -0,0 +1,621 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# coding: utf-8
|
| 2 |
+
__author__ = 'PyTorch Backend Implementation'
|
| 3 |
+
|
| 4 |
+
import os
|
| 5 |
+
import pickle
|
| 6 |
+
import numpy as np
|
| 7 |
+
import torch
|
| 8 |
+
import torch.nn as nn
|
| 9 |
+
from typing import Dict, Tuple, Optional, Any
|
| 10 |
+
import warnings
|
| 11 |
+
import hashlib
|
| 12 |
+
import time
|
| 13 |
+
|
| 14 |
+
# Suppress channels_last warnings for 3D audio tensors
|
| 15 |
+
warnings.filterwarnings("ignore", message=".*channels_last.*")
|
| 16 |
+
warnings.filterwarnings("ignore", message=".*rank 3.*")
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
class PyTorchBackend:
|
| 20 |
+
"""
|
| 21 |
+
ULTRA-OPTIMIZED PyTorch backend for model inference.
|
| 22 |
+
Provides various optimization techniques for maximum speed.
|
| 23 |
+
"""
|
| 24 |
+
|
| 25 |
+
def __init__(self, device='cuda:0', optimize_mode='channels_last'):
|
| 26 |
+
"""
|
| 27 |
+
Initialize ULTRA-OPTIMIZED PyTorch backend.
|
| 28 |
+
|
| 29 |
+
Parameters:
|
| 30 |
+
----------
|
| 31 |
+
device : str
|
| 32 |
+
Device to use for inference (cuda:0, cpu, mps, etc.)
|
| 33 |
+
optimize_mode : str
|
| 34 |
+
Optimization mode: 'channels_last' (recommended), 'compile', 'jit', or 'default'
|
| 35 |
+
"""
|
| 36 |
+
self.device = device
|
| 37 |
+
self.optimize_mode = optimize_mode
|
| 38 |
+
self.model = None
|
| 39 |
+
self.compiled_model = None
|
| 40 |
+
|
| 41 |
+
# Check device availability
|
| 42 |
+
if device.startswith('cuda') and not torch.cuda.is_available():
|
| 43 |
+
warnings.warn("CUDA not available, falling back to CPU")
|
| 44 |
+
self.device = 'cpu'
|
| 45 |
+
elif device == 'mps' and not torch.backends.mps.is_available():
|
| 46 |
+
warnings.warn("MPS not available, falling back to CPU")
|
| 47 |
+
self.device = 'cpu'
|
| 48 |
+
|
| 49 |
+
# Apply ultra optimization settings
|
| 50 |
+
self._apply_ultra_optimizations()
|
| 51 |
+
|
| 52 |
+
def _apply_ultra_optimizations(self):
|
| 53 |
+
"""Apply ultra-speed optimizations globally."""
|
| 54 |
+
if self.device.startswith('cuda'):
|
| 55 |
+
# Enable all CUDA optimizations
|
| 56 |
+
torch.backends.cudnn.benchmark = True
|
| 57 |
+
torch.backends.cuda.matmul.allow_tf32 = True
|
| 58 |
+
torch.backends.cudnn.allow_tf32 = True
|
| 59 |
+
|
| 60 |
+
# Set optimal CUDA settings
|
| 61 |
+
torch.backends.cudnn.deterministic = False
|
| 62 |
+
torch.backends.cudnn.enabled = True
|
| 63 |
+
|
| 64 |
+
# Enable cuBLAS optimizations
|
| 65 |
+
os.environ['CUBLAS_WORKSPACE_CONFIG'] = ':4096:8'
|
| 66 |
+
|
| 67 |
+
# Optimize CPU inference
|
| 68 |
+
if self.device == 'cpu':
|
| 69 |
+
import multiprocessing
|
| 70 |
+
num_threads = multiprocessing.cpu_count()
|
| 71 |
+
torch.set_num_threads(num_threads)
|
| 72 |
+
torch.set_num_interop_threads(num_threads)
|
| 73 |
+
print(f"CPU threads set to {num_threads}")
|
| 74 |
+
|
| 75 |
+
def optimize_model(
|
| 76 |
+
self,
|
| 77 |
+
model: nn.Module,
|
| 78 |
+
example_input: Optional[torch.Tensor] = None,
|
| 79 |
+
use_amp: bool = True,
|
| 80 |
+
use_channels_last: bool = True
|
| 81 |
+
) -> nn.Module:
|
| 82 |
+
"""
|
| 83 |
+
Optimize PyTorch model for inference.
|
| 84 |
+
|
| 85 |
+
Parameters:
|
| 86 |
+
----------
|
| 87 |
+
model : nn.Module
|
| 88 |
+
PyTorch model to optimize
|
| 89 |
+
example_input : Optional[torch.Tensor]
|
| 90 |
+
Example input for optimization (required for some modes)
|
| 91 |
+
use_amp : bool
|
| 92 |
+
Use automatic mixed precision (AMP)
|
| 93 |
+
use_channels_last : bool
|
| 94 |
+
Use channels-last memory format
|
| 95 |
+
|
| 96 |
+
Returns:
|
| 97 |
+
-------
|
| 98 |
+
nn.Module
|
| 99 |
+
Optimized model
|
| 100 |
+
"""
|
| 101 |
+
print(f"Optimizing model with mode: {self.optimize_mode}")
|
| 102 |
+
|
| 103 |
+
self.model = model.eval().to(self.device)
|
| 104 |
+
self.use_amp = use_amp
|
| 105 |
+
|
| 106 |
+
# Disable gradients for all parameters (inference only)
|
| 107 |
+
for param in self.model.parameters():
|
| 108 |
+
param.requires_grad = False
|
| 109 |
+
|
| 110 |
+
# Apply memory format optimization (default: channels_last for CUDA)
|
| 111 |
+
# Note: Audio models use 3D tensors, so channels_last is applied only where beneficial
|
| 112 |
+
if use_channels_last and self.device.startswith('cuda'):
|
| 113 |
+
print(" Using channels-last optimization")
|
| 114 |
+
# Only apply to model if it has 4D conv layers, otherwise skip silently
|
| 115 |
+
try:
|
| 116 |
+
with warnings.catch_warnings():
|
| 117 |
+
warnings.simplefilter("ignore")
|
| 118 |
+
self.model = self.model.to(memory_format=torch.channels_last)
|
| 119 |
+
except Exception:
|
| 120 |
+
pass # Silently skip for models that don't support channels_last
|
| 121 |
+
|
| 122 |
+
# Set model to inference mode
|
| 123 |
+
torch.set_grad_enabled(False)
|
| 124 |
+
|
| 125 |
+
# Apply optimization based on mode
|
| 126 |
+
if self.optimize_mode == 'compile':
|
| 127 |
+
self.compiled_model = self._compile_model(self.model)
|
| 128 |
+
elif self.optimize_mode == 'jit':
|
| 129 |
+
if example_input is None:
|
| 130 |
+
raise ValueError("example_input required for JIT optimization")
|
| 131 |
+
self.compiled_model = self._jit_trace_model(self.model, example_input)
|
| 132 |
+
elif self.optimize_mode == 'channels_last':
|
| 133 |
+
self.compiled_model = self.model
|
| 134 |
+
else:
|
| 135 |
+
print(" Using default optimization")
|
| 136 |
+
self.compiled_model = self.model
|
| 137 |
+
|
| 138 |
+
# Apply fusion optimizations if possible
|
| 139 |
+
try:
|
| 140 |
+
if hasattr(torch.nn.utils, 'fusion'):
|
| 141 |
+
self.compiled_model = torch.nn.utils.fusion.fuse_conv_bn_eval(self.compiled_model)
|
| 142 |
+
print(" Conv-BN fusion applied")
|
| 143 |
+
except:
|
| 144 |
+
pass
|
| 145 |
+
|
| 146 |
+
print("Optimization complete")
|
| 147 |
+
return self.compiled_model
|
| 148 |
+
|
| 149 |
+
def _compile_model(self, model: nn.Module) -> nn.Module:
|
| 150 |
+
"""
|
| 151 |
+
Compile model using torch.compile (PyTorch 2.0+) with ULTRA optimization.
|
| 152 |
+
|
| 153 |
+
Parameters:
|
| 154 |
+
----------
|
| 155 |
+
model : nn.Module
|
| 156 |
+
Model to compile
|
| 157 |
+
|
| 158 |
+
Returns:
|
| 159 |
+
-------
|
| 160 |
+
nn.Module
|
| 161 |
+
Compiled model
|
| 162 |
+
"""
|
| 163 |
+
try:
|
| 164 |
+
if hasattr(torch, 'compile'):
|
| 165 |
+
print(" Compiling model with torch.compile")
|
| 166 |
+
# Try max-autotune for best performance
|
| 167 |
+
try:
|
| 168 |
+
compiled = torch.compile(model, mode='max-autotune', fullgraph=True)
|
| 169 |
+
print(" Using max-autotune mode")
|
| 170 |
+
return compiled
|
| 171 |
+
except:
|
| 172 |
+
# Fallback to reduce-overhead
|
| 173 |
+
compiled = torch.compile(model, mode='reduce-overhead')
|
| 174 |
+
print(" Using reduce-overhead mode")
|
| 175 |
+
return compiled
|
| 176 |
+
else:
|
| 177 |
+
print(" torch.compile not available (requires PyTorch 2.0+)")
|
| 178 |
+
return model
|
| 179 |
+
except Exception as e:
|
| 180 |
+
print(f" Compilation failed: {e}")
|
| 181 |
+
return model
|
| 182 |
+
|
| 183 |
+
def _jit_trace_model(self, model: nn.Module, example_input: torch.Tensor) -> nn.Module:
|
| 184 |
+
"""
|
| 185 |
+
Trace model using TorchScript JIT.
|
| 186 |
+
|
| 187 |
+
Parameters:
|
| 188 |
+
----------
|
| 189 |
+
model : nn.Module
|
| 190 |
+
Model to trace
|
| 191 |
+
example_input : torch.Tensor
|
| 192 |
+
Example input for tracing
|
| 193 |
+
|
| 194 |
+
Returns:
|
| 195 |
+
-------
|
| 196 |
+
nn.Module
|
| 197 |
+
Traced model
|
| 198 |
+
"""
|
| 199 |
+
try:
|
| 200 |
+
print(" → Tracing model with TorchScript JIT")
|
| 201 |
+
with torch.no_grad():
|
| 202 |
+
traced = torch.jit.trace(model, example_input)
|
| 203 |
+
traced = torch.jit.optimize_for_inference(traced)
|
| 204 |
+
return traced
|
| 205 |
+
except Exception as e:
|
| 206 |
+
print(f" JIT tracing failed: {e}")
|
| 207 |
+
return model
|
| 208 |
+
|
| 209 |
+
def save_optimized_model(self, save_path: str):
|
| 210 |
+
"""
|
| 211 |
+
Save optimized model to file.
|
| 212 |
+
|
| 213 |
+
Parameters:
|
| 214 |
+
----------
|
| 215 |
+
save_path : str
|
| 216 |
+
Path to save the model
|
| 217 |
+
"""
|
| 218 |
+
if self.compiled_model is None:
|
| 219 |
+
raise RuntimeError("No model has been optimized yet")
|
| 220 |
+
|
| 221 |
+
try:
|
| 222 |
+
# Save based on optimization mode
|
| 223 |
+
if self.optimize_mode == 'jit':
|
| 224 |
+
torch.jit.save(self.compiled_model, save_path)
|
| 225 |
+
else:
|
| 226 |
+
torch.save(self.compiled_model.state_dict(), save_path)
|
| 227 |
+
print(f"✓ Model saved to: {save_path}")
|
| 228 |
+
except Exception as e:
|
| 229 |
+
print(f"✗ Failed to save model: {e}")
|
| 230 |
+
|
| 231 |
+
def load_optimized_model(self, load_path: str, model_template: nn.Module) -> nn.Module:
|
| 232 |
+
"""
|
| 233 |
+
Load optimized model from file.
|
| 234 |
+
|
| 235 |
+
Parameters:
|
| 236 |
+
----------
|
| 237 |
+
load_path : str
|
| 238 |
+
Path to the saved model
|
| 239 |
+
model_template : nn.Module
|
| 240 |
+
Model template for loading state dict
|
| 241 |
+
|
| 242 |
+
Returns:
|
| 243 |
+
-------
|
| 244 |
+
nn.Module
|
| 245 |
+
Loaded model
|
| 246 |
+
"""
|
| 247 |
+
try:
|
| 248 |
+
if self.optimize_mode == 'jit':
|
| 249 |
+
self.compiled_model = torch.jit.load(load_path, map_location=self.device)
|
| 250 |
+
else:
|
| 251 |
+
model_template.load_state_dict(torch.load(load_path, map_location=self.device, weights_only=False))
|
| 252 |
+
self.compiled_model = model_template.eval()
|
| 253 |
+
|
| 254 |
+
print(f"✓ Model loaded from: {load_path}")
|
| 255 |
+
return self.compiled_model
|
| 256 |
+
except (pickle.UnpicklingError, RuntimeError, EOFError) as e:
|
| 257 |
+
error_details = f"""
|
| 258 |
+
CHECKPOINT FILE CORRUPTED
|
| 259 |
+
|
| 260 |
+
Error: {str(e)}
|
| 261 |
+
|
| 262 |
+
The checkpoint file appears to be corrupted or was not downloaded correctly.
|
| 263 |
+
File: {load_path}
|
| 264 |
+
|
| 265 |
+
Common causes:
|
| 266 |
+
- File is an HTML page (wrong download URL, e.g., HuggingFace /blob/ instead of /resolve/)
|
| 267 |
+
- Incomplete or interrupted download
|
| 268 |
+
- Network issues during download
|
| 269 |
+
- File system corruption
|
| 270 |
+
|
| 271 |
+
Solution:
|
| 272 |
+
1. Delete the corrupted checkpoint file:
|
| 273 |
+
{load_path}
|
| 274 |
+
2. Re-run the application - it will automatically re-download the model
|
| 275 |
+
3. If the problem persists, check that your model URL uses /resolve/ not /blob/
|
| 276 |
+
Example: https://huggingface.co/user/repo/resolve/main/model.ckpt
|
| 277 |
+
"""
|
| 278 |
+
print(error_details)
|
| 279 |
+
raise
|
| 280 |
+
except Exception as e:
|
| 281 |
+
print(f"✗ Failed to load model: {e}")
|
| 282 |
+
raise
|
| 283 |
+
|
| 284 |
+
def __call__(self, x: torch.Tensor) -> torch.Tensor:
|
| 285 |
+
"""
|
| 286 |
+
Run inference with optimized model.
|
| 287 |
+
|
| 288 |
+
Parameters:
|
| 289 |
+
----------
|
| 290 |
+
x : torch.Tensor
|
| 291 |
+
Input tensor
|
| 292 |
+
|
| 293 |
+
Returns:
|
| 294 |
+
-------
|
| 295 |
+
torch.Tensor
|
| 296 |
+
Model output
|
| 297 |
+
"""
|
| 298 |
+
if self.compiled_model is None:
|
| 299 |
+
raise RuntimeError("No model has been optimized yet")
|
| 300 |
+
|
| 301 |
+
# Apply memory format if needed (only for 4D tensors - images)
|
| 302 |
+
# Audio models typically use 3D tensors, so we silently skip channels_last for them
|
| 303 |
+
if self.optimize_mode == 'channels_last' and x.dim() == 4:
|
| 304 |
+
x = x.to(memory_format=torch.channels_last)
|
| 305 |
+
|
| 306 |
+
# Run inference with AMP if enabled
|
| 307 |
+
try:
|
| 308 |
+
if self.use_amp and self.device.startswith('cuda'):
|
| 309 |
+
with torch.cuda.amp.autocast():
|
| 310 |
+
with torch.no_grad():
|
| 311 |
+
return self.compiled_model(x)
|
| 312 |
+
else:
|
| 313 |
+
with torch.no_grad():
|
| 314 |
+
return self.compiled_model(x)
|
| 315 |
+
except Exception as e:
|
| 316 |
+
# Fallback to non-compiled model if torch.compile fails at runtime
|
| 317 |
+
# This can happen with rotary embeddings that mutate class variables
|
| 318 |
+
if self.optimize_mode == 'compile' and self.model is not None:
|
| 319 |
+
print(f" ⚠️ torch.compile runtime error: {type(e).__name__}")
|
| 320 |
+
print(f" 🔄 Falling back to non-compiled model...")
|
| 321 |
+
self.compiled_model = self.model
|
| 322 |
+
self.optimize_mode = 'fallback'
|
| 323 |
+
# Retry with non-compiled model
|
| 324 |
+
if self.use_amp and self.device.startswith('cuda'):
|
| 325 |
+
with torch.cuda.amp.autocast():
|
| 326 |
+
with torch.no_grad():
|
| 327 |
+
return self.compiled_model(x)
|
| 328 |
+
else:
|
| 329 |
+
with torch.no_grad():
|
| 330 |
+
return self.compiled_model(x)
|
| 331 |
+
else:
|
| 332 |
+
raise
|
| 333 |
+
|
| 334 |
+
|
| 335 |
+
class PyTorchOptimizer:
    """
    Helper class grouping process-wide PyTorch tuning switches.

    All methods are static; the class holds no state.
    """

    @staticmethod
    def enable_cudnn_benchmark():
        """Enable cuDNN benchmark (autotuning) mode when CUDA is available."""
        if torch.cuda.is_available():
            torch.backends.cudnn.benchmark = True
            torch.backends.cudnn.deterministic = False
            print("cuDNN benchmark enabled")

    @staticmethod
    def enable_cudnn_deterministic():
        """Enable cuDNN deterministic mode for reproducible results."""
        if torch.cuda.is_available():
            torch.backends.cudnn.deterministic = True
            torch.backends.cudnn.benchmark = False
            print("✓ cuDNN deterministic mode enabled")

    @staticmethod
    def enable_tf32():
        """Enable TF32 for Ampere GPUs (RTX 30xx+)."""
        if torch.cuda.is_available():
            torch.backends.cuda.matmul.allow_tf32 = True
            torch.backends.cudnn.allow_tf32 = True
            # 'high' lets float32 matmuls use TF32; 'highest' would keep full
            # float32 precision (the slowest, most accurate setting).
            torch.set_float32_matmul_precision('high')
            print("TF32 enabled")

    @staticmethod
    def set_num_threads(num_threads: int):
        """Set number of threads for CPU inference."""
        torch.set_num_threads(num_threads)
        print(f"✓ Number of threads set to: {num_threads}")

    @staticmethod
    def optimize_for_inference(model: nn.Module) -> nn.Module:
        """
        Prepare a model for inference.

        Switches the model to eval mode, disables autograd globally via
        ``torch.set_grad_enabled(False)`` and sets ``requires_grad = False``
        on every parameter. The model is modified in place.

        Parameters:
        ----------
        model : nn.Module
            Model to optimize

        Returns:
        -------
        nn.Module
            The same model instance, prepared for inference
        """
        model.eval()
        torch.set_grad_enabled(False)

        # Disable gradient computation for all parameters
        for param in model.parameters():
            param.requires_grad = False

        # NOTE: the former fusion attempts were removed. Both
        # torch.quantization.fuse_modules(model, inplace=True) (missing the
        # required modules_to_fuse list) and
        # torch.nn.utils.fusion.fuse_conv_bn_eval(model) (missing the bn module)
        # always raised TypeError, which a bare except swallowed - they never
        # fused anything. Real fusion needs the explicit module names/pairs.

        return model
|
| 411 |
+
|
| 412 |
+
|
| 413 |
+
def benchmark_pytorch_optimizations(
    model: nn.Module,
    input_shape: Tuple[int, ...],
    device: str = 'cuda:0',
    num_iterations: int = 100,
    warmup_iterations: int = 10
) -> Dict[str, Optional[float]]:
    """
    Benchmark different PyTorch optimization techniques.

    Each of the modes 'default', 'compile' and 'channels_last' is timed with a
    fresh PyTorchBackend on the same random input. A mode that fails is
    reported on stdout and recorded as None.

    Parameters:
    ----------
    model : nn.Module
        Model to benchmark (the same instance is reused and modified by every mode)
    input_shape : Tuple[int, ...]
        Input tensor shape
    device : str
        Device to use
    num_iterations : int
        Number of benchmark iterations
    warmup_iterations : int
        Number of warmup iterations

    Returns:
    -------
    Dict[str, Optional[float]]
        Average inference time in milliseconds per mode, or None for a failed mode
    """
    results = {}
    # Created on CPU and moved per backend, because a backend may fall back to
    # another device than the one requested.
    base_input = torch.randn(*input_shape)

    optimization_modes = ['default', 'compile', 'channels_last']

    for mode in optimization_modes:
        print(f"\n{'='*60}")
        print(f"Benchmarking: {mode}")
        print('='*60)

        try:
            backend = PyTorchBackend(device=device, optimize_mode=mode)
            dummy_input = base_input.to(backend.device)
            on_cuda = backend.device.startswith('cuda')

            # Optimize model
            if mode == 'compile':
                backend.optimize_model(model, use_amp=True)
            else:
                backend.optimize_model(
                    model,
                    example_input=dummy_input,
                    use_amp=True,
                    use_channels_last=(mode == 'channels_last')
                )

            # Warmup
            for _ in range(warmup_iterations):
                _ = backend(dummy_input)

            # CUDA kernels run asynchronously: synchronize around the timed region
            if on_cuda:
                torch.cuda.synchronize()

            # perf_counter is monotonic and high-resolution, unlike time.time()
            start = time.perf_counter()
            for _ in range(num_iterations):
                _ = backend(dummy_input)

            if on_cuda:
                torch.cuda.synchronize()

            elapsed = (time.perf_counter() - start) / num_iterations
            results[mode] = elapsed * 1000  # Convert to ms

            print(f" Average time: {results[mode]:.2f} ms")

        except Exception as e:
            print(f" Failed: {e}")
            results[mode] = None

    return results
|
| 490 |
+
|
| 491 |
+
|
| 492 |
+
def create_inference_session(
    model: nn.Module,
    device: str = 'cuda:0',
    optimize_mode: str = 'default',
    enable_amp: bool = True,
    enable_tf32: bool = True,
    enable_cudnn_benchmark: bool = True,
    example_input: Optional[torch.Tensor] = None
) -> PyTorchBackend:
    """
    Create an optimized inference session.

    Parameters:
    ----------
    model : nn.Module
        Model to use for inference
    device : str
        Device to use
    optimize_mode : str
        Optimization mode
    enable_amp : bool
        Enable automatic mixed precision
    enable_tf32 : bool
        Enable TF32 (for Ampere GPUs); only applied for CUDA devices
    enable_cudnn_benchmark : bool
        Enable cuDNN benchmark
    example_input : Optional[torch.Tensor]
        Example input forwarded to the backend; required when
        optimize_mode is 'jit', ignored by the other modes

    Returns:
    -------
    PyTorchBackend
        Configured inference session

    Raises:
    ------
    ValueError
        If optimize_mode is 'jit' and example_input is not provided.
    """
    # Apply global optimizations (the helpers are static, no instance needed)
    if enable_cudnn_benchmark:
        PyTorchOptimizer.enable_cudnn_benchmark()

    if enable_tf32 and device.startswith('cuda'):
        PyTorchOptimizer.enable_tf32()

    # Create backend and prepare the model on it
    backend = PyTorchBackend(device=device, optimize_mode=optimize_mode)
    backend.optimize_model(model, example_input=example_input, use_amp=enable_amp)

    return backend
|
| 537 |
+
|
| 538 |
+
|
| 539 |
+
def convert_model_to_onnx(
    model: nn.Module,
    input_shape: Tuple[int, ...],
    output_path: str,
    opset_version: int = 14
):
    """
    Convert PyTorch model to ONNX format.

    The model is switched to eval mode, exported with a dynamic batch axis
    (axis 0 of 'input' and 'output') and the written file is validated with
    the ONNX checker. Errors are reported on stdout and not raised.

    Parameters:
    ----------
    model : nn.Module
        Model to convert
    input_shape : Tuple[int, ...]
        Input tensor shape
    output_path : str
        Path to save ONNX model
    opset_version : int
        ONNX opset version
    """
    try:
        import onnx

        model.eval()
        # Build the dummy input on the model's own device; a CPU tensor fed to
        # a model living on another device would make the export fail.
        first_param = next(model.parameters(), None)
        if first_param is None:
            dummy_input = torch.randn(*input_shape)
        else:
            dummy_input = torch.randn(*input_shape, device=first_param.device)

        print(f"Converting model to ONNX (opset {opset_version})...")
        with torch.no_grad():
            torch.onnx.export(
                model,
                dummy_input,
                output_path,
                export_params=True,
                opset_version=opset_version,
                do_constant_folding=True,
                input_names=['input'],
                output_names=['output'],
                dynamic_axes={
                    'input': {0: 'batch_size'},
                    'output': {0: 'batch_size'}
                }
            )

        # Verify ONNX model
        onnx_model = onnx.load(output_path)
        onnx.checker.check_model(onnx_model)

        print(f"✓ ONNX model saved to: {output_path}")

    except ImportError:
        print("✗ ONNX not available. Install with: pip install onnx")
    except Exception as e:
        print(f"✗ ONNX conversion failed: {e}")
|
| 591 |
+
|
| 592 |
+
|
| 593 |
+
def get_model_info(model: nn.Module) -> Dict[str, Any]:
    """
    Get information about a PyTorch model.

    Parameters:
    ----------
    model : nn.Module
        Model to analyze

    Returns:
    -------
    Dict[str, Any]
        Model information with the keys 'total_parameters',
        'trainable_parameters', 'model_size_mb' (parameters plus buffers),
        'device' and 'dtype'. 'device' and 'dtype' are taken from the first
        parameter and are None for a model without parameters.
    """
    params = list(model.parameters())

    total_params = sum(p.numel() for p in params)
    trainable_params = sum(p.numel() for p in params if p.requires_grad)

    # Estimate model size from the raw tensor storage of parameters and buffers
    param_size = sum(p.nelement() * p.element_size() for p in params)
    buffer_size = sum(b.nelement() * b.element_size() for b in model.buffers())
    size_mb = (param_size + buffer_size) / (1024 ** 2)

    # A parameterless module (e.g. a bare activation) has no device/dtype to report
    first_param = params[0] if params else None

    return {
        'total_parameters': total_params,
        'trainable_parameters': trainable_params,
        'model_size_mb': size_mb,
        'device': first_param.device if first_param is not None else None,
        'dtype': first_param.dtype if first_param is not None else None
    }
|
requirements.txt
CHANGED
|
@@ -5,6 +5,7 @@ numpy
|
|
| 5 |
pandas
|
| 6 |
scipy
|
| 7 |
torch
|
|
|
|
| 8 |
torchvision
|
| 9 |
torchmetrics
|
| 10 |
ml_collections
|
|
@@ -17,7 +18,7 @@ demucs
|
|
| 17 |
audiomentations
|
| 18 |
torch_audiomentations
|
| 19 |
httpx
|
| 20 |
-
gradio
|
| 21 |
google-api-python-client
|
| 22 |
pytube
|
| 23 |
tqdm
|
|
@@ -37,4 +38,10 @@ pyloudnorm
|
|
| 37 |
gdown
|
| 38 |
validators
|
| 39 |
requests
|
| 40 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
pandas
|
| 6 |
scipy
|
| 7 |
torch
|
| 8 |
+
torchaudio
|
| 9 |
torchvision
|
| 10 |
torchmetrics
|
| 11 |
ml_collections
|
|
|
|
| 18 |
audiomentations
|
| 19 |
torch_audiomentations
|
| 20 |
httpx
|
| 21 |
+
gradio>=4.0
|
| 22 |
google-api-python-client
|
| 23 |
pytube
|
| 24 |
tqdm
|
|
|
|
| 38 |
gdown
|
| 39 |
validators
|
| 40 |
requests
|
| 41 |
+
sympy
|
| 42 |
+
typing_extensions
|
| 43 |
+
packaging
|
| 44 |
+
opencv-python-headless
|
| 45 |
+
pypresence
|
| 46 |
+
mpmath
|
| 47 |
+
pillow
|
utils.py
CHANGED
|
@@ -13,13 +13,39 @@ from omegaconf import OmegaConf
|
|
| 13 |
from tqdm.auto import tqdm
|
| 14 |
from typing import Dict, List, Tuple, Any, Union
|
| 15 |
import loralib as lora
|
| 16 |
-
import gc # For garbage collection
|
| 17 |
-
import logging # Hata takibi için
|
| 18 |
|
| 19 |
-
#
|
| 20 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
|
| 22 |
def load_config(model_type: str, config_path: str) -> Union[ConfigDict, OmegaConf]:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
try:
|
| 24 |
with open(config_path, 'r') as f:
|
| 25 |
if model_type == 'htdemucs':
|
|
@@ -32,6 +58,7 @@ def load_config(model_type: str, config_path: str) -> Union[ConfigDict, OmegaCon
|
|
| 32 |
except Exception as e:
|
| 33 |
raise ValueError(f"Error loading configuration: {e}")
|
| 34 |
|
|
|
|
| 35 |
def get_model_from_config(model_type: str, config_path: str) -> Tuple:
|
| 36 |
"""
|
| 37 |
Load the model specified by the model type and configuration file.
|
|
@@ -101,97 +128,251 @@ def get_model_from_config(model_type: str, config_path: str) -> Tuple:
|
|
| 101 |
elif model_type == 'experimental_mdx23c_stht':
|
| 102 |
from models.mdx23c_tfc_tdf_v3_with_STHT import TFC_TDF_net
|
| 103 |
model = TFC_TDF_net(config)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 104 |
else:
|
| 105 |
raise ValueError(f"Unknown model type: {model_type}")
|
| 106 |
|
| 107 |
return model, config
|
| 108 |
|
|
|
|
| 109 |
def read_audio_transposed(path: str, instr: str = None, skip_err: bool = False) -> Tuple[np.ndarray, int]:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 110 |
try:
|
| 111 |
mix, sr = sf.read(path)
|
| 112 |
-
if len(mix.shape) == 1: # Mono audio
|
| 113 |
-
mix = np.expand_dims(mix, axis=-1)
|
| 114 |
-
return mix.T, sr
|
| 115 |
except Exception as e:
|
| 116 |
if skip_err:
|
| 117 |
print(f"No stem {instr}: skip!")
|
| 118 |
return None, None
|
| 119 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 120 |
|
| 121 |
-
def normalize_audio(audio: np.ndarray) -> Tuple[np.ndarray, Dict[str, float]]:
|
| 122 |
mono = audio.mean(0)
|
| 123 |
mean, std = mono.mean(), mono.std()
|
| 124 |
-
return (audio - mean) /
|
|
|
|
| 125 |
|
| 126 |
def denormalize_audio(audio: np.ndarray, norm_params: Dict[str, float]) -> np.ndarray:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 127 |
return audio * norm_params["std"] + norm_params["mean"]
|
| 128 |
|
|
|
|
| 129 |
def apply_tta(
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
progress=None # Gradio progress nesnesi
|
| 137 |
) -> Dict[str, torch.Tensor]:
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
|
|
|
|
|
|
|
|
|
| 141 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 142 |
for i, augmented_mix in enumerate(track_proc_list):
|
| 143 |
-
|
| 144 |
-
processed_steps += 1
|
| 145 |
-
progress_value = round((processed_steps / total_steps) * 50) # TTA için 0-50% aralığı
|
| 146 |
-
if progress is not None and callable(getattr(progress, '__call__', None)):
|
| 147 |
-
progress(progress_value / 100, desc=f"Applying TTA step {processed_steps}/{total_steps}")
|
| 148 |
-
update_progress_html(f"Applying TTA step {processed_steps}/{total_steps}", progress_value)
|
| 149 |
-
|
| 150 |
-
waveforms = demix(config, model, augmented_mix, device, model_type=model_type, pbar=False, progress=progress)
|
| 151 |
for el in waveforms:
|
| 152 |
if i == 0:
|
| 153 |
-
waveforms_orig[el] += waveforms[el][::-1].
|
| 154 |
else:
|
| 155 |
waveforms_orig[el] -= waveforms[el]
|
| 156 |
-
del waveforms, augmented_mix
|
| 157 |
-
gc.collect()
|
| 158 |
-
if device.startswith('cuda'):
|
| 159 |
-
torch.cuda.empty_cache()
|
| 160 |
|
|
|
|
| 161 |
for el in waveforms_orig:
|
| 162 |
-
waveforms_orig[el] /=
|
| 163 |
-
|
| 164 |
-
# TTA tamamlandı
|
| 165 |
-
if progress is not None and callable(getattr(progress, '__call__', None)):
|
| 166 |
-
progress(0.5, desc="TTA completed")
|
| 167 |
-
update_progress_html("TTA completed", 50)
|
| 168 |
|
| 169 |
return waveforms_orig
|
| 170 |
|
|
|
|
| 171 |
def _getWindowingArray(window_size: int, fade_size: int) -> torch.Tensor:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 172 |
fadein = torch.linspace(0, 1, fade_size)
|
| 173 |
fadeout = torch.linspace(1, 0, fade_size)
|
|
|
|
| 174 |
window = torch.ones(window_size)
|
| 175 |
window[-fade_size:] = fadeout
|
| 176 |
window[:fade_size] = fadein
|
| 177 |
return window
|
| 178 |
|
|
|
|
| 179 |
def demix(
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
| 193 |
|
| 194 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 195 |
if mode == 'demucs':
|
| 196 |
chunk_size = config.training.samplerate * config.training.segment
|
| 197 |
num_instruments = len(config.training.instruments)
|
|
@@ -201,146 +382,272 @@ def demix(
|
|
| 201 |
chunk_size = config.audio.chunk_size
|
| 202 |
num_instruments = len(prefer_target_instrument(config))
|
| 203 |
num_overlap = config.inference.num_overlap
|
|
|
|
| 204 |
fade_size = chunk_size // 10
|
| 205 |
step = chunk_size // num_overlap
|
| 206 |
border = chunk_size - step
|
| 207 |
length_init = mix.shape[-1]
|
| 208 |
-
windowing_array = _getWindowingArray(chunk_size, fade_size)
|
|
|
|
| 209 |
if length_init > 2 * border and border > 0:
|
| 210 |
mix = nn.functional.pad(mix, (border, border), mode="reflect")
|
| 211 |
|
| 212 |
-
batch_size =
|
| 213 |
-
|
| 214 |
-
# Modeli cihaza taşı (ZeroGPU için cuda:0)
|
| 215 |
-
model = model.to(device)
|
| 216 |
-
model.eval()
|
| 217 |
|
| 218 |
-
|
| 219 |
-
total_chunks = (mix.shape[1] + step - 1) // step
|
| 220 |
-
processed_chunks = 0
|
| 221 |
|
| 222 |
-
with torch.
|
| 223 |
-
with torch.
|
|
|
|
| 224 |
req_shape = (num_instruments,) + mix.shape
|
| 225 |
-
result = torch.zeros(req_shape, dtype=torch.
|
| 226 |
-
counter = torch.zeros(req_shape, dtype=torch.
|
| 227 |
|
| 228 |
i = 0
|
| 229 |
batch_data = []
|
| 230 |
batch_locations = []
|
| 231 |
-
|
|
|
|
|
|
|
| 232 |
|
| 233 |
while i < mix.shape[1]:
|
| 234 |
-
|
|
|
|
| 235 |
chunk_len = part.shape[-1]
|
| 236 |
-
|
|
|
|
|
|
|
|
|
|
| 237 |
part = nn.functional.pad(part, (0, chunk_size - chunk_len), mode=pad_mode, value=0)
|
| 238 |
|
| 239 |
batch_data.append(part)
|
| 240 |
batch_locations.append((i, chunk_len))
|
| 241 |
i += step
|
| 242 |
|
|
|
|
| 243 |
if len(batch_data) >= batch_size or i >= mix.shape[1]:
|
| 244 |
-
|
| 245 |
-
|
| 246 |
-
x = model(arr) # Model çıkarımı GPU'da
|
| 247 |
-
|
| 248 |
-
# Sonuçları hemen CPU'ya taşı
|
| 249 |
-
x = x.cpu()
|
| 250 |
|
| 251 |
if mode == "generic":
|
| 252 |
-
window = windowing_array.clone()
|
| 253 |
-
if i - step == 0:
|
| 254 |
window[:fade_size] = 1
|
| 255 |
-
elif i >= mix.shape[1]:
|
| 256 |
window[-fade_size:] = 1
|
| 257 |
|
| 258 |
for j, (start, seg_len) in enumerate(batch_locations):
|
| 259 |
if mode == "generic":
|
| 260 |
-
result[..., start:start + seg_len] +=
|
| 261 |
counter[..., start:start + seg_len] += window[..., :seg_len]
|
| 262 |
else:
|
| 263 |
-
result[..., start:start + seg_len] += x[j, ..., :seg_len]
|
| 264 |
counter[..., start:start + seg_len] += 1.0
|
| 265 |
|
| 266 |
-
# İlerleme güncellemesi
|
| 267 |
-
processed_chunks += len(batch_data)
|
| 268 |
-
progress_value = min(round((processed_chunks / total_chunks) * 100), 100) # %1 hassasiyet
|
| 269 |
-
if progress is not None and callable(getattr(progress, '__call__', None)):
|
| 270 |
-
progress(progress_value / 100, desc=f"Processing chunk {processed_chunks}/{total_chunks}")
|
| 271 |
-
update_progress_html(f"Processing chunk {processed_chunks}/{total_chunks}", progress_value)
|
| 272 |
-
|
| 273 |
-
del arr, x
|
| 274 |
batch_data.clear()
|
| 275 |
batch_locations.clear()
|
| 276 |
-
gc.collect()
|
| 277 |
-
if device.startswith('cuda'):
|
| 278 |
-
torch.cuda.empty_cache()
|
| 279 |
-
logging.info("Cleared CUDA cache")
|
| 280 |
|
| 281 |
-
|
| 282 |
-
|
|
|
|
|
|
|
|
|
|
| 283 |
|
| 284 |
-
|
| 285 |
-
estimated_sources =
|
|
|
|
| 286 |
np.nan_to_num(estimated_sources, copy=False, nan=0.0)
|
| 287 |
|
| 288 |
-
|
| 289 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 290 |
|
| 291 |
-
instruments = config.training.instruments if mode == "demucs" else prefer_target_instrument(config)
|
| 292 |
ret_data = {k: v for k, v in zip(instruments, estimated_sources)}
|
| 293 |
-
logging.info("Demix completed successfully")
|
| 294 |
|
| 295 |
-
|
| 296 |
-
|
| 297 |
-
|
| 298 |
-
|
| 299 |
|
| 300 |
-
return ret_data
|
| 301 |
|
| 302 |
def prefer_target_instrument(config: ConfigDict) -> List[str]:
|
| 303 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 304 |
|
| 305 |
-
def load_not_compatible_weights(model: nn.Module, weights: str, verbose: bool = False) -> None:
|
| 306 |
new_model = model.state_dict()
|
| 307 |
-
old_model = torch.load(weights,
|
| 308 |
if 'state' in old_model:
|
|
|
|
| 309 |
old_model = old_model['state']
|
| 310 |
if 'state_dict' in old_model:
|
|
|
|
| 311 |
old_model = old_model['state_dict']
|
|
|
|
| 312 |
for el in new_model:
|
| 313 |
-
if el in old_model
|
| 314 |
-
|
| 315 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 316 |
|
| 317 |
-
|
| 318 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 319 |
model.load_state_dict(lora_state_dict, strict=False)
|
| 320 |
|
| 321 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 322 |
print(f'Start from checkpoint: {args.start_check_point}')
|
| 323 |
-
|
| 324 |
-
|
| 325 |
-
|
| 326 |
-
|
| 327 |
-
|
| 328 |
-
|
| 329 |
-
|
| 330 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 331 |
|
| 332 |
def bind_lora_to_model(config: Dict[str, Any], model: nn.Module) -> nn.Module:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 333 |
if 'lora' not in config:
|
| 334 |
raise ValueError("Configuration must contain the 'lora' key with parameters for LoRA.")
|
| 335 |
-
|
|
|
|
|
|
|
| 336 |
for name, module in model.named_modules():
|
| 337 |
hierarchy = name.split('.')
|
| 338 |
layer_name = hierarchy[-1]
|
|
|
|
|
|
|
| 339 |
if isinstance(module, nn.Linear):
|
| 340 |
try:
|
|
|
|
| 341 |
parent_module = model
|
| 342 |
for submodule_name in hierarchy[:-1]:
|
| 343 |
parent_module = getattr(parent_module, submodule_name)
|
|
|
|
|
|
|
| 344 |
setattr(
|
| 345 |
parent_module,
|
| 346 |
layer_name,
|
|
@@ -351,23 +658,41 @@ def bind_lora_to_model(config: Dict[str, Any], model: nn.Module) -> nn.Module:
|
|
| 351 |
**config['lora']
|
| 352 |
)
|
| 353 |
)
|
| 354 |
-
replaced_layers += 1
|
|
|
|
| 355 |
except Exception as e:
|
| 356 |
print(f"Error replacing layer {name}: {e}")
|
| 357 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 358 |
return model
|
| 359 |
|
|
|
|
| 360 |
def draw_spectrogram(waveform, sample_rate, length, output_file):
|
|
|
|
|
|
|
|
|
|
|
|
|
| 361 |
import librosa.display
|
|
|
|
|
|
|
| 362 |
x = waveform[:int(length * sample_rate), :]
|
| 363 |
-
X = librosa.stft(x.mean(axis=-1))
|
| 364 |
-
Xdb = librosa.amplitude_to_db(np.abs(X), ref=np.max)
|
| 365 |
fig, ax = plt.subplots()
|
|
|
|
| 366 |
img = librosa.display.specshow(
|
| 367 |
-
Xdb,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 368 |
)
|
| 369 |
ax.set(title='File: ' + os.path.basename(output_file))
|
| 370 |
fig.colorbar(img, ax=ax, format="%+2.f dB")
|
| 371 |
-
if output_file:
|
| 372 |
plt.savefig(output_file)
|
| 373 |
-
plt.close()
|
|
|
|
| 13 |
from tqdm.auto import tqdm
|
| 14 |
from typing import Dict, List, Tuple, Any, Union
|
| 15 |
import loralib as lora
|
|
|
|
|
|
|
| 16 |
|
| 17 |
+
# Fix matplotlib backend for isolated Python environments
|
| 18 |
+
try:
|
| 19 |
+
import matplotlib
|
| 20 |
+
matplotlib.use('Agg') # Non-interactive backend
|
| 21 |
+
import matplotlib.pyplot as plt
|
| 22 |
+
except ImportError:
|
| 23 |
+
plt = None
|
| 24 |
+
|
| 25 |
|
| 26 |
def load_config(model_type: str, config_path: str) -> Union[ConfigDict, OmegaConf]:
|
| 27 |
+
"""
|
| 28 |
+
Load the configuration from the specified path based on the model type.
|
| 29 |
+
|
| 30 |
+
Parameters:
|
| 31 |
+
----------
|
| 32 |
+
model_type : str
|
| 33 |
+
The type of model to load (e.g., 'htdemucs', 'mdx23c', etc.).
|
| 34 |
+
config_path : str
|
| 35 |
+
The path to the YAML or OmegaConf configuration file.
|
| 36 |
+
|
| 37 |
+
Returns:
|
| 38 |
+
-------
|
| 39 |
+
config : Any
|
| 40 |
+
The loaded configuration, which can be in different formats (e.g., OmegaConf or ConfigDict).
|
| 41 |
+
|
| 42 |
+
Raises:
|
| 43 |
+
------
|
| 44 |
+
FileNotFoundError:
|
| 45 |
+
If the configuration file at `config_path` is not found.
|
| 46 |
+
ValueError:
|
| 47 |
+
If there is an error loading the configuration file.
|
| 48 |
+
"""
|
| 49 |
try:
|
| 50 |
with open(config_path, 'r') as f:
|
| 51 |
if model_type == 'htdemucs':
|
|
|
|
| 58 |
except Exception as e:
|
| 59 |
raise ValueError(f"Error loading configuration: {e}")
|
| 60 |
|
| 61 |
+
|
| 62 |
def get_model_from_config(model_type: str, config_path: str) -> Tuple:
|
| 63 |
"""
|
| 64 |
Load the model specified by the model type and configuration file.
|
|
|
|
| 128 |
elif model_type == 'experimental_mdx23c_stht':
|
| 129 |
from models.mdx23c_tfc_tdf_v3_with_STHT import TFC_TDF_net
|
| 130 |
model = TFC_TDF_net(config)
|
| 131 |
+
elif model_type == 'mel_band_roformer_experimental':
|
| 132 |
+
from models.bs_roformer.mel_band_roformer_experimental import MelBandRoformer
|
| 133 |
+
model = MelBandRoformer(**dict(config.model))
|
| 134 |
+
elif model_type == 'bs_roformer_experimental':
|
| 135 |
+
from models.bs_roformer.bs_roformer_experimental import BSRoformer
|
| 136 |
+
model = BSRoformer(**dict(config.model))
|
| 137 |
+
elif model_type == 'bs_roformer_custom':
|
| 138 |
+
from models.bs_roformer.bs_roformer_custom.bs_roformer import BSRoformer
|
| 139 |
+
model = BSRoformer(**dict(config.model))
|
| 140 |
+
elif model_type == 'scnet_tran':
|
| 141 |
+
from models.scnet.scnet_tran import SCNet_Tran
|
| 142 |
+
model = SCNet_Tran(**config.model)
|
| 143 |
+
elif model_type == 'scnet_masked':
|
| 144 |
+
from models.scnet.scnet_masked import SCNet
|
| 145 |
+
model = SCNet(**config.model)
|
| 146 |
+
elif model_type == 'conformer':
|
| 147 |
+
from models.conformer_model import ConformerMSS, NeuralModel
|
| 148 |
+
model = ConformerMSS(
|
| 149 |
+
core=NeuralModel(**config.model),
|
| 150 |
+
n_fft=config.stft.n_fft,
|
| 151 |
+
hop_length=config.stft.hop_length,
|
| 152 |
+
win_length=getattr(config.stft, 'win_length', config.stft.n_fft),
|
| 153 |
+
center=config.stft.center
|
| 154 |
+
)
|
| 155 |
+
elif model_type == 'mel_band_conformer':
|
| 156 |
+
from models.mel_band_conformer import MelBandConformer
|
| 157 |
+
model = MelBandConformer(**config.model)
|
| 158 |
else:
|
| 159 |
raise ValueError(f"Unknown model type: {model_type}")
|
| 160 |
|
| 161 |
return model, config
|
| 162 |
|
| 163 |
+
|
| 164 |
def read_audio_transposed(path: str, instr: str = None, skip_err: bool = False) -> Tuple[np.ndarray, int]:
    """
    Read an audio file and return it with channels as the first dimension.

    Parameters
    ----------
    path : str
        Path to the audio file.
    instr : str, optional
        Name of the instrument (stem); only used in the message printed when
        a read failure is skipped.
    skip_err : bool
        If True, a read failure prints a message and returns ``(None, None)``
        instead of raising.

    Returns
    -------
    Tuple[np.ndarray, int]
        - Audio data with shape (channels, length). Mono audio is returned
          with shape (1, length).
        - Sampling rate as returned by ``sf.read``.
        ``(None, None)`` is returned when reading failed and `skip_err` is True.

    Raises
    ------
    RuntimeError
        If reading fails and `skip_err` is False. The underlying exception is
        chained as ``__cause__`` so the original traceback is preserved.
    """
    try:
        mix, sr = sf.read(path)
    except Exception as e:
        if skip_err:
            print(f"No stem {instr}: skip!")
            return None, None
        # Chain the original error so the real cause stays visible to callers.
        raise RuntimeError(f"Error reading the file at {path}: {e}") from e

    if len(mix.shape) == 1:  # Mono audio: add an explicit channel axis
        mix = np.expand_dims(mix, axis=-1)
    return mix.T, sr
|
| 197 |
+
|
| 198 |
+
|
| 199 |
+
def normalize_audio(audio: np.ndarray) -> tuple[np.ndarray, Dict[str, float]]:
    """
    Normalize an audio signal by subtracting the mean and dividing by the standard deviation.

    The statistics are computed on the signal averaged over axis 0, and the
    same scalar mean/std are then applied to every element of `audio`.

    Parameters:
    ----------
    audio : np.ndarray
        Input audio array with shape (channels, time) or (time,).

    Returns:
    -------
    tuple[np.ndarray, dict[str, float]]
        - Normalized audio array with the same shape as the input.
        - Dictionary with the 'mean' and 'std' that were applied. Passing it
          to `denormalize_audio` restores the original signal.

    Notes:
    -----
    When the standard deviation is exactly zero (silent or constant input),
    a scale of 1.0 is used and reported instead, so the output stays finite
    rather than turning into NaN/inf.
    """
    mono = audio.mean(0)
    mean, std = mono.mean(), mono.std()
    if std == 0:
        # Avoid division by zero; a unit scale keeps the round trip exact.
        std = 1.0
    return (audio - mean) / std, {"mean": mean, "std": std}
|
| 218 |
+
|
| 219 |
|
| 220 |
def denormalize_audio(audio: np.ndarray, norm_params: Dict[str, float]) -> np.ndarray:
    """
    Undo a mean/std normalization.

    The input is multiplied by the stored standard deviation and the stored
    mean is added back.

    Parameters:
    ----------
    audio : np.ndarray
        Normalized audio array.
    norm_params : dict[str, float]
        Mapping with the 'std' and 'mean' values used when normalizing.

    Returns:
    -------
    np.ndarray
        Rescaled audio array.
    """
    scale = norm_params["std"]
    offset = norm_params["mean"]
    restored = audio * scale
    return restored + offset
|
| 239 |
|
| 240 |
+
|
| 241 |
def apply_tta(
        config,
        model: torch.nn.Module,
        mix: torch.Tensor,
        waveforms_orig: Dict[str, torch.Tensor],
        device: torch.device,
        model_type: str
) -> Dict[str, torch.Tensor]:
    """
    Apply Test-Time Augmentation (TTA) for source separation.

    Two augmented copies of the mixture are separated with `demix`: one with
    the first axis reversed and one with inverted polarity. Each result is
    mapped back (re-reversed, or subtracted to undo the sign flip) and
    accumulated into `waveforms_orig`, which is then divided by the number of
    estimates (the original plus the two augmentations).

    Parameters:
    ----------
    config : Any
        Configuration object forwarded to `demix`.
    model : torch.nn.Module
        The trained model used for source separation.
    mix : torch.Tensor
        The mixed audio.
        NOTE(review): the body uses `[::-1]` and `.copy()`, which are
        NumPy-style operations - confirm callers pass arrays.
    waveforms_orig : Dict[str, torch.Tensor]
        Separated waveforms obtained without augmentation, keyed by
        instrument. Updated in place.
    device : torch.device
        Device forwarded to `demix`.
    model_type : str
        Model type forwarded to `demix`.

    Returns:
    -------
    Dict[str, torch.Tensor]
        The same `waveforms_orig` mapping, holding the averaged estimates.
    """
    # Augmentation 0: reversed first axis; augmentation 1: inverted polarity
    reversed_mix = mix[::-1].copy()
    negated_mix = -1.0 * mix.copy()
    track_proc_list = [reversed_mix, negated_mix]

    for aug_index, augmented_mix in enumerate(track_proc_list):
        separated = demix(config, model, augmented_mix, device, model_type=model_type)
        undo_reversal = aug_index == 0
        for stem in separated:
            if undo_reversal:
                # Restore the original order before accumulating
                waveforms_orig[stem] += separated[stem][::-1].copy()
            else:
                # Subtracting cancels the polarity inversion
                waveforms_orig[stem] -= separated[stem]

    # Original estimate + one estimate per augmentation
    estimate_count = len(track_proc_list) + 1
    for stem in waveforms_orig:
        waveforms_orig[stem] /= estimate_count

    return waveforms_orig
|
| 293 |
|
| 294 |
+
|
| 295 |
def _getWindowingArray(window_size: int, fade_size: int) -> torch.Tensor:
|
| 296 |
+
"""
|
| 297 |
+
Generate a windowing array with a linear fade-in at the beginning and a fade-out at the end.
|
| 298 |
+
|
| 299 |
+
This function creates a window of size `window_size` where the first `fade_size` elements
|
| 300 |
+
linearly increase from 0 to 1 (fade-in) and the last `fade_size` elements linearly decrease
|
| 301 |
+
from 1 to 0 (fade-out). The middle part of the window is filled with ones.
|
| 302 |
+
|
| 303 |
+
Parameters:
|
| 304 |
+
----------
|
| 305 |
+
window_size : int
|
| 306 |
+
The total size of the window.
|
| 307 |
+
fade_size : int
|
| 308 |
+
The size of the fade-in and fade-out regions.
|
| 309 |
+
|
| 310 |
+
Returns:
|
| 311 |
+
-------
|
| 312 |
+
torch.Tensor
|
| 313 |
+
A tensor of shape (window_size,) containing the generated windowing array.
|
| 314 |
+
|
| 315 |
+
Example:
|
| 316 |
+
-------
|
| 317 |
+
If `window_size=10` and `fade_size=3`, the output will be:
|
| 318 |
+
tensor([0.0000, 0.5000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 0.5000, 0.0000])
|
| 319 |
+
"""
|
| 320 |
+
|
| 321 |
fadein = torch.linspace(0, 1, fade_size)
|
| 322 |
fadeout = torch.linspace(1, 0, fade_size)
|
| 323 |
+
|
| 324 |
window = torch.ones(window_size)
|
| 325 |
window[-fade_size:] = fadeout
|
| 326 |
window[:fade_size] = fadein
|
| 327 |
return window
|
| 328 |
|
| 329 |
+
|
| 330 |
def demix(
|
| 331 |
+
config: ConfigDict,
|
| 332 |
+
model: torch.nn.Module,
|
| 333 |
+
mix: torch.Tensor,
|
| 334 |
+
device: torch.device,
|
| 335 |
+
model_type: str,
|
| 336 |
+
pbar: bool = False
|
| 337 |
+
) -> Tuple[List[Dict[str, np.ndarray]], np.ndarray]:
|
| 338 |
+
"""
|
| 339 |
+
Unified function for audio source separation with support for multiple processing modes.
|
| 340 |
+
|
| 341 |
+
This function separates audio into its constituent sources using either a generic custom logic
|
| 342 |
+
or a Demucs-specific logic. It supports batch processing and overlapping window-based chunking
|
| 343 |
+
for efficient and artifact-free separation.
|
| 344 |
|
| 345 |
+
Parameters:
|
| 346 |
+
----------
|
| 347 |
+
config : ConfigDict
|
| 348 |
+
Configuration object containing audio and inference settings.
|
| 349 |
+
model : torch.nn.Module
|
| 350 |
+
The trained model used for audio source separation.
|
| 351 |
+
mix : torch.Tensor
|
| 352 |
+
Input audio tensor with shape (channels, time).
|
| 353 |
+
device : torch.device
|
| 354 |
+
The computation device (CPU or CUDA).
|
| 355 |
+
model_type : str, optional
|
| 356 |
+
Processing mode:
|
| 357 |
+
- "demucs" for logic specific to the Demucs model.
|
| 358 |
+
Default is "generic".
|
| 359 |
+
pbar : bool, optional
|
| 360 |
+
If True, displays a progress bar during chunk processing. Default is False.
|
| 361 |
+
|
| 362 |
+
Returns:
|
| 363 |
+
-------
|
| 364 |
+
Union[Dict[str, np.ndarray], np.ndarray]
|
| 365 |
+
- A dictionary mapping target instruments to separated audio sources if multiple instruments are present.
|
| 366 |
+
- A numpy array of the separated source if only one instrument is present.
|
| 367 |
+
"""
|
| 368 |
+
|
| 369 |
+
mix = torch.tensor(mix, dtype=torch.float32)
|
| 370 |
+
|
| 371 |
+
if model_type == 'htdemucs':
|
| 372 |
+
mode = 'demucs'
|
| 373 |
+
else:
|
| 374 |
+
mode = 'generic'
|
| 375 |
+
# Define processing parameters based on the mode
|
| 376 |
if mode == 'demucs':
|
| 377 |
chunk_size = config.training.samplerate * config.training.segment
|
| 378 |
num_instruments = len(config.training.instruments)
|
|
|
|
| 382 |
chunk_size = config.audio.chunk_size
|
| 383 |
num_instruments = len(prefer_target_instrument(config))
|
| 384 |
num_overlap = config.inference.num_overlap
|
| 385 |
+
|
| 386 |
fade_size = chunk_size // 10
|
| 387 |
step = chunk_size // num_overlap
|
| 388 |
border = chunk_size - step
|
| 389 |
length_init = mix.shape[-1]
|
| 390 |
+
windowing_array = _getWindowingArray(chunk_size, fade_size)
|
| 391 |
+
# Add padding for generic mode to handle edge artifacts
|
| 392 |
if length_init > 2 * border and border > 0:
|
| 393 |
mix = nn.functional.pad(mix, (border, border), mode="reflect")
|
| 394 |
|
| 395 |
+
batch_size = config.inference.batch_size
|
|
|
|
|
|
|
|
|
|
|
|
|
| 396 |
|
| 397 |
+
use_amp = getattr(config.training, 'use_amp', True)
|
|
|
|
|
|
|
| 398 |
|
| 399 |
+
with torch.cuda.amp.autocast(enabled=use_amp):
|
| 400 |
+
with torch.inference_mode():
|
| 401 |
+
# Initialize result and counter tensors
|
| 402 |
req_shape = (num_instruments,) + mix.shape
|
| 403 |
+
result = torch.zeros(req_shape, dtype=torch.float32)
|
| 404 |
+
counter = torch.zeros(req_shape, dtype=torch.float32)
|
| 405 |
|
| 406 |
i = 0
|
| 407 |
batch_data = []
|
| 408 |
batch_locations = []
|
| 409 |
+
progress_bar = tqdm(
|
| 410 |
+
total=mix.shape[1], desc="Processing audio chunks", leave=False
|
| 411 |
+
) if pbar else None
|
| 412 |
|
| 413 |
while i < mix.shape[1]:
|
| 414 |
+
# Extract chunk and apply padding if necessary
|
| 415 |
+
part = mix[:, i:i + chunk_size].to(device)
|
| 416 |
chunk_len = part.shape[-1]
|
| 417 |
+
if mode == "generic" and chunk_len > chunk_size // 2:
|
| 418 |
+
pad_mode = "reflect"
|
| 419 |
+
else:
|
| 420 |
+
pad_mode = "constant"
|
| 421 |
part = nn.functional.pad(part, (0, chunk_size - chunk_len), mode=pad_mode, value=0)
|
| 422 |
|
| 423 |
batch_data.append(part)
|
| 424 |
batch_locations.append((i, chunk_len))
|
| 425 |
i += step
|
| 426 |
|
| 427 |
+
# Process batch if it's full or the end is reached
|
| 428 |
if len(batch_data) >= batch_size or i >= mix.shape[1]:
|
| 429 |
+
arr = torch.stack(batch_data, dim=0)
|
| 430 |
+
x = model(arr)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 431 |
|
| 432 |
if mode == "generic":
|
| 433 |
+
window = windowing_array.clone() # using clone() fixes the clicks at chunk edges when using batch_size=1
|
| 434 |
+
if i - step == 0: # First audio chunk, no fadein
|
| 435 |
window[:fade_size] = 1
|
| 436 |
+
elif i >= mix.shape[1]: # Last audio chunk, no fadeout
|
| 437 |
window[-fade_size:] = 1
|
| 438 |
|
| 439 |
for j, (start, seg_len) in enumerate(batch_locations):
|
| 440 |
if mode == "generic":
|
| 441 |
+
result[..., start:start + seg_len] += x[j, ..., :seg_len].cpu() * window[..., :seg_len]
|
| 442 |
counter[..., start:start + seg_len] += window[..., :seg_len]
|
| 443 |
else:
|
| 444 |
+
result[..., start:start + seg_len] += x[j, ..., :seg_len].cpu()
|
| 445 |
counter[..., start:start + seg_len] += 1.0
|
| 446 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 447 |
batch_data.clear()
|
| 448 |
batch_locations.clear()
|
|
|
|
|
|
|
|
|
|
|
|
|
| 449 |
|
| 450 |
+
if progress_bar:
|
| 451 |
+
progress_bar.update(step)
|
| 452 |
+
|
| 453 |
+
if progress_bar:
|
| 454 |
+
progress_bar.close()
|
| 455 |
|
| 456 |
+
# Compute final estimated sources
|
| 457 |
+
estimated_sources = result / counter
|
| 458 |
+
estimated_sources = estimated_sources.cpu().numpy()
|
| 459 |
np.nan_to_num(estimated_sources, copy=False, nan=0.0)
|
| 460 |
|
| 461 |
+
# Remove padding for generic mode
|
| 462 |
+
if mode == "generic":
|
| 463 |
+
if length_init > 2 * border and border > 0:
|
| 464 |
+
estimated_sources = estimated_sources[..., border:-border]
|
| 465 |
+
|
| 466 |
+
# Return the result as a dictionary or a single array
|
| 467 |
+
if mode == "demucs":
|
| 468 |
+
instruments = config.training.instruments
|
| 469 |
+
else:
|
| 470 |
+
instruments = prefer_target_instrument(config)
|
| 471 |
|
|
|
|
| 472 |
ret_data = {k: v for k, v in zip(instruments, estimated_sources)}
|
|
|
|
| 473 |
|
| 474 |
+
if mode == "demucs" and num_instruments <= 1:
|
| 475 |
+
return estimated_sources
|
| 476 |
+
else:
|
| 477 |
+
return ret_data
|
| 478 |
|
|
|
|
| 479 |
|
| 480 |
def prefer_target_instrument(config: ConfigDict) -> List[str]:
    """
    Select the instruments the model is expected to output.

    Parameters:
    ----------
    config : ConfigDict
        Configuration whose `training` section holds `instruments` and,
        optionally, `target_instrument`.

    Returns:
    -------
    List[str]
        A one-element list with `training.target_instrument` when that
        setting is present and truthy; otherwise `training.instruments`.
    """
    training_cfg = config.training
    target = getattr(training_cfg, 'target_instrument', None)
    if not target:
        return training_cfg.instruments
    return [target]
|
| 500 |
+
|
| 501 |
+
|
| 502 |
+
def load_not_compatible_weights(model: torch.nn.Module, weights: str, verbose: bool = False) -> None:
    """
    Load weights into a model, handling mismatched shapes and dimensions.

    For every entry of the model's state dict:
      * same shape in the checkpoint -> the checkpoint tensor is used as is;
      * same number of dimensions but different sizes -> the checkpoint
        tensor is copied into a zero-filled float32 tensor and cropped to the
        model's shape (so missing positions become zeros);
      * different number of dimensions, or key missing -> the model's current
        value is kept.

    Args:
        model: PyTorch model into which the weights will be loaded.
        weights: Path to the weights file.
        verbose: If True, prints detailed information about matching and mismatched layers.
    """
    new_model = model.state_dict()
    # SECURITY NOTE: weights_only=False unpickles arbitrary objects. Only load
    # checkpoints that come from a trusted source.
    # map_location='cpu' lets checkpoints saved on a GPU load on CPU-only
    # hosts; load_state_dict copies the values into the model's own tensors.
    old_model = torch.load(weights, map_location='cpu', weights_only=False)
    if 'state' in old_model:
        # Fix for htdemucs weights loading
        old_model = old_model['state']
    if 'state_dict' in old_model:
        # Fix for apollo weights loading
        old_model = old_model['state_dict']

    for el in new_model:
        if el not in old_model:
            if verbose:
                print(f'Match not found for {el}!')
            continue

        if verbose:
            print(f'Match found for {el}!')
        new_shape = tuple(new_model[el].shape)
        old_shape = tuple(old_model[el].shape)

        if new_shape == old_shape:
            if verbose:
                print('Action: Just copy weights!')
            new_model[el] = old_model[el]
            continue

        if len(new_shape) != len(old_shape):
            if verbose:
                print('Action: Different dimension! Too lazy to write the code... Skip it')
            continue

        if verbose:
            print(f'Shape is different: {new_shape} != {old_shape}')
        # Build a zero tensor large enough for both shapes, drop the old
        # weights into its leading corner, then crop to the model's shape.
        max_shape = [max(n, o) for n, o in zip(new_shape, old_shape)]
        slices_old = tuple(slice(0, o) for o in old_shape)
        slices_new = tuple(slice(0, n) for n in new_shape)
        max_matrix = torch.zeros(max_shape, dtype=torch.float32)
        max_matrix[slices_old] = old_model[el].detach().cpu()
        new_model[el] = max_matrix[slices_new]

    model.load_state_dict(new_model)
|
| 559 |
|
| 560 |
+
|
| 561 |
+
def load_lora_weights(model: torch.nn.Module, lora_path: str, device: str = 'cpu') -> None:
    """
    Load a LoRA checkpoint into a model.

    The checkpoint is read with `torch.load` and applied with
    `strict=False`, so it does not have to cover the model's full state
    dictionary.

    Parameters:
    ----------
    model : Module
        The PyTorch model that receives the weights (updated in place).
    lora_path : str
        Path to the LoRA checkpoint file.
    device : str, optional
        `map_location` used when reading the checkpoint, by default 'cpu'.

    Returns:
    -------
    None
    """
    checkpoint = torch.load(
        lora_path,
        weights_only=False,
        map_location=device,
    )
    model.load_state_dict(checkpoint, strict=False)
|
| 583 |
|
| 584 |
+
|
| 585 |
+
def load_start_checkpoint(args: argparse.Namespace, model: torch.nn.Module, type_='train') -> None:
    """
    Load the starting checkpoint for a model.

    Args:
        args: Parsed command-line arguments. `args.start_check_point` is the
            checkpoint path; `args.model_type` is read when `type_` is not
            'train'.
        model: PyTorch model to load the checkpoint into.
        type_: how to load weights. For 'train' the checkpoint goes through
            `load_not_compatible_weights`, which tolerates partially matching
            weights. Any other value loads the state dict strictly, mapped to
            the CPU.
    """
    print(f'Start from checkpoint: {args.start_check_point}')
    if type_ == 'train':
        load_not_compatible_weights(model, args.start_check_point, verbose=False)
        return

    device = 'cpu'
    if args.model_type in ['htdemucs', 'apollo']:
        # SECURITY NOTE: weights_only=False unpickles arbitrary objects; these
        # checkpoints must come from a trusted source.
        state_dict = torch.load(args.start_check_point, map_location=device, weights_only=False)
        # Fix for htdemucs pretrained models
        if 'state' in state_dict:
            state_dict = state_dict['state']
        # Fix for apollo pretrained models
        if 'state_dict' in state_dict:
            state_dict = state_dict['state_dict']
    else:
        state_dict = torch.load(args.start_check_point, map_location=device, weights_only=True)
    model.load_state_dict(state_dict)
|
| 614 |
+
|
| 615 |
|
| 616 |
def bind_lora_to_model(config: Dict[str, Any], model: nn.Module) -> nn.Module:
|
| 617 |
+
"""
|
| 618 |
+
Replaces specific layers in the model with LoRA-extended versions.
|
| 619 |
+
|
| 620 |
+
Parameters:
|
| 621 |
+
----------
|
| 622 |
+
config : Dict[str, Any]
|
| 623 |
+
Configuration containing parameters for LoRA. It should include a 'lora' key with parameters for `MergedLinear`.
|
| 624 |
+
model : nn.Module
|
| 625 |
+
The original model in which the layers will be replaced.
|
| 626 |
+
|
| 627 |
+
Returns:
|
| 628 |
+
-------
|
| 629 |
+
nn.Module
|
| 630 |
+
The modified model with the replaced layers.
|
| 631 |
+
"""
|
| 632 |
+
|
| 633 |
if 'lora' not in config:
|
| 634 |
raise ValueError("Configuration must contain the 'lora' key with parameters for LoRA.")
|
| 635 |
+
|
| 636 |
+
replaced_layers = 0 # Counter for replaced layers
|
| 637 |
+
|
| 638 |
for name, module in model.named_modules():
|
| 639 |
hierarchy = name.split('.')
|
| 640 |
layer_name = hierarchy[-1]
|
| 641 |
+
|
| 642 |
+
# Check if this is the target layer to replace (and layer_name == 'to_qkv')
|
| 643 |
if isinstance(module, nn.Linear):
|
| 644 |
try:
|
| 645 |
+
# Get the parent module
|
| 646 |
parent_module = model
|
| 647 |
for submodule_name in hierarchy[:-1]:
|
| 648 |
parent_module = getattr(parent_module, submodule_name)
|
| 649 |
+
|
| 650 |
+
# Replace the module with LoRA-enabled layer
|
| 651 |
setattr(
|
| 652 |
parent_module,
|
| 653 |
layer_name,
|
|
|
|
| 658 |
**config['lora']
|
| 659 |
)
|
| 660 |
)
|
| 661 |
+
replaced_layers += 1 # Increment the counter
|
| 662 |
+
|
| 663 |
except Exception as e:
|
| 664 |
print(f"Error replacing layer {name}: {e}")
|
| 665 |
+
|
| 666 |
+
if replaced_layers == 0:
|
| 667 |
+
print("Warning: No layers were replaced. Check the model structure and configuration.")
|
| 668 |
+
else:
|
| 669 |
+
print(f"Number of layers replaced with LoRA: {replaced_layers}")
|
| 670 |
+
|
| 671 |
return model
|
| 672 |
|
| 673 |
+
|
| 674 |
def draw_spectrogram(waveform, sample_rate, length, output_file):
|
| 675 |
+
if plt is None:
|
| 676 |
+
print("Warning: matplotlib not available, skipping spectrogram")
|
| 677 |
+
return
|
| 678 |
+
|
| 679 |
import librosa.display
|
| 680 |
+
|
| 681 |
+
# Cut only required part of spectorgram
|
| 682 |
x = waveform[:int(length * sample_rate), :]
|
| 683 |
+
X = librosa.stft(x.mean(axis=-1)) # perform short-term fourier transform on mono signal
|
| 684 |
+
Xdb = librosa.amplitude_to_db(np.abs(X), ref=np.max) # convert an amplitude spectrogram to dB-scaled spectrogram.
|
| 685 |
fig, ax = plt.subplots()
|
| 686 |
+
# plt.figure(figsize=(30, 10)) # initialize the fig size
|
| 687 |
img = librosa.display.specshow(
|
| 688 |
+
Xdb,
|
| 689 |
+
cmap='plasma',
|
| 690 |
+
sr=sample_rate,
|
| 691 |
+
x_axis='time',
|
| 692 |
+
y_axis='linear',
|
| 693 |
+
ax=ax
|
| 694 |
)
|
| 695 |
ax.set(title='File: ' + os.path.basename(output_file))
|
| 696 |
fig.colorbar(img, ax=ax, format="%+2.f dB")
|
| 697 |
+
if output_file is not None:
|
| 698 |
plt.savefig(output_file)
|
|
|