ASesYusuf1 committed on
Commit
ddadeb4
·
1 Parent(s): 24d9ef5

SESA: GitHub'dan güncel dosyalar aktarıldı - 2026-03-18 23:04

Browse files
assets/config.json CHANGED
@@ -2,5 +2,10 @@
2
  "lang": {
3
  "override": true,
4
  "selected_lang": "en_us"
 
 
 
 
 
5
  }
6
  }
 
2
  "lang": {
3
  "override": true,
4
  "selected_lang": "en_us"
5
+ },
6
+ "sharing": {
7
+ "method": "gradio",
8
+ "ngrok_token": "",
9
+ "port": 8142
10
  }
11
  }
assets/custom_models.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {}
assets/i18n/languages/ar_sa.json CHANGED
@@ -17,11 +17,10 @@
17
  "overlap": "التداخل",
18
  "overlap_info": "موصى به: 2-10 (القيم الأعلى تزيد من الجودة ولكن تتطلب المزيد من VRAM)",
19
  "tta_boost": "تعزيز TTA",
20
- "tta_info": "يحسن الجودة ولكنه يبطئ سرعة المعالجة",
21
  "phase_fix": "تصحيح الطور",
 
22
  "phase_fix_info": "تصحيح طور متقدم للمسارات الآلية",
23
  "instrumental": "آلي",
24
- "instrumental_info": "عادةً يعطي مخرجين، لكن هناك أوقات يعطي فيها مخرجًا واحدًا، تحتاج إلى التجربة :)",
25
  "process": "🚀 معالجة",
26
  "reset": "🧹 إعادة تعيين",
27
  "status": "الحالة",
@@ -71,99 +70,210 @@
71
  "output_preview": "معاينة الإخراج",
72
  "refresh_output": "🔄 تحديث الإخراج",
73
  "ensemble_copy_status": "سيتم نسخ مخرجات التجميع هنا...",
74
- "guidelines": "<h4>إرشادات اختيار النموذج</h4><ul><li><strong>تجنب الخلط بين الفئات:</strong> قد يؤدي الجمع بين نماذج الأصوات والآلات إلى خلط غير مرغوب فيه</li><li><strong>ملاحظات نموذج خاصة:</strong><ul><li>نماذج الازدواجية (v1/v2) - تخرج كلا الجذعين</li><li>MDX23C Separator - نتائج هجينة</li></ul></li><li><strong>أفضل الممارسات:</strong> استخدم 3-5 نماذج متشابهة من نفس الفئة</li></ul><div>💡 نصيحة احترافية: ابدأ بتركيبة \"VOCALS-MelBand-Roformer BigBeta5e\" + \"VOCALS-BS-Roformer_1297\"</div>",
75
- "download_sources_tab": "⬇️ تحميل المصادر",
76
- "direct_links": "🌐 روابط مباشرة",
77
  "audio_file_url": "رابط ملف الصوت",
78
- "download_from_url": "⬇️ تحميل من الرابط",
79
- "download_status": "حالة التحميل",
80
  "downloaded_file": "الملف المحمل",
81
  "cookie_management": "🍪 إدارة الكوكيز",
82
- "upload_cookies_txt": "رفع ملف Cookies.txt",
83
- "cookie_info": "<div style=\"margin-left:15px; font-size:0.95em\">**📌 لماذا هو ضروري؟**<br>- الوصول إلى المحتوى المقيد بالعمر<br>- تحميل الفيديوهات الخاصة/غير المدرجة<br>- تجاوز القيود الإقليمية<br>- تجنب حدود تحميل يوتيوب<br><br>**⚠️ ملاحظات هامة**<br>- لا تشارك ملفات الكوكيز الخاصة بك أبدًا!<br>- قم بتحديث الكوكيز عندما:<br> • تظهر أخطاء \"403 ممنوع\"<br> • تتوقف التنزيلات فجأة<br> • تظهر رسائل \"انتهت الجلسة\"<br><br>**🔄 خطوات التجديد**<br>1. قم بتثبيت هذا <a href=\"https://chromewebstore.google.com/detail/get-cookiestxt-clean/ahmnmhfbokciafffnknlekllgcnafnie\" target=\"_blank\">امتداد كروم</a><br>2. سجل الدخول إلى يوتيوب في كروم<br>3. انقر على أيقونة الامتداد → \"تصدير\"<br>4. ارفع الملف المحمل هنا<br><br>**⏳ عمر الكوكيز**<br>- الجلسات العادية: 24 ساعة<br>- العمليات الحساسة: ساعة واحدة<br>- تغييرات كلمة المرور: إبطال فوري</div>",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84
  "manual_ensemble_tab": "🎚️ التجميع اليدوي",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
  "input_sources": "📂 مصادر الإدخال",
86
  "refresh": "🔄 تحديث",
87
  "ensemble_algorithm": "خوارزمية التجميع",
88
- "select_audio_files": "اختر ملفات الصوت",
89
  "available_files": "الملفات المتاحة",
90
- "custom_weights": "أوزان مخصصة (مفصولة بفواصل)",
91
- "custom_weights_placeholder": "مثال: 0.8، 1.2، 1.0، ...",
92
- "custom_weights_info": "اتركه فارغًا لأوزان متساوية",
93
  "result_preview_tab": "🎧 معاينة النتيجة",
94
- "ensembled_output": "مخرج التجميع",
95
  "processing_log_tab": "📋 سجل المعالجة",
96
  "processing_details": "تفاصيل المعالجة",
97
  "process_ensemble": "⚡ معالجة التجميع",
98
  "languages_tab": "🌐 اللغات",
99
  "Vocal Models": "نماذج الأصوات",
100
- "Instrumental Models": "نماذج الآلات",
101
- "4-Stem Models": "نماذج 4 جذور",
102
  "Denoise Models": "نماذج إزالة الضوضاء",
103
  "Dereverb Models": "نماذج إزالة الصدى",
104
- "Other Models": "نماذج أخرى",
105
- "ensemble_files_description": "يجمع عدة ملفات صوتية في مخرج واحد باستخدام طرق التجميع المحددة.",
 
106
  "ensemble_files_help": "مسار جميع ملفات الصوت للتجميع",
107
- "ensemble_type_help": "واحد من avg_wave، median_wave، min_wave، max_wave، avg_fft، median_fft، min_fft، max_fft",
108
- "ensemble_weights_help": "الأوزان لإنشاء التجميع. يجب أن يتساوى عدد الأوزان مع عدد الملفات",
109
- "ensemble_output_help": "مسار ملف WAV حيث سيتم حفظ نتيجة التجميع",
110
  "ensemble_type_print": "نوع التجميع: {}",
111
  "num_input_files_print": "عدد ملفات الإدخال: {}",
112
  "weights_print": "الأوزان: {}",
113
  "output_file_print": "ملف الإخراج: {}",
114
  "duration_mismatch_error": "يجب أن تكون جميع الملفات بنفس المدة",
115
- "file_not_found_error": "خطأ. لا يمكن العثور على الملف: {}. تحقق من المسارات.",
116
  "reading_chunk_print": "قراءة قطعة من الملف: {} (البداية: {}ث، المدة: {}ث)",
117
  "chunk_result_shape_print": "شكل نتيجة القطعة: {}",
118
  "ensemble_completed_print": "اكتمل التجميع. تم حفظ الإخراج في: {}",
119
- "file_deletion_error": "لا يمكن حذف {}: {}",
120
- "directory_not_exist_warning": "⚠️ الدليل غير موجود: {}",
121
- "not_a_directory_warning": "⚠️ المسار ليس دليلًا: {}",
122
  "item_deletion_error": "⚠️ خطأ في حذف {}: {}",
 
123
  "old_output_not_exist": "❌ مجلد الإخراج القديم غير موجود",
124
- "old_outputs_cleared": "✅ تم مسح المخرجات القديمة بنجاح!",
125
  "error": "🔥 خطأ: {}",
126
  "ffmpeg_error": "خطأ FFmpeg ({}): {}",
127
  "file_saved_successfully": "تم حفظ الملف بنجاح: {}",
128
- "total_files_found": "إجمالي الملفات الموجودة: {}. باستخدام معدل العينة: {}",
129
- "total_progress": "التقدم الإجمالي",
130
- "detailed_pbar_enabled": "تم تفعيل شريط التقدم التفصيلي: {}",
131
- "loaded_audio": "الصوت المحمل: {}، الشكل: {}",
132
- "cannot_read_track": "لا يمكن قراءة المسار: {}",
133
  "error_message": "رسالة الخطأ: {}",
134
  "demudding_track": "تنظيف المسار (إعادة مزج الطور - آلي): {}",
135
  "elapsed_time": "الوقت المنقضي: {:.2f} ثانية.",
136
- "proc_folder_description": "يعالج ملفات الصوت في مجلد باستخدام نموذج محدد.",
137
- "model_type_help": "نوع النموذج (bandit، bs_roformer، mdx23c، إلخ.)",
138
  "config_path_help": "مسار ملف التكوين",
139
- "demud_phaseremix_help": "تفعيل إعادة مزج الطور للآلي",
140
- "start_checkpoint_help": "نقطة التفتيش الأولية للأوزان الصالحة",
141
- "input_folder_help": "مجلد يحتوي على الخلطات للمعالجة",
142
  "audio_path_help": "مسار ملف صوتي واحد للمعالجة",
143
- "store_dir_help": "مسار لتخزين النتائج",
144
  "device_ids_help": "قائمة معرفات GPU",
145
- "extract_instrumental_help": "قلب الأصوات للحصول على الآلي إذا تم توفيره",
146
- "disable_detailed_pbar_help": "تعطيل شريط التقدم التفصيلي في الفصل",
147
- "force_cpu_help": "فرض استخدام CPU حتى لو كان CUDA متاحًا",
148
  "flac_file_help": "إخراج ملف FLAC بدلاً من WAV",
149
- "export_format_help": "صيغة التصدير ونوع PCM",
150
  "pcm_type_help": "نوع PCM لملفات FLAC",
151
- "use_tta_help": "تفعيل التكبير في وقت الاختبار",
152
  "lora_checkpoint_help": "نقطة التفتيش الأولية لأوزان LoRA",
153
  "cuda_available": "CUDA متاح، استخدم --force_cpu لتعطيله.",
154
- "using_device": "استخدام الجهاز: {}",
155
- "instruments_print": "الآلات: {}",
156
  "model_load_time": "وقت تحميل النموذج: {:.2f} ثانية",
157
  "invalid_url": "❌ رابط غير صالح",
158
  "cookie_file_updated": "✅ تم تحديث ملف الكوكيز!",
159
  "cookie_installation_error": "⚠️ خطأ في تثبيت الكوكيز: {}",
 
160
  "file_size_zero_error": "حجم الملف صفر أو لم يتم إنشاء الملف",
161
- "google_drive_error": "❌ خطأ في تحميل Google Drive: {}",
162
  "wav_conversion_failed": "فشل تحويل WAV",
163
- "download_error": "❌ خطأ في التحميل: {}",
164
- "download_success": "🎉 تم التحميل بنجاح!",
165
- "download_failed": "❌ فشل التحميل",
166
- "no_output_files_found": " لم يتم العثور على ملفات إخراج.",
 
 
167
  "output_refreshed_successfully": "✅ تم تحديث الإخراج بنجاح!",
168
  "error_refreshing_output": "❌ خطأ في تحديث الإخراج: {}",
169
  "starting_audio_separation": "بدء فصل الصوت...",
@@ -172,109 +282,64 @@
172
  "separation_complete": "اكتمل الفصل!",
173
  "progress_parsing_error": "خطأ في تحليل التقدم: {}",
174
  "error_occurred": "حدث خطأ: {}",
175
- "separation_process_completed": "اكتمل عملية الفصل!",
176
- "no_audio_file_error": "❌ لم يتم توفير ملف صوتي ولا يوجد ملف موجود في دليل الإدخال.",
177
- "no_input_progress_label": "خطأ: لم يتم توفير إدخال -- 0.0%",
178
  "processing_audio_print": "معالجة الصوت من: {} باستخدام النموذج: {}",
179
  "starting_audio_separation_progress_label": "بدء فصل الصوت... -- 0.0%",
180
  "separating_audio_progress_label": "فصل الصوت... -- {}.0%",
181
  "audio_processing_completed_progress_label": "اكتملت معالجة الصوت! -- 100.0%",
182
  "audio_processing_completed": "✅ اكتملت معالجة الصوت!",
183
  "error_occurred_progress_label": "حدث خطأ -- 0.0%",
184
- "minimum_files_required": "⚠️ الحد الأدنى 2 ملف مطلوب",
185
  "valid_files_not_found": "❌ لم يتم العثور على ملفات صالحة",
186
  "starting_ensemble_process": "بدء عملية التجميع...",
187
- "ensembling_progress": "التجميع... ({:.1f}%)",
188
  "finalizing_ensemble_output": "إنهاء إخراج التجميع...",
189
  "success_log": "✅ نجاح!\n{}",
190
  "error_log": "❌ خطأ!\n{}",
191
- "critical_error": "⛔ خطأ حرج: {}",
192
  "ensemble_process_completed": "اكتملت عملية التجميع!",
193
- "no_models_selected": "❌ لم يتم اختيار نماذج",
194
- "no_input_audio_provided": "❌ لم يتم توفير صوت إدخال",
195
- "loading_model": "تحميل النموذج {}/{}: {}...",
196
- "loading_model_progress_label": "تحميل النموذج {}/{}: {}... -- {}.0%",
197
  "completed_model": "اكتمل النموذج {}/{}: {}",
198
  "completed_model_progress_label": "اكتمل النموذج {}/{}: {} -- {}.0%",
199
  "running_command": "تشغيل الأمر: {}",
200
  "model_failed": "فشل النموذج {}: {}",
201
- "critical_error_with_model": "خطأ حرج مع {}: {}",
202
- "model_output_failed": "فشل {} في إنتاج مخرج",
203
  "waiting_for_files": "في انتظار جاهزية جميع الملفات...",
204
  "waiting_for_files_progress_label": "في انتظار جاهزية جميع الملفات... -- 90.0%",
205
- "performing_ensemble": "تنفيذ التجميع...",
206
- "performing_ensemble_progress_label": "تنفيذ التجميع... -- 92.0%",
207
  "memory_usage_before_ensemble": "استخدام الذاكرة قبل التجميع: {}%",
208
  "memory_usage_after_ensemble": "استخدام الذاكرة بعد التجميع: {}%",
209
  "finalizing_ensemble_output_progress_label": "إنهاء إخراج التجميع... -- 98.0%",
210
- "ensemble_file_creation_failed": "لم يتم إنشاء ملف التجميع: {}",
211
  "ensemble_completed_progress_label": "اكتمل التجميع بنجاح! -- 100.0%",
212
  "success_output_created": "✅ نجاح! تم إنشاء ملف الإخراج.",
213
- "drive_mounted_copying_ensemble": "تم ربط Google Drive. يتم نسخ مخرج التجميع...",
214
- "drive_already_mounted_copying_ensemble": "Google Drive مربوط بالفعل. يتم نسخ مخرج التجميع...",
215
- "no_ensemble_output_files_found": "❌ لم يتم العثور على ملفات إخراج التجميع.",
216
- "ensemble_output_copied": "✅ تم نسخ مخرج التجميع إلى {}",
217
- "error_copying_ensemble_output": "❌ خطأ في نسخ مخرج التجميع: {}",
218
- "drive_mounted_copying_files": "تم ربط Google Drive. يتم نسخ الملفات...",
219
- "drive_already_mounted_copying_files": "Google Drive مربوط بالفعل. يتم نسخ الملفات...",
220
- "files_copied_to_drive": "✅ تم نسخ الملفات إلى {}",
221
- "error_copying_files": "❌ خطأ في نسخ الملفات: {}",
222
- "mounting_drive": "ربط Google Drive...",
223
- "settings_saved": "تم حفظ الإعدادات في config.json: اللغة={lang}، الطريقة={method}، المنفذ={port}",
224
- "selected_language": "اللغة المختارة: {lang}",
225
- "available_languages": "اللغات المتاحة: {langs}",
226
- "select_language_prompt": "اختر لغة (الافتراضي: {default}): ",
227
- "invalid_language": "اختيار لغة غير صالح! استخدام اللغة الافتراضية ({default}).",
228
- "available_sharing_methods": "طرق المشاركة المتاحة: gradio، localtunnel، ngrok",
229
- "select_sharing_method_prompt": "اختر طريقة مشاركة (الافتراضي: {default}): ",
230
- "enter_ngrok_token_prompt": "أدخل رمز Ngrok الخاص بك (الافتراضي: {default}): ",
231
- "ngrok_token_required": "خطأ: رمز Ngrok مطلوب لطريقة ngrok!",
232
- "enter_port_prompt": "أدخل رقم المنفذ (الافتراضي: {default}): ",
233
- "opening_previous_url": "فتح الرابط السابق: {url}",
234
- "open_url_manually": "افتح الرابط يدويًا في Colab: {url}",
235
- "invalid_previous_url": "رابط سابق غير صالح: {url}، لم يتم فتحه.",
236
- "starting_method": "بدء {method} على المنفذ {port}...",
237
- "process_stopped": "\n🛑 تم إيقاف العملية من قبل المستخدم",
238
- "selected_port": "المنفذ المختار: {port}",
239
- "starting_gradio_with_sharing": "بدء Gradio مع المشاركة المدمجة...",
240
- "starting_localtunnel": "بدء Localtunnel على المنفذ {port}...",
241
- "share_link": "رابط المشاركة: {url}",
242
- "password_ip": "كلمة مرور IP: {ip}",
243
- "starting_ngrok": "بدء Ngrok على المنفذ {port}...",
244
- "ngrok_url": "رابط Ngrok: {url}",
245
- "ngrok_error": "خطأ في بدء Ngrok: {error}",
246
- "apollo_chunk_size": "حجم قطعة Apollo",
247
- "apollo_chunk_size_info": "حجم القطعة لتحسين Apollo (موصى به: 19 للنموذج العام)",
248
- "apollo_overlap": "تداخل Apollo",
249
- "enhancing_with_apollo": "تحسين مع Apollo ({}/{} ملفات)...",
250
- "apollo_processing_completed": "اكتملت معالجة Apollo!",
251
- "apollo_overlap_info": "التداخل لتحسين Apollo (موصى به: 2)",
252
- "invalid_method": "خطأ: طريقة غير صالحة! استخدم 'gradio'، 'localtunnel'، أو 'ngrok'.",
253
- "apollo_enhancement_settings": "إعدادات تحسين Apollo",
254
- "enhance_with_apollo": "تحسين مع Apollo",
255
- "enhance_with_apollo_info": "تفعيل Apollo لتحسين الصوت بعد الفصل",
256
- "apollo_method": "طريقة Apollo",
257
- "apollo_method_info": "اختر طريقة المعالجة لـ Apollo",
258
- "normal_method": "الطريقة العادية",
259
- "mid_side_method": "طريقة الوسط/الجانب",
260
- "apollo_normal_model": "نموذج Apollo العادي",
261
- "apollo_normal_model_info": "النموذج المستخدم لمعالجة Apollo العادية",
262
- "apollo_midside_model": "نموذج Apollo الوسط/الجانب",
263
- "apollo_enhancement_info": "تحسين جودة الصوت",
264
- "selected_models": "النماذج المختارة",
265
- "save_preset": "حفظ الإعداد المسبق",
266
- "delete_preset": "حذف الإعداد المسبق",
267
- "refresh_presets": "تحديث الإعدادات المسبقة",
268
- "preset_name": "اسم الإعداد المسبق",
269
- "select_preset": "اختيار إعداد مسبق",
270
- "add_favorite": "إضافة إلى المفضلة",
271
- "apollo_midside_model_info": "النموذج المستخدم لمعالجة الوسط/الجانب (اختياري)",
272
- "language_changed_message": "تم تغيير اللغة. يرجى إعادة تشغيل الخلية.",
273
- "apply_matchering": "تطبيق Matchering",
274
- "matchering_info": "تطبيق Matchering لتحسين جودة الصوت وتوازنه",
275
- "matchering_passes": "تمريرات Matchering",
276
- "matchering_passes_info": "عدد التكرارات لـ Matchering (1-5، القيم الأعلى تحسن الجودة ولكن تستغرق وقتًا أطول)",
277
- "matchering_processing": "جارٍ تطبيق Matchering... ({:.1f}%)",
278
- "matchering_completed": "✅ اكتمل معالجة Matchering!",
279
- "matchering_error": "❌ حدث خطأ أثناء Matchering: {}"
280
  }
 
17
  "overlap": "التداخل",
18
  "overlap_info": "موصى به: 2-10 (القيم الأعلى تزيد من الجودة ولكن تتطلب المزيد من VRAM)",
19
  "tta_boost": "تعزيز TTA",
 
20
  "phase_fix": "تصحيح الطور",
21
+ "use_phase_correction": "تطبيق تصحيح طور متقدم",
22
  "phase_fix_info": "تصحيح طور متقدم للمسارات الآلية",
23
  "instrumental": "آلي",
 
24
  "process": "🚀 معالجة",
25
  "reset": "🧹 إعادة تعيين",
26
  "status": "الحالة",
 
70
  "output_preview": "معاينة الإخراج",
71
  "refresh_output": "🔄 تحديث الإخراج",
72
  "ensemble_copy_status": "سيتم نسخ مخرجات التجميع هنا...",
73
+ "guidelines": "<h4>إرشادات اختيار النموذج</h4><ul><li><strong>تجنب الخلط بين الفئات:</strong> قد يؤدي الجمع بين نماذج الأصوات والآلات إلى خلط غير مرغوب فيه</li><li><strong>ملاحظات نموذج خاصة:</strong><ul><li>نماذج الازدواجية (v1/v2) - تنتج كلا الستيمات</li><li>فاصل MDX23C - نتائج هجينة</li></ul></li><li><strong>أفضل ممارسة:</strong> استخدم 3-5 نماذج مشابهة من نفس الفئة</li></ul><div>💡 نصيحة محترف: ابدأ بمزيج \"VOCALS-MelBand-Roformer BigBeta5e\" + \"VOCALS-BS-Roformer_1297\"</div>",
74
+ "download_sources_tab": "⬇️ تنزيل المصادر",
75
+ "direct_links": "🌐 الروابط المباشرة",
76
  "audio_file_url": "رابط ملف الصوت",
77
+ "download_from_url": "⬇️ تنزيل من الرابط",
78
+ "download_status": "حالة التنزيل",
79
  "downloaded_file": "الملف المحمل",
80
  "cookie_management": "🍪 إدارة الكوكيز",
81
+ "upload_cookies_txt": "رفع Cookies.txt",
82
+ "cookie_info": "<div style=\"margin-left:15px; font-size:0.95em\">**📌 لماذا مطلوب؟**<br>- الوصول إلى المحتوى المقيد بالعمر<br>- تنزيل الفيديوهات الخاصة/غير المدرجة<br>- تجاوز القيود الإقليمية<br>- تجنب حدود تنزيل يوتيوب<br><br>**⚠️ ملاحظات مهمة**<br>- لا تشارك ملفات الكوكيز الخاصة بك أبدًا!<br>- قم بتحديث الكوكيز عند:<br> • الحصول على أخطاء \"403 ممنوع\"<br> • توقف التنزيلات فجأة<br> • رؤية رسائل \"انتهت صلاحية الجلسة\"<br><br>**🔄 خطوات التحديث**<br>1. قم بتثبيت <a href=\"https://chromewebstore.google.com/detail/get-cookiestxt-clean/ahmnmhfbokciafffnknlekllgcnafnie\" target=\"_blank\">إضافة كروم هذه</a><br>2. تسجيل الدخول إلى يوتيوب في كروم<br>3. انقر على أيقونة الإضافة → \"تصدير\"<br>4. قم برفع الملف المحمل هنا<br><br>**⏳ عمر الكوكيز**<br>- الجلسات العادية: 24 ساعة<br>- العمليات الحساسة: 1 ساعة<br>- تغيير كلمة المرور: تحديث فوري</div>",
83
+ "ensemble_output_copied": "✅ تم نسخ إخراج التجميع إلى {}",
84
+ "error_copying_ensemble_output": "❌ خطأ في نسخ إخراج التجميع: {}",
85
+ "drive_mounted_copying_ensemble": "تم تحميل Google Drive. نسخ إخراج التجميع...",
86
+ "drive_already_mounted_copying_ensemble": "Google Drive محمل بالفعل. نسخ إخراج التجميع...",
87
+ "no_ensemble_output_files_found": "❌ لم يتم العثور على ملفات إخراج تجميع.",
88
+ "drive_mounted_copying_files": "تم تحميل Google Drive. نسخ الملفات...",
89
+ "drive_already_mounted_copying_files": "Google Drive محمل بالفعل. نسخ الملفات...",
90
+ "files_copied_to_drive": "✅ تم نسخ الملفات إلى {}",
91
+ "error_copying_files": "❌ خطأ في نسخ الملفات: {}",
92
+ "mounting_drive": "تحميل Google Drive...",
93
+ "settings_saved": "تم حفظ الإعدادات في config.json: اللغة={lang}, الطريقة={method}, المنفذ={port}",
94
+ "selected_language": "اللغة المختارة: {lang}",
95
+ "available_languages": "اللغات المتاحة: {langs}",
96
+ "select_language_prompt": "اختر لغة (افتراضي: {default}): ",
97
+ "invalid_language": "اختيار لغة غير صالح! استخدام اللغة الافتراضية ({default}).",
98
+ "available_sharing_methods": "طرق المشاركة المتاحة: gradio، localtunnel، ngrok",
99
+ "select_sharing_method_prompt": "اختر طريقة مشاركة (الافتراضي: {default}): ",
100
+ "enter_ngrok_token_prompt": "أدخل رمز Ngrok الخاص بك (الافتراضي: {default}): ",
101
+ "ngrok_token_required": "خطأ: رمز Ngrok مطلوب لطريقة ngrok!",
102
+ "enter_port_prompt": "أدخل رقم المنفذ (الافتراضي: {default}): ",
103
+ "opening_previous_url": "فتح الرابط السابق: {url}",
104
+ "open_url_manually": "افتح الرابط يدويًا في Colab: {url}",
105
+ "invalid_previous_url": "رابط سابق غير صالح: {url}، لم يتم فتحه.",
106
+ "starting_method": "بدء {method} على المنفذ {port}...",
107
+ "process_stopped": "\n🛑 تم إيقاف العملية من قبل المستخدم",
108
+ "selected_port": "المنفذ المختار: {port}",
109
+ "starting_gradio_with_sharing": "بدء Gradio مع المشاركة المدمجة...",
110
+ "starting_localtunnel": "بدء Localtunnel على المنفذ {port}...",
111
+ "share_link": "رابط المشاركة: {url}",
112
+ "password_ip": "كلمة مرور IP: {ip}",
113
+ "starting_ngrok": "بدء Ngrok على المنفذ {port}...",
114
+ "ngrok_url": "رابط Ngrok: {url}",
115
+ "ngrok_error": "خطأ في بدء Ngrok: {error}",
116
+ "auto_apollo_chunk_size": "حجم قطعة Apollo",
117
+ "auto_apollo_chunk_size_info": "حجم القطعة لتحسين Apollo (موصى به: 19 للنموذج العام)",
118
+ "auto_apollo_overlap": "تداخل Apollo",
119
+ "auto_enhancing_with_apollo": "تحسين مع Apollo ({}/{} ملفات)...",
120
+ "auto_apollo_processing_completed": "اكتملت معالجة Apollo!",
121
+ "auto_apollo_overlap_info": "التداخل لتحسين Apollo (موصى به: 2)",
122
+ "apollo_chunk_size": "حجم قطعة Apollo",
123
+ "apollo_chunk_size_info": "حجم القطعة لتحسين Apollo (موصى به: 19 للنموذج العام)",
124
+ "apollo_overlap": "تداخل Apollo",
125
+ "enhance_with_apollo": "تحسين مع Apollo",
126
+ "apollo_enhancement_info": "تحسين جودة الصوت",
127
+ "enhancing_with_apollo": "تحسين مع Apollo ({}/{} ملفات)...",
128
+ "apollo_processing_completed": "اكتملت معالجة Apollo!",
129
+ "apollo_overlap_info": "التداخل لتحسين Apollo (موصى به: 2)",
130
+ "selected_models": "النماذج المختارة",
131
+ "save_preset": "حفظ الإعداد المسبق",
132
+ "delete_preset": "حذف الإعداد المسبق",
133
+ "refresh_presets": "تحديث الإعدادات المسبقة",
134
+ "preset_name": "اسم الإعداد المسبق",
135
+ "select_preset": "اختيار إعداد مسبق",
136
+ "add_favorite": "إضافة إلى المفضلة",
137
+ "apply_matchering": "تطبيق Matchering",
138
+ "matchering_info": "تطبيق Matchering لتحسين جودة الصوت وتوازنه",
139
+ "matchering_passes": "تمريرات Matchering",
140
+ "matchering_passes_info": "عدد التكرارات لـ Matchering (1-5، القيم الأعلى تحسن الجودة ولكن تستغرق وقتًا أطول)",
141
+ "matchering_processing": "جارٍ تطبيق Matchering... ({:.1f}%)",
142
+ "matchering_completed": "✅ اكتملت معالجة Matchering!",
143
+ "matchering_error": "❌ حدث خطأ أثناء Matchering: {}",
144
+ "invalid_method": "خطأ: طريقة غير صالحة! استخدم 'gradio'، 'localtunnel'، أو 'ngrok'.",
145
+ "language_changed_message": "تم تغيير اللغة. يرجى إعادة تشغيل الخلية.",
146
+ "backend_settings": "🚀 إعدادات الخلفية",
147
+ "inference_backend": "خلفية الاستدلال",
148
+ "optimization_mode": "وضع التحسين",
149
+ "optimization_mode_info": "اختيار وضع تحسين PyTorch",
150
+ "mixed_precision_amp": "الدقة المختلطة (AMP)",
151
+ "mixed_precision_info": "استدلال أسرع 2x - موصى به",
152
+ "tf32_acceleration": "تسريع TF32",
153
+ "tf32_acceleration_info": "زيادة سرعة إضافية لـ RTX 30xx+",
154
+ "cudnn_benchmark": "معيار cuDNN",
155
+ "cudnn_benchmark_info": "أبطأ في التشغيل الأول، أسرع بكثير في التشغيلات اللاحقة",
156
+ "ultra_optimized_pytorch": "خلفية PyTorch فائقة التحسين",
157
+ "default_active_max_speed": "نشط افتراضيًا - تحسين سرعة قصوى",
158
+ "channels_last_mode": "الأسرع لـ RTX GPUs",
159
+ "compile_mode": "سرعة إضافية لـ PyTorch 2.0+",
160
+ "default_mode": "قياسي",
161
+ "tta_info": "يزيد من الجودة لكنه يبطئ المعالجة",
162
+ "direct_download_error": "❌ خطأ في التنزيل المباشر: {}",
163
+ "invalid_google_drive_url": "❌ رابط Google Drive غير صالح",
164
+ "continuing_without_google_drive": "فشل الاتصال بـ Google Drive، الاستمرار بدونه...",
165
+ "skipping_drive_copy_no_mount": "⚠️ Google Drive غير محمل، تخطي النسخ إلى Drive...",
166
+ "direct_download_failed": "فشل تنزيل الملف مباشرة",
167
+ "instrumental_info": "عادةً ينتج 2 ستيمات، لكن أحيانًا واحدة فقط - تحتاج إلى التجربة! :)",
168
  "manual_ensemble_tab": "🎚️ التجميع اليدوي",
169
+ "phase_fixer_tab": "🔧 مصحح الطور",
170
+ "batch_processing_tab": "📦 المعالجة الدفعية",
171
+ "batch_description": "معالجة ملفات متعددة في وقت واحد بنفس النموذج",
172
+ "batch_input_folder": "مجلد الإدخال",
173
+ "batch_input_folder_placeholder": "أدخل مسار المجلد الذي يحتوي على ملفات الصوت",
174
+ "batch_output_folder": "مجلد الإخراج",
175
+ "batch_output_folder_placeholder": "أدخل مسار المجلد لملفات الإخراج",
176
+ "batch_file_list": "الملفات المراد معالجتها",
177
+ "batch_add_files": "إضافة ملفات",
178
+ "batch_clear_list": "مسح القائمة",
179
+ "batch_start": "🚀 بدء المعالجة الدفعية",
180
+ "batch_stop": "⏹️ إيقاف",
181
+ "batch_progress": "تقدم المعالجة الدفعية",
182
+ "batch_current_file": "يُعالَج حاليًا",
183
+ "batch_completed": "✅ اكتملت المعالجة الدفعية!",
184
+ "batch_stopped": "⏹️ تم إيقاف المعالجة الدفعية",
185
+ "batch_no_files": "❌ لم يتم اختيار ملفات للمعالجة",
186
+ "source_model": "النموذج المصدر (أصوات)",
187
+ "source_model_info": "نموذج لاستخراج الأصوات/بيانات الطور",
188
+ "target_model": "النموذج الهدف (آلي)",
189
+ "target_model_info": "النموذج الذي يتم تطبيق تصحيح الطور عليه",
190
  "input_sources": "📂 مصادر الإدخال",
191
  "refresh": "🔄 تحديث",
192
  "ensemble_algorithm": "خوارزمية التجميع",
193
+ "select_audio_files": "اختيار ملفات الصوت",
194
  "available_files": "الملفات المتاحة",
195
+ "custom_weights": "أوزان مخصصة (مفصولة بفاصلة)",
196
+ "custom_weights_placeholder": "مثال: 0.8, 1.2, 1.0, ...",
197
+ "custom_weights_info": "اتركه فارغًا للأوزان المتساوية",
198
  "result_preview_tab": "🎧 معاينة النتيجة",
199
+ "ensembled_output": "الإخراج المجمَّع",
200
  "processing_log_tab": "📋 سجل المعالجة",
201
  "processing_details": "تفاصيل المعالجة",
202
  "process_ensemble": "⚡ معالجة التجميع",
203
  "languages_tab": "🌐 اللغات",
204
  "Vocal Models": "نماذج الأصوات",
205
+ "Instrumental Models": "النماذج الآلية",
206
+ "4-Stem Models": "نماذج 4 مسارات",
207
  "Denoise Models": "نماذج إزالة الضوضاء",
208
  "Dereverb Models": "نماذج إزالة الصدى",
209
+ "Other Shapes": "نماذج أخرى",
210
+ "Custom Models": "النماذج المخصصة",
211
+ "ensemble_files_description": "يجمع ملفات صوتية متعددة في إخراج واحد باستخدام طرق تجميع محددة.",
212
  "ensemble_files_help": "مسار جميع ملفات الصوت للتجميع",
213
+ "ensemble_type_help": "أحد: avg_wave, median_wave, min_wave, max_wave, avg_fft, median_fft, min_fft, max_fft",
214
+ "ensemble_weights_help": "الأوزان لإنشاء التجميع. يجب أن يساوي عدد الأوزان عدد الملفات",
215
+ "ensemble_output_help": "مسار ملف WAV حيث سيتم تخزين نتيجة التجميع",
216
  "ensemble_type_print": "نوع التجميع: {}",
217
  "num_input_files_print": "عدد ملفات الإدخال: {}",
218
  "weights_print": "الأوزان: {}",
219
  "output_file_print": "ملف الإخراج: {}",
220
  "duration_mismatch_error": "يجب أن تكون جميع الملفات بنفس المدة",
221
+ "file_not_found_error": "خطأ. تعذر العثور على الملف: {}. تحقق من المسارات.",
222
  "reading_chunk_print": "قراءة قطعة من الملف: {} (البداية: {}ث، المدة: {}ث)",
223
  "chunk_result_shape_print": "شكل نتيجة القطعة: {}",
224
  "ensemble_completed_print": "اكتمل التجميع. تم حفظ الإخراج في: {}",
225
+ "file_deletion_error": "تعذر حذف {}: {}",
226
+ "directory_not_exist_warning": "⚠️ المجلد غير موجود: {}",
227
+ "not_a_directory_warning": "⚠️ المسار ليس مجلدًا: {}",
228
  "item_deletion_error": "⚠️ خطأ في حذف {}: {}",
229
+ "critical_error": "⛔ خطأ فادح: {}",
230
  "old_output_not_exist": "❌ مجلد الإخراج القديم غير موجود",
231
+ "old_outputs_cleared": "✅ تم مسح الإخراجات القديمة بنجاح!",
232
  "error": "🔥 خطأ: {}",
233
  "ffmpeg_error": "خطأ FFmpeg ({}): {}",
234
  "file_saved_successfully": "تم حفظ الملف بنجاح: {}",
235
+ "total_files_found": "إجمالي الملفات الموجودة: {}. معدل العينات المستخدم: {}",
236
+ "total_progress": "التقدم الكلي",
237
+ "detailed_pbar_enabled": "شريط التقدم التفصيلي مُفعَّل: {}",
238
+ "loaded_audio": "تم تحميل الصوت: {}، الشكل: {}",
239
+ "cannot_read_track": "تعذر قراءة المسار: {}",
240
  "error_message": "رسالة الخطأ: {}",
241
  "demudding_track": "تنظيف المسار (إعادة مزج الطور - آلي): {}",
242
  "elapsed_time": "الوقت المنقضي: {:.2f} ثانية.",
243
+ "proc_folder_description": "معالجة ملفات الصوت في مجلد باستخدام نموذج محدد.",
244
+ "model_type_help": "نوع النموذج (bandit, bs_roformer, mdx23c، إلخ)",
245
  "config_path_help": "مسار ملف التكوين",
246
+ "demud_phaseremix_help": "تمكين إعادة مزج الطور للآلات الموسيقية",
247
+ "start_checkpoint_help": "نقطة التفتيش الأولية للأوزان الصحيحة",
248
+ "input_folder_help": "المجلد الذي يحتوي على المزيجات للمعالجة",
249
  "audio_path_help": "مسار ملف صوتي واحد للمعالجة",
250
+ "store_dir_help": "مسار تخزين النتائج",
251
  "device_ids_help": "قائمة معرفات GPU",
252
+ "extract_instrumental_help": "عكس الأصوات للحصول على الآلي إذا كان متاحًا",
253
+ "disable_detailed_pbar_help": "تعطيل شريط التقدم التفصيلي في demix",
254
+ "force_cpu_help": "إجبار استخدام CPU حتى لو كانت CUDA متاحة",
255
  "flac_file_help": "إخراج ملف FLAC بدلاً من WAV",
256
+ "export_format_help": "تنسيق التصدير ونوع PCM",
257
  "pcm_type_help": "نوع PCM لملفات FLAC",
258
+ "use_tta_help": "تمكين زيادة وقت الاختبار",
259
  "lora_checkpoint_help": "نقطة التفتيش الأولية لأوزان LoRA",
260
  "cuda_available": "CUDA متاح، استخدم --force_cpu لتعطيله.",
261
+ "using_device": "الجهاز المستخدم: {}",
262
+ "instruments_print": "الأدوات الموسيقية: {}",
263
  "model_load_time": "وقت تحميل النموذج: {:.2f} ثانية",
264
  "invalid_url": "❌ رابط غير صالح",
265
  "cookie_file_updated": "✅ تم تحديث ملف الكوكيز!",
266
  "cookie_installation_error": "⚠️ خطأ في تثبيت الكوكيز: {}",
267
+ "google_drive_mount_error": "❌ خطأ في ربط Google Drive: {}",
268
  "file_size_zero_error": "حجم الملف صفر أو لم يتم إنشاء الملف",
269
+ "google_drive_error": "❌ خطأ في تنزيل Google Drive: {}",
270
  "wav_conversion_failed": "فشل تحويل WAV",
271
+ "download_error": "❌ خطأ في التنزيل: {}",
272
+ "file_copied_to_drive": "تم نسخ الملف إلى Google Drive: {}",
273
+ "copy_to_drive_error": "❌ خطأ في النسخ إلى Google Drive: {}",
274
+ "download_success": "🎉 تم التنزيل بنجاح!",
275
+ "download_failed": "❌ فشل التنزيل",
276
+ "no_output_files_found": "❌ لم يتم العثور على ملفات الإخراج.",
277
  "output_refreshed_successfully": "✅ تم تحديث الإخراج بنجاح!",
278
  "error_refreshing_output": "❌ خطأ في تحديث الإخراج: {}",
279
  "starting_audio_separation": "بدء فصل الصوت...",
 
282
  "separation_complete": "اكتمل الفصل!",
283
  "progress_parsing_error": "خطأ في تحليل التقدم: {}",
284
  "error_occurred": "حدث خطأ: {}",
285
+ "separation_process_completed": "اكتملت عملية الفصل!",
286
+ "no_audio_file_error": "❌ لم يتم تقديم ملف صوتي ولا يوجد ملف في مجلد الإدخال.",
287
+ "no_input_progress_label": "خطأ: لم يتم تقديم إدخال -- 0.0%",
288
  "processing_audio_print": "معالجة الصوت من: {} باستخدام النموذج: {}",
289
  "starting_audio_separation_progress_label": "بدء فصل الصوت... -- 0.0%",
290
  "separating_audio_progress_label": "فصل الصوت... -- {}.0%",
291
  "audio_processing_completed_progress_label": "اكتملت معالجة الصوت! -- 100.0%",
292
  "audio_processing_completed": "✅ اكتملت معالجة الصوت!",
293
  "error_occurred_progress_label": "حدث خطأ -- 0.0%",
294
+ "minimum_files_required": "⚠️ مطلوب ملفان على الأقل",
295
  "valid_files_not_found": "❌ لم يتم العثور على ملفات صالحة",
296
  "starting_ensemble_process": "بدء عملية التجميع...",
297
+ "ensembling_progress": "جارٍ التجميع... ({:.1f}%)",
298
  "finalizing_ensemble_output": "إنهاء إخراج التجميع...",
299
  "success_log": "✅ نجاح!\n{}",
300
  "error_log": "❌ خطأ!\n{}",
 
301
  "ensemble_process_completed": "اكتملت عملية التجميع!",
302
+ "no_models_selected": "❌ لم يتم اختيار أي نماذج",
303
+ "no_input_audio_provided": "❌ لم يتم تقديم صوت إدخال",
304
+ "loading_model": "جارٍ تحميل النموذج {}/{}: {}...",
305
+ "loading_model_progress_label": "جارٍ تحميل النموذج {}/{}: {}... -- {}.0%",
306
  "completed_model": "اكتمل النموذج {}/{}: {}",
307
  "completed_model_progress_label": "اكتمل النموذج {}/{}: {} -- {}.0%",
308
  "running_command": "تشغيل الأمر: {}",
309
  "model_failed": "فشل النموذج {}: {}",
310
+ "critical_error_with_model": "خطأ فادح مع {}: {}",
311
+ "model_output_failed": "فشل {} في إنتاج الإخراج",
312
  "waiting_for_files": "في انتظار جاهزية جميع الملفات...",
313
  "waiting_for_files_progress_label": "في انتظار جاهزية جميع الملفات... -- 90.0%",
314
+ "performing_ensemble": "جارٍ التجميع...",
315
+ "performing_ensemble_progress_label": "جارٍ التجميع... -- 92.0%",
316
  "memory_usage_before_ensemble": "استخدام الذاكرة قبل التجميع: {}%",
317
  "memory_usage_after_ensemble": "استخدام الذاكرة بعد التجميع: {}%",
318
  "finalizing_ensemble_output_progress_label": "إنهاء إخراج التجميع... -- 98.0%",
319
+ "ensemble_file_creation_failed": "تعذر إنشاء ملف التجميع: {}",
320
  "ensemble_completed_progress_label": "اكتمل التجميع بنجاح! -- 100.0%",
321
  "success_output_created": "✅ نجاح! تم إنشاء ملف الإخراج.",
322
+ "custom_models_tab": " النماذج المخصصة",
323
+ "add_custom_model": " إضافة نموذج مخصص",
324
+ "custom_model_name": "اسم النموذج",
325
+ "custom_model_name_placeholder": "أدخل اسمًا لنموذجك",
326
+ "checkpoint_url": "رابط نقطة التفتيش (.ckpt/.pth)",
327
+ "checkpoint_url_placeholder": "https://huggingface.co/.../model.ckpt",
328
+ "config_url": "رابط التكوين (.yaml)",
329
+ "config_url_placeholder": "https://huggingface.co/.../config.yaml",
330
+ "custom_py_url": "رابط ملف Python المخصص (اختياري)",
331
+ "custom_py_url_placeholder": "https://huggingface.co/.../bs_roformer.py",
332
+ "auto_detect_type": "الكشف التلقائي عن نوع النموذج",
333
+ "model_type": "نوع النموذج",
334
+ "add_model_btn": " إضافة نموذج",
335
+ "custom_models_list": "📋 قائمة النماذج المخصصة",
336
+ "delete_model": "🗑️ حذف",
337
+ "no_custom_models": "لم تتم إضافة نماذج مخصصة بعد",
338
+ "model_added_success": " تم إضافة النموذج بنجاح!",
339
+ "model_deleted_success": " تم حذف النموذج بنجاح!",
340
+ "model_add_error": "خطأ في إضافة النموذج: {}",
341
+ "model_delete_error": "خطأ في حذف النموذج: {}",
342
+ "refresh_models": "🔄 تحديث",
343
+ "custom_model_info": "أضف نماذج مخصصة عن طريق توفير روابط التنزيل. سيتم تنزيل النماذج تلقائيًا عند استخدامها.",
344
+ "select_model_to_delete": "اختر نموذجًا للحذف"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
345
  }
assets/i18n/languages/de_de.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "select_language": "Sprache auswählen",
3
  "gecekondu_production": "Gecekondu Production",
4
- "ultimate_audio_separation": "Deine ultimative Audio-Trennung",
5
  "presented_by": "Präsentiert von Gecekondu Production © 2025",
6
  "audio_separation_tab": "🎙️ Audio-Trennung",
7
  "input_model": "📥 Eingabe & Modell",
@@ -13,15 +13,16 @@
13
  "settings": "⚙ Einstellungen",
14
  "format": "Format",
15
  "chunk_size": "Chunk-Größe",
16
- "chunk_size_info": "Nicht ändern, es sei denn, du hast spezifische Anforderungen",
17
  "overlap": "Überlappung",
18
- "overlap_info": "Empfohlen: 2-10 (Höhere Werte verbessern die Qualität, benötigen aber mehr VRAM)",
19
- "tta_boost": "TTA Boost",
20
- "tta_info": "Verbessert die Qualität, verlangsamt aber die Verarbeitungsgeschwindigkeit",
21
  "phase_fix": "Phasenkorrektur",
22
- "phase_fix_info": "Fortgeschrittene Phasenkorrektur für Instrumentalspuren",
 
23
  "instrumental": "Instrumental",
24
- "instrumental_info": "Liefert normalerweise 2 Ausgaben, manchmal aber nur eine einzige Ausgabe, du musst es ausprobieren :)",
25
  "process": "🚀 Verarbeiten",
26
  "reset": "🧹 Zurücksetzen",
27
  "status": "Status",
@@ -37,28 +38,28 @@
37
  "drums": "Schlagzeug",
38
  "bass": "Bass",
39
  "effects": "Effekte",
40
- "advanced_tab": "⚙ Fortgeschritten",
41
  "phase_remix": "Phasen-Remix",
42
  "dry": "Trocken",
43
  "music": "Musik",
44
  "karaoke": "Karaoke",
45
- "bleed": "Überlauf",
46
- "copy_to_drive": "📂 Auf Laufwerk kopieren",
47
  "copy_status": "Kopierstatus",
48
  "files_will_be_copied": "Dateien werden hierher kopiert...",
49
- "processing_tip": "<b>🔈 Verarbeitungstipp:</b> Für verrauschte Ergebnisse verwende <code>bleed_suppressor_v1</code> oder <code>denoisedebleed</code> Modelle in der Kategorie <i>\"Rauschunterdrückung & Effektentfernung\"</i>, um die Ausgabe zu bereinigen",
50
  "waiting_for_processing": "Warten auf Verarbeitung...",
51
  "auto_ensemble_tab": "🤖 Auto-Ensemble",
52
  "upload_file": "Datei hochladen",
53
  "enter_file_path": "Oder Dateipfad eingeben",
54
  "file_path_placeholder": "Vollständigen Pfad zur Audiodatei eingeben",
55
- "advanced_settings": "⚙️ Fortgeschrittene Einstellungen",
56
  "use_tta": "TTA verwenden",
57
  "instrumental_only": "Nur Instrumental",
58
  "auto_overlap": "Überlappung",
59
  "auto_chunk_size": "Chunk-Größe",
60
  "output_format": "Ausgabeformat",
61
- "model_selection": "🧠 Modellwahl",
62
  "model_category": "Modellkategorie",
63
  "select_models": "Modelle aus Kategorie auswählen",
64
  "ensemble_settings": "⚡ Ensemble-Einstellungen",
@@ -68,10 +69,10 @@
68
  "original_audio_tab": "🔊 Original-Audio",
69
  "original_audio": "Original-Audio",
70
  "ensemble_result_tab": "🎚️ Ensemble-Ergebnis",
71
- "output_preview": "Ausgabe-Vorschau",
72
  "refresh_output": "🔄 Ausgabe aktualisieren",
73
  "ensemble_copy_status": "Ensemble-Ausgabe wird hierher kopiert...",
74
- "guidelines": "<h4>Richtlinien zur Modellwahl</h4><ul><li><strong>Vermeide das Mischen von Kategorien:</strong> Die Kombination von Gesangs- und Instrumentalmodellen kann unerwünschte Mischungen erzeugen</li><li><strong>Besondere Modellhinweise:</strong><ul><li>Dualitätsmodelle (v1/v2) - Geben beide Stems aus</li><li>MDX23C Separator - Hybride Ergebnisse</li></ul></li><li><strong>Beste Praxis:</strong> Verwende 3-5 ähnliche Modelle aus derselben Kategorie</li></ul><div>💡 Profi-Tipp: Beginne mit der Kombination \"VOCALS-MelBand-Roformer BigBeta5e\" + \"VOCALS-BS-Roformer_1297\"</div>",
75
  "download_sources_tab": "⬇️ Quellen herunterladen",
76
  "direct_links": "🌐 Direkte Links",
77
  "audio_file_url": "Audio-Datei-URL",
@@ -80,7 +81,85 @@
80
  "downloaded_file": "Heruntergeladene Datei",
81
  "cookie_management": "🍪 Cookie-Verwaltung",
82
  "upload_cookies_txt": "Cookies.txt hochladen",
83
- "cookie_info": "<div style=\"margin-left:15px; font-size:0.95em\">**📌 Warum nötig?**<br>- Zugriff auf altersbeschränkte Inhalte<br>- Herunterladen privater/nicht gelisteter Videos<br>- Umgehen regionaler Einschränkungen<br>- Vermeiden von YouTube-Download-Limits<br><br>**⚠️ Wichtige Hinweise**<br>- TEILE NIEMALS deine Cookie-Dateien!<br>- Aktualisiere Cookies, wenn:<br> • \"403 Forbidden\"-Fehler auftreten<br> • Downloads plötzlich stoppen<br> • \"Sitzung abgelaufen\"-Meldungen erscheinen<br><br>**🔄 Erneuerungsschritte**<br>1. Installiere diese <a href=\"https://chromewebstore.google.com/detail/get-cookiestxt-clean/ahmnmhfbokciafffnknlekllgcnafnie\" target=\"_blank\">Chrome-Erweiterung</a><br>2. Melde dich bei YouTube in Chrome an<br>3. Klicke auf das Erweiterungssymbol → \"Exportieren\"<br>4. Lade die heruntergeladene Datei hier hoch<br><br>**⏳ Cookie-Lebensdauer**<br>- Normale Sitzungen: 24 Stunden<br>- Sensible Operationen: 1 Stunde<br>- Passwortänderungen: Sofortige Ungültigkeit</div>",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84
  "manual_ensemble_tab": "🎚️ Manuelles Ensemble",
85
  "input_sources": "📂 Eingabequellen",
86
  "refresh": "🔄 Aktualisieren",
@@ -99,23 +178,24 @@
99
  "Vocal Models": "Gesangsmodelle",
100
  "Instrumental Models": "Instrumentalmodelle",
101
  "4-Stem Models": "4-Stem-Modelle",
102
- "Denoise Models": "Rauschunterdrückungsmodelle",
103
- "Dereverb Models": "Hallentfernungsmodelle",
104
  "Other Models": "Andere Modelle",
105
- "ensemble_files_description": "Kombiniert mehrere Audiodateien zu einer einzigen Ausgabe unter Verwendung spezifizierter Ensemble-Methoden.",
 
106
  "ensemble_files_help": "Pfad zu allen Audiodateien für das Ensemble",
107
- "ensemble_type_help": "Einer von avg_wave, median_wave, min_wave, max_wave, avg_fft, median_fft, min_fft, max_fft",
108
- "ensemble_weights_help": "Gewichte zur Erstellung des Ensembles. Die Anzahl der Gewichte muss der Anzahl der Dateien entsprechen",
109
  "ensemble_output_help": "Pfad zur WAV-Datei, in der das Ensemble-Ergebnis gespeichert wird",
110
  "ensemble_type_print": "Ensemble-Typ: {}",
111
  "num_input_files_print": "Anzahl der Eingabedateien: {}",
112
  "weights_print": "Gewichte: {}",
113
  "output_file_print": "Ausgabedatei: {}",
114
  "duration_mismatch_error": "Alle Dateien müssen die gleiche Dauer haben",
115
- "file_not_found_error": "Fehler. Datei nicht gefunden: {}. Überprüfe die Pfade.",
116
  "reading_chunk_print": "Lese Chunk aus Datei: {} (Start: {}s, Dauer: {}s)",
117
  "chunk_result_shape_print": "Chunk-Ergebnisform: {}",
118
- "ensemble_completed_print": "Ensemble abgeschlossen. Ausgabe gespeichert unter: {}",
119
  "file_deletion_error": "{} konnte nicht gelöscht werden: {}",
120
  "directory_not_exist_warning": "⚠️ Verzeichnis existiert nicht: {}",
121
  "not_a_directory_warning": "⚠️ Pfad ist kein Verzeichnis: {}",
@@ -125,42 +205,50 @@
125
  "error": "🔥 Fehler: {}",
126
  "ffmpeg_error": "FFmpeg-Fehler ({}): {}",
127
  "file_saved_successfully": "Datei erfolgreich gespeichert: {}",
128
- "total_files_found": "Gesamtzahl gefundener Dateien: {}. Verwende Abtastrate: {}",
129
  "total_progress": "Gesamtfortschritt",
130
- "detailed_pbar_enabled": "Detaillierter Fortschrittsbalken aktiviert: {}",
131
- "loaded_audio": "Geladenes Audio: {}, Form: {}",
132
- "cannot_read_track": "Spur kann nicht gelesen werden: {}",
133
  "error_message": "Fehlermeldung: {}",
134
- "demudding_track": "Spur entmischen (Phasen-Remix - Instrumental): {}",
135
  "elapsed_time": "Verstrichene Zeit: {:.2f} Sekunden.",
136
- "proc_folder_description": "Verarbeitet Audiodateien in einem Ordner mit einem spezifizierten Modell.",
137
  "model_type_help": "Modelltyp (bandit, bs_roformer, mdx23c, etc.)",
138
  "config_path_help": "Pfad zur Konfigurationsdatei",
139
  "demud_phaseremix_help": "Phasen-Remix für Instrumental aktivieren",
140
- "start_checkpoint_help": "Initialer Checkpoint für gültige Gewichte",
141
- "input_folder_help": "Ordner mit Mischungen zur Verarbeitung",
142
- "audio_path_help": "Pfad zu einer einzelnen Audiodatei zur Verarbeitung",
143
  "store_dir_help": "Pfad zum Speichern der Ergebnisse",
144
  "device_ids_help": "Liste der GPU-IDs",
145
  "extract_instrumental_help": "Gesang invertieren, um Instrumental zu erhalten, falls vorhanden",
146
- "disable_detailed_pbar_help": "Detaillierten Fortschrittsbalken in Demix deaktivieren",
147
- "force_cpu_help": "CPU erzwingen, selbst wenn CUDA verfügbar ist",
148
- "flac_file_help": "FLAC-Datei anstelle von WAV ausgeben",
149
  "export_format_help": "Exportformat und PCM-Typ",
150
  "pcm_type_help": "PCM-Typ für FLAC-Dateien",
151
- "use_tta_help": "Testzeit-Augmentation aktivieren",
152
- "lora_checkpoint_help": "Initialer Checkpoint für LoRA-Gewichte",
153
- "cuda_available": "CUDA ist verfügbar, verwende --force_cpu, um es zu deaktivieren.",
154
  "using_device": "Verwendetes Gerät: {}",
155
  "instruments_print": "Instrumente: {}",
156
  "model_load_time": "Modell-Ladezeit: {:.2f} Sek.",
157
  "invalid_url": "❌ Ungültige URL",
158
  "cookie_file_updated": "✅ Cookie-Datei aktualisiert!",
159
- "cookie_installation_error": "⚠️ Fehler bei der Cookie-Installation: {}",
160
- "file_size_zero_error": "Dateigröße null oder Datei nicht erstellt",
161
- "google_drive_error": " Google Drive Download-Fehler: {}",
 
162
  "wav_conversion_failed": "WAV-Konvertierung fehlgeschlagen",
163
  "download_error": "❌ Download-Fehler: {}",
 
 
 
 
 
 
 
164
  "download_success": "🎉 Erfolgreich heruntergeladen!",
165
  "download_failed": "❌ Download fehlgeschlagen",
166
  "no_output_files_found": "❌ Keine Ausgabedateien gefunden.",
@@ -170,19 +258,19 @@
170
  "processing_audio": "Audio wird verarbeitet",
171
  "separating_audio": "Audio wird getrennt... ({:.1f}%)",
172
  "separation_complete": "Trennung abgeschlossen!",
173
- "progress_parsing_error": "Fortschrittsanalyse-Fehler: {}",
174
  "error_occurred": "Ein Fehler ist aufgetreten: {}",
175
  "separation_process_completed": "Trennungsprozess abgeschlossen!",
176
- "no_audio_file_error": "❌ Keine Audiodatei bereitgestellt und keine vorhandene Datei im Eingabeordner.",
177
  "no_input_progress_label": "Fehler: Keine Eingabe bereitgestellt -- 0.0%",
178
  "processing_audio_print": "Verarbeite Audio von: {} mit Modell: {}",
179
  "starting_audio_separation_progress_label": "Audio-Trennung wird gestartet... -- 0.0%",
180
  "separating_audio_progress_label": "Audio wird getrennt... -- {}.0%",
181
- "audio_processing_completed_progress_label": "Audioverarbeitung abgeschlossen! -- 100.0%",
182
- "audio_processing_completed": "✅ Audioverarbeitung abgeschlossen!",
183
  "error_occurred_progress_label": "Fehler aufgetreten -- 0.0%",
184
  "minimum_files_required": "⚠️ Mindestens 2 Dateien erforderlich",
185
- "valid_files_not_found": "❌ Gültige Dateien nicht gefunden",
186
  "starting_ensemble_process": "Ensemble-Prozess wird gestartet...",
187
  "ensembling_progress": "Ensemble wird erstellt... ({:.1f}%)",
188
  "finalizing_ensemble_output": "Ensemble-Ausgabe wird finalisiert...",
@@ -194,87 +282,65 @@
194
  "no_input_audio_provided": "❌ Kein Eingabe-Audio bereitgestellt",
195
  "loading_model": "Lade Modell {}/{}: {}...",
196
  "loading_model_progress_label": "Lade Modell {}/{}: {}... -- {}.0%",
197
- "completed_model": "Abgeschlossenes Modell {}/{}: {}",
198
- "completed_model_progress_label": "Abgeschlossenes Modell {}/{}: {} -- {}.0%",
199
  "running_command": "Führe Befehl aus: {}",
200
  "model_failed": "Modell {} fehlgeschlagen: {}",
201
  "critical_error_with_model": "Kritischer Fehler mit {}: {}",
202
  "model_output_failed": "{} konnte keine Ausgabe erzeugen",
203
- "waiting_for_files": "Warten auf die Bereitschaft aller Dateien...",
204
- "waiting_for_files_progress_label": "Warten auf die Bereitschaft aller Dateien... -- 90.0%",
205
  "performing_ensemble": "Ensemble wird durchgeführt...",
206
  "performing_ensemble_progress_label": "Ensemble wird durchgeführt... -- 92.0%",
207
- "memory_usage_before_ensemble": "Speicherauslastung vor Ensemble: {}%",
208
- "memory_usage_after_ensemble": "Speicherauslastung nach Ensemble: {}%",
209
  "finalizing_ensemble_output_progress_label": "Ensemble-Ausgabe wird finalisiert... -- 98.0%",
210
  "ensemble_file_creation_failed": "Ensemble-Datei konnte nicht erstellt werden: {}",
211
  "ensemble_completed_progress_label": "Ensemble erfolgreich abgeschlossen! -- 100.0%",
212
  "success_output_created": "✅ Erfolg! Ausgabedatei erstellt.",
213
- "drive_mounted_copying_ensemble": "Google Drive eingebunden. Ensemble-Ausgabe wird kopiert...",
214
- "drive_already_mounted_copying_ensemble": "Google Drive bereits eingebunden. Ensemble-Ausgabe wird kopiert...",
215
- "no_ensemble_output_files_found": " Keine Ensemble-Ausgabedateien gefunden.",
216
- "ensemble_output_copied": "✅ Ensemble-Ausgabe kopiert nach {}",
217
- "error_copying_ensemble_output": " Fehler beim Kopieren der Ensemble-Ausgabe: {}",
218
- "drive_mounted_copying_files": "Google Drive eingebunden. Dateien werden kopiert...",
219
- "drive_already_mounted_copying_files": "Google Drive bereits eingebunden. Dateien werden kopiert...",
220
- "files_copied_to_drive": " Dateien kopiert nach {}",
221
- "error_copying_files": "❌ Fehler beim Kopieren der Dateien: {}",
222
- "mounting_drive": "Google Drive wird eingebunden...",
223
- "settings_saved": "Einstellungen in config.json gespeichert: Sprache={lang}, Methode={method}, Port={port}",
224
- "selected_language": "Ausgewählte Sprache: {lang}",
225
- "available_languages": "Verfügbare Sprachen: {langs}",
226
- "select_language_prompt": "Wähle eine Sprache (Standard: {default}): ",
227
- "invalid_language": "Ungültige Sprachauswahl! Verwende Standardsprache ({default}).",
228
- "available_sharing_methods": "Verfügbare Freigabemethoden: gradio, localtunnel, ngrok",
229
- "select_sharing_method_prompt": "Wähle eine Freigabemethode (Standard: {default}): ",
230
- "enter_ngrok_token_prompt": "Gib deinen Ngrok-Token ein (Standard: {default}): ",
231
- "ngrok_token_required": "Fehler: Ngrok-Token ist für die ngrok-Methode erforderlich!",
232
- "enter_port_prompt": "Gib die Portnummer ein (Standard: {default}): ",
233
- "opening_previous_url": "Öffne vorherige URL: {url}",
234
- "open_url_manually": "Öffne die URL manuell in Colab: {url}",
235
- "invalid_previous_url": "Ungültige vorherige URL: {url}, nicht geöffnet.",
236
- "starting_method": "Starte {method} auf Port {port}...",
237
- "process_stopped": "\n🛑 Prozess vom Benutzer gestoppt",
238
- "selected_port": "Ausgewählter Port: {port}",
239
- "starting_gradio_with_sharing": "Starte Gradio mit integrierter Freigabe...",
240
- "starting_localtunnel": "Starte Localtunnel auf Port {port}...",
241
- "share_link": "Freigabe-Link: {url}",
242
- "password_ip": "Passwort-IP: {ip}",
243
- "starting_ngrok": "Starte Ngrok auf Port {port}...",
244
- "ngrok_url": "Ngrok-URL: {url}",
245
- "ngrok_error": "Fehler beim Starten von Ngrok: {error}",
246
- "apollo_chunk_size": "Apollo Chunk-Größe",
247
- "apollo_chunk_size_info": "Chunk-Größe für Apollo-Verbesserung (empfohlen: 19 für Universalmodell)",
248
- "apollo_overlap": "Apollo-Überlappung",
249
- "enhancing_with_apollo": "Verbessere mit Apollo ({}/{} Dateien)...",
250
- "apollo_processing_completed": "Apollo-Verarbeitung abgeschlossen!",
251
- "apollo_overlap_info": "Überlappung für Apollo-Verbesserung (empfohlen: 2)",
252
- "invalid_method": "Fehler: Ungültige Methode! Verwende 'gradio', 'localtunnel' oder 'ngrok'.",
253
- "apollo_enhancement_settings": "Apollo-Verbesserungseinstellungen",
254
- "enhance_with_apollo": "Mit Apollo verbessern",
255
- "enhance_with_apollo_info": "Apollo für Audioverbesserung nach der Trennung aktivieren",
256
- "apollo_method": "Apollo-Methode",
257
- "apollo_method_info": "Wähle die Verarbeitungsmethode für Apollo",
258
- "normal_method": "Normale Methode",
259
- "mid_side_method": "Mitte/Seite-Methode",
260
- "apollo_normal_model": "Apollo Normalmodell",
261
- "apollo_normal_model_info": "Modell für normale Apollo-Verarbeitung",
262
- "apollo_midside_model": "Apollo Mitte/Seite-Modell",
263
- "apollo_enhancement_info": "Verbesserung der Klangqualität",
264
- "selected_models": "Ausgewählte Modelle",
265
- "save_preset": "Voreinstellung Speichern",
266
- "delete_preset": "Voreinstellung Löschen",
267
- "refresh_presets": "Voreinstellungen Aktualisieren",
268
- "preset_name": "Name der Voreinstellung",
269
- "select_preset": "Voreinstellung Auswählen",
270
- "add_favorite": "Zu Favoriten Hinzufügen",
271
- "apollo_midside_model_info": "Modell für Mitte/Seite-Verarbeitung (optional)",
272
- "language_changed_message": "Sprache geändert. Bitte starte die Zelle neu.",
273
- "apply_matchering": "Matchering anwenden",
274
- "matchering_info": "Matchering anwenden, um Audio-Mastering und -Balance zu verbessern",
275
- "matchering_passes": "Matchering-Durchgänge",
276
- "matchering_passes_info": "Anzahl der Matchering-Iterationen (1-5, höhere Werte verbessern die Qualität, dauern aber länger)",
277
- "matchering_processing": "Matchering wird angewendet... ({:.1f}%)",
278
- "matchering_completed": "✅ Matchering-Verarbeitung abgeschlossen!",
279
- "matchering_error": "❌ Fehler während Matchering: {}"
280
- }
 
1
  {
2
  "select_language": "Sprache auswählen",
3
  "gecekondu_production": "Gecekondu Production",
4
+ "ultimate_audio_separation": "Ihre ultimative Audio-Trennung",
5
  "presented_by": "Präsentiert von Gecekondu Production © 2025",
6
  "audio_separation_tab": "🎙️ Audio-Trennung",
7
  "input_model": "📥 Eingabe & Modell",
 
13
  "settings": "⚙ Einstellungen",
14
  "format": "Format",
15
  "chunk_size": "Chunk-Größe",
16
+ "chunk_size_info": "Nicht ändern, es sei denn, Sie haben spezifische Anforderungen",
17
  "overlap": "Überlappung",
18
+ "overlap_info": "Empfohlen: 2-10 (höhere Werte erhöhen die Qualität, erfordern aber mehr VRAM)",
19
+ "tta_boost": "TTA-Boost",
20
+ "tta_info": "Erhöht die Qualität, verlangsamt aber die Verarbeitung",
21
  "phase_fix": "Phasenkorrektur",
22
+ "use_phase_correction": "Erweiterte Phasenkorrektur anwenden",
23
+ "phase_fix_info": "Erweiterte Phasenkorrektur für Instrumentalspuren",
24
  "instrumental": "Instrumental",
25
+ "instrumental_info": "Gibt normalerweise 2 Ausgaben, manchmal nur eine - Sie müssen es ausprobieren! :)",
26
  "process": "🚀 Verarbeiten",
27
  "reset": "🧹 Zurücksetzen",
28
  "status": "Status",
 
38
  "drums": "Schlagzeug",
39
  "bass": "Bass",
40
  "effects": "Effekte",
41
+ "advanced_tab": "⚙ Erweitert",
42
  "phase_remix": "Phasen-Remix",
43
  "dry": "Trocken",
44
  "music": "Musik",
45
  "karaoke": "Karaoke",
46
+ "bleed": "Übersprechen",
47
+ "copy_to_drive": "📂 Auf Drive kopieren",
48
  "copy_status": "Kopierstatus",
49
  "files_will_be_copied": "Dateien werden hierher kopiert...",
50
+ "processing_tip": "<b>🔈 Verarbeitungstipp:</b> Für verrauschte Ergebnisse verwenden Sie die Modelle <code>bleed_suppressor_v1</code> oder <code>denoisedebleed</code> in der Kategorie <i>\"Entrauschen & Effektentfernung\"</i>, um die Ausgabe zu bereinigen",
51
  "waiting_for_processing": "Warten auf Verarbeitung...",
52
  "auto_ensemble_tab": "🤖 Auto-Ensemble",
53
  "upload_file": "Datei hochladen",
54
  "enter_file_path": "Oder Dateipfad eingeben",
55
  "file_path_placeholder": "Vollständigen Pfad zur Audiodatei eingeben",
56
+ "advanced_settings": "⚙️ Erweiterte Einstellungen",
57
  "use_tta": "TTA verwenden",
58
  "instrumental_only": "Nur Instrumental",
59
  "auto_overlap": "Überlappung",
60
  "auto_chunk_size": "Chunk-Größe",
61
  "output_format": "Ausgabeformat",
62
+ "model_selection": "🧠 Modellauswahl",
63
  "model_category": "Modellkategorie",
64
  "select_models": "Modelle aus Kategorie auswählen",
65
  "ensemble_settings": "⚡ Ensemble-Einstellungen",
 
69
  "original_audio_tab": "🔊 Original-Audio",
70
  "original_audio": "Original-Audio",
71
  "ensemble_result_tab": "🎚️ Ensemble-Ergebnis",
72
+ "output_preview": "Ausgabevorschau",
73
  "refresh_output": "🔄 Ausgabe aktualisieren",
74
  "ensemble_copy_status": "Ensemble-Ausgabe wird hierher kopiert...",
75
+ "guidelines": "<h4>Richtlinien zur Modellauswahl</h4><ul><li><strong>Kategorien nicht mischen:</strong> Das Kombinieren von Gesangs- und Instrumentalmodellen kann unerwünschte Mischungen erzeugen</li><li><strong>Hinweise zu Spezialmodellen:</strong><ul><li>Dualitätsmodelle (v1/v2) - Geben beide Stems aus</li><li>MDX23C Separator - Hybride Ergebnisse</li></ul></li><li><strong>Best Practice:</strong> Verwenden Sie 3-5 ähnliche Modelle aus derselben Kategorie</li></ul><div>💡 Pro-Tipp: Starten Sie mit der Kombination \"VOCALS-MelBand-Roformer BigBeta5e\" + \"VOCALS-BS-Roformer_1297\"</div>",
76
  "download_sources_tab": "⬇️ Quellen herunterladen",
77
  "direct_links": "🌐 Direkte Links",
78
  "audio_file_url": "Audio-Datei-URL",
 
81
  "downloaded_file": "Heruntergeladene Datei",
82
  "cookie_management": "🍪 Cookie-Verwaltung",
83
  "upload_cookies_txt": "Cookies.txt hochladen",
84
+ "cookie_info": "<div style=\"margin-left:15px; font-size:0.95em\">**📌 Warum notwendig?**<br>- Zugriff auf altersbeschränkte Inhalte<br>- Herunterladen privater/nicht gelisteter Videos<br>- Umgehung regionaler Einschränkungen<br>- Vermeidung von YouTube-Download-Limits<br><br>**⚠️ Wichtige Hinweise**<br>- Teilen Sie Ihre Cookie-Dateien NIEMALS!<br>- Aktualisieren Sie Cookies, wenn:<br> • \"403 Verboten\"-Fehler auftreten<br> • Downloads plötzlich stoppen<br> • \"Sitzung abgelaufen\"-Meldungen erscheinen<br><br>**🔄 Aktualisierungsschritte**<br>1. Installieren Sie diese <a href=\"https://chromewebstore.google.com/detail/get-cookiestxt-clean/ahmnmhfbokciafffnknlekllgcnafnie\" target=\"_blank\">Chrome-Erweiterung</a><br>2. Melden Sie sich in Chrome bei YouTube an<br>3. Klicken Sie auf das Erweiterungs-Symbol → \"Exportieren\"<br>4. Laden Sie die heruntergeladene Datei hier hoch<br><br>**⏳ Cookie-Lebensdauer**<br>- Normale Sitzungen: 24 Stunden<br>- Sensible Operationen: 1 Stunde<br>- Passwortänderung: Sofortige Aktualisierung</div>",
85
+ "ensemble_output_copied": "✅ Ensemble-Ausgabe nach {} kopiert",
86
+ "error_copying_ensemble_output": "❌ Fehler beim Kopieren der Ensemble-Ausgabe: {}",
87
+ "drive_mounted_copying_ensemble": "Google Drive gemountet. Kopiere Ensemble-Ausgabe...",
88
+ "drive_already_mounted_copying_ensemble": "Google Drive bereits gemountet. Kopiere Ensemble-Ausgabe...",
89
+ "no_ensemble_output_files_found": "❌ Keine Ensemble-Ausgabedateien gefunden.",
90
+ "drive_mounted_copying_files": "Google Drive gemountet. Kopiere Dateien...",
91
+ "drive_already_mounted_copying_files": "Google Drive bereits gemountet. Kopiere Dateien...",
92
+ "files_copied_to_drive": "✅ Dateien nach {} kopiert",
93
+ "error_copying_files": "❌ Fehler beim Kopieren der Dateien: {}",
94
+ "mounting_drive": "Google Drive wird gemountet...",
95
+ "settings_saved": "Einstellungen in config.json gespeichert: Sprache={lang}, Methode={method}, Port={port}",
96
+ "selected_language": "Ausgewählte Sprache: {lang}",
97
+ "available_languages": "Verfügbare Sprachen: {langs}",
98
+ "select_language_prompt": "Wählen Sie eine Sprache (Standard: {default}): ",
99
+ "invalid_language": "Ungültige Sprachauswahl! Verwende Standardsprache ({default}).",
100
+ "available_sharing_methods": "Verfügbare Freigabemethoden: gradio, localtunnel, ngrok",
101
+ "select_sharing_method_prompt": "Wählen Sie eine Freigabemethode (Standard: {default}): ",
102
+ "enter_ngrok_token_prompt": "Geben Sie Ihren Ngrok-Token ein (Standard: {default}): ",
103
+ "ngrok_token_required": "Fehler: Ngrok-Token für ngrok-Methode erforderlich!",
104
+ "enter_port_prompt": "Geben Sie die Portnummer ein (Standard: {default}): ",
105
+ "opening_previous_url": "Öffne vorherige URL: {url}",
106
+ "open_url_manually": "Öffnen Sie die URL manuell in Colab: {url}",
107
+ "invalid_previous_url": "Ungültige vorherige URL: {url}, nicht geöffnet.",
108
+ "starting_method": "Starte {method} auf Port {port}...",
109
+ "process_stopped": "\n🛑 Prozess vom Benutzer gestoppt",
110
+ "selected_port": "Ausgewählter Port: {port}",
111
+ "starting_gradio_with_sharing": "Starte Gradio mit integrierter Freigabe...",
112
+ "starting_localtunnel": "Starte Localtunnel auf Port {port}...",
113
+ "share_link": "Freigabelink: {url}",
114
+ "password_ip": "Passwort-IP: {ip}",
115
+ "starting_ngrok": "Starte Ngrok auf Port {port}...",
116
+ "ngrok_url": "Ngrok-URL: {url}",
117
+ "ngrok_error": "Fehler beim Starten von Ngrok: {error}",
118
+ "auto_apollo_chunk_size": "Apollo-Chunk-Größe",
119
+ "auto_apollo_chunk_size_info": "Chunk-Größe für Apollo-Verbesserung (empfohlen: 19 für Universal-Modell)",
120
+ "auto_apollo_overlap": "Apollo-Überlappung",
121
+ "auto_enhancing_with_apollo": "Verbesserung mit Apollo ({}/{} Dateien)...",
122
+ "auto_apollo_processing_completed": "Apollo-Verarbeitung abgeschlossen!",
123
+ "auto_apollo_overlap_info": "Überlappung für Apollo-Verbesserung (empfohlen: 2)",
124
+ "apollo_chunk_size": "Apollo-Chunk-Größe",
125
+ "apollo_chunk_size_info": "Chunk-Größe für Apollo-Verbesserung (empfohlen: 19 für Universal-Modell)",
126
+ "apollo_overlap": "Apollo-Überlappung",
127
+ "enhance_with_apollo": "Mit Apollo verbessern",
128
+ "apollo_enhancement_info": "Audioqualitätsverbesserung",
129
+ "enhancing_with_apollo": "Verbesserung mit Apollo ({}/{} Dateien)...",
130
+ "apollo_processing_completed": "Apollo-Verarbeitung abgeschlossen!",
131
+ "apollo_overlap_info": "Überlappung für Apollo-Verbesserung (empfohlen: 2)",
132
+ "selected_models": "Ausgewählte Modelle",
133
+ "save_preset": "Preset speichern",
134
+ "delete_preset": "Preset löschen",
135
+ "refresh_presets": "Presets aktualisieren",
136
+ "preset_name": "Preset-Name",
137
+ "select_preset": "Preset auswählen",
138
+ "add_favorite": "Zu Favoriten hinzufügen",
139
+ "apply_matchering": "Matchering anwenden",
140
+ "matchering_info": "Matchering anwenden, um Audio-Mastering und -Balance zu verbessern",
141
+ "matchering_passes": "Matchering-Durchläufe",
142
+ "matchering_passes_info": "Anzahl der Matchering-Iterationen (1-5, höhere Werte verbessern die Qualität, dauern aber länger)",
143
+ "matchering_processing": "Matchering wird angewendet... ({:.1f}%)",
144
+ "matchering_completed": "✅ Matchering-Verarbeitung abgeschlossen!",
145
+ "matchering_error": "❌ Fehler während Matchering: {}",
146
+ "invalid_method": "Fehler: Ungültige Methode! Verwenden Sie 'gradio', 'localtunnel' oder 'ngrok'.",
147
+ "language_changed_message": "Sprache geändert. Bitte starten Sie die Zelle neu.",
148
+ "backend_settings": "🚀 Backend-Einstellungen",
149
+ "inference_backend": "Inference-Backend",
150
+ "optimization_mode": "Optimierungsmodus",
151
+ "optimization_mode_info": "Auswahl des PyTorch-Optimierungsmodus",
152
+ "mixed_precision_amp": "Gemischte Präzision (AMP)",
153
+ "mixed_precision_info": "2x schnellere Inference - empfohlen",
154
+ "tf32_acceleration": "TF32-Beschleunigung",
155
+ "tf32_acceleration_info": "Zusätzlicher Geschwindigkeitsboost für RTX 30xx+",
156
+ "cudnn_benchmark": "cuDNN-Benchmark",
157
+ "cudnn_benchmark_info": "Langsamer beim ersten Lauf, viel schneller bei nachfolgenden Läufen",
158
+ "ultra_optimized_pytorch": "ULTRA-OPTIMIERTES PyTorch-Backend",
159
+ "default_active_max_speed": "Standardmäßig aktiv - Maximale Geschwindigkeitsoptimierung",
160
+ "channels_last_mode": "Schnellstes für RTX-GPUs",
161
+ "compile_mode": "Zusätzliche Geschwindigkeit für PyTorch 2.0+",
162
+ "default_mode": "Standard",
163
  "manual_ensemble_tab": "🎚️ Manuelles Ensemble",
164
  "input_sources": "📂 Eingabequellen",
165
  "refresh": "🔄 Aktualisieren",
 
178
  "Vocal Models": "Gesangsmodelle",
179
  "Instrumental Models": "Instrumentalmodelle",
180
  "4-Stem Models": "4-Stem-Modelle",
181
+ "Denoise Models": "Entrauschungsmodelle",
182
+ "Dereverb Models": "Nachhallentfernungsmodelle",
183
  "Other Models": "Andere Modelle",
184
+ "Other Shapes": "Andere Modelle",
185
+ "ensemble_files_description": "Kombiniert mehrere Audiodateien zu einer einzigen Ausgabe unter Verwendung angegebener Ensemble-Methoden.",
186
  "ensemble_files_help": "Pfad zu allen Audiodateien für das Ensemble",
187
+ "ensemble_type_help": "Einer von: avg_wave, median_wave, min_wave, max_wave, avg_fft, median_fft, min_fft, max_fft",
188
+ "ensemble_weights_help": "Gewichte zum Erstellen des Ensembles. Anzahl der Gewichte muss der Anzahl der Dateien entsprechen",
189
  "ensemble_output_help": "Pfad zur WAV-Datei, in der das Ensemble-Ergebnis gespeichert wird",
190
  "ensemble_type_print": "Ensemble-Typ: {}",
191
  "num_input_files_print": "Anzahl der Eingabedateien: {}",
192
  "weights_print": "Gewichte: {}",
193
  "output_file_print": "Ausgabedatei: {}",
194
  "duration_mismatch_error": "Alle Dateien müssen die gleiche Dauer haben",
195
+ "file_not_found_error": "Fehler. Datei nicht gefunden: {}. Überprüfen Sie die Pfade.",
196
  "reading_chunk_print": "Lese Chunk aus Datei: {} (Start: {}s, Dauer: {}s)",
197
  "chunk_result_shape_print": "Chunk-Ergebnisform: {}",
198
+ "ensemble_completed_print": "Ensemble abgeschlossen. Ausgabe gespeichert in: {}",
199
  "file_deletion_error": "{} konnte nicht gelöscht werden: {}",
200
  "directory_not_exist_warning": "⚠️ Verzeichnis existiert nicht: {}",
201
  "not_a_directory_warning": "⚠️ Pfad ist kein Verzeichnis: {}",
 
205
  "error": "🔥 Fehler: {}",
206
  "ffmpeg_error": "FFmpeg-Fehler ({}): {}",
207
  "file_saved_successfully": "Datei erfolgreich gespeichert: {}",
208
+ "total_files_found": "Gesamtzahl gefundener Dateien: {}. Verwendete Abtastrate: {}",
209
  "total_progress": "Gesamtfortschritt",
210
+ "detailed_pbar_enabled": "Detaillierte Fortschrittsanzeige aktiviert: {}",
211
+ "loaded_audio": "Audio geladen: {}, Form: {}",
212
+ "cannot_read_track": "Track kann nicht gelesen werden: {}",
213
  "error_message": "Fehlermeldung: {}",
214
+ "demudding_track": "Track wird bereinigt (Phasen-Remix - Instrumental): {}",
215
  "elapsed_time": "Verstrichene Zeit: {:.2f} Sekunden.",
216
+ "proc_folder_description": "Verarbeitet Audiodateien in einem Ordner mit einem angegebenen Modell.",
217
  "model_type_help": "Modelltyp (bandit, bs_roformer, mdx23c, etc.)",
218
  "config_path_help": "Pfad zur Konfigurationsdatei",
219
  "demud_phaseremix_help": "Phasen-Remix für Instrumental aktivieren",
220
+ "start_checkpoint_help": "Anfangs-Checkpoint für gültige Gewichte",
221
+ "input_folder_help": "Ordner mit zu verarbeitenden Mischungen",
222
+ "audio_path_help": "Pfad zu einer einzelnen zu verarbeitenden Audiodatei",
223
  "store_dir_help": "Pfad zum Speichern der Ergebnisse",
224
  "device_ids_help": "Liste der GPU-IDs",
225
  "extract_instrumental_help": "Gesang invertieren, um Instrumental zu erhalten, falls vorhanden",
226
+ "disable_detailed_pbar_help": "Detaillierte Fortschrittsanzeige beim Demix deaktivieren",
227
+ "force_cpu_help": "CPU-Nutzung erzwingen, auch wenn CUDA verfügbar ist",
228
+ "flac_file_help": "FLAC-Datei statt WAV ausgeben",
229
  "export_format_help": "Exportformat und PCM-Typ",
230
  "pcm_type_help": "PCM-Typ für FLAC-Dateien",
231
+ "use_tta_help": "Test-Time-Augmentation aktivieren",
232
+ "lora_checkpoint_help": "Anfangs-Checkpoint für LoRA-Gewichte",
233
+ "cuda_available": "CUDA ist verfügbar, verwenden Sie --force_cpu zum Deaktivieren.",
234
  "using_device": "Verwendetes Gerät: {}",
235
  "instruments_print": "Instrumente: {}",
236
  "model_load_time": "Modell-Ladezeit: {:.2f} Sek.",
237
  "invalid_url": "❌ Ungültige URL",
238
  "cookie_file_updated": "✅ Cookie-Datei aktualisiert!",
239
+ "cookie_installation_error": "⚠️ Cookie-Installationsfehler: {}",
240
+ "google_drive_mount_error": " Google Drive-Verbindungsfehler: {}",
241
+ "file_size_zero_error": "Dateigröße Null oder Datei nicht erstellt",
242
+ "google_drive_error": "❌ Google Drive-Download-Fehler: {}",
243
  "wav_conversion_failed": "WAV-Konvertierung fehlgeschlagen",
244
  "download_error": "❌ Download-Fehler: {}",
245
+ "file_copied_to_drive": "Datei nach Google Drive kopiert: {}",
246
+ "copy_to_drive_error": "❌ Fehler beim Kopieren nach Google Drive: {}",
247
+ "direct_download_failed": "Direkter Download der Datei fehlgeschlagen",
248
+ "direct_download_error": "❌ Direkter Download-Fehler: {}",
249
+ "invalid_google_drive_url": "❌ Ungültige Google Drive-URL",
250
+ "continuing_without_google_drive": "Google Drive-Verbindung fehlgeschlagen, fahre ohne fort...",
251
+ "skipping_drive_copy_no_mount": "⚠️ Google Drive nicht gemountet, überspringe Drive-Kopie...",
252
  "download_success": "🎉 Erfolgreich heruntergeladen!",
253
  "download_failed": "❌ Download fehlgeschlagen",
254
  "no_output_files_found": "❌ Keine Ausgabedateien gefunden.",
 
258
  "processing_audio": "Audio wird verarbeitet",
259
  "separating_audio": "Audio wird getrennt... ({:.1f}%)",
260
  "separation_complete": "Trennung abgeschlossen!",
261
+ "progress_parsing_error": "Fortschritts-Parsing-Fehler: {}",
262
  "error_occurred": "Ein Fehler ist aufgetreten: {}",
263
  "separation_process_completed": "Trennungsprozess abgeschlossen!",
264
+ "no_audio_file_error": "❌ Keine Audiodatei bereitgestellt und keine vorhandene Datei im Eingabeverzeichnis.",
265
  "no_input_progress_label": "Fehler: Keine Eingabe bereitgestellt -- 0.0%",
266
  "processing_audio_print": "Verarbeite Audio von: {} mit Modell: {}",
267
  "starting_audio_separation_progress_label": "Audio-Trennung wird gestartet... -- 0.0%",
268
  "separating_audio_progress_label": "Audio wird getrennt... -- {}.0%",
269
+ "audio_processing_completed_progress_label": "Audio-Verarbeitung abgeschlossen! -- 100.0%",
270
+ "audio_processing_completed": "✅ Audio-Verarbeitung abgeschlossen!",
271
  "error_occurred_progress_label": "Fehler aufgetreten -- 0.0%",
272
  "minimum_files_required": "⚠️ Mindestens 2 Dateien erforderlich",
273
+ "valid_files_not_found": "❌ Keine gültigen Dateien gefunden",
274
  "starting_ensemble_process": "Ensemble-Prozess wird gestartet...",
275
  "ensembling_progress": "Ensemble wird erstellt... ({:.1f}%)",
276
  "finalizing_ensemble_output": "Ensemble-Ausgabe wird finalisiert...",
 
282
  "no_input_audio_provided": "❌ Kein Eingabe-Audio bereitgestellt",
283
  "loading_model": "Lade Modell {}/{}: {}...",
284
  "loading_model_progress_label": "Lade Modell {}/{}: {}... -- {}.0%",
285
+ "completed_model": "Modell abgeschlossen {}/{}: {}",
286
+ "completed_model_progress_label": "Modell abgeschlossen {}/{}: {} -- {}.0%",
287
  "running_command": "Führe Befehl aus: {}",
288
  "model_failed": "Modell {} fehlgeschlagen: {}",
289
  "critical_error_with_model": "Kritischer Fehler mit {}: {}",
290
  "model_output_failed": "{} konnte keine Ausgabe erzeugen",
291
+ "waiting_for_files": "Warten auf Bereitstellung aller Dateien...",
292
+ "waiting_for_files_progress_label": "Warten auf Bereitstellung aller Dateien... -- 90.0%",
293
  "performing_ensemble": "Ensemble wird durchgeführt...",
294
  "performing_ensemble_progress_label": "Ensemble wird durchgeführt... -- 92.0%",
295
+ "memory_usage_before_ensemble": "Speichernutzung vor Ensemble: {}%",
296
+ "memory_usage_after_ensemble": "Speichernutzung nach Ensemble: {}%",
297
  "finalizing_ensemble_output_progress_label": "Ensemble-Ausgabe wird finalisiert... -- 98.0%",
298
  "ensemble_file_creation_failed": "Ensemble-Datei konnte nicht erstellt werden: {}",
299
  "ensemble_completed_progress_label": "Ensemble erfolgreich abgeschlossen! -- 100.0%",
300
  "success_output_created": "✅ Erfolg! Ausgabedatei erstellt.",
301
+ "phase_fixer_tab": "🔧 Phasenkorrektor",
302
+ "batch_processing_tab": "📦 Stapelverarbeitung",
303
+ "batch_description": "Mehrere Dateien gleichzeitig mit demselben Modell verarbeiten",
304
+ "batch_input_folder": "Eingabeordner",
305
+ "batch_input_folder_placeholder": "Pfad zum Ordner mit Audiodateien eingeben",
306
+ "batch_output_folder": "Ausgabeordner",
307
+ "batch_output_folder_placeholder": "Pfad zum Ausgabeordner eingeben",
308
+ "batch_file_list": "Zu verarbeitende Dateien",
309
+ "batch_add_files": "Dateien hinzufügen",
310
+ "batch_clear_list": "Liste leeren",
311
+ "batch_start": "🚀 Stapelverarbeitung starten",
312
+ "batch_stop": "⏹️ Stopp",
313
+ "batch_progress": "Stapelverarbeitungsfortschritt",
314
+ "batch_current_file": "Aktuell in Bearbeitung",
315
+ "batch_completed": "✅ Stapelverarbeitung abgeschlossen!",
316
+ "batch_stopped": "⏹️ Stapelverarbeitung gestoppt",
317
+ "batch_no_files": "❌ Keine Dateien für die Verarbeitung ausgewählt",
318
+ "source_model": "Quellmodell (Gesang)",
319
+ "source_model_info": "Modell zur Extraktion von Gesang/Phasendaten",
320
+ "target_model": "Zielmodell (Instrumental)",
321
+ "target_model_info": "Modell zur Anwendung der Phasenkorrektur",
322
+ "custom_models_tab": "⚡ Benutzerdefinierte Modelle",
323
+ "Custom Models": "Benutzerdefinierte Modelle",
324
+ "add_custom_model": "➕ Benutzerdefiniertes Modell hinzufügen",
325
+ "custom_model_name": "Modellname",
326
+ "custom_model_name_placeholder": "Geben Sie einen Namen für Ihr Modell ein",
327
+ "checkpoint_url": "Checkpoint-URL (.ckpt/.pth)",
328
+ "checkpoint_url_placeholder": "https://huggingface.co/.../model.ckpt",
329
+ "config_url": "Konfigurations-URL (.yaml)",
330
+ "config_url_placeholder": "https://huggingface.co/.../config.yaml",
331
+ "custom_py_url": "Benutzerdefinierte Python-Datei URL (optional)",
332
+ "custom_py_url_placeholder": "https://huggingface.co/.../bs_roformer.py",
333
+ "auto_detect_type": "Modelltyp automatisch erkennen",
334
+ "model_type": "Modelltyp",
335
+ "add_model_btn": "➕ Modell hinzufügen",
336
+ "custom_models_list": "📋 Liste benutzerdefinierter Modelle",
337
+ "delete_model": "🗑️ Löschen",
338
+ "no_custom_models": "Noch keine benutzerdefinierten Modelle hinzugefügt",
339
+ "model_added_success": "✅ Modell erfolgreich hinzugefügt!",
340
+ "model_deleted_success": "✅ Modell erfolgreich gelöscht!",
341
+ "model_add_error": "❌ Fehler beim Hinzufügen des Modells: {}",
342
+ "model_delete_error": "❌ Fehler beim Löschen des Modells: {}",
343
+ "refresh_models": "🔄 Aktualisieren",
344
+ "custom_model_info": "Fügen Sie benutzerdefinierte Modelle durch Angabe von Download-Links hinzu. Modelle werden bei Verwendung automatisch heruntergeladen.",
345
+ "select_model_to_delete": "Wählen Sie ein Modell zum Löschen aus",
346
+ "downloading_model_file": "Modell wird heruntergeladen: {}",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
assets/i18n/languages/en_us.json CHANGED
@@ -81,6 +81,27 @@
81
  "upload_cookies_txt": "Upload Cookies.txt",
82
  "cookie_info": "<div style=\"margin-left:15px; font-size:0.95em\">**📌 Why Needed?**<br>- Access age-restricted content<br>- Download private/unlisted videos<br>- Bypass regional restrictions<br>- Avoid YouTube download limits<br><br>**⚠️ Important Notes**<br>- NEVER share your cookie files!<br>- Refresh cookies when:<br> • Getting \"403 Forbidden\" errors<br> • Downloads suddenly stop<br> • Seeing \"Session expired\" messages<br><br>**🔄 Renewal Steps**<br>1. Install this <a href=\"https://chromewebstore.google.com/detail/get-cookiestxt-clean/ahmnmhfbokciafffnknlekllgcnafnie\" target=\"_blank\">Chrome extension</a><br>2. Login to YouTube in Chrome<br>3. Click extension icon → \"Export\"<br>4. Upload the downloaded file here<br><br>**⏳ Cookie Lifespan**<br>- Normal sessions: 24 hours<br>- Sensitive operations: 1 hour<br>- Password changes: Immediate invalidation</div>",
83
  "manual_ensemble_tab": "🎚️ Manual Ensemble",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84
  "input_sources": "📂 Input Sources",
85
  "refresh": "🔄 Refresh",
86
  "ensemble_algorithm": "Ensemble Algorithm",
@@ -165,6 +186,10 @@
165
  "file_copied_to_drive": "File copied to Google Drive: {}",
166
  "copy_to_drive_error": "❌ Error copying to Google Drive: {}",
167
  "direct_download_failed": "Failed to download the file directly",
 
 
 
 
168
  "download_success": "🎉 Downloaded successfully!",
169
  "download_failed": "❌ Download failed",
170
  "no_output_files_found": "❌ No output files found.",
@@ -275,5 +300,79 @@
275
  "matchering_completed": "✅ Matchering processing completed!",
276
  "matchering_error": "❌ Error during Matchering: {}",
277
  "invalid_method": "Error: Invalid method! Use 'gradio', 'localtunnel', or 'ngrok'.",
278
- "language_changed_message": "Language changed. please restart the cell."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
279
  }
 
81
  "upload_cookies_txt": "Upload Cookies.txt",
82
  "cookie_info": "<div style=\"margin-left:15px; font-size:0.95em\">**📌 Why Needed?**<br>- Access age-restricted content<br>- Download private/unlisted videos<br>- Bypass regional restrictions<br>- Avoid YouTube download limits<br><br>**⚠️ Important Notes**<br>- NEVER share your cookie files!<br>- Refresh cookies when:<br> • Getting \"403 Forbidden\" errors<br> • Downloads suddenly stop<br> • Seeing \"Session expired\" messages<br><br>**🔄 Renewal Steps**<br>1. Install this <a href=\"https://chromewebstore.google.com/detail/get-cookiestxt-clean/ahmnmhfbokciafffnknlekllgcnafnie\" target=\"_blank\">Chrome extension</a><br>2. Login to YouTube in Chrome<br>3. Click extension icon → \"Export\"<br>4. Upload the downloaded file here<br><br>**⏳ Cookie Lifespan**<br>- Normal sessions: 24 hours<br>- Sensitive operations: 1 hour<br>- Password changes: Immediate invalidation</div>",
83
  "manual_ensemble_tab": "🎚️ Manual Ensemble",
84
+ "phase_fixer_tab": "🔧 Phase Fixer",
85
+ "batch_processing_tab": "📦 Batch Processing",
86
+ "batch_description": "Process multiple files at once with the same model",
87
+ "batch_input_folder": "Input Folder",
88
+ "batch_input_folder_placeholder": "Enter folder path containing audio files",
89
+ "batch_output_folder": "Output Folder",
90
+ "batch_output_folder_placeholder": "Enter folder path for output files",
91
+ "batch_file_list": "Files to Process",
92
+ "batch_add_files": "Add Files",
93
+ "batch_clear_list": "Clear List",
94
+ "batch_start": "🚀 Start Batch Processing",
95
+ "batch_stop": "⏹️ Stop",
96
+ "batch_progress": "Batch Progress",
97
+ "batch_current_file": "Currently Processing",
98
+ "batch_completed": "✅ Batch processing completed!",
99
+ "batch_stopped": "⏹️ Batch processing stopped",
100
+ "batch_no_files": "❌ No files selected for processing",
101
+ "source_model": "Source Model (Vocals)",
102
+ "source_model_info": "Model for extracting vocals/phase data",
103
+ "target_model": "Target Model (Instrumental)",
104
+ "target_model_info": "Model to apply phase correction",
105
  "input_sources": "📂 Input Sources",
106
  "refresh": "🔄 Refresh",
107
  "ensemble_algorithm": "Ensemble Algorithm",
 
186
  "file_copied_to_drive": "File copied to Google Drive: {}",
187
  "copy_to_drive_error": "❌ Error copying to Google Drive: {}",
188
  "direct_download_failed": "Failed to download the file directly",
189
+ "direct_download_error": "❌ Direct download error: {}",
190
+ "invalid_google_drive_url": "❌ Invalid Google Drive URL",
191
+ "continuing_without_google_drive": "Google Drive connection failed, continuing without it...",
192
+ "skipping_drive_copy_no_mount": "⚠️ Google Drive not mounted, skipping Drive copy...",
193
  "download_success": "🎉 Downloaded successfully!",
194
  "download_failed": "❌ Download failed",
195
  "no_output_files_found": "❌ No output files found.",
 
300
  "matchering_completed": "✅ Matchering processing completed!",
301
  "matchering_error": "❌ Error during Matchering: {}",
302
  "invalid_method": "Error: Invalid method! Use 'gradio', 'localtunnel', or 'ngrok'.",
303
+ "language_changed_message": "Language changed. Please restart the cell.",
304
+ "backend_settings": "🚀 Backend Settings",
305
+ "inference_backend": "Inference Backend",
306
+ "optimization_mode": "Optimization Mode",
307
+ "optimization_mode_info": "PyTorch optimization mode selection",
308
+ "mixed_precision_amp": "Mixed Precision (AMP)",
309
+ "mixed_precision_info": "2x faster inference - recommended",
310
+ "tf32_acceleration": "TF32 Acceleration",
311
+ "tf32_acceleration_info": "Extra speed boost for RTX 30xx+",
312
+ "cudnn_benchmark": "cuDNN Benchmark",
313
+ "cudnn_benchmark_info": "Slower at first run, much faster on subsequent runs",
314
+ "ultra_optimized_pytorch": "ULTRA-OPTIMIZED PyTorch Backend",
315
+ "default_active_max_speed": "Active by default - Maximum speed optimization",
316
+ "channels_last_mode": "Fastest for RTX GPUs",
317
+ "compile_mode": "Extra speed for PyTorch 2.0+",
318
+ "default_mode": "Standard",
319
+ "tta_info": "Increases quality but slows down processing",
320
+ "instrumental_info": "Usually outputs 2 stems, but sometimes only one - you need to try! :)",
321
+ "custom_models_tab": "⚡ Custom Models",
322
+ "Custom Models": "Custom Models",
323
+ "add_custom_model": "➕ Add Custom Model",
324
+ "custom_model_name": "Model Name",
325
+ "custom_model_name_placeholder": "Enter a name for your model",
326
+ "checkpoint_url": "Checkpoint URL (.ckpt/.pth)",
327
+ "checkpoint_url_placeholder": "https://huggingface.co/.../model.ckpt",
328
+ "config_url": "Config URL (.yaml)",
329
+ "config_url_placeholder": "https://huggingface.co/.../config.yaml",
330
+ "custom_py_url": "Custom Python File URL (optional)",
331
+ "custom_py_url_placeholder": "https://huggingface.co/.../bs_roformer.py",
332
+ "auto_detect_type": "Auto-detect model type",
333
+ "model_type": "Model Type",
334
+ "add_model_btn": "➕ Add Model",
335
+ "custom_models_list": "📋 Custom Models List",
336
+ "delete_model": "🗑️ Delete",
337
+ "no_custom_models": "No custom models added yet",
338
+ "model_added_success": "✅ Model added successfully!",
339
+ "model_deleted_success": "✅ Model deleted successfully!",
340
+ "model_add_error": "❌ Error adding model: {}",
341
+ "model_delete_error": "❌ Error deleting model: {}",
342
+ "refresh_models": "🔄 Refresh",
343
+ "custom_model_info": "Add custom models by providing download links. Models will be downloaded automatically when used.",
344
+ "select_model_to_delete": "Select a model to delete",
345
+ "downloading_model_file": "Downloading model: {}",
346
+ "downloading_file_progress": "Downloading: {} - {}%",
347
+ "chunk_size_mode": "Chunk Size Mode",
348
+ "chunk_size_mode_info": "base: standard presets | custom: enter your own value | yaml: read from model YAML after download",
349
+ "chunk_size_custom_label": "Custom Chunk Size",
350
+ "chunk_size_custom_info": "Enter a custom chunk size value",
351
+ "chunk_size_yaml_label": "YAML Chunk Size",
352
+ "chunk_size_yaml_display_info": "Will be read from the model's YAML config after download",
353
+ "chunk_size_yaml_not_downloaded": "⏳ Model YAML not downloaded yet — value will be read during processing",
354
+ "chunk_size_yaml_detected": "✅ From YAML: {}",
355
+ "remove_favorite": "Remove from Favorites",
356
+ "normal_method": "Normal method",
357
+ "mid_side_method": "Mid-side method",
358
+ "apollo_processing_method": "Apollo Processing Method",
359
+ "apollo_normal_model": "Apollo Normal Model",
360
+ "apollo_mid_side_model": "Apollo Mid-Side Model",
361
+ "scale_factor": "Scale Factor",
362
+ "scale_factor_info": "Phase scaling factor (recommended: 1.4)",
363
+ "source_file_label": "Source File (Vocals)",
364
+ "target_file_label": "Target File (Instrumental)",
365
+ "run_phase_fixer": "🔧 Run Phase Fixer",
366
+ "phase_fixed_output": "Phase-Fixed Output",
367
+ "phase_fixer_settings": "Phase Fixer Settings",
368
+ "low_cutoff": "Low Cutoff (Hz)",
369
+ "high_cutoff": "High Cutoff (Hz)",
370
+ "low_cutoff_info": "Low frequency cutoff for phase correction",
371
+ "high_cutoff_info": "High frequency cutoff for phase correction",
372
+ "no_preset_name_provided": "⚠️ No preset name provided",
373
+ "no_models_selected_for_preset": "⚠️ No models selected for preset",
374
+ "preset_saved": "✅ Preset '{}' saved!",
375
+ "please_upload_both_files": "⚠️ Please upload both source and target files",
376
+ "processing_log_placeholder": "Processing log will appear here...",
377
+ "enter_preset_name": "Enter preset name..."
378
  }
assets/i18n/languages/es_es.json CHANGED
@@ -17,11 +17,10 @@
17
  "overlap": "Solapamiento",
18
  "overlap_info": "Recomendado: 2-10 (valores más altos mejoran la calidad pero requieren más VRAM)",
19
  "tta_boost": "Impulso TTA",
20
- "tta_info": "Mejora la calidad pero ralentiza la velocidad de procesamiento",
21
  "phase_fix": "Corrección de fase",
 
22
  "phase_fix_info": "Corrección avanzada de fase para pistas instrumentales",
23
  "instrumental": "Instrumental",
24
- "instrumental_info": "Normalmente da 2 salidas, pero a veces da una sola salida, ¡tienes que probar! :)",
25
  "process": "🚀 Procesar",
26
  "reset": "🧹 Restablecer",
27
  "status": "Estado",
@@ -80,7 +79,87 @@
80
  "downloaded_file": "Archivo descargado",
81
  "cookie_management": "🍪 Gestión de cookies",
82
  "upload_cookies_txt": "Subir Cookies.txt",
83
- "cookie_info": "<div style=\"margin-left:15px; font-size:0.95em\">**📌 ¿Por qué es necesario?**<br>- Acceso a contenido restringido por edad<br>- Descarga de videos privados/no listados<br>- Evitar restricciones regionales<br>- Evitar límites de descarga de YouTube<br><br>**⚠️ Notas importantes**<br>- ¡NUNCA compartas tus archivos de cookies!<br>- Actualiza las cookies cuando:<br> • Recibes errores \"403 Prohibido\"<br> • Las descargas se detienen repentinamente<br> • Ves mensajes de \"Sesión expirada\"<br><br>**🔄 Pasos para renovar**<br>1. Instala esta <a href=\"https://chromewebstore.google.com/detail/get-cookiestxt-clean/ahmnmhfbokciafffnknlekllgcnafnie\" target=\"_blank\">extensión de Chrome</a><br>2. Inicia sesión en YouTube en Chrome<br>3. Haz clic en el ícono de la extensión → \"Exportar\"<br>4. Sube el archivo descargado aquí<br><br>**⏳ Vida útil de las cookies**<br>- Sesiones normales: 24 horas<br>- Operaciones sensibles: 1 hora<br>- Cambios de contraseña: Invalidación inmediata</div>",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84
  "manual_ensemble_tab": "🎚️ Ensamblaje manual",
85
  "input_sources": "📂 Fuentes de entrada",
86
  "refresh": "🔄 Actualizar",
@@ -93,19 +172,20 @@
93
  "result_preview_tab": "🎧 Vista previa del resultado",
94
  "ensembled_output": "Salida ensamblada",
95
  "processing_log_tab": "📋 Registro de procesamiento",
96
- "processing_details": "Detalles de procesamiento",
97
  "process_ensemble": "⚡ Procesar ensamblaje",
98
  "languages_tab": "🌐 Idiomas",
99
  "Vocal Models": "Modelos vocales",
100
  "Instrumental Models": "Modelos instrumentales",
101
- "4-Stem Models": "Modelos de 4 stems",
102
  "Denoise Models": "Modelos de eliminación de ruido",
103
  "Dereverb Models": "Modelos de eliminación de reverberación",
104
  "Other Models": "Otros modelos",
105
- "ensemble_files_description": "Combina varios archivos de audio en una sola salida usando métodos de ensamblaje especificados.",
106
- "ensemble_files_help": "Ruta a todos los archivos de audio para el ensamblaje",
107
- "ensemble_type_help": "Uno de avg_wave, median_wave, min_wave, max_wave, avg_fft, median_fft, min_fft, max_fft",
108
- "ensemble_weights_help": "Pesos para crear el ensamblaje. El número de pesos debe igualar el número de archivos",
 
109
  "ensemble_output_help": "Ruta al archivo WAV donde se almacenará el resultado del ensamblaje",
110
  "ensemble_type_print": "Tipo de ensamblaje: {}",
111
  "num_input_files_print": "Número de archivos de entrada: {}",
@@ -116,7 +196,7 @@
116
  "reading_chunk_print": "Leyendo fragmento del archivo: {} (inicio: {}s, duración: {}s)",
117
  "chunk_result_shape_print": "Forma del resultado del fragmento: {}",
118
  "ensemble_completed_print": "Ensamblaje completado. Salida guardada en: {}",
119
- "file_deletion_error": "{} no pudo ser eliminado: {}",
120
  "directory_not_exist_warning": "⚠️ El directorio no existe: {}",
121
  "not_a_directory_warning": "⚠️ La ruta no es un directorio: {}",
122
  "item_deletion_error": "⚠️ Error al eliminar {}: {}",
@@ -125,67 +205,75 @@
125
  "error": "🔥 Error: {}",
126
  "ffmpeg_error": "Error de FFmpeg ({}): {}",
127
  "file_saved_successfully": "Archivo guardado con éxito: {}",
128
- "total_files_found": "Total de archivos encontrados: {}. Usando tasa de muestreo: {}",
129
  "total_progress": "Progreso total",
130
- "detailed_pbar_enabled": "Barra de progreso detallada activada: {}",
131
  "loaded_audio": "Audio cargado: {}, forma: {}",
132
  "cannot_read_track": "No se puede leer la pista: {}",
133
  "error_message": "Mensaje de error: {}",
134
- "demudding_track": "Desm Mezclando pista (remezcla de fase - instrumental): {}",
135
  "elapsed_time": "Tiempo transcurrido: {:.2f} segundos.",
136
  "proc_folder_description": "Procesa archivos de audio en una carpeta usando un modelo especificado.",
137
  "model_type_help": "Tipo de modelo (bandit, bs_roformer, mdx23c, etc.)",
138
  "config_path_help": "Ruta al archivo de configuración",
139
  "demud_phaseremix_help": "Habilitar remezcla de fase para instrumental",
140
  "start_checkpoint_help": "Punto de control inicial para pesos válidos",
141
- "input_folder_help": "Carpeta con mezclas para procesar",
142
- "audio_path_help": "Ruta a un solo archivo de audio para procesar",
143
- "store_dir_help": "Ruta para almacenar los resultados",
144
  "device_ids_help": "Lista de IDs de GPU",
145
- "extract_instrumental_help": "Invertir las voces para obtener instrumental si se proporciona",
146
- "disable_detailed_pbar_help": "Deshabilitar barra de progreso detallada en la desmezcla",
147
  "force_cpu_help": "Forzar el uso de CPU incluso si CUDA está disponible",
148
  "flac_file_help": "Generar archivo FLAC en lugar de WAV",
149
  "export_format_help": "Formato de exportación y tipo PCM",
150
  "pcm_type_help": "Tipo PCM para archivos FLAC",
151
  "use_tta_help": "Habilitar aumento en tiempo de prueba",
152
  "lora_checkpoint_help": "Punto de control inicial para pesos LoRA",
153
- "cuda_available": "CUDA está disponible, usa --force_cpu para deshabilitarlo.",
154
  "using_device": "Usando dispositivo: {}",
155
  "instruments_print": "Instrumentos: {}",
156
  "model_load_time": "Tiempo de carga del modelo: {:.2f} seg",
157
  "invalid_url": "❌ URL inválida",
158
  "cookie_file_updated": "✅ ¡Archivo de cookies actualizado!",
159
- "cookie_installation_error": "⚠️ Error en la instalación de cookies: {}",
160
- "file_size_zero_error": "Tamaño del archivo cero o archivo no creado",
 
161
  "google_drive_error": "❌ Error de descarga de Google Drive: {}",
162
- "wav_conversion_failed": "Fallo en la conversión a WAV",
163
  "download_error": "❌ Error de descarga: {}",
 
 
 
 
 
 
 
164
  "download_success": "🎉 ¡Descargado con éxito!",
165
- "download_failed": "❌ Fallo en la descarga",
166
  "no_output_files_found": "❌ No se encontraron archivos de salida.",
167
  "output_refreshed_successfully": "✅ ¡Salida actualizada con éxito!",
168
- "error_refreshing_output": "❌ Error al actualizar la salida: {}",
169
  "starting_audio_separation": "Iniciando separación de audio...",
170
  "processing_audio": "Procesando audio",
171
  "separating_audio": "Separando audio... ({:.1f}%)",
172
- "separation_complete": "¡Separación completada!",
173
- "progress_parsing_error": "Error al analizar el progreso: {}",
174
  "error_occurred": "Ocurrió un error: {}",
175
  "separation_process_completed": "¡Proceso de separación completado!",
176
  "no_audio_file_error": "❌ No se proporcionó archivo de audio y no hay archivo existente en el directorio de entrada.",
177
  "no_input_progress_label": "Error: No se proporcionó entrada -- 0.0%",
178
- "processing_audio_print": "Procesando audio desde: {} usando el modelo: {}",
179
  "starting_audio_separation_progress_label": "Iniciando separación de audio... -- 0.0%",
180
  "separating_audio_progress_label": "Separando audio... -- {}.0%",
181
  "audio_processing_completed_progress_label": "¡Procesamiento de audio completado! -- 100.0%",
182
  "audio_processing_completed": "✅ ¡Procesamiento de audio completado!",
183
  "error_occurred_progress_label": "Ocurrió un error -- 0.0%",
184
- "minimum_files_required": "⚠️ Se requieren al menos 2 archivos",
185
  "valid_files_not_found": "❌ No se encontraron archivos válidos",
186
  "starting_ensemble_process": "Iniciando proceso de ensamblaje...",
187
  "ensembling_progress": "Ensamblando... ({:.1f}%)",
188
- "finalizing_ensemble_output": "Finalizando salida del ensamblaje...",
189
  "success_log": "✅ ¡Éxito!\n{}",
190
  "error_log": "❌ ¡Error!\n{}",
191
  "critical_error": "⛔ Error crítico: {}",
@@ -197,84 +285,95 @@
197
  "completed_model": "Modelo completado {}/{}: {}",
198
  "completed_model_progress_label": "Modelo completado {}/{}: {} -- {}.0%",
199
  "running_command": "Ejecutando comando: {}",
200
- "model_failed": "El modelo {} falló: {}",
201
  "critical_error_with_model": "Error crítico con {}: {}",
202
- "model_output_failed": "{} falló al producir salida",
203
- "waiting_for_files": "Esperando a que todos los archivos estén listos...",
204
- "waiting_for_files_progress_label": "Esperando a que todos los archivos estén listos... -- 90.0%",
205
  "performing_ensemble": "Realizando ensamblaje...",
206
  "performing_ensemble_progress_label": "Realizando ensamblaje... -- 92.0%",
207
  "memory_usage_before_ensemble": "Uso de memoria antes del ensamblaje: {}%",
208
  "memory_usage_after_ensemble": "Uso de memoria después del ensamblaje: {}%",
209
- "finalizing_ensemble_output_progress_label": "Finalizando salida del ensamblaje... -- 98.0%",
210
  "ensemble_file_creation_failed": "No se pudo crear el archivo de ensamblaje: {}",
211
  "ensemble_completed_progress_label": "¡Ensamblaje completado con éxito! -- 100.0%",
212
  "success_output_created": "✅ ¡Éxito! Archivo de salida creado.",
213
- "drive_mounted_copying_ensemble": "Google Drive montado. Copiando salida del ensamblaje...",
214
- "drive_already_mounted_copying_ensemble": "Google Drive ya montado. Copiando salida del ensamblaje...",
215
- "no_ensemble_output_files_found": " No se encontraron archivos de salida de ensamblaje.",
216
- "ensemble_output_copied": " Salida del ensamblaje copiada a {}",
217
- "error_copying_ensemble_output": " Error al copiar la salida del ensamblaje: {}",
218
- "drive_mounted_copying_files": "Google Drive montado. Copiando archivos...",
219
- "drive_already_mounted_copying_files": "Google Drive ya montado. Copiando archivos...",
220
- "files_copied_to_drive": "Archivos copiados a {}",
221
- "error_copying_files": " Error al copiar archivos: {}",
222
- "mounting_drive": "Montando Google Drive...",
223
- "settings_saved": "Configuración guardada en config.json: Idioma={lang}, Método={method}, Puerto={port}",
224
- "selected_language": "Idioma seleccionado: {lang}",
225
- "available_languages": "Idiomas disponibles: {langs}",
226
- "select_language_prompt": "Selecciona un idioma (predeterminado: {default}): ",
227
- "invalid_language": "¡Selección de idioma inválida! Usando idioma predeterminado ({default}).",
228
- "available_sharing_methods": "Métodos de compartición disponibles: gradio, localtunnel, ngrok",
229
- "select_sharing_method_prompt": "Elige un método de compartición (predeterminado: {default}): ",
230
- "enter_ngrok_token_prompt": "Ingresa tu token de Ngrok (predeterminado: {default}): ",
231
- "ngrok_token_required": "Error: ¡Se requiere un token de Ngrok para el método ngrok!",
232
- "enter_port_prompt": "Ingresa el número de puerto (predeterminado: {default}): ",
233
- "opening_previous_url": "Abriendo URL anterior: {url}",
234
- "open_url_manually": "Abre la URL manualmente en Colab: {url}",
235
- "invalid_previous_url": "URL anterior inválida: {url}, no abierta.",
236
- "starting_method": "Iniciando {method} en el puerto {port}...",
237
- "process_stopped": "\n🛑 Proceso detenido por el usuario",
238
- "selected_port": "Puerto seleccionado: {port}",
239
- "starting_gradio_with_sharing": "Iniciando Gradio con compartición integrada...",
240
- "starting_localtunnel": "Iniciando Localtunnel en el puerto {port}...",
241
- "share_link": "Enlace para compartir: {url}",
242
- "password_ip": "Contraseña IP: {ip}",
243
- "starting_ngrok": "Iniciando Ngrok en el puerto {port}...",
244
- "ngrok_url": "URL de Ngrok: {url}",
245
- "ngrok_error": "Error al iniciar Ngrok: {error}",
246
- "apollo_chunk_size": "Tamaño de fragmento Apollo",
247
- "apollo_chunk_size_info": "Tamaño de fragmento para mejora Apollo (recomendado: 19 para modelo universal)",
248
- "apollo_overlap": "Solapamiento Apollo",
249
- "enhancing_with_apollo": "Mejorando con Apollo ({}/{} archivos)...",
250
- "apollo_processing_completed": "¡Procesamiento Apollo completado!",
251
- "apollo_overlap_info": "Solapamiento para mejora Apollo (recomendado: 2)",
252
- "invalid_method": "Error: ¡Método inválido! Usa 'gradio', 'localtunnel' o 'ngrok'.",
253
- "apollo_enhancement_settings": "Configuración de mejora Apollo",
254
- "enhance_with_apollo": "Mejorar con Apollo",
255
- "enhance_with_apollo_info": "Habilitar Apollo para mejora de audio después de la separación",
256
- "apollo_method": "Método Apollo",
257
- "apollo_method_info": "Selecciona el método de procesamiento para Apollo",
 
 
 
 
 
 
 
 
 
 
 
258
  "normal_method": "Método normal",
259
- "mid_side_method": "Método centro/lateral",
260
- "apollo_normal_model": "Modelo Apollo normal",
261
- "apollo_normal_model_info": "Modelo a usar para procesamiento Apollo normal",
262
- "apollo_midside_model": "Modelo Apollo centro/lateral",
263
- "apollo_enhancement_info": "Mejora de la calidad del sonido",
264
- "selected_models": "Modelos Seleccionados",
265
- "save_preset": "Guardar Preajuste",
266
- "delete_preset": "Eliminar Preajuste",
267
- "refresh_presets": "Actualizar Preajustes",
268
- "preset_name": "Nombre del Preajuste",
269
- "select_preset": "Seleccionar Preajuste",
270
- "add_favorite": "Añadir a Favoritos",
271
- "apollo_midside_model_info": "Modelo a usar para procesamiento centro/lateral (opcional)",
272
- "language_changed_message": "Idioma cambiado. Por favor, reinicia la celda.",
273
- "apply_matchering": "Aplicar Matchering",
274
- "matchering_info": "Aplicar Matchering para mejorar el masterizado y el balance del audio",
275
- "matchering_passes": "Pasadas de Matchering",
276
- "matchering_passes_info": "Número de iteraciones de Matchering (1-5, valores más altos mejoran la calidad pero toman más tiempo)",
277
- "matchering_processing": "Aplicando Matchering... ({:.1f}%)",
278
- "matchering_completed": " ¡Procesamiento de Matchering completado!",
279
- "matchering_error": " Error durante Matchering: {}"
280
  }
 
17
  "overlap": "Solapamiento",
18
  "overlap_info": "Recomendado: 2-10 (valores más altos mejoran la calidad pero requieren más VRAM)",
19
  "tta_boost": "Impulso TTA",
 
20
  "phase_fix": "Corrección de fase",
21
+ "use_phase_correction": "Aplicar Corrección de Fase Avanzada",
22
  "phase_fix_info": "Corrección avanzada de fase para pistas instrumentales",
23
  "instrumental": "Instrumental",
 
24
  "process": "🚀 Procesar",
25
  "reset": "🧹 Restablecer",
26
  "status": "Estado",
 
79
  "downloaded_file": "Archivo descargado",
80
  "cookie_management": "🍪 Gestión de cookies",
81
  "upload_cookies_txt": "Subir Cookies.txt",
82
+ "cookie_info": "<div style=\"margin-left:15px; font-size:0.95em\">**📌 ¿Por qué es necesario?**<br>- Acceso a contenido con restricción de edad<br>- Descarga de videos privados/no listados<br>- Evitar restricciones regionales<br>- Evitar límites de descarga de YouTube<br><br>**⚠️ Notas Importantes**<br>- ¡NUNCA compartas tus archivos de cookies!<br>- Actualiza cookies cuando:<br> • Obtengas errores \"403 Prohibido\"<br> • Las descargas se detengan de repente<br> • Veas mensajes \"Sesión expirada\"<br><br>**🔄 Pasos de Renovación**<br>1. Instala esta <a href=\"https://chromewebstore.google.com/detail/get-cookiestxt-clean/ahmnmhfbokciafffnknlekllgcnafnie\" target=\"_blank\">extensión de Chrome</a><br>2. Inicia sesión en YouTube en Chrome<br>3. Haz clic en el icono de la extensión → \"Exportar\"<br>4. Sube el archivo descargado aquí<br><br>**⏳ Vida Útil de Cookies**<br>- Sesiones normales: 24 horas<br>- Operaciones sensibles: 1 hora<br>- Cambio de contraseña: Actualización inmediata</div>",
83
+ "ensemble_output_copied": "✅ Salida del ensamblaje copiada a {}",
84
+ "error_copying_ensemble_output": "❌ Error al copiar salida del ensamblaje: {}",
85
+ "drive_mounted_copying_ensemble": "Google Drive montado. Copiando salida del ensamblaje...",
86
+ "drive_already_mounted_copying_ensemble": "Google Drive ya montado. Copiando salida del ensamblaje...",
87
+ "no_ensemble_output_files_found": "❌ No se encontraron archivos de salida del ensamblaje.",
88
+ "drive_mounted_copying_files": "Google Drive montado. Copiando archivos...",
89
+ "drive_already_mounted_copying_files": "Google Drive ya montado. Copiando archivos...",
90
+ "files_copied_to_drive": "✅ Archivos copiados a {}",
91
+ "error_copying_files": "❌ Error al copiar archivos: {}",
92
+ "mounting_drive": "Montando Google Drive...",
93
+ "settings_saved": "Configuración guardada en config.json: Idioma={lang}, Método={method}, Puerto={port}",
94
+ "selected_language": "Idioma seleccionado: {lang}",
95
+ "available_languages": "Idiomas disponibles: {langs}",
96
+ "select_language_prompt": "Selecciona un idioma (predeterminado: {default}): ",
97
+ "invalid_language": "¡Selección de idioma inválida! Usando idioma predeterminado ({default}).",
98
+ "available_sharing_methods": "Métodos de compartición disponibles: gradio, localtunnel, ngrok",
99
+ "select_sharing_method_prompt": "Elige un método de compartición (predeterminado: {default}): ",
100
+ "enter_ngrok_token_prompt": "Ingresa tu token de Ngrok (predeterminado: {default}): ",
101
+ "ngrok_token_required": "Error: ¡Se requiere un token de Ngrok para el método ngrok!",
102
+ "enter_port_prompt": "Ingresa el número de puerto (predeterminado: {default}): ",
103
+ "opening_previous_url": "Abriendo URL anterior: {url}",
104
+ "open_url_manually": "Abre la URL manualmente en Colab: {url}",
105
+ "invalid_previous_url": "URL anterior inválida: {url}, no abierta.",
106
+ "starting_method": "Iniciando {method} en el puerto {port}...",
107
+ "process_stopped": "\n🛑 Proceso detenido por el usuario",
108
+ "selected_port": "Puerto seleccionado: {port}",
109
+ "starting_gradio_with_sharing": "Iniciando Gradio con compartición integrada...",
110
+ "starting_localtunnel": "Iniciando Localtunnel en el puerto {port}...",
111
+ "share_link": "Enlace para compartir: {url}",
112
+ "password_ip": "Contraseña IP: {ip}",
113
+ "starting_ngrok": "Iniciando Ngrok en el puerto {port}...",
114
+ "ngrok_url": "URL de Ngrok: {url}",
115
+ "ngrok_error": "Error al iniciar Ngrok: {error}",
116
+ "auto_apollo_chunk_size": "Tamaño de fragmento Apollo",
117
+ "auto_apollo_chunk_size_info": "Tamaño de fragmento para mejora Apollo (recomendado: 19 para modelo universal)",
118
+ "auto_apollo_overlap": "Solapamiento Apollo",
119
+ "auto_enhancing_with_apollo": "Mejorando con Apollo ({}/{} archivos)...",
120
+ "auto_apollo_processing_completed": "¡Procesamiento Apollo completado!",
121
+ "auto_apollo_overlap_info": "Solapamiento para mejora Apollo (recomendado: 2)",
122
+ "apollo_chunk_size": "Tamaño de fragmento Apollo",
123
+ "apollo_chunk_size_info": "Tamaño de fragmento para mejora Apollo (recomendado: 19 para modelo universal)",
124
+ "apollo_overlap": "Solapamiento Apollo",
125
+ "enhance_with_apollo": "Mejorar con Apollo",
126
+ "apollo_enhancement_info": "Mejora de la calidad del sonido",
127
+ "enhancing_with_apollo": "Mejorando con Apollo ({}/{} archivos)...",
128
+ "apollo_processing_completed": "¡Procesamiento Apollo completado!",
129
+ "apollo_overlap_info": "Solapamiento para mejora Apollo (recomendado: 2)",
130
+ "selected_models": "Modelos seleccionados",
131
+ "save_preset": "Guardar preajuste",
132
+ "delete_preset": "Eliminar preajuste",
133
+ "refresh_presets": "Actualizar preajustes",
134
+ "preset_name": "Nombre del preajuste",
135
+ "select_preset": "Seleccionar preajuste",
136
+ "add_favorite": "Añadir a favoritos",
137
+ "apply_matchering": "Aplicar Matchering",
138
+ "matchering_info": "Aplicar Matchering para mejorar el masterizado y el balance del audio",
139
+ "matchering_passes": "Pasadas de Matchering",
140
+ "matchering_passes_info": "Número de iteraciones de Matchering (1-5, valores más altos mejoran la calidad pero toman más tiempo)",
141
+ "matchering_processing": "Aplicando Matchering... ({:.1f}%)",
142
+ "matchering_completed": "✅ ¡Procesamiento de Matchering completado!",
143
+ "matchering_error": "❌ Error durante Matchering: {}",
144
+ "invalid_method": "Error: ¡Método inválido! Usa 'gradio', 'localtunnel' o 'ngrok'.",
145
+ "language_changed_message": "Idioma cambiado. Por favor, reinicia la celda.",
146
+ "backend_settings": "🚀 Configuración de backend",
147
+ "inference_backend": "Backend de inferencia",
148
+ "optimization_mode": "Modo de optimización",
149
+ "optimization_mode_info": "Selección de modo de optimización PyTorch",
150
+ "mixed_precision_amp": "Precisión mixta (AMP)",
151
+ "mixed_precision_info": "Inferencia 2x más rápida - recomendado",
152
+ "tf32_acceleration": "Aceleración TF32",
153
+ "tf32_acceleration_info": "Aumento de velocidad extra para RTX 30xx+",
154
+ "cudnn_benchmark": "Benchmark cuDNN",
155
+ "cudnn_benchmark_info": "Más lento en la primera ejecución, mucho más rápido en ejecuciones posteriores",
156
+ "ultra_optimized_pytorch": "Backend PyTorch ULTRA-OPTIMIZADO",
157
+ "default_active_max_speed": "Activo por defecto - Optimización de velocidad máxima",
158
+ "channels_last_mode": "El más rápido para GPUs RTX",
159
+ "compile_mode": "Velocidad extra para PyTorch 2.0+",
160
+ "default_mode": "Estándar",
161
+ "tta_info": "Aumenta la calidad pero ralentiza el procesamiento",
162
+ "instrumental_info": "Normalmente produce 2 stems, pero a veces solo uno - ¡tienes que probar! :)",
163
  "manual_ensemble_tab": "🎚️ Ensamblaje manual",
164
  "input_sources": "📂 Fuentes de entrada",
165
  "refresh": "🔄 Actualizar",
 
172
  "result_preview_tab": "🎧 Vista previa del resultado",
173
  "ensembled_output": "Salida ensamblada",
174
  "processing_log_tab": "📋 Registro de procesamiento",
175
+ "processing_details": "Detalles del procesamiento",
176
  "process_ensemble": "⚡ Procesar ensamblaje",
177
  "languages_tab": "🌐 Idiomas",
178
  "Vocal Models": "Modelos vocales",
179
  "Instrumental Models": "Modelos instrumentales",
180
+ "4-Stem Models": "Modelos de 4 stems",
181
  "Denoise Models": "Modelos de eliminación de ruido",
182
  "Dereverb Models": "Modelos de eliminación de reverberación",
183
  "Other Models": "Otros modelos",
184
+ "Other Shapes": "Otros modelos",
185
+ "ensemble_files_description": "Combina múltiples archivos de audio en una sola salida usando métodos de ensamblaje especificados.",
186
+ "ensemble_files_help": "Ruta a todos los archivos de audio para ensamblaje",
187
+ "ensemble_type_help": "Uno de: avg_wave, median_wave, min_wave, max_wave, avg_fft, median_fft, min_fft, max_fft",
188
+ "ensemble_weights_help": "Pesos para crear el ensamblaje. El número de pesos debe ser igual al número de archivos",
189
  "ensemble_output_help": "Ruta al archivo WAV donde se almacenará el resultado del ensamblaje",
190
  "ensemble_type_print": "Tipo de ensamblaje: {}",
191
  "num_input_files_print": "Número de archivos de entrada: {}",
 
196
  "reading_chunk_print": "Leyendo fragmento del archivo: {} (inicio: {}s, duración: {}s)",
197
  "chunk_result_shape_print": "Forma del resultado del fragmento: {}",
198
  "ensemble_completed_print": "Ensamblaje completado. Salida guardada en: {}",
199
+ "file_deletion_error": "{} no se pudo eliminar: {}",
200
  "directory_not_exist_warning": "⚠️ El directorio no existe: {}",
201
  "not_a_directory_warning": "⚠️ La ruta no es un directorio: {}",
202
  "item_deletion_error": "⚠️ Error al eliminar {}: {}",
 
205
  "error": "🔥 Error: {}",
206
  "ffmpeg_error": "Error de FFmpeg ({}): {}",
207
  "file_saved_successfully": "Archivo guardado con éxito: {}",
208
+ "total_files_found": "Total de archivos encontrados: {}. Tasa de muestreo utilizada: {}",
209
  "total_progress": "Progreso total",
210
+ "detailed_pbar_enabled": "Barra de progreso detallada habilitada: {}",
211
  "loaded_audio": "Audio cargado: {}, forma: {}",
212
  "cannot_read_track": "No se puede leer la pista: {}",
213
  "error_message": "Mensaje de error: {}",
214
+ "demudding_track": "Limpiando pista (remezcla de fase - instrumental): {}",
215
  "elapsed_time": "Tiempo transcurrido: {:.2f} segundos.",
216
  "proc_folder_description": "Procesa archivos de audio en una carpeta usando un modelo especificado.",
217
  "model_type_help": "Tipo de modelo (bandit, bs_roformer, mdx23c, etc.)",
218
  "config_path_help": "Ruta al archivo de configuración",
219
  "demud_phaseremix_help": "Habilitar remezcla de fase para instrumental",
220
  "start_checkpoint_help": "Punto de control inicial para pesos válidos",
221
+ "input_folder_help": "Carpeta con mezclas a procesar",
222
+ "audio_path_help": "Ruta a un solo archivo de audio a procesar",
223
+ "store_dir_help": "Ruta para almacenar resultados",
224
  "device_ids_help": "Lista de IDs de GPU",
225
+ "extract_instrumental_help": "Invertir voces para obtener instrumental si se proporciona",
226
+ "disable_detailed_pbar_help": "Desactivar barra de progreso detallada en demix",
227
  "force_cpu_help": "Forzar el uso de CPU incluso si CUDA está disponible",
228
  "flac_file_help": "Generar archivo FLAC en lugar de WAV",
229
  "export_format_help": "Formato de exportación y tipo PCM",
230
  "pcm_type_help": "Tipo PCM para archivos FLAC",
231
  "use_tta_help": "Habilitar aumento en tiempo de prueba",
232
  "lora_checkpoint_help": "Punto de control inicial para pesos LoRA",
233
+ "cuda_available": "CUDA está disponible, use --force_cpu para desactivarlo.",
234
  "using_device": "Usando dispositivo: {}",
235
  "instruments_print": "Instrumentos: {}",
236
  "model_load_time": "Tiempo de carga del modelo: {:.2f} seg",
237
  "invalid_url": "❌ URL inválida",
238
  "cookie_file_updated": "✅ ¡Archivo de cookies actualizado!",
239
+ "cookie_installation_error": "⚠️ Error de instalación de cookies: {}",
240
+ "google_drive_mount_error": "❌ Error de conexión de Google Drive: {}",
241
+ "file_size_zero_error": "Tamaño de archivo cero o archivo no creado",
242
  "google_drive_error": "❌ Error de descarga de Google Drive: {}",
243
+ "wav_conversion_failed": "Conversión WAV fallida",
244
  "download_error": "❌ Error de descarga: {}",
245
+ "file_copied_to_drive": "Archivo copiado a Google Drive: {}",
246
+ "copy_to_drive_error": "❌ Error al copiar a Google Drive: {}",
247
+ "direct_download_failed": "Falló la descarga directa del archivo",
248
+ "direct_download_error": "❌ Error de descarga directa: {}",
249
+ "invalid_google_drive_url": "❌ URL de Google Drive no válida",
250
+ "continuing_without_google_drive": "Falló la conexión con Google Drive, continuando sin él...",
251
+ "skipping_drive_copy_no_mount": "⚠️ Google Drive no montado, omitiendo copia a Drive...",
252
  "download_success": "🎉 ¡Descargado con éxito!",
253
+ "download_failed": "❌ Descarga fallida",
254
  "no_output_files_found": "❌ No se encontraron archivos de salida.",
255
  "output_refreshed_successfully": "✅ ¡Salida actualizada con éxito!",
256
+ "error_refreshing_output": "❌ Error al actualizar salida: {}",
257
  "starting_audio_separation": "Iniciando separación de audio...",
258
  "processing_audio": "Procesando audio",
259
  "separating_audio": "Separando audio... ({:.1f}%)",
260
+ "separation_complete": "¡Separación completa!",
261
+ "progress_parsing_error": "Error de análisis de progreso: {}",
262
  "error_occurred": "Ocurrió un error: {}",
263
  "separation_process_completed": "¡Proceso de separación completado!",
264
  "no_audio_file_error": "❌ No se proporcionó archivo de audio y no hay archivo existente en el directorio de entrada.",
265
  "no_input_progress_label": "Error: No se proporcionó entrada -- 0.0%",
266
+ "processing_audio_print": "Procesando audio desde: {} usando modelo: {}",
267
  "starting_audio_separation_progress_label": "Iniciando separación de audio... -- 0.0%",
268
  "separating_audio_progress_label": "Separando audio... -- {}.0%",
269
  "audio_processing_completed_progress_label": "¡Procesamiento de audio completado! -- 100.0%",
270
  "audio_processing_completed": "✅ ¡Procesamiento de audio completado!",
271
  "error_occurred_progress_label": "Ocurrió un error -- 0.0%",
272
+ "minimum_files_required": "⚠️ Se requieren mínimo 2 archivos",
273
  "valid_files_not_found": "❌ No se encontraron archivos válidos",
274
  "starting_ensemble_process": "Iniciando proceso de ensamblaje...",
275
  "ensembling_progress": "Ensamblando... ({:.1f}%)",
276
+ "finalizing_ensemble_output": "Finalizando salida de ensamblaje...",
277
  "success_log": "✅ ¡Éxito!\n{}",
278
  "error_log": "❌ ¡Error!\n{}",
279
  "critical_error": "⛔ Error crítico: {}",
 
285
  "completed_model": "Modelo completado {}/{}: {}",
286
  "completed_model_progress_label": "Modelo completado {}/{}: {} -- {}.0%",
287
  "running_command": "Ejecutando comando: {}",
288
+ "model_failed": "Modelo {} falló: {}",
289
  "critical_error_with_model": "Error crítico con {}: {}",
290
+ "model_output_failed": "{} no pudo producir salida",
291
+ "waiting_for_files": "Esperando que todos los archivos estén listos...",
292
+ "waiting_for_files_progress_label": "Esperando que todos los archivos estén listos... -- 90.0%",
293
  "performing_ensemble": "Realizando ensamblaje...",
294
  "performing_ensemble_progress_label": "Realizando ensamblaje... -- 92.0%",
295
  "memory_usage_before_ensemble": "Uso de memoria antes del ensamblaje: {}%",
296
  "memory_usage_after_ensemble": "Uso de memoria después del ensamblaje: {}%",
297
+ "finalizing_ensemble_output_progress_label": "Finalizando salida de ensamblaje... -- 98.0%",
298
  "ensemble_file_creation_failed": "No se pudo crear el archivo de ensamblaje: {}",
299
  "ensemble_completed_progress_label": "¡Ensamblaje completado con éxito! -- 100.0%",
300
  "success_output_created": "✅ ¡Éxito! Archivo de salida creado.",
301
+ "phase_fixer_tab": "🔧 Corrector de fase",
302
+ "batch_processing_tab": "📦 Procesamiento por lotes",
303
+ "batch_description": "Procesa múltiples archivos a la vez con el mismo modelo",
304
+ "batch_input_folder": "Carpeta de entrada",
305
+ "batch_input_folder_placeholder": "Ingresa la ruta de la carpeta con archivos de audio",
306
+ "batch_output_folder": "Carpeta de salida",
307
+ "batch_output_folder_placeholder": "Ingresa la ruta de la carpeta de salida",
308
+ "batch_file_list": "Archivos a procesar",
309
+ "batch_add_files": "Añadir archivos",
310
+ "batch_clear_list": "Limpiar lista",
311
+ "batch_start": "🚀 Iniciar procesamiento por lotes",
312
+ "batch_stop": "⏹️ Detener",
313
+ "batch_progress": "Progreso del lote",
314
+ "batch_current_file": "Procesando actualmente",
315
+ "batch_completed": "¡Procesamiento por lotes completado!",
316
+ "batch_stopped": "⏹️ Procesamiento por lotes detenido",
317
+ "batch_no_files": "❌ No se seleccionaron archivos para procesar",
318
+ "source_model": "Modelo fuente (Voces)",
319
+ "source_model_info": "Modelo para extraer voces/datos de fase",
320
+ "target_model": "Modelo objetivo (Instrumental)",
321
+ "target_model_info": "Modelo al que se aplica la corrección de fase",
322
+ "custom_models_tab": "⚡ Modelos personalizados",
323
+ "Custom Models": "Modelos personalizados",
324
+ "add_custom_model": "➕ Añadir modelo personalizado",
325
+ "custom_model_name": "Nombre del modelo",
326
+ "custom_model_name_placeholder": "Ingresa un nombre para tu modelo",
327
+ "checkpoint_url": "URL del Checkpoint (.ckpt/.pth)",
328
+ "checkpoint_url_placeholder": "https://huggingface.co/.../model.ckpt",
329
+ "config_url": "URL de configuración (.yaml)",
330
+ "config_url_placeholder": "https://huggingface.co/.../config.yaml",
331
+ "custom_py_url": "URL de archivo Python personalizado (opcional)",
332
+ "custom_py_url_placeholder": "https://huggingface.co/.../bs_roformer.py",
333
+ "auto_detect_type": "Detectar tipo de modelo automáticamente",
334
+ "model_type": "Tipo de modelo",
335
+ "add_model_btn": "➕ Añadir modelo",
336
+ "custom_models_list": "📋 Lista de modelos personalizados",
337
+ "delete_model": "🗑️ Eliminar",
338
+ "no_custom_models": "Aún no se han añadido modelos personalizados",
339
+ "model_added_success": "✅ ¡Modelo añadido con éxito!",
340
+ "model_deleted_success": "✅ ¡Modelo eliminado con éxito!",
341
+ "model_add_error": "❌ Error al añadir el modelo: {}",
342
+ "model_delete_error": "❌ Error al eliminar el modelo: {}",
343
+ "refresh_models": "🔄 Actualizar",
344
+ "custom_model_info": "Añade modelos personalizados proporcionando enlaces de descarga. Los modelos se descargarán automáticamente cuando se utilicen.",
345
+ "select_model_to_delete": "Selecciona un modelo para eliminar",
346
+ "downloading_model_file": "Descargando modelo: {}",
347
+ "downloading_file_progress": "Descargando: {} - {}%",
348
+ "chunk_size_mode": "Modo de Tamaño de Fragmento",
349
+ "chunk_size_mode_info": "base: presets estándar | custom: introduce tu propio valor | yaml: leer del YAML del modelo",
350
+ "chunk_size_custom_label": "Tamaño de Fragmento Personalizado",
351
+ "chunk_size_custom_info": "Introduce un valor personalizado de tamaño de fragmento",
352
+ "chunk_size_yaml_label": "Tamaño de Fragmento YAML",
353
+ "chunk_size_yaml_display_info": "Se leerá del archivo de configuración YAML del modelo tras la descarga",
354
+ "chunk_size_yaml_not_downloaded": "⏳ YAML del modelo aún no descargado — el valor se leerá durante el procesamiento",
355
+ "chunk_size_yaml_detected": "✅ Del YAML: {}",
356
+ "remove_favorite": "Eliminar de Favoritos",
357
  "normal_method": "Método normal",
358
+ "mid_side_method": "Método Mid-Side",
359
+ "apollo_processing_method": "Método de Procesamiento Apollo",
360
+ "apollo_normal_model": "Modelo Normal Apollo",
361
+ "apollo_mid_side_model": "Modelo Mid-Side Apollo",
362
+ "scale_factor": "Factor de Escala",
363
+ "scale_factor_info": "Factor de escala de fase (recomendado: 1.4)",
364
+ "source_file_label": "Archivo Fuente (Voces)",
365
+ "target_file_label": "Archivo Destino (Instrumental)",
366
+ "run_phase_fixer": "🔧 Ejecutar Corrector de Fase",
367
+ "phase_fixed_output": "Salida con Fase Corregida",
368
+ "phase_fixer_settings": "Ajustes del Corrector de Fase",
369
+ "low_cutoff": "Corte Bajo (Hz)",
370
+ "high_cutoff": "Corte Alto (Hz)",
371
+ "low_cutoff_info": "Frecuencia de corte baja para la corrección de fase",
372
+ "high_cutoff_info": "Frecuencia de corte alta para la corrección de fase",
373
+ "no_preset_name_provided": "⚠️ No se proporcionó nombre de preset",
374
+ "no_models_selected_for_preset": "⚠️ No se seleccionaron modelos para el preset",
375
+ "preset_saved": "✅ ¡Preset '{}' guardado!",
376
+ "please_upload_both_files": "⚠️ Por favor sube tanto el archivo fuente como el destino",
377
+ "processing_log_placeholder": "El registro de procesamiento aparecerá aquí...",
378
+ "enter_preset_name": "Introduce el nombre del preset..."
379
  }
assets/i18n/languages/fr_fr.json CHANGED
@@ -209,7 +209,6 @@
209
  "finalizing_ensemble_output_progress_label": "Finalisation de la sortie de l'assemblage... -- 98.0%",
210
  "ensemble_file_creation_failed": "Le fichier d'assemblage n'a pas pu être créé : {}",
211
  "ensemble_completed_progress_label": "Assemblage terminé avec succès ! -- 100.0%",
212
- "success_output_created": "✅ Succès ! Fichier de sortie créé.",
213
  "drive_mounted_copying_ensemble": "Google Drive monté. Copie de la sortie de l'assemblage...",
214
  "drive_already_mounted_copying_ensemble": "Google Drive déjà monté. Copie de la sortie de l'assemblage...",
215
  "no_ensemble_output_files_found": "❌ Aucun fichier de sortie d'assemblage trouvé.",
@@ -269,5 +268,90 @@
269
  "select_preset": "Sélectionner un Préréglage",
270
  "add_favorite": "Ajouter aux Favoris",
271
  "apollo_midside_model_info": "Modèle à utiliser pour le traitement centre/côté (optionnel)",
272
- "language_changed_message": "Langue modifiée. Veuillez redémarrer la cellule."
273
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
209
  "finalizing_ensemble_output_progress_label": "Finalisation de la sortie de l'assemblage... -- 98.0%",
210
  "ensemble_file_creation_failed": "Le fichier d'assemblage n'a pas pu être créé : {}",
211
  "ensemble_completed_progress_label": "Assemblage terminé avec succès ! -- 100.0%",
 
212
  "drive_mounted_copying_ensemble": "Google Drive monté. Copie de la sortie de l'assemblage...",
213
  "drive_already_mounted_copying_ensemble": "Google Drive déjà monté. Copie de la sortie de l'assemblage...",
214
  "no_ensemble_output_files_found": "❌ Aucun fichier de sortie d'assemblage trouvé.",
 
268
  "select_preset": "Sélectionner un Préréglage",
269
  "add_favorite": "Ajouter aux Favoris",
270
  "apollo_midside_model_info": "Modèle à utiliser pour le traitement centre/côté (optionnel)",
271
+ "language_changed_message": "Langue modifiée. Veuillez redémarrer la cellule.",
272
+ "Other Shapes": "Autres modèles",
273
+ "use_phase_correction": "Appliquer la correction de phase avancée",
274
+ "google_drive_mount_error": "❌ Erreur de liaison Google Drive : {}",
275
+ "file_copied_to_drive": "Fichier copié sur Google Drive : {}",
276
+ "copy_to_drive_error": "❌ Erreur lors de la copie sur Google Drive : {}",
277
+ "direct_download_failed": "Échec du téléchargement direct du fichier",
278
+ "direct_download_error": "❌ Erreur de téléchargement direct : {}",
279
+ "invalid_google_drive_url": "❌ URL Google Drive non valide",
280
+ "continuing_without_google_drive": "La connexion Google Drive a échoué, poursuite sans...",
281
+ "skipping_drive_copy_no_mount": "⚠️ Google Drive non monté, omission de la copie sur Drive...",
282
+ "auto_apollo_chunk_size": "Taille de fragment Apollo",
283
+ "auto_apollo_chunk_size_info": "Taille de fragment pour l'amélioration Apollo (recommandé : 19 pour le modèle universel)",
284
+ "auto_apollo_overlap": "Chevauchement Apollo",
285
+ "auto_enhancing_with_apollo": "Amélioration avec Apollo ({}/{} fichiers)...",
286
+ "auto_apollo_processing_completed": "Traitement Apollo terminé !",
287
+ "auto_apollo_overlap_info": "Chevauchement pour l'amélioration Apollo (recommandé : 2)",
288
+ "apply_matchering": "Appliquer Matchering",
289
+ "matchering_info": "Appliquer Matchering pour améliorer le mastering et l'équilibre audio",
290
+ "matchering_passes": "Passages Matchering",
291
+ "matchering_passes_info": "Nombre d'itérations Matchering (1-5, des valeurs plus élevées améliorent la qualité mais prennent plus de temps)",
292
+ "matchering_processing": "Application de Matchering... ({:.1f}%)",
293
+ "matchering_completed": "✅ Traitement Matchering terminé !",
294
+ "matchering_error": "❌ Erreur pendant Matchering : {}",
295
+ "backend_settings": "🚀 Paramètres Backend",
296
+ "inference_backend": "Backend d'Inférence",
297
+ "optimization_mode": "Mode d'Optimisation",
298
+ "optimization_mode_info": "Sélection du mode d'optimisation PyTorch",
299
+ "mixed_precision_amp": "Précision Mixte (AMP)",
300
+ "mixed_precision_info": "Inférence 2x plus rapide - recommandé",
301
+ "tf32_acceleration": "Accélération TF32",
302
+ "tf32_acceleration_info": "Augmentation de vitesse supplémentaire pour RTX 30xx+",
303
+ "cudnn_benchmark": "Benchmark cuDNN",
304
+ "cudnn_benchmark_info": "Plus lent lors de la première exécution, beaucoup plus rapide lors des exécutions suivantes",
305
+ "ultra_optimized_pytorch": "Backend PyTorch ULTRA-OPTIMISÉ",
306
+ "default_active_max_speed": "Actif par défaut - Optimisation de vitesse maximale",
307
+ "channels_last_mode": "Le plus rapide pour les GPU RTX",
308
+ "compile_mode": "Vitesse supplémentaire pour PyTorch 2.0+",
309
+ "default_mode": "Standard",
310
+ "phase_fixer_tab": "🔧 Correcteur de phase",
311
+ "batch_processing_tab": "📦 Traitement par lots",
312
+ "batch_description": "Traiter plusieurs fichiers à la fois avec le même modèle",
313
+ "batch_input_folder": "Dossier d'entrée",
314
+ "batch_input_folder_placeholder": "Entrez le chemin du dossier contenant les fichiers audio",
315
+ "batch_output_folder": "Dossier de sortie",
316
+ "batch_output_folder_placeholder": "Entrez le chemin du dossier de sortie",
317
+ "batch_file_list": "Fichiers à traiter",
318
+ "batch_add_files": "Ajouter des fichiers",
319
+ "batch_clear_list": "Vider la liste",
320
+ "batch_start": "🚀 Démarrer le traitement par lots",
321
+ "batch_stop": "⏹️ Arrêter",
322
+ "batch_progress": "Progression du lot",
323
+ "batch_current_file": "En cours de traitement",
324
+ "batch_completed": "✅ Traitement par lots terminé !",
325
+ "batch_stopped": "⏹️ Traitement par lots arrêté",
326
+ "batch_no_files": "❌ Aucun fichier sélectionné pour le traitement",
327
+ "source_model": "Modèle source (Voix)",
328
+ "source_model_info": "Modèle pour extraire les voix/données de phase",
329
+ "target_model": "Modèle cible (Instrumental)",
330
+ "target_model_info": "Modèle auquel appliquer la correction de phase",
331
+ "custom_models_tab": "⚡ Modèles personnalisés",
332
+ "Custom Models": "Modèles personnalisés",
333
+ "add_custom_model": "➕ Ajouter un modèle personnalisé",
334
+ "custom_model_name": "Nom du modèle",
335
+ "custom_model_name_placeholder": "Entrez un nom pour votre modèle",
336
+ "checkpoint_url": "URL du Checkpoint (.ckpt/.pth)",
337
+ "checkpoint_url_placeholder": "https://huggingface.co/.../model.ckpt",
338
+ "config_url": "URL de configuration (.yaml)",
339
+ "config_url_placeholder": "https://huggingface.co/.../config.yaml",
340
+ "custom_py_url": "URL du fichier Python personnalisé (optionnel)",
341
+ "custom_py_url_placeholder": "https://huggingface.co/.../bs_roformer.py",
342
+ "auto_detect_type": "Détecter automatiquement le type de modèle",
343
+ "model_type": "Type de modèle",
344
+ "add_model_btn": "➕ Ajouter le modèle",
345
+ "custom_models_list": "📋 Liste des modèles personnalisés",
346
+ "delete_model": "🗑️ Supprimer",
347
+ "no_custom_models": "Aucun modèle personnalisé ajouté pour l'instant",
348
+ "model_added_success": "✅ Modèle ajouté avec succès !",
349
+ "model_deleted_success": "✅ Modèle supprimé avec succès !",
350
+ "model_add_error": "❌ Erreur lors de l'ajout du modèle : {}",
351
+ "model_delete_error": "❌ Erreur lors de la suppression du modèle : {}",
352
+ "refresh_models": "🔄 Actualiser",
353
+ "custom_model_info": "Ajoutez des modèles personnalisés en fournissant des liens de téléchargement. Les modèles seront téléchargés automatiquement lors de leur utilisation.",
354
+ "select_model_to_delete": "Sélectionnez un modèle à supprimer",
355
+ "success_output_created": "✅ Succès ! Fichier de sortie créé.",
356
+ "downloading_model_file": "Téléchargement du modèle : {}",
357
+ "downloading_file_progress": "Téléchargement : {} - {}%",
assets/i18n/languages/hi_in.json CHANGED
@@ -17,11 +17,10 @@
17
  "overlap": "ओवरलैप",
18
  "overlap_info": "अनुशंसित: 2-10 (उच्च मान गुणवत्ता बढ़ाते हैं लेकिन अधिक VRAM की आवश्यकता होती है)",
19
  "tta_boost": "TTA बूस्ट",
20
- "tta_info": "गुणवत्ता में सुधार करता है लेकिन प्रसंस्करण गति को धीमा करता है",
21
  "phase_fix": "फेज सुधार",
 
22
  "phase_fix_info": "वाद्य ट्रैक के लिए उन्नत फेज सुधार",
23
  "instrumental": "वाद्य",
24
- "instrumental_info": "आमतौर पर 2 आउटपुट देता है, लेकिन कभी-कभी एक ही आउटपुट देता है, आपको कोशिश करनी होगी :)",
25
  "process": "🚀 प्रक्रिया करें",
26
  "reset": "🧹 रीसेट करें",
27
  "status": "स्थिति",
@@ -73,215 +72,274 @@
73
  "ensemble_copy_status": "संयोजन आउटपुट यहाँ कॉपी किया जाएगा...",
74
  "guidelines": "<h4>मॉडल चयन दिशानिर्देश</h4><ul><li><strong>क्रॉस-श्रेणी मिश्रण से बचें:</strong> स्वर और वाद्य मॉडल का संयोजन अवांछित मिश्रण बना सकता है</li><li><strong>विशेष मॉडल नोट्स:</strong><ul><li>द्वैत मॉडल (v1/v2) - दोनों स्टेम्स आउटपुट करते हैं</li><li>MDX23C सेपरेटर - हाइब्रिड परिणाम</li></ul></li><li><strong>सर्वोत्तम अभ्यास:</strong> एक ही श्रेणी से 3-5 समान मॉडल का उपयोग करें</li></ul><div>💡 प्रो टिप: \"VOCALS-MelBand-Roformer BigBeta5e\" + \"VOCALS-BS-Roformer_1297\" संयोजन से शुरू करें</div>",
75
  "download_sources_tab": "⬇️ स्रोत डाउनलोड करें",
76
- "direct_links": "🌐 सीधे लिंक",
77
  "audio_file_url": "ऑडियो फाइल URL",
78
  "download_from_url": "⬇️ URL से डाउनलोड करें",
79
  "download_status": "डाउनलोड स्थिति",
80
  "downloaded_file": "डाउनलोड की गई फाइल",
81
  "cookie_management": "🍪 कुकी प्रबंधन",
82
- "upload_cookies_txt": "Cookies.txt अपलोड करें",
83
- "cookie_info": "<div style=\"margin-left:15px; font-size:0.95em\">**📌 क्यों जरूरी?**<br>- आयु-प्रतिबंधित सामग्री तक पहुंच<br>- निजी/असूचीबद वीडियो डाउनलोड करें<br>- क्षेत्रीय प्रतिबंधों को बायपास करें<br>- यूट्यूब डाउनलोड सीमाओं से बचें<br><br>**⚠️ महत्वपूर्ण नोट्स**<br>- अपनी कुकी फाइलें कभी साझा न करें!<br>- कुकी को ताज़ा करें जब:<br> • \"403 निषिद्ध\" त्रुटिया मिलें<br> • डाउनलोड अचानक रुक जाए<br> • \"सत्र समाप्त\" संदेश दिखें<br><br>**🔄 नवीकरण चरण**<br>1. इस <a href=\"https://chromewebstore.google.com/detail/get-cookiestxt-clean/ahmnmhfbokciafffnknlekllgcnafnie\" target=\"_blank\">क्रोम एक्सटेंशन</a> को स्थापित करें<br>2. क्रोम में यूट्यूब में लॉगिन करें<br>3. एक्सटेंशन आइकन पर क्लिक करें → \"निर्यात\"<br>4. डाउनलोड की गई फाइल यहाँ अपलोड करें<br><br>**⏳ कुकी जीवनकाल**<br>- सामान्य सत्र: 24 घंटे<br>- संवेदनशील संचालन: 1 घंटा<br>- पासवर्ड परिवर्तन: तत्काल अमानयता</div>",
84
- "manual_ensemble_tab": "🎚️ मैनुअल संयोजन",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
  "input_sources": "📂 इनपुट स्रोत",
86
- "refresh": "🔄 ताज़ा करे",
87
- "ensemble_algorithm": "संयोजन एल्गोरिदम",
88
  "select_audio_files": "ऑडियो फाइलें चुनें",
89
  "available_files": "उपलब्ध फाइलें",
90
- "custom_weights": "कस्टम वेट्स (अल्पविराम से अलग)",
91
  "custom_weights_placeholder": "उदाहरण: 0.8, 1.2, 1.0, ...",
92
- "custom_weights_info": "समान वेट्स के लिए खाली छोड़ें",
93
  "result_preview_tab": "🎧 परिणाम पूर्वावलोकन",
94
- "ensembled_output": "संयोजित आउटपुट",
95
- "processing_log_tab": "📋 प्रस्करण लॉग",
96
- "processing_details": "प्रस्करण विवरण",
97
- "process_ensemble": "⚡ संयोजन प्रक्रिया करें",
98
- "languages_tab": "🌐 भाषाए",
99
  "Vocal Models": "स्वर मॉडल",
100
  "Instrumental Models": "वाद्य मॉडल",
101
  "4-Stem Models": "4-स्टेम मॉडल",
102
- "Denoise Models": "शोर हटा के मॉडल",
103
- "Dereverb Models": "रिवर्ब हटाने के मॉडल",
104
- "Other Models": "अन्य मॉडल",
105
- "ensemble_files_description": "निरदिष् संयोजन विधियों का उपयोग करके कई ऑडियो फाइलों को एक ��ी आउटपुट ें जो़ता है।",
106
- "ensemble_files_help": "संयोजन के लिए सभी ऑडियो फाइलों क",
107
- "ensemble_type_help": "avg_wave, median_wave, min_wave, max_wave, avg_fft, median_fft, min_fft, max_fft मेसे एक",
108
- "ensemble_weights_help": "संयोज बनानके लिए वेट्। वट्स संख्या फाइलों की संख्या के बराबर होनी चाहिए",
109
- "ensemble_output_help": "WAV इल पथहां संयोजन परिण सं्रहीत होा",
110
- "ensemble_type_print": "संयोजन पार: {}",
 
111
  "num_input_files_print": "इनपुट फाइलों की संख्या: {}",
112
- "weights_print": "वेट्स: {}",
113
  "output_file_print": "आउटपुट फाइल: {}",
114
  "duration_mismatch_error": "सभी फाइलों की अवधि समान होनी चाहिए",
115
- "file_not_found_error": "त्रुटि। फाइल नहीं मिली: {}। पथ जाचें।",
116
- "reading_chunk_print": "फाइल से खंड पढ़ रह है: {} (शुरुआत: {}सेकंड, अवधि: {}सेकंड)",
117
- "chunk_result_shape_print": "खंड परिणाम आकार: {}",
118
- "ensemble_completed_print": "संयोजन पूरा हुआ। आउटपुट यहाँ संग्रीत: {}",
119
  "file_deletion_error": "{} हटाया नहीं जा सका: {}",
120
- "directory_not_exist_warning": "⚠️ डायरेक्टरी मौजूद नहीं है: {}",
121
- "not_a_directory_warning": "⚠️ पथ डायरेक्टरी नहीं है: {}",
122
  "item_deletion_error": "⚠️ {} हटाने में त्रुटि: {}",
123
- "old_output_not_exist": " पुाना आउटपुट फ़ोल्डर मौजूद नहीं है",
124
- "old_outputs_cleared": " पुरान आउटपुट फलतापूरवक हटा िए गए!",
 
125
  "error": "🔥 त्रुटि: {}",
126
  "ffmpeg_error": "FFmpeg त्रुटि ({}): {}",
127
- "file_saved_successfully": "फाइल सफलतापूर्वक संग्रही: {}",
128
- "total_files_found": "कुल फालें मिलीं: {}। सैंपल रेट का उपयोग: {}",
129
  "total_progress": "कुल प्रगति",
130
  "detailed_pbar_enabled": "विस्तृत प्रगति बार सक्षम: {}",
131
- "loaded_audio": "लोड किया गया ऑडियो: {}, आकार: {}",
132
- "cannot_read_track": "ट्रैक पढ़ा नहीं जा सकत: {}",
133
  "error_message": "त्रुटि संदेश: {}",
134
- "demudding_track": "ट्रैक को डिमड कर रह है (फेज रीमिक्स - वाद्य): {}",
135
  "elapsed_time": "बीता हुआ समय: {:.2f} सेकंड।",
136
- "proc_folder_description": "एक निर्दिष्ट मॉडल का उपयोग करके फोल्डर में ऑडियो फाइलको प्रोसेस करता है।",
137
  "model_type_help": "मॉडल प्रकार (bandit, bs_roformer, mdx23c, आदि)",
138
  "config_path_help": "कॉन्फ़िग फाइल का पथ",
139
- "demud_phaseremix_help": "वाद्य के लिए फेज रीमिक्स सक्षम करें",
140
- "start_checkpoint_help": "मान्येट्स के लिए प्रारंभिक चेकपॉइंट",
141
- "input_folder_help": "प्रोसेस करने के लिए मिश्रणों वाला फोल्डर",
142
- "audio_path_help": "प्रोसेस करने के लिकल ऑडियो फाइल का पथ",
143
  "store_dir_help": "परिणाम संग्रहीत करने का पथ",
144
- "device_ids_help": "GPU IDs की सूची",
145
- "extract_instrumental_help": "यदि प्रदान किया गया हो तो स्र को उलटकर वाद्य प्राप्त करें",
146
- "disable_detailed_pbar_help": "डिमिक्स में विस्तृत प्रगति बार अक्षम करें",
147
- "force_cpu_help": "CUDA उपलब्ध होने पर भी CPU का उपयोग करें",
148
  "flac_file_help": "WAV के बजाय FLAC फाइल आउटपुट करें",
149
  "export_format_help": "निर्यात प्रारूप और PCM प्रकार",
150
  "pcm_type_help": "FLAC फाइलों के लिए PCM प्रकार",
151
- "use_tta_help": "टेस्ट टाइम ऑगमेंटेशन सक्षम करें",
152
- "lora_checkpoint_help": "LoRA वेट्स के लिए प्रारंभिक चेकपॉइंट",
153
  "cuda_available": "CUDA उपलब्ध है, इसे अक्षम करने के लिए --force_cpu का उपयोग करें।",
154
- "using_device": "उपयोग किया जा रहा डिवाइस: {}",
155
- "instruments_print": "वाद्य: {}",
156
  "model_load_time": "मॉडल लोड समय: {:.2f} सेकंड",
157
  "invalid_url": "❌ अमान्य URL",
158
- "cookie_file_updated": "✅ कुकी फाइल अपडेट की गई!",
159
- "cookie_installation_error": "⚠️ कुकी स्थाप त्रुटि: {}",
160
- "file_size_zero_error": "फाइल आकार शून्य या फाइ नहीबनाई गई",
 
161
  "google_drive_error": "❌ Google Drive डाउनलोड त्रुटि: {}",
162
  "wav_conversion_failed": "WAV रूपांतरण विफल",
163
  "download_error": "❌ डाउनलोड त्रुटि: {}",
164
- "download_success": "🎉 सलताप्वडाउनलोड ियाया!",
 
 
165
  "download_failed": "❌ डाउनलोड विफल",
166
- "no_output_files_found": "❌ कोई आउटपुट फाइलें नहीं मिली।",
167
- "output_refreshed_successfully": "✅ आउटपुट सफलतापूर्वक ताज़ा किया गया!",
168
- "error_refreshing_output": "❌ आउटपुट ताज़ा करने में त्रुटि: {}",
169
  "starting_audio_separation": "ऑडियो पृथक्करण शुरू हो रहा है...",
170
- "processing_audio": "ऑडियो प्रोसेसिंग",
171
- "separating_audio": "ऑडियो अलग कर रहा है... ({:.1f}%)",
172
- "separation_complete": "पृथक्करण पूर!",
173
  "progress_parsing_error": "प्रगति पार्सिंग त्रुटि: {}",
174
  "error_occurred": "एक त्रुटि हुई: {}",
175
- "separation_process_completed": "पृथक्करण प्रक्रिया पूरी हुई!",
176
- "no_audio_file_error": "❌ कोई ऑडियो फाइल प्रदाहीं ी गई और इनपुट डायरेक्टरी में कोई मौजूदा फाइल नहीं है।",
177
- "no_input_progress_label": "त्रुटि: कोई इनपुट प्रदाहीं िया गया -- 0.0%",
178
- "processing_audio_print": "ऑडियो प्रोसेसिंग: {} से मॉडल का उपयोग करके: {}",
179
  "starting_audio_separation_progress_label": "ऑडियो पृथक्करण शुरू हो रहा है... -- 0.0%",
180
- "separating_audio_progress_label": "ऑडियो अलग कर रहा है... -- {}.0%",
181
- "audio_processing_completed_progress_label": "ऑडियो प्रोसेसिंग पूरी हुई! -- 100.0%",
182
- "audio_processing_completed": "✅ ऑडियो प्रोसेसिंग पूरी हुई!",
183
  "error_occurred_progress_label": "त्रुटि ह���ई -- 0.0%",
184
  "minimum_files_required": "⚠️ न्यूनतम 2 फाइलें आवश्यक",
185
- "valid_files_not_found": "❌ मान्य फाइलें नहीं मिली",
186
- "starting_ensemble_process": "संयोजन प्रक्रिया शुरू हो रही है...",
187
- "ensembling_progress": "संयजन हा है... ({:.1f}%)",
188
- "finalizing_ensemble_output": "संयोजन आउटपुट को अंतिम रूप दे रह है...",
189
- "success_log": "✅ सफलता!\n{}",
190
  "error_log": "❌ त्रुटि!\n{}",
191
- "critical_error": " गंभी्रुटि: {}",
192
- "ensemble_process_completed": "संयोजन प्र्रिया पूरी हुई!",
193
- "no_models_selected": "❌ कोई मॉडल चयनि नहीं",
194
- "no_input_audio_provided": " ई इनपुट ऑियप् ीं किया गया",
195
- "loading_model": "मॉडल लोड कर रहा है {}/{}: {}...",
196
- "loading_model_progress_label": "मॉडल लोड करहा है {}/{}: {}... -- {}.0%",
197
- "completed_model": "मॉडल पूर हुआ {}/{}: {}",
198
- "completed_model_progress_label": "म पूरा हुआ {}/{}: {} -- {}.0%",
199
- "running_command": "कमांड चला रहा है: {}",
200
  "model_failed": "मॉडल {} विफल: {}",
201
  "critical_error_with_model": "{} के साथ गंभीर त्रुटि: {}",
202
- "model_output_failed": "{} आउटपुट उत्प्न करनमें विफल",
203
- "waiting_for_files": "सभी फाइलों के तैयार होने की प्रतीक्षमें...",
204
- "waiting_for_files_progress_label": "सभी फाइलों के तैयार होने की प्रतीक्षमें... -- 90.0%",
205
- "performing_ensemble": "संयोजन कर रह है...",
206
- "performing_ensemble_progress_label": "संयोजन कर रह है... -- 92.0%",
207
- "memory_usage_before_ensemble": "संयोजन से पहले मेमोरी उपयोग: {}%",
208
- "memory_usage_after_ensemble": "संयोजन के बाद मेमोरी उपयोग: {}%",
209
- "finalizing_ensemble_output_progress_label": "संयोजन आउटपुट को अंतिम रूप दे रह है... -- 98.0%",
210
- "ensemble_file_creation_failed": "संयोजन फाइल ाई नहीं जा सकी: {}",
211
- "ensemble_completed_progress_label": "संयोजन सफलतापूर्वक पूरा हुआ! -- 100.0%",
212
- "success_output_created": "✅ सफलता! आउटपुट फाइल बनाई गई।",
213
- "drive_mounted_copying_ensemble": "Google Drive माउंट िया गया। ंयोजन आउपुट पी हो रहा है...",
214
- "drive_already_mounted_copying_ensemble": "Google Drive पहले े माउंहै। संयोजन आउटपुट कपी रहा है...",
215
- "no_ensemble_output_files_found": "❌ कोई संयोजन आउटपुट फाइेंहीं िलीं।",
216
- "ensemble_output_copied": "✅ संयोजआउटपुट {} परॉपी कियगया",
217
- "error_copying_ensemble_output": " संयोजन आउटपुट कॉपी करने में त्रुटि: {}",
218
- "drive_mounted_copying_files": "Google Drive माउंट किया गया। फाइलें कॉपी हो ��ही हैं...",
219
- "drive_already_mounted_copying_files": "Google Drive पहले से माउंट है। फाइलें कॉपी हो रही हैं...",
220
- "files_copied_to_drive": "✅ फाइलें {} पर कॉपी की गईं",
221
- "error_copying_files": " फाइलें कॉपीरने में तरुटि: {}",
222
- "mounting_drive": "Google Drive माउंट कर रहा है...",
223
- "settings_saved": "सेटिंग्स config.json ें संग्रहीत: भषा={lang}, विधि={method}, पो्ट={port}",
224
- "selected_language": "चयनित षा: {lang}",
225
- "available_languages": "उपब्ध भाषाएँ: {langs}",
226
- "select_language_prompt": "भाषा चुनें (डिफ़ॉल्ट: {default}): ",
227
- "invalid_language": "अमान्य भाषा चयन! डिफ़ॉल्षा का उपयोग कर रहा हूँ ({default})।",
228
- "available_sharing_methods": "उपलब्ध साझारण विधियाँ: gradio, localtunnel, ngrok",
229
- "select_sharing_method_prompt": "एक साझाकिधि चुनें (िफॉल्ट: {default}): ",
230
- "enter_ngrok_token_prompt": "अपना Ngrok टोकन दर्रें (डिफ़ॉल्: {default}): ",
231
- "ngrok_token_required": "त्रुटि: ngrok विधि के लिए Ngrok टोकन आवश्यक है!",
232
- "enter_port_prompt": "पोर्टंबर दर्ज करें (डिफ़ॉल्ट: {default}): ",
233
- "opening_previous_url": "पिछला URL खोल हा है: {url}",
234
- "open_url_manually": "Colab में URL न्युअल रूप से खोें: {url}",
235
- "invalid_previous_url": "अमान्य पिछ URL: {url}, खो नही गया।",
236
- "starting_method": "{method} को पोर्ट {port} पर शुरू कर रहा है...",
237
- "process_stopped": "\n🛑 उपयोगकर्ता द्वारा प्रक्रिया रोक दी गई",
238
- "selected_port": "चयनित पोर्ट: {port}",
239
- "starting_gradio_with_sharing": "Gradio को अंतर्निहित साझाकरण के साथ शुरू कर रहा है...",
240
- "starting_localtunnel": "Localtunnel को पोर्ट {port} पर शुरू कर रहा है...",
241
- "share_link": "साझा लिंक: {url}",
242
- "password_ip": "पासवर्ड IP: {ip}",
243
- "starting_ngrok": "Ngrok को पोर्ट {port} पर शुरू कर रहा है...",
244
- "ngrok_url": "Ngrok URL: {url}",
245
- "ngrok_error": "Ngrok शुरू करने में त्रुटि: {error}",
246
- "apollo_chunk_size": "Apollo खंड आकार",
247
- "apollo_chunk_size_info": "Apollo सुधार के लिए खंड आकार (यूनिवर्सल मॉडल के लिए अनुशंसित: 19)",
248
- "apollo_overlap": "Apollo ओवरलैप",
249
- "enhancing_with_apollo": "Apollo के साथ सुधार कर रहा है ({}/{} फाइलें)...",
250
- "apollo_processing_completed": "Apollo प्रसंस्करण पूरा!",
251
- "apollo_overlap_info": "Apollo सुधार के लिए ओवरलैप (अनुशंसित: 2)",
252
- "invalid_method": "त्रुटि: अमान्य विधि! 'gradio', 'localtunnel', या 'ngrok' का उपयोग करें।",
253
- "apollo_enhancement_settings": "Apollo सुधार सेटिंग्स",
254
- "enhance_with_apollo": "Apollo के साथ सुधार करें",
255
- "enhance_with_apollo_info": "पृथक्करण के बाद ऑडियो सुधार के लिए Apollo सक्षम करें",
256
- "apollo_method": "Apollo विधि",
257
- "apollo_method_info": "Apollo के लिए प्रसंस्करण विधि चुनें",
258
- "normal_method": "सामान्य विधि",
259
- "mid_side_method": "मध्य/पार्श्व विधि",
260
- "apollo_normal_model": "Apollo सामान्य मॉडल",
261
- "apollo_normal_model_info": "सामान्य Apollo प्रसंस्करण के लिए उपयोग करने वाला मॉडल",
262
- "apollo_midside_model": "Apollo मध्य/पार्श्व मॉडल",
263
- "apollo_enhancement_info": "ध्वनि गुणवत्ता सुधार",
264
- "selected_models": "चयनित मॉडल",
265
- "save_preset": "प्रीसेट सहेजें",
266
- "delete_preset": "प्रीसेट हटाएं",
267
- "refresh_presets": "प्रीसेट रिफ्रेश करें",
268
- "presets_name": "प्रीसेट का नाम",
269
- "select_preset": "प्रीसेट चुनें",
270
- "add_favorite": "पसंदीदा में जोड़ें",
271
- "apollo_midside_model_info": "मध्य/पार्श्व प्रसंस्करण के लिए उपयोग करने वाला मॉडल (वैकल्पिक)",
272
- "language_changed_message": "भाषा बदली गई। कृपया सेल को पुनः शुरू करें।"
273
- "selected_models": "選択されたモデル",
274
- "save_preset": "プリセットを保存",
275
- "delete_preset": "プリセットを削除",
276
- "refresh_presets": "プリセットを更新",
277
- "preset_name": "プリセット名",
278
- "select_preset": "プリセットを選択",
279
- "add_favorite": "お気に入りに追加",
280
- "apply_matchering": "मैचरिंग लागू करें",
281
- "matchering_info": "ऑडियो मास्टरिंग और संतुलन को बढ़ाने के लिए मैचरिंग लागू करें",
282
- "matchering_passes": "मैचरिंग पास",
283
- "matchering_passes_info": "मैचरिंग की पुनरावृत्तियों की संख्या (1-5, उच्च मान गुणवत्ता को बेहतर बनाते हैं लेकिन अधिक समय लेते हैं)",
284
- "matchering_processing": "मैचरिंग लागू किया जा रहा है... ({:.1f}%)",
285
- "matchering_completed": "✅ मैचरिंग प्रक्रिया पूरी हुई!",
286
- "matchering_error": "❌ मैचरिंग के दौरान त्रुटि: {}"
287
  }
 
17
  "overlap": "ओवरलैप",
18
  "overlap_info": "अनुशंसित: 2-10 (उच्च मान गुणवत्ता बढ़ाते हैं लेकिन अधिक VRAM की आवश्यकता होती है)",
19
  "tta_boost": "TTA बूस्ट",
 
20
  "phase_fix": "फेज सुधार",
21
+ "use_phase_correction": "उन्नत फेज सुधार लागू करें",
22
  "phase_fix_info": "वाद्य ट्रैक के लिए उन्नत फेज सुधार",
23
  "instrumental": "वाद्य",
 
24
  "process": "🚀 प्रक्रिया करें",
25
  "reset": "🧹 रीसेट करें",
26
  "status": "स्थिति",
 
72
  "ensemble_copy_status": "संयोजन आउटपुट यहाँ कॉपी किया जाएगा...",
73
  "guidelines": "<h4>मॉडल चयन दिशानिर्देश</h4><ul><li><strong>क्रॉस-श्रेणी मिश्रण से बचें:</strong> स्वर और वाद्य मॉडल का संयोजन अवांछित मिश्रण बना सकता है</li><li><strong>विशेष मॉडल नोट्स:</strong><ul><li>द्वैत मॉडल (v1/v2) - दोनों स्टेम्स आउटपुट करते हैं</li><li>MDX23C सेपरेटर - हाइब्रिड परिणाम</li></ul></li><li><strong>सर्वोत्तम अभ्यास:</strong> एक ही श्रेणी से 3-5 समान मॉडल का उपयोग करें</li></ul><div>💡 प्रो टिप: \"VOCALS-MelBand-Roformer BigBeta5e\" + \"VOCALS-BS-Roformer_1297\" संयोजन से शुरू करें</div>",
74
  "download_sources_tab": "⬇️ स्रोत डाउनलोड करें",
75
+ "direct_links": "🌐 डायरेक्ट लिंक्स",
76
  "audio_file_url": "ऑडियो फाइल URL",
77
  "download_from_url": "⬇️ URL से डाउनलोड करें",
78
  "download_status": "डाउनलोड स्थिति",
79
  "downloaded_file": "डाउनलोड की गई फाइल",
80
  "cookie_management": "🍪 कुकी प्रबंधन",
81
+ "upload_cookies_txt": "कुकीज.txt अपलोड करें",
82
+ "cookie_info": "<div style=\"margin-left:15px; font-size:0.95em\">**📌 क्यों आवश्यक?**<br>- आयु-प्रतिबंधित सामग्री तक पहुंच<br>- निजी/अप्रकाशित वीडियो डाउनलोड<br>- क्षेत्रीय प्रतिबंधों को बायपास करें<br>- यूट्यूब डाउनलोड सीमाओं से बचें<br><br>**⚠️ महत्वपूर्ण नोट्स**<br>- कभी भी अपनी कुकी फाइलें साझा न करें!<br>- जब रिफ्रेश करें:<br> • \"403 निषिद्ध\" त्रुटियां प्राप्त हो रही हों<br> • डाउनलोड अचानक रुक जाए<br> • \"सत्र समाप्त\" संदेश दिखें<br><br>**🔄 नवीनीकरण चरण**<br>1. इस <a href=\"https://chromewebstore.google.com/detail/get-cookiestxt-clean/ahmnmhfbokciafffnknlekllgcnafnie\" target=\"_blank\">क्रोम एक्सटेंशन</a> को इंस्टॉल करें<br>2. क्रोम में यूट्यूब पर लॉगिन करें<br>3. एक्सटेंशन आइकन पर क्लिक करें → \"एक्सपोर्ट\"<br>4. यहां डाउनलोड की गई फाइल अपलोड करें<br><br>**⏳ कुकी जीवनकाल**<br>- सामान्य सत्र: 24 घंटे<br>- संवेदनशील संचालन: 1 घंटा<br>- पासवर्ड परिवर्तन: तत्काल रिफ्रेश</div>",
83
+ "ensemble_output_copied": "✅ संयोजन आउटपुट {} में कॉपी किया गया",
84
+ "error_copying_ensemble_output": "❌ संयोजन आउटपुट कॉपी करने में त्रुटि: {}",
85
+ "drive_mounted_copying_ensemble": "गूगल ड्राइव माउंटेड। संयोजन आउटपुट कॉपी कर रहा है...",
86
+ "drive_already_mounted_copying_ensemble": "गूगल ड्राइव पहले से माउंटेड। संयोजन आउटपुट कॉपी कर रहा है...",
87
+ "no_ensemble_output_files_found": "❌ कोई संयोजन आउटपुट फाइल नहीं मिली।",
88
+ "drive_mounted_copying_files": "गूगल ड्राइव माउंटेड। फाइलें कॉपी कर रहा है...",
89
+ "drive_already_mounted_copying_files": "गूगल ड्राइव पहले से माउंटेड। फाइलें कॉपी कर रहा है...",
90
+ "files_copied_to_drive": "✅ फाइलें {} में कॉपी की गईं",
91
+ "error_copying_files": "❌ फाइलें कॉपी करने में त्रुटि: {}",
92
+ "mounting_drive": "गूगल ड्राइव माउंट कर रहा है...",
93
+ "settings_saved": "सेटिंग्स config.json में सहेजी गईं: भाषा={lang}, विधि={method}, पोर्ट={port}",
94
+ "selected_language": "चयनित भाषा: {lang}",
95
+ "available_languages": "उपलब्ध भाषाएं: {langs}",
96
+ "select_language_prompt": "भाषा चुनें (डिफ़ॉल्ट: {default}): ",
97
+ "invalid_language": "अमान्य भाषा चयन! डिफ़ॉल्ट भाषा ({default}) का उपयोग कर रहा है।",
98
+ "available_sharing_methods": "उपलब्ध साझाकरण विधियां: gradio, localtunnel, ngrok",
99
+ "select_sharing_method_prompt": "साझाकरण विधि चुनें (डिफ़ॉल्ट: {default}): ",
100
+ "enter_ngrok_token_prompt": "अपना Ngrok टोकन दर्ज करें (डिफ़ॉल्ट: {default}): ",
101
+ "ngrok_token_required": "त्रुटि: ngrok विधि के लिए Ngrok टोकन आवश्यक है!",
102
+ "enter_port_prompt": "पोर्ट नंबर दर्ज करें (डिफ़ॉल्ट: {default}): ",
103
+ "opening_previous_url": "पिछला URL खोल रहा है: {url}",
104
+ "open_url_manually": "Colab में URL मैन्युअल रूप से खोलें: {url}",
105
+ "invalid_previous_url": "अमान्य पिछला URL: {url}, नहीं खोला गया।",
106
+ "starting_method": "{method} को पोर्ट {port} पर शुरू कर रहा है...",
107
+ "process_stopped": "\n🛑 उपयोगकर्ता द्वारा प्रक्रिया रोक दी गई",
108
+ "selected_port": "चयनित पोर्ट: {port}",
109
+ "starting_gradio_with_sharing": "Gradio को अंतर्निहित साझाकरण के साथ शुरू कर रहा है...",
110
+ "starting_localtunnel": "Localtunnel को पोर्ट {port} पर शुरू कर रहा है...",
111
+ "share_link": "साझा लिंक: {url}",
112
+ "password_ip": "पासवर्ड IP: {ip}",
113
+ "starting_ngrok": "Ngrok को पोर्ट {port} पर शुरू कर रहा है...",
114
+ "ngrok_url": "Ngrok URL: {url}",
115
+ "ngrok_error": "Ngrok शुरू करने में त्रुटि: {error}",
116
+ "auto_apollo_chunk_size": "Apollo खंड आकार",
117
+ "auto_apollo_chunk_size_info": "Apollo सुधार के लिए खंड आकार (यूनिवर्सल मॉडल के लिए अनुशंसित: 19)",
118
+ "auto_apollo_overlap": "Apollo ओवरलैप",
119
+ "auto_enhancing_with_apollo": "Apollo के साथ सुधार कर रहा है ({}/{} फाइलें)...",
120
+ "auto_apollo_processing_completed": "Apollo प्रसंस्करण पूरा!",
121
+ "auto_apollo_overlap_info": "Apollo सुधार के लिए ओवरलैप (अनुशंसित: 2)",
122
+ "apollo_chunk_size": "Apollo खंड आकार",
123
+ "apollo_chunk_size_info": "Apollo सुधार के लिए खंड आकार (यूनिवर्सल मॉडल के लिए अनुशंसित: 19)",
124
+ "apollo_overlap": "Apollo ओवरलैप",
125
+ "enhance_with_apollo": "Apollo के साथ सुधार करें",
126
+ "apollo_enhancement_info": "ध्वनि गुणवत्ता सुधार",
127
+ "enhancing_with_apollo": "Apollo के साथ सुधार कर रहा है ({}/{} फाइलें)...",
128
+ "apollo_processing_completed": "Apollo प्रसंस्करण पूरा!",
129
+ "apollo_overlap_info": "Apollo सुधार के लिए ओवरलैप (अनुशंसित: 2)",
130
+ "selected_models": "चयनित मॉडल",
131
+ "save_preset": "प्रीसेट सहेजें",
132
+ "delete_preset": "प्रीसेट हटाएं",
133
+ "refresh_presets": "प्रीसेट रिफ्रेश करें",
134
+ "preset_name": "प्रीसेट का नाम",
135
+ "select_preset": "प्रीसेट चुनें",
136
+ "add_favorite": "पसंदीदा में जोड़ें",
137
+ "apply_matchering": "मैचरिंग लागू करें",
138
+ "matchering_info": "ऑडियो मास्टरिंग और संतुलन को बढ़ाने के लिए मैचरिंग लागू करें",
139
+ "matchering_passes": "मैचरिंग पास",
140
+ "matchering_passes_info": "मैचरिंग की पुनरावृत्तियों की संख्या (1-5, उच्च मान गुणवत्ता को बेहतर बनाते हैं लेकिन अधिक समय लेते हैं)",
141
+ "matchering_processing": "मैचरिंग लागू किया जा रहा है... ({:.1f}%)",
142
+ "matchering_completed": "✅ मैचरिंग प्रक्रिया पूरी हुई!",
143
+ "matchering_error": "❌ मैचरिंग के दौरान त्रुटि: {}",
144
+ "invalid_method": "त्रुटि: अमान्य विधि! 'gradio', 'localtunnel', या 'ngrok' का उपयोग करें।",
145
+ "language_changed_message": "भाषा बदली गई। कृपया सेल को पुनः शुरू करें।",
146
+ "backend_settings": "🚀 बैकएंड सेटिंग्स",
147
+ "inference_backend": "इन्फरेंस बैकएंड",
148
+ "optimization_mode": "ऑप्टिमाइजेशन मोड",
149
+ "optimization_mode_info": "PyTorch ऑप्टिमाइजेशन मोड चयन",
150
+ "mixed_precision_amp": "मिश्रित प्रेसिजन (AMP)",
151
+ "mixed_precision_info": "2x तेज इन्फरेंस - अनुशंसित",
152
+ "tf32_acceleration": "TF32 एक्सेलरेशन",
153
+ "tf32_acceleration_info": "RTX 30xx+ के लिए अतिरिक्त गति बूस्ट",
154
+ "cudnn_benchmark": "cuDNN बेंचमार्क",
155
+ "cudnn_benchmark_info": "पहले रन में धीमा, बाद के रनों में बहुत तेज",
156
+ "ultra_optimized_pytorch": "अल्ट्रा-ऑप्टिमाइज्ड PyTorch बैकएंड",
157
+ "default_active_max_speed": "डिफ़ॉल्ट रूप से सक्रिय - अधिकतम गति ऑप्टिमाइजेशन",
158
+ "channels_last_mode": "RTX GPUs के लिए सबसे तेज",
159
+ "compile_mode": "PyTorch 2.0+ के लिए अतिरिक्त गति",
160
+ "default_mode": "मानक",
161
+ "tta_info": "गुणवत्ता बढ़ाता है लेकिन प्रसंस्करण को धीमा करता है",
162
+ "instrumental_info": "आमतौर पर 2 स्टेम आउटपुट करता है, लेकिन कभी-कभी केवल एक - आपको कोशिश करनी होगी! :)",
163
+ "direct_download_error": "❌ सीधा डाउनलोड त्रुटि: {}",
164
+ "invalid_google_drive_url": "❌ अमान्य Google Drive URL",
165
+ "continuing_without_google_drive": "Google Drive कनेक्शन विफल, इसके बिना जारी रखते हुए...",
166
+ "skipping_drive_copy_no_mount": "⚠️ Google Drive माउंट नहीं है, Drive पर कॉपी छोड़ रहे हैं...",
167
+ "direct_download_failed": "फ़ाइल को सीधे डाउनलोड करना विफल रहा",
168
+ "manual_ensemble_tab": "🎚️ मैनुअल एन्सेम्बल",
169
+ "phase_fixer_tab": "🔧 फेज़ फिक्सर",
170
+ "batch_processing_tab": "📦 बैच प्रोसेसिंग",
171
+ "batch_description": "एक ही मॉडल से एक साथ कई फाइलें प्रोसेस करें",
172
+ "batch_input_folder": "इनपुट फोल्डर",
173
+ "batch_input_folder_placeholder": "ऑडियो फाइलों वाले फोल्डर का पथ दर्ज करें",
174
+ "batch_output_folder": "आउटपुट फोल्डर",
175
+ "batch_output_folder_placeholder": "आउटपुट फाइलों के लिए फोल्डर पथ दर्ज करें",
176
+ "batch_file_list": "प्रोसेस करने के लिए फाइलें",
177
+ "batch_add_files": "फाइलें जोड़ें",
178
+ "batch_clear_list": "सूची साफ करें",
179
+ "batch_start": "🚀 बैच प्रोसेसिंग शुरू करें",
180
+ "batch_stop": "⏹️ रोकें",
181
+ "batch_progress": "बैच प्रगति",
182
+ "batch_current_file": "वर्तमान में प्रक्रिया हो रही है",
183
+ "batch_completed": "✅ बैच प्रोसेसिंग पूर्ण!",
184
+ "batch_stopped": "⏹️ बैच प्रोसेसिंग रोकी गई",
185
+ "batch_no_files": "❌ प्रोसेसिंग के लिए कोई फाइल नहीं चुनी गई",
186
+ "source_model": "स्रोत मॉडल (स्वर)",
187
+ "source_model_info": "स्वर/फेज़ डेटा निकालने के लिए मॉडल",
188
+ "target_model": "लक्ष्य मॉडल (वाद्य)",
189
+ "target_model_info": "फेज़ सुधार लागू करने के लिए मॉडल",
190
  "input_sources": "📂 इनपुट स्रोत",
191
+ "refresh": "🔄 रिफ्रेश",
192
+ "ensemble_algorithm": "एन्सेम्बल एल्गोरिदम",
193
  "select_audio_files": "ऑडियो फाइलें चुनें",
194
  "available_files": "उपलब्ध फाइलें",
195
+ "custom_weights": "कस्टम वज़न (अल्पविराम से अलग)",
196
  "custom_weights_placeholder": "उदाहरण: 0.8, 1.2, 1.0, ...",
197
+ "custom_weights_info": "समान वज़न के लिए खाली छोड़ें",
198
  "result_preview_tab": "🎧 परिणाम पूर्वावलोकन",
199
+ "ensembled_output": "एन्सेम्बल आउटपुट",
200
+ "processing_log_tab": "📋 प्रोसेसिंग लॉग",
201
+ "processing_details": "प्रोसेसिंग विवरण",
202
+ "process_ensemble": "⚡ एन्सेम्बल प्रोसेस करें",
203
+ "languages_tab": "🌐 भाषाएं",
204
  "Vocal Models": "स्वर मॉडल",
205
  "Instrumental Models": "वाद्य मॉडल",
206
  "4-Stem Models": "4-स्टेम मॉडल",
207
+ "Denoise Models": "डीनॉइज़ मॉडल",
208
+ "Dereverb Models": "डीरिवर्ब मॉडल",
209
+ "Other Shapes": "अन्य मॉडल",
210
+ "Custom Models": "कस्टम मॉडल",
211
+ "ensemble_files_description": "निर्दिष्ट एन्सेम्बल विधियों का उपयोग करके कई ऑडियो फाइलों को एक आउटपुट में जोड़ता है।",
212
+ "ensemble_files_help": "एन्सेम्बल के लिए सभी ऑडियो फाइलों का पथ",
213
+ "ensemble_type_help": "इनमें से एक: avg_wave, median_wave, min_wave, max_wave, avg_fft, median_fft, min_fft, max_fft",
214
+ "ensemble_weights_help": "एन्सेम्बल बनाने के लिए वज़न। वज़न की संख्या फाइलों की संख्या के बराबर होनी चाहिए",
215
+ "ensemble_output_help": "WAV फाइल का पथ जहाँ एन्सेम्बल परिणाम संग्रहीत होगा",
216
+ "ensemble_type_print": "एन्सेम्बल प्रकार: {}",
217
  "num_input_files_print": "इनपुट फाइलों की संख्या: {}",
218
+ "weights_print": "वज़न: {}",
219
  "output_file_print": "आउटपुट फाइल: {}",
220
  "duration_mismatch_error": "सभी फाइलों की अवधि समान होनी चाहिए",
221
+ "file_not_found_error": "त्रुटि। फाइल नहीं मिली: {}। पथ जांचें।",
222
+ "reading_chunk_print": "फाइल से खंड पढ़ रहा है: {} (शुरुआत: {}s, अवधि: {}s)",
223
+ "chunk_result_shape_print": "खंड परिणाम का आकार: {}",
224
+ "ensemble_completed_print": "एन्सेम्बल पूर्ण। आउटपुट यहाँ सहेजा गया: {}",
225
  "file_deletion_error": "{} हटाया नहीं जा सका: {}",
226
+ "directory_not_exist_warning": "⚠️ डायरेक्टरी मौजूद नहीं: {}",
227
+ "not_a_directory_warning": "⚠️ पथ एक डायरेक्टरी नहीं है: {}",
228
  "item_deletion_error": "⚠️ {} हटाने में त्रुटि: {}",
229
+ "critical_error": "⛔ गंभीर त्रुटि: {}",
230
+ "old_output_not_exist": "❌ पुराना आउटपुट फ़ोल्डर मौजूद नहीं है",
231
+ "old_outputs_cleared": "✅ पुराने आउटपुट सफलतापूर्वक साफ किए गए!",
232
  "error": "🔥 त्रुटि: {}",
233
  "ffmpeg_error": "FFmpeg त्रुटि ({}): {}",
234
+ "file_saved_successfully": "फाइल सफलतापूर्वक सहेजी गई: {}",
235
+ "total_files_found": "कुल फाइलें मिलीं: {}। उपयोग की गई सैंपल रेट: {}",
236
  "total_progress": "कुल प्रगति",
237
  "detailed_pbar_enabled": "विस्तृत प्रगति बार सक्षम: {}",
238
+ "loaded_audio": "ऑडियो लोड हुआ: {}, आकार: {}",
239
+ "cannot_read_track": "ट्रैक नहीं पढ़ सकते: {}",
240
  "error_message": "त्रुटि संदेश: {}",
241
+ "demudding_track": "ट्रैक साफ कर रहा है (फेज रीमिक्स - वाद्य): {}",
242
  "elapsed_time": "बीता हुआ समय: {:.2f} सेकंड।",
243
+ "proc_folder_description": "निर्दिष्ट मॉडल का उपयोग करके फोल्डर में ऑडियो फाइलें प्रोसेस करता है।",
244
  "model_type_help": "मॉडल प्रकार (bandit, bs_roformer, mdx23c, आदि)",
245
  "config_path_help": "कॉन्फ़िग फाइल का पथ",
246
+ "demud_phaseremix_help": "वाद्य के लिए फेज रीमिक्स सक्षम करें",
247
+ "start_checkpoint_help": "वैध वज़न के लिए प्रारंभिक चेकपॉइंट",
248
+ "input_folder_help": "प्रक्रिया करने के लिए मिश्रण वाला फोल्डर",
249
+ "audio_path_help": "एकल ऑडियो फाइल का पथ",
250
  "store_dir_help": "परिणाम संग्रहीत करने का पथ",
251
+ "device_ids_help": "GPU आईडी की सूची",
252
+ "extract_instrumental_help": "यदि प्रदान किया गया हो तो वाद्य प्राप्त करने के लिए स्वर को उलटें",
253
+ "disable_detailed_pbar_help": "डीमिक्स में विस्तृत प्रगति बार अक्षम करें",
254
+ "force_cpu_help": "CUDA उपलब्ध होने पर भी CPU का उपयोग करने के लिए बाध्य करें",
255
  "flac_file_help": "WAV के बजाय FLAC फाइल आउटपुट करें",
256
  "export_format_help": "निर्यात प्रारूप और PCM प्रकार",
257
  "pcm_type_help": "FLAC फाइलों के लिए PCM प्रकार",
258
+ "use_tta_help": "टेस्ट टाइम ऑगमेंटेशन सक्षम करें",
259
+ "lora_checkpoint_help": "LoRA वज़न के लिए प्रारंभिक चेकपॉइंट",
260
  "cuda_available": "CUDA उपलब्ध है, इसे अक्षम करने के लिए --force_cpu का उपयोग करें।",
261
+ "using_device": "डिवाइस उपयोग: {}",
262
+ "instruments_print": "वाद्ययंत्र: {}",
263
  "model_load_time": "मॉडल लोड समय: {:.2f} सेकंड",
264
  "invalid_url": "❌ अमान्य URL",
265
+ "cookie_file_updated": "✅ कुकी फाइल अपडेट हुई!",
266
+ "cookie_installation_error": "⚠️ कुकी इंस्टॉलेशन त्रुटि: {}",
267
+ "google_drive_mount_error": "❌ Google Drive माउंट त्रुटि: {}",
268
+ "file_size_zero_error": "फाइल का आकार शून्य है या फाइल नहीं बनाई गई",
269
  "google_drive_error": "❌ Google Drive डाउनलोड त्रुटि: {}",
270
  "wav_conversion_failed": "WAV रूपांतरण विफल",
271
  "download_error": "❌ डाउनलोड त्रुटि: {}",
272
+ "file_copied_to_drive": "फाइल Google Drive पर कॉपी की गई: {}",
273
+ "copy_to_drive_error": "❌ Google Drive पर कॉपी करने में त्रुटि: {}",
274
+ "download_success": "🎉 सफलतापूर्वक डाउनलोड हुआ!",
275
  "download_failed": "❌ डाउनलोड विफल",
276
+ "no_output_files_found": "❌ कोई आउटपुट फाइल नहीं मिली।",
277
+ "output_refreshed_successfully": "✅ आउटपुट सफलतापूर्वक रिफ्रेश हुआ!",
278
+ "error_refreshing_output": "❌ आउटपुट रिफ्रेश करने में त्रुटि: {}",
279
  "starting_audio_separation": "ऑडियो पृथक्करण शुरू हो रहा है...",
280
+ "processing_audio": "ऑडियो प्रोसेस हो रहा है",
281
+ "separating_audio": "ऑडियो अलग हो रहा है... ({:.1f}%)",
282
+ "separation_complete": "पृथक्करण पूर्ण!",
283
  "progress_parsing_error": "प्रगति पार्सिंग त्रुटि: {}",
284
  "error_occurred": "एक त्रुटि हुई: {}",
285
+ "separation_process_completed": "पृथक्करण प्रक्रिया पूर्ण!",
286
+ "no_audio_file_error": "❌ कोई ऑडियो फाइल नहीं दी गई और इनपुट डायरेक्टरी में कोई मौजूदा फाइल नहीं है।",
287
+ "no_input_progress_label": "त्रुटि: कोई इनपुट नहीं दिया गया -- 0.0%",
288
+ "processing_audio_print": "ऑडियो प्रोसेस हो रहा है: {} मॉडल: {}",
289
  "starting_audio_separation_progress_label": "ऑडियो पृथक्करण शुरू हो रहा है... -- 0.0%",
290
+ "separating_audio_progress_label": "ऑडियो अलग हो रहा है... -- {}.0%",
291
+ "audio_processing_completed_progress_label": "ऑडियो प्रक्रिया पूर्ण! -- 100.0%",
292
+ "audio_processing_completed": "✅ ऑडियो प्रक्रिया पूर्ण!",
293
  "error_occurred_progress_label": "त्रुटि ह���ई -- 0.0%",
294
  "minimum_files_required": "⚠️ न्यूनतम 2 फाइलें आवश्यक",
295
+ "valid_files_not_found": "❌ कोई वैध फाइल नहीं मिली",
296
+ "starting_ensemble_process": "एन्सेम्बल प्रक्रिया शुरू हो रही है...",
297
+ "ensembling_progress": "एन्सेम्बल हो रहा है... ({:.1f}%)",
298
+ "finalizing_ensemble_output": "एन्सेम्बल आउटपुट को अंतिम रूप दे रहा है...",
299
+ "success_log": "✅ सफल!\n{}",
300
  "error_log": "❌ त्रुटि!\n{}",
301
+ "ensemble_process_completed": "एन्सेम्बल प्रक्रिया पूर्ण!",
302
+ "no_models_selected": "❌ कोई मॉडल नहीं चुना गया",
303
+ "no_input_audio_provided": "❌ कोई इनपुट ऑडियो नहीं दिया गया",
304
+ "loading_model": "मॉडल लोड हो रहा है {}/{}: {}...",
305
+ "loading_model_progress_label": "मॉडल लोड हो रहा है {}/{}: {}... -- {}.0%",
306
+ "completed_model": "मॉडल पूर्ण {}/{}: {}",
307
+ "completed_model_progress_label": "मॉडल पूर्ण {}/{}: {} -- {}.0%",
308
+ "running_command": "कमांड चला रहे हैं: {}",
 
309
  "model_failed": "मॉडल {} विफल: {}",
310
  "critical_error_with_model": "{} के साथ गंभीर त्रुटि: {}",
311
+ "model_output_failed": "{} आउटपुट नहीं दे सका",
312
+ "waiting_for_files": "सभी फाइलों के तैयार होने का इंतजार...",
313
+ "waiting_for_files_progress_label": "सभी फाइलों के तैयार होने का इंतजार... -- 90.0%",
314
+ "performing_ensemble": "एन्सेम्बल कर रहा है...",
315
+ "performing_ensemble_progress_label": "एन्सेम्बल कर रहा है... -- 92.0%",
316
+ "memory_usage_before_ensemble": "एन्सेम्बल से पहले मेमोरी उपयोग: {}%",
317
+ "memory_usage_after_ensemble": "एन्सेम्बल के बाद मेमोरी उपयोग: {}%",
318
+ "finalizing_ensemble_output_progress_label": "एन्सेम्बल आउटपुट को अंतिम रूप दे रहा है... -- 98.0%",
319
+ "ensemble_file_creation_failed": "एन्सेम्बल फाइल नहीं बनाई जा सकी: {}",
320
+ "ensemble_completed_progress_label": "एन्सेम्बल सफलतापूर्वक पूर्ण! -- 100.0%",
321
+ "success_output_created": "✅ सफल! आउटपुट फाइल बनाई गई।",
322
+ "custom_models_tab": "⚡ कस्टम मॉडल",
323
+ "add_custom_model": "➕ कस्टम मॉडल जोड़ें",
324
+ "custom_model_name": "मॉडल नाम",
325
+ "custom_model_name_placeholder": "अपने मॉडल के लिए एक नाम दर्ज करें",
326
+ "checkpoint_url": "Checkpoint URL (.ckpt/.pth)",
327
+ "checkpoint_url_placeholder": "https://huggingface.co/.../model.ckpt",
328
+ "config_url": "Config URL (.yaml)",
329
+ "config_url_placeholder": "https://huggingface.co/.../config.yaml",
330
+ "custom_py_url": "कस्टम Python फाइल URL (वैकल्पिक)",
331
+ "custom_py_url_placeholder": "https://huggingface.co/.../bs_roformer.py",
332
+ "auto_detect_type": "मॉडल प्रकार स्वचालित रूप से पहचानें",
333
+ "model_type": "मॉडल प्रकार",
334
+ "add_model_btn": "➕ मॉडल जोड़ें",
335
+ "custom_models_list": "📋 कस्टम मॉडल सूची",
336
+ "delete_model": "🗑️ हटाएं",
337
+ "no_custom_models": "अभी कोई कस्टम मॉडल नहीं जोड़ा गया",
338
+ "model_added_success": "✅ मॉडल सफलतापूर्वक जोड़ा गया!",
339
+ "model_deleted_success": "✅ मॉडल सफलतापूर्वक हटाया गया!",
340
+ "model_add_error": "❌ मॉडल जोड़ने में त्रुटि: {}",
341
+ "model_delete_error": "❌ मॉडल हटाने में त्रुटि: {}",
342
+ "refresh_models": "🔄 रिफ्रेश",
343
+ "custom_model_info": "डाउनलोड लिंक प्रदान करके कस्टम मॉडल जोड़ें। मॉडल उपयोग होने पर स्वचालित रूप से डाउनलोड होंगे।",
344
+ "select_model_to_delete": "हटाने के लिए मॉडल चुनें"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
345
  }
assets/i18n/languages/it_it.json CHANGED
@@ -17,8 +17,8 @@
17
  "overlap": "Sovrapposizione",
18
  "overlap_info": "Consigliato: 2-10 (valori più alti migliorano la qualità ma richiedono più VRAM)",
19
  "tta_boost": "Boost TTA",
20
- "tta_info": "Migliora la qualità ma rallenta la velocità di elaborazione",
21
  "phase_fix": "Correzione fase",
 
22
  "phase_fix_info": "Correzione avanzata della fase per tracce strumentali",
23
  "instrumental": "Strumentale",
24
  "instrumental_info": "Di solito produce 2 output, ma a volte solo uno, prova! :)",
@@ -80,111 +80,199 @@
80
  "downloaded_file": "File scaricato",
81
  "cookie_management": "🍪 Gestione cookie",
82
  "upload_cookies_txt": "Carica Cookies.txt",
83
- "cookie_info": "<div style=\"margin-left:15px; font-size:0.95em\">**📌 Perché è necessario?**<br>- Accesso a contenuti con restrizione d'età<br>- Download di video privati/non elencati<br>- Superamento delle restrizioni regionali<br>- Evitare i limiti di download di YouTube<br><br>**⚠️ Note importanti**<br>- NON CONDIVIDERE MAI i tuoi file cookie!<br>- Aggiorna i cookie quando:<br> • Ricevi errori \"403 Vietato\"<br> • I download si interrompono improvvisamente<br> • Vedi il messaggio \"Sessione scaduta\"<br><br>**🔄 Passaggi per l'aggiornamento**<br>1. Installa questa <a href=\"https://chromewebstore.google.com/detail/get-cookiestxt-clean/ahmnmhfbokciafffnknlekllgcnafnie\" target=\"_blank\">estensione Chrome</a><br>2. Accedi a YouTube su Chrome<br>3. Clicca sull'icona dell'estensione → \"Esporta\"<br>4. Carica il file scaricato qui<br><br>**⏳ Durata dei cookie**<br>- Sessioni normali: 24 ore<br>- Operazioni sensibili: 1 ora<br>- Cambio password: Invalidità immediata</div>",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84
  "manual_ensemble_tab": "🎚️ Ensemble manuale",
85
- "input_sources": "📂 Sorgenti di input",
86
  "refresh": "🔄 Aggiorna",
87
  "ensemble_algorithm": "Algoritmo ensemble",
88
  "select_audio_files": "Seleziona file audio",
89
  "available_files": "File disponibili",
90
- "custom_weights": "Pesi personalizzati (separati da virgole)",
91
  "custom_weights_placeholder": "Esempio: 0.8, 1.2, 1.0, ...",
92
  "custom_weights_info": "Lascia vuoto per pesi uguali",
93
  "result_preview_tab": "🎧 Anteprima risultato",
94
  "ensembled_output": "Output ensemble",
95
- "processing_log_tab": "📋 Log di elaborazione",
96
  "processing_details": "Dettagli elaborazione",
97
  "process_ensemble": "⚡ Elabora ensemble",
98
  "languages_tab": "🌐 Lingue",
99
  "Vocal Models": "Modelli vocali",
100
  "Instrumental Models": "Modelli strumentali",
101
- "4-Stem Models": "Modelli a 4 stem",
102
- "Denoise Models": "Modelli di denoising",
103
- "Dereverb Models": "Modelli di rimozione riverbero",
104
  "Other Models": "Altri modelli",
105
- "ensemble_files_description": "Combina più file audio in un unico output utilizzando metodi di ensemble specificati.",
 
106
  "ensemble_files_help": "Percorso a tutti i file audio per l'ensemble",
107
- "ensemble_type_help": "Uno tra: avg_wave, median_wave, min_wave, max_wave, avg_fft, median_fft, min_fft, max_fft",
108
- "ensemble_weights_help": "Pesi per creare l'ensemble. Il numero di pesi deve corrispondere al numero di file",
109
- "ensemble_output_help": "Percorso al file WAV dove verrà salvato il risultato dell'ensemble",
110
  "ensemble_type_print": "Tipo ensemble: {}",
111
  "num_input_files_print": "Numero file input: {}",
112
  "weights_print": "Pesi: {}",
113
  "output_file_print": "File output: {}",
114
  "duration_mismatch_error": "Tutti i file devono avere la stessa durata",
115
- "file_not_found_error": "Errore. File non trovato: {}. Controlla i percorsi.",
116
- "reading_chunk_print": "Lettura frammento dal file: {} (inizio: {}s, durata: {}s)",
117
- "chunk_result_shape_print": "Forma risultato frammento: {}",
118
  "ensemble_completed_print": "Ensemble completato. Output salvato in: {}",
119
  "file_deletion_error": "{} non può essere eliminato: {}",
120
  "directory_not_exist_warning": "⚠️ La directory non esiste: {}",
121
  "not_a_directory_warning": "⚠️ Il percorso non è una directory: {}",
122
- "item_deletion_error": "⚠️ Errore durante l'eliminazione di {}: {}",
123
  "old_output_not_exist": "❌ La cartella output vecchia non esiste",
124
- "old_outputs_cleared": "✅ Output vecchi eliminati con successo!",
125
  "error": "🔥 Errore: {}",
126
  "ffmpeg_error": "Errore FFmpeg ({}): {}",
127
  "file_saved_successfully": "File salvato con successo: {}",
128
- "total_files_found": "Totale file trovati: {}. Frequenza di campionamento usata: {}",
129
  "total_progress": "Progresso totale",
130
- "detailed_pbar_enabled": "Barra di progresso dettagliata abilitata: {}",
131
  "loaded_audio": "Audio caricato: {}, forma: {}",
132
- "cannot_read_track": "Impossibile leggere la traccia: {}",
133
- "error_message": "Messaggio di errore: {}",
134
- "demudding_track": "Pulizia traccia (remix fase - strumentale): {}",
135
  "elapsed_time": "Tempo trascorso: {:.2f} secondi.",
136
- "proc_folder_description": "Elabora i file audio in una cartella usando un modello specificato.",
137
- "model_type_help": "Tipo di modello (bandit, bs_roformer, mdx23c, ecc.)",
138
- "config_path_help": "Percorso al file di configurazione",
139
- "demud_phaseremix_help": "Abilita remix fase per lo strumentale",
140
- "start_checkpoint_help": "Punto di controllo iniziale per pesi validi",
141
- "input_folder_help": "Cartella contenente i mix da elaborare",
142
- "audio_path_help": "Percorso a un singolo file audio da elaborare",
143
- "store_dir_help": "Percorso per salvare i risultati",
144
- "device_ids_help": "Lista di ID GPU",
145
- "extract_instrumental_help": "Inverti le vocali per ottenere lo strumentale, se fornito",
146
- "disable_detailed_pbar_help": "Disabilita barra di progresso dettagliata nel demixing",
147
- "force_cpu_help": "Forza l'uso della CPU anche se CUDA è disponibile",
148
- "flac_file_help": "Genera un file FLAC invece di WAV",
149
- "export_format_help": "Formato di esportazione e tipo PCM",
150
  "pcm_type_help": "Tipo PCM per file FLAC",
151
- "use_tta_help": "Abilita l'aumento del tempo di test",
152
- "lora_checkpoint_help": "Punto di controllo iniziale per pesi LoRA",
153
- "cuda_available": "CUDA disponibile, usa --force_cpu per disabilitarlo.",
154
- "using_device": "Dispositivo in uso: {}",
155
  "instruments_print": "Strumenti: {}",
156
- "model_load_time": "Tempo di caricamento modello: {:.2f} sec",
157
  "invalid_url": "❌ URL non valido",
158
  "cookie_file_updated": "✅ File cookie aggiornato!",
159
  "cookie_installation_error": "⚠️ Errore installazione cookie: {}",
 
160
  "file_size_zero_error": "Dimensione file zero o file non creato",
161
  "google_drive_error": "❌ Errore download Google Drive: {}",
162
  "wav_conversion_failed": "Conversione WAV fallita",
163
  "download_error": "❌ Errore download: {}",
164
- "download_success": "🎉 Download completato con successo!",
 
 
 
 
 
 
 
165
  "download_failed": "❌ Download fallito",
166
  "no_output_files_found": "❌ Nessun file output trovato.",
167
  "output_refreshed_successfully": "✅ Output aggiornato con successo!",
168
- "error_refreshing_output": "❌ Errore durante aggiornamento output: {}",
169
- "starting_audio_separation": "Inizio separazione audio...",
170
  "processing_audio": "Elaborazione audio",
171
  "separating_audio": "Separazione audio... ({:.1f}%)",
172
  "separation_complete": "Separazione completata!",
173
- "progress_parsing_error": "Errore analisi progresso: {}",
174
  "error_occurred": "Si è verificato un errore: {}",
175
- "separation_process_completed": "Processo di separazione completato!",
176
- "no_audio_file_error": "❌ Nessun file audio fornito e nessun file esistente nella directory di input.",
177
  "no_input_progress_label": "Errore: Nessun input fornito -- 0.0%",
178
- "processing_audio_print": "Elaborazione audio da: {} usando il modello: {}",
179
- "starting_audio_separation_progress_label": "Inizio separazione audio... -- 0.0%",
180
  "separating_audio_progress_label": "Separazione audio... -- {}.0%",
181
  "audio_processing_completed_progress_label": "Elaborazione audio completata! -- 100.0%",
182
  "audio_processing_completed": "✅ Elaborazione audio completata!",
183
  "error_occurred_progress_label": "Errore verificato -- 0.0%",
184
- "minimum_files_required": "⚠️ Richiesti minimo 2 file",
185
  "valid_files_not_found": "❌ Nessun file valido trovato",
186
- "starting_ensemble_process": "Inizio processo ensemble...",
187
- "ensembling_progress": "Elaborazione ensemble... ({:.1f}%)",
188
  "finalizing_ensemble_output": "Finalizzazione output ensemble...",
189
  "success_log": "✅ Successo!\n{}",
190
  "error_log": "❌ Errore!\n{}",
@@ -200,81 +288,92 @@
200
  "model_failed": "Modello {} fallito: {}",
201
  "critical_error_with_model": "Errore critico con {}: {}",
202
  "model_output_failed": "{} non è riuscito a produrre output",
203
- "waiting_for_files": "In attesa che tutti i file siano pronti...",
204
- "waiting_for_files_progress_label": "In attesa che tutti i file siano pronti... -- 90.0%",
205
  "performing_ensemble": "Esecuzione ensemble...",
206
  "performing_ensemble_progress_label": "Esecuzione ensemble... -- 92.0%",
207
- "memory_usage_before_ensemble": "Uso memoria prima dell'ensemble: {}%",
208
- "memory_usage_after_ensemble": "Uso memoria dopo l'ensemble: {}%",
209
  "finalizing_ensemble_output_progress_label": "Finalizzazione output ensemble... -- 98.0%",
210
- "ensemble_file_creation_failed": "Creazione file ensemble fallita: {}",
211
  "ensemble_completed_progress_label": "Ensemble completato con successo! -- 100.0%",
212
  "success_output_created": "✅ Successo! File output creato.",
213
- "drive_mounted_copying_ensemble": "Google Drive montato. Copia output ensemble...",
214
- "drive_already_mounted_copying_ensemble": "Google Drive già montato. Copia output ensemble...",
215
- "no_ensemble_output_files_found": " Nessun file output ensemble trovato.",
216
- "ensemble_output_copied": " Output ensemble copiato in {}",
217
- "error_copying_ensemble_output": " Errore durante copia output ensemble: {}",
218
- "drive_mounted_copying_files": "Google Drive montato. Copia file...",
219
- "drive_already_mounted_copying_files": "Google Drive già montato. Copia file...",
220
- "files_copied_to_drive": "File copiati in {}",
221
- "error_copying_files": " Errore durante copia file: {}",
222
- "mounting_drive": "Montaggio Google Drive...",
223
- "settings_saved": "Impostazioni salvate in config.json: Lingua={lang}, Metodo={method}, Porta={port}",
224
- "selected_language": "Lingua selezionata: {lang}",
225
- "available_languages": "Lingue disponibili: {langs}",
226
- "select_language_prompt": "Seleziona una lingua (default: {default}): ",
227
- "invalid_language": "Selezione lingua non valida! Uso lingua default ({default}).",
228
- "available_sharing_methods": "Metodi di condivisione disponibili: gradio, localtunnel, ngrok",
229
- "select_sharing_method_prompt": "Scegli un metodo di condivisione (default: {default}): ",
230
- "enter_ngrok_token_prompt": "Inserisci il tuo token Ngrok (default: {default}): ",
231
- "ngrok_token_required": "Errore: Token Ngrok richiesto per il metodo ngrok!",
232
- "enter_port_prompt": "Inserisci il numero di porta (default: {default}): ",
233
- "opening_previous_url": "Apertura URL precedente: {url}",
234
- "open_url_manually": "Apri l'URL manualmente in Colab: {url}",
235
- "invalid_previous_url": "URL precedente non valido: {url}, non aperto.",
236
- "starting_method": "Avvio di {method} sulla porta {port}...",
237
- "process_stopped": "\n🛑 Processo fermato dall'utente",
238
- "selected_port": "Porta selezionata: {port}",
239
- "starting_gradio_with_sharing": "Avvio Gradio con condivisione integrata...",
240
- "starting_localtunnel": "Avvio Localtunnel sulla porta {port}...",
241
- "share_link": "Link di condivisione: {url}",
242
- "password_ip": "Password IP: {ip}",
243
- "starting_ngrok": "Avvio Ngrok sulla porta {port}...",
244
- "ngrok_url": "URL Ngrok: {url}",
245
- "ngrok_error": "Errore avvio Ngrok: {error}",
246
- "apollo_chunk_size": "Dimensione frammento Apollo",
247
- "apollo_chunk_size_info": "Dimensione frammento per miglioramento Apollo (consigliato: 19 per modello universale)",
248
- "apollo_overlap": "Sovrapposizione Apollo",
249
- "enhancing_with_apollo": "Miglioramento con Apollo ({}/{} file)...",
250
- "apollo_processing_completed": "Elaborazione Apollo completata!",
251
- "apollo_overlap_info": "Sovrapposizione per miglioramento Apollo (consigliato: 2)",
252
- "invalid_method": "Errore: Metodo non valido! Usa 'gradio', 'localtunnel' o 'ngrok'.",
253
- "apollo_enhancement_settings": "Impostazioni miglioramento Apollo",
254
- "enhance_with_apollo": "Migliora con Apollo",
255
- "enhance_with_apollo_info": "Abilita Apollo per migliorare l'audio dopo la separazione",
256
- "apollo_method": "Metodo Apollo",
257
- "apollo_method_info": "Seleziona il metodo di elaborazione per Apollo",
 
 
 
 
 
 
 
 
 
 
 
258
  "normal_method": "Metodo normale",
259
- "mid_side_method": "Metodo mid/side",
260
- "apollo_normal_model": "Modello Apollo normale",
261
- "apollo_normal_model_info": "Modello da usare per l'elaborazione Apollo normale",
262
- "apollo_midside_model": "Modello Apollo mid/side",
263
- "apollo_enhancement_info": "Miglioramento qualità audio",
264
- "selected_models": "Modelli Selezionati",
265
- "save_preset": "Salva Preimpostazione",
266
- "delete_preset": "Elimina Preimpostazione",
267
- "refresh_presets": "Aggiorna Preimpostazioni",
268
- "preset_name": "Nome Preimpostazione",
269
- "select_preset": "Seleziona Preimpostazione",
270
- "add_favorite": "Aggiungi ai Preferiti",
271
- "apollo_midside_model_info": "Modello da usare per l'elaborazione mid/side (opzionale)",
272
- "language_changed_message": "Lingua cambiata. Riavvia la cella.",
273
- "apply_matchering": "Applica Matchering",
274
- "matchering_info": "Applica Matchering per migliorare il mastering e il bilanciamento dell'audio",
275
- "matchering_passes": "Passaggi di Matchering",
276
- "matchering_passes_info": "Numero di iterazioni di Matchering (1-5, valori più alti migliorano la qualità ma richiedono più tempo)",
277
- "matchering_processing": "Applicazione di Matchering in corso... ({:.1f}%)",
278
- "matchering_completed": " Elaborazione Matchering completata!",
279
- "matchering_error": " Errore durante Matchering: {}"
280
  }
 
17
  "overlap": "Sovrapposizione",
18
  "overlap_info": "Consigliato: 2-10 (valori più alti migliorano la qualità ma richiedono più VRAM)",
19
  "tta_boost": "Boost TTA",
 
20
  "phase_fix": "Correzione fase",
21
+ "use_phase_correction": "Applica Correzione Fase Avanzata",
22
  "phase_fix_info": "Correzione avanzata della fase per tracce strumentali",
23
  "instrumental": "Strumentale",
24
  "instrumental_info": "Di solito produce 2 output, ma a volte solo uno, prova! :)",
 
80
  "downloaded_file": "File scaricato",
81
  "cookie_management": "🍪 Gestione cookie",
82
  "upload_cookies_txt": "Carica Cookies.txt",
83
+ "cookie_info": "<div style=\"margin-left:15px; font-size:0.95em\">**📌 Perché è necessario?**<br>- Accesso a contenuti con restrizione d'età<br>- Download di video privati/non elencati<br>- Superamento delle restrizioni regionali<br>- Evitare i limiti di download di YouTube<br><br>**⚠️ Note Importanti**<br>- NON condividere mai i tuoi file cookie!<br>- Aggiorna i cookie quando:<br> • Ricevi errori \"403 Vietato\"<br> • I download si fermano improvvisamente<br> • Vedi messaggi \"Sessione scaduta\"<br><br>**🔄 Passi per il Rinnovo**<br>1. Installa questa <a href=\"https://chromewebstore.google.com/detail/get-cookiestxt-clean/ahmnmhfbokciafffnknlekllgcnafnie\" target=\"_blank\">estensione Chrome</a><br>2. Accedi a YouTube su Chrome<br>3. Clicca sull'icona dell'estensione → \"Esporta\"<br>4. Carica il file scaricato qui<br><br>**⏳ Durata Cookie**<br>- Sessioni normali: 24 ore<br>- Operazioni sensibili: 1 ora<br>- Cambio password: Rinnovo immediato</div>",
84
+ "ensemble_output_copied": "✅ Output ensemble copiato in {}",
85
+ "error_copying_ensemble_output": "❌ Errore copia output ensemble: {}",
86
+ "drive_mounted_copying_ensemble": "Google Drive montato. Copia output ensemble...",
87
+ "drive_already_mounted_copying_ensemble": "Google Drive già montato. Copia output ensemble...",
88
+ "no_ensemble_output_files_found": "❌ Nessun file output ensemble trovato.",
89
+ "drive_mounted_copying_files": "Google Drive montato. Copia file...",
90
+ "drive_already_mounted_copying_files": "Google Drive già montato. Copia file...",
91
+ "files_copied_to_drive": "✅ File copiati in {}",
92
+ "error_copying_files": "❌ Errore copia file: {}",
93
+ "mounting_drive": "Montaggio Google Drive...",
94
+ "settings_saved": "Impostazioni salvate in config.json: Lingua={lang}, Metodo={method}, Porta={port}",
95
+ "selected_language": "Lingua selezionata: {lang}",
96
+ "available_languages": "Lingue disponibili: {langs}",
97
+ "select_language_prompt": "Seleziona una lingua (default: {default}): ",
98
+ "invalid_language": "Selezione lingua non valida! Uso lingua default ({default}).",
99
+ "available_sharing_methods": "Metodi di condivisione disponibili: gradio, localtunnel, ngrok",
100
+ "select_sharing_method_prompt": "Scegli un metodo di condivisione (default: {default}): ",
101
+ "enter_ngrok_token_prompt": "Inserisci il tuo token Ngrok (default: {default}): ",
102
+ "ngrok_token_required": "Errore: Token Ngrok richiesto per il metodo ngrok!",
103
+ "enter_port_prompt": "Inserisci il numero di porta (default: {default}): ",
104
+ "opening_previous_url": "Apertura URL precedente: {url}",
105
+ "open_url_manually": "Apri l'URL manualmente in Colab: {url}",
106
+ "invalid_previous_url": "URL precedente non valido: {url}, non aperto.",
107
+ "starting_method": "Avvio di {method} sulla porta {port}...",
108
+ "process_stopped": "\n🛑 Processo fermato dall'utente",
109
+ "selected_port": "Porta selezionata: {port}",
110
+ "starting_gradio_with_sharing": "Avvio Gradio con condivisione integrata...",
111
+ "starting_localtunnel": "Avvio Localtunnel sulla porta {port}...",
112
+ "share_link": "Link di condivisione: {url}",
113
+ "password_ip": "Password IP: {ip}",
114
+ "starting_ngrok": "Avvio Ngrok sulla porta {port}...",
115
+ "ngrok_url": "URL Ngrok: {url}",
116
+ "ngrok_error": "Errore avvio Ngrok: {error}",
117
+ "auto_apollo_chunk_size": "Dimensione frammento Apollo",
118
+ "auto_apollo_chunk_size_info": "Dimensione frammento per miglioramento Apollo (consigliato: 19 per modello universale)",
119
+ "auto_apollo_overlap": "Sovrapposizione Apollo",
120
+ "auto_enhancing_with_apollo": "Miglioramento con Apollo ({}/{} file)...",
121
+ "auto_apollo_processing_completed": "Elaborazione Apollo completata!",
122
+ "auto_apollo_overlap_info": "Sovrapposizione per miglioramento Apollo (consigliato: 2)",
123
+ "apollo_chunk_size": "Dimensione frammento Apollo",
124
+ "apollo_chunk_size_info": "Dimensione frammento per miglioramento Apollo (consigliato: 19 per modello universale)",
125
+ "apollo_overlap": "Sovrapposizione Apollo",
126
+ "enhance_with_apollo": "Migliora con Apollo",
127
+ "apollo_enhancement_info": "Miglioramento qualità audio",
128
+ "enhancing_with_apollo": "Miglioramento con Apollo ({}/{} file)...",
129
+ "apollo_processing_completed": "Elaborazione Apollo completata!",
130
+ "apollo_overlap_info": "Sovrapposizione per miglioramento Apollo (consigliato: 2)",
131
+ "selected_models": "Modelli selezionati",
132
+ "save_preset": "Salva preimpostazione",
133
+ "delete_preset": "Elimina preimpostazione",
134
+ "refresh_presets": "Aggiorna preimpostazioni",
135
+ "preset_name": "Nome preimpostazione",
136
+ "select_preset": "Seleziona preimpostazione",
137
+ "add_favorite": "Aggiungi ai preferiti",
138
+ "apply_matchering": "Applica Matchering",
139
+ "matchering_info": "Applica Matchering per migliorare il mastering e il bilanciamento dell'audio",
140
+ "matchering_passes": "Passi Matchering",
141
+ "matchering_passes_info": "Numero di iterazioni Matchering (1-5, valori più alti migliorano la qualità ma richiedono più tempo)",
142
+ "matchering_processing": "Applicazione Matchering in corso... ({:.1f}%)",
143
+ "matchering_completed": "✅ Elaborazione Matchering completata!",
144
+ "matchering_error": "❌ Errore durante Matchering: {}",
145
+ "invalid_method": "Errore: Metodo non valido! Usa 'gradio', 'localtunnel' o 'ngrok'.",
146
+ "language_changed_message": "Lingua cambiata. Riavvia la cella.",
147
+ "backend_settings": "🚀 Impostazioni backend",
148
+ "inference_backend": "Backend inferenza",
149
+ "optimization_mode": "Modalità ottimizzazione",
150
+ "optimization_mode_info": "Selezione modalità ottimizzazione PyTorch",
151
+ "mixed_precision_amp": "Precisione mista (AMP)",
152
+ "mixed_precision_info": "Inferenza 2x più veloce - consigliato",
153
+ "tf32_acceleration": "Accelerazione TF32",
154
+ "tf32_acceleration_info": "Boost di velocità extra per RTX 30xx+",
155
+ "cudnn_benchmark": "Benchmark cuDNN",
156
+ "cudnn_benchmark_info": "Più lento al primo run, molto più veloce nei run successivi",
157
+ "ultra_optimized_pytorch": "Backend PyTorch ULTRA-OTTIMIZZATO",
158
+ "default_active_max_speed": "Attivo per impostazione predefinita - Ottimizzazione velocità massima",
159
+ "channels_last_mode": "Il più veloce per GPU RTX",
160
+ "compile_mode": "Velocità extra per PyTorch 2.0+",
161
+ "default_mode": "Standard",
162
+ "tta_info": "Aumenta la qualità ma rallenta l'elaborazione",
163
  "manual_ensemble_tab": "🎚️ Ensemble manuale",
164
+ "input_sources": "📂 Sorgenti input",
165
  "refresh": "🔄 Aggiorna",
166
  "ensemble_algorithm": "Algoritmo ensemble",
167
  "select_audio_files": "Seleziona file audio",
168
  "available_files": "File disponibili",
169
+ "custom_weights": "Pesi personalizzati (separati da virgola)",
170
  "custom_weights_placeholder": "Esempio: 0.8, 1.2, 1.0, ...",
171
  "custom_weights_info": "Lascia vuoto per pesi uguali",
172
  "result_preview_tab": "🎧 Anteprima risultato",
173
  "ensembled_output": "Output ensemble",
174
+ "processing_log_tab": "📋 Registro elaborazione",
175
  "processing_details": "Dettagli elaborazione",
176
  "process_ensemble": "⚡ Elabora ensemble",
177
  "languages_tab": "🌐 Lingue",
178
  "Vocal Models": "Modelli vocali",
179
  "Instrumental Models": "Modelli strumentali",
180
+ "4-Stem Models": "Modelli 4-stem",
181
+ "Denoise Models": "Modelli denoising",
182
+ "Dereverb Models": "Modelli dereverb",
183
  "Other Models": "Altri modelli",
184
+ "Other Shapes": "Altri modelli",
185
+ "ensemble_files_description": "Combina più file audio in un singolo output usando metodi ensemble specificati.",
186
  "ensemble_files_help": "Percorso a tutti i file audio per l'ensemble",
187
+ "ensemble_type_help": "Uno di: avg_wave, median_wave, min_wave, max_wave, avg_fft, median_fft, min_fft, max_fft",
188
+ "ensemble_weights_help": "Pesi per creare l'ensemble. Il numero di pesi deve essere uguale al numero di file",
189
+ "ensemble_output_help": "Percorso al file WAV dove verrà memorizzato il risultato dell'ensemble",
190
  "ensemble_type_print": "Tipo ensemble: {}",
191
  "num_input_files_print": "Numero file input: {}",
192
  "weights_print": "Pesi: {}",
193
  "output_file_print": "File output: {}",
194
  "duration_mismatch_error": "Tutti i file devono avere la stessa durata",
195
+ "file_not_found_error": "Errore. Impossibile trovare il file: {}. Controlla i percorsi.",
196
+ "reading_chunk_print": "Lettura chunk dal file: {} (inizio: {}s, durata: {}s)",
197
+ "chunk_result_shape_print": "Forma risultato chunk: {}",
198
  "ensemble_completed_print": "Ensemble completato. Output salvato in: {}",
199
  "file_deletion_error": "{} non può essere eliminato: {}",
200
  "directory_not_exist_warning": "⚠️ La directory non esiste: {}",
201
  "not_a_directory_warning": "⚠️ Il percorso non è una directory: {}",
202
+ "item_deletion_error": "⚠️ Errore eliminazione {}: {}",
203
  "old_output_not_exist": "❌ La cartella output vecchia non esiste",
204
+ "old_outputs_cleared": "✅ Output vecchi cancellati con successo!",
205
  "error": "🔥 Errore: {}",
206
  "ffmpeg_error": "Errore FFmpeg ({}): {}",
207
  "file_saved_successfully": "File salvato con successo: {}",
208
+ "total_files_found": "File totali trovati: {}. Sample rate usato: {}",
209
  "total_progress": "Progresso totale",
210
+ "detailed_pbar_enabled": "Barra progresso dettagliata abilitata: {}",
211
  "loaded_audio": "Audio caricato: {}, forma: {}",
212
+ "cannot_read_track": "Impossibile leggere traccia: {}",
213
+ "error_message": "Messaggio errore: {}",
214
+ "demudding_track": "Pulizia traccia (phase remix - strumentale): {}",
215
  "elapsed_time": "Tempo trascorso: {:.2f} secondi.",
216
+ "proc_folder_description": "Elabora file audio in una cartella usando un modello specificato.",
217
+ "model_type_help": "Tipo modello (bandit, bs_roformer, mdx23c, ecc.)",
218
+ "config_path_help": "Percorso al file configurazione",
219
+ "demud_phaseremix_help": "Abilita phase remix per strumentale",
220
+ "start_checkpoint_help": "Checkpoint iniziale per pesi validi",
221
+ "input_folder_help": "Cartella con mix da elaborare",
222
+ "audio_path_help": "Percorso a singolo file audio da elaborare",
223
+ "store_dir_help": "Percorso per memorizzare risultati",
224
+ "device_ids_help": "Lista ID GPU",
225
+ "extract_instrumental_help": "Inverti vocali per ottenere strumentale se fornito",
226
+ "disable_detailed_pbar_help": "Disabilita barra progresso dettagliata in demix",
227
+ "force_cpu_help": "Forza uso CPU anche se CUDA è disponibile",
228
+ "flac_file_help": "Output file FLAC invece di WAV",
229
+ "export_format_help": "Formato export e tipo PCM",
230
  "pcm_type_help": "Tipo PCM per file FLAC",
231
+ "use_tta_help": "Abilita test time augmentation",
232
+ "lora_checkpoint_help": "Checkpoint iniziale per pesi LoRA",
233
+ "cuda_available": "CUDA è disponibile, usa --force_cpu per disabilitarlo.",
234
+ "using_device": "Uso dispositivo: {}",
235
  "instruments_print": "Strumenti: {}",
236
+ "model_load_time": "Tempo caricamento modello: {:.2f} sec",
237
  "invalid_url": "❌ URL non valido",
238
  "cookie_file_updated": "✅ File cookie aggiornato!",
239
  "cookie_installation_error": "⚠️ Errore installazione cookie: {}",
240
+ "google_drive_mount_error": "❌ Errore connessione Google Drive: {}",
241
  "file_size_zero_error": "Dimensione file zero o file non creato",
242
  "google_drive_error": "❌ Errore download Google Drive: {}",
243
  "wav_conversion_failed": "Conversione WAV fallita",
244
  "download_error": "❌ Errore download: {}",
245
+ "file_copied_to_drive": "File copiato su Google Drive: {}",
246
+ "copy_to_drive_error": "❌ Errore copia su Google Drive: {}",
247
+ "direct_download_failed": "Download diretto file fallito",
248
+ "direct_download_error": "❌ Errore download diretto: {}",
249
+ "invalid_google_drive_url": "❌ URL Google Drive non valido",
250
+ "continuing_without_google_drive": "Connessione Google Drive fallita, continuo senza...",
251
+ "skipping_drive_copy_no_mount": "⚠️ Google Drive non montato, salto copia su Drive...",
252
+ "download_success": "🎉 Scaricato con successo!",
253
  "download_failed": "❌ Download fallito",
254
  "no_output_files_found": "❌ Nessun file output trovato.",
255
  "output_refreshed_successfully": "✅ Output aggiornato con successo!",
256
+ "error_refreshing_output": "❌ Errore aggiornamento output: {}",
257
+ "starting_audio_separation": "Avvio separazione audio...",
258
  "processing_audio": "Elaborazione audio",
259
  "separating_audio": "Separazione audio... ({:.1f}%)",
260
  "separation_complete": "Separazione completata!",
261
+ "progress_parsing_error": "Errore parsing progresso: {}",
262
  "error_occurred": "Si è verificato un errore: {}",
263
+ "separation_process_completed": "Processo separazione completato!",
264
+ "no_audio_file_error": "❌ Nessun file audio fornito e nessun file esistente nella directory input.",
265
  "no_input_progress_label": "Errore: Nessun input fornito -- 0.0%",
266
+ "processing_audio_print": "Elaborazione audio da: {} usando modello: {}",
267
+ "starting_audio_separation_progress_label": "Avvio separazione audio... -- 0.0%",
268
  "separating_audio_progress_label": "Separazione audio... -- {}.0%",
269
  "audio_processing_completed_progress_label": "Elaborazione audio completata! -- 100.0%",
270
  "audio_processing_completed": "✅ Elaborazione audio completata!",
271
  "error_occurred_progress_label": "Errore verificato -- 0.0%",
272
+ "minimum_files_required": "⚠️ Minimo 2 file richiesti",
273
  "valid_files_not_found": "❌ Nessun file valido trovato",
274
+ "starting_ensemble_process": "Avvio processo ensemble...",
275
+ "ensembling_progress": "Ensemble in corso... ({:.1f}%)",
276
  "finalizing_ensemble_output": "Finalizzazione output ensemble...",
277
  "success_log": "✅ Successo!\n{}",
278
  "error_log": "❌ Errore!\n{}",
 
288
  "model_failed": "Modello {} fallito: {}",
289
  "critical_error_with_model": "Errore critico con {}: {}",
290
  "model_output_failed": "{} non è riuscito a produrre output",
291
+ "waiting_for_files": "Attesa che tutti i file siano pronti...",
292
+ "waiting_for_files_progress_label": "Attesa che tutti i file siano pronti... -- 90.0%",
293
  "performing_ensemble": "Esecuzione ensemble...",
294
  "performing_ensemble_progress_label": "Esecuzione ensemble... -- 92.0%",
295
+ "memory_usage_before_ensemble": "Uso memoria prima ensemble: {}%",
296
+ "memory_usage_after_ensemble": "Uso memoria dopo ensemble: {}%",
297
  "finalizing_ensemble_output_progress_label": "Finalizzazione output ensemble... -- 98.0%",
298
+ "ensemble_file_creation_failed": "Impossibile creare file ensemble: {}",
299
  "ensemble_completed_progress_label": "Ensemble completato con successo! -- 100.0%",
300
  "success_output_created": "✅ Successo! File output creato.",
301
+ "phase_fixer_tab": "🔧 Correttore di fase",
302
+ "batch_processing_tab": "📦 Elaborazione batch",
303
+ "batch_description": "Elabora più file contemporaneamente con lo stesso modello",
304
+ "batch_input_folder": "Cartella di input",
305
+ "batch_input_folder_placeholder": "Inserisci il percorso della cartella contenente i file audio",
306
+ "batch_output_folder": "Cartella di output",
307
+ "batch_output_folder_placeholder": "Inserisci il percorso della cartella di output",
308
+ "batch_file_list": "File da elaborare",
309
+ "batch_add_files": "Aggiungi file",
310
+ "batch_clear_list": "Svuota lista",
311
+ "batch_start": "🚀 Avvia elaborazione batch",
312
+ "batch_stop": "⏹️ Ferma",
313
+ "batch_progress": "Progresso batch",
314
+ "batch_current_file": "In elaborazione",
315
+ "batch_completed": "✅ Elaborazione batch completata!",
316
+ "batch_stopped": "⏹️ Elaborazione batch fermata",
317
+ "batch_no_files": "❌ Nessun file selezionato per l'elaborazione",
318
+ "source_model": "Modello sorgente (Vocali)",
319
+ "source_model_info": "Modello per estrarre vocali/dati di fase",
320
+ "target_model": "Modello destinazione (Strumentale)",
321
+ "target_model_info": "Modello a cui applicare la correzione di fase",
322
+ "custom_models_tab": "⚡ Modelli personalizzati",
323
+ "Custom Models": "Modelli personalizzati",
324
+ "add_custom_model": "➕ Aggiungi modello personalizzato",
325
+ "custom_model_name": "Nome modello",
326
+ "custom_model_name_placeholder": "Inserisci un nome per il tuo modello",
327
+ "checkpoint_url": "URL Checkpoint (.ckpt/.pth)",
328
+ "checkpoint_url_placeholder": "https://huggingface.co/.../model.ckpt",
329
+ "config_url": "URL Configurazione (.yaml)",
330
+ "config_url_placeholder": "https://huggingface.co/.../config.yaml",
331
+ "custom_py_url": "URL file Python personalizzato (opzionale)",
332
+ "custom_py_url_placeholder": "https://huggingface.co/.../bs_roformer.py",
333
+ "auto_detect_type": "Rileva automaticamente il tipo di modello",
334
+ "model_type": "Tipo modello",
335
+ "add_model_btn": "➕ Aggiungi modello",
336
+ "custom_models_list": "📋 Lista modelli personalizzati",
337
+ "delete_model": "🗑️ Elimina",
338
+ "no_custom_models": "Nessun modello personalizzato aggiunto",
339
+ "model_added_success": "✅ Modello aggiunto con successo!",
340
+ "model_deleted_success": "✅ Modello eliminato con successo!",
341
+ "model_add_error": "❌ Errore aggiunta modello: {}",
342
+ "model_delete_error": "❌ Errore eliminazione modello: {}",
343
+ "refresh_models": "🔄 Aggiorna",
344
+ "custom_model_info": "Aggiungi modelli personalizzati fornendo link di download. I modelli verranno scaricati automaticamente quando usati.",
345
+ "select_model_to_delete": "Seleziona un modello da eliminare",
346
+ "downloading_model_file": "Download modello: {}",
347
+ "downloading_file_progress": "Download: {} - {}%",
348
+ "chunk_size_mode": "Modalità Dimensione Frammento",
349
+ "chunk_size_mode_info": "base: preset standard | custom: inserisci il tuo valore | yaml: leggi dallo YAML del modello",
350
+ "chunk_size_custom_label": "Dimensione Frammento Personalizzata",
351
+ "chunk_size_custom_info": "Inserisci un valore personalizzato per la dimensione del frammento",
352
+ "chunk_size_yaml_label": "Dimensione Frammento YAML",
353
+ "chunk_size_yaml_display_info": "Verrà letta dal file di configurazione YAML del modello dopo il download",
354
+ "chunk_size_yaml_not_downloaded": "⏳ YAML del modello non ancora scaricato — il valore verrà letto durante l'elaborazione",
355
+ "chunk_size_yaml_detected": "✅ Da YAML: {}",
356
+ "remove_favorite": "Rimuovi dai Preferiti",
357
  "normal_method": "Metodo normale",
358
+ "mid_side_method": "Metodo Mid-Side",
359
+ "apollo_processing_method": "Metodo di Elaborazione Apollo",
360
+ "apollo_normal_model": "Modello Normale Apollo",
361
+ "apollo_mid_side_model": "Modello Mid-Side Apollo",
362
+ "scale_factor": "Fattore di Scala",
363
+ "scale_factor_info": "Fattore di scala della fase (consigliato: 1.4)",
364
+ "source_file_label": "File Sorgente (Voce)",
365
+ "target_file_label": "File di Destinazione (Strumentale)",
366
+ "run_phase_fixer": "🔧 Avvia Correttore di Fase",
367
+ "phase_fixed_output": "Output con Fase Corretta",
368
+ "phase_fixer_settings": "Impostazioni Correttore di Fase",
369
+ "low_cutoff": "Taglio Basso (Hz)",
370
+ "high_cutoff": "Taglio Alto (Hz)",
371
+ "low_cutoff_info": "Frequenza di taglio bassa per la correzione della fase",
372
+ "high_cutoff_info": "Frequenza di taglio alta per la correzione della fase",
373
+ "no_preset_name_provided": "⚠️ Nessun nome preset fornito",
374
+ "no_models_selected_for_preset": "⚠️ Nessun modello selezionato per il preset",
375
+ "preset_saved": "✅ Preset '{}' salvato!",
376
+ "please_upload_both_files": "⚠️ Carica sia il file sorgente che quello di destinazione",
377
+ "processing_log_placeholder": "Il registro di elaborazione apparirà qui...",
378
+ "enter_preset_name": "Inserisci il nome del preset..."
379
  }
assets/i18n/languages/ja_jp.json CHANGED
@@ -17,11 +17,10 @@
17
  "overlap": "オーバーラップ",
18
  "overlap_info": "推奨: 2-10 (高い値は品質を向上させますが、より多くのVRAMが必要です)",
19
  "tta_boost": "TTAブースト",
20
- "tta_info": "品質を向上させますが、処理速度が遅くなります",
21
  "phase_fix": "位相補正",
 
22
  "phase_fix_info": "インストゥルメンタルトラックのための高度な位相補正",
23
  "instrumental": "インストゥルメンタル",
24
- "instrumental_info": "通常2つの出力が得られますが、時には1つの出力だけの場合もあります。試してみてください :)",
25
  "process": "🚀 処理",
26
  "reset": "🧹 リセット",
27
  "status": "ステータス",
@@ -80,201 +79,267 @@
80
  "downloaded_file": "ダウンロードされたファイル",
81
  "cookie_management": "🍪 クッキー管理",
82
  "upload_cookies_txt": "Cookies.txtをアップロード",
83
- "cookie_info": "<div style=\"margin-left:15px; font-size:0.95em\">**📌 なぜ必要か?**<br>- 年齢制限付きコンテンツへのアクセス<br>- プライベート/非公開ビデオのダウンロード<br>- 地域制限の回避<br>- YouTubeダウンロード制限の回避<br><br>**⚠️ 重要な注意**<br>- クッキーファイルを決して共有しないでください!<br>- クッキーを更新するタイミング:<br> • \"403 Forbidden\" エラーが出た場合<br> • ダウンロードが突然停止した場合<br> • \"セッション期限切れ\" メッセージが表示された場合<br><br>**🔄 更新手順**<br>1. この <a href=\"https://chromewebstore.google.com/detail/get-cookiestxt-clean/ahmnmhfbokciafffnknlekllgcnafnie\" target=\"_blank\">Chrome拡張機能</a> をインストール<br>2. ChromeでYouTubeにログイン<br>3. 拡張機能アイコンをクリック → \"エクスポート\"<br>4. ダウンロードしたファイルをここにアップロード<br><br>**⏳ クッキー寿命**<br>- 通常セッション: 24時間<br>- 機密操作: 1時間<br>- パスワード変更: 即時無効化</div>",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84
  "manual_ensemble_tab": "🎚️ 手動アンサンブル",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
  "input_sources": "📂 入力ソース",
86
  "refresh": "🔄 更新",
87
  "ensemble_algorithm": "アンサンブルアルゴリズム",
88
  "select_audio_files": "オーディオファイルを選択",
89
  "available_files": "利用可能なファイル",
90
- "custom_weights": "カスタムウェイト (カンマ区切り)",
91
  "custom_weights_placeholder": "例: 0.8, 1.2, 1.0, ...",
92
- "custom_weights_info": "ウェイトの場合は空白のまま",
93
  "result_preview_tab": "🎧 結果プレビュー",
94
  "ensembled_output": "アンサンブル出力",
95
  "processing_log_tab": "📋 処理ログ",
96
- "processing_details": "処理詳細",
97
  "process_ensemble": "⚡ アンサンブルを処理",
98
  "languages_tab": "🌐 言語",
99
  "Vocal Models": "ボーカルモデル",
100
  "Instrumental Models": "インストゥルメンタルモデル",
101
  "4-Stem Models": "4ステムモデル",
102
  "Denoise Models": "ノイズ除去モデル",
103
- "Dereverb Models": "リバーブ除去モデル",
104
- "Other Models": "その他のモデル",
105
- "ensemble_files_description": "指定されたアンサンブル手法を使用して複数のオーィオファイを1つの出力に統合します。",
 
106
  "ensemble_files_help": "アンサンブル用のすべてのオーディオファイルへのパス",
107
- "ensemble_type_help": "avg_wave, median_wave, min_wave, max_wave, avg_fft, median_fft, min_fft, max_fft のいずれか",
108
- "ensemble_weights_help": "アンサンブル作成するためのウェイト。ウェイト数はファイル数と一致する必要があります",
109
- "ensemble_output_help": "アンサンブル結果を保存するWAVファイルのパス",
110
  "ensemble_type_print": "アンサンブルタイプ: {}",
111
  "num_input_files_print": "入力ファイル数: {}",
112
  "weights_print": "ウェイト: {}",
113
  "output_file_print": "出力ファイル: {}",
114
- "duration_mismatch_error": "すべてのファイルは同じ長さでなければなりません",
115
  "file_not_found_error": "エラー。ファイルが見つかりません: {}。パスを確認してください。",
116
- "reading_chunk_print": "ファイルからチャンクを読み込み: {} (開始: {}, 長さ: {})",
117
  "chunk_result_shape_print": "チャンク結果の形状: {}",
118
- "ensemble_completed_print": "アンサンブル完了。出力は以下に保存: {}",
119
  "file_deletion_error": "{} を削除できませんでした: {}",
120
  "directory_not_exist_warning": "⚠️ ディレクトリが存在しません: {}",
121
- "not_a_directory_warning": "⚠️ パスディレクトリではありません: {}",
122
  "item_deletion_error": "⚠️ {} の削除エラー: {}",
 
123
  "old_output_not_exist": "❌ 古い出力フォルダが存在しません",
124
- "old_outputs_cleared": "✅ 古い出が正常に削除されました!",
125
  "error": "🔥 エラー: {}",
126
  "ffmpeg_error": "FFmpegエラー ({}): {}",
127
  "file_saved_successfully": "ファイルが正常に保存されました: {}",
128
  "total_files_found": "見つかったファイルの総数: {}。使用サンプルレート: {}",
129
- "total_progress": "進捗",
130
- "detailed_pbar_enabled": "詳細プログレスバー有効: {}",
131
- "loaded_audio": "ロードされたオーディオ: {}, 形状: {}",
132
  "cannot_read_track": "トラックを読み込めません: {}",
133
  "error_message": "エラーメッセージ: {}",
134
- "demudding_track": "トラックのデマッディング (位相リミックス - インストゥルメンタル): {}",
135
- "elapsed_time": "経過時間: {:.2f} 秒。",
136
  "proc_folder_description": "指定されたモデルを使用してフォルダ内のオーディオファイルを処理します。",
137
- "model_type_help": "モデルタイプ (bandit, bs_roformer, mdx23c など)",
138
  "config_path_help": "設定ファイルへのパス",
139
- "demud_phaseremix_help": "インストゥルメンタルの位相リミックスを有効",
140
  "start_checkpoint_help": "有効なウェイトの初期チェックポイント",
141
- "input_folder_help": "処理するミックスを含むフォルダ",
142
- "audio_path_help": "処理する単一オーディオファイルのパス",
143
  "store_dir_help": "結果を保存するパス",
144
  "device_ids_help": "GPU IDのリスト",
145
  "extract_instrumental_help": "提供されている場合、ボーカルを反転してインストゥルメンタルを取得",
146
- "disable_detailed_pbar_help": "デミックス詳細プログレスバーを無効",
147
- "force_cpu_help": "CUDAが利用可能でもCPUを強制使用",
148
  "flac_file_help": "WAVの代わりにFLACファイルを出力",
149
- "export_format_help": "エクスポートフォーマットとPCMタイプ",
150
  "pcm_type_help": "FLACファイルのPCMタイプ",
151
- "use_tta_help": "テスト時間増強を有効",
152
  "lora_checkpoint_help": "LoRAウェイトの初期チェックポイント",
153
- "cuda_available": "CUDAが利用可能です。無効するには --force_cpu を使用してください。",
154
  "using_device": "使用デバイス: {}",
155
  "instruments_print": "楽器: {}",
156
- "model_load_time": "モデルロード時間: {:.2f} 秒",
157
  "invalid_url": "❌ 無効なURL",
158
- "cookie_file_updated": "✅ クッキーファイルが更新されました!",
159
- "cookie_installation_error": "⚠️ クッキーインストールエラー: {}",
 
160
  "file_size_zero_error": "ファイルサイズがゼロまたはファイルが作成されていません",
161
  "google_drive_error": "❌ Google Driveダウンロードエラー: {}",
162
- "wav_conversion_failed": "WAV変換に失敗",
163
  "download_error": "❌ ダウンロードエラー: {}",
164
- "download_success": "🎉 ダウンロド成功!",
165
- "download_failed": "❌ ダウンロド失敗",
 
 
166
  "no_output_files_found": "❌ 出力ファイルが見つかりません。",
167
- "output_refreshed_successfully": "✅ 出力が正常に更新されました!",
168
  "error_refreshing_output": "❌ 出力更新エラー: {}",
169
- "starting_audio_separation": "オーディオ分離を開始...",
170
- "processing_audio": "オーディオ処理中",
171
- "separating_audio": "オーディオ分離中... ({:.1f}%)",
172
- "separation_complete": "分離完了!",
173
  "progress_parsing_error": "進捗解析エラー: {}",
174
  "error_occurred": "エラーが発生しました: {}",
175
- "separation_process_completed": "分離プロセス完了!",
176
  "no_audio_file_error": "❌ オーディオファイルが提供されておらず、入力ディレクトリに既存のファイルもありません。",
177
  "no_input_progress_label": "エラー: 入力が提供されていません -- 0.0%",
178
- "processing_audio_print": "{} からオーディオを処理中、モデル使用: {}",
179
- "starting_audio_separation_progress_label": "オーディオ分離を開始... -- 0.0%",
180
- "separating_audio_progress_label": "オーディオ分離中... -- {}.0%",
181
- "audio_processing_completed_progress_label": "オーディオ処理完了! -- 100.0%",
182
- "audio_processing_completed": "✅ オーディオ処理完了!",
183
  "error_occurred_progress_label": "エラーが発生しました -- 0.0%",
184
- "minimum_files_required": "⚠️ 最低2ファイル必要",
185
  "valid_files_not_found": "❌ 有効なファイルが見つかりません",
186
- "starting_ensemble_process": "アンサンブルプロセスを開始...",
187
  "ensembling_progress": "アンサンブル中... ({:.1f}%)",
188
- "finalizing_ensemble_output": "アンサンブル出力を最終処理中...",
189
- "success_log": "✅ 成功!\n{}",
190
- "error_log": "❌ エラー!\n{}",
191
- "critical_error": "⛔ 重大なエラー: {}",
192
- "ensemble_process_completed": "アンサンブルプロセス完了!",
193
  "no_models_selected": "❌ モデルが選択されていません",
194
  "no_input_audio_provided": "❌ 入力オーディオが提供されていません",
195
- "loading_model": "モデル {}/{} をロード中: {}...",
196
- "loading_model_progress_label": "モデル {}/{} をロード中: {}... -- {}.0%",
197
- "completed_model": "モデル {}/{} 完了: {}",
198
- "completed_model_progress_label": "モデル {}/{} 完了: {} -- {}.0%",
199
- "running_command": "コマンド実行中: {}",
200
- "model_failed": "モデル {} が失敗: {}",
201
- "critical_error_with_model": "{} で重大なエラー: {}",
202
- "model_output_failed": "{} 出力を生成できませんでした",
203
- "waiting_for_files": "すべてのファイル準備待機中...",
204
- "waiting_for_files_progress_label": "すべてのファイル準備待機中... -- 90.0%",
205
- "performing_ensemble": "アンサンブル実行中...",
206
- "performing_ensemble_progress_label": "アンサンブル実行中... -- 92.0%",
207
  "memory_usage_before_ensemble": "アンサンブル前のメモリ使用量: {}%",
208
  "memory_usage_after_ensemble": "アンサンブル後のメモリ使用量: {}%",
209
- "finalizing_ensemble_output_progress_label": "アンサンブル出力を最終処理中... -- 98.0%",
210
- "ensemble_file_creation_failed": "アンサンブルファイル作成に失敗: {}",
211
- "ensemble_completed_progress_label": "アンサンブルが正常に完了しました! -- 100.0%",
212
- "success_output_created": "✅ 成功! 出力ファイルが作成されました。",
213
- "drive_mounted_copying_ensemble": "Google Driveがマウントされました。アンサンブ出力をコピー中...",
214
- "drive_already_mounted_copying_ensemble": "Google Driveはすでにマウントされています。アンサンブ出力コピー中...",
215
- "no_ensemble_output_files_found": "❌ アンサンブ出力ファイルが見つかりません。",
216
- "ensemble_output_copied": "✅ アンサンブが {} にコピーされま",
217
- "error_copying_ensemble_output": "❌ アサンブル出力のコピーにエラー: {}",
218
- "drive_mounted_copying_files": "Google Driveがマウントされました。ファイルをコピー中...",
219
- "drive_already_mounted_copying_files": "Google Driveはすでにマウントされています。ファイルをコピー中...",
220
- "files_copied_to_drive": "✅ ファイルが {} にコピーされました",
221
- "error_copying_files": "ファイルのコピーにエラー: {}",
222
- "mounting_drive": "Google Driveをマウント中...",
223
- "settings_saved": "設定が config.json に保存されました: 言語={lang}, メソッド={method}, ポート={port}",
224
- "selected_language": "選択された言語: {lang}",
225
- "available_languages": "利用可能な言語: {langs}",
226
- "select_language_prompt": "言語を選択 (フォルト: {default}): ",
227
- "invalid_language": "無効な言語選択! デフォルト言語 ({default}) を使用します。",
228
- "available_sharing_methods": "利用可能な共有方法: gradio, localtunnel, ngrok",
229
- "select_sharing_method_prompt": "共有方法を選択 (フォト: {default}): ",
230
- "enter_ngrok_token_prompt": "Ngrokトークンを入力 (フォト: {default}): ",
231
- "ngrok_token_required": "エラー: ngrokメソッドにはNgrokトークンが必要です!",
232
- "enter_port_prompt": "ポート番号を入力 (フォ: {default}): ",
233
- "opening_previous_url": "前のURLを開く: {url}",
234
- "open_url_manually": "ColabでURLで開く: {url}",
235
- "invalid_previous_url": "無効な前のURL: {url}, 開かれませんでた。",
236
- "starting_method": "{method} をポート {port} で開始...",
237
- "process_stopped": "\n🛑 ユーザーによってプロセスが停止されました",
238
- "selected_port": "選択されたポート: {port}",
239
- "starting_gradio_with_sharing": "Gradioを共有機能付きで開始...",
240
- "starting_localtunnel": "Localtunnelをポート {port} で開始...",
241
- "share_link": "共有リンク: {url}",
242
- "password_ip": "パスワードIP: {ip}",
243
- "starting_ngrok": "Ngrokをポート {port} で開始...",
244
- "ngrok_url": "Ngrok URL: {url}",
245
- "ngrok_error": "Ngrok開始エラー: {error}",
246
- "apollo_chunk_size": "Apolloチャンクサイズ",
247
- "apollo_chunk_size_info": "Apollo改善のためのチャンクサイズ (ユニバーサルモデル推奨: 19)",
248
- "apollo_overlap": "Apolloオーバーラップ",
249
- "enhancing_with_apollo": "Apolloで改善中 ({}/{} ファイル)...",
250
- "apollo_processing_completed": "Apollo処理完了!",
251
- "apollo_overlap_info": "Apollo改善のためのオーバーラップ (推奨: 2)",
252
- "invalid_method": "エラー: 無効なメソッド! 'gradio', 'localtunnel', または 'ngrok' を使用してください。",
253
- "apollo_enhancement_settings": "Apollo改善設定",
254
- "enhance_with_apollo": "Apolloで改善",
255
- "enhance_with_apollo_info": "分離後のオーディオ改善のためにApolloを有効化",
256
- "apollo_method": "Apolloメソッド",
257
- "apollo_method_info": "Apolloの処理メソッドを選択",
258
- "normal_method": "通常メソッド",
259
- "mid_side_method": "ミッド/サイドメソッド",
260
- "apollo_normal_model": "Apollo通常モデル",
261
- "apollo_normal_model_info": "通常Apollo処理に使用するモデル",
262
- "apollo_midside_model": "Apolloミッド/サイドモデル",
263
- "apollo_enhancement_info": "音質の改善",
264
- "selected_models": "選択されたモデル",
265
- "save_preset": "プリセットを保存",
266
- "delete_preset": "プリセットを削除",
267
- "refresh_presets": "プリセットを更新",
268
- "preset_name": "プリセット名",
269
- "select_preset": "プリセットを選択",
270
- "add_favorite": "お気に入りに追加",
271
- "apollo_midside_model_info": "ミッド/サイド処理に使用するモデル (オプション)",
272
- "language_changed_message": "言語が変更されました。セルを再起動してください。",
273
- "apply_matchering": "Matcheringを適用",
274
- "matchering_info": "オーディオのマスタリングとバランスを向上させるためにMatcheringを適用",
275
- "matchering_passes": "Matcheringの反復回数",
276
- "matchering_passes_info": "Matcheringの反復回数(1-5、高い値は品質を向上させますが時間がかかります)",
277
- "matchering_processing": "Matcheringを適用中... ({:.1f}%)",
278
- "matchering_completed": "✅ Matchering処理が完了しました!",
279
- "matchering_error": "❌ Matchering中にエラーが発生しました:{}"
280
  }
 
17
  "overlap": "オーバーラップ",
18
  "overlap_info": "推奨: 2-10 (高い値は品質を向上させますが、より多くのVRAMが必要です)",
19
  "tta_boost": "TTAブースト",
 
20
  "phase_fix": "位相補正",
21
+ "use_phase_correction": "高度な位相補正を適用",
22
  "phase_fix_info": "インストゥルメンタルトラックのための高度な位相補正",
23
  "instrumental": "インストゥルメンタル",
 
24
  "process": "🚀 処理",
25
  "reset": "🧹 リセット",
26
  "status": "ステータス",
 
79
  "downloaded_file": "ダウンロードされたファイル",
80
  "cookie_management": "🍪 クッキー管理",
81
  "upload_cookies_txt": "Cookies.txtをアップロード",
82
+ "cookie_info": "<div style=\"margin-left:15px; font-size:0.95em\">**📌 なぜ必要**<br>- 年齢制限コンテンツへのアクセス<br>- プライベート/非公開ビデオのダウンロード<br>- 地域制限の回避<br>- YouTubeダウンロード制限の回避<br><br>**⚠️ 重要な注意**<br>- クッキーファイルを共有しないでください<br>- 以下の時に更新:<br> • \"403 Forbidden\"エラー<br> • ダウンロードが突然停止<br> • \"セッション期限切れ\"メッセージ<br><br>**🔄 更新手順**<br>1. この<a href=\"https://chromewebstore.google.com/detail/get-cookiestxt-clean/ahmnmhfbokciafffnknlekllgcnafnie\" target=\"_blank\">Chrome拡張</a>をインストール<br>2. ChromeでYouTubeにログイン<br>3. 拡張アイコンをクリック → \"エクスポート\"<br>4. ここにダウンロードしたファイルをアップロード<br><br>**⏳ クッキー寿命**<br>- 通常セッション: 24時間<br>- 敏感な操作: 1時間<br>- パスワード変更: 即時更新</div>",
83
+ "ensemble_output_copied": "✅ アンサンブル出力が {} にコピーされました",
84
+ "error_copying_ensemble_output": "❌ アンサンブル出力のコピーエラー: {}",
85
+ "drive_mounted_copying_ensemble": "Google Driveがマウントされました。アンサンブル出力をコピー中...",
86
+ "drive_already_mounted_copying_ensemble": "Google Driveはすでにマウントされています。アンサンブル出力をコピー中...",
87
+ "no_ensemble_output_files_found": "❌ アンサンブル出力ファイルが見つかりません。",
88
+ "drive_mounted_copying_files": "Google Driveがマウントされました。ファイルをコピー中...",
89
+ "drive_already_mounted_copying_files": "Google Driveはすでにマウントされています。ファイルをコピー中...",
90
+ "files_copied_to_drive": "✅ ファイルが {} にコピーされました",
91
+ "error_copying_files": "❌ ファイルのコピーエラー: {}",
92
+ "mounting_drive": "Google Driveをマウント中...",
93
+ "settings_saved": "設定が config.json に保存されました: 言語={lang}, メソッド={method}, ポート={port}",
94
+ "selected_language": "選択された言語: {lang}",
95
+ "available_languages": "利用可能な言語: {langs}",
96
+ "select_language_prompt": "言語を選択 (デフォルト: {default}): ",
97
+ "invalid_language": "無効な言語選択! デフォルト言語 ({default}) を使用します。",
98
+ "available_sharing_methods": "利用可能な共有方法: gradio, localtunnel, ngrok",
99
+ "select_sharing_method_prompt": "共有方法を選択 (デフォルト: {default}): ",
100
+ "enter_ngrok_token_prompt": "Ngrokトークンを入力 (デフォルト: {default}): ",
101
+ "ngrok_token_required": "エラー: ngrokメソッドにはNgrokトークンが必要です!",
102
+ "enter_port_prompt": "ポート番号を入力 (デフォルト: {default}): ",
103
+ "opening_previous_url": "前のURLを開く: {url}",
104
+ "open_url_manually": "ColabでURLを手動で開く: {url}",
105
+ "invalid_previous_url": "無効な前のURL: {url}, 開かれませんでした。",
106
+ "starting_method": "{method} をポート {port} で開始...",
107
+ "process_stopped": "\n🛑 ユーザーによってプロセスが停止されました",
108
+ "selected_port": "選択されたポート: {port}",
109
+ "starting_gradio_with_sharing": "Gradioを共有機能付きで開始...",
110
+ "starting_localtunnel": "Localtunnelをポート {port} で開始...",
111
+ "share_link": "共有リンク: {url}",
112
+ "password_ip": "パスワードIP: {ip}",
113
+ "starting_ngrok": "Ngrokをポート {port} で開始...",
114
+ "ngrok_url": "Ngrok URL: {url}",
115
+ "ngrok_error": "Ngrok開始エラー: {error}",
116
+ "auto_apollo_chunk_size": "Apolloチャンクサイズ",
117
+ "auto_apollo_chunk_size_info": "Apollo改善のためのチャンクサイズ (ユニバーサルモデル推奨: 19)",
118
+ "auto_apollo_overlap": "Apolloオーバーラップ",
119
+ "auto_enhancing_with_apollo": "Apolloで改善中 ({}/{} ファイル)...",
120
+ "auto_apollo_processing_completed": "Apollo処理完了!",
121
+ "auto_apollo_overlap_info": "Apollo改善のためのオーバーラップ (推奨: 2)",
122
+ "apollo_chunk_size": "Apolloチャンクサイズ",
123
+ "apollo_chunk_size_info": "Apollo改善のためのチャンクサイズ (ユニバーサルモデル推奨: 19)",
124
+ "apollo_overlap": "Apolloオーバーラップ",
125
+ "enhance_with_apollo": "Apolloで改善",
126
+ "apollo_enhancement_info": "音質の改善",
127
+ "enhancing_with_apollo": "Apolloで改善中 ({}/{} ファイル)...",
128
+ "apollo_processing_completed": "Apollo処理完了!",
129
+ "apollo_overlap_info": "Apollo改善のためのオーバーラップ (推奨: 2)",
130
+ "selected_models": "選択されたモデル",
131
+ "save_preset": "プリセットを保存",
132
+ "delete_preset": "プリセットを削除",
133
+ "refresh_presets": "プリセットを更新",
134
+ "preset_name": "プリセット名",
135
+ "select_preset": "プリセットを選択",
136
+ "add_favorite": "お気に入りに追加",
137
+ "apply_matchering": "Matcheringを適用",
138
+ "matchering_info": "オーディオのマスタリングとバランスを向上させるためにMatcheringを適用",
139
+ "matchering_passes": "Matcheringの反復回数",
140
+ "matchering_passes_info": "Matcheringの反復回数(1-5、高い値は品質を向上させますが時間がかかります)",
141
+ "matchering_processing": "Matcheringを適用中... ({:.1f}%)",
142
+ "matchering_completed": "✅ Matchering処理が完了しました!",
143
+ "matchering_error": "❌ Matchering中にエラーが発生しました:{}",
144
+ "invalid_method": "エラー: 無効なメソッド! 'gradio', 'localtunnel', または 'ngrok' を使用してください。",
145
+ "language_changed_message": "言語が変更されました。セルを再起動してください。",
146
+ "backend_settings": "🚀 バックエンド設定",
147
+ "inference_backend": "推論バックエンド",
148
+ "optimization_mode": "最適化モード",
149
+ "optimization_mode_info": "PyTorch最適化モードの選択",
150
+ "mixed_precision_amp": "混合精度 (AMP)",
151
+ "mixed_precision_info": "2x速い推論 - 推奨",
152
+ "tf32_acceleration": "TF32アクセラレーション",
153
+ "tf32_acceleration_info": "RTX 30xx+のための追加速度ブースト",
154
+ "cudnn_benchmark": "cuDNNベンチマーク",
155
+ "cudnn_benchmark_info": "最初の実行で遅い、後続の実行で非常に速い",
156
+ "ultra_optimized_pytorch": "超最適化PyTorchバックエンド",
157
+ "default_active_max_speed": "デフォルトでアクティブ - 最大速度最適化",
158
+ "channels_last_mode": "RTX GPUで最速",
159
+ "compile_mode": "PyTorch 2.0+のための追加速度",
160
+ "default_mode": "標準",
161
+ "tta_info": "品質を向上させますが、処理速度が遅くなります",
162
+ "instrumental_info": "通常2つのステムを出力しますが、時には1つだけ - 試してみてください! :)",
163
+ "direct_download_error": "❌ 直接ダウンロードエラー:{}",
164
+ "invalid_google_drive_url": "❌ 無効な Google Drive URL",
165
+ "continuing_without_google_drive": "Google Drive接続に失敗しました。続行します...",
166
+ "skipping_drive_copy_no_mount": "⚠️ Google Driveがマウントされていません、Driveへのコピーをスキップします...",
167
+ "direct_download_failed": "ファイルの直接ダウンロードに失敗しました",
168
  "manual_ensemble_tab": "🎚️ 手動アンサンブル",
169
+ "phase_fixer_tab": "🔧 位相補正ツール",
170
+ "batch_processing_tab": "📦 バッチ処理",
171
+ "batch_description": "同じモデルで複数のファイルを一度に処理",
172
+ "batch_input_folder": "入力フォルダ",
173
+ "batch_input_folder_placeholder": "オーディオファイルを含むフォルダのパスを入力",
174
+ "batch_output_folder": "出力フォルダ",
175
+ "batch_output_folder_placeholder": "出力ファイルのフォルダのパスを入力",
176
+ "batch_file_list": "処理するファイル",
177
+ "batch_add_files": "ファイルを追加",
178
+ "batch_clear_list": "リストをクリア",
179
+ "batch_start": "🚀 バッチ処理を開始",
180
+ "batch_stop": "⏹️ 停止",
181
+ "batch_progress": "バッチ処理の進捗",
182
+ "batch_current_file": "現在処理中",
183
+ "batch_completed": "✅ バッチ処理が完了しました!",
184
+ "batch_stopped": "⏹️ バッチ処理が停止されました",
185
+ "batch_no_files": "❌ 処理するファイルが選択されていません",
186
+ "source_model": "ソースモデル(ボーカル)",
187
+ "source_model_info": "ボーカル/位相データを抽出するモデル",
188
+ "target_model": "ターゲットモデル(インストゥルメンタル)",
189
+ "target_model_info": "位相補正を適用するモデル",
190
  "input_sources": "📂 入力ソース",
191
  "refresh": "🔄 更新",
192
  "ensemble_algorithm": "アンサンブルアルゴリズム",
193
  "select_audio_files": "オーディオファイルを選択",
194
  "available_files": "利用可能なファイル",
195
+ "custom_weights": "カスタムウェイト (カンマ区切り)",
196
  "custom_weights_placeholder": "例: 0.8, 1.2, 1.0, ...",
197
+ "custom_weights_info": "等しいウェイトにするには空白のままにしてください",
198
  "result_preview_tab": "🎧 結果プレビュー",
199
  "ensembled_output": "アンサンブル出力",
200
  "processing_log_tab": "📋 処理ログ",
201
+ "processing_details": "処理詳細",
202
  "process_ensemble": "⚡ アンサンブルを処理",
203
  "languages_tab": "🌐 言語",
204
  "Vocal Models": "ボーカルモデル",
205
  "Instrumental Models": "インストゥルメンタルモデル",
206
  "4-Stem Models": "4ステムモデル",
207
  "Denoise Models": "ノイズ除去モデル",
208
+ "Dereverb Models": "残響除去モデル",
209
+ "Other Shapes": "その他のモデル",
210
+ "Custom Models": "カスタムモデル",
211
+ "ensemble_files_description": "指定されたアンサンブル方法を使用して複数のオーディオファイルを1つの出力に結合します。",
212
  "ensemble_files_help": "アンサンブル用のすべてのオーディオファイルへのパス",
213
+ "ensemble_type_help": "次のいずれか: avg_wave, median_wave, min_wave, max_wave, avg_fft, median_fft, min_fft, max_fft",
214
+ "ensemble_weights_help": "アンサンブル作成のウェイト。ウェイト数はファイル数と同じである必要があります",
215
+ "ensemble_output_help": "アンサンブル結果を保存するWAVファイルのパス",
216
  "ensemble_type_print": "アンサンブルタイプ: {}",
217
  "num_input_files_print": "入力ファイル数: {}",
218
  "weights_print": "ウェイト: {}",
219
  "output_file_print": "出力ファイル: {}",
220
+ "duration_mismatch_error": "すべてのファイルの長さが同じである必要があります",
221
  "file_not_found_error": "エラー。ファイルが見つかりません: {}。パスを確認してください。",
222
+ "reading_chunk_print": "ファイルからチャンクを読み込み: {} (開始: {}s, 長さ: {}s)",
223
  "chunk_result_shape_print": "チャンク結果の形状: {}",
224
+ "ensemble_completed_print": "アンサンブル完了。出力保存: {}",
225
  "file_deletion_error": "{} を削除できませんでした: {}",
226
  "directory_not_exist_warning": "⚠️ ディレクトリが存在しません: {}",
227
+ "not_a_directory_warning": "⚠️ パスはディレクトリではありません: {}",
228
  "item_deletion_error": "⚠️ {} の削除エラー: {}",
229
+ "critical_error": "⛔ 致命的なエラー: {}",
230
  "old_output_not_exist": "❌ 古い出力フォルダが存在しません",
231
+ "old_outputs_cleared": "✅ 古い出力が正常に削除されました",
232
  "error": "🔥 エラー: {}",
233
  "ffmpeg_error": "FFmpegエラー ({}): {}",
234
  "file_saved_successfully": "ファイルが正常に保存されました: {}",
235
  "total_files_found": "見つかったファイルの総数: {}。使用サンプルレート: {}",
236
+ "total_progress": "全体の進捗",
237
+ "detailed_pbar_enabled": "詳細プログレスバー有効: {}",
238
+ "loaded_audio": "オーディオを読み込みました: {}、形状: {}",
239
  "cannot_read_track": "トラックを読み込めません: {}",
240
  "error_message": "エラーメッセージ: {}",
241
+ "demudding_track": "トラックをクリーニング中(位相リミックス - インストゥルメンタル): {}",
242
+ "elapsed_time": "経過時間: {:.2f}秒。",
243
  "proc_folder_description": "指定されたモデルを使用してフォルダ内のオーディオファイルを処理します。",
244
+ "model_type_help": "モデルタイプ (bandit, bs_roformer, mdx23cなど)",
245
  "config_path_help": "設定ファイルへのパス",
246
+ "demud_phaseremix_help": "インストゥルメンタルの位相リミックスを有効にする",
247
  "start_checkpoint_help": "有効なウェイトの初期チェックポイント",
248
+ "input_folder_help": "処理するミックスが入ったフォルダ",
249
+ "audio_path_help": "処理する単一オーディオファイルのパス",
250
  "store_dir_help": "結果を保存するパス",
251
  "device_ids_help": "GPU IDのリスト",
252
  "extract_instrumental_help": "提供されている場合、ボーカルを反転してインストゥルメンタルを取得",
253
+ "disable_detailed_pbar_help": "デミックス詳細プログレスバーを無効にする",
254
+ "force_cpu_help": "CUDAが利用可能でもCPUを強制使用する",
255
  "flac_file_help": "WAVの代わりにFLACファイルを出力",
256
+ "export_format_help": "エクスポート形式とPCMタイプ",
257
  "pcm_type_help": "FLACファイルのPCMタイプ",
258
+ "use_tta_help": "テスト時間拡張を有効にする",
259
  "lora_checkpoint_help": "LoRAウェイトの初期チェックポイント",
260
+ "cuda_available": "CUDAが利用可能です。無効にするには --force_cpu を使用してください。",
261
  "using_device": "使用デバイス: {}",
262
  "instruments_print": "楽器: {}",
263
+ "model_load_time": "モデル読み込み時間: {:.2f}秒",
264
  "invalid_url": "❌ 無効なURL",
265
+ "cookie_file_updated": "✅ Cookieファイルが更新されました",
266
+ "cookie_installation_error": "⚠️ Cookieインストールエラー: {}",
267
+ "google_drive_mount_error": "❌ Google Drive接続エラー: {}",
268
  "file_size_zero_error": "ファイルサイズがゼロまたはファイルが作成されていません",
269
  "google_drive_error": "❌ Google Driveダウンロードエラー: {}",
270
+ "wav_conversion_failed": "WAV変換に失敗しました",
271
  "download_error": "❌ ダウンロードエラー: {}",
272
+ "file_copied_to_drive": "ファイルがGoogle Driveにコピーされました: {}",
273
+ "copy_to_drive_error": "❌ Google Driveへのコピーエラー: {}",
274
+ "download_success": "🎉 ダウンロードに成功しました!",
275
+ "download_failed": "❌ ダウンロードに失敗しました",
276
  "no_output_files_found": "❌ 出力ファイルが見つかりません。",
277
+ "output_refreshed_successfully": "✅ 出力が正常に更新されました",
278
  "error_refreshing_output": "❌ 出力更新エラー: {}",
279
+ "starting_audio_separation": "オーディオ分離を開始しています...",
280
+ "processing_audio": "オーディオ処理中",
281
+ "separating_audio": "オーディオ分離中... ({:.1f}%)",
282
+ "separation_complete": "分離完了",
283
  "progress_parsing_error": "進捗解析エラー: {}",
284
  "error_occurred": "エラーが発生しました: {}",
285
+ "separation_process_completed": "分離プロセスが完了しました!",
286
  "no_audio_file_error": "❌ オーディオファイルが提供されておらず、入力ディレクトリに既存のファイルもありません。",
287
  "no_input_progress_label": "エラー: 入力が提供されていません -- 0.0%",
288
+ "processing_audio_print": "オーディオを処理中: {}、モデル使用: {}",
289
+ "starting_audio_separation_progress_label": "オーディオ分離を開始しています... -- 0.0%",
290
+ "separating_audio_progress_label": "オーディオ分離中... -- {}.0%",
291
+ "audio_processing_completed_progress_label": "オーディオ処理が完了しました! -- 100.0%",
292
+ "audio_processing_completed": "✅ オーディオ処理が完了しました!",
293
  "error_occurred_progress_label": "エラーが発生しました -- 0.0%",
294
+ "minimum_files_required": "⚠️ 最低2つのファイルが必要です",
295
  "valid_files_not_found": "❌ 有効なファイルが見つかりません",
296
+ "starting_ensemble_process": "アンサンブルプロセスを開始しています...",
297
  "ensembling_progress": "アンサンブル中... ({:.1f}%)",
298
+ "finalizing_ensemble_output": "アンサンブル出力を最終処理中...",
299
+ "success_log": "✅ 成功\n{}",
300
+ "error_log": "❌ エラー\n{}",
301
+ "ensemble_process_completed": "アンサンブルプロセスが完了しました!",
 
302
  "no_models_selected": "❌ モデルが選択されていません",
303
  "no_input_audio_provided": "❌ 入力オーディオが提供されていません",
304
+ "loading_model": "モデルを読み込み中 {}/{}: {}...",
305
+ "loading_model_progress_label": "モデルを読み込み中 {}/{}: {}... -- {}.0%",
306
+ "completed_model": "モデル完了 {}/{}: {}",
307
+ "completed_model_progress_label": "モデル完了 {}/{}: {} -- {}.0%",
308
+ "running_command": "コマンド実行中: {}",
309
+ "model_failed": "モデル {} が失敗しました: {}",
310
+ "critical_error_with_model": "{} で致命的なエラー: {}",
311
+ "model_output_failed": "{} 出力を生成できませんでした",
312
+ "waiting_for_files": "すべてのファイルが準備できるまで待機中...",
313
+ "waiting_for_files_progress_label": "すべてのファイルが準備できるまで待機中... -- 90.0%",
314
+ "performing_ensemble": "アンサンブル実行中...",
315
+ "performing_ensemble_progress_label": "アンサンブル実行中... -- 92.0%",
316
  "memory_usage_before_ensemble": "アンサンブル前のメモリ使用量: {}%",
317
  "memory_usage_after_ensemble": "アンサンブル後のメモリ使用量: {}%",
318
+ "finalizing_ensemble_output_progress_label": "アンサンブル出力を最終処理中... -- 98.0%",
319
+ "ensemble_file_creation_failed": "アンサンブルファイルを作成できませんでした: {}",
320
+ "ensemble_completed_progress_label": "アンサンブルが正常に完了しました -- 100.0%",
321
+ "success_output_created": "✅ 成功! 出力ファイルが作成されました。",
322
+ "custom_models_tab": " カスタムモデル",
323
+ "add_custom_model": " カスタムモデルを追加",
324
+ "custom_model_name": "モデル名",
325
+ "custom_model_name_placeholder": "モデルの名前を入力してください",
326
+ "checkpoint_url": "チェックポイントURL (.ckpt/.pth)",
327
+ "checkpoint_url_placeholder": "https://huggingface.co/.../model.ckpt",
328
+ "config_url": "設定URL (.yaml)",
329
+ "config_url_placeholder": "https://huggingface.co/.../config.yaml",
330
+ "custom_py_url": "カスタムPythonファイルURL (オプション)",
331
+ "custom_py_url_placeholder": "https://huggingface.co/.../bs_roformer.py",
332
+ "auto_detect_type": "モデルタイプを自動検出",
333
+ "model_type": "モデルタイプ",
334
+ "add_model_btn": " モデルを追加",
335
+ "custom_models_list": "📋 カスタムモデルリスト",
336
+ "delete_model": "🗑️ 削除",
337
+ "no_custom_models": "カスタムモデルはまだ追加されていません",
338
+ "model_added_success": "✅ モデルが正常に追加されました!",
339
+ "model_deleted_success": "✅ モデルが正常に削除されました!",
340
+ "model_add_error": "❌ モデル追加エラー: {}",
341
+ "model_delete_error": "❌ モデル削除エラー: {}",
342
+ "refresh_models": "🔄 更新",
343
+ "custom_model_info": "ダウンロードリンクを提供してカスタムモデルを追加します。モデルは使用時に自動的にダウンロードされます。",
344
+ "select_model_to_delete": "削除するモデルを選択してください"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
345
  }
assets/i18n/languages/ru_ru.json CHANGED
@@ -19,6 +19,7 @@
19
  "tta_boost": "Усиление TTA",
20
  "tta_info": "Улучшает качество, но замедляет скорость обработки",
21
  "phase_fix": "Коррекция фазы",
 
22
  "phase_fix_info": "Расширенная коррекция фазы для инструментальных треков",
23
  "instrumental": "Инструментал",
24
  "instrumental_info": "Обычно дает 2 выхода, но иногда только один, нужно экспериментировать :)",
@@ -46,7 +47,7 @@
46
  "copy_to_drive": "📂 Копировать на Диск",
47
  "copy_status": "Статус копирования",
48
  "files_will_be_copied": "Файлы будут скопированы сюда...",
49
- "processing_tip": "<b>🔈 Совет по обработке:</b> Для шумных результатов используйте модели <code>bleed_suppressor_v1</code> или <code>denoisedebleed</code> в категории <i>\"Удаление шума и эффектов\"</i>, чтобы очистить выходные данные",
50
  "waiting_for_processing": "Ожидание обработки...",
51
  "auto_ensemble_tab": "🤖 Автоматический ансамбль",
52
  "upload_file": "Загрузить файл",
@@ -71,68 +72,147 @@
71
  "output_preview": "Предпросмотр вывода",
72
  "refresh_output": "🔄 Обновить вывод",
73
  "ensemble_copy_status": "Вывод ансамбля будет скопирован сюда...",
74
- "guidelines": "<h4>Руководство по выбору моделей</h4><ul><li><strong>Избегайте смешивания категорий:</strong> Сочетание вокальных и инструментальных моделей может привести к нежелательным смешиваниям</li><li><strong>Заметки о специальных моделях:</strong><ul><li>Модели дуальности (v1/v2) - выдают оба стема</li><li>Сепаратор MDX23C - гибридные результаты</li></ul></li><li><strong>Лучшая практика:</strong> Используйте 3-5 схожих моделей из одной категории</li></ul><div>💡 Профессиональный совет: Начните с комбинации \"VOCALS-MelBand-Roformer BigBeta5e\" + \"VOCALS-BS-Roformer_1297\"</div>",
75
  "download_sources_tab": "⬇️ Скачать источники",
76
  "direct_links": "🌐 Прямые ссылки",
77
  "audio_file_url": "URL аудиофайла",
78
- "download_from_url": "⬇️ Скачать по URL",
79
  "download_status": "Статус загрузки",
80
- "downloaded_file": "Скачанный файл",
81
- "cookie_management": "🍪 Управление cookies",
82
  "upload_cookies_txt": "Загрузить Cookies.txt",
83
- "cookie_info": "<div style=\"margin-left:15px; font-size:0.95em\">**📌 Зачем это нужно?**<br>- Доступ к контенту с возрастными ограничениями<br>- Скачивание приватных/неопубликованных видео<br>- Обход региональных ограничений<br>- Избежание лимитов скачивания YouTube<br><br>**⚠️ Важные заметки**<br>- НИКОГДА не делитесь своими файлами cookies!<br>- Обновляйте cookies, если:<br> • Появляются ошибки \"403 Запрещено\"<br> • Загрузки внезапно останавливаются<br> • Появляется сообщение \"Сессия истекла\"<br><br>**🔄 Шаги обновления**<br>1. Установите это <a href=\"https://chromewebstore.google.com/detail/get-cookiestxt-clean/ahmnmhfbokciafffnknlekllgcnafnie\" target=\"_blank\">расширение Chrome</a><br>2. Войдите в YouTube в Chrome<br>3. Нажмите на иконку расширения → \"Экспорт\"<br>4. Загрузите скачанный файл сюда<br><br>**⏳ Срок действия cookies**<br>- Обычные сессии: 24 часа<br>- Чувствительные операции: 1 час<br>- Смена пароля: мгновенная недействительность</div>",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84
  "manual_ensemble_tab": "🎚️ Ручной ансамбль",
85
  "input_sources": "📂 Источники ввода",
86
  "refresh": "🔄 Обновить",
87
  "ensemble_algorithm": "Алгоритм ансамбля",
88
- "select_audio_files": "Выберите аудиофайлы",
89
  "available_files": "Доступные файлы",
90
- "custom_weights": "Пользовательские веса (разделены запятыми)",
91
  "custom_weights_placeholder": "Пример: 0.8, 1.2, 1.0, ...",
92
  "custom_weights_info": "Оставьте пустым для равных весов",
93
  "result_preview_tab": "🎧 Предпросмотр результата",
94
- "ensembled_output": "Результат ансамбля",
95
  "processing_log_tab": "📋 Журнал обработки",
96
  "processing_details": "Детали обработки",
97
  "process_ensemble": "⚡ Обработать ансамбль",
98
  "languages_tab": "🌐 Языки",
99
  "Vocal Models": "Вокальные модели",
100
  "Instrumental Models": "Инструментальные модели",
101
- "4-Stem Models": "Модели с 4 стемами",
102
  "Denoise Models": "Модели шумоподавления",
103
  "Dereverb Models": "Модели удаления реверберации",
104
  "Other Models": "Другие модели",
105
- "ensemble_files_description": "Объединяет несколько аудиофайлов в один выход с использованием указанных методов ансамбля.",
 
106
  "ensemble_files_help": "Путь ко всем аудиофайлам для ансамбля",
107
  "ensemble_type_help": "Один из: avg_wave, median_wave, min_wave, max_wave, avg_fft, median_fft, min_fft, max_fft",
108
  "ensemble_weights_help": "Веса для создания ансамбля. Количество весов должно совпадать с количеством файлов",
109
- "ensemble_output_help": "Путь к WAV-файлу, куда будет сохранен результат ансамбля",
110
  "ensemble_type_print": "Тип ансамбля: {}",
111
  "num_input_files_print": "Количество входных файлов: {}",
112
  "weights_print": "Веса: {}",
113
  "output_file_print": "Выходной файл: {}",
114
- "duration_mismatch_error": "Все файлы должны иметь одинаковую длительность",
115
- "file_not_found_error": "Ошибка. Файл не найден: {}. Проверьте пути.",
116
- "reading_chunk_print": "Чтение фрагмента из файла: {} (начало: {} сек, длительность: {} сек)",
117
  "chunk_result_shape_print": "Форма результата фрагмента: {}",
118
- "ensemble_completed_print": "Ансамбль завершен. Выход сохранен в: {}",
119
- "file_deletion_error": "Не удалось удалить {}: {}",
120
- "directory_not_exist_warning": "⚠️ Директория не существует: {}",
121
- "not_a_directory_warning": "⚠️ Путь не является директорией: {}",
122
  "item_deletion_error": "⚠️ Ошибка удаления {}: {}",
123
  "old_output_not_exist": "❌ Старая папка вывода не существует",
124
- "old_outputs_cleared": "✅ Старые выходные данные успешно удалены!",
125
  "error": "🔥 Ошибка: {}",
126
  "ffmpeg_error": "Ошибка FFmpeg ({}): {}",
127
  "file_saved_successfully": "Файл успешно сохранен: {}",
128
  "total_files_found": "Всего найдено файлов: {}. Используемая частота дискретизации: {}",
129
  "total_progress": "Общий прогресс",
130
- "detailed_pbar_enabled": "Подробная шкала прогресса включена: {}",
131
- "loaded_audio": "Загруженное аудио: {}, форма: {}",
132
  "cannot_read_track": "Не удается прочитать трек: {}",
133
  "error_message": "Сообщение об ошибке: {}",
134
  "demudding_track": "Очистка трека (ремикс фазы - инструментал): {}",
135
- "elapsed_time": "Прошедшее время: {:.2f} сек.",
136
  "proc_folder_description": "Обрабатывает аудиофайлы в папке с использованием указанной модели.",
137
  "model_type_help": "Тип модели (bandit, bs_roformer, mdx23c и т.д.)",
138
  "config_path_help": "Путь к файлу конфигурации",
@@ -140,32 +220,40 @@
140
  "start_checkpoint_help": "Начальная контрольная точка для действительных весов",
141
  "input_folder_help": "Папка с миксами для обработки",
142
  "audio_path_help": "Путь к одному аудиофайлу для обработки",
143
- "store_dir_help": "Путь для хранения результатов",
144
  "device_ids_help": "Список идентификаторов GPU",
145
- "extract_instrumental_help": "Инвертировать вокал для получения инструментала, если предоставлено",
146
- "disable_detailed_pbar_help": "Отключить подробную шкалу прогресса при демиксе",
147
- "force_cpu_help": "Принудительно использовать CPU, даже если CUDA доступен",
148
- "flac_file_help": "Выводить файл FLAC вместо WAV",
149
  "export_format_help": "Формат экспорта и тип PCM",
150
- "pcm_type_help": "Тип PCM для файлов FLAC",
151
- "use_tta_help": "Включить увеличение времени теста",
152
  "lora_checkpoint_help": "Начальная контрольная точка для весов LoRA",
153
- "cuda_available": "CUDA доступен, используйте --force_cpu для отключения.",
154
  "using_device": "Используемое устройство: {}",
155
  "instruments_print": "Инструменты: {}",
156
  "model_load_time": "Время загрузки модели: {:.2f} сек",
157
- "invalid_url": "❌ Неверный URL",
158
- "cookie_file_updated": "✅ Файл cookies обновлен!",
159
- "cookie_installation_error": "⚠️ Ошибка установки cookies: {}",
160
- "file_size_zero_error": "Размер файла нулевой или файл не создан",
161
- "google_drive_error": "❌ Ошибка загрузки с Google Drive: {}",
162
- "wav_conversion_failed": "Не удалось конвертировать в WAV",
 
163
  "download_error": "❌ Ошибка загрузки: {}",
164
- "download_success": "🎉 Загрузка успешно завершена!",
 
 
 
 
 
 
 
165
  "download_failed": "❌ Загрузка не удалась",
166
  "no_output_files_found": "❌ Выходные файлы не найдены.",
167
  "output_refreshed_successfully": "✅ Вывод успешно обновлен!",
168
- "error_refreshing_output": "❌ Ошибка при обновлении вывода: {}",
169
  "starting_audio_separation": "Запуск разделения аудио...",
170
  "processing_audio": "Обработка аудио",
171
  "separating_audio": "Разделение аудио... ({:.1f}%)",
@@ -173,7 +261,7 @@
173
  "progress_parsing_error": "Ошибка разбора прогресса: {}",
174
  "error_occurred": "Произошла ошибка: {}",
175
  "separation_process_completed": "Процесс разделения завершен!",
176
- "no_audio_file_error": "❌ Аудиофайл не предоставлен, и в директории ввода нет существующего файла.",
177
  "no_input_progress_label": "Ошибка: Ввод не предоставлен -- 0.0%",
178
  "processing_audio_print": "Обработка аудио из: {} с использованием модели: {}",
179
  "starting_audio_separation_progress_label": "Запуск разделения аудио... -- 0.0%",
@@ -182,11 +270,11 @@
182
  "audio_processing_completed": "✅ Обработка аудио завершена!",
183
  "error_occurred_progress_label": "Произошла ошибка -- 0.0%",
184
  "minimum_files_required": "⚠️ Требуется минимум 2 файла",
185
- "valid_files_not_found": "❌ Валидные файлы не найдены",
186
  "starting_ensemble_process": "Запуск процесса ансамбля...",
187
- "ensembling_progress": "Создание ансамбля... ({:.1f}%)",
188
  "finalizing_ensemble_output": "Завершение вывода ансамбля...",
189
- "success_log": "✅ Успех!\n{}",
190
  "error_log": "❌ Ошибка!\n{}",
191
  "critical_error": "⛔ Критическая ошибка: {}",
192
  "ensemble_process_completed": "Процесс ансамбля завершен!",
@@ -204,77 +292,55 @@
204
  "waiting_for_files_progress_label": "Ожидание готовности всех файлов... -- 90.0%",
205
  "performing_ensemble": "Выполнение ансамбля...",
206
  "performing_ensemble_progress_label": "Выполнение ансамбля... -- 92.0%",
207
- "memory_usage_before_ensemble": "Использование памяти перед ансамблем: {}%",
208
  "memory_usage_after_ensemble": "Использование памяти после ансамбля: {}%",
209
  "finalizing_ensemble_output_progress_label": "Завершение вывода ансамбля... -- 98.0%",
210
  "ensemble_file_creation_failed": "Не удалось создать файл ансамбля: {}",
211
  "ensemble_completed_progress_label": "Ансамбль успешно завершен! -- 100.0%",
212
- "success_output_created": "✅ Успех! Выходной файл создан.",
213
- "drive_mounted_copying_ensemble": "Google Drive подключен. Копирование вывода ансамбля...",
214
- "drive_already_mounted_copying_ensemble": "Google Drive уже подключен. Копирование вывода ансамбля...",
215
- "no_ensemble_output_files_found": "❌ Файлы вывода ансамбля не найдены.",
216
- "ensemble_output_copied": "Вывод ансамбля скопирован в {}",
217
- "error_copying_ensemble_output": "❌ Ошибка копирования вывода ансамбля: {}",
218
- "drive_mounted_copying_files": "Google Drive подключен. Копирование файлов...",
219
- "drive_already_mounted_copying_files": "Google Drive уже подключен. Копирование файлов...",
220
- "files_copied_to_drive": "Файлы скопированы в {}",
221
- "error_copying_files": "❌ Ошибка копирования файлов: {}",
222
- "mounting_drive": "Подключение Google Drive...",
223
- "settings_saved": "Настройки сохранены в config.json: Язык={lang}, Метод={method}, Порт={port}",
224
- "selected_language": "Выбранный язык: {lang}",
225
- "available_languages": "Доступные языки: {langs}",
226
- "select_language_prompt": "Выберите язык (по умолчанию: {default}): ",
227
- "invalid_language": "Неверный выбор языка! Используется язык по умолчанию ({default}).",
228
- "available_sharing_methods": "Доступные методы совместного доступа: gradio, localtunnel, ngrok",
229
- "select_sharing_method_prompt": "Выберите метод совместного доступа (по умолчанию: {default}): ",
230
- "enter_ngrok_token_prompt": "Введите ваш токен Ngrok (по умолчанию: {default}): ",
231
- "ngrok_token_required": "Ошибка: Для метода ngrok требуется токен Ngrok!",
232
- "enter_port_prompt": "Введите номер порта (по умолчанию: {default}): ",
233
- "opening_previous_url": "Открытие предыдущего URL: {url}",
234
- "open_url_manually": "Откройте URL вручную в Colab: {url}",
235
- "invalid_previous_url": "Неверный предыдущий URL: {url}, не открыт.",
236
- "starting_method": "Запуск {method} на порту {port}...",
237
- "process_stopped": "\n🛑 Процесс остановлен пользователем",
238
- "selected_port": "Выбранный порт: {port}",
239
- "starting_gradio_with_sharing": "Запуск Gradio с встроенным совместным доступом...",
240
- "starting_localtunnel": "Запуск Localtunnel на порту {port}...",
241
- "share_link": "Ссылка для совместного доступа: {url}",
242
- "password_ip": "Пароль IP: {ip}",
243
- "starting_ngrok": "Запуск Ngrok на порту {port}...",
244
- "ngrok_url": "URL Ngrok: {url}",
245
- "ngrok_error": "Ошибка запуска Ngrok: {error}",
246
- "apollo_chunk_size": "Размер фрагмента Apollo",
247
- "apollo_chunk_size_info": "Размер фрагмента для улучшения Apollo (рекомендуется: 19 для универсальной модели)",
248
- "apollo_overlap": "Перекрытие Apollo",
249
- "enhancing_with_apollo": "Улучшение с Apollo ({}/{} файлов)...",
250
- "apollo_processing_completed": "Обработка Apollo завершена!",
251
- "apollo_overlap_info": "Перекрытие для улучшения Apollo (рекомендуется: 2)",
252
- "invalid_method": "Ошибка: Неверный метод! Используйте 'gradio', 'localtunnel' или 'ngrok'.",
253
- "apollo_enhancement_settings": "Настройки улучшения Apollo",
254
- "enhance_with_apollo": "Улучшить с Apollo",
255
- "enhance_with_apollo_info": "Включить Apollo для улучшения аудио после разделения",
256
- "apollo_method": "Метод Apollo",
257
- "apollo_method_info": "Выберите метод обработки для Apollo",
258
- "normal_method": "Обычный метод",
259
- "mid_side_method": "Метод середины/сторон",
260
- "apollo_normal_model": "Обычная модель Apollo",
261
- "apollo_normal_model_info": "Модель, используемая для обычной обработки Apollo",
262
- "apollo_midside_model": "Модель Apollo середины/сторон",
263
- "apollo_enhancement_info": "Улучшение качества звука",
264
- "selected_models": "Выбранные модели",
265
- "save_preset": "Сохранить пресет",
266
- "delete_preset": "Удалить пресет",
267
- "refresh_presets": "Обновить пресеты",
268
- "preset_name": "Имя пресета",
269
- "select_preset": "Выбрать пресет",
270
- "add_favorite": "Добавить в избранное",
271
- "apollo_midside_model_info": "Модель, используемая для обработки середины/сторон (опционально)",
272
- "language_changed_message": "Язык изменен. Пожалуйста, перезапустите ячейку.",
273
- "apply_matchering": "Применить Matchering",
274
- "matchering_info": "Применить Matchering для улучшения мастеринга и баланса звука",
275
- "matchering_passes": "Проходы Matchering",
276
- "matchering_passes_info": "Количество итераций Matchering (1-5, более высокие значения улучшают качество, но занимают больше времени)",
277
- "matchering_processing": "Применение Matchering... ({:.1f}%)",
278
- "matchering_completed": "✅ Обработка Matchering завершена!",
279
- "matchering_error": "❌ Ошибка во время Matchering: {}"
280
  }
 
19
  "tta_boost": "Усиление TTA",
20
  "tta_info": "Улучшает качество, но замедляет скорость обработки",
21
  "phase_fix": "Коррекция фазы",
22
+ "use_phase_correction": "Применить расширенную коррекцию фазы",
23
  "phase_fix_info": "Расширенная коррекция фазы для инструментальных треков",
24
  "instrumental": "Инструментал",
25
  "instrumental_info": "Обычно дает 2 выхода, но иногда только один, нужно экспериментировать :)",
 
47
  "copy_to_drive": "📂 Копировать на Диск",
48
  "copy_status": "Статус копирования",
49
  "files_will_be_copied": "Файлы будут скопированы сюда...",
50
+ "processing_tip": "<b>🔈 Совет по обработке:</b> Для шумных результатов используйте модели <code>bleed_suppressor_v1</code> или <code>denoisedebleed</code> в категории <i>Удаление шума и эффектов</i>, чтобы очистить выходные данные",
51
  "waiting_for_processing": "Ожидание обработки...",
52
  "auto_ensemble_tab": "🤖 Автоматический ансамбль",
53
  "upload_file": "Загрузить файл",
 
72
  "output_preview": "Предпросмотр вывода",
73
  "refresh_output": "🔄 Обновить вывод",
74
  "ensemble_copy_status": "Вывод ансамбля будет скопирован сюда...",
75
+ "guidelines": "<h4>Руководство по выбору модели</h4><ul><li><strong>Избегайте смешивания категорий:</strong> Сочетание вокальных и инструментальных моделей может привести к нежелательным смесям</li><li><strong>Примечания к специальным моделям:</strong><ul><li>Модели двойственности (v1/v2) - Выводят оба стема</li><li>Разделитель MDX23C - Гибридные результаты</li></ul></li><li><strong>Лучшая практика:</strong> Используйте 3-5 похожих моделей из одной категории</li></ul><div>💡 Совет профессионала: Начните с комбинации \"VOCALS-MelBand-Roformer BigBeta5e\" + \"VOCALS-BS-Roformer_1297\"</div>",
76
  "download_sources_tab": "⬇️ Скачать источники",
77
  "direct_links": "🌐 Прямые ссылки",
78
  "audio_file_url": "URL аудиофайла",
79
+ "download_from_url": "⬇️ Скачать с URL",
80
  "download_status": "Статус загрузки",
81
+ "downloaded_file": "Загруженный файл",
82
+ "cookie_management": "🍪 Управление куки",
83
  "upload_cookies_txt": "Загрузить Cookies.txt",
84
+ "cookie_info": "<div style=\"margin-left:15px; font-size:0.95em\">**📌 Почему необходимо?**<br>- Доступ к контенту с ограничением по возрасту<br>- Скачивание приватных/неопубликованных видео<br>- Обход региональных ограничений<br>- Избежание лимитов скачивания YouTube<br><br>**⚠️ Важные замечания**<br>- НИКОГДА не делитесь своими файлами куки!<br>- Обновляйте куки, если:<br> • Получаете ошибки \"403 Запрещено\"<br> • Загрузки внезапно останавливаются<br> • Видите сообщения \"Сессия истекла\"<br><br>**🔄 Шаги обновления**<br>1. Установите это <a href=\"https://chromewebstore.google.com/detail/get-cookiestxt-clean/ahmnmhfbokciafffnknlekllgcnafnie\" target=\"_blank\">расширение Chrome</a><br>2. Войдите в YouTube в Chrome<br>3. Нажмите на иконку расширения → \"Экспорт\"<br>4. Загрузите скачанный файл сюда<br><br>**⏳ Срок действия куки**<br>- Обычные сессии: 24 часа<br>- Чувствительные операции: 1 час<br>- Смена пароля: Немедленное обновление</div>",
85
+ "ensemble_output_copied": "✅ Вывод ансамбля скопирован в {}",
86
+ "error_copying_ensemble_output": "❌ Ошибка копирования вывода ансамбля: {}",
87
+ "drive_mounted_copying_ensemble": "Google Drive подключен. Копирование вывода ансамбля...",
88
+ "drive_already_mounted_copying_ensemble": "Google Drive уже подключен. Копирование вывода ансамбля...",
89
+ "no_ensemble_output_files_found": "❌ Файлы вывода ансамбля не найдены.",
90
+ "drive_mounted_copying_files": "Google Drive подключен. Копирование файлов...",
91
+ "drive_already_mounted_copying_files": "Google Drive уже подключен. Копирование файлов...",
92
+ "files_copied_to_drive": "✅ Файлы скопированы в {}",
93
+ "error_copying_files": "❌ Ошибка копирования файлов: {}",
94
+ "mounting_drive": "Подключение Google Drive...",
95
+ "settings_saved": "Настройки сохранены в config.json: Язык={lang}, Метод={method}, Порт={port}",
96
+ "selected_language": "Выбранный язык: {lang}",
97
+ "available_languages": "Доступные языки: {langs}",
98
+ "select_language_prompt": "Выберите язык (по умолчанию: {default}): ",
99
+ "invalid_language": "Неверный выбор языка! Используется язык по умолчанию ({default}).",
100
+ "available_sharing_methods": "Доступные методы совместного доступа: gradio, localtunnel, ngrok",
101
+ "select_sharing_method_prompt": "Выберите метод совместного доступа (по умолчанию: {default}): ",
102
+ "enter_ngrok_token_prompt": "Введите ваш токен Ngrok (по умолчанию: {default}): ",
103
+ "ngrok_token_required": "Ошибка: Для метода ngrok требуется токен Ngrok!",
104
+ "enter_port_prompt": "Введите номер порта (по умолчанию: {default}): ",
105
+ "opening_previous_url": "Открытие предыдущего URL: {url}",
106
+ "open_url_manually": "Вручную откройте URL в Colab: {url}",
107
+ "invalid_previous_url": "Неверный предыдущий URL: {url}, не открыт.",
108
+ "starting_method": "Запуск {method} на порту {port}...",
109
+ "process_stopped": "\n🛑 Процесс остановлен пользователем",
110
+ "selected_port": "Выбранный порт: {port}",
111
+ "starting_gradio_with_sharing": "Запуск Gradio с встроенным совместным доступом...",
112
+ "starting_localtunnel": "Запуск Localtunnel на порту {port}...",
113
+ "share_link": "Ссылка для совместного доступа: {url}",
114
+ "password_ip": "Пароль IP: {ip}",
115
+ "starting_ngrok": "Запуск Ngrok на порту {port}...",
116
+ "ngrok_url": "URL Ngrok: {url}",
117
+ "ngrok_error": "Ошибка запуска Ngrok: {error}",
118
+ "auto_apollo_chunk_size": "Размер фрагмента Apollo",
119
+ "auto_apollo_chunk_size_info": "Размер фрагмента для улучшения Apollo (рекомендуется: 19 для универсальной модели)",
120
+ "auto_apollo_overlap": "Перекрытие Apollo",
121
+ "auto_enhancing_with_apollo": "Улучшение с Apollo ({}/{} файлов)...",
122
+ "auto_apollo_processing_completed": "Обработка Apollo завершена!",
123
+ "auto_apollo_overlap_info": "Перекрытие для улучшения Apollo (рекомендуется: 2)",
124
+ "apollo_chunk_size": "Размер фрагмента Apollo",
125
+ "apollo_chunk_size_info": "Размер фрагмента для улучшения Apollo (рекомендуется: 19 для универсальной модели)",
126
+ "apollo_overlap": "Перекрытие Apollo",
127
+ "enhance_with_apollo": "Улучшить с Apollo",
128
+ "apollo_enhancement_info": "Улучшение качества звука",
129
+ "enhancing_with_apollo": "Улучшение с Apollo ({}/{} файлов)...",
130
+ "apollo_processing_completed": "Обработка Apollo завершена!",
131
+ "apollo_overlap_info": "Перекрытие для улучшения Apollo (рекомендуется: 2)",
132
+ "selected_models": "Выбранные модели",
133
+ "save_preset": "Сохранить пресет",
134
+ "delete_preset": "Удалить пресет",
135
+ "refresh_presets": "Обновить пресеты",
136
+ "preset_name": "Имя пресета",
137
+ "select_preset": "Выбрать пресет",
138
+ "add_favorite": "Добавить в избранное",
139
+ "apply_matchering": "Применить Matchering",
140
+ "matchering_info": "Применить Matchering для улучшения мастеринга и баланса звука",
141
+ "matchering_passes": "Проходы Matchering",
142
+ "matchering_passes_info": "Количество итераций Matchering (1-5, более высокие значения улучшают качество, но занимают больше времени)",
143
+ "matchering_processing": "Применение Matchering... ({:.1f}%)",
144
+ "matchering_completed": "✅ Обработка Matchering завершена!",
145
+ "matchering_error": "❌ Ошибка во время Matchering: {}",
146
+ "invalid_method": "Ошибка: Неверный метод! Используйте 'gradio', 'localtunnel' или 'ngrok'.",
147
+ "language_changed_message": "Язык изменен. Пожалуйста, перезапустите ячейку.",
148
+ "backend_settings": "🚀 Настройки Backend",
149
+ "inference_backend": "Backend инференса",
150
+ "optimization_mode": "Режим оптимизации",
151
+ "optimization_mode_info": "Выбор режима оптимизации PyTorch",
152
+ "mixed_precision_amp": "Смешанная точность (AMP)",
153
+ "mixed_precision_info": "Inference в 2 раза быстрее - рекомендуется",
154
+ "tf32_acceleration": "Ускорение TF32",
155
+ "tf32_acceleration_info": "Дополнительный прирост скорости для RTX 30xx+",
156
+ "cudnn_benchmark": "Бенчмарк cuDNN",
157
+ "cudnn_benchmark_info": "Медленнее на первом запуске, гораздо быстрее на последующих",
158
+ "ultra_optimized_pytorch": "УЛЬТРА-ОПТИМИЗИРОВАННЫЙ PyTorch Backend",
159
+ "default_active_max_speed": "Активен по умолчанию - Максимальная оптимизация скорости",
160
+ "channels_last_mode": "Самый быстрый для RTX GPU",
161
+ "compile_mode": "Дополнительная скорость для PyTorch 2.0+",
162
+ "default_mode": "Стандартный",
163
  "manual_ensemble_tab": "🎚️ Ручной ансамбль",
164
  "input_sources": "📂 Источники ввода",
165
  "refresh": "🔄 Обновить",
166
  "ensemble_algorithm": "Алгоритм ансамбля",
167
+ "select_audio_files": "Выбрать аудиофайлы",
168
  "available_files": "Доступные файлы",
169
+ "custom_weights": "Пользовательские веса (через запятую)",
170
  "custom_weights_placeholder": "Пример: 0.8, 1.2, 1.0, ...",
171
  "custom_weights_info": "Оставьте пустым для равных весов",
172
  "result_preview_tab": "🎧 Предпросмотр результата",
173
+ "ensembled_output": "Вывод ансамбля",
174
  "processing_log_tab": "📋 Журнал обработки",
175
  "processing_details": "Детали обработки",
176
  "process_ensemble": "⚡ Обработать ансамбль",
177
  "languages_tab": "🌐 Языки",
178
  "Vocal Models": "Вокальные модели",
179
  "Instrumental Models": "Инструментальные модели",
180
+ "4-Stem Models": "4-стемовые модели",
181
  "Denoise Models": "Модели шумоподавления",
182
  "Dereverb Models": "Модели удаления реверберации",
183
  "Other Models": "Другие модели",
184
+ "Other Shapes": "Другие модели",
185
+ "ensemble_files_description": "Объединяет несколько аудиофайлов в один выходной файл с использованием указанных методов ансамбля.",
186
  "ensemble_files_help": "Путь ко всем аудиофайлам для ансамбля",
187
  "ensemble_type_help": "Один из: avg_wave, median_wave, min_wave, max_wave, avg_fft, median_fft, min_fft, max_fft",
188
  "ensemble_weights_help": "Веса для создания ансамбля. Количество весов должно совпадать с количеством файлов",
189
+ "ensemble_output_help": "Путь к WAV-файлу, где будет сохранен результат ансамбля",
190
  "ensemble_type_print": "Тип ансамбля: {}",
191
  "num_input_files_print": "Количество входных файлов: {}",
192
  "weights_print": "Веса: {}",
193
  "output_file_print": "Выходной файл: {}",
194
+ "duration_mismatch_error": "Все файлы должны иметь одинаковую продолжительность",
195
+ "file_not_found_error": "Ошибка. Не удается найти файл: {}. Проверьте пути.",
196
+ "reading_chunk_print": "Чтение фрагмента из файла: {} (начало: {}с, продолжительность: {}с)",
197
  "chunk_result_shape_print": "Форма результата фрагмента: {}",
198
+ "ensemble_completed_print": "Ансамбль завершен. Вывод сохранен в: {}",
199
+ "file_deletion_error": "{} не удалось удалить: {}",
200
+ "directory_not_exist_warning": "⚠️ Каталог не существует: {}",
201
+ "not_a_directory_warning": "⚠️ Путь не является каталогом: {}",
202
  "item_deletion_error": "⚠️ Ошибка удаления {}: {}",
203
  "old_output_not_exist": "❌ Старая папка вывода не существует",
204
+ "old_outputs_cleared": "✅ Старые выходные данные успешно очищены!",
205
  "error": "🔥 Ошибка: {}",
206
  "ffmpeg_error": "Ошибка FFmpeg ({}): {}",
207
  "file_saved_successfully": "Файл успешно сохранен: {}",
208
  "total_files_found": "Всего найдено файлов: {}. Используемая частота дискретизации: {}",
209
  "total_progress": "Общий прогресс",
210
+ "detailed_pbar_enabled": "Подробная полоса прогресса включена: {}",
211
+ "loaded_audio": "Загружено аудио: {}, форма: {}",
212
  "cannot_read_track": "Не удается прочитать трек: {}",
213
  "error_message": "Сообщение об ошибке: {}",
214
  "demudding_track": "Очистка трека (ремикс фазы - инструментал): {}",
215
+ "elapsed_time": "Прошедшее время: {:.2f} секунд.",
216
  "proc_folder_description": "Обрабатывает аудиофайлы в папке с использованием указанной модели.",
217
  "model_type_help": "Тип модели (bandit, bs_roformer, mdx23c и т.д.)",
218
  "config_path_help": "Путь к файлу конфигурации",
 
220
  "start_checkpoint_help": "Начальная контрольная точка для действительных весов",
221
  "input_folder_help": "Папка с миксами для обработки",
222
  "audio_path_help": "Путь к одному аудиофайлу для обработки",
223
+ "store_dir_help": "Путь для сохранения результатов",
224
  "device_ids_help": "Список идентификаторов GPU",
225
+ "extract_instrumental_help": "Инвертировать вокал для получения инструментала, если предоставлен",
226
+ "disable_detailed_pbar_help": "Отключить подробную полосу прогресса в демиксе",
227
+ "force_cpu_help": "Принудительно использовать CPU, даже если доступна CUDA",
228
+ "flac_file_help": "Вывод FLAC-файла вместо WAV",
229
  "export_format_help": "Формат экспорта и тип PCM",
230
+ "pcm_type_help": "Тип PCM для FLAC-файлов",
231
+ "use_tta_help": "Включить аугментацию во время тестирования (TTA)",
232
  "lora_checkpoint_help": "Начальная контрольная точка для весов LoRA",
233
+ "cuda_available": "CUDA доступна, используйте --force_cpu для отключения.",
234
  "using_device": "Используемое устройство: {}",
235
  "instruments_print": "Инструменты: {}",
236
  "model_load_time": "Время загрузки модели: {:.2f} сек",
237
+ "invalid_url": "❌ Недействительный URL",
238
+ "cookie_file_updated": "✅ Файл куки обновлен!",
239
+ "cookie_installation_error": "⚠️ Ошибка установки куки: {}",
240
+ "google_drive_mount_error": "❌ Ошибка подключения Google Drive: {}",
241
+ "file_size_zero_error": "Размер файла ноль или файл не создан",
242
+ "google_drive_error": "❌ Ошибка загрузки Google Drive: {}",
243
+ "wav_conversion_failed": "Конвертация WAV не удалась",
244
  "download_error": "❌ Ошибка загрузки: {}",
245
+ "file_copied_to_drive": "Файл скопирован в Google Drive: {}",
246
+ "copy_to_drive_error": "❌ Ошибка копирования в Google Drive: {}",
247
+ "direct_download_failed": "Прямая загрузка файла не удалась",
248
+ "direct_download_error": "❌ Ошибка прямой загрузки: {}",
249
+ "invalid_google_drive_url": "❌ Недействительный URL Google Drive",
250
+ "continuing_without_google_drive": "Подключение Google Drive не удалось, продолжаем без него...",
251
+ "skipping_drive_copy_no_mount": "⚠️ Google Drive не подключен, пропуск копирования на Drive...",
252
+ "download_success": "🎉 Успешно загружено!",
253
  "download_failed": "❌ Загрузка не удалась",
254
  "no_output_files_found": "❌ Выходные файлы не найдены.",
255
  "output_refreshed_successfully": "✅ Вывод успешно обновлен!",
256
+ "error_refreshing_output": "❌ Ошибка обновления вывода: {}",
257
  "starting_audio_separation": "Запуск разделения аудио...",
258
  "processing_audio": "Обработка аудио",
259
  "separating_audio": "Разделение аудио... ({:.1f}%)",
 
261
  "progress_parsing_error": "Ошибка разбора прогресса: {}",
262
  "error_occurred": "Произошла ошибка: {}",
263
  "separation_process_completed": "Процесс разделения завершен!",
264
+ "no_audio_file_error": "❌ Аудиофайл не предоставлен и нет существующего файла в каталоге ввода.",
265
  "no_input_progress_label": "Ошибка: Ввод не предоставлен -- 0.0%",
266
  "processing_audio_print": "Обработка аудио из: {} с использованием модели: {}",
267
  "starting_audio_separation_progress_label": "Запуск разделения аудио... -- 0.0%",
 
270
  "audio_processing_completed": "✅ Обработка аудио завершена!",
271
  "error_occurred_progress_label": "Произошла ошибка -- 0.0%",
272
  "minimum_files_required": "⚠️ Требуется минимум 2 файла",
273
+ "valid_files_not_found": "❌ Действительные файлы не найдены",
274
  "starting_ensemble_process": "Запуск процесса ансамбля...",
275
+ "ensembling_progress": "Ансамблирование... ({:.1f}%)",
276
  "finalizing_ensemble_output": "Завершение вывода ансамбля...",
277
+ "success_log": "✅ Успешно!\n{}",
278
  "error_log": "❌ Ошибка!\n{}",
279
  "critical_error": "⛔ Критическая ошибка: {}",
280
  "ensemble_process_completed": "Процесс ансамбля завершен!",
 
292
  "waiting_for_files_progress_label": "Ожидание готовности всех файлов... -- 90.0%",
293
  "performing_ensemble": "Выполнение ансамбля...",
294
  "performing_ensemble_progress_label": "Выполнение ансамбля... -- 92.0%",
295
+ "memory_usage_before_ensemble": "Использование памяти до ансамбля: {}%",
296
  "memory_usage_after_ensemble": "Использование памяти после ансамбля: {}%",
297
  "finalizing_ensemble_output_progress_label": "Завершение вывода ансамбля... -- 98.0%",
298
  "ensemble_file_creation_failed": "Не удалось создать файл ансамбля: {}",
299
  "ensemble_completed_progress_label": "Ансамбль успешно завершен! -- 100.0%",
300
+ "success_output_created": "✅ Успешно! Создан выходной файл.",
301
+ "phase_fixer_tab": "🔧 Корректор фазы",
302
+ "batch_processing_tab": "📦 Пакетная обработка",
303
+ "batch_description": "Обработка нескольких файлов одновременно с одной моделью",
304
+ "batch_input_folder": "Входная папка",
305
+ "batch_input_folder_placeholder": "Введите путь к папке с аудиофайлами",
306
+ "batch_output_folder": "Выходная папка",
307
+ "batch_output_folder_placeholder": "Введите путь к папке для выходных файлов",
308
+ "batch_file_list": "Файлы для обработки",
309
+ "batch_add_files": "Добавить файлы",
310
+ "batch_clear_list": "Очистить список",
311
+ "batch_start": "🚀 Начать пакетную обработку",
312
+ "batch_stop": "⏹️ Стоп",
313
+ "batch_progress": "Прогресс пакетной обработки",
314
+ "batch_current_file": "Сейчас обрабатывается",
315
+ "batch_completed": "✅ Пакетная обработка завершена!",
316
+ "batch_stopped": "⏹️ Пакетная обработка остановлена",
317
+ "batch_no_files": "❌ Файлы для обработки не выбраны",
318
+ "source_model": "Исходная модель (Вокал)",
319
+ "source_model_info": "Модель для извлечения вокала/фазовых данных",
320
+ "target_model": "Целевая модель (Инструментал)",
321
+ "target_model_info": "Модель для применения коррекции фазы",
322
+ "custom_models_tab": "⚡ Пользовательские модели",
323
+ "Custom Models": "Пользовательские модели",
324
+ "add_custom_model": " Добавить пользовательскую модель",
325
+ "custom_model_name": "Название модели",
326
+ "custom_model_name_placeholder": "Введите название для вашей модели",
327
+ "checkpoint_url": "URL чекпоинта (.ckpt/.pth)",
328
+ "checkpoint_url_placeholder": "https://huggingface.co/.../model.ckpt",
329
+ "config_url": "URL конфигурации (.yaml)",
330
+ "config_url_placeholder": "https://huggingface.co/.../config.yaml",
331
+ "custom_py_url": "URL пользовательского Python файла (необязательно)",
332
+ "custom_py_url_placeholder": "https://huggingface.co/.../bs_roformer.py",
333
+ "auto_detect_type": "Автоматически определить тип модели",
334
+ "model_type": "Тип модели",
335
+ "add_model_btn": " Добавить модель",
336
+ "custom_models_list": "📋 Список пользовательских моделей",
337
+ "delete_model": "🗑️ Удалить",
338
+ "no_custom_models": "Пользовательские модели ещё не добавлены",
339
+ "model_added_success": " Модель успешно добавлена!",
340
+ "model_deleted_success": " Модель успешно удалена!",
341
+ "model_add_error": "❌ Ошибка добавления модели: {}",
342
+ "model_delete_error": "❌ Ошибка удаления модели: {}",
343
+ "refresh_models": "🔄 Обновить",
344
+ "custom_model_info": "Добавляйте пользовательские модели, предоставив ссылки для скачивания. Модели будут скачаны автоматически при использовании.",
345
+ "select_model_to_delete": "Выберите модель для удаления"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
346
  }
assets/i18n/languages/tr_tr.json CHANGED
@@ -19,6 +19,7 @@
19
  "tta_boost": "TTA Güçlendirme",
20
  "tta_info": "Kaliteyi artırır ancak işlem hızını yavaşlatır",
21
  "phase_fix": "Faz Düzeltme",
 
22
  "phase_fix_info": "Enstrümantal parçalar için gelişmiş faz düzeltme",
23
  "instrumental": "Enstrümantal",
24
  "instrumental_info": "Genellikle 2 çıktı verir, ama bazen tek çıktı olabilir, denemen lazım! :)",
@@ -48,7 +49,7 @@
48
  "files_will_be_copied": "Dosyalar buraya kopyalanacak...",
49
  "processing_tip": "<b>🔈 İşleme İpucu:</b> Gürültülü sonuçlar için <i>\"Gürültü ve Efekt Kaldırma\"</i> kategorisindeki <code>bleed_suppressor_v1</code> veya <code>denoisedebleed</code> modellerini kullanarak çıkışı temizleyin",
50
  "waiting_for_processing": "İşleme Bekleniyor...",
51
- "auto_ensemble_tab": "🤖 Otomatik birleştirme",
52
  "upload_file": "Dosya Yükle",
53
  "enter_file_path": "Veya Dosya Yolunu Gir",
54
  "file_path_placeholder": "Ses dosyasının tam yolunu gir",
@@ -61,16 +62,16 @@
61
  "model_selection": "🧠 Model Seçimi",
62
  "model_category": "Model Kategorisi",
63
  "select_models": "Kategoriden Model Seç",
64
- "ensemble_settings": "⚡ birleştirme Ayarları",
65
  "method": "Yöntem",
66
  "recommendation": "**Öneri:** avg_wave ve max_fft en iyi sonuçları verir",
67
  "start_processing": "🚀 İşlemi Başlat",
68
  "original_audio_tab": "🔊 Orijinal Ses",
69
  "original_audio": "Orijinal Ses",
70
- "ensemble_result_tab": "🎚️ birleştirme Sonucu",
71
  "output_preview": "Çıktı Önizlemesi",
72
  "refresh_output": "🔄 Çıktıyı Yenile",
73
- "ensemble_copy_status": "birleştirme çıktısı buraya kopyalanacak...",
74
  "guidelines": "<h4>Model Seçim Kılavuzu</h4><ul><li><strong>Kategorileri Karıştırmaktan Kaçının:</strong> Vokal ve enstrümantal modellerin birleşimi istenmeyen karışımlara yol açabilir</li><li><strong>Özel Modeller Hakkında Notlar:</strong><ul><li>İkili modeller (v1/v2) - Her iki stemi de üretir</li><li>MDX23C Ayırıcı - Hibrit sonuçlar</li></ul></li><li><strong>En İyi Uygulama:</strong> Aynı kategoriden 3-5 benzer model kullanın</li></ul><div>💡 Profesyonel İpucu: \"VOCALS-MelBand-Roformer BigBeta5e\" + \"VOCALS-BS-Roformer_1297\" kombinasyonuyla başlayın</div>",
75
  "download_sources_tab": "⬇️ Kaynakları İndir",
76
  "direct_links": "🌐 Doğrudan Bağlantılar",
@@ -79,22 +80,121 @@
79
  "download_status": "İndirme Durumu",
80
  "downloaded_file": "İndirilen Dosya",
81
  "cookie_management": "🍪 Çerez Yönetimi",
82
- "upload_cookies_txt": "Cookies.txt Yükle",
83
- "cookie_info": "<div style=\"margin-left:15px; font-size:0.95em\">**📌 Neden Gerekli?**<br>- Yaş sınırlı içeriklere erişim<br>- Özel/listelenmemiş videoları indirme<br>- Bölgesel kısıtlamaları aşma<br>- YouTube indirme limitlerini geçme<br><br>**⚠️ Önemli Notlar**<br>- Çerez dosyalarınızı ASLA paylaşmayın!<br>- Çerezleri şu durumlarda yenileyin:<br> • \"403 Yasak\" hataları alırsanız<br> • İndirmeler aniden durursa<br> • \"Oturum Süresi Doldu\" mesajı görürseniz<br><br>**🔄 Yenileme Adımları**<br>1. Bu <a href=\"https://chromewebstore.google.com/detail/get-cookiestxt-clean/ahmnmhfbokciafffnknlekllgcnafnie\" target=\"_blank\">Chrome uzantısını</a> yükleyin<br>2. Chrome'da YouTube'a giriş yapın<br>3. Uzantı simgesine tıklayın → \"Dışa Aktar\"<br>4. İndirilen dosyayı buraya yükleyin<br><br>**⏳ Çerez Ömrü**<br>- Normal oturumlar: 24 saat<br>- Hassas işlemler: 1 saat<br>- Şifre değişikliği: Anında geçersizlik</div>",
84
- "manual_ensemble_tab": "🎚️ Manuel birleştirme",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
  "input_sources": "📂 Giriş Kaynakları",
86
  "refresh": "🔄 Yenile",
87
- "ensemble_algorithm": "birleştirme Algoritması",
88
  "select_audio_files": "Ses Dosyalarını Seç",
89
  "available_files": "Mevcut Dosyalar",
90
  "custom_weights": "Özel Ağırlıklar (virgülle ayrılmış)",
91
  "custom_weights_placeholder": "Örnek: 0.8, 1.2, 1.0, ...",
92
  "custom_weights_info": "Eşit ağırlıklar için boş bırakın",
93
  "result_preview_tab": "🎧 Sonuç Önizlemesi",
94
- "ensembled_output": "birleştirme Çıktısı",
95
  "processing_log_tab": "📋 İşlem Günlüğü",
96
  "processing_details": "İşlem Ayrıntıları",
97
- "process_ensemble": "⚡ birleştirmeyi İşle",
98
  "languages_tab": "🌐 Diller",
99
  "Vocal Models": "Vokal Modeller",
100
  "Instrumental Models": "Enstrümantal Modeller",
@@ -102,12 +202,13 @@
102
  "Denoise Models": "Gürültü Giderme Modelleri",
103
  "Dereverb Models": "Yankı Giderme Modelleri",
104
  "Other Models": "Diğer Modeller",
 
105
  "ensemble_files_description": "Belirtilen birleştirme yöntemlerini kullanarak birden fazla ses dosyasını tek bir çıktıda birleştirir.",
106
- "ensemble_files_help": "birleştirme için tüm ses dosyalarına giden yol",
107
  "ensemble_type_help": "Şunlardan biri: avg_wave, median_wave, min_wave, max_wave, avg_fft, median_fft, min_fft, max_fft",
108
- "ensemble_weights_help": "birleştirme oluşturmak için ağırlıklar. Ağırlık sayısı dosya sayısına eşit olmalı",
109
- "ensemble_output_help": "birleştirme sonucunun kaydedileceği WAV dosyasının yolu",
110
- "ensemble_type_print": "birleştirme türü: {}",
111
  "num_input_files_print": "Giriş dosyası sayısı: {}",
112
  "weights_print": "Ağırlıklar: {}",
113
  "output_file_print": "Çıktı dosyası: {}",
@@ -115,7 +216,7 @@
115
  "file_not_found_error": "Hata. Dosya bulunamadı: {}. Yolları kontrol et.",
116
  "reading_chunk_print": "Dosyadan parça okunuyor: {} (başlangıç: {}s, süre: {}s)",
117
  "chunk_result_shape_print": "Parça sonucu şekli: {}",
118
- "ensemble_completed_print": "birleştirme tamamlandı. Çıktı şuraya kaydedildi: {}",
119
  "file_deletion_error": "{} silinemedi: {}",
120
  "directory_not_exist_warning": "⚠️ Dizin mevcut değil: {}",
121
  "not_a_directory_warning": "⚠️ Yol bir dizin değil: {}",
@@ -157,10 +258,18 @@
157
  "invalid_url": "❌ Geçersiz URL",
158
  "cookie_file_updated": "✅ Çerez dosyası güncellendi!",
159
  "cookie_installation_error": "⚠️ Çerez yükleme hatası: {}",
 
160
  "file_size_zero_error": "Dosya boyutu sıfır veya dosya oluşturulmadı",
161
  "google_drive_error": "❌ Google Drive indirme hatası: {}",
162
  "wav_conversion_failed": "WAV dönüşümü başarısız",
163
  "download_error": "❌ İndirme hatası: {}",
 
 
 
 
 
 
 
164
  "download_success": "🎉 Başarıyla indirildi!",
165
  "download_failed": "❌ İndirme başarısız",
166
  "no_output_files_found": "❌ Çıktı dosyaları bulunamadı.",
@@ -183,13 +292,13 @@
183
  "error_occurred_progress_label": "Hata oluştu -- 0.0%",
184
  "minimum_files_required": "⚠️ En az 2 dosya gerekli",
185
  "valid_files_not_found": "❌ Geçerli dosya bulunamadı",
186
- "starting_ensemble_process": "birleştirme işlemi başlıyor...",
187
- "ensembling_progress": "birleştirme yapılıyor... ({:.1f}%)",
188
- "finalizing_ensemble_output": "birleştirme çıktısı tamamlanıyor...",
189
  "success_log": "✅ Başarılı!\n{}",
190
  "error_log": "❌ Hata!\n{}",
191
  "critical_error": "⛔ Kritik hata: {}",
192
- "ensemble_process_completed": "birleştirme işlemi tamamlandı!",
193
  "no_models_selected": "❌ Model seçilmedi",
194
  "no_input_audio_provided": "❌ Giriş sesi sağlanmadı",
195
  "loading_model": "Model yükleniyor {}/{}: {}...",
@@ -202,100 +311,69 @@
202
  "model_output_failed": "{} çıktı üretemedi",
203
  "waiting_for_files": "Tüm dosyaların hazır olması bekleniyor...",
204
  "waiting_for_files_progress_label": "Tüm dosyaların hazır olması bekleniyor... -- 90.0%",
205
- "performing_ensemble": "birleştirme gerçekleştiriliyor...",
206
- "performing_ensemble_progress_label": "birleştirme gerçekleştiriliyor... -- 92.0%",
207
- "memory_usage_before_ensemble": "birleştirme öncesi bellek kullanımı: {}%",
208
- "memory_usage_after_ensemble": "birleştirme sonrası bellek kullanımı: {}%",
209
- "finalizing_ensemble_output_progress_label": "birleştirme çıktısı tamamlanıyor... -- 98.0%",
210
- "ensemble_file_creation_failed": "birleştirme dosyası oluşturulamadı: {}",
211
- "ensemble_completed_progress_label": "birleştirme başarıyla tamamlandı! -- 100.0%",
212
  "success_output_created": "✅ Başarılı! Çıktı dosyası oluşturuldu.",
213
- "drive_mounted_copying_ensemble": "Google Drive bağlandı. birleştirme çıktısı kopyalanıyor...",
214
- "drive_already_mounted_copying_ensemble": "Google Drive zaten bağlı. birleştirme çıktısı kopyalanıyor...",
215
- "no_ensemble_output_files_found": " birleştirme çıktı dosyaları bulunamadı.",
216
- "ensemble_output_copied": " birleştirme çıktısı {}'e kopyalandı",
217
- "error_copying_ensemble_output": " birleştirme çıktısı kopyalanırken hata: {}",
218
- "drive_mounted_copying_files": "Google Drive bağlandı. Dosyalar kopyalanıyor...",
219
- "drive_already_mounted_copying_files": "Google Drive zaten bağlı. Dosyalar kopyalanıyor...",
220
- "files_copied_to_drive": " Dosyalar {}'e kopyalandı",
221
- "error_copying_files": "❌ Dosyalar kopyalanırken hata: {}",
222
- "mounting_drive": "Google Drive bağlanıyor...",
223
- "settings_saved": "Ayarlar config.json'a kaydedildi: Dil={lang}, Yöntem={method}, Port={port}",
224
- "selected_language": "Seçilen dil: {lang}",
225
- "availa ble_languages": "Kullanılabilir diller: {langs}",
226
- "select_language_prompt": "Bir dil seç (varsayılan: {default}): ",
227
- "invalid_language": "Geçersiz dil seçimi! Varsayılan dil kullanılıyor ({default}).",
228
- "available_sharing_methods": "Kullanılabilir paylaşım yöntemleri: gradio, localtunnel, ngrok",
229
- "select_sharing_method_prompt": "Bir paylaşım yöntemi seç (varsayılan: {default}): ",
230
- "enter_ngrok_token_prompt": "Ngrok token'ını gir (varsayılan: {default}): ",
231
- "ngrok_token_required": "Hata: ngrok yöntemi için Ngrok token'ı gerekli!",
232
- "enter_port_prompt": "Port numarasını gir (varsayılan: {default}): ",
233
- "opening_previous_url": "Önceki URL açılıyor: {url}",
234
- "open_url_manually": "URL'yi Colab'da manuel olarak aç: {url}",
235
- "invalid_previous_url": "Geçersiz önceki URL: {url}, açılmadı.",
236
- "starting_method": "{method} port {port} üzerinde başlatılıyor...",
237
- "process_stopped": "\n🛑 Kullanıcı tarafından işlem durduruldu",
238
- "selected_port": "Seçilen port: {port}",
239
- "starting_gradio_with_sharing": "Gradio dahili paylaşım ile başlatılıyor...",
240
- "starting_localtunnel": "Localtunnel port {port} üzerinde başlatılıyor...",
241
- "share_link": "Paylaşım bağlantısı: {url}",
242
- "password_ip": "Parola IP: {ip}",
243
- "starting_ngrok": "Ngrok port {port} üzerinde başlatılıyor...",
244
- "ngrok_url": "Ngrok URL: {url}",
245
- "ngrok_error": "Ngrok başlatma hatası: {error}",
246
- "auto_apollo_chunk_size": "Apollo Parça Boyutu",
247
- "auto_apollo_chunk_size_info": "Apollo iyileştirme için parça boyutu (evrensel model için önerilen: 19)",
248
- "auto_apollo_overlap": "Apollo Çakışması",
249
- "auto_enhancing_with_apollo": "Apollo ile iyileştiriliyor ({}/{} dosya)...",
250
- "auto_apollo_processing_completed": "Apollo işleme tamamlandı!",
251
- "autoapollo_overlap_info": "Apollo iyileştirme için çakışma (önerilen: 2)",
252
- "auto_invalid_method": "Hata: Geçersiz yöntem! 'gradio', 'localtunnel' veya 'ngrok' kullan.",
253
- "auto_apollo_enhancement_settings": "Apollo İyileştirme Ayarları",
254
- "auto_enhance_with_apollo": "Apollo ile İyileştir",
255
- "auto_enhance_with_apollo_info": "Ayrıştırmadan sonra ses iyileştirme için Apollo'yu etkinleştir",
256
- "auto_apollo_method": "Apollo Yöntemi",
257
- "auto_apollo_method_info": "Apollo için işlem yöntemini seç",
258
- "auto_normal_method": "Normal Yöntem",
259
- "auto_mid_side_method": "Orta/Yan Yöntem",
260
- "auto_apollo_normal_model": "Apollo Normal Model",
261
- "auto_apollo_normal_model_info": "Normal Apollo işleme için kullanılacak model",
262
- "auto_apollo_midside_model": "Apollo Orta/Yan Model",
263
- "auto_apollo_enhancement_info": "Ses kalitesi iyileştirme",
264
- "auto_apollo_midside_model_info": "Orta/yan işleme için kullanılacak model (isteğe bağlı)",
265
- "apollo_chunk_size": "Apollo Parça Boyutu",
266
- "apollo_chunk_size_info": "Apollo iyileştirme için parça boyutu (evrensel model için önerilen: 19)",
267
- "apollo_overlap": "Apollo Çakışması",
268
- "enhancing_with_apollo": "Apollo ile iyileştiriliyor ({}/{} dosya)...",
269
- "apollo_processing_completed": "Apollo işleme tamamlandı!",
270
- "apollo_overlap_info": "Apollo iyileştirme için çakışma (önerilen: 2)",
271
- "invalid_method": "Hata: Geçersiz yöntem! 'gradio', 'localtunnel' veya 'ngrok' kullan.",
272
- "apollo_enhancement_settings": "Apollo İyileştirme Ayarları",
273
- "enhance_with_apollo": "Apollo ile İyileştir",
274
- "enhance_with_apollo_info": "Ayrıştırmadan sonra ses iyileştirme için Apollo'yu etkinleştir",
275
- "apollo_method": "Apollo Yöntemi",
276
- "apollo_method_info": "Apollo için işlem yöntemini seç",
277
- "normal_method": "Normal Yöntem",
278
- "mid_side_method": "Orta/Yan Yöntem",
279
- "apollo_normal_model": "Apollo Normal için Model",
280
- "apollo_normal_model_info": "Normal Apollo işleme için kullanılacak model",
281
- "apollo_midside_model": "Apollo Orta/Yan Model",
282
- "apollo_enhancement_info": "Ses kalitesi iyileştirme",
283
- "apollo_processing_method": "Apollo işlem metodu",
284
- "apollo_mid_side_model": "Apollo orta/yan için modeller",
285
- "selected_models": "Seçilecek modeller",
286
- "save_preset": "Ön ayarı kaydet",
287
- "delete_preset": "Ön ayyarı sil",
288
- "refresh_presets": "Ön ayarları güncelle",
289
- "preset_name": "ön ayar ismi",
290
- "select_preset": "Ön ayarlar",
291
- "add_favorite": "favorilere ekle",
292
- "apollo_midside_model_info": "Orta/yan işleme için kullanılacak model (isteğe bağlı)",
293
- "language_changed_message": "Dil değiştirildi. Lütfen hücreyi yeniden başlat.",
294
- "apply_matchering": "Matchering Uygula",
295
- "matchering_info": "Ses masteringini ve dengesini iyileştirmek için Matchering uygula",
296
- "matchering_passes": "Matchering Geçişleri",
297
- "matchering_passes_info": "Matchering yineleme sayısı (1-5, yüksek değerler kaliteyi artırır ancak daha uzun sürer)",
298
- "matchering_processing": "Matchering uygulanıyor... ({:.1f}%)",
299
- "matchering_completed": "✅ Matchering işlemi tamamlandı!",
300
- "matchering_error": "❌ Matchering sırasında hata: {}"
301
  }
 
19
  "tta_boost": "TTA Güçlendirme",
20
  "tta_info": "Kaliteyi artırır ancak işlem hızını yavaşlatır",
21
  "phase_fix": "Faz Düzeltme",
22
+ "use_phase_correction": "Gelişmiş Faz Düzeltme Uygula",
23
  "phase_fix_info": "Enstrümantal parçalar için gelişmiş faz düzeltme",
24
  "instrumental": "Enstrümantal",
25
  "instrumental_info": "Genellikle 2 çıktı verir, ama bazen tek çıktı olabilir, denemen lazım! :)",
 
49
  "files_will_be_copied": "Dosyalar buraya kopyalanacak...",
50
  "processing_tip": "<b>🔈 İşleme İpucu:</b> Gürültülü sonuçlar için <i>\"Gürültü ve Efekt Kaldırma\"</i> kategorisindeki <code>bleed_suppressor_v1</code> veya <code>denoisedebleed</code> modellerini kullanarak çıkışı temizleyin",
51
  "waiting_for_processing": "İşleme Bekleniyor...",
52
+ "auto_ensemble_tab": "🤖 Otomatik Birleştirme",
53
  "upload_file": "Dosya Yükle",
54
  "enter_file_path": "Veya Dosya Yolunu Gir",
55
  "file_path_placeholder": "Ses dosyasının tam yolunu gir",
 
62
  "model_selection": "🧠 Model Seçimi",
63
  "model_category": "Model Kategorisi",
64
  "select_models": "Kategoriden Model Seç",
65
+ "ensemble_settings": "⚡ Birleştirme Ayarları",
66
  "method": "Yöntem",
67
  "recommendation": "**Öneri:** avg_wave ve max_fft en iyi sonuçları verir",
68
  "start_processing": "🚀 İşlemi Başlat",
69
  "original_audio_tab": "🔊 Orijinal Ses",
70
  "original_audio": "Orijinal Ses",
71
+ "ensemble_result_tab": "🎚️ Birleştirme Sonucu",
72
  "output_preview": "Çıktı Önizlemesi",
73
  "refresh_output": "🔄 Çıktıyı Yenile",
74
+ "ensemble_copy_status": "Birleştirme çıktısı buraya kopyalanacak...",
75
  "guidelines": "<h4>Model Seçim Kılavuzu</h4><ul><li><strong>Kategorileri Karıştırmaktan Kaçının:</strong> Vokal ve enstrümantal modellerin birleşimi istenmeyen karışımlara yol açabilir</li><li><strong>Özel Modeller Hakkında Notlar:</strong><ul><li>İkili modeller (v1/v2) - Her iki stemi de üretir</li><li>MDX23C Ayırıcı - Hibrit sonuçlar</li></ul></li><li><strong>En İyi Uygulama:</strong> Aynı kategoriden 3-5 benzer model kullanın</li></ul><div>💡 Profesyonel İpucu: \"VOCALS-MelBand-Roformer BigBeta5e\" + \"VOCALS-BS-Roformer_1297\" kombinasyonuyla başlayın</div>",
76
  "download_sources_tab": "⬇️ Kaynakları İndir",
77
  "direct_links": "🌐 Doğrudan Bağlantılar",
 
80
  "download_status": "İndirme Durumu",
81
  "downloaded_file": "İndirilen Dosya",
82
  "cookie_management": "🍪 Çerez Yönetimi",
83
+ "upload_cookies_txt": "Çerezler.txt Yükle",
84
+ "cookie_info": "<div style=\"margin-left:15px; font-size:0.95em\">**📌 Neden Gerekli?**<br>- Yaş kısıtlamalı içeriklere erişim<br>- Özel/listelenmemiş videoları indirme<br>- Bölgesel kısıtlamaları aşma<br>- YouTube indirme limitlerini aşma<br><br>**⚠️ Önemli Notlar**<br>- Çerez dosyalarınızı ASLA paylaşmayın!<br>- Çerezleri yenileyin:<br> • \"403 Yasak\" hataları alıyorsanız<br> • İndirmeler aniden duruyorsa<br> • \"Oturum sona erdi\" mesajı görüyorsanız<br><br>**🔄 Yenileme Adımları**<br>1. Bu <a href=\"https://chromewebstore.google.com/detail/get-cookiestxt-clean/ahmnmhfbokciafffnknlekllgcnafnie\" target=\"_blank\">Chrome eklentisini</a> yükleyin<br>2. Chrome'da YouTube'a giriş yapın<br>3. Eklenti simgesine tıklayın → \"Dışa Aktar\"<br>4. İndirilen dosyayı buraya yükleyin<br><br>**⏳ Çerez Ömrü**<br>- Normal oturumlar: 24 saat<br>- Hassas işlemler: 1 saat<br>- Şifre değişikliği: Anında yenileme</div>",
85
+ "ensemble_output_copied": " Birleştirme çıktısı {}'ya kopyalandı",
86
+ "error_copying_ensemble_output": "❌ Birleştirme çıktısı kopyalama hatası: {}",
87
+ "drive_mounted_copying_ensemble": "Google Drive bağlandı. Birleştirme çıktısını kopyalıyor...",
88
+ "drive_already_mounted_copying_ensemble": "Google Drive zaten bağlı. Birleştirme çıktısını kopyalıyor...",
89
+ "no_ensemble_output_files_found": "❌ Birleştirme çıktı dosyası bulunamadı.",
90
+ "drive_mounted_copying_files": "Google Drive bağlandı. Dosyaları kopyalıyor...",
91
+ "drive_already_mounted_copying_files": "Google Drive zaten bağlı. Dosyaları kopyalıyor...",
92
+ "files_copied_to_drive": "✅ Dosyalar {}'ya kopyalandı",
93
+ "error_copying_files": "❌ Dosya kopyalama hatası: {}",
94
+ "mounting_drive": "Google Drive bağlanıyor...",
95
+ "settings_saved": "Ayarlar config.json'a kaydedildi: Dil={lang}, Yöntem={method}, Port={port}",
96
+ "selected_language": "Seçilen dil: {lang}",
97
+ "available_languages": "Kullanılabilir diller: {langs}",
98
+ "select_language_prompt": "Bir dil seçin (varsayılan: {default}): ",
99
+ "invalid_language": "Geçersiz dil seçimi! Varsayılan dil ({default}) kullanılıyor.",
100
+ "available_sharing_methods": "Kullanılabilir paylaşım yöntemleri: gradio, localtunnel, ngrok",
101
+ "select_sharing_method_prompt": "Bir paylaşım yöntemi seçin (varsayılan: {default}): ",
102
+ "enter_ngrok_token_prompt": "Ngrok token'ınızı girin (varsayılan: {default}): ",
103
+ "ngrok_token_required": "Hata: ngrok yöntemi için Ngrok token'ı gerekli!",
104
+ "enter_port_prompt": "Port numarasını girin (varsayılan: {default}): ",
105
+ "opening_previous_url": "Önceki URL açılıyor: {url}",
106
+ "open_url_manually": "Colab'da URL'yi manuel açın: {url}",
107
+ "invalid_previous_url": "Geçersiz önceki URL: {url}, açılmadı.",
108
+ "starting_method": "{method} port {port}'ta başlatılıyor...",
109
+ "process_stopped": "\n🛑 Kullanıcı tarafından işlem durduruldu",
110
+ "selected_port": "Seçilen port: {port}",
111
+ "starting_gradio_with_sharing": "Dahili paylaşım ile Gradio başlatılıyor...",
112
+ "starting_localtunnel": "Localtunnel port {port}'ta başlatılıyor...",
113
+ "share_link": "Paylaşım bağlantısı: {url}",
114
+ "password_ip": "Şifre IP: {ip}",
115
+ "starting_ngrok": "Ngrok port {port}'ta başlatılıyor...",
116
+ "ngrok_url": "Ngrok URL: {url}",
117
+ "ngrok_error": "Ngrok başlatma hatası: {error}",
118
+ "auto_apollo_chunk_size": "Apollo Parça Boyutu",
119
+ "auto_apollo_chunk_size_info": "Apollo iyileştirme için parça boyutu (evrensel model için önerilen: 19)",
120
+ "auto_apollo_overlap": "Apollo Çakışması",
121
+ "auto_enhancing_with_apollo": "Apollo ile iyileştiriliyor ({}/{} dosya)...",
122
+ "auto_apollo_processing_completed": "Apollo işleme tamamlandı!",
123
+ "auto_apollo_overlap_info": "Apollo iyileştirme için çakışma (önerilen: 2)",
124
+ "apollo_chunk_size": "Apollo Parça Boyutu",
125
+ "apollo_chunk_size_info": "Apollo iyileştirme için parça boyutu (evrensel model için önerilen: 19)",
126
+ "apollo_overlap": "Apollo Çakışması",
127
+ "enhance_with_apollo": "Apollo ile İyileştir",
128
+ "apollo_enhancement_info": "Ses kalitesi iyileştirme",
129
+ "enhancing_with_apollo": "Apollo ile iyileştiriliyor ({}/{} dosya)...",
130
+ "apollo_processing_completed": "Apollo işleme tamamlandı!",
131
+ "apollo_overlap_info": "Apollo iyileştirme için çakışma (önerilen: 2)",
132
+ "selected_models": "Seçilen Modeller",
133
+ "save_preset": "Ön Ayar Kaydet",
134
+ "delete_preset": "Ön Ayar Sil",
135
+ "refresh_presets": "Ön Ayarları Güncelle",
136
+ "preset_name": "Ön Ayar Adı",
137
+ "select_preset": "Ön Ayar Seç",
138
+ "add_favorite": "Favorilere Ekle",
139
+ "apply_matchering": "Matchering Uygula",
140
+ "matchering_info": "Ses masteringini ve dengesini iyileştirmek için Matchering uygula",
141
+ "matchering_passes": "Matchering Geçişleri",
142
+ "matchering_passes_info": "Matchering yineleme sayısı (1-5, yüksek değerler kaliteyi artırır ancak daha uzun sürer)",
143
+ "matchering_processing": "Matchering uygulanıyor... ({:.1f}%)",
144
+ "matchering_completed": "✅ Matchering işlemi tamamlandı!",
145
+ "matchering_error": "❌ Matchering sırasında hata: {}",
146
+ "invalid_method": "Hata: Geçersiz yöntem! 'gradio', 'localtunnel' veya 'ngrok' kullan.",
147
+ "language_changed_message": "Dil değiştirildi. Lütfen hücreyi yeniden başlat.",
148
+ "backend_settings": "🚀 Backend Ayarları",
149
+ "inference_backend": "Inference Backend",
150
+ "optimization_mode": "Optimizasyon Modu",
151
+ "optimization_mode_info": "PyTorch optimizasyon modu seçimi",
152
+ "mixed_precision_amp": "Karışık Hassasiyet (AMP)",
153
+ "mixed_precision_info": "2x daha hızlı inference - önerilir",
154
+ "tf32_acceleration": "TF32 Hızlandırma",
155
+ "tf32_acceleration_info": "RTX 30xx+ için ekstra hız artışı",
156
+ "cudnn_benchmark": "cuDNN Benchmark",
157
+ "cudnn_benchmark_info": "İlk çalışmada yavaş, sonraki çalışmalarda çok hızlı",
158
+ "ultra_optimized_pytorch": "ULTRA-OPTİMİZE PyTorch Backend",
159
+ "default_active_max_speed": "Varsayılan olarak aktif - Maksimum hız optimizasyonu",
160
+ "channels_last_mode": "RTX GPU'lar için en hızlı",
161
+ "compile_mode": "PyTorch 2.0+ için ekstra hız",
162
+ "default_mode": "Standart",
163
+ "manual_ensemble_tab": "🎚️ Manuel Birleştirme",
164
+ "phase_fixer_tab": "🔧 Faz Düzeltici",
165
+ "batch_processing_tab": "📦 Toplu İşleme",
166
+ "batch_description": "Aynı model ile birden fazla dosyayı aynı anda işle",
167
+ "batch_input_folder": "Giriş Klasörü",
168
+ "batch_input_folder_placeholder": "Ses dosyalarını içeren klasör yolunu girin",
169
+ "batch_output_folder": "Çıkış Klasörü",
170
+ "batch_output_folder_placeholder": "Çıkış dosyaları için klasör yolunu girin",
171
+ "batch_file_list": "İşlenecek Dosyalar",
172
+ "batch_add_files": "Dosya Ekle",
173
+ "batch_clear_list": "Listeyi Temizle",
174
+ "batch_start": "🚀 Toplu İşlemeyi Başlat",
175
+ "batch_stop": "⏹️ Durdur",
176
+ "batch_progress": "Toplu İşleme İlerlemesi",
177
+ "batch_current_file": "Şu An İşlenen",
178
+ "batch_completed": "✅ Toplu işleme tamamlandı!",
179
+ "batch_stopped": "⏹️ Toplu işleme durduruldu",
180
+ "batch_no_files": "❌ İşlenecek dosya seçilmedi",
181
+ "source_model": "Kaynak Model (Vokal)",
182
+ "source_model_info": "Vokal/faz verisi çıkarmak için model",
183
+ "target_model": "Hedef Model (Enstrümantal)",
184
+ "target_model_info": "Faz düzeltmesi uygulanacak model",
185
  "input_sources": "📂 Giriş Kaynakları",
186
  "refresh": "🔄 Yenile",
187
+ "ensemble_algorithm": "Birleştirme Algoritması",
188
  "select_audio_files": "Ses Dosyalarını Seç",
189
  "available_files": "Mevcut Dosyalar",
190
  "custom_weights": "Özel Ağırlıklar (virgülle ayrılmış)",
191
  "custom_weights_placeholder": "Örnek: 0.8, 1.2, 1.0, ...",
192
  "custom_weights_info": "Eşit ağırlıklar için boş bırakın",
193
  "result_preview_tab": "🎧 Sonuç Önizlemesi",
194
+ "ensembled_output": "Birleştirme Çıktısı",
195
  "processing_log_tab": "📋 İşlem Günlüğü",
196
  "processing_details": "İşlem Ayrıntıları",
197
+ "process_ensemble": "⚡ Birleştirmeyi İşle",
198
  "languages_tab": "🌐 Diller",
199
  "Vocal Models": "Vokal Modeller",
200
  "Instrumental Models": "Enstrümantal Modeller",
 
202
  "Denoise Models": "Gürültü Giderme Modelleri",
203
  "Dereverb Models": "Yankı Giderme Modelleri",
204
  "Other Models": "Diğer Modeller",
205
+ "Other Shapes": "Diğer Modeller",
206
  "ensemble_files_description": "Belirtilen birleştirme yöntemlerini kullanarak birden fazla ses dosyasını tek bir çıktıda birleştirir.",
207
+ "ensemble_files_help": "Birleştirme için tüm ses dosyalarına giden yol",
208
  "ensemble_type_help": "Şunlardan biri: avg_wave, median_wave, min_wave, max_wave, avg_fft, median_fft, min_fft, max_fft",
209
+ "ensemble_weights_help": "Birleştirme oluşturmak için ağırlıklar. Ağırlık sayısı dosya sayısına eşit olmalı",
210
+ "ensemble_output_help": "Birleştirme sonucunun kaydedileceği WAV dosyasının yolu",
211
+ "ensemble_type_print": "Birleştirme türü: {}",
212
  "num_input_files_print": "Giriş dosyası sayısı: {}",
213
  "weights_print": "Ağırlıklar: {}",
214
  "output_file_print": "Çıktı dosyası: {}",
 
216
  "file_not_found_error": "Hata. Dosya bulunamadı: {}. Yolları kontrol et.",
217
  "reading_chunk_print": "Dosyadan parça okunuyor: {} (başlangıç: {}s, süre: {}s)",
218
  "chunk_result_shape_print": "Parça sonucu şekli: {}",
219
+ "ensemble_completed_print": "Birleştirme tamamlandı. Çıktı şuraya kaydedildi: {}",
220
  "file_deletion_error": "{} silinemedi: {}",
221
  "directory_not_exist_warning": "⚠️ Dizin mevcut değil: {}",
222
  "not_a_directory_warning": "⚠️ Yol bir dizin değil: {}",
 
258
  "invalid_url": "❌ Geçersiz URL",
259
  "cookie_file_updated": "✅ Çerez dosyası güncellendi!",
260
  "cookie_installation_error": "⚠️ Çerez yükleme hatası: {}",
261
+ "google_drive_mount_error": "❌ Google Drive bağlantı hatası: {}",
262
  "file_size_zero_error": "Dosya boyutu sıfır veya dosya oluşturulmadı",
263
  "google_drive_error": "❌ Google Drive indirme hatası: {}",
264
  "wav_conversion_failed": "WAV dönüşümü başarısız",
265
  "download_error": "❌ İndirme hatası: {}",
266
+ "file_copied_to_drive": "Dosya Google Drive'a kopyalandı: {}",
267
+ "copy_to_drive_error": "❌ Google Drive'a kopyalama hatası: {}",
268
+ "direct_download_failed": "Dosyanın doğrudan indirilmesi başarısız",
269
+ "direct_download_error": "❌ Doğrudan indirme hatası: {}",
270
+ "invalid_google_drive_url": "❌ Geçersiz Google Drive URL'si",
271
+ "continuing_without_google_drive": "Google Drive bağlantısı yapılamadı, işlem devam ediyor...",
272
+ "skipping_drive_copy_no_mount": "⚠️ Google Drive bağlı değil, Drive'a kopyalama atlanıyor...",
273
  "download_success": "🎉 Başarıyla indirildi!",
274
  "download_failed": "❌ İndirme başarısız",
275
  "no_output_files_found": "❌ Çıktı dosyaları bulunamadı.",
 
292
  "error_occurred_progress_label": "Hata oluştu -- 0.0%",
293
  "minimum_files_required": "⚠️ En az 2 dosya gerekli",
294
  "valid_files_not_found": "❌ Geçerli dosya bulunamadı",
295
+ "starting_ensemble_process": "Birleştirme işlemi başlıyor...",
296
+ "ensembling_progress": "Birleştirme yapılıyor... ({:.1f}%)",
297
+ "finalizing_ensemble_output": "Birleştirme çıktısı tamamlanıyor...",
298
  "success_log": "✅ Başarılı!\n{}",
299
  "error_log": "❌ Hata!\n{}",
300
  "critical_error": "⛔ Kritik hata: {}",
301
+ "ensemble_process_completed": "Birleştirme işlemi tamamlandı!",
302
  "no_models_selected": "❌ Model seçilmedi",
303
  "no_input_audio_provided": "❌ Giriş sesi sağlanmadı",
304
  "loading_model": "Model yükleniyor {}/{}: {}...",
 
311
  "model_output_failed": "{} çıktı üretemedi",
312
  "waiting_for_files": "Tüm dosyaların hazır olması bekleniyor...",
313
  "waiting_for_files_progress_label": "Tüm dosyaların hazır olması bekleniyor... -- 90.0%",
314
+ "performing_ensemble": "Birleştirme gerçekleştiriliyor...",
315
+ "performing_ensemble_progress_label": "Birleştirme gerçekleştiriliyor... -- 92.0%",
316
+ "memory_usage_before_ensemble": "Birleştirme öncesi bellek kullanımı: {}%",
317
+ "memory_usage_after_ensemble": "Birleştirme sonrası bellek kullanımı: {}%",
318
+ "finalizing_ensemble_output_progress_label": "Birleştirme çıktısı tamamlanıyor... -- 98.0%",
319
+ "ensemble_file_creation_failed": "Birleştirme dosyası oluşturulamadı: {}",
320
+ "ensemble_completed_progress_label": "Birleştirme başarıyla tamamlandı! -- 100.0%",
321
  "success_output_created": "✅ Başarılı! Çıktı dosyası oluşturuldu.",
322
+ "custom_models_tab": " Özel Modeller",
323
+ "Custom Models": "Özel Modeller",
324
+ "add_custom_model": " Özel Model Ekle",
325
+ "custom_model_name": "Model Adı",
326
+ "custom_model_name_placeholder": "Modeliniz için bir isim girin",
327
+ "checkpoint_url": "Checkpoint URL (.ckpt/.pth)",
328
+ "checkpoint_url_placeholder": "https://huggingface.co/.../model.ckpt",
329
+ "config_url": "Config URL (.yaml)",
330
+ "config_url_placeholder": "https://huggingface.co/.../config.yaml",
331
+ "custom_py_url": "Özel Python Dosyası URL (isteğe bağlı)",
332
+ "custom_py_url_placeholder": "https://huggingface.co/.../bs_roformer.py",
333
+ "auto_detect_type": "Model tipini otomatik algıla",
334
+ "model_type": "Model Tipi",
335
+ "add_model_btn": " Model Ekle",
336
+ "custom_models_list": "📋 Özel Modeller Listesi",
337
+ "delete_model": "🗑️ Sil",
338
+ "no_custom_models": "Henüz özel model eklenmedi",
339
+ "model_added_success": " Model başarıyla eklendi!",
340
+ "model_deleted_success": "✅ Model başarıyla silindi!",
341
+ "model_add_error": " Model ekleme hatası: {}",
342
+ "model_delete_error": " Model silme hatası: {}",
343
+ "refresh_models": "🔄 Yenile",
344
+ "custom_model_info": "İndirme linkleri sağlayarak özel model ekleyin. Modeller kullanıldığında otomatik olarak indirilecektir.",
345
+ "select_model_to_delete": "Silmek için bir model seçin",
346
+ "downloading_model_file": "Model indiriliyor: {}",
347
+ "downloading_file_progress": "İndiriliyor: {} - {}%",
348
+ "chunk_size_mode": "Parça Boyutu Modu",
349
+ "chunk_size_mode_info": "base: standart ön ayarlar | custom: kendi değerinizi girin | yaml: model YAML'ından oku",
350
+ "chunk_size_custom_label": "Özel Parça Boyutu",
351
+ "chunk_size_custom_info": "Özel bir parça boyutu değeri girin",
352
+ "chunk_size_yaml_label": "YAML Parça Boyutu",
353
+ "chunk_size_yaml_display_info": "İndirmeden sonra modelin YAML yapılandırmasından okunacak",
354
+ "chunk_size_yaml_not_downloaded": " Model YAML'ı henüz indirilmedi — işlem sırasında değer okunacak",
355
+ "chunk_size_yaml_detected": " YAML'dan: {}",
356
+ "remove_favorite": "Favorilerden Çıkar",
357
+ "normal_method": "Normal yöntem",
358
+ "mid_side_method": "Orta-Yan yöntemi",
359
+ "apollo_processing_method": "Apollo İşlem Yöntemi",
360
+ "apollo_normal_model": "Apollo Normal Model",
361
+ "apollo_mid_side_model": "Apollo Orta-Yan Model",
362
+ "scale_factor": "Ölçek Faktörü",
363
+ "scale_factor_info": "Faz ölçeklendirme faktörü (önerilen: 1.4)",
364
+ "source_file_label": "Kaynak Dosya (Vokal)",
365
+ "target_file_label": "Hedef Dosya (Enstrümantal)",
366
+ "run_phase_fixer": "🔧 Faz Düzelticiyi Çalıştır",
367
+ "phase_fixed_output": "Faz Düzeltilmiş Çıktı",
368
+ "phase_fixer_settings": "Faz Düzeltici Ayarları",
369
+ "low_cutoff": "Düşük Kesme Frekansı (Hz)",
370
+ "high_cutoff": "Yüksek Kesme Frekansı (Hz)",
371
+ "low_cutoff_info": "Faz düzeltmesi için düşük frekans kesme",
372
+ "high_cutoff_info": "Faz düzeltmesi için yüksek frekans kesme",
373
+ "no_preset_name_provided": "⚠️ Ön ayar adı girilmedi",
374
+ "no_models_selected_for_preset": "⚠️ Ön ayar için model seçilmedi",
375
+ "preset_saved": " '{}' ön ayarı kaydedildi!",
376
+ "please_upload_both_files": "⚠️ Lütfen hem kaynak hem de hedef dosyaları yükleyin",
377
+ "processing_log_placeholder": "İşlem günlüğü burada görünecek...",
378
+ "enter_preset_name": "Ön ayar adını girin..."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
379
  }
assets/i18n/languages/zn_cn.json CHANGED
@@ -19,6 +19,7 @@
19
  "tta_boost": "TTA增强",
20
  "tta_info": "提高质量,但会减慢处理速度",
21
  "phase_fix": "相位修正",
 
22
  "phase_fix_info": "用于乐器轨的高级相位修正",
23
  "instrumental": "乐器",
24
  "instrumental_info": "通常输出2个结果,但有时只有一个,试试看吧!:)",
@@ -46,7 +47,7 @@
46
  "copy_to_drive": "📂 复制到云盘",
47
  "copy_status": "复制状态",
48
  "files_will_be_copied": "文件将复制到此处...",
49
- "processing_tip": "<b>🔈 处理提示:</b> 对于有噪声的结果,使用<i>去除噪声和效果</i>类别中的<code>bleed_suppressor_v1</code>或<code>denoisedebleed</code>模型来清理输出",
50
  "waiting_for_processing": "等待处理...",
51
  "auto_ensemble_tab": "🤖 自动集成",
52
  "upload_file": "上传文件",
@@ -80,16 +81,94 @@
80
  "downloaded_file": "已下载文件",
81
  "cookie_management": "🍪 Cookie管理",
82
  "upload_cookies_txt": "上传Cookies.txt",
83
- "cookie_info": "<div style=\"margin-left:15px; font-size:0.95em\">**📌 为什么要?**<br>- 访问年龄限制内容<br>- 下载私人/未列出视频<br>- 绕过区限制<br>- 避免YouTube下载限制<br><br>**⚠️ 重要提示**<br>- 切勿分享您的cookie文件!<br>- 在以下情况更新cookie:<br> • 出现“403禁止”错误<br> • 下载突然停止<br> • 看到“会话过期”消息<br><br>**🔄 更新步骤**<br>1. 安装此<a href=\"https://chromewebstore.google.com/detail/get-cookiestxt-clean/ahmnmhfbokciafffnknlekllgcnafnie\" target=\"_blank\">Chrome扩展</a><br>2. 在Chrome中登录YouTube<br>3. 点击扩展图标 → “导出”<br>4. 下载的文件上传到此处<br><br>**⏳ Cookie寿命**<br>- 普通会话:24小时<br>- 敏感操作:1小时<br>- 密码更改:立即失效</div>",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84
  "manual_ensemble_tab": "🎚️ 手动集成",
85
- "input_sources": "📂 输入源",
86
  "refresh": "🔄 刷新",
87
  "ensemble_algorithm": "集成算法",
88
  "select_audio_files": "选择音频文件",
89
  "available_files": "可用文件",
90
- "custom_weights": "自定义权重(逗号分隔)",
91
- "custom_weights_placeholder": "例:0.8, 1.2, 1.0, ...",
92
- "custom_weights_info": "留空表示相等权重",
93
  "result_preview_tab": "🎧 结果预览",
94
  "ensembled_output": "集成输出",
95
  "processing_log_tab": "📋 处理日志",
@@ -99,32 +178,33 @@
99
  "Vocal Models": "人声模型",
100
  "Instrumental Models": "乐器模型",
101
  "4-Stem Models": "4声道模型",
102
- "Denoise Models": "噪模型",
103
  "Dereverb Models": "去混响模型",
104
  "Other Models": "其他模型",
105
- "ensemble_files_description": "使用指定的集成方法将多个音频文件合并为一个输出。",
106
- "ensemble_files_help": "所有集成音频文件的路径",
107
- "ensemble_type_help": "以下之一:avg_wave, median_wave, min_wave, max_wave, avg_fft, median_fft, min_fft, max_fft",
108
- "ensemble_weights_help": "创建集成所需的权重。权重数量必须与文件数量匹配",
109
- "ensemble_output_help": "保存集成结果WAV文件路径",
 
110
  "ensemble_type_print": "集成类型:{}",
111
- "num_input_files_print": "输入文件数:{}",
112
  "weights_print": "权重:{}",
113
  "output_file_print": "输出文件:{}",
114
- "duration_mismatch_error": "所有文件必须具有相同的时",
115
- "file_not_found_error": "错误。找不到文件:{}。检查路径。",
116
- "reading_chunk_print": "读取文件块:{}(开始:{}秒,时:{}秒)",
117
- "chunk_result_shape_print": "块结果形状:{}",
118
- "ensemble_completed_print": "集成完成。输出保存:{}",
119
- "file_deletion_error": "{}无法删除:{}",
120
  "directory_not_exist_warning": "⚠️ 目录不存在:{}",
121
  "not_a_directory_warning": "⚠️ 路径不是目录:{}",
122
- "item_deletion_error": "⚠️ 删除{}时出错:{}",
123
  "old_output_not_exist": "❌ 旧输出文件夹不存在",
124
  "old_outputs_cleared": "✅ 旧输出已成功清除!",
125
  "error": "🔥 错误:{}",
126
- "ffmpeg_error": "FFmpeg错误{}:{}",
127
- "file_saved_successfully": "文件保存成功:{}",
128
  "total_files_found": "找到的文件总数:{}。使用的采样率:{}",
129
  "total_progress": "总进度",
130
  "detailed_pbar_enabled": "详细进度条已启用:{}",
@@ -132,149 +212,135 @@
132
  "cannot_read_track": "无法读取轨道:{}",
133
  "error_message": "错误消息:{}",
134
  "demudding_track": "清理轨道(相位混音 - 乐器):{}",
135
- "elapsed_time": "已用:{:.2f}秒。",
136
  "proc_folder_description": "使用指定模型处理文件夹中的音频文件。",
137
- "model_type_help": "模型类型(bandit, bs_roformer, mdx23c 等)",
138
  "config_path_help": "配置文件路径",
139
  "demud_phaseremix_help": "为乐器启用相位混音",
140
  "start_checkpoint_help": "有效权重的初始检查点",
141
- "input_folder_help": "包含处理混的文件夹",
142
- "audio_path_help": "单个待处理音频文件的路径",
143
  "store_dir_help": "存储结果的路径",
144
  "device_ids_help": "GPU ID列表",
145
- "extract_instrumental_help": "如提供,反转人声以获乐器",
146
- "disable_detailed_pbar_help": "在分离中禁用详细进度条",
147
- "force_cpu_help": "即使CUDA可用也强制使用CPU",
148
- "flac_file_help": "生成FLAC文件而WAV",
149
  "export_format_help": "导出格式和PCM类型",
150
  "pcm_type_help": "FLAC文件的PCM类型",
151
  "use_tta_help": "启用测试时间增强",
152
  "lora_checkpoint_help": "LoRA权重的初始检查点",
153
- "cuda_available": "CUDA可用,使用 --force_cpu 禁用。",
154
- "using_device": "使用设备:{}",
155
  "instruments_print": "乐器:{}",
156
- "model_load_time": "模型加载时间:{:.2f}秒",
157
- "invalid_url": "❌ 无效URL",
158
  "cookie_file_updated": "✅ Cookie文件已更新!",
159
  "cookie_installation_error": "⚠️ Cookie安装错误:{}",
160
- "file_size_zero_error": "文件大小为零或文件未创建",
 
161
  "google_drive_error": "❌ Google Drive下载错误:{}",
162
  "wav_conversion_failed": "WAV转换失败",
163
  "download_error": "❌ 下载错误:{}",
 
 
 
 
 
 
 
164
  "download_success": "🎉 下载成功!",
165
  "download_failed": "❌ 下载失败",
166
  "no_output_files_found": "❌ 未找到输出文件。",
167
- "output_refreshed_successfully": "✅ 输出刷新成功!",
168
  "error_refreshing_output": "❌ 刷新输出时出错:{}",
169
  "starting_audio_separation": "开始音频分离...",
170
- "processing_audio": "处理音频",
171
- "separating_audio": "分离音频... ({:.1f}%)",
172
  "separation_complete": "分离完成!",
173
  "progress_parsing_error": "进度解析错误:{}",
174
  "error_occurred": "发生错误:{}",
175
  "separation_process_completed": "分离过程完成!",
176
- "no_audio_file_error": "❌ 未提供音频文件且输入目录中没有现有文件。",
177
  "no_input_progress_label": "错误:未提供输入 -- 0.0%",
178
- "processing_audio_print": "正在处理音频:{}使用模型:{}",
179
  "starting_audio_separation_progress_label": "开始音频分离... -- 0.0%",
180
- "separating_audio_progress_label": "分离音频... -- {}.0%",
181
- "audio_processing_completed_progress_label": "音频处理完成!-- 100.0%",
182
  "audio_processing_completed": "✅ 音频处理完成!",
183
  "error_occurred_progress_label": "发生错误 -- 0.0%",
184
  "minimum_files_required": "⚠️ 至少需要2个文件",
185
  "valid_files_not_found": "❌ 未找到有效文件",
186
  "starting_ensemble_process": "开始集成过程...",
187
- "ensembling_progress": "集成... ({:.1f}%)",
188
- "finalizing_ensemble_output": "完成集成输出...",
189
  "success_log": "✅ 成功!\n{}",
190
  "error_log": "❌ 错误!\n{}",
191
  "critical_error": "⛔ 严重错误:{}",
192
  "ensemble_process_completed": "集成过程完成!",
193
  "no_models_selected": "❌ 未选择模型",
194
  "no_input_audio_provided": "❌ 未提供输入音频",
195
- "loading_model": "加载模型 {}/{}:{}...",
196
- "loading_model_progress_label": "加载模型 {}/{}:{}... -- {}.0%",
197
  "completed_model": "模型完成 {}/{}:{}",
198
  "completed_model_progress_label": "模型完成 {}/{}:{} -- {}.0%",
199
- "running_command": "行命令:{}",
200
  "model_failed": "模型 {} 失败:{}",
201
- "critical_error_with_model": "{} 出现严重错误:{}",
202
- "model_output_failed": "{} 未能输出",
203
  "waiting_for_files": "等待所有文件准备就绪...",
204
  "waiting_for_files_progress_label": "等待所有文件准备就绪... -- 90.0%",
205
- "performing_ensemble": "执行集成...",
206
- "performing_ensemble_progress_label": "执行集成... -- 92.0%",
207
  "memory_usage_before_ensemble": "集成前内存使用:{}%",
208
  "memory_usage_after_ensemble": "集成后内存使用:{}%",
209
- "finalizing_ensemble_output_progress_label": "完成集成输出... -- 98.0%",
210
- "ensemble_file_creation_failed": "集成文件创建失败:{}",
211
- "ensemble_completed_progress_label": "集成成功完成!-- 100.0%",
212
  "success_output_created": "✅ 成功!输出文件已创建。",
213
- "drive_mounted_copying_ensemble": "Google Drive已挂载。正在制集成输出...",
214
- "drive_already_mounted_copying_ensemble": "Google Drive已挂载。正在复制集成输出...",
215
- "no_ensemble_output_files_found": "❌ 未找到集成输出文件",
216
- "ensemble_output_copied": "✅ 集成出已复制到 {}",
217
- "error_copying_ensemble_output": "❌ 复制集成出时出错:{}",
218
- "drive_mounted_copying_files": "Google Drive已挂载。正在复制文件...",
219
- "drive_already_mounted_copying_files": "Google Drive已挂载。正在复制文件...",
220
- "files_copied_to_drive": "文件已复制到 {}",
221
- "error_copying_files": "❌ 复制文件时出错:{}",
222
- "mounting_drive": "挂载Google Drive...",
223
- "settings_saved": "设置已保存至 config.json:语言={lang},方法={method},端口={port}",
224
- "selected_language": "所选语言:{lang}",
225
- "available_languages": "可用语言:{langs}",
226
- "select_language_prompt": "选择语言(默认:{default}):",
227
- "invalid_language": "无效的语言选择使用默认语言({default})。",
228
- "available_sharing_methods": "可用共享方法:gradio, localtunnel, ngrok",
229
- "select_sharing_method_prompt": "选���共享方法(默认:{default}):",
230
- "enter_ngrok_token_prompt": "输入您的Ngrok令牌默认:{default}",
231
- "ngrok_token_required": "错误:ngrok方法需要Ngrok令牌!",
232
- "enter_port_prompt": "输入端口号默认:{default}",
233
- "opening_previous_url": "打开上一个URL:{url}",
234
- "open_url_manually": "在Colab中手动打开URL:{url}",
235
- "invalid_previous_url": "无效的上一个URL:{url},未打开。",
236
- "starting_method": "在端口 {port} 上启动 {method}...",
237
- "process_stopped": "\n🛑 用户停止了进程",
238
- "selected_port": "所选端口:{port}",
239
- "starting_gradio_with_sharing": "启动带有内置共享的Gradio...",
240
- "starting_localtunnel": "在端口 {port} 上启动Localtunnel...",
241
- "share_link": "共享链接:{url}",
242
- "password_ip": "密码IP:{ip}",
243
- "starting_ngrok": "在端口 {port} 上启动Ngrok...",
244
- "ngrok_url": "Ngrok URL:{url}",
245
- "ngrok_error": "Ngrok时出错:{error}",
246
- "apollo_chunk_size": "Apollo分块大小",
247
- "apollo_chunk_size_info": "Apollo改进的分块大小(通用模型推荐:19)",
248
- "apollo_overlap": "Apollo重叠",
249
- "enhancing_with_apollo": "使用Apollo增强({}/{} 文件)...",
250
- "apollo_processing_completed": "Apollo处理完成!",
251
- "apollo_overlap_info": "Apollo改进的重叠(推荐:2)",
252
- "invalid_method": "错误:无效方法!使用 'gradio'、'localtunnel' 或 'ngrok'。",
253
- "apollo_enhancement_settings": "Apollo增强设置",
254
- "enhance_with_apollo": "使用Apollo增强",
255
- "enhance_with_apollo_info": "分离后启用Apollo以增强音频",
256
- "apollo_method": "Apollo方法",
257
- "apollo_method_info": "选择Apollo处理方法",
258
- "normal_method": "普通方法",
259
- "mid_side_method": "中/侧方法",
260
- "apollo_normal_model": "Apollo普通模型",
261
- "apollo_normal_model_info": "用于普通Apollo处理的模型",
262
- "apollo_midside_model": "Apollo中/侧模型",
263
- "apollo_enhancement_info": "音质增强",
264
- "selected_models": "选定模型",
265
- "save_preset": "保存预设",
266
- "delete_preset": "删除预设",
267
- "refresh_presets": "刷新预设",
268
- "preset_name": "预设名称",
269
- "select_preset": "选择预设",
270
- "add_favorite": "添加到收藏",
271
- "apollo_midside_model_info": "用于中/侧处理的模型(可选)",
272
- "language_changed_message": "语言已更改。请重新启动单元。",
273
- "apply_matchering": "应用 Matchering",
274
- "matchering_info": "应用 Matchering 以提升音频母带处理和平衡",
275
- "matchering_passes": "Matchering 迭代次数",
276
- "matchering_passes_info": "Matchering 迭代次数(1-5,值越高质量越好,但耗时更长)",
277
- "matchering_processing": "正在应用 Matchering... ({:.1f}%)",
278
- "matchering_completed": "✅ Matchering 处理完成!",
279
- "matchering_error": "❌ Matchering 过程中发生错误:{}"
280
  }
 
19
  "tta_boost": "TTA增强",
20
  "tta_info": "提高质量,但会减慢处理速度",
21
  "phase_fix": "相位修正",
22
+ "use_phase_correction": "应用高级相位修正",
23
  "phase_fix_info": "用于乐器轨的高级相位修正",
24
  "instrumental": "乐器",
25
  "instrumental_info": "通常输出2个结果,但有时只有一个,试试看吧!:)",
 
47
  "copy_to_drive": "📂 复制到云盘",
48
  "copy_status": "复制状态",
49
  "files_will_be_copied": "文件将复制到此处...",
50
+ "processing_tip": "<b>🔈 处理提示:</b> 对于有噪声的结果,使用<i>去除噪声和效果</i>类别中的<code>bleed_suppressor_v1</code>或<code>denoisedebleed</code>模型来清理输出",
51
  "waiting_for_processing": "等待处理...",
52
  "auto_ensemble_tab": "🤖 自动集成",
53
  "upload_file": "上传文件",
 
81
  "downloaded_file": "已下载文件",
82
  "cookie_management": "🍪 Cookie管理",
83
  "upload_cookies_txt": "上传Cookies.txt",
84
+ "cookie_info": "<div style=\"margin-left:15px; font-size:0.95em\">**📌 为什么需要?**<br>- 访问年龄限制内容<br>- 下载私人/未列出视频<br>- 绕过地区限制<br>- 避免YouTube下载限制<br><br>**⚠️ 重要说明**<br>- 切勿分享您的Cookie文件!<br>- 更新Cookie时:<br> • 收到“403禁止”错误<br> • 下载突然停止<br> • 看到“会话过期”消息<br><br>**🔄 更新步骤**<br>1. 安装此<a href=\"https://chromewebstore.google.com/detail/get-cookiestxt-clean/ahmnmhfbokciafffnknlekllgcnafnie\" target=\"_blank\">Chrome扩展</a><br>2. 在Chrome中登录YouTube<br>3. 点击扩展图标 → “导出”<br>4. 在此处上传下载的文件<br><br>**⏳ Cookie寿命**<br>- 正常会话:24小时<br>- 敏感操作:1小时<br>- 密码更改:立即更新</div>",
85
+ "ensemble_output_copied": "✅ 集成输出已复制到 {}",
86
+ "error_copying_ensemble_output": "❌ 复制集成输出时出错:{}",
87
+ "drive_mounted_copying_ensemble": "Google Drive已挂载。正在复制集成输出...",
88
+ "drive_already_mounted_copying_ensemble": "Google Drive已挂载。正在复制集成输出...",
89
+ "no_ensemble_output_files_found": "❌ 未找到集成输出文件。",
90
+ "drive_mounted_copying_files": "Google Drive已挂载。正在复制文件...",
91
+ "drive_already_mounted_copying_files": "Google Drive已挂载。正在复制文件...",
92
+ "files_copied_to_drive": "✅ 文件已复制到 {}",
93
+ "error_copying_files": "❌ 复制文件时出错:{}",
94
+ "mounting_drive": "正在挂载Google Drive...",
95
+ "settings_saved": "设置已保存到config.json:语言={lang},方法={method},端口={port}",
96
+ "selected_language": "已选择语言:{lang}",
97
+ "available_languages": "可用语言:{langs}",
98
+ "select_language_prompt": "选择语言(默认:{default}):",
99
+ "invalid_language": "无效语言选择!使用默认语言({default})。",
100
+ "available_sharing_methods": "可用共享方法:gradio, localtunnel, ngrok",
101
+ "select_sharing_method_prompt": "选择共享方法(默认:{default}):",
102
+ "enter_ngrok_token_prompt": "输入您的Ngrok令牌(默认:{default}):",
103
+ "ngrok_token_required": "错误:ngrok方法需要Ngrok令牌!",
104
+ "enter_port_prompt": "输入端口号(默认:{default}):",
105
+ "opening_previous_url": "打开上一个URL:{url}",
106
+ "open_url_manually": "在Colab中手动打开URL:{url}",
107
+ "invalid_previous_url": "无效的上一个URL:{url},未打开。",
108
+ "starting_method": "在端口 {port} 上启动 {method}...",
109
+ "process_stopped": "\n🛑 用户停止了进程",
110
+ "selected_port": "所选端口:{port}",
111
+ "starting_gradio_with_sharing": "启动带有内置共享的Gradio...",
112
+ "starting_localtunnel": "在端口 {port} 上启动Localtunnel...",
113
+ "share_link": "共享链接:{url}",
114
+ "password_ip": "密码IP:{ip}",
115
+ "starting_ngrok": "在端口 {port} 上启动Ngrok...",
116
+ "ngrok_url": "Ngrok URL:{url}",
117
+ "ngrok_error": "启动Ngrok时出错:{error}",
118
+ "auto_apollo_chunk_size": "Apollo分块大小",
119
+ "auto_apollo_chunk_size_info": "Apollo增强的分块大小(通用模型推荐:19)",
120
+ "auto_apollo_overlap": "Apollo重叠",
121
+ "auto_enhancing_with_apollo": "使用Apollo增强({}/{} 文件)...",
122
+ "auto_apollo_processing_completed": "Apollo处理完成!",
123
+ "auto_apollo_overlap_info": "Apollo增强的重叠(推荐:2)",
124
+ "apollo_chunk_size": "Apollo分块大小",
125
+ "apollo_chunk_size_info": "Apollo增强的分块大小(通用模型推荐:19)",
126
+ "apollo_overlap": "Apollo重叠",
127
+ "enhance_with_apollo": "使用Apollo增强",
128
+ "apollo_enhancement_info": "音质增强",
129
+ "enhancing_with_apollo": "使用Apollo增强({}/{} 文件)...",
130
+ "apollo_processing_completed": "Apollo处理完成!",
131
+ "apollo_overlap_info": "Apollo增强的重叠(推荐:2)",
132
+ "selected_models": "选定模型",
133
+ "save_preset": "保存预设",
134
+ "delete_preset": "删除预设",
135
+ "refresh_presets": "刷新预设",
136
+ "preset_name": "预设名称",
137
+ "select_preset": "选择预设",
138
+ "add_favorite": "添加到收藏",
139
+ "apply_matchering": "应用 Matchering",
140
+ "matchering_info": "应用 Matchering 以提升音频母带处理和平衡",
141
+ "matchering_passes": "Matchering 迭代次数",
142
+ "matchering_passes_info": "Matchering 迭代次数(1-5,值越高质量越好,但耗时更长)",
143
+ "matchering_processing": "正在应用 Matchering... ({:.1f}%)",
144
+ "matchering_completed": "✅ Matchering 处理完成!",
145
+ "matchering_error": "❌ Matchering 过程中发生错误:{}",
146
+ "invalid_method": "错误:无效方法!使用 'gradio'、'localtunnel' 或 'ngrok'。",
147
+ "language_changed_message": "语言已更改。请重新启动单元。",
148
+ "backend_settings": "🚀 后端设置",
149
+ "inference_backend": "推理后端",
150
+ "optimization_mode": "优化模式",
151
+ "optimization_mode_info": "PyTorch 优化模式选择",
152
+ "mixed_precision_amp": "混合精度 (AMP)",
153
+ "mixed_precision_info": "2x 更快推理 - 推荐",
154
+ "tf32_acceleration": "TF32 加速",
155
+ "tf32_acceleration_info": "RTX 30xx+ 的额外速度提升",
156
+ "cudnn_benchmark": "cuDNN 基准测试",
157
+ "cudnn_benchmark_info": "首次运行较慢,后续运行更快",
158
+ "ultra_optimized_pytorch": "超优化 PyTorch 后端",
159
+ "default_active_max_speed": "默认激活 - 最大速度优化",
160
+ "channels_last_mode": "RTX GPU 最快",
161
+ "compile_mode": "PyTorch 2.0+ 额外速度",
162
+ "default_mode": "标准",
163
  "manual_ensemble_tab": "🎚️ 手动集成",
164
+ "input_sources": "📂 输入源",
165
  "refresh": "🔄 刷新",
166
  "ensemble_algorithm": "集成算法",
167
  "select_audio_files": "选择音频文件",
168
  "available_files": "可用文件",
169
+ "custom_weights": "自定义权重(逗号分隔)",
170
+ "custom_weights_placeholder": "例:0.8, 1.2, 1.0, ...",
171
+ "custom_weights_info": "留空以使用相等权重",
172
  "result_preview_tab": "🎧 结果预览",
173
  "ensembled_output": "集成输出",
174
  "processing_log_tab": "📋 处理日志",
 
178
  "Vocal Models": "人声模型",
179
  "Instrumental Models": "乐器模型",
180
  "4-Stem Models": "4声道模型",
181
+ "Denoise Models": "去噪模型",
182
  "Dereverb Models": "去混响模型",
183
  "Other Models": "其他模型",
184
+ "Other Shapes": "其他模型",
185
+ "ensemble_files_description": "使用指定的集成方法将多个音频文件合并为单个输出。",
186
+ "ensemble_files_help": "所有音频文件的路径用于集成",
187
+ "ensemble_type_help": "其中之一:avg_wave, median_wave, min_wave, max_wave, avg_fft, median_fft, min_fft, max_fft",
188
+ "ensemble_weights_help": "创建集成的权重。权重数量必须等于文件数量",
189
+ "ensemble_output_help": "存储集成结果的WAV文件路径",
190
  "ensemble_type_print": "集成类型:{}",
191
+ "num_input_files_print": "输入文件数:{}",
192
  "weights_print": "权重:{}",
193
  "output_file_print": "输出文件:{}",
194
+ "duration_mismatch_error": "所有文件必须具有相同的持续时间",
195
+ "file_not_found_error": "错误。找不到文件:{}。检查路径。",
196
+ "reading_chunk_print": "从文件读取块:{}(开始:{}秒,持续时间:{}秒)",
197
+ "chunk_result_shape_print": "块结果形状:{}",
198
+ "ensemble_completed_print": "集成完成。输出保存:{}",
199
+ "file_deletion_error": "无法删除 {}:{}",
200
  "directory_not_exist_warning": "⚠️ 目录不存在:{}",
201
  "not_a_directory_warning": "⚠️ 路径不是目录:{}",
202
+ "item_deletion_error": "⚠️ 删除 {} 时出错:{}",
203
  "old_output_not_exist": "❌ 旧输出文件夹不存在",
204
  "old_outputs_cleared": "✅ 旧输出已成功清除!",
205
  "error": "🔥 错误:{}",
206
+ "ffmpeg_error": "FFmpeg错误 ({}):{}",
207
+ "file_saved_successfully": "文件成功保存:{}",
208
  "total_files_found": "找到的文件总数:{}。使用的采样率:{}",
209
  "total_progress": "总进度",
210
  "detailed_pbar_enabled": "详细进度条已启用:{}",
 
212
  "cannot_read_track": "无法读取轨道:{}",
213
  "error_message": "错误消息:{}",
214
  "demudding_track": "清理轨道(相位混音 - 乐器):{}",
215
+ "elapsed_time": "耗时:{:.2f} 秒。",
216
  "proc_folder_description": "使用指定模型处理文件夹中的音频文件。",
217
+ "model_type_help": "模型类型(bandit、bs_roformer、mdx23c等)",
218
  "config_path_help": "配置文件路径",
219
  "demud_phaseremix_help": "为乐器启用相位混音",
220
  "start_checkpoint_help": "有效权重的初始检查点",
221
+ "input_folder_help": "包含待处理混音的文件夹",
222
+ "audio_path_help": "待处理的单个音频文件的路径",
223
  "store_dir_help": "存储结果的路径",
224
  "device_ids_help": "GPU ID列表",
225
+ "extract_instrumental_help": "如果提供,反转人声以获取乐器",
226
+ "disable_detailed_pbar_help": "在demix中禁用详细进度条",
227
+ "force_cpu_help": "即使CUDA可用也强制使用CPU",
228
+ "flac_file_help": "输出FLAC文件而不是WAV",
229
  "export_format_help": "导出格式和PCM类型",
230
  "pcm_type_help": "FLAC文件的PCM类型",
231
  "use_tta_help": "启用测试时间增强",
232
  "lora_checkpoint_help": "LoRA权重的初始检查点",
233
+ "cuda_available": "CUDA可用,使用 --force_cpu 禁用。",
234
+ "using_device": "使用设备:{}",
235
  "instruments_print": "乐器:{}",
236
+ "model_load_time": "模型加载时间:{:.2f} 秒",
237
+ "invalid_url": "❌ 无效URL",
238
  "cookie_file_updated": "✅ Cookie文件已更新!",
239
  "cookie_installation_error": "⚠️ Cookie安装错误:{}",
240
+ "google_drive_mount_error": "❌ Google Drive连接错误:{}",
241
+ "file_size_zero_error": "文件大小为零或未创建文件",
242
  "google_drive_error": "❌ Google Drive下载错误:{}",
243
  "wav_conversion_failed": "WAV转换失败",
244
  "download_error": "❌ 下载错误:{}",
245
+ "file_copied_to_drive": "文件已复制到Google Drive:{}",
246
+ "copy_to_drive_error": "❌ 复制到Google Drive时出错:{}",
247
+ "direct_download_failed": "直接下载文件失败",
248
+ "direct_download_error": "❌ 直接下载错误:{}",
249
+ "invalid_google_drive_url": "❌ 无效的 Google Drive URL",
250
+ "continuing_without_google_drive": "Google Drive 连接失败,继续不使用它...",
251
+ "skipping_drive_copy_no_mount": "⚠️ Google Drive 未挂载,跳过复制到 Drive...",
252
  "download_success": "🎉 下载成功!",
253
  "download_failed": "❌ 下载失败",
254
  "no_output_files_found": "❌ 未找到输出文件。",
255
+ "output_refreshed_successfully": "✅ 输出成功刷新!",
256
  "error_refreshing_output": "❌ 刷新输出时出错:{}",
257
  "starting_audio_separation": "开始音频分离...",
258
+ "processing_audio": "正在处理音频",
259
+ "separating_audio": "正在分离音频... ({:.1f}%)",
260
  "separation_complete": "分离完成!",
261
  "progress_parsing_error": "进度解析错误:{}",
262
  "error_occurred": "发生错误:{}",
263
  "separation_process_completed": "分离过程完成!",
264
+ "no_audio_file_error": "❌ 未提供音频文件且输入目录中没有现有文件。",
265
  "no_input_progress_label": "错误:未提供输入 -- 0.0%",
266
+ "processing_audio_print": "正在处理音频来自:{} 使用模型:{}",
267
  "starting_audio_separation_progress_label": "开始音频分离... -- 0.0%",
268
+ "separating_audio_progress_label": "正在分离音频... -- {}.0%",
269
+ "audio_processing_completed_progress_label": "音频处理完成! -- 100.0%",
270
  "audio_processing_completed": "✅ 音频处理完成!",
271
  "error_occurred_progress_label": "发生错误 -- 0.0%",
272
  "minimum_files_required": "⚠️ 至少需要2个文件",
273
  "valid_files_not_found": "❌ 未找到有效文件",
274
  "starting_ensemble_process": "开始集成过程...",
275
+ "ensembling_progress": "正在集成... ({:.1f}%)",
276
+ "finalizing_ensemble_output": "正在完成集成输出...",
277
  "success_log": "✅ 成功!\n{}",
278
  "error_log": "❌ 错误!\n{}",
279
  "critical_error": "⛔ 严重错误:{}",
280
  "ensemble_process_completed": "集成过程完成!",
281
  "no_models_selected": "❌ 未选择模型",
282
  "no_input_audio_provided": "❌ 未提供输入音频",
283
+ "loading_model": "正在加载模型 {}/{}:{}...",
284
+ "loading_model_progress_label": "正在加载模型 {}/{}:{}... -- {}.0%",
285
  "completed_model": "模型完成 {}/{}:{}",
286
  "completed_model_progress_label": "模型完成 {}/{}:{} -- {}.0%",
287
+ "running_command": "正在运行命令:{}",
288
  "model_failed": "模型 {} 失败:{}",
289
+ "critical_error_with_model": "{} 严重错误:{}",
290
+ "model_output_failed": "{} 无法产生输出",
291
  "waiting_for_files": "等待所有文件准备就绪...",
292
  "waiting_for_files_progress_label": "等待所有文件准备就绪... -- 90.0%",
293
+ "performing_ensemble": "正在执行集成...",
294
+ "performing_ensemble_progress_label": "正在执行集成... -- 92.0%",
295
  "memory_usage_before_ensemble": "集成前内存使用:{}%",
296
  "memory_usage_after_ensemble": "集成后内存使用:{}%",
297
+ "finalizing_ensemble_output_progress_label": "正在完成集成输出... -- 98.0%",
298
+ "ensemble_file_creation_failed": "无法创建集成文件:{}",
299
+ "ensemble_completed_progress_label": "集成成功完成! -- 100.0%",
300
  "success_output_created": "✅ 成功!输出文件已创建。",
301
+ "phase_fixer_tab": "🔧 相位修正器",
302
+ "batch_processing_tab": "📦 批量处理",
303
+ "batch_description": "使用相同模型同时处理多个文件",
304
+ "batch_input_folder": "输入文件夹",
305
+ "batch_input_folder_placeholder": "输入包含音频文件的文件夹路径",
306
+ "batch_output_folder": "输出文件夹",
307
+ "batch_output_folder_placeholder": "输入输出文件的文件夹路径",
308
+ "batch_file_list": "待处理文件",
309
+ "batch_add_files": "添加文件",
310
+ "batch_clear_list": "清空列表",
311
+ "batch_start": "🚀 开始批量处理",
312
+ "batch_stop": "⏹️ 停止",
313
+ "batch_progress": "批量处理进度",
314
+ "batch_current_file": "当前处理中",
315
+ "batch_completed": "✅ 批量处理完成!",
316
+ "batch_stopped": "⏹️ 批量处理已停止",
317
+ "batch_no_files": "❌ 未选择要处理的文件",
318
+ "source_model": "源模型(人声)",
319
+ "source_model_info": "用于提取人声/相位数据的模型",
320
+ "target_model": "目标模型(乐器)",
321
+ "target_model_info": "应用相位校正的模型",
322
+ "custom_models_tab": "⚡ 自定义模型",
323
+ "Custom Models": "自定义模型",
324
+ "add_custom_model": "➕ 添加自定义模型",
325
+ "custom_model_name": "模型名称",
326
+ "custom_model_name_placeholder": "为您的模型输入名称",
327
+ "checkpoint_url": "检查点 URL (.ckpt/.pth)",
328
+ "checkpoint_url_placeholder": "https://huggingface.co/.../model.ckpt",
329
+ "config_url": "配置 URL (.yaml)",
330
+ "config_url_placeholder": "https://huggingface.co/.../config.yaml",
331
+ "custom_py_url": "自定义 Python 文件 URL(可选)",
332
+ "custom_py_url_placeholder": "https://huggingface.co/.../bs_roformer.py",
333
+ "auto_detect_type": "自动检测模型类型",
334
+ "model_type": "模型类型",
335
+ "add_model_btn": "➕ 添加模型",
336
+ "custom_models_list": "📋 自定义模型列表",
337
+ "delete_model": "🗑️ 删除",
338
+ "no_custom_models": "尚未添加自定义模型",
339
+ "model_added_success": "✅ 模型添加成功!",
340
+ "model_deleted_success": "✅ 模型删除成功!",
341
+ "model_add_error": "❌ 添加模型错误:{}",
342
+ "model_delete_error": "❌ 删除模型错误:{}",
343
+ "refresh_models": "🔄 刷新",
344
+ "custom_model_info": "通过提供下载链接添加自定义模型。模型在使用时将自动下载。",
345
+ "select_model_to_delete": "选择要删除的模型"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
346
  }
benchmark_pytorch.py ADDED
@@ -0,0 +1,252 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # coding: utf-8
2
+ __author__ = 'PyTorch Optimization Benchmark Tool'
3
+
4
+ import argparse
5
+ import time
6
+ import torch
7
+ import numpy as np
8
+ from utils import get_model_from_config
9
+ from pytorch_backend import (
10
+ PyTorchBackend,
11
+ PyTorchOptimizer,
12
+ benchmark_pytorch_optimizations,
13
+ get_model_info
14
+ )
15
+ import sys
16
+
17
+
18
def load_checkpoint(checkpoint_path: str, model, device: str):
    """Load weights from a checkpoint file into ``model`` and prepare it for inference.

    The checkpoint may be a bare state dict, or a dict that wraps the state
    dict under one of the keys ``'state_dict'``, ``'model'`` or ``'state'``
    (checked in that order).

    Args:
        checkpoint_path: Path to the checkpoint file read by ``torch.load``.
        model: Module that receives the weights via ``load_state_dict``.
        device: Device string used both as ``map_location`` and as the
            target of the final ``.to(device)``.

    Returns:
        The same model, switched to eval mode and moved to ``device``.
    """
    print(f"Loading checkpoint from: {checkpoint_path}")

    # NOTE(security): weights_only=False lets torch.load unpickle arbitrary
    # objects, so only trusted checkpoint files should be passed in here.
    checkpoint = torch.load(checkpoint_path, map_location=device, weights_only=False)

    # Handle different checkpoint formats: unwrap the first known container key.
    state_dict = checkpoint
    if isinstance(checkpoint, dict):
        for wrapper_key in ('state_dict', 'model', 'state'):
            if wrapper_key in checkpoint:
                state_dict = checkpoint[wrapper_key]
                break

    # strict=False tolerates key mismatches; surface them instead of silently
    # reporting success for a model that may be only partially initialised.
    load_result = model.load_state_dict(state_dict, strict=False)
    missing_keys = list(getattr(load_result, 'missing_keys', None) or [])
    unexpected_keys = list(getattr(load_result, 'unexpected_keys', None) or [])
    if missing_keys:
        print(f"⚠ {len(missing_keys)} model parameter(s) not found in checkpoint: {missing_keys[:5]}")
    if unexpected_keys:
        print(f"⚠ {len(unexpected_keys)} checkpoint key(s) not used by model: {unexpected_keys[:5]}")

    model = model.eval().to(device)

    print("✓ Checkpoint loaded successfully")
    return model
42
+
43
+
44
def _beats_default(results, mode):
    """Return True when ``mode`` has a usable timing that is lower than the 'default' timing."""
    mode_time = results.get(mode)
    default_time = results.get('default')
    # A missing/None timing on either side means no comparison is possible.
    if not mode_time or default_time is None:
        return False
    return mode_time < default_time


def benchmark_optimization_modes(args):
    """
    Benchmark different PyTorch optimization modes.

    Parses the command line, loads the model and checkpoint, runs
    ``benchmark_pytorch_optimizations`` and prints per-mode timings plus
    recommendations.

    Args:
        args: List of command-line argument strings, or ``None`` to read
            them from ``sys.argv``.

    Returns:
        None. All results are printed. Returns early when a CUDA device is
        requested but CUDA is not available.
    """
    parser = argparse.ArgumentParser(description="Benchmark PyTorch Optimization Modes")
    parser.add_argument("--model_type", type=str, required=True, help="Model type")
    parser.add_argument("--config_path", type=str, required=True, help="Config path")
    parser.add_argument("--start_check_point", type=str, required=True, help="Checkpoint path (.ckpt)")
    parser.add_argument("--device", type=str, default='cuda:0', help="Device")
    parser.add_argument("--num_iterations", type=int, default=100, help="Number of benchmark iterations")
    parser.add_argument("--warmup_iterations", type=int, default=10, help="Number of warmup iterations")
    parser.add_argument("--chunk_size", type=int, default=None, help="Override chunk size (optional)")
    parser.add_argument("--batch_size", type=int, default=1, help="Batch size")

    # parse_args(None) falls back to sys.argv[1:], so one call covers both cases.
    args = parser.parse_args(args)

    # Check device
    if args.device.startswith('cuda') and not torch.cuda.is_available():
        print("❌ CUDA is not available!")
        return

    print("="*60)
    print("PyTorch Optimization Benchmark Tool")
    print("="*60)
    print(f"Model Type: {args.model_type}")
    print(f"Checkpoint: {args.start_check_point}")
    print(f"Device: {args.device}")
    print(f"Iterations: {args.num_iterations}")
    print("="*60)

    # Load model
    print("\n📦 Loading model...")
    model, config = get_model_from_config(args.model_type, args.config_path)
    model = load_checkpoint(args.start_check_point, model, args.device)

    # Get model info
    model_info = get_model_info(model)
    print(f"\n📊 Model Information:")
    print(f" Total Parameters: {model_info['total_parameters']:,}")
    print(f" Trainable Parameters: {model_info['trainable_parameters']:,}")
    print(f" Model Size: {model_info['model_size_mb']:.2f} MB")
    print(f" Device: {model_info['device']}")
    print(f" Dtype: {model_info['dtype']}")

    # Get chunk size: the CLI override wins, otherwise use the config value.
    chunk_size = args.chunk_size if args.chunk_size else config.audio.chunk_size

    num_channels = 2
    input_shape = (args.batch_size, num_channels, chunk_size)

    print(f"\n📊 Test Configuration:")
    print(f" Batch Size: {args.batch_size}")
    print(f" Channels: {num_channels}")
    print(f" Chunk Size: {chunk_size}")
    print(f" Input Shape: {input_shape}")

    # Benchmark different optimization modes
    print("\n" + "="*60)
    print("Benchmarking Optimization Modes")
    print("="*60)

    results = benchmark_pytorch_optimizations(
        model=model,
        input_shape=input_shape,
        device=args.device,
        num_iterations=args.num_iterations,
        warmup_iterations=args.warmup_iterations
    )

    # Display results
    print("\n" + "="*60)
    print("📈 Benchmark Results")
    print("="*60)

    # Speedups are reported against the 'default' timing; if that one is
    # unavailable, the first mode with a timing becomes the reference.
    baseline = results.get('default')
    for mode, time_ms in results.items():
        if time_ms is None:
            continue
        if baseline is None:
            baseline = time_ms
        speedup = baseline / time_ms if time_ms > 0 else 0
        improvement = ((baseline - time_ms) / baseline) * 100 if baseline > 0 else 0

        print(f"\n{mode.upper()}:")
        print(f" Average Time: {time_ms:.2f} ms")
        print(f" Speedup: {speedup:.2f}x")
        print(f" Improvement: {improvement:.1f}%")

    print("\n" + "="*60)

    # Recommendations
    print("\n💡 Recommendations:")

    if _beats_default(results, 'compile'):
        print(" ✓ Use 'compile' mode for best performance (PyTorch 2.0+)")
    elif _beats_default(results, 'channels_last'):
        print(" ✓ Use 'channels_last' mode for better performance")
    else:
        print(" ✓ Default mode is optimal for your configuration")

    if args.device.startswith('cuda'):
        print(" ✓ Enable TF32 for Ampere GPUs (RTX 30xx+)")
        print(" ✓ Enable cuDNN benchmark for consistent input sizes")

    print("\n✅ Benchmark completed!")
154
+
155
+
156
+ def test_optimization_modes(args):
157
+ """
158
+ Test different optimization modes with verification.
159
+ """
160
+ parser = argparse.ArgumentParser(description="Test PyTorch Optimization Modes")
161
+ parser.add_argument("--model_type", type=str, required=True, help="Model type")
162
+ parser.add_argument("--config_path", type=str, required=True, help="Config path")
163
+ parser.add_argument("--start_check_point", type=str, required=True, help="Checkpoint path (.ckpt)")
164
+ parser.add_argument("--device", type=str, default='cuda:0', help="Device")
165
+
166
+ if args is None:
167
+ args = parser.parse_args()
168
+ else:
169
+ args = parser.parse_args(args)
170
+
171
+ print("="*60)
172
+ print("PyTorch Optimization Mode Test")
173
+ print("="*60)
174
+
175
+ # Load model
176
+ print("\n📦 Loading model...")
177
+ model, config = get_model_from_config(args.model_type, args.config_path)
178
+ model = load_checkpoint(args.start_check_point, model, args.device)
179
+
180
+ chunk_size = config.audio.chunk_size
181
+ input_shape = (1, 2, chunk_size)
182
+ dummy_input = torch.randn(*input_shape).to(args.device)
183
+
184
+ # Test each optimization mode
185
+ modes = ['default', 'compile', 'channels_last']
186
+ outputs = {}
187
+
188
+ for mode in modes:
189
+ print(f"\n{'='*60}")
190
+ print(f"Testing: {mode}")
191
+ print('='*60)
192
+
193
+ try:
194
+ backend = PyTorchBackend(device=args.device, optimize_mode=mode)
195
+
196
+ if mode == 'jit':
197
+ backend.optimize_model(model, example_input=dummy_input, use_amp=True)
198
+ else:
199
+ backend.optimize_model(
200
+ model,
201
+ use_amp=True,
202
+ use_channels_last=(mode == 'channels_last')
203
+ )
204
+
205
+ # Run inference
206
+ with torch.no_grad():
207
+ output = backend(dummy_input)
208
+
209
+ outputs[mode] = output
210
+ print(f"✓ {mode} successful")
211
+ print(f" Output shape: {output.shape}")
212
+ print(f" Output range: [{output.min().item():.6f}, {output.max().item():.6f}]")
213
+
214
+ except Exception as e:
215
+ print(f"✗ {mode} failed: {e}")
216
+ outputs[mode] = None
217
+
218
+ # Verify outputs match
219
+ print("\n" + "="*60)
220
+ print("🔍 Output Verification")
221
+ print("="*60)
222
+
223
+ baseline_key = 'default'
224
+ if baseline_key in outputs and outputs[baseline_key] is not None:
225
+ baseline_output = outputs[baseline_key]
226
+
227
+ for mode, output in outputs.items():
228
+ if mode != baseline_key and output is not None:
229
+ diff = torch.abs(baseline_output - output)
230
+ max_diff = torch.max(diff).item()
231
+ mean_diff = torch.mean(diff).item()
232
+
233
+ print(f"\n{mode} vs {baseline_key}:")
234
+ print(f" Max difference: {max_diff:.6f}")
235
+ print(f" Mean difference: {mean_diff:.6f}")
236
+
237
+ if max_diff < 1e-3:
238
+ print(f" ✓ Outputs match within tolerance")
239
+ else:
240
+ print(f" ⚠ Warning: Large difference detected!")
241
+
242
+ print("\n✅ Test completed!")
243
+
244
+
245
if __name__ == "__main__":
    import sys

    # A leading `test` word on the command line selects the verification run;
    # it is stripped so that argparse only sees the regular options.
    run_verification = len(sys.argv) > 1 and sys.argv[1] == 'test'
    if run_verification:
        sys.argv.pop(1)
        test_optimization_modes(None)
    if not run_verification:
        benchmark_optimization_modes(None)
config_manager.py CHANGED
@@ -3,7 +3,7 @@ import json
3
  from pathlib import Path
4
 
5
  # Define config directory in Google Drive
6
- CONFIG_DIR = "/home/user/app/SESA-Config"
7
  CONFIG_FILE = os.path.join(CONFIG_DIR, "config.json")
8
 
9
  def load_config():
@@ -14,6 +14,10 @@ def load_config():
14
  "chunk_size": 352800,
15
  "overlap": 2,
16
  "export_format": "wav FLOAT",
 
 
 
 
17
  "auto_use_tta": False,
18
  "use_tta": False,
19
  "use_demud_phaseremix_inst": False,
 
3
  from pathlib import Path
4
 
5
  # Define config directory in Google Drive
6
+ CONFIG_DIR = "/content/drive/MyDrive/SESA-Config"
7
  CONFIG_FILE = os.path.join(CONFIG_DIR, "config.json")
8
 
9
  def load_config():
 
14
  "chunk_size": 352800,
15
  "overlap": 2,
16
  "export_format": "wav FLOAT",
17
+ "optimize_mode": "channels_last",
18
+ "enable_amp": True,
19
+ "enable_tf32": True,
20
+ "enable_cudnn_benchmark": True,
21
  "auto_use_tta": False,
22
  "use_tta": False,
23
  "use_demud_phaseremix_inst": False,
download.py CHANGED
@@ -53,8 +53,16 @@ def download_callback(url, download_type='direct', cookie_file=None):
53
  # Mount Google Drive (optional)
54
  if drive is not None:
55
  try:
56
- drive.mount('/content/drive', force_remount=True)
57
- drive_mounted = True
 
 
 
 
 
 
 
 
58
  except Exception as e:
59
  print(i18n("google_drive_mount_error").format(str(e)))
60
  print(i18n("continuing_without_google_drive"))
@@ -113,34 +121,95 @@ def download_callback(url, download_type='direct', cookie_file=None):
113
 
114
  # 3. YouTube and other media links
115
  else:
116
- ydl_opts = {
117
- 'format': 'bestaudio/best',
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
118
  'outtmpl': os.path.join(INPUT_DIR, '%(title)s.%(ext)s'),
119
  'postprocessors': [{
120
  'key': 'FFmpegExtractAudio',
121
  'preferredcodec': 'wav',
122
  'preferredquality': '0'
123
  }],
124
- 'cookiefile': COOKIE_PATH if os.path.exists(COOKIE_PATH) else None,
125
  'nocheckcertificate': True,
126
- 'ignoreerrors': True,
127
- 'retries': 3
 
 
 
 
 
 
 
128
  }
 
 
 
 
 
129
  try:
130
- with yt_dlp.YoutubeDL(ydl_opts) as ydl:
131
  info_dict = ydl.extract_info(url, download=True)
132
- temp_path = ydl.prepare_filename(info_dict)
133
- base_name = os.path.splitext(os.path.basename(temp_path))[0]
134
- sanitized_base_name = sanitize_filename(base_name)
135
- wav_path = os.path.join(INPUT_DIR, f"{sanitized_base_name}.wav")
136
- temp_wav = os.path.splitext(temp_path)[0] + '.wav'
137
- if os.path.exists(temp_wav):
138
- os.rename(temp_wav, wav_path)
139
- download_success = True
140
- else:
141
- raise Exception(i18n("wav_conversion_failed"))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
142
  except Exception as e:
143
  error_msg = i18n("download_error").format(str(e))
 
 
 
144
  print(error_msg)
145
  return None, error_msg, None, None, None, None
146
 
 
53
  # Mount Google Drive (optional)
54
  if drive is not None:
55
  try:
56
+ # Check if already mounted first
57
+ if os.path.exists('/content/drive/MyDrive'):
58
+ drive_mounted = True
59
+ else:
60
+ drive.mount('/content/drive', force_remount=True)
61
+ drive_mounted = True
62
+ except AttributeError as ae:
63
+ # Handle 'NoneType' object has no attribute 'kernel' error
64
+ print(f"Warning: Google Drive mount skipped (Colab kernel issue): {str(ae)}")
65
+ print(i18n("continuing_without_google_drive"))
66
  except Exception as e:
67
  print(i18n("google_drive_mount_error").format(str(e)))
68
  print(i18n("continuing_without_google_drive"))
 
121
 
122
  # 3. YouTube and other media links
123
  else:
124
+ # First try: iOS/Android without cookies (best for bot protection bypass)
125
+ ydl_opts_nocookie = {
126
+ 'format': 'ba[ext=m4a]/ba[ext=webm]/ba/b',
127
+ 'outtmpl': os.path.join(INPUT_DIR, '%(title)s.%(ext)s'),
128
+ 'postprocessors': [{
129
+ 'key': 'FFmpegExtractAudio',
130
+ 'preferredcodec': 'wav',
131
+ 'preferredquality': '0'
132
+ }],
133
+ 'nocheckcertificate': True,
134
+ 'ignoreerrors': False,
135
+ 'retries': 3,
136
+ 'extractor_retries': 3,
137
+ 'extractor_args': {
138
+ 'youtube': {
139
+ 'player_client': ['ios', 'android'],
140
+ 'player_skip': ['webpage', 'configs']
141
+ }
142
+ },
143
+ 'http_headers': {
144
+ 'User-Agent': 'com.google.ios.youtube/19.09.3 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)',
145
+ 'Accept-Language': 'en-US,en;q=0.9'
146
+ }
147
+ }
148
+
149
+ # Second try: web client with cookies if available
150
+ ydl_opts_cookie = {
151
+ 'format': 'ba[ext=m4a]/ba[ext=webm]/ba/b',
152
  'outtmpl': os.path.join(INPUT_DIR, '%(title)s.%(ext)s'),
153
  'postprocessors': [{
154
  'key': 'FFmpegExtractAudio',
155
  'preferredcodec': 'wav',
156
  'preferredquality': '0'
157
  }],
158
+ 'cookiefile': COOKIE_PATH,
159
  'nocheckcertificate': True,
160
+ 'ignoreerrors': False,
161
+ 'retries': 3,
162
+ 'extractor_retries': 3,
163
+ 'extractor_args': {
164
+ 'youtube': {
165
+ 'player_client': ['web', 'tv_embedded'],
166
+ 'player_skip': ['configs']
167
+ }
168
+ }
169
  }
170
+
171
+ # Try without cookies first
172
+ info_dict = None
173
+ temp_path = None
174
+
175
  try:
176
+ with yt_dlp.YoutubeDL(ydl_opts_nocookie) as ydl:
177
  info_dict = ydl.extract_info(url, download=True)
178
+ if info_dict:
179
+ temp_path = ydl.prepare_filename(info_dict)
180
+ except Exception as e:
181
+ # If no cookies available or first method failed, try with cookies
182
+ if os.path.exists(COOKIE_PATH):
183
+ print(f"First attempt failed, trying with cookies...")
184
+ try:
185
+ with yt_dlp.YoutubeDL(ydl_opts_cookie) as ydl:
186
+ info_dict = ydl.extract_info(url, download=True)
187
+ if info_dict:
188
+ temp_path = ydl.prepare_filename(info_dict)
189
+ except Exception as e2:
190
+ raise e2
191
+ else:
192
+ raise e
193
+
194
+ try:
195
+ # Check if extraction was successful
196
+ if info_dict is None:
197
+ raise Exception(i18n("youtube_extraction_failed") if "youtube_extraction_failed" in dir(i18n) else "YouTube extraction failed. Please try updating yt-dlp: pip install -U yt-dlp")
198
+
199
+ base_name = os.path.splitext(os.path.basename(temp_path))[0]
200
+ sanitized_base_name = sanitize_filename(base_name)
201
+ wav_path = os.path.join(INPUT_DIR, f"{sanitized_base_name}.wav")
202
+ temp_wav = os.path.splitext(temp_path)[0] + '.wav'
203
+ if os.path.exists(temp_wav):
204
+ os.rename(temp_wav, wav_path)
205
+ download_success = True
206
+ else:
207
+ raise Exception(i18n("wav_conversion_failed"))
208
  except Exception as e:
209
  error_msg = i18n("download_error").format(str(e))
210
+ # Add hint for yt-dlp update if it's a YouTube issue
211
+ if 'youtube' in url.lower() or 'youtu.be' in url.lower():
212
+ error_msg += "\n\nTry: pip install -U yt-dlp"
213
  print(error_msg)
214
  return None, error_msg, None, None, None, None
215
 
ensemble.py CHANGED
@@ -1,281 +1,442 @@
 
1
  # coding: utf-8
2
- __author__ = 'Roman Solovyev (ZFTurbo): https://github.com/ZFTurbo/' # forked by jarredou
 
 
 
 
 
 
3
 
4
  import os
5
- import torch
6
- import torchaudio
7
- import soundfile as sf
8
  import argparse
 
 
 
 
 
 
 
9
  from pathlib import Path
 
 
 
 
 
 
10
 
 
 
 
 
 
 
11
 
12
def stft(wave, n_fft, hop_length):
    """
    Short-time Fourier transform of a multi-channel waveform via ``torch.stft``.

    :param wave: tensor of shape (channels, length)
    :param n_fft: FFT size; also used as the length of the Hann window
    :param hop_length: hop between successive frames, in samples
    :return: complex spectrogram of shape (channels, freq_bins, time_frames)
    """
    # Create the window on the same device as the input so CPU and CUDA
    # tensors are both accepted.
    analysis_window = torch.hann_window(n_fft, device=wave.device)
    return torch.stft(
        wave,
        n_fft=n_fft,
        hop_length=hop_length,
        window=analysis_window,
        return_complex=True,
    )
29
-
30
-
31
def istft(spec, hop_length, length):
    """
    Inverse short-time Fourier transform via ``torch.istft``.

    The FFT size is recovered from the number of frequency bins, so the
    spectrogram must hold the one-sided ``n_fft // 2 + 1`` bins of an even
    ``n_fft``.

    :param spec: complex spectrogram of shape (channels, freq_bins, time_frames)
    :param hop_length: hop length used for the forward transform
    :param length: number of samples the output is trimmed/padded to
    :return: waveform of shape (channels, length)
    """
    fft_size = (spec.shape[1] - 1) * 2
    synthesis_window = torch.hann_window(fft_size, device=spec.device)
    return torch.istft(
        spec,
        n_fft=fft_size,
        hop_length=hop_length,
        window=synthesis_window,
        length=length,
    )
48
-
49
-
50
def absmax(a, *, dim):
    """Return, along ``dim``, the element of ``a`` whose absolute value is largest.

    The original (signed or complex) value is returned, not its magnitude.
    """
    winner = a.abs().argmax(dim=dim, keepdim=True)
    return a.gather(dim, winner).squeeze(dim)
55
-
56
-
57
def absmin(a, *, dim):
    """Return, along ``dim``, the element of ``a`` whose absolute value is smallest.

    The original (signed or complex) value is returned, not its magnitude.
    """
    winner = a.abs().argmin(dim=dim, keepdim=True)
    return a.gather(dim, winner).squeeze(dim)
62
-
63
-
64
def lambda_max(arr, dim=None, key=None):
    """Select along ``dim`` the elements of ``arr`` that maximise ``key(arr)``.

    When ``key`` is omitted the elements themselves are compared.
    """
    scores = arr if key is None else key(arr)
    winner = torch.argmax(scores, dim=dim, keepdim=True)
    return torch.gather(arr, dim, winner).squeeze(dim)
71
-
72
-
73
def lambda_min(arr, dim=None, key=None):
    """Select along ``dim`` the elements of ``arr`` that minimise ``key(arr)``.

    When ``key`` is omitted the elements themselves are compared.
    """
    scores = arr if key is None else key(arr)
    winner = torch.argmin(scores, dim=dim, keepdim=True)
    return torch.gather(arr, dim, winner).squeeze(dim)
80
-
81
-
82
def match_tensor_shapes(tensor_1, tensor_2):
    """Return ``tensor_1`` with its last dimension made as long as ``tensor_2``'s.

    Longer inputs are truncated at the end; shorter inputs are right-padded
    with zeros. Only the length of ``tensor_2`` is used, never its contents.
    """
    wanted = tensor_2.shape[-1]
    have = tensor_1.shape[-1]

    if have < wanted:
        return torch.nn.functional.pad(tensor_1, (0, wanted - have), 'constant', 0)
    if have > wanted:
        return tensor_1[..., :wanted]
    return tensor_1
94
-
95
-
96
def average_waveforms(pred_track, weights, algorithm, n_fft, hop_length):
    """
    Combine several equally-shaped waveforms into one.

    :param pred_track: sequence of tensors, each of shape (channels, length)
    :param weights: one weight per track; only used by avg_wave and avg_fft
    :param algorithm: One of avg_wave, median_wave, min_wave, max_wave,
                      avg_fft, median_fft, min_fft, max_fft
    :param n_fft: FFT size for STFT operations
    :param hop_length: hop length for STFT operations
    :return: combined waveform in shape (channels, length)
    :raises ValueError: if ``algorithm`` is not one of the names above
    """
    pred_track = torch.stack(pred_track)
    final_length = pred_track.shape[-1]
    weights = torch.tensor(weights, dtype=pred_track.dtype, device=pred_track.device)

    if algorithm == 'avg_wave':
        # Weighted mean over the track axis.
        weighted_tracks = pred_track * weights.view(-1, 1, 1)
        return torch.sum(weighted_tracks, dim=0) / torch.sum(weights)
    if algorithm == 'median_wave':
        # NOTE: torch.median returns the lower of the two middle values for an
        # even number of tracks (it does not average them).
        return torch.median(pred_track, dim=0)[0]
    if algorithm == 'min_wave':
        return lambda_min(pred_track, dim=0, key=torch.abs)
    if algorithm == 'max_wave':
        return lambda_max(pred_track, dim=0, key=torch.abs)

    if algorithm not in ('avg_fft', 'median_fft', 'min_fft', 'max_fft'):
        # Previously an unknown name fell through and crashed on an unbound
        # local; fail with a message that names the problem instead.
        raise ValueError(f"Unknown ensemble algorithm: {algorithm!r}")

    # Frequency-domain algorithms: one complex spectrogram per track,
    # stacked to (num, channels, freq_bins, time_frames).
    spec_tracks = torch.stack(
        [stft(track, n_fft, hop_length) for track in pred_track]
    )

    if algorithm == 'avg_fft':
        weighted_specs = spec_tracks * weights.view(-1, 1, 1, 1)
        combined = torch.sum(weighted_specs, dim=0) / torch.sum(weights)
    elif algorithm == 'median_fft':
        # Complex values have no ordering, so torch.median cannot be applied to
        # the spectrogram directly. Take the median of the real and imaginary
        # parts independently and recombine them.
        combined = torch.complex(
            torch.median(spec_tracks.real, dim=0)[0],
            torch.median(spec_tracks.imag, dim=0)[0],
        )
    elif algorithm == 'min_fft':
        combined = lambda_min(spec_tracks, dim=0, key=torch.abs)
    else:  # 'max_fft'
        combined = absmax(spec_tracks, dim=0)

    return istft(combined, hop_length, final_length)
149
-
150
-
151
def save_audio(waveform, sample_rate, output_path):
    """Write a waveform tensor to disk with soundfile.

    ``.flac`` output is stored as 24-bit PCM; every other extension is stored
    as 32-bit float.

    :param waveform: tensor of shape (channels, samples) or (samples,)
    :param sample_rate: sample rate in Hz
    :param output_path: destination path; the suffix selects the encoding
    """
    output_path = Path(output_path)

    # Ensure waveform is in the right format (channels, samples)
    if waveform.dim() == 1:
        waveform = waveform.unsqueeze(0)

    # Bring the data to host memory whatever device it lives on, and drop any
    # autograd history so .numpy() is always legal.
    waveform = waveform.detach().cpu()

    # soundfile expects (samples, channels)
    audio_np = waveform.numpy().T

    if output_path.suffix.lower() == '.flac':
        # 24-bit PCM cannot represent values outside [-1, 1]. Scale down only
        # when the signal actually exceeds full scale; quieter material keeps
        # its original level (the previous `> 0` test normalised everything).
        peak = float(torch.max(torch.abs(waveform))) if waveform.numel() else 0.0
        if peak > 1.0:
            print(f"Clipping detected : {peak}")
            audio_np = audio_np / peak

        sf.write(str(output_path), audio_np, sample_rate, subtype='PCM_24')
    else:
        # Default to float32 for WAV and other formats
        sf.write(str(output_path), audio_np, sample_rate, subtype='FLOAT')
180
-
181
-
182
def ensemble_files(args):
    """Command-line entry point: load audio files, ensemble them, save the result.

    :param args: list of command-line tokens to parse, or ``None`` to parse
                 ``sys.argv[1:]``
    :raises ValueError: if the number of weights differs from the number of files
    :raises SystemExit: with status 1 if an input file does not exist, or from
                        argparse on invalid arguments
    """
    ensemble_types = ['avg_wave', 'median_wave', 'min_wave', 'max_wave',
                      'avg_fft', 'median_fft', 'min_fft', 'max_fft']

    parser = argparse.ArgumentParser(description="Audio ensemble tool using PyTorch")
    parser.add_argument("--files", type=str, required=True, nargs='+',
                        help="Path to all audio-files to ensemble")
    # `choices` rejects a mistyped algorithm at parse time instead of after
    # every input file has been loaded.
    parser.add_argument("--type", type=str, default='avg_wave', choices=ensemble_types,
                        help="One of avg_wave, median_wave, min_wave, max_wave, avg_fft, median_fft, min_fft, max_fft")
    parser.add_argument("--weights", type=float, nargs='+',
                        help="Weights to create ensemble. Number of weights must be equal to number of files")
    parser.add_argument("--output", default="res.wav", type=str,
                        help="Path to output file (supports .wav, .flac)")
    parser.add_argument("--n_fft", type=int, default=2048,
                        help="FFT size for STFT operations (default: 2048)")
    parser.add_argument("--hop_length", type=int, default=1024,
                        help="Hop length for STFT operations (default: 1024)")
    parser.add_argument("--device", type=str, default="auto",
                        help="Device to use: 'cpu', 'cuda', or 'auto' (default: auto)")
    parser.add_argument("--trim_to_shortest", action="store_true",
                        help="Trim output to shortest input file length to avoid padding artifacts")

    # parse_args(None) reads sys.argv[1:], so one call covers both cases.
    args = parser.parse_args(args)

    # Set device
    if args.device == "auto":
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    else:
        device = torch.device(args.device)

    print(f'Using device: {device}')
    print(f'Ensemble type: {args.type}')
    print(f'Number of input files: {len(args.files)}')
    print(f'N_FFT: {args.n_fft}, Hop length: {args.hop_length}')

    if args.weights is not None:
        if len(args.weights) != len(args.files):
            raise ValueError("Number of weights must match number of files")
        weights = args.weights
    else:
        weights = [1.0] * len(args.files)

    print(f'Weights: {weights}')
    print(f'Output file: {args.output}')

    data = []
    max_len = 0
    min_len = float('inf')
    sample_rate = None  # sample rate of the first file; later files are resampled to it

    for f in args.files:
        if not os.path.isfile(f):
            print(f'Error. Can\'t find file: {f}. Check paths.')
            # The bare `exit` builtin is injected by the `site` module and is
            # not guaranteed to exist; SystemExit always is.
            raise SystemExit(1)

        print(f'Reading file: {f}')
        wav, sr = torchaudio.load(f)

        if sample_rate is None:
            sample_rate = sr
        elif sample_rate != sr:
            print(f'Warning: Sample rate mismatch. Expected {sample_rate}, got {sr}. Resampling...')
            resampler = torchaudio.transforms.Resample(sr, sample_rate)
            wav = resampler(wav)

        # Ensure stereo (2 channels)
        if wav.shape[0] == 1:
            print("Mono detected. Converting to stereo by duplication.")
            wav = wav.repeat(2, 1)
        elif wav.shape[0] > 2:
            print(f"Multi-channel audio detected ({wav.shape[0]} channels). Using first 2 channels.")
            wav = wav[:2]

        # Move to device
        wav = wav.to(device)

        print(f"Waveform shape: {wav.shape} sample rate: {sr}")
        data.append(wav)
        max_len = max(max_len, wav.shape[1])
        min_len = min(min_len, wav.shape[1])

    # Choose target length based on argument
    target_len = min_len if args.trim_to_shortest else max_len
    # match_tensor_shapes only reads the length of its second argument.
    target_tensor = torch.zeros(2, target_len, device=device)
    data = [match_tensor_shapes(wav, target_tensor) for wav in data]

    print(f"Target length: {target_len} ({'shortest' if args.trim_to_shortest else 'longest'} input file)")

    print("Starting ensemble processing...")
    result = average_waveforms(data, weights, args.type, args.n_fft, args.hop_length)

    print(f'Result shape: {result.shape}')

    # Save the result
    save_audio(result, sample_rate, args.output)
    print(f'Ensemble saved to: {args.output}')


if __name__ == "__main__":
    ensemble_files(None)
 
 
1
+ #!/usr/bin/env python3
2
  # coding: utf-8
3
+ """
4
+ Ultimate Audio Ensemble Processor v4.0
5
+ - Tüm ensemble yöntemlerini destekler (avg_wave, median_wave, max_wave, min_wave, max_fft, min_fft, median_fft)
6
+ - Özel karakterli ve uzun dosya yollarını destekler
7
+ - Büyük dosyaları verimli şekilde işler
8
+ - Detaylı hata yönetimi ve loglama
9
+ """
10
 
11
  import os
12
+ import sys
 
 
13
  import argparse
14
+ import numpy as np
15
+ import soundfile as sf
16
+ import librosa
17
+ import psutil
18
+ import gc
19
+ import traceback
20
+ from scipy.signal import stft, istft
21
  from pathlib import Path
22
+ import tempfile
23
+ import shutil
24
+ import json
25
+ from tqdm import tqdm
26
+ import time
27
+ import torch
28
 
29
+ # PyTorch optimizations
30
+ if torch.cuda.is_available():
31
+ torch.backends.cudnn.benchmark = True
32
+ print("✓ Using CUDA acceleration for ensemble")
33
+ else:
34
+ print("Using CPU for ensemble")
35
 
36
class AudioEnsembleEngine:
    """Chunk-wise ensembling of stereo audio files.

    Use as a context manager: entering creates a scratch directory and starts
    a fresh log file, leaving removes the scratch directory again.
    """

    def __init__(self):
        self.temp_dir = None  # scratch directory, created in __enter__
        self.log_file = "ensemble_processor.log"

    def __enter__(self):
        self.temp_dir = tempfile.mkdtemp(prefix='audio_ensemble_')
        self.setup_logging()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        if self.temp_dir and os.path.exists(self.temp_dir):
            shutil.rmtree(self.temp_dir, ignore_errors=True)

    def setup_logging(self):
        """Start a fresh log file with a short system summary."""
        # Explicit UTF-8: logged paths may contain characters the platform's
        # default encoding cannot represent.
        with open(self.log_file, 'w', encoding='utf-8') as f:
            f.write("Audio Ensemble Processor Log\n")
            f.write("="*50 + "\n")
            f.write(f"System Memory: {psutil.virtual_memory().total/(1024**3):.2f} GB\n")
            f.write(f"Python Version: {sys.version}\n\n")

    def log_message(self, message):
        """Append one timestamped line to the log file."""
        with open(self.log_file, 'a', encoding='utf-8') as f:
            f.write(f"[{time.strftime('%Y-%m-%d %H:%M:%S')}] {message}\n")

    def normalize_path(self, path):
        """Return a usable absolute path for an existing *input* file.

        Paths containing any of ``[]()|&;`` or a space are re-encoded into the
        scratch directory under a generated name and that copy's path is
        returned. On any failure the (possibly absolutised) path is returned
        unchanged and the error is logged.
        """
        try:
            # Convert to absolute path
            path = str(Path(path).absolute().resolve())

            # Handle problematic characters
            if any(char in path for char in '[]()|&; '):
                base, ext = os.path.splitext(path)
                safe_name = f"{hash(base)}{ext}"
                temp_path = os.path.join(self.temp_dir, safe_name)

                if not os.path.exists(temp_path):
                    data, sr = librosa.load(path, sr=None, mono=False)
                    sf.write(temp_path, data.T, sr)

                return temp_path

            return path
        except Exception as e:
            self.log_message(f"Path normalization failed: {str(e)}")
            return path

    @staticmethod
    def _resolve_output_path(path):
        """Absolutise an output path without touching the file system.

        Output paths must not go through normalize_path: if the destination
        already exists and contains a special character, normalize_path would
        hand back a copy inside the scratch directory, the result would be
        written there, and it would be deleted when the engine exits.
        """
        return str(Path(path).absolute().resolve())

    def validate_inputs(self, files, method, output_path):
        """Check the method name, every input file and the output location.

        :return: ``(valid_files, sample_rate, shortest_duration_seconds)``
        :raises ValueError: listing every problem found, one per line
        """
        errors = []
        valid_methods = [
            'avg_wave', 'median_wave', 'max_wave', 'min_wave',
            'max_fft', 'min_fft', 'median_fft'
        ]

        # Method validation
        if method not in valid_methods:
            errors.append(f"Invalid method '{method}'. Available: {valid_methods}")

        # File validation
        valid_files = []
        sample_rates = set()
        durations = []
        channels_set = set()

        for f in files:
            try:
                f_normalized = self.normalize_path(f)

                # Basic checks
                if not os.path.exists(f_normalized):
                    errors.append(f"File not found: {f_normalized}")
                    continue

                if os.path.getsize(f_normalized) == 0:
                    errors.append(f"Empty file: {f_normalized}")
                    continue

                # Audio file validation
                try:
                    with sf.SoundFile(f_normalized) as sf_file:
                        sr = sf_file.samplerate
                        frames = sf_file.frames
                        channels = sf_file.channels
                except Exception as e:
                    errors.append(f"Invalid audio file {f_normalized}: {str(e)}")
                    continue

                # Audio characteristics
                if channels != 2:
                    errors.append(f"File must be stereo (has {channels} channels): {f_normalized}")
                    continue

                sample_rates.add(sr)
                durations.append(frames / sr)
                channels_set.add(channels)
                valid_files.append(f_normalized)

            except Exception as e:
                errors.append(f"Error processing {f}: {str(e)}")
                continue

        # Final checks
        if len(valid_files) < 2:
            errors.append("At least 2 valid files required")

        if len(sample_rates) > 1:
            errors.append(f"Sample rate mismatch: {sample_rates}")

        if len(channels_set) > 1:
            errors.append(f"Channel count mismatch: {channels_set}")

        # Output path validation
        try:
            output_path = self._resolve_output_path(output_path)
            output_dir = os.path.dirname(output_path) or '.'

            if not os.path.exists(output_dir):
                os.makedirs(output_dir, exist_ok=True)

            if not os.access(output_dir, os.W_OK):
                errors.append(f"No write permission for output directory: {output_dir}")
        except Exception as e:
            errors.append(f"Output path error: {str(e)}")

        if errors:
            error_msg = "\n".join(errors)
            self.log_message(f"Validation failed:\n{error_msg}")
            raise ValueError(error_msg)

        target_sr = sample_rates.pop() if sample_rates else 44100
        return valid_files, target_sr, min(durations) if durations else None

    @staticmethod
    def _select_by_magnitude(chunks, arg_fn):
        """Pick, per sample, the value from the file chosen by ``arg_fn(|chunks|)``.

        ``chunks`` is (files, channels, samples); the result is
        (channels, samples) and keeps the sign of the selected sample.
        """
        chunks = np.asarray(chunks)
        winner = arg_fn(np.abs(chunks), axis=0)
        return np.take_along_axis(chunks, winner[np.newaxis, ...], axis=0)[0]

    def process_waveform(self, chunks, method, weights=None):
        """Combine time-domain chunks of shape (files, channels, samples).

        ``weights`` is only used by ``avg_wave``.

        :raises ValueError: if ``method`` is not a waveform method
        """
        if method == 'avg_wave':
            if weights is not None:
                return np.average(chunks, axis=0, weights=weights)
            return np.mean(chunks, axis=0)
        if method == 'median_wave':
            return np.median(chunks, axis=0)
        if method == 'max_wave':
            # "max" means largest magnitude. A signed np.max would prefer
            # positive samples and distort the waveform.
            return self._select_by_magnitude(chunks, np.argmax)
        if method == 'min_wave':
            return self._select_by_magnitude(chunks, np.argmin)
        raise ValueError(f"Unsupported waveform method: {method}")

    def process_spectral(self, chunks, method):
        """Combine chunks in the STFT domain.

        The magnitudes are combined per ``method``; the phase is taken from the
        first file. Returns an array of shape (channels, samples), or ``None``
        if the chunk is too short or a transform fails (the caller then falls
        back to waveform averaging).

        :raises ValueError: if ``method`` is not a spectral method
        """
        if method not in ('max_fft', 'min_fft', 'median_fft'):
            raise ValueError(f"Unsupported spectral method: {method}")

        min_samples = min(chunk.shape[1] for chunk in chunks)
        nperseg = min(1024, min_samples)  # Adjust nperseg to fit shortest chunk
        noverlap = nperseg // 2
        self.log_message(f"STFT parameters: nperseg={nperseg}, noverlap={noverlap}, min_samples={min_samples}")

        if min_samples < 256:  # Minimum reasonable length for STFT
            self.log_message(f"Warning: Chunk too short ({min_samples} samples) for STFT. Skipping.")
            return None

        specs = []
        for c in chunks:
            # Truncate chunk to minimum length to ensure consistent STFT shapes
            c = c[:, :min_samples]
            channel_specs = []
            for channel in range(c.shape[0]):
                try:
                    _, _, Zxx = stft(
                        c[channel],
                        nperseg=nperseg,
                        noverlap=noverlap,
                        window='hann'
                    )
                except Exception as e:
                    self.log_message(f"STFT failed for channel: {str(e)}")
                    return None
                channel_specs.append(Zxx)
            specs.append(np.array(channel_specs))

        if not specs:
            self.log_message("No valid STFTs computed.")
            return None

        specs = np.array(specs)
        self.log_message(f"STFT shapes: {[spec.shape for spec in specs]}")

        # Ensure all STFTs have the same shape
        min_freqs = min(spec.shape[1] for spec in specs)
        min_times = min(spec.shape[2] for spec in specs)
        specs = np.array([spec[:, :min_freqs, :min_times] for spec in specs])

        mag = np.abs(specs)

        if method == 'max_fft':
            combined_mag = np.max(mag, axis=0)
        elif method == 'min_fft':
            combined_mag = np.min(mag, axis=0)
        else:  # 'median_fft'
            combined_mag = np.median(mag, axis=0)

        # Use phase from first file
        combined_spec = combined_mag * np.exp(1j * np.angle(specs[0]))

        # ISTFT reconstruction
        target_len = chunks[0].shape[1]
        reconstructed = np.zeros((combined_spec.shape[0], target_len))
        for channel in range(combined_spec.shape[0]):
            try:
                _, xrec = istft(
                    combined_spec[channel],
                    nperseg=nperseg,
                    noverlap=noverlap,
                    window='hann'
                )
                # Truncate or pad to match original chunk length
                if xrec.shape[0] < target_len:
                    xrec = np.pad(xrec, (0, target_len - xrec.shape[0]), mode='constant')
                reconstructed[channel] = xrec[:target_len]
            except Exception as e:
                self.log_message(f"ISTFT failed for channel: {str(e)}")
                return None

        return reconstructed

    def _prepare_weights(self, weights, n_files):
        """Return normalised weights as an array, or ``None`` for "unweighted".

        Weights are dropped (with a log entry) when their count does not match
        the number of files or when they do not sum to a positive value.
        """
        if weights is None or len(weights) == 0:
            return None
        if len(weights) != n_files:
            self.log_message(
                f"Ignoring weights: got {len(weights)} for {n_files} files"
            )
            return None
        normalised = np.array(weights, dtype=np.float32)
        total = normalised.sum()
        if not total > 0:
            # Dividing by a zero/negative/NaN sum would poison the output.
            self.log_message(f"Ignoring weights: sum is not positive ({total})")
            return None
        normalised /= total
        self.log_message(f"Using weights: {normalised}")
        return normalised

    def run_ensemble(self, files, method, output_path, weights=None, buffer_size=32768):
        """Validate the inputs, then ensemble them chunk by chunk into ``output_path``.

        The output is written as 24-bit PCM stereo and is as long as the
        shortest input. Progress is reported on stdout as
        ``[SESA_PROGRESS]<percent>`` lines.

        :return: ``True`` on success, ``False`` on any failure (details are
                 logged and a summary is printed to stderr)
        """
        try:
            # Validate and prepare inputs
            valid_files, target_sr, duration = self.validate_inputs(files, method, output_path)
            output_path = self._resolve_output_path(output_path)

            self.log_message(f"Starting ensemble with method: {method}")
            self.log_message(f"Input files: {json.dumps(valid_files, indent=2)}")
            self.log_message(f"Target sample rate: {target_sr}Hz")
            self.log_message(f"Duration: {duration:.2f} seconds")
            self.log_message(f"Output path: {output_path}")

            # Ensure output directory exists
            output_dir = os.path.dirname(output_path) or '.'
            os.makedirs(output_dir, exist_ok=True)
            self.log_message(f"Output directory created/verified: {output_dir}")

            # Verify write permissions
            try:
                test_file = os.path.join(output_dir, "test_write.txt")
                with open(test_file, "w") as f:
                    f.write("Test")
                os.remove(test_file)
                self.log_message(f"Write permissions verified for: {output_dir}")
            except Exception as e:
                self.log_message(f"Write permission error for {output_dir}: {str(e)}")
                raise ValueError(f"Cannot write to output directory {output_dir}: {str(e)}")

            # Prepare weights
            weights = self._prepare_weights(weights, len(valid_files))

            # Open all files and verify exact alignment
            readers = []
            try:
                # Append one by one so that readers opened before a failure
                # are still closed by the finally block.
                for f in valid_files:
                    readers.append(sf.SoundFile(f))

                # Get exact frame counts from each file
                frame_counts = [r.frames for r in readers]
                self.log_message(f"Frame counts: {frame_counts}")

                # Use the shortest to avoid reading past file end
                shortest_frames = min(frame_counts)
                self.log_message(f"Using shortest frame count: {shortest_frames}")

                # Prepare output
                self.log_message(f"Opening output file for writing: {output_path}")
                print("Loading audio files...", flush=True)
                with sf.SoundFile(output_path, 'w', target_sr, 2, 'PCM_24') as outfile:
                    # Process in chunks (progress via print for GUI capture)
                    total_chunks = (shortest_frames + buffer_size - 1) // buffer_size
                    chunk_count = 0
                    last_reported_percent = -1
                    print("Processing ensemble...", flush=True)

                    for pos in range(0, shortest_frames, buffer_size):
                        chunk_size = min(buffer_size, shortest_frames - pos)

                        # Read perfectly aligned chunks from all files
                        chunks = []
                        for i, r in enumerate(readers):
                            # Ensure we're at the exact position
                            r.seek(pos)
                            current_pos = r.tell()

                            if current_pos != pos:
                                self.log_message(f"Warning: File {i} seek mismatch. Expected {pos}, got {current_pos}")
                                r.seek(pos)

                            # Read exact chunk size
                            data = r.read(chunk_size)

                            # Verify chunk size
                            if data.shape[0] != chunk_size:
                                self.log_message(f"Warning: File {i} chunk size mismatch. Expected {chunk_size}, got {data.shape[0]}")
                                # Pad or truncate to match
                                if data.shape[0] < chunk_size:
                                    data = np.pad(data, ((0, chunk_size - data.shape[0]), (0, 0)), mode='constant')
                                else:
                                    data = data[:chunk_size]

                            chunks.append(data.T)  # Transpose to (channels, samples)

                        chunks = np.array(chunks)

                        if pos % (10 * buffer_size) == 0:  # Log every 10 chunks
                            self.log_message(f"Processing chunk at pos={pos}, shape={chunks.shape}")

                        # Process based on method type
                        if method.endswith('_fft'):
                            result = self.process_spectral(chunks, method)
                            if result is None:
                                self.log_message("Spectral processing failed, falling back to avg_wave")
                                result = self.process_waveform(chunks, 'avg_wave', weights)
                        else:
                            result = self.process_waveform(chunks, method, weights)

                        # Verify result shape
                        expected_shape = (2, chunk_size)
                        if result.shape != expected_shape:
                            self.log_message(f"Warning: Result shape {result.shape} != expected {expected_shape}")
                            # Adjust result to match expected shape
                            if result.shape[1] < chunk_size:
                                result = np.pad(result, ((0, 0), (0, chunk_size - result.shape[1])), mode='constant')
                            elif result.shape[1] > chunk_size:
                                result = result[:, :chunk_size]

                        # Write output
                        outfile.write(result.T)  # Transpose back to (samples, channels)

                        # Clean up and update progress
                        del chunks, result
                        chunk_count += 1

                        # Report real progress percentage with unique prefix
                        current_percent = int((chunk_count / total_chunks) * 100)
                        if current_percent > last_reported_percent:
                            last_reported_percent = current_percent
                            print(f"[SESA_PROGRESS]{current_percent}", flush=True)

                        if pos % (5 * buffer_size) == 0:
                            gc.collect()

                    print("Saving ensemble output...", flush=True)

                self.log_message(f"Successfully created output: {output_path}")
                print(f"\nEnsemble completed successfully: {output_path}")
                return True

            except Exception as e:
                self.log_message(f"Processing error: {str(e)}\n{traceback.format_exc()}")
                raise
            finally:
                for r in readers:
                    try:
                        r.close()
                    except Exception:
                        # Best effort: a failed close must not mask the real
                        # error, but KeyboardInterrupt/SystemExit still propagate.
                        pass

        except Exception as e:
            self.log_message(f"Fatal error: {str(e)}\n{traceback.format_exc()}")
            print(f"\nError during processing: {str(e)}", file=sys.stderr)
            return False
408
 
409
def main(argv=None):
    """Command-line entry point for the ensemble processor.

    :param argv: list of command-line tokens to parse; ``None`` (the default)
                 parses ``sys.argv[1:]``, so existing ``main()`` callers are
                 unaffected while other modules can pass arguments directly
    :raises SystemExit: always; status 0 on success, 1 on failure
    """
    parser = argparse.ArgumentParser(
        description='Ultimate Audio Ensemble Processor - Supports all ensemble methods',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    parser.add_argument('--files', nargs='+', required=True,
                        help='Input audio files (supports special characters)')
    parser.add_argument('--type', required=True,
                        choices=['avg_wave', 'median_wave', 'max_wave', 'min_wave',
                                 'max_fft', 'min_fft', 'median_fft'],
                        help='Ensemble method to use')
    parser.add_argument('--weights', nargs='+', type=float,
                        help='Relative weights for each input file')
    parser.add_argument('--output', required=True,
                        help='Output file path')
    parser.add_argument('--buffer', type=int, default=32768,
                        help='Buffer size in samples (larger=faster but uses more memory)')

    args = parser.parse_args(argv)

    with AudioEnsembleEngine() as engine:
        success = engine.run_ensemble(
            files=args.files,
            method=args.type,
            output_path=args.output,
            weights=args.weights,
            buffer_size=args.buffer
        )

    sys.exit(0 if success else 1)


if __name__ == "__main__":
    # `time` is already imported at module level; no re-import needed here.
    main()
gui.py CHANGED
@@ -8,44 +8,15 @@ import json
8
  import sys
9
  import time
10
  import random
11
- from helpers import update_model_dropdown, handle_file_upload, clear_old_output, save_uploaded_file, update_file_list, clean_model
12
  from download import download_callback
13
- from model import get_model_config, MODEL_CONFIGS
14
  from processing import process_audio, auto_ensemble_process, ensemble_audio_fn, refresh_auto_output
15
  from assets.i18n.i18n import I18nAuto
16
  from config_manager import load_config, save_config, update_favorites, save_preset, delete_preset
 
17
  import logging
18
- from gradio_client import utils
19
- from inference import proc_folder
20
-
21
- # Set up logging for the patch
22
- logging.basicConfig(filename='gradio_schema.log', level=logging.DEBUG)
23
- logger = logging.getLogger('gradio_schema')
24
-
25
- # Patch gradio_client.utils._json_schema_to_python_type
26
- original_json_schema_to_python_type = utils._json_schema_to_python_type
27
-
28
- def patched_json_schema_to_python_type(schema: any, defs: dict | None = None) -> str:
29
- logger.debug(f"Parsing schema: {schema}")
30
- if isinstance(schema, bool):
31
- logger.info(f"Found boolean schema: {schema}, returning 'boolean'")
32
- return "boolean"
33
- if not isinstance(schema, dict):
34
- logger.warning(f"Unexpected schema type: {type(schema)}, returning 'Any'")
35
- return "Any"
36
- if "enum" in schema and schema.get("type") == "string":
37
- logger.info(f"Handling enum schema: {schema['enum']}")
38
- return f"Literal[{', '.join(repr(e) for e in schema['enum'])}]"
39
- try:
40
- return original_json_schema_to_python_type(schema, defs)
41
- except utils.APIInfoParseError as e:
42
- logger.error(f"Failed to parse schema {schema}: {e}")
43
- return "str"
44
-
45
- utils._json_schema_to_python_type = patched_json_schema_to_python_type
46
-
47
- # General logging setup
48
- logging.basicConfig(filename='sesa_gui.log', level=logging.DEBUG)
49
 
50
  # BASE_DIR tanımı
51
  BASE_DIR = os.path.dirname(os.path.abspath(__file__))
@@ -195,11 +166,19 @@ def create_interface():
195
  }
196
  #progress-bar {
197
  height: 20px;
198
- background: linear-gradient(to right, #6e8efb, #ff4040);
 
199
  border-radius: 5px;
200
- transition: width 0.5s ease-in-out;
201
  max-width: 100% !important;
202
  }
 
 
 
 
 
 
 
203
  .gr-accordion {
204
  background: rgba(128, 0, 0, 0.5) !important;
205
  border-radius: 10px !important;
@@ -264,7 +243,7 @@ def create_interface():
264
  with gr.Row():
265
  model_category = gr.Dropdown(
266
  label=i18n("category"),
267
- choices=[i18n(cat) for cat in MODEL_CONFIGS.keys()],
268
  value=i18n(initial_settings["model_category"])
269
  )
270
  favorite_button = gr.Button(i18n("add_favorite"), variant="secondary", scale=0)
@@ -284,11 +263,33 @@ def create_interface():
284
  value=initial_settings["export_format"]
285
  )
286
  with gr.Column(scale=1):
 
 
 
 
 
 
 
287
  chunk_size = gr.Dropdown(
288
  label=i18n("chunk_size"),
289
  choices=[352800, 485100],
290
  value=initial_settings["chunk_size"],
291
- info=i18n("chunk_size_info")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
292
  )
293
 
294
  with gr.Row():
@@ -302,6 +303,35 @@ def create_interface():
302
  info=i18n("overlap_info")
303
  )
304
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
305
  with gr.Row():
306
  with gr.Column(scale=1):
307
  use_tta = gr.Checkbox(
@@ -393,7 +423,7 @@ def create_interface():
393
  maximum=5,
394
  step=1,
395
  value=initial_settings.get("matchering_passes", 1),
396
- info=i18n("matchering_passes_info"),
397
  interactive=True
398
  )
399
 
@@ -402,11 +432,23 @@ def create_interface():
402
  clear_old_output_btn = gr.Button(i18n("reset"), variant="secondary")
403
  clear_old_output_status = gr.Textbox(label=i18n("status"), interactive=False)
404
 
405
- # Favorite handler
406
- def update_favorite_button(model, favorites):
407
  cleaned_model = clean_model(model) if model else None
408
  is_favorited = cleaned_model in favorites if cleaned_model else False
409
- return gr.update(value=i18n("remove_favorite") if is_favorited else i18n("add_favorite"))
 
 
 
 
 
 
 
 
 
 
 
 
410
 
411
  def toggle_favorite(model, favorites):
412
  if not model:
@@ -422,10 +464,29 @@ def create_interface():
422
  gr.update(value=i18n("add_favorite") if is_favorited else i18n("remove_favorite"))
423
  )
424
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
425
  model_dropdown.change(
426
  fn=update_favorite_button,
427
- inputs=[model_dropdown, favorites_state],
428
- outputs=favorite_button
429
  )
430
 
431
  favorite_button.click(
@@ -462,31 +523,34 @@ def create_interface():
462
  with gr.Column():
463
  original_audio = gr.Audio(label=i18n("original"), interactive=False)
464
  with gr.Row():
465
- vocals_audio = gr.Audio(label=i18n("vocals"), show_download_button=True, interactive=False)
466
- instrumental_audio = gr.Audio(label=i18n("instrumental_output"), show_download_button=True, interactive=False)
467
- other_audio = gr.Audio(label=i18n("other"), show_download_button=True, interactive=False)
468
 
469
  with gr.Tab(i18n("details_tab")) as details_tab:
470
  with gr.Column():
471
  with gr.Row():
472
- male_audio = gr.Audio(label=i18n("male"), interactive=False)
473
- female_audio = gr.Audio(label=i18n("female"), interactive=False)
474
- speech_audio = gr.Audio(label=i18n("speech"), interactive=False)
475
  with gr.Row():
476
- drum_audio = gr.Audio(label=i18n("drums"), interactive=False)
477
- bass_audio = gr.Audio(label=i18n("bass"), interactive=False)
478
  with gr.Row():
479
- effects_audio = gr.Audio(label=i18n("effects"), interactive=False)
480
 
481
  with gr.Tab(i18n("advanced_tab")) as advanced_tab:
482
  with gr.Column():
483
  with gr.Row():
484
- phaseremix_audio = gr.Audio(label=i18n("phase_remix"), interactive=False)
485
- dry_audio = gr.Audio(label=i18n("dry"), interactive=False)
486
  with gr.Row():
487
- music_audio = gr.Audio(label=i18n("music"), interactive=False)
488
- karaoke_audio = gr.Audio(label=i18n("karaoke"), interactive=False)
489
- bleed_audio = gr.Audio(label=i18n("bleed"), interactive=False)
 
 
 
490
 
491
  separation_progress_html = gr.HTML(
492
  value=f"""
@@ -605,7 +669,7 @@ def create_interface():
605
  info=i18n("matchering_info")
606
  )
607
 
608
- with gr.Group(visible=False) as auto_matchering_settings_group:
609
  auto_matchering_passes = gr.Slider(
610
  label=i18n("matchering_passes"),
611
  minimum=1,
@@ -621,13 +685,13 @@ def create_interface():
621
  with gr.Row():
622
  auto_category_dropdown = gr.Dropdown(
623
  label=i18n("model_category"),
624
- choices=[i18n(cat) for cat in MODEL_CONFIGS.keys()],
625
  value=i18n("Vocal Models")
626
  )
627
  selected_models = gr.Dropdown(
628
  label=i18n("selected_models"),
629
  choices=update_model_dropdown(i18n(initial_settings["auto_category"]), favorites=initial_favorites)["choices"],
630
- value=initial_settings["selected_models"] or [],
631
  multiselect=True
632
  )
633
 
@@ -666,19 +730,21 @@ def create_interface():
666
  def load_preset(preset_name, presets, category, favorites):
667
  if preset_name and preset_name in presets:
668
  preset = presets[preset_name]
 
669
  favorite_models = [f"{model} ⭐" if model in favorites else model for model in preset["models"]]
 
670
  preset_category = preset.get("auto_category_dropdown", category)
 
671
  model_choices = update_model_dropdown(preset_category, favorites=favorites)["choices"]
672
- logger.debug(f"Preset '{preset_name}' loaded with models: {favorite_models}, category: {preset_category}")
673
  return (
674
- gr.update(value=preset_category),
675
- gr.update(choices=model_choices, value=favorite_models),
676
- gr.update(value=preset["ensemble_method"])
677
  )
678
- logger.debug(f"Preset '{preset_name}' not found.")
679
  return gr.update(), gr.update(), gr.update()
680
 
681
  def sync_presets():
 
682
  config = load_config()
683
  return config["presets"], gr.update(choices=list(config["presets"].keys()), value=None)
684
 
@@ -694,14 +760,13 @@ def create_interface():
694
  if not models and not favorites:
695
  return gr.update(), presets, i18n("no_models_selected_for_preset")
696
  new_presets = save_preset(
697
- presets,
698
- preset_name,
699
- models,
700
  ensemble_method,
701
- auto_category_dropdown=auto_category_dropdown
702
  )
703
  save_config(favorites, load_config()["settings"], new_presets)
704
- logger.debug(f"Preset dropdown updated with choices: {list(new_presets.keys())}")
705
  return gr.update(choices=list(new_presets.keys()), value=None), new_presets, i18n("preset_saved").format(preset_name)
706
 
707
  save_preset_btn.click(
@@ -758,13 +823,14 @@ def create_interface():
758
  label=i18n("original_audio"),
759
  interactive=False,
760
  every=1,
761
- elem_id="original_audio_player"
 
762
  )
763
  with gr.Tab(i18n("ensemble_result_tab")) as ensemble_result_tab:
764
  auto_output_audio = gr.Audio(
765
  label=i18n("output_preview"),
766
- show_download_button=True,
767
- interactive=False
768
  )
769
  refresh_output_btn = gr.Button(i18n("refresh_output"), variant="secondary")
770
 
@@ -784,7 +850,7 @@ def create_interface():
784
  placeholder=i18n("waiting_for_processing"),
785
  visible=False
786
  )
787
-
788
  with gr.Tab(i18n("download_sources_tab"), id="download_tab"):
789
  with gr.Row():
790
  with gr.Column():
@@ -839,8 +905,8 @@ def create_interface():
839
  ensemble_output_audio = gr.Audio(
840
  label=i18n("ensembled_output"),
841
  interactive=False,
842
- show_download_button=True,
843
- elem_id="output-audio"
844
  )
845
  with gr.Tab(i18n("processing_log_tab")) as processing_log_tab:
846
  with gr.Accordion(i18n("processing_details"), open=True, elem_id="log-accordion"):
@@ -858,55 +924,522 @@ def create_interface():
858
  variant="primary",
859
  size="sm",
860
  elem_id="process-btn"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
861
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
862
 
863
  def save_settings_on_process(*args):
864
- apollo_method_value = args[11]
 
865
  backend_apollo_method = "mid_side_method" if apollo_method_value == i18n("mid_side_method") else "normal_method"
866
  cleaned_model = clean_model(args[1]) if args[1] else None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
867
  settings = {
868
- "chunk_size": args[2],
 
 
869
  "overlap": args[3],
870
  "export_format": args[4],
871
- "use_tta": args[5],
872
- "use_demud_phaseremix_inst": args[6],
873
- "extract_instrumental": args[7],
874
- "use_apollo": args[8],
875
- "apollo_chunk_size": args[9],
876
- "apollo_overlap": args[10],
 
 
 
 
877
  "apollo_method": backend_apollo_method,
878
- "apollo_normal_model": args[12],
879
- "apollo_midside_model": args[13],
880
- "use_matchering": args[14],
881
- "matchering_passes": args[15],
882
- "model_category": args[16],
883
  "selected_model": cleaned_model,
884
- "auto_ensemble_type": args[17]
885
  }
886
  save_config(load_config()["favorites"], settings, load_config()["presets"])
887
- modified_args = list(args)
 
888
  modified_args[1] = cleaned_model
889
- modified_args[17] = cleaned_model
890
- return process_audio(*modified_args)
 
 
 
891
 
892
  def save_auto_ensemble_settings(*args):
 
893
  settings = load_config()["settings"]
894
  settings["auto_ensemble_type"] = args[7]
895
  settings["use_matchering"] = args[14]
896
  settings["matchering_passes"] = args[15]
897
  save_config(load_config()["favorites"], settings, load_config()["presets"])
898
- output_audio, status, progress_html = None, i18n("waiting_for_processing"), ensemble_progress_html.value
899
  for update in auto_ensemble_process(*args):
900
  if isinstance(update, tuple) and len(update) == 3:
901
- output_audio, status, progress_html = update
902
- return output_audio, status, progress_html
903
 
904
  def update_category_dropdowns(cat):
905
- logging.debug(f"Input category: {cat}")
906
- eng_cat = next((k for k in MODEL_CONFIGS.keys() if i18n(k) == cat), list(MODEL_CONFIGS.keys())[0])
907
- logging.debug(f"Using English category: {eng_cat}")
908
  choices = update_model_dropdown(eng_cat, favorites=load_config()["favorites"])["choices"]
909
- logging.debug(f"Model choices: {choices}")
910
  return gr.update(choices=choices), gr.update(choices=choices)
911
 
912
  model_category.change(
@@ -940,39 +1473,38 @@ def create_interface():
940
  )
941
 
942
  auto_category_dropdown.change(
943
- fn=lambda cat: gr.update(choices=update_model_dropdown(next((k for k in MODEL_CONFIGS.keys() if i18n(k) == cat), list(MODEL_CONFIGS.keys())[0]), favorites=load_config()["favorites"])["choices"]),
944
  inputs=auto_category_dropdown,
945
  outputs=selected_models
946
  )
947
 
948
- def debug_inputs(*args):
949
- input_names = [
950
- "input_audio_file", "model_dropdown", "chunk_size", "overlap", "export_format",
951
- "use_tta", "use_demud_phaseremix_inst", "extract_instrumental",
952
- "use_apollo", "apollo_chunk_size", "apollo_overlap",
953
- "apollo_method", "apollo_normal_model", "apollo_midside_model",
954
- "use_matchering", "matchering_passes", "model_category", "selected_model"
955
- ]
956
  cleaned_args = list(args)
957
  cleaned_args[1] = clean_model(cleaned_args[1]) if cleaned_args[1] else None
958
- cleaned_args[17] = clean_model(cleaned_args[17]) if cleaned_args[17] else None
959
- for name, value in zip(input_names, cleaned_args):
960
- logger.debug(f"UI Input - {name}: {value}")
961
- return args
 
 
 
962
 
963
  process_btn.click(
964
- fn=lambda *args: save_settings_on_process(*debug_inputs(*args)),
965
  inputs=[
966
  input_audio_file, model_dropdown, chunk_size, overlap, export_format,
 
967
  use_tta, use_demud_phaseremix_inst, extract_instrumental,
968
  use_apollo, apollo_chunk_size, apollo_overlap,
969
  apollo_method, apollo_normal_model, apollo_midside_model,
970
- use_matchering, matchering_passes, model_category, model_dropdown
 
971
  ],
972
  outputs=[
973
  vocals_audio, instrumental_audio, phaseremix_audio, drum_audio, karaoke_audio,
974
  other_audio, bass_audio, effects_audio, speech_audio, bleed_audio, music_audio,
975
  dry_audio, male_audio, female_audio,
 
976
  separation_process_status, separation_progress_html
977
  ]
978
  )
@@ -1016,4 +1548,4 @@ def create_interface():
1016
  refresh_btn.click(fn=update_file_list, outputs=file_dropdown)
1017
  ensemble_process_btn.click(fn=ensemble_audio_fn, inputs=[file_dropdown, ensemble_type, weights_input], outputs=[ensemble_output_audio, ensemble_status])
1018
 
1019
- return demo
 
8
  import sys
9
  import time
10
  import random
11
+ from helpers import update_model_dropdown, handle_file_upload, clear_old_output, save_uploaded_file, update_file_list, clean_model, get_model_categories
12
  from download import download_callback
13
+ from model import get_model_config, MODEL_CONFIGS, get_all_model_configs_with_custom, add_custom_model, delete_custom_model, get_custom_models_list, SUPPORTED_MODEL_TYPES, load_custom_models, get_model_chunk_size
14
  from processing import process_audio, auto_ensemble_process, ensemble_audio_fn, refresh_auto_output
15
  from assets.i18n.i18n import I18nAuto
16
  from config_manager import load_config, save_config, update_favorites, save_preset, delete_preset
17
+ from phase_fixer import SOURCE_MODELS, TARGET_MODELS
18
  import logging
19
+ logging.basicConfig(filename='sesa_gui.log', level=logging.WARNING)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
 
21
  # BASE_DIR tanımı
22
  BASE_DIR = os.path.dirname(os.path.abspath(__file__))
 
166
  }
167
  #progress-bar {
168
  height: 20px;
169
+ background: linear-gradient(90deg, #6e8efb, #a855f7, #ff4040);
170
+ background-size: 200% 100%;
171
  border-radius: 5px;
172
+ transition: width 0.4s cubic-bezier(0.4, 0, 0.2, 1);
173
  max-width: 100% !important;
174
  }
175
+ @keyframes progress-shimmer {
176
+ 0% { background-position: 200% 0; }
177
+ 100% { background-position: -200% 0; }
178
+ }
179
+ #progress-bar[data-active="true"] {
180
+ animation: progress-shimmer 2s linear infinite;
181
+ }
182
  .gr-accordion {
183
  background: rgba(128, 0, 0, 0.5) !important;
184
  border-radius: 10px !important;
 
243
  with gr.Row():
244
  model_category = gr.Dropdown(
245
  label=i18n("category"),
246
+ choices=[i18n(cat) for cat in get_all_model_configs_with_custom().keys()],
247
  value=i18n(initial_settings["model_category"])
248
  )
249
  favorite_button = gr.Button(i18n("add_favorite"), variant="secondary", scale=0)
 
263
  value=initial_settings["export_format"]
264
  )
265
  with gr.Column(scale=1):
266
+ _init_cs_mode = initial_settings.get("chunk_size_mode", "base")
267
+ chunk_size_mode = gr.Radio(
268
+ label=i18n("chunk_size_mode"),
269
+ choices=["base", "custom", "yaml"],
270
+ value=_init_cs_mode,
271
+ info=i18n("chunk_size_mode_info")
272
+ )
273
  chunk_size = gr.Dropdown(
274
  label=i18n("chunk_size"),
275
  choices=[352800, 485100],
276
  value=initial_settings["chunk_size"],
277
+ info=i18n("chunk_size_info"),
278
+ visible=(_init_cs_mode == "base")
279
+ )
280
+ chunk_size_custom = gr.Number(
281
+ label=i18n("chunk_size_custom_label"),
282
+ value=initial_settings.get("chunk_size_custom", 352800),
283
+ precision=0,
284
+ info=i18n("chunk_size_custom_info"),
285
+ visible=(_init_cs_mode == "custom")
286
+ )
287
+ chunk_size_yaml_display = gr.Textbox(
288
+ label=i18n("chunk_size_yaml_label"),
289
+ value=i18n("chunk_size_yaml_not_downloaded"),
290
+ interactive=False,
291
+ info=i18n("chunk_size_yaml_display_info"),
292
+ visible=(_init_cs_mode == "yaml")
293
  )
294
 
295
  with gr.Row():
 
303
  info=i18n("overlap_info")
304
  )
305
 
306
+ with gr.Accordion(i18n("backend_settings"), open=True) as backend_settings_accordion:
307
+ gr.Markdown(f"### {i18n('inference_backend')} - {i18n('ultra_optimized_pytorch')}")
308
+ gr.Markdown(f"**{i18n('default_active_max_speed')}**")
309
+
310
+ with gr.Row():
311
+ optimize_mode = gr.Dropdown(
312
+ label=i18n("optimization_mode"),
313
+ choices=['channels_last', 'compile', 'default'],
314
+ value=initial_settings.get("optimize_mode", "channels_last"),
315
+ info=f"channels_last: {i18n('channels_last_mode')} | compile: {i18n('compile_mode')} | default: {i18n('default_mode')}"
316
+ )
317
+
318
+ with gr.Row():
319
+ enable_amp = gr.Checkbox(
320
+ label=i18n("mixed_precision_amp"),
321
+ value=initial_settings.get("enable_amp", True),
322
+ info=i18n("mixed_precision_info")
323
+ )
324
+ enable_tf32 = gr.Checkbox(
325
+ label=i18n("tf32_acceleration"),
326
+ value=initial_settings.get("enable_tf32", True),
327
+ info=i18n("tf32_acceleration_info")
328
+ )
329
+ enable_cudnn_benchmark = gr.Checkbox(
330
+ label=i18n("cudnn_benchmark"),
331
+ value=initial_settings.get("enable_cudnn_benchmark", True),
332
+ info=i18n("cudnn_benchmark_info")
333
+ )
334
+
335
  with gr.Row():
336
  with gr.Column(scale=1):
337
  use_tta = gr.Checkbox(
 
423
  maximum=5,
424
  step=1,
425
  value=initial_settings.get("matchering_passes", 1),
426
+ info=i18n("matchering_passes_info"),
427
  interactive=True
428
  )
429
 
 
432
  clear_old_output_btn = gr.Button(i18n("reset"), variant="secondary")
433
  clear_old_output_status = gr.Textbox(label=i18n("status"), interactive=False)
434
 
435
+ # Favorite handler + chunk size auto-update
436
+ def update_favorite_button(model, favorites, cs_mode):
437
  cleaned_model = clean_model(model) if model else None
438
  is_favorited = cleaned_model in favorites if cleaned_model else False
439
+ fav_btn = gr.update(value=i18n("remove_favorite") if is_favorited else i18n("add_favorite"))
440
+ chunk_update = gr.update()
441
+ yaml_update = gr.update()
442
+ if cleaned_model:
443
+ native_chunk = get_model_chunk_size(cleaned_model)
444
+ if cs_mode == "base" and native_chunk and native_chunk in [352800, 485100]:
445
+ chunk_update = gr.update(value=native_chunk)
446
+ if cs_mode == "yaml":
447
+ if native_chunk:
448
+ yaml_update = gr.update(value=i18n("chunk_size_yaml_detected").format(native_chunk))
449
+ else:
450
+ yaml_update = gr.update(value=i18n("chunk_size_yaml_not_downloaded"))
451
+ return fav_btn, chunk_update, yaml_update
452
 
453
  def toggle_favorite(model, favorites):
454
  if not model:
 
464
  gr.update(value=i18n("add_favorite") if is_favorited else i18n("remove_favorite"))
465
  )
466
 
467
+ def on_chunk_size_mode_change(mode, model):
468
+ cleaned = clean_model(model) if model else None
469
+ native_chunk = get_model_chunk_size(cleaned) if cleaned else None
470
+ yaml_text = (
471
+ i18n("chunk_size_yaml_detected").format(native_chunk)
472
+ if native_chunk else i18n("chunk_size_yaml_not_downloaded")
473
+ )
474
+ return (
475
+ gr.update(visible=(mode == "base")),
476
+ gr.update(visible=(mode == "custom")),
477
+ gr.update(visible=(mode == "yaml"), value=yaml_text),
478
+ )
479
+
480
+ chunk_size_mode.change(
481
+ fn=on_chunk_size_mode_change,
482
+ inputs=[chunk_size_mode, model_dropdown],
483
+ outputs=[chunk_size, chunk_size_custom, chunk_size_yaml_display]
484
+ )
485
+
486
  model_dropdown.change(
487
  fn=update_favorite_button,
488
+ inputs=[model_dropdown, favorites_state, chunk_size_mode],
489
+ outputs=[favorite_button, chunk_size, chunk_size_yaml_display]
490
  )
491
 
492
  favorite_button.click(
 
523
  with gr.Column():
524
  original_audio = gr.Audio(label=i18n("original"), interactive=False)
525
  with gr.Row():
526
+ vocals_audio = gr.Audio(label=i18n("vocals"))
527
+ instrumental_audio = gr.Audio(label=i18n("instrumental_output"))
528
+ other_audio = gr.Audio(label=i18n("other"))
529
 
530
  with gr.Tab(i18n("details_tab")) as details_tab:
531
  with gr.Column():
532
  with gr.Row():
533
+ male_audio = gr.Audio(label=i18n("male"))
534
+ female_audio = gr.Audio(label=i18n("female"))
535
+ speech_audio = gr.Audio(label=i18n("speech"))
536
  with gr.Row():
537
+ drum_audio = gr.Audio(label=i18n("drums"))
538
+ bass_audio = gr.Audio(label=i18n("bass"))
539
  with gr.Row():
540
+ effects_audio = gr.Audio(label=i18n("effects"))
541
 
542
  with gr.Tab(i18n("advanced_tab")) as advanced_tab:
543
  with gr.Column():
544
  with gr.Row():
545
+ phaseremix_audio = gr.Audio(label=i18n("phase_remix"))
546
+ dry_audio = gr.Audio(label=i18n("dry"))
547
  with gr.Row():
548
+ music_audio = gr.Audio(label=i18n("music"))
549
+ karaoke_audio = gr.Audio(label=i18n("karaoke"))
550
+ bleed_audio = gr.Audio(label=i18n("bleed"))
551
+ with gr.Row():
552
+ mid_audio = gr.Audio(label="Mid")
553
+ side_audio = gr.Audio(label="Side")
554
 
555
  separation_progress_html = gr.HTML(
556
  value=f"""
 
669
  info=i18n("matchering_info")
670
  )
671
 
672
+ with gr.Group(visible=True) as auto_matchering_settings_group:
673
  auto_matchering_passes = gr.Slider(
674
  label=i18n("matchering_passes"),
675
  minimum=1,
 
685
  with gr.Row():
686
  auto_category_dropdown = gr.Dropdown(
687
  label=i18n("model_category"),
688
+ choices=[i18n(cat) for cat in get_all_model_configs_with_custom().keys()],
689
  value=i18n("Vocal Models")
690
  )
691
  selected_models = gr.Dropdown(
692
  label=i18n("selected_models"),
693
  choices=update_model_dropdown(i18n(initial_settings["auto_category"]), favorites=initial_favorites)["choices"],
694
+ value=initial_settings["selected_models"],
695
  multiselect=True
696
  )
697
 
 
730
  def load_preset(preset_name, presets, category, favorites):
731
  if preset_name and preset_name in presets:
732
  preset = presets[preset_name]
733
+ # Mark starred models with ⭐
734
  favorite_models = [f"{model} ⭐" if model in favorites else model for model in preset["models"]]
735
+ # Get the category from the preset, default to current category if not specified
736
  preset_category = preset.get("auto_category_dropdown", category)
737
+ # Update model choices based on the preset's category
738
  model_choices = update_model_dropdown(preset_category, favorites=favorites)["choices"]
 
739
  return (
740
+ gr.update(value=preset_category), # Update auto_category_dropdown
741
+ gr.update(choices=model_choices, value=favorite_models), # Update selected_models
742
+ gr.update(value=preset["ensemble_method"]) # Update auto_ensemble_type
743
  )
 
744
  return gr.update(), gr.update(), gr.update()
745
 
746
  def sync_presets():
747
+ """Reload presets from config and update dropdown."""
748
  config = load_config()
749
  return config["presets"], gr.update(choices=list(config["presets"].keys()), value=None)
750
 
 
760
  if not models and not favorites:
761
  return gr.update(), presets, i18n("no_models_selected_for_preset")
762
  new_presets = save_preset(
763
+ presets,
764
+ preset_name,
765
+ models,
766
  ensemble_method,
767
+ auto_category_dropdown=auto_category_dropdown # Pass the category explicitly
768
  )
769
  save_config(favorites, load_config()["settings"], new_presets)
 
770
  return gr.update(choices=list(new_presets.keys()), value=None), new_presets, i18n("preset_saved").format(preset_name)
771
 
772
  save_preset_btn.click(
 
823
  label=i18n("original_audio"),
824
  interactive=False,
825
  every=1,
826
+ elem_id="original_audio_player",
827
+ streaming=True
828
  )
829
  with gr.Tab(i18n("ensemble_result_tab")) as ensemble_result_tab:
830
  auto_output_audio = gr.Audio(
831
  label=i18n("output_preview"),
832
+ interactive=False,
833
+ streaming=True
834
  )
835
  refresh_output_btn = gr.Button(i18n("refresh_output"), variant="secondary")
836
 
 
850
  placeholder=i18n("waiting_for_processing"),
851
  visible=False
852
  )
853
+
854
  with gr.Tab(i18n("download_sources_tab"), id="download_tab"):
855
  with gr.Row():
856
  with gr.Column():
 
905
  ensemble_output_audio = gr.Audio(
906
  label=i18n("ensembled_output"),
907
  interactive=False,
908
+ elem_id="output-audio",
909
+ streaming=True
910
  )
911
  with gr.Tab(i18n("processing_log_tab")) as processing_log_tab:
912
  with gr.Accordion(i18n("processing_details"), open=True, elem_id="log-accordion"):
 
924
  variant="primary",
925
  size="sm",
926
  elem_id="process-btn"
927
+ )
928
+
929
+ with gr.Tab(i18n("phase_fixer_tab"), id="phase_fixer_tab"):
930
+ with gr.Row(equal_height=True):
931
+ with gr.Column(scale=1, min_width=350):
932
+ with gr.Group():
933
+ with gr.Row():
934
+ pf_source_file = gr.File(
935
+ file_types=[".wav", ".flac", ".mp3"],
936
+ label=i18n("source_file_label")
937
+ )
938
+ pf_target_file = gr.File(
939
+ file_types=[".wav", ".flac", ".mp3"],
940
+ label=i18n("target_file_label")
941
+ )
942
+
943
+ with gr.Group():
944
+ with gr.Row():
945
+ pf_source_model = gr.Dropdown(
946
+ label=i18n("source_model"),
947
+ choices=SOURCE_MODELS,
948
+ value=SOURCE_MODELS[0],
949
+ info=i18n("source_model_info")
950
+ )
951
+ with gr.Row():
952
+ pf_target_model = gr.Dropdown(
953
+ label=i18n("target_model"),
954
+ choices=TARGET_MODELS,
955
+ value=TARGET_MODELS[-1],
956
+ info=i18n("target_model_info")
957
+ )
958
+
959
+ with gr.Accordion(i18n("phase_fixer_settings"), open=False):
960
+ with gr.Row():
961
+ pf_scale_factor = gr.Slider(
962
+ label=i18n("scale_factor"),
963
+ minimum=0.5,
964
+ maximum=3.0,
965
+ step=0.05,
966
+ value=1.4,
967
+ info=i18n("scale_factor_info")
968
+ )
969
+ pf_output_format = gr.Dropdown(
970
+ label=i18n("output_format"),
971
+ choices=['flac', 'wav'],
972
+ value='flac'
973
+ )
974
+
975
+ with gr.Row():
976
+ pf_low_cutoff = gr.Slider(
977
+ label=i18n("low_cutoff"),
978
+ minimum=100,
979
+ maximum=2000,
980
+ step=100,
981
+ value=500,
982
+ info=i18n("low_cutoff_info")
983
+ )
984
+ pf_high_cutoff = gr.Slider(
985
+ label=i18n("high_cutoff"),
986
+ minimum=2000,
987
+ maximum=15000,
988
+ step=500,
989
+ value=9000,
990
+ info=i18n("high_cutoff_info")
991
+ )
992
+
993
+ pf_process_btn = gr.Button(i18n("run_phase_fixer"), variant="primary")
994
+
995
+ with gr.Column(scale=2, min_width=600):
996
+ pf_output_audio = gr.Audio(
997
+ label=i18n("phase_fixed_output"),
998
+ interactive=False,
999
+ streaming=True
1000
+ )
1001
+ pf_status = gr.Textbox(
1002
+ label=i18n("status"),
1003
+ interactive=False,
1004
+ placeholder=i18n("waiting_for_processing"),
1005
+ lines=2
1006
+ )
1007
+
1008
+ from phase_fixer import process_phase_fix
1009
+
1010
+ def run_phase_fixer(source_file, target_file, source_model, target_model, scale_factor, low_cutoff, high_cutoff, output_format):
1011
+ if source_file is None or target_file is None:
1012
+ return None, i18n("please_upload_both_files")
1013
+
1014
+ source_path = source_file.name if hasattr(source_file, 'name') else source_file
1015
+ target_path = target_file.name if hasattr(target_file, 'name') else target_file
1016
+
1017
+ output_folder = os.path.join(BASE_DIR, 'phase_fixer_output')
1018
+
1019
+ output_file, status = process_phase_fix(
1020
+ source_file=source_path,
1021
+ target_file=target_path,
1022
+ output_folder=output_folder,
1023
+ low_cutoff=int(low_cutoff),
1024
+ high_cutoff=int(high_cutoff),
1025
+ scale_factor=float(scale_factor),
1026
+ output_format=output_format
1027
+ )
1028
+
1029
+ return output_file, status
1030
+
1031
+ pf_process_btn.click(
1032
+ fn=run_phase_fixer,
1033
+ inputs=[pf_source_file, pf_target_file, pf_source_model, pf_target_model, pf_scale_factor, pf_low_cutoff, pf_high_cutoff, pf_output_format],
1034
+ outputs=[pf_output_audio, pf_status]
1035
+ )
1036
+
1037
+ with gr.Tab(i18n("batch_processing_tab"), id="batch_processing_tab"):
1038
+ with gr.Row(equal_height=True):
1039
+ with gr.Column(scale=1, min_width=350):
1040
+ gr.Markdown(f"### {i18n('batch_description')}")
1041
+
1042
+ with gr.Group():
1043
+ batch_input_files = gr.File(
1044
+ file_types=[".wav", ".mp3", ".m4a", ".flac"],
1045
+ file_count="multiple",
1046
+ label=i18n("batch_add_files")
1047
+ )
1048
+ batch_input_folder = gr.Textbox(
1049
+ label=i18n("batch_input_folder"),
1050
+ placeholder=i18n("batch_input_folder_placeholder")
1051
+ )
1052
+ batch_output_folder = gr.Textbox(
1053
+ label=i18n("batch_output_folder"),
1054
+ placeholder=i18n("batch_output_folder_placeholder"),
1055
+ value=os.path.join(BASE_DIR, "batch_output")
1056
+ )
1057
+
1058
+ with gr.Group():
1059
+ batch_model_category = gr.Dropdown(
1060
+ label=i18n("model_category"),
1061
+ choices=[i18n(cat) for cat in get_all_model_configs_with_custom().keys()],
1062
+ value=i18n("Vocal Models")
1063
  )
1064
+ batch_model_dropdown = gr.Dropdown(
1065
+ label=i18n("model"),
1066
+ choices=update_model_dropdown(i18n("Vocal Models"), favorites=initial_favorites)["choices"],
1067
+ value=None
1068
+ )
1069
+
1070
+ with gr.Accordion(i18n("settings"), open=False):
1071
+ with gr.Row():
1072
+ batch_chunk_size = gr.Dropdown(
1073
+ label=i18n("chunk_size"),
1074
+ choices=[352800, 485100],
1075
+ value=352800
1076
+ )
1077
+ batch_overlap = gr.Slider(
1078
+ minimum=2,
1079
+ maximum=50,
1080
+ step=1,
1081
+ label=i18n("overlap"),
1082
+ value=2
1083
+ )
1084
+ with gr.Row():
1085
+ batch_export_format = gr.Dropdown(
1086
+ label=i18n("format"),
1087
+ choices=['wav FLOAT', 'flac PCM_16', 'flac PCM_24'],
1088
+ value='wav FLOAT'
1089
+ )
1090
+ batch_extract_instrumental = gr.Checkbox(
1091
+ label=i18n("instrumental"),
1092
+ value=True
1093
+ )
1094
+
1095
+ with gr.Row():
1096
+ batch_start_btn = gr.Button(i18n("batch_start"), variant="primary")
1097
+ batch_stop_btn = gr.Button(i18n("batch_stop"), variant="secondary")
1098
+
1099
+ with gr.Column(scale=2, min_width=600):
1100
+ batch_file_list = gr.Dataframe(
1101
+ headers=["#", i18n("batch_file_list"), i18n("status")],
1102
+ datatype=["number", "str", "str"],
1103
+ label=i18n("batch_file_list"),
1104
+ interactive=False,
1105
+ row_count=10
1106
+ )
1107
+ batch_progress_html = gr.HTML(
1108
+ value=f"""
1109
+ <div id="batch-progress" style="margin-top: 10px;">
1110
+ <div style="font-size: 1rem; color: #C0C0C0; margin-bottom: 5px;">{i18n("waiting_for_processing")}</div>
1111
+ <div style="width: 100%; background-color: #444; border-radius: 5px; overflow: hidden;">
1112
+ <div style="width: 0%; height: 20px; background-color: #6e8efb; transition: width 0.3s;"></div>
1113
+ </div>
1114
+ </div>
1115
+ """
1116
+ )
1117
+ batch_status = gr.Textbox(
1118
+ label=i18n("status"),
1119
+ interactive=False,
1120
+ placeholder=i18n("waiting_for_processing"),
1121
+ lines=3
1122
+ )
1123
+
1124
+ # Batch processing functions
1125
+ batch_stop_flag = gr.State(value=False)
1126
+
1127
def update_batch_file_list(files, folder_path):
    """Build the rows displayed in the batch file table.

    Args:
        files: Uploaded items from the file picker (objects exposing ``.name``
            or plain path strings); may be ``None`` or empty.
        folder_path: Optional directory whose audio files are appended after
            the uploaded ones. Ignored unless it is an existing directory.

    Returns:
        A list of ``[row_number, file_name, status]`` rows numbered
        consecutively from 1, or a single placeholder row when nothing
        was found.
    """
    audio_exts = ('.wav', '.mp3', '.m4a', '.flac')
    names = []

    if files:
        for f in files:
            fname = f.name if hasattr(f, 'name') else str(f)
            names.append(os.path.basename(fname))

    if folder_path and os.path.isdir(folder_path):
        # Filter first, number afterwards: numbering the raw directory listing
        # let skipped non-audio entries consume row numbers and leave gaps.
        names.extend(
            entry for entry in os.listdir(folder_path)
            if entry.lower().endswith(audio_exts)
        )

    if not names:
        return [[0, i18n("batch_no_files"), ""]]
    return [[row, name, "⏳ Pending"] for row, name in enumerate(names, 1)]
1139
+
1140
def run_batch_processing(files, folder_path, output_folder, model, chunk_size, overlap, export_format, extract_inst, stop_flag):
    """Walk the batch queue and report a status row per file.

    Args:
        files: Uploaded items (objects exposing ``.name`` or path strings).
        folder_path: Optional directory scanned for .wav/.mp3/.m4a/.flac files.
        output_folder: Directory created (if missing) before the loop starts.
        model, chunk_size, overlap, export_format, extract_inst: Accepted from
            the UI but not consumed by the body as currently written.
        stop_flag: Value of the stop state as passed in by the caller. It is a
            plain argument, so it is not re-read while the loop runs.

    Returns:
        ``(rows, status_text, progress_html)`` for the file table, the status
        textbox and the progress bar.

    NOTE(review): the per-file ``try`` block only marks the row as done; no
    separation call is made yet even though ``process_audio`` is imported.
    """
    from processing import process_audio

    all_files = []
    if files:
        all_files.extend([f.name if hasattr(f, 'name') else str(f) for f in files])
    if folder_path and os.path.isdir(folder_path):
        for fname in os.listdir(folder_path):
            if fname.lower().endswith(('.wav', '.mp3', '.m4a', '.flac')):
                all_files.append(os.path.join(folder_path, fname))

    if not all_files:
        return [[0, i18n("batch_no_files"), ""]], i18n("batch_no_files"), batch_progress_html.value

    os.makedirs(output_folder, exist_ok=True)
    results = []
    total = len(all_files)

    # Start from the idle progress bar so the final return is always defined.
    # Previously, when stop_flag was already set, every iteration skipped the
    # assignment below and the return raised UnboundLocalError.
    progress_html = batch_progress_html.value

    for idx, file_path in enumerate(all_files, 1):
        if stop_flag:
            results.append([idx, os.path.basename(file_path), "Stopped"])
            continue

        results.append([idx, os.path.basename(file_path), "🔄 Processing..."])
        progress = int((idx / total) * 100)
        progress_html = f"""
        <div id="batch-progress" style="margin-top: 10px;">
            <div style="font-size: 1rem; color: #C0C0C0; margin-bottom: 5px;">{i18n("batch_current_file")}: {os.path.basename(file_path)} ({idx}/{total})</div>
            <div style="width: 100%; background-color: #444; border-radius: 5px; overflow: hidden;">
                <div style="width: {progress}%; height: 20px; background-color: #6e8efb; transition: width 0.3s;"></div>
            </div>
        </div>
        """

        try:
            # Process file using inference
            results[-1][2] = "Done"
        except Exception as e:
            # Keep the table readable: only the first 30 characters are shown.
            results[-1][2] = f"Error: {str(e)[:30]}"

    final_status = i18n("batch_stopped") if stop_flag else i18n("batch_completed")
    return results, final_status, progress_html
1182
+
1183
+ batch_input_files.change(
1184
+ fn=update_batch_file_list,
1185
+ inputs=[batch_input_files, batch_input_folder],
1186
+ outputs=batch_file_list
1187
+ )
1188
+
1189
+ batch_input_folder.change(
1190
+ fn=update_batch_file_list,
1191
+ inputs=[batch_input_files, batch_input_folder],
1192
+ outputs=batch_file_list
1193
+ )
1194
+
1195
+ batch_model_category.change(
1196
+ fn=lambda cat: gr.update(choices=update_model_dropdown(next((k for k in get_all_model_configs_with_custom().keys() if i18n(k) == cat), list(get_all_model_configs_with_custom().keys())[0]), favorites=load_config()["favorites"])["choices"]),
1197
+ inputs=batch_model_category,
1198
+ outputs=batch_model_dropdown
1199
+ )
1200
+
1201
+ batch_start_btn.click(
1202
+ fn=run_batch_processing,
1203
+ inputs=[batch_input_files, batch_input_folder, batch_output_folder, batch_model_dropdown,
1204
+ batch_chunk_size, batch_overlap, batch_export_format, batch_extract_instrumental, batch_stop_flag],
1205
+ outputs=[batch_file_list, batch_status, batch_progress_html]
1206
+ )
1207
+
1208
+ batch_stop_btn.click(
1209
+ fn=lambda: True,
1210
+ outputs=batch_stop_flag
1211
+ )
1212
+
1213
+ with gr.Tab(i18n("custom_models_tab"), id="custom_models_tab"):
1214
+ with gr.Row(equal_height=True):
1215
+ with gr.Column(scale=1, min_width=400):
1216
+ gr.Markdown(f"### {i18n('add_custom_model')}")
1217
+ gr.Markdown(i18n("custom_model_info"))
1218
+
1219
+ with gr.Group():
1220
+ custom_model_name_input = gr.Textbox(
1221
+ label=i18n("custom_model_name"),
1222
+ placeholder=i18n("custom_model_name_placeholder"),
1223
+ interactive=True
1224
+ )
1225
+ custom_checkpoint_url = gr.Textbox(
1226
+ label=i18n("checkpoint_url"),
1227
+ placeholder=i18n("checkpoint_url_placeholder"),
1228
+ interactive=True
1229
+ )
1230
+ custom_config_url = gr.Textbox(
1231
+ label=i18n("config_url"),
1232
+ placeholder=i18n("config_url_placeholder"),
1233
+ interactive=True
1234
+ )
1235
+ custom_py_url = gr.Textbox(
1236
+ label=i18n("custom_py_url"),
1237
+ placeholder=i18n("custom_py_url_placeholder"),
1238
+ interactive=True
1239
+ )
1240
+
1241
+ with gr.Row():
1242
+ auto_detect_checkbox = gr.Checkbox(
1243
+ label=i18n("auto_detect_type"),
1244
+ value=True,
1245
+ interactive=True
1246
+ )
1247
+ custom_model_type = gr.Dropdown(
1248
+ label=i18n("model_type"),
1249
+ choices=SUPPORTED_MODEL_TYPES,
1250
+ value="bs_roformer",
1251
+ interactive=True,
1252
+ visible=False
1253
+ )
1254
+
1255
+ add_model_btn = gr.Button(i18n("add_model_btn"), variant="primary")
1256
+ add_model_status = gr.Textbox(label=i18n("status"), interactive=False)
1257
+
1258
+ with gr.Column(scale=1, min_width=400):
1259
+ gr.Markdown(f"### {i18n('custom_models_list')}")
1260
+
1261
+ custom_models_list_display = gr.Dataframe(
1262
+ headers=[i18n("custom_model_name"), i18n("model_type")],
1263
+ datatype=["str", "str"],
1264
+ label="",
1265
+ interactive=False,
1266
+ row_count=10
1267
+ )
1268
+
1269
+ with gr.Row():
1270
+ delete_model_dropdown = gr.Dropdown(
1271
+ label=i18n("select_model_to_delete"),
1272
+ choices=[],
1273
+ interactive=True
1274
+ )
1275
+ delete_model_btn = gr.Button(i18n("delete_model"), variant="secondary")
1276
+
1277
+ refresh_custom_models_btn = gr.Button(i18n("refresh_models"), variant="secondary")
1278
+ delete_model_status = gr.Textbox(label=i18n("status"), interactive=False)
1279
+
1280
+ # Custom Models tab functions
1281
def toggle_model_type_visibility(auto_detect):
    """Show the manual model-type dropdown only while auto-detect is unchecked."""
    show_manual_picker = not auto_detect
    return gr.update(visible=show_manual_picker)
1283
+
1284
def refresh_custom_models_display():
    """Reload the custom-model table and the delete dropdown.

    Returns:
        ``(table_rows, dropdown_update)``. With no custom models the table
        holds a single placeholder row and the dropdown choices are emptied.
    """
    entries = get_custom_models_list()
    if entries:
        table_rows = []
        model_names = []
        for model_name, model_kind in entries:
            table_rows.append([model_name, model_kind])
            model_names.append(model_name)
        return table_rows, gr.update(choices=model_names)
    return [[i18n("no_custom_models"), ""]], gr.update(choices=[])
1291
+
1292
def add_model_handler(name, checkpoint_url, config_url, py_url, auto_detect, model_type):
    """Register a custom model and refresh every widget that lists models.

    Returns a 10-tuple matching the ``add_model_btn.click`` outputs: status
    text, table rows, delete-dropdown update, three category-dropdown updates,
    and four values for the input textboxes (cleared on success, left
    untouched on failure).
    """
    selected_type = model_type
    if auto_detect:
        selected_type = "auto"

    success, message = add_custom_model(name, selected_type, checkpoint_url, config_url, py_url, auto_detect)

    if not success:
        # Failure: report the error and leave all nine other outputs as they are.
        untouched = tuple(gr.update() for _ in range(9))
        return (i18n("model_add_error").format(message),) + untouched

    # Refresh the display
    entries = get_custom_models_list()
    if entries:
        table_rows = [[model_name, model_kind] for model_name, model_kind in entries]
        model_names = [model_name for model_name, _ in entries]
    else:
        table_rows = [[i18n("no_custom_models"), ""]]
        model_names = []

    # Get updated categories
    translated_categories = [i18n(category) for category in get_all_model_configs_with_custom().keys()]

    status_text = i18n("model_added_success")
    delete_choices = gr.update(choices=model_names)
    category_updates = tuple(gr.update(choices=translated_categories) for _ in range(3))
    cleared_inputs = ("", "", "", "")  # Clear input fields
    return (status_text, table_rows, delete_choices) + category_updates + cleared_inputs
1321
+
1322
def delete_model_handler(model_name):
    """Delete a custom model and refresh every widget that lists models.

    Always returns a 6-tuple matching the ``delete_model_btn.click`` outputs:
    status text, table rows, delete-dropdown update and three
    category-dropdown updates.
    """
    if not model_name:
        # Nothing selected. This path used to return only three values while
        # the click handler is wired to six outputs; pad with no-op updates.
        return (i18n("select_model_to_delete"),) + tuple(gr.update() for _ in range(5))

    success, message = delete_custom_model(model_name)
    if not success:
        return (i18n("model_delete_error").format(message),) + tuple(gr.update() for _ in range(5))

    models_list = get_custom_models_list()
    data = [[n, t] for n, t in models_list] if models_list else [[i18n("no_custom_models"), ""]]
    choices = [n for n, _ in models_list] if models_list else []

    # Get updated categories
    all_configs = get_all_model_configs_with_custom()
    category_choices = [i18n(cat) for cat in all_configs.keys()]

    return (
        i18n("model_deleted_success"),
        data,
        # Clear the selection: the deleted name is no longer a valid choice.
        gr.update(choices=choices, value=None),
        gr.update(choices=category_choices),
        gr.update(choices=category_choices),
        gr.update(choices=category_choices),
    )
1342
+
1343
+ # Event handlers
1344
+ auto_detect_checkbox.change(
1345
+ fn=toggle_model_type_visibility,
1346
+ inputs=auto_detect_checkbox,
1347
+ outputs=custom_model_type
1348
+ )
1349
+
1350
+ add_model_btn.click(
1351
+ fn=add_model_handler,
1352
+ inputs=[custom_model_name_input, custom_checkpoint_url, custom_config_url, custom_py_url, auto_detect_checkbox, custom_model_type],
1353
+ outputs=[add_model_status, custom_models_list_display, delete_model_dropdown, model_category, auto_category_dropdown, batch_model_category, custom_model_name_input, custom_checkpoint_url, custom_config_url, custom_py_url]
1354
+ )
1355
+
1356
+ delete_model_btn.click(
1357
+ fn=delete_model_handler,
1358
+ inputs=delete_model_dropdown,
1359
+ outputs=[delete_model_status, custom_models_list_display, delete_model_dropdown, model_category, auto_category_dropdown, batch_model_category]
1360
+ )
1361
+
1362
+ refresh_custom_models_btn.click(
1363
+ fn=refresh_custom_models_display,
1364
+ outputs=[custom_models_list_display, delete_model_dropdown]
1365
+ )
1366
+
1367
+ # Initialize custom models display on load
1368
+ demo.load(
1369
+ fn=refresh_custom_models_display,
1370
+ outputs=[custom_models_list_display, delete_model_dropdown]
1371
+ )
1372
 
1373
def save_settings_on_process(*args):
    """Persist the separation-tab settings, then stream progress from process_audio.

    ``args`` is positional and follows the order in which this function
    indexes it: [1] model, [2] base chunk size, [3] overlap, [4] export
    format, [5]-[8] optimisation toggles, [9] TTA, [10] phase remix,
    [11] extract instrumental, [12]-[17] Apollo options, [18]-[19] matchering,
    [20] model category, [21] model again, and optionally [22] chunk-size
    mode and [23] custom chunk size.

    Yields:
        Every update produced by ``process_audio``, unchanged.
    """
    apollo_method_value = args[15]
    backend_apollo_method = "mid_side_method" if apollo_method_value == i18n("mid_side_method") else "normal_method"
    cleaned_model = clean_model(args[1]) if args[1] else None

    # Compute effective chunk_size based on mode
    # args[22] = chunk_size_mode, args[23] = chunk_size_custom value
    cs_mode = args[22] if len(args) > 22 else "base"
    cs_custom_val = args[23] if len(args) > 23 else 352800
    cs_base_val = args[2]  # base dropdown value

    if cs_mode == "custom":
        effective_chunk = int(cs_custom_val) if cs_custom_val else 352800
    elif cs_mode == "yaml":
        effective_chunk = "yaml"  # signal processing.py to read from YAML
    else:
        effective_chunk = int(cs_base_val) if cs_base_val else 352800

    # Load once; the same snapshot supplies favorites, presets and the
    # previously stored ensemble type.
    config = load_config()

    settings = {
        "chunk_size": cs_base_val,
        "chunk_size_mode": cs_mode,
        "chunk_size_custom": cs_custom_val,
        "overlap": args[3],
        "export_format": args[4],
        "optimize_mode": args[5],
        "enable_amp": args[6],
        "enable_tf32": args[7],
        "enable_cudnn_benchmark": args[8],
        "use_tta": args[9],
        "use_demud_phaseremix_inst": args[10],
        "extract_instrumental": args[11],
        "use_apollo": args[12],
        "apollo_chunk_size": args[13],
        "apollo_overlap": args[14],
        "apollo_method": backend_apollo_method,
        "apollo_normal_model": args[16],
        "apollo_midside_model": args[17],
        "use_matchering": args[18],
        "matchering_passes": args[19],
        "model_category": args[20],
        "selected_model": cleaned_model,
    }

    # This tab has no ensemble-type input. The old code stored args[11] (the
    # extract-instrumental checkbox) under "auto_ensemble_type", overwriting
    # the value saved by the auto-ensemble tab. Carry the stored value forward
    # instead, skipping non-string values left behind by that bug.
    # NOTE(review): assumes config["settings"] is a dict and that load_config
    # tolerates a missing "auto_ensemble_type" key — confirm.
    previous_ensemble_type = config["settings"].get("auto_ensemble_type")
    if isinstance(previous_ensemble_type, str):
        settings["auto_ensemble_type"] = previous_ensemble_type

    save_config(config["favorites"], settings, config["presets"])

    # Build args for process_audio (indices 0-21 only, with effective_chunk at [2])
    modified_args = list(args[:22])
    modified_args[1] = cleaned_model
    modified_args[2] = effective_chunk
    modified_args[21] = cleaned_model

    # Forward all yields from process_audio for real-time progress updates
    yield from process_audio(*modified_args)
1426
 
1427
def save_auto_ensemble_settings(*args):
    """Store the auto-ensemble options, then relay progress from auto_ensemble_process.

    Only updates that are 3-tuples are passed on; anything else produced by
    ``auto_ensemble_process`` is dropped.
    """
    stored = load_config()["settings"]
    for key, position in (
        ("auto_ensemble_type", 7),
        ("use_matchering", 14),
        ("matchering_passes", 15),
    ):
        stored[key] = args[position]
    save_config(load_config()["favorites"], stored, load_config()["presets"])

    # Relay progress in real time, keeping only well-formed 3-tuples.
    yield from (
        step for step in auto_ensemble_process(*args)
        if isinstance(step, tuple) and len(step) == 3
    )
 
1438
 
1439
def update_category_dropdowns(cat):
    """Refresh both model dropdowns for the selected (translated) category.

    Args:
        cat: Category label as shown in the UI, i.e. already translated.

    Returns:
        Two identical ``gr.update(choices=...)`` objects.
    """
    # update_model_dropdown maps the translated label back to the English key
    # itself. Resolving it here first and passing the English key made that
    # second lookup miss whenever the translation differs from the key, so the
    # helper fell back to the first category.
    choices = update_model_dropdown(cat, favorites=load_config()["favorites"])["choices"]
    return gr.update(choices=choices), gr.update(choices=choices)
1444
 
1445
  model_category.change(
 
1473
  )
1474
 
1475
  auto_category_dropdown.change(
1476
+ fn=lambda cat: gr.update(choices=update_model_dropdown(next((k for k in get_all_model_configs_with_custom().keys() if i18n(k) == cat), list(get_all_model_configs_with_custom().keys())[0]), favorites=load_config()["favorites"])["choices"]),
1477
  inputs=auto_category_dropdown,
1478
  outputs=selected_models
1479
  )
1480
 
1481
+ def clean_inputs(*args):
 
 
 
 
 
 
 
1482
  cleaned_args = list(args)
1483
  cleaned_args[1] = clean_model(cleaned_args[1]) if cleaned_args[1] else None
1484
+ cleaned_args[21] = clean_model(cleaned_args[21]) if cleaned_args[21] else None
1485
+ return cleaned_args
1486
+
1487
def process_wrapper(*args):
    """Normalise the UI inputs, then relay every update from save_settings_on_process."""
    prepared = clean_inputs(*args)
    yield from save_settings_on_process(*prepared)
1491
 
1492
  process_btn.click(
1493
+ fn=process_wrapper,
1494
  inputs=[
1495
  input_audio_file, model_dropdown, chunk_size, overlap, export_format,
1496
+ optimize_mode, enable_amp, enable_tf32, enable_cudnn_benchmark,
1497
  use_tta, use_demud_phaseremix_inst, extract_instrumental,
1498
  use_apollo, apollo_chunk_size, apollo_overlap,
1499
  apollo_method, apollo_normal_model, apollo_midside_model,
1500
+ use_matchering, matchering_passes, model_category, model_dropdown,
1501
+ chunk_size_mode, chunk_size_custom
1502
  ],
1503
  outputs=[
1504
  vocals_audio, instrumental_audio, phaseremix_audio, drum_audio, karaoke_audio,
1505
  other_audio, bass_audio, effects_audio, speech_audio, bleed_audio, music_audio,
1506
  dry_audio, male_audio, female_audio,
1507
+ mid_audio, side_audio,
1508
  separation_process_status, separation_progress_html
1509
  ]
1510
  )
 
1548
  refresh_btn.click(fn=update_file_list, outputs=file_dropdown)
1549
  ensemble_process_btn.click(fn=ensemble_audio_fn, inputs=[file_dropdown, ensemble_type, weights_input], outputs=[ensemble_output_audio, ensemble_status])
1550
 
1551
+ return demo
helpers.py CHANGED
@@ -20,8 +20,19 @@ from datetime import datetime
20
  import yt_dlp
21
  import validators
22
  from pytube import YouTube
23
- from googleapiclient.discovery import build
24
- from googleapiclient.http import MediaIoBaseDownload
 
 
 
 
 
 
 
 
 
 
 
25
  import io
26
  import math
27
  import hashlib
@@ -29,13 +40,12 @@ import gc
29
  import psutil
30
  import concurrent.futures
31
  from tqdm import tqdm
32
- from google.oauth2.credentials import Credentials
33
  import tempfile
34
  from urllib.parse import urlparse, quote
35
  import argparse
36
  from tqdm.auto import tqdm
37
  import torch.nn as nn
38
- from model import get_model_config, MODEL_CONFIGS
39
  from assets.i18n.i18n import I18nAuto
40
  import matchering as mg
41
  from scipy.signal import find_peaks
@@ -59,7 +69,6 @@ def extract_model_name_from_checkpoint(checkpoint_path):
59
  return "Unknown"
60
  base_name = os.path.basename(checkpoint_path)
61
  model_name = os.path.splitext(base_name)[0]
62
- print(f"Original checkpoint path: {checkpoint_path}, extracted model_name: {model_name}")
63
  return model_name.strip()
64
 
65
  for directory in [BASE_DIR, INPUT_DIR, OUTPUT_DIR, OLD_OUTPUT_DIR, AUTO_ENSEMBLE_TEMP, AUTO_ENSEMBLE_OUTPUT, VIDEO_TEMP, ENSEMBLE_DIR]:
@@ -95,7 +104,8 @@ def clean_model(model):
95
  return cleaned
96
 
97
  def get_original_category(translated_category):
98
- for original_cat in MODEL_CONFIGS.keys():
 
99
  if i18n(original_cat) == translated_category:
100
  return original_cat
101
  return None
@@ -109,9 +119,11 @@ def clamp_percentage(value):
109
  return 0
110
 
111
  def update_model_dropdown(category, favorites=None):
 
 
112
  # Map translated category back to English
113
- eng_cat = next((k for k in MODEL_CONFIGS.keys() if i18n(k) == category), list(MODEL_CONFIGS.keys())[0])
114
- models = MODEL_CONFIGS.get(eng_cat, [])
115
  choices = []
116
  favorite_models = []
117
  non_favorite_models = []
@@ -126,6 +138,11 @@ def update_model_dropdown(category, favorites=None):
126
  choices = favorite_models + non_favorite_models
127
  return {"choices": choices}
128
 
 
 
 
 
 
129
  def handle_file_upload(uploaded_file, file_path, is_auto_ensemble=False):
130
  clear_temp_folder("/tmp", exclude_items=["gradio", "config.json"])
131
  clear_directory(INPUT_DIR)
 
20
  import yt_dlp
21
  import validators
22
  from pytube import YouTube
23
+
24
+ # Google API imports (optional - for Colab/Google Drive support)
25
+ try:
26
+ from googleapiclient.discovery import build
27
+ from googleapiclient.http import MediaIoBaseDownload
28
+ from google.oauth2.credentials import Credentials
29
+ GOOGLE_API_AVAILABLE = True
30
+ except ImportError:
31
+ GOOGLE_API_AVAILABLE = False
32
+ build = None
33
+ MediaIoBaseDownload = None
34
+ Credentials = None
35
+
36
  import io
37
  import math
38
  import hashlib
 
40
  import psutil
41
  import concurrent.futures
42
  from tqdm import tqdm
 
43
  import tempfile
44
  from urllib.parse import urlparse, quote
45
  import argparse
46
  from tqdm.auto import tqdm
47
  import torch.nn as nn
48
+ from model import get_model_config, MODEL_CONFIGS, get_all_model_configs_with_custom, load_custom_models
49
  from assets.i18n.i18n import I18nAuto
50
  import matchering as mg
51
  from scipy.signal import find_peaks
 
69
  return "Unknown"
70
  base_name = os.path.basename(checkpoint_path)
71
  model_name = os.path.splitext(base_name)[0]
 
72
  return model_name.strip()
73
 
74
  for directory in [BASE_DIR, INPUT_DIR, OUTPUT_DIR, OLD_OUTPUT_DIR, AUTO_ENSEMBLE_TEMP, AUTO_ENSEMBLE_OUTPUT, VIDEO_TEMP, ENSEMBLE_DIR]:
 
104
  return cleaned
105
 
106
def get_original_category(translated_category):
    """Map a translated category label back to its original config key.

    Returns the first key of the combined built-in and custom model configs
    whose translation equals ``translated_category``, or ``None`` if no key
    matches.
    """
    return next(
        (
            category_key
            for category_key in get_all_model_configs_with_custom().keys()
            if i18n(category_key) == translated_category
        ),
        None,
    )
  return None
 
119
  return 0
120
 
121
  def update_model_dropdown(category, favorites=None):
122
+ # Get all configs including custom models
123
+ all_configs = get_all_model_configs_with_custom()
124
  # Map translated category back to English
125
+ eng_cat = next((k for k in all_configs.keys() if i18n(k) == category), list(all_configs.keys())[0])
126
+ models = all_configs.get(eng_cat, {})
127
  choices = []
128
  favorite_models = []
129
  non_favorite_models = []
 
138
  choices = favorite_models + non_favorite_models
139
  return {"choices": choices}
140
 
141
def get_model_categories():
    """Return the category keys of the combined built-in and custom model configs as a list."""
    return [*get_all_model_configs_with_custom().keys()]
145
+
146
  def handle_file_upload(uploaded_file, file_path, is_auto_ensemble=False):
147
  clear_temp_folder("/tmp", exclude_items=["gradio", "config.json"])
148
  clear_directory(INPUT_DIR)
inference.py CHANGED
@@ -3,21 +3,16 @@ __author__ = 'Roman Solovyev (ZFTurbo): https://github.com/ZFTurbo/'
3
 
4
  import argparse
5
  import time
6
- import logging
7
  import librosa
 
8
  import sys
9
  import os
10
  import glob
11
  import torch
 
12
  import torch.nn as nn
13
  import numpy as np
14
- import soundfile as sf
15
- import spaces
16
- import warnings
17
- warnings.filterwarnings("ignore")
18
-
19
- # Loglama ayarları
20
- logging.basicConfig(level=logging.DEBUG, filename='utils.log', format='%(asctime)s - %(levelname)s - %(message)s')
21
 
22
  # Colab check
23
  try:
@@ -26,22 +21,26 @@ try:
26
  except ImportError:
27
  IS_COLAB = False
28
 
29
- # i18n yer tutucu
30
- class I18nAuto:
31
- def __call__(self, message):
32
- return message
33
- def format(self, message, *args):
34
- return message.format(*args)
35
-
36
  i18n = I18nAuto()
37
 
38
  current_dir = os.path.dirname(os.path.abspath(__file__))
39
  sys.path.append(current_dir)
40
 
41
  from utils import demix, get_model_from_config, normalize_audio, denormalize_audio
42
- from utils import prefer_target_instrument, apply_tta, load_start_checkpoint
 
 
 
 
 
 
 
 
 
 
43
 
44
  def shorten_filename(filename, max_length=30):
 
45
  base, ext = os.path.splitext(filename)
46
  if len(base) <= max_length:
47
  return filename
@@ -49,217 +48,192 @@ def shorten_filename(filename, max_length=30):
49
  return shortened
50
 
51
  def get_soundfile_subtype(pcm_type, is_float=False):
52
- if pcm_type == 'FLOAT' or is_float:
 
53
  return 'FLOAT'
54
- subtype_map = {'PCM_16': 'PCM_16', 'PCM_24': 'PCM_24', 'FLOAT': 'FLOAT'}
 
 
 
 
55
  return subtype_map.get(pcm_type, 'FLOAT')
56
 
57
- def update_progress_html(progress_label, progress_percent):
58
- progress_percent = min(max(round(progress_percent), 0), 100)
59
- return f"""
60
- <div id="custom-progress" style="margin-top: 10px;">
61
- <div style="font-size: 1rem; color: #C0C0C0; margin-bottom: 5px;" id="progress-label">{progress_label}</div>
62
- <div style="width: 100%; background-color: #444; border-radius: 5px; overflow: hidden;">
63
- <div id="progress-bar" style="width: {progress_percent}%; height: 20px; background-color: #6e8efb; transition: width 0.3s; max-width: 100%;"></div>
64
- </div>
65
- </div>
66
- """
67
-
68
- def run_folder(model, args, config, device, verbose: bool = False, progress=None):
69
  start_time = time.time()
70
  model.eval()
71
 
72
  mixture_paths = sorted(glob.glob(os.path.join(args.input_folder, '*.*')))
73
  sample_rate = getattr(config.audio, 'sample_rate', 44100)
74
 
75
- logging.info(f"Total files found: {len(mixture_paths)} with sample rate: {sample_rate}")
76
  print(i18n("total_files_found").format(len(mixture_paths), sample_rate))
77
 
78
  instruments = prefer_target_instrument(config)[:]
 
 
79
  store_dir = args.store_dir
80
  os.makedirs(store_dir, exist_ok=True)
81
 
82
- total_files = len(mixture_paths)
83
- processed_files = 0
84
- base_progress_per_file = 100 / total_files if total_files > 0 else 100
 
 
 
 
85
 
86
  for path in mixture_paths:
87
  try:
88
  mix, sr = librosa.load(path, sr=sample_rate, mono=False)
89
- logging.info(f"Loaded audio: {path}, shape: {mix.shape}")
90
  print(i18n("loaded_audio").format(path, mix.shape))
 
 
 
 
91
 
92
- processed_files += 1
93
- base_progress = round((processed_files - 1) * base_progress_per_file)
94
- if progress is not None and callable(getattr(progress, '__call__', None)):
95
- progress(base_progress / 100, desc=i18n("processing_file").format(processed_files, total_files))
96
- update_progress_html(i18n("processing_file").format(processed_files, total_files), base_progress)
97
-
98
- mix_orig = mix.copy()
99
- if 'normalize' in config.inference and config.inference.get('normalize', False):
100
  mix, norm_params = normalize_audio(mix)
101
 
102
- waveforms_orig = demix(
103
- config, model, mix, device, model_type=args.model_type, pbar=False,
104
- progress=lambda p, desc: progress((base_progress + p * 50) / 100, desc=desc) if progress else None
105
- )
106
-
107
- if args.use_tta:
108
- waveforms_orig = apply_tta(
109
- config, model, mix, waveforms_orig, device, args.model_type,
110
- progress=lambda p, desc: progress((base_progress + 50 + p * 20) / 100, desc=desc) if progress else None
111
- )
112
-
113
- if args.demud_phaseremix_inst:
114
- logging.info(f"Demudding track: {path}")
115
- print(i18n("demudding_track").format(path))
116
- instr = 'vocals' if 'vocals' in instruments else instruments[0]
117
- instruments.append('instrumental_phaseremix')
118
- if 'instrumental' not in instruments and 'Instrumental' not in instruments:
119
- mix_modified = mix_orig - 2 * waveforms_orig[instr]
120
- mix_modified_ = mix_modified.copy()
121
- waveforms_modified = demix(
122
- config, model, mix_modified, device, model_type=args.model_type, pbar=False,
123
- progress=lambda p, desc: progress((base_progress + 70 + p * 15) / 100, desc=desc) if progress else None
124
- )
125
- if args.use_tta:
126
- waveforms_modified = apply_tta(
127
- config, model, mix_modified, waveforms_modified, device, args.model_type,
128
- progress=lambda p, desc: progress((base_progress + 85 + p * 10) / 100, desc=desc) if progress else None
129
- )
130
- waveforms_orig['instrumental_phaseremix'] = mix_orig + waveforms_modified[instr]
131
- else:
132
- mix_modified = 2 * waveforms_orig[instr] - mix_orig
133
- mix_modified_ = mix_modified.copy()
134
- waveforms_modified = demix(
135
- config, model, mix_modified, device, model_type=args.model_type, pbar=False,
136
- progress=lambda p, desc: progress((base_progress + 70 + p * 15) / 100, desc=desc) if progress else None
137
- )
138
- if args.use_tta:
139
- waveforms_modified = apply_tta(
140
- config, model, mix_modified, waveforms_orig, device, args.model_type,
141
- progress=lambda p, desc: progress((base_progress + 85 + p * 10) / 100, desc=desc) if progress else None
142
- )
143
- waveforms_orig['instrumental_phaseremix'] = mix_orig + mix_modified_ - waveforms_modified[instr]
144
-
145
- if args.extract_instrumental:
146
- instr = 'vocals' if 'vocals' in instruments else instruments[0]
147
- waveforms_orig['instrumental'] = mix_orig - waveforms_orig[instr]
148
- if 'instrumental' not in instruments:
149
- instruments.append('instrumental')
150
-
151
- for i, instr in enumerate(instruments):
152
- estimates = waveforms_orig[instr]
153
- if 'normalize' in config.inference and config.inference.get('normalize', False):
154
  estimates = denormalize_audio(estimates, norm_params)
155
 
156
- is_float = getattr(args, 'export_format', '').startswith('wav FLOAT')
157
- codec = 'flac' if getattr(args, 'flac_file', False) else 'wav'
158
- subtype = get_soundfile_subtype(args.pcm_type, is_float=is_float)
 
 
 
159
 
160
- shortened_filename = shorten_filename(os.path.basename(path))
161
- output_filename = f"{shortened_filename}_{instr}.{codec}"
162
- output_path = os.path.join(store_dir, output_filename)
163
- sf.write(output_path, estimates.T, sr, subtype=subtype)
164
 
165
- save_progress = round(base_progress + 95 + (i / len(instruments)) * 5)
166
- if progress is not None and callable(getattr('progress', '__call__', None)):
167
- progress(save_progress / 100, desc=i18n("saving_output").format(instr, processed_files, total_files))
168
- update_progress_html(i18n("saving_output").format(instr, processed_files, total_files), save_progress)
169
 
170
- file_progress = round(processed_files * base_progress_per_file)
171
- if progress is not None and callable(getattr(progress, '__call__', None)):
172
- progress(file_progress / 100, desc=i18n("completed_file").format(processed_files, total_files))
173
- update_progress_html(i18n("completed_file").format(processed_files, total_files), file_progress)
174
-
175
- except Exception as e:
176
- logging.error(f"Cannot read track: {path}. Error: {str(e)}")
177
- print(i18n("cannot_read_track").format(path))
178
- print(i18n("error_message").format(str(e)))
179
- continue
180
-
181
- elapsed_time = time.time() - start_time
182
- logging.info(f"Processing time: {elapsed_time:.2f} seconds")
183
- print(i18n("elapsed_time").format(elapsed_time))
184
-
185
- if progress is not None and callable(getattr(progress, '__call__', None)):
186
- progress(1.0, desc=i18n("processing_complete"))
187
- update_progress_html(i18n("processing_complete"), 100)
188
-
189
- @spaces.GPU
190
- def proc_folder(args=None, progress=None):
191
- try:
192
- parser = argparse.ArgumentParser(description=i18n("proc_folder_description"))
193
- parser.add_argument("--model_type", type=str, default='melod_band_roformer', help=i18n("model_type_help"))
194
- parser.add_argument("--config_path", type=str, required=True, help=i18n("config_path_help"))
195
- parser.add_argument("--start_check_point", type=str, required=True, help=i18n("start_checkpoint_help"))
196
- parser.add_argument("--input_folder", type=str, required=True, help=i18n("input_folder_help"))
197
- parser.add_argument("--store_dir", type=str, required=True, help=i18n("store_dir_help"))
198
- parser.add_argument("--chunk_size", type=int, default=352800, help=i18n("chunk_size_help"))
199
- parser.add_argument("--overlap", type=int, default=2, help=i18n("overlap_help"))
200
- parser.add_argument("--export_format", type=str, default='wav FLOAT', choices=['wav FLOAT', 'flac PCM_16', 'flac PCM_24'], help=i18n("export_format_help"))
201
- parser.add_argument("--demud_phaseremix_inst", action='store_true', help=i18n("demud_phaseremix_help"))
202
- parser.add_argument("--extract_instrumental", action='store_true', help=i18n("extract_instrumental_help"))
203
- parser.add_argument("--use_tta", action='store_true', help=i18n("use_tta_help"))
204
- parser.add_argument("--flac_file", action='store_true', help=i18n("flac_file_help"))
205
- parser.add_argument("--pcm_type", type=str, choices=['PCM_16', 'PCM_24'], default='PCM_24', help=i18n("pcm_type_help"))
206
- parser.add_argument("--device_ids", nargs='+', type=int, default=[0], help=i18n("device_ids_help"))
207
- parser.add_argument("--force_cpu", action='store_true', help=i18n("force_cpu_help"))
208
- parser.add_argument("--lora_checkpoint", type=str, default='', help=i18n("lora_checkpoint_help"))
209
-
210
- args = parser.parse_args(args if args else [])
211
- except Exception as e:
212
- logging.error(f"Argument parsing failed: {str(e)}")
213
- raise ValueError(f"Invalid command-line arguments: {str(e)}")
214
 
215
  device = "cpu"
216
  if args.force_cpu:
217
- logging.info("Forced to use CPU")
218
  elif torch.cuda.is_available():
219
- logging.info("CUDA available")
220
  print(i18n("cuda_available"))
221
- device = f'cuda:{args.device_ids[0]}'
222
  elif torch.backends.mps.is_available():
223
- device = "mps"
224
 
225
- logging.info(f"Using device: {device}")
226
  print(i18n("using_device").format(device))
227
 
228
  model_load_start_time = time.time()
229
  torch.backends.cudnn.benchmark = True
230
 
231
- try:
232
- model, config = get_model_from_config(args.model_type, args.config_path)
233
- except Exception as e:
234
- logging.error(f"Failed to load model: {str(e)}")
235
- raise
236
 
237
- if args.start_check_point:
238
- try:
239
- load_start_checkpoint(args, model, type_='inference')
240
- except Exception as e:
241
- logging.error(f"Failed to load checkpoint: {str(e)}")
242
- raise
243
 
244
- logging.info(f"Instruments: {config.training.instruments}")
245
  print(i18n("instruments_print").format(config.training.instruments))
246
 
247
- if len(args.device_ids) > 1 and not args.force_cpu:
248
  model = nn.DataParallel(model, device_ids=args.device_ids)
249
- logging.info(f"Using DataParallel with devices: {args.device_ids}")
250
 
251
  model = model.to(device)
252
 
253
- elapsed_time = time.time() - model_load_start_time
254
- logging.info(f"Model load time: {elapsed_time:.2f} seconds")
255
- print(i18n("model_load_time").format(elapsed_time))
256
-
257
- run_folder(model, args, config, device, verbose=False, progress=progress)
258
- return "Processing completed"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
259
 
260
  if __name__ == "__main__":
261
- try:
262
- proc_folder(None)
263
- except Exception as e:
264
- logging.error(f"Main execution failed: {str(e)}")
265
- raise
 
3
 
4
  import argparse
5
  import time
 
6
  import librosa
7
+ from tqdm.auto import tqdm
8
  import sys
9
  import os
10
  import glob
11
  import torch
12
+ import soundfile as sf
13
  import torch.nn as nn
14
  import numpy as np
15
+ from assets.i18n.i18n import I18nAuto
 
 
 
 
 
 
16
 
17
  # Colab kontrolü
18
  try:
 
21
  except ImportError:
22
  IS_COLAB = False
23
 
 
 
 
 
 
 
 
24
  i18n = I18nAuto()
25
 
26
  current_dir = os.path.dirname(os.path.abspath(__file__))
27
  sys.path.append(current_dir)
28
 
29
  from utils import demix, get_model_from_config, normalize_audio, denormalize_audio
30
+ from utils import prefer_target_instrument, apply_tta, load_start_checkpoint, load_lora_weights
31
+
32
+ # PyTorch optimized backend (always available)
33
+ try:
34
+ from pytorch_backend import PyTorchBackend
35
+ PYTORCH_OPTIMIZED_AVAILABLE = True
36
+ except ImportError:
37
+ PYTORCH_OPTIMIZED_AVAILABLE = False
38
+
39
+ import warnings
40
+ warnings.filterwarnings("ignore")
41
 
42
  def shorten_filename(filename, max_length=30):
43
+ """Dosya adını belirtilen maksimum uzunluğa kısaltır."""
44
  base, ext = os.path.splitext(filename)
45
  if len(base) <= max_length:
46
  return filename
 
48
  return shortened
49
 
50
def get_soundfile_subtype(pcm_type, is_float=False):
    """Pick the soundfile subtype string used when writing audio.

    A truthy ``is_float`` always selects 'FLOAT'. Otherwise 'PCM_16' and
    'PCM_24' are passed through unchanged and every other ``pcm_type``
    (including 'FLOAT' itself) resolves to 'FLOAT'.
    """
    if not is_float:
        for known in ('PCM_16', 'PCM_24'):
            if pcm_type == known:
                return known
    return 'FLOAT'
60
 
61
def run_folder(model, args, config, device, verbose: bool = False):
    """Separate every file in ``args.input_folder`` and write the stems to ``args.store_dir``.

    Parameters
    ----------
    model :
        Separation model. It is switched to eval mode here and handed to
        ``demix`` / ``apply_tta``.
    args :
        Parsed options. Reads ``input_folder``, ``store_dir``,
        ``disable_detailed_pbar``, ``model_type``, ``use_tta``,
        ``demud_phaseremix_inst``, ``extract_instrumental``, ``pcm_type`` and,
        when present, ``export_format`` and ``flac_file``.
    config :
        Model configuration. Reads ``audio.sample_rate`` (44100 when absent)
        and ``inference['normalize']``.
    device :
        Passed through to ``demix`` / ``apply_tta``.
    verbose : bool
        When False the file list is wrapped in a tqdm progress bar.

    Files that cannot be loaded are reported and skipped. Each stem is written
    as ``<shortened input file name>_<instrument>.<codec>``; the shortened name
    still carries the input file's own extension.
    """
    start_time = time.time()
    model.eval()

    mixture_paths = sorted(glob.glob(os.path.join(args.input_folder, '*.*')))
    sample_rate = getattr(config.audio, 'sample_rate', 44100)

    print(i18n("total_files_found").format(len(mixture_paths), sample_rate))

    # Stems the model produces. Extra stems are added to a per-file copy below,
    # so one file's additions never influence how the next file is processed.
    base_instruments = prefer_target_instrument(config)[:]

    # Use the output folder as given (set up by processing.py).
    store_dir = args.store_dir
    os.makedirs(store_dir, exist_ok=True)

    if not verbose:
        mixture_paths = tqdm(mixture_paths, desc=i18n("total_progress"))

    detailed_pbar = not args.disable_detailed_pbar
    print(i18n("detailed_pbar_enabled").format(detailed_pbar))

    normalize = 'normalize' in config.inference and config.inference['normalize'] is True

    # Output container and sample format do not depend on the file being processed.
    is_float = getattr(args, 'export_format', '').startswith('wav FLOAT')
    codec = 'flac' if getattr(args, 'flac_file', False) else 'wav'
    if codec == 'flac':
        subtype = get_soundfile_subtype(args.pcm_type, is_float)
    else:
        subtype = get_soundfile_subtype('FLOAT', is_float)

    for path in mixture_paths:
        try:
            mix, sr = librosa.load(path, sr=sample_rate, mono=False)
            print(i18n("loaded_audio").format(path, mix.shape))
        except Exception as e:
            print(i18n("cannot_read_track").format(path))
            print(i18n("error_message").format(str(e)))
            continue

        instruments = list(base_instruments)

        mix_orig = mix.copy()
        if normalize:
            mix, norm_params = normalize_audio(mix)

        waveforms_orig = demix(config, model, mix, device, model_type=args.model_type, pbar=detailed_pbar)

        if args.use_tta:
            waveforms_orig = apply_tta(config, model, mix, waveforms_orig, device, args.model_type)

        if args.demud_phaseremix_inst:
            print(i18n("demudding_track").format(path))
            instr = 'vocals' if 'vocals' in instruments else instruments[0]
            instruments.append('instrumental_phaseremix')
            if 'instrumental' not in instruments and 'Instrumental' not in instruments:
                mix_modified = mix_orig - 2*waveforms_orig[instr]
                waveforms_modified = demix(config, model, mix_modified, device, model_type=args.model_type, pbar=detailed_pbar)
                if args.use_tta:
                    waveforms_modified = apply_tta(config, model, mix_modified, waveforms_modified, device, args.model_type)
                waveforms_orig['instrumental_phaseremix'] = mix_orig + waveforms_modified[instr]
            else:
                mix_modified = 2*waveforms_orig[instr] - mix_orig
                # Snapshot taken before the second separation pass; used in the remix below.
                mix_modified_ = mix_modified.copy()
                waveforms_modified = demix(config, model, mix_modified, device, model_type=args.model_type, pbar=detailed_pbar)
                if args.use_tta:
                    # TTA must refine the estimates of the modified mix, not the first-pass ones.
                    waveforms_modified = apply_tta(config, model, mix_modified, waveforms_modified, device, args.model_type)
                waveforms_orig['instrumental_phaseremix'] = mix_orig + mix_modified_ - waveforms_modified[instr]

        if args.extract_instrumental:
            instr = 'vocals' if 'vocals' in instruments else instruments[0]
            waveforms_orig['instrumental'] = mix_orig - waveforms_orig[instr]
            if 'instrumental' not in instruments:
                instruments.append('instrumental')

        for instr in instruments:
            estimates = waveforms_orig[instr]
            if normalize:
                estimates = denormalize_audio(estimates, norm_params)

            shortened_filename = shorten_filename(os.path.basename(path))
            output_filename = f"{shortened_filename}_{instr}.{codec}"
            output_path = os.path.join(store_dir, output_filename)
            sf.write(output_path, estimates.T, sr, subtype=subtype)

    print(i18n("elapsed_time").format(time.time() - start_time))
 
 
 
147
 
148
def proc_folder(args, use_tensorrt=False):
    """
    Parse inference options and run separation over a folder.

    When the optimized PyTorch backend could be imported and ``--force_cpu`` was
    not requested, the parsed options are forwarded to
    ``inference_pytorch.proc_folder_pytorch_optimized``, which loads the model
    itself. Otherwise the model is loaded here and ``run_folder`` is used.

    Parameters:
    ----------
    args : list or None
        Command line arguments. ``None`` means "read them from sys.argv".
    use_tensorrt : bool
        Accepted for interface compatibility; this function does not read it.
    """
    parser = argparse.ArgumentParser(description=i18n("proc_folder_description"))
    parser.add_argument("--model_type", type=str, default='mdx23c', help=i18n("model_type_help"))
    parser.add_argument("--config_path", type=str, help=i18n("config_path_help"))
    parser.add_argument("--demud_phaseremix_inst", action='store_true', help=i18n("demud_phaseremix_help"))
    parser.add_argument("--start_check_point", type=str, default='', help=i18n("start_checkpoint_help"))
    parser.add_argument("--input_folder", type=str, help=i18n("input_folder_help"))
    parser.add_argument("--audio_path", type=str, help=i18n("audio_path_help"))
    parser.add_argument("--store_dir", type=str, default="", help=i18n("store_dir_help"))
    parser.add_argument("--device_ids", nargs='+', type=int, default=0, help=i18n("device_ids_help"))
    parser.add_argument("--extract_instrumental", action='store_true', help=i18n("extract_instrumental_help"))
    parser.add_argument("--disable_detailed_pbar", action='store_true', help=i18n("disable_detailed_pbar_help"))
    parser.add_argument("--force_cpu", action='store_true', help=i18n("force_cpu_help"))
    parser.add_argument("--flac_file", action='store_true', help=i18n("flac_file_help"))
    parser.add_argument("--export_format", type=str, choices=['wav FLOAT', 'flac PCM_16', 'flac PCM_24'], default='flac PCM_24', help=i18n("export_format_help"))
    parser.add_argument("--pcm_type", type=str, choices=['PCM_16', 'PCM_24'], default='PCM_24', help=i18n("pcm_type_help"))
    parser.add_argument("--use_tta", action='store_true', help=i18n("use_tta_help"))
    parser.add_argument("--lora_checkpoint", type=str, default='', help=i18n("lora_checkpoint_help"))
    parser.add_argument("--chunk_size", type=int, default=1000000, help="Inference chunk size")
    parser.add_argument("--overlap", type=int, default=4, help="Inference overlap factor")
    parser.add_argument("--optimize_mode", type=str, choices=['default', 'compile', 'jit', 'channels_last'], default='channels_last', help="PyTorch optimization mode (always enabled)")
    # NOTE: store_true combined with default=True means these three flags are always True.
    parser.add_argument("--enable_amp", action='store_true', default=True, help="Enable automatic mixed precision")
    parser.add_argument("--enable_tf32", action='store_true', default=True, help="Enable TF32 (Ampere GPUs)")
    parser.add_argument("--enable_cudnn_benchmark", action='store_true', default=True, help="Enable cuDNN benchmark")

    # parse_args(None) falls back to sys.argv[1:], so both call styles share one line.
    args = parser.parse_args(args)

    if PYTORCH_OPTIMIZED_AVAILABLE and not args.force_cpu:
        # Delegate before loading anything: the optimized entry point loads the
        # model on its own, so loading it here as well would hold two copies.
        print("Using optimized PyTorch backend")
        print(f"  Mode: {args.optimize_mode}")
        print(f"  AMP: {args.enable_amp} | TF32: {args.enable_tf32} | cuDNN: {args.enable_cudnn_benchmark}")
        from inference_pytorch import proc_folder_pytorch_optimized

        # Options the optimized parser does not define; forwarding them would
        # make its argparse abort.
        not_forwarded = {"audio_path", "force_cpu"}
        forwarded = []
        for key, value in vars(args).items():
            if key in not_forwarded or value is None or value is False:
                continue
            if value is True:
                forwarded.append(f"--{key}")
            elif isinstance(value, list):
                forwarded.append(f"--{key}")
                forwarded.extend(map(str, value))
            else:
                forwarded.extend([f"--{key}", str(value)])
        # Pass the options explicitly instead of rewriting the process-wide sys.argv.
        proc_folder_pytorch_optimized(forwarded)
        return

    if not PYTORCH_OPTIMIZED_AVAILABLE:
        print("Warning: PyTorch optimized backend not available, using standard inference")

    multiple_ids = isinstance(args.device_ids, list)

    device = "cpu"
    if args.force_cpu:
        pass
    elif torch.cuda.is_available():
        print(i18n("cuda_available"))
        device = f'cuda:{args.device_ids[0]}' if multiple_ids else f'cuda:{args.device_ids}'
    elif torch.backends.mps.is_available():
        device = "mps"

    print(i18n("using_device").format(device))

    model_load_start_time = time.time()
    torch.backends.cudnn.benchmark = True

    model, config = get_model_from_config(args.model_type, args.config_path)

    if args.start_check_point != '':
        load_start_checkpoint(args, model, type_='inference')

    print(i18n("instruments_print").format(config.training.instruments))

    if multiple_ids and len(args.device_ids) > 1 and not args.force_cpu:
        model = nn.DataParallel(model, device_ids=args.device_ids)

    model = model.to(device)

    print(i18n("model_load_time").format(time.time() - model_load_start_time))

    run_folder(model, args, config, device, verbose=False)
237
 
238
# Script entry point: passing None makes proc_folder parse sys.argv.
if __name__ == "__main__":
    proc_folder(None)
 
 
 
 
inference_pytorch.py ADDED
@@ -0,0 +1,390 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # coding: utf-8
2
+ __author__ = 'PyTorch Optimized Inference Implementation'
3
+
4
+ import argparse
5
+ import time
6
+ import librosa
7
+ from tqdm.auto import tqdm
8
+ import sys
9
+ import os
10
+ import glob
11
+ import torch
12
+ import soundfile as sf
13
+ import torch.nn as nn
14
+ import numpy as np
15
+ import pickle
16
+ from assets.i18n.i18n import I18nAuto
17
+
18
+ # Set inference path for compatibility
19
+ INFERENCE_PATH = os.path.abspath(__file__)
20
+
21
+ i18n = I18nAuto()
22
+
23
+ current_dir = os.path.dirname(os.path.abspath(__file__))
24
+ sys.path.append(current_dir)
25
+
26
+ from utils import get_model_from_config, normalize_audio, denormalize_audio
27
+ from utils import prefer_target_instrument, load_start_checkpoint, apply_tta, demix
28
+ from pytorch_backend import PyTorchBackend, PyTorchOptimizer, create_inference_session
29
+
30
+ import warnings
31
+ warnings.filterwarnings("ignore")
32
+
33
+
34
def shorten_filename(filename, max_length=30):
    """Shorten a file name whose stem is longer than ``max_length`` characters.

    Short names are returned untouched. Long ones keep the first 15 and the
    last 10 characters of the stem, joined by "...", followed by the original
    extension.
    """
    stem, suffix = os.path.splitext(filename)
    if len(stem) > max_length:
        return f"{stem[:15]}...{stem[-10:]}{suffix}"
    return filename
41
+
42
+
43
def get_soundfile_subtype(pcm_type, is_float=False):
    """Pick the soundfile subtype string used when writing audio.

    A truthy ``is_float`` always selects 'FLOAT'. Otherwise 'PCM_16' and
    'PCM_24' are passed through unchanged and every other ``pcm_type``
    (including 'FLOAT' itself) resolves to 'FLOAT'.
    """
    if not is_float:
        for known in ('PCM_16', 'PCM_24'):
            if pcm_type == known:
                return known
    return 'FLOAT'
53
+
54
+
55
def demix_pytorch_optimized(
    config,
    backend: PyTorchBackend,
    mix: np.ndarray,
    device: torch.device,
    pbar: bool = False
) -> dict:
    """
    Separate one mixture with the optimized PyTorch backend, chunk by chunk.

    The mixture is cut into overlapping chunks of ``config.audio.chunk_size``
    samples, chunks are stacked into batches of ``config.inference.batch_size``
    and passed to ``backend``, and the windowed outputs are overlap-added and
    divided by the accumulated window weight.

    Progress is printed as ``[SESA_PROGRESS]<percent>`` lines after each
    processed batch, followed by a final ``[SESA_PROGRESS]100``.

    Parameters:
    ----------
    config : ConfigDict
        Reads ``audio.chunk_size``, ``inference.num_overlap``,
        ``inference.batch_size`` and ``training.use_amp`` (True when absent).
    backend : PyTorchBackend
        Callable invoked on each stacked batch of chunks.
    mix : np.ndarray
        Input audio; indexed as ``mix[:, start:end]``, so it must be 2-D.
    device : torch.device
        Device each chunk is moved to before inference.
    pbar : bool
        Accepted for interface compatibility; not read by this function.

    Returns:
    -------
    dict
        Maps each name from ``prefer_target_instrument(config)`` to its
        estimated source as a numpy array.
    """
    mix = torch.tensor(mix, dtype=torch.float32)

    chunk_size = config.audio.chunk_size
    num_instruments = len(prefer_target_instrument(config))
    num_overlap = config.inference.num_overlap

    fade_size = chunk_size // 10
    step = chunk_size // num_overlap
    border = chunk_size - step
    length_init = mix.shape[-1]

    # Windowing array: linear fade-in and fade-out at the chunk edges
    fadein = torch.linspace(0, 1, fade_size)
    fadeout = torch.linspace(1, 0, fade_size)
    windowing_array = torch.ones(chunk_size)
    windowing_array[-fade_size:] = fadeout
    windowing_array[:fade_size] = fadein

    # Add reflect padding on both sides when the signal is long enough for it
    if length_init > 2 * border and border > 0:
        mix = nn.functional.pad(mix, (border, border), mode="reflect")

    batch_size = config.inference.batch_size
    use_amp = getattr(config.training, 'use_amp', True)

    with torch.cuda.amp.autocast(enabled=use_amp):
        with torch.inference_mode():
            # Initialize result and counter tensors
            req_shape = (num_instruments,) + mix.shape
            result = torch.zeros(req_shape, dtype=torch.float32)
            counter = torch.zeros(req_shape, dtype=torch.float32)

            i = 0
            batch_data = []
            batch_locations = []

            # Progress reporting for GUI (no terminal tqdm)
            total_samples = mix.shape[1]
            last_reported_percent = -1

            while i < mix.shape[1]:
                # Extract chunk
                part = mix[:, i:i + chunk_size].to(device)
                chunk_len = part.shape[-1]

                if chunk_len > chunk_size // 2:
                    pad_mode = "reflect"
                else:
                    pad_mode = "constant"

                part = nn.functional.pad(
                    part,
                    (0, chunk_size - chunk_len),
                    mode=pad_mode,
                    value=0
                )

                batch_data.append(part)
                batch_locations.append((i, chunk_len))
                i += step

                # Process batch once it is full or the end of the signal is reached
                if len(batch_data) >= batch_size or i >= mix.shape[1]:
                    arr = torch.stack(batch_data, dim=0)

                    # Use optimized PyTorch backend for inference
                    x = backend(arr)

                    window = windowing_array.clone()
                    if i - step == 0:  # First chunk
                        window[:fade_size] = 1
                    elif i >= mix.shape[1]:  # Last chunk
                        window[-fade_size:] = 1

                    for j, (start, seg_len) in enumerate(batch_locations):
                        result[..., start:start + seg_len] += x[j, ..., :seg_len].cpu() * window[..., :seg_len]
                        counter[..., start:start + seg_len] += window[..., :seg_len]

                    batch_data.clear()
                    batch_locations.clear()

                    # Report progress with the unique [SESA_PROGRESS] prefix.
                    # i can step past the end of the signal on the final chunk,
                    # so the value is clamped to 100.
                    current_percent = min(100, int((i / total_samples) * 100))
                    if current_percent > last_reported_percent:
                        last_reported_percent = current_percent
                        print(f"[SESA_PROGRESS]{current_percent}", flush=True)

            print("[SESA_PROGRESS]100", flush=True)

            # Compute final estimated sources; positions with zero accumulated
            # weight produce NaN from the division and are reset to 0.
            estimated_sources = result / counter
            estimated_sources = estimated_sources.cpu().numpy()
            np.nan_to_num(estimated_sources, copy=False, nan=0.0)

    # Remove padding
    if length_init > 2 * border and border > 0:
        estimated_sources = estimated_sources[..., border:-border]

    # Return as dictionary
    instruments = prefer_target_instrument(config)
    ret_data = {k: v for k, v in zip(instruments, estimated_sources)}

    return ret_data
187
+
188
+
189
def run_folder_pytorch_optimized(backend, args, config, device, model=None, verbose: bool = False):
    """
    Separate every file in ``args.input_folder`` with the optimized PyTorch backend.

    Parameters
    ----------
    backend :
        Optimized inference backend handed to ``demix_pytorch_optimized``.
    args :
        Parsed options. Reads ``input_folder``, ``store_dir``,
        ``disable_detailed_pbar``, ``model_type``, ``use_tta``,
        ``demud_phaseremix_inst``, ``extract_instrumental``, ``pcm_type`` and,
        when present, ``export_format`` and ``flac_file``.
    config :
        Model configuration. Reads ``audio.sample_rate`` (44100 when absent)
        and ``inference['normalize']``.
    device :
        Passed through to the demix / TTA helpers.
    model :
        Raw model used for TTA and DemudPhaseRemix; both steps are skipped
        when it is None.
    verbose : bool
        Accepted for interface compatibility; not read by this function.

    Files that cannot be loaded are reported and skipped. Each stem is written
    to ``args.store_dir`` as ``<shortened input file name>_<instrument>.<codec>``.
    """
    start_time = time.time()

    mixture_paths = sorted(glob.glob(os.path.join(args.input_folder, '*.*')))
    sample_rate = getattr(config.audio, 'sample_rate', 44100)

    print(f"PyTorch Backend | {len(mixture_paths)} dosya | SR: {sample_rate}")

    # Stems the model produces. Extra stems are added to a per-file copy below,
    # so one file's additions never influence how the next file is processed.
    base_instruments = prefer_target_instrument(config)[:]

    # Use the output folder as given
    store_dir = args.store_dir
    os.makedirs(store_dir, exist_ok=True)

    # Progress is reported via print statements for GUI capture (no terminal tqdm)
    detailed_pbar = not args.disable_detailed_pbar
    print(i18n("detailed_pbar_enabled").format(detailed_pbar))

    normalize = 'normalize' in config.inference and config.inference['normalize'] is True

    # Output container and sample format do not depend on the file being processed.
    is_float = getattr(args, 'export_format', '').startswith('wav FLOAT')
    codec = 'flac' if getattr(args, 'flac_file', False) else 'wav'
    if codec == 'flac':
        subtype = get_soundfile_subtype(args.pcm_type, is_float)
    else:
        subtype = get_soundfile_subtype('FLOAT', is_float)

    for path in mixture_paths:
        try:
            mix, sr = librosa.load(path, sr=sample_rate, mono=False)
            print(i18n("loaded_audio").format(path, mix.shape))
        except Exception as e:
            print(i18n("cannot_read_track").format(path))
            print(i18n("error_message").format(str(e)))
            continue

        instruments = list(base_instruments)

        mix_orig = mix.copy()
        if normalize:
            mix, norm_params = normalize_audio(mix)

        # Use optimized PyTorch backend
        waveforms_orig = demix_pytorch_optimized(config, backend, mix, device, pbar=detailed_pbar)

        if args.use_tta and model is not None:
            waveforms_orig = apply_tta(config, model, mix, waveforms_orig, device, args.model_type)

        if args.demud_phaseremix_inst and model is not None:
            print(f"DemudPhaseRemix: {path}")
            instr = 'vocals' if 'vocals' in instruments else instruments[0]
            instruments.append('instrumental_phaseremix')
            if 'instrumental' not in instruments and 'Instrumental' not in instruments:
                mix_modified = mix_orig - 2 * waveforms_orig[instr]
                waveforms_modified = demix(config, model, mix_modified, device, model_type=args.model_type)
                if args.use_tta:
                    waveforms_modified = apply_tta(config, model, mix_modified, waveforms_modified, device, args.model_type)
                waveforms_orig['instrumental_phaseremix'] = mix_orig + waveforms_modified[instr]
            else:
                mix_modified = 2 * waveforms_orig[instr] - mix_orig
                # Snapshot taken before the second separation pass; used in the remix below.
                mix_modified_ = mix_modified.copy()
                waveforms_modified = demix(config, model, mix_modified, device, model_type=args.model_type)
                if args.use_tta:
                    # TTA must refine the estimates of the modified mix, not the first-pass ones.
                    waveforms_modified = apply_tta(config, model, mix_modified, waveforms_modified, device, args.model_type)
                waveforms_orig['instrumental_phaseremix'] = mix_orig + mix_modified_ - waveforms_modified[instr]

        if args.extract_instrumental:
            instr = 'vocals' if 'vocals' in instruments else instruments[0]
            waveforms_orig['instrumental'] = mix_orig - waveforms_orig[instr]
            if 'instrumental' not in instruments:
                instruments.append('instrumental')

        for instr in instruments:
            estimates = waveforms_orig[instr]
            if normalize:
                estimates = denormalize_audio(estimates, norm_params)

            shortened_filename = shorten_filename(os.path.basename(path))
            output_filename = f"{shortened_filename}_{instr}.{codec}"
            output_path = os.path.join(store_dir, output_filename)
            sf.write(output_path, estimates.T, sr, subtype=subtype)

    print(i18n("elapsed_time").format(time.time() - start_time))
275
+
276
+
277
def proc_folder_pytorch_optimized(args):
    """
    Parse inference options, load the model and run optimized folder inference.

    Parameters
    ----------
    args : list or None
        Command line arguments. ``None`` means "read them from sys.argv".

    The checkpoint given by ``--start_check_point`` is loaded with
    ``strict=False``. If the file cannot be unpickled, a diagnostic message is
    printed and the process exits with status 1.
    """
    parser = argparse.ArgumentParser(description="PyTorch Inference for Music Source Separation")
    parser.add_argument("--model_type", type=str, default='mdx23c', help="Model type")
    parser.add_argument("--config_path", type=str, help="Config path")
    parser.add_argument("--start_check_point", type=str, default='', help="Checkpoint path (.ckpt)")
    parser.add_argument("--input_folder", type=str, help="Input folder path")
    # Accepted so that callers forwarding the standard inference options do not
    # make argparse abort; the value itself is not read here.
    parser.add_argument("--audio_path", type=str, help="Audio path (accepted for compatibility)")
    parser.add_argument("--store_dir", type=str, default="", help="Output directory")
    parser.add_argument("--device_ids", nargs='+', type=int, default=0, help="Device IDs")
    parser.add_argument("--extract_instrumental", action='store_true', help="Extract instrumental")
    parser.add_argument("--disable_detailed_pbar", action='store_true', help="Disable detailed progress bar")
    parser.add_argument("--force_cpu", action='store_true', help="Force CPU even when CUDA or MPS is available")
    parser.add_argument("--flac_file", action='store_true', help="Output as FLAC")
    parser.add_argument("--export_format", type=str, choices=['wav FLOAT', 'flac PCM_16', 'flac PCM_24'],
                        default='flac PCM_24', help="Export format")
    parser.add_argument("--pcm_type", type=str, choices=['PCM_16', 'PCM_24'], default='PCM_24', help="PCM type")
    parser.add_argument("--chunk_size", type=int, default=1000000, help="Inference chunk size")
    parser.add_argument("--overlap", type=int, default=4, help="Inference overlap factor")
    parser.add_argument("--optimize_mode", type=str, choices=['channels_last', 'compile', 'jit', 'default'],
                        default='channels_last', help="PyTorch optimization mode (channels_last recommended)")
    parser.add_argument("--enable_amp", action='store_true', help="Enable automatic mixed precision (2x faster)")
    parser.add_argument("--enable_tf32", action='store_true', help="Enable TF32 for RTX 30xx+ (faster)")
    parser.add_argument("--enable_cudnn_benchmark", action='store_true', help="Enable cuDNN benchmark (faster after warmup)")
    # NOTE(review): --lora_checkpoint is parsed but nothing below reads it.
    parser.add_argument("--lora_checkpoint", type=str, default='', help="Initial checkpoint to LoRA weights")
    parser.add_argument("--use_tta", action='store_true', help="Test Time Augmentation (flips + polarity)")
    parser.add_argument("--demud_phaseremix_inst", action='store_true', help="DemudPhaseRemix instrumental extraction")

    # parse_args(None) falls back to sys.argv[1:], so both call styles share one line.
    args = parser.parse_args(args)

    # Device setup. Only the first device id is used on this path.
    device = "cpu"
    if args.force_cpu:
        pass
    elif torch.cuda.is_available():
        print(i18n("cuda_available"))
        device = f'cuda:{args.device_ids[0]}' if isinstance(args.device_ids, list) else f'cuda:{args.device_ids}'
    elif torch.backends.mps.is_available():
        device = "mps"
        print("Using MPS (Metal) backend")

    print(i18n("using_device").format(device))

    # Load model
    model_load_start_time = time.time()

    model, config = get_model_from_config(args.model_type, args.config_path)

    if args.start_check_point != '':
        try:
            # SECURITY: weights_only=False unpickles arbitrary objects. Only load
            # checkpoints from sources you trust.
            checkpoint = torch.load(args.start_check_point, map_location=device, weights_only=False)
        except (pickle.UnpicklingError, RuntimeError, EOFError) as e:
            error_details = "\n".join([
                "",
                "CHECKPOINT FILE CORRUPTED",
                "",
                f"Error: {str(e)}",
                "",
                "The checkpoint file appears to be corrupted or was not downloaded correctly.",
                f"File: {args.start_check_point}",
                "",
                "Common causes:",
                "- File is an HTML page (wrong download URL, e.g., HuggingFace /blob/ instead of /resolve/)",
                "- Incomplete or interrupted download",
                "- Network issues during download",
                "- File system corruption",
                "",
                "Solution:",
                "1. Delete the corrupted checkpoint file:",
                f"   {args.start_check_point}",
                "2. Re-run the application - it will automatically re-download the model",
                "3. If the problem persists, check that your model URL uses /resolve/ not /blob/",
                "   Example: https://huggingface.co/user/repo/resolve/main/model.ckpt",
                "",
            ])
            print(error_details)
            sys.exit(1)

        # Handle different checkpoint formats: the weights may sit under one of
        # several wrapper keys or be the checkpoint object itself.
        state_dict = checkpoint
        if isinstance(checkpoint, dict):
            for wrapper_key in ('state_dict', 'model', 'state'):
                if wrapper_key in checkpoint:
                    state_dict = checkpoint[wrapper_key]
                    break

        model.load_state_dict(state_dict, strict=False)

    # Always switch to eval mode and move to the target device, with or
    # without a checkpoint.
    model = model.eval().to(device)

    print(i18n("instruments_print").format(config.training.instruments))

    # Create optimized PyTorch backend
    backend = create_inference_session(
        model=model,
        device=device,
        optimize_mode=args.optimize_mode,
        enable_amp=args.enable_amp,
        enable_tf32=args.enable_tf32,
        enable_cudnn_benchmark=args.enable_cudnn_benchmark
    )

    print(i18n("model_load_time").format(time.time() - model_load_start_time))

    # Run inference (pass raw model for TTA/demud support)
    run_folder_pytorch_optimized(backend, args, config, device, model=model, verbose=False)
387
+
388
+
389
# Script entry point: passing None makes the function parse sys.argv.
if __name__ == "__main__":
    proc_folder_pytorch_optimized(None)
main.py CHANGED
@@ -14,7 +14,15 @@ from datetime import datetime
14
  import numpy as np
15
  import shutil
16
  from gui import create_interface
17
- from pyngrok import ngrok
 
 
 
 
 
 
 
 
18
  from assets.i18n.i18n import I18nAuto # I18nAuto'yu içe aktar
19
 
20
  import warnings
@@ -56,6 +64,9 @@ def start_localtunnel(port, i18n):
56
 
57
  def start_ngrok(port, ngrok_token, i18n):
58
  """Starts the Gradio interface with ngrok sharing."""
 
 
 
59
  print(i18n("starting_ngrok").format(port=port))
60
  try:
61
  ngrok.set_auth_token(ngrok_token)
 
14
  import numpy as np
15
  import shutil
16
  from gui import create_interface
17
+
18
+ # pyngrok import (optional - only needed for ngrok sharing)
19
+ try:
20
+ from pyngrok import ngrok
21
+ NGROK_AVAILABLE = True
22
+ except ImportError:
23
+ NGROK_AVAILABLE = False
24
+ ngrok = None
25
+
26
  from assets.i18n.i18n import I18nAuto # I18nAuto'yu içe aktar
27
 
28
  import warnings
 
64
 
65
  def start_ngrok(port, ngrok_token, i18n):
66
  """Starts the Gradio interface with ngrok sharing."""
67
+ if not NGROK_AVAILABLE:
68
+ print("pyngrok modülü yüklü değil. 'pip install pyngrok' ile yükleyin.")
69
+ sys.exit(1)
70
  print(i18n("starting_ngrok").format(port=port))
71
  try:
72
  ngrok.set_auth_token(ngrok_token)
model.py CHANGED
@@ -1,58 +1,600 @@
1
  import os
2
  import yaml
3
- from urllib.parse import quote
 
 
 
4
  from pathlib import Path
5
 
6
  # Temel dizin ve checkpoint dizini sabit olarak tanımlanıyor
7
  BASE_DIR = os.path.dirname(os.path.abspath(__file__))
8
  CHECKPOINT_DIR = os.path.join(BASE_DIR, 'ckpts')
 
9
 
10
- def conf_edit(config_path, chunk_size, overlap):
11
- """Edits the configuration file with chunk size and overlap."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  full_config_path = os.path.join(CHECKPOINT_DIR, os.path.basename(config_path))
13
  if not os.path.exists(full_config_path):
14
  raise FileNotFoundError(f"Configuration file not found: {full_config_path}")
15
 
16
- with open(full_config_path, 'r') as f:
17
- data = yaml.load(f, Loader=yaml.SafeLoader)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
- if 'use_amp' not in data.keys():
20
- data['training']['use_amp'] = True
21
 
22
- data['audio']['chunk_size'] = chunk_size
23
- data['inference']['num_overlap'] = overlap
24
- if data['inference']['batch_size'] == 1:
25
- data['inference']['batch_size'] = 2
 
26
 
27
- print(f"Using custom overlap and chunk_size: overlap={overlap}, chunk_size={chunk_size}")
28
- with open(full_config_path, 'w') as f:
29
- yaml.dump(data, f, default_flow_style=False, sort_keys=False, Dumper=yaml.Dumper)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
 
31
- def download_file(url):
32
- """Downloads a file from a URL."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  import requests
 
 
 
 
34
  encoded_url = quote(url, safe=':/')
35
- path = CHECKPOINT_DIR
 
36
  os.makedirs(path, exist_ok=True)
37
- filename = os.path.basename(encoded_url)
 
38
  file_path = os.path.join(path, filename)
39
  if os.path.exists(file_path):
40
  print(f"File '{filename}' already exists at '{path}'.")
41
  return
42
  try:
43
- response = requests.get(url)
44
  if response.status_code == 200:
45
- with open(file_path, 'wb') as f:
46
- f.write(response.content)
47
- print(f"File '{filename}' downloaded successfully")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  else:
49
  print(f"Error downloading '{filename}': Status code {response.status_code}")
50
  except Exception as e:
51
  print(f"Error downloading file '{filename}' from '{url}': {e}")
 
52
 
53
  # Model konfigurasyonlarını kategorize bir sözlükte tut
54
  MODEL_CONFIGS = {
55
  "Vocal Models": {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
  'VOCALS-big_beta6X (by Unwa)': {
57
  'model_type': 'mel_band_roformer',
58
  'config_path': os.path.join(CHECKPOINT_DIR, 'big_beta6x.yaml'),
@@ -272,9 +814,204 @@ MODEL_CONFIGS = {
272
  'https://huggingface.co/Aname-Tommy/MelBandRoformers/blob/main/FullnessVocalModel.ckpt'
273
  ],
274
  'needs_conf_edit': True
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
275
  }
276
  },
277
  "Instrumental Models": {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
278
  'Inst_GaboxFv8 (by Gabox)': {
279
  'model_type': 'mel_band_roformer',
280
  'config_path': os.path.join(CHECKPOINT_DIR, 'inst_gabox.yaml'),
@@ -504,6 +1241,86 @@ MODEL_CONFIGS = {
504
  'https://huggingface.co/GaboxR67/MelBandRoformers/resolve/main/melbandroformers/instrumental/Inst_GaboxV7.ckpt'
505
  ],
506
  'needs_conf_edit': True
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
507
  }
508
  },
509
  "4-Stem Models": {
@@ -660,6 +1477,56 @@ MODEL_CONFIGS = {
660
  'https://huggingface.co/anvuew/dereverb_mel_band_roformer/resolve/main/dereverb_mel_band_roformer_mono_anvuew_sdr_20.4029.ckpt'
661
  ],
662
  'needs_conf_edit': True
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
663
  }
664
  },
665
  "Karaoke": {
@@ -673,13 +1540,43 @@ MODEL_CONFIGS = {
673
  ],
674
  'needs_conf_edit': True
675
  },
676
- 'KaraokeGabox': {
677
  'model_type': 'mel_band_roformer',
678
- 'config_path': os.path.join(CHECKPOINT_DIR, 'config_mel_band_roformer_karaoke.yaml'),
679
- 'start_check_point': os.path.join(CHECKPOINT_DIR, 'KaraokeGabox.ckpt'),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
680
  'download_urls': [
681
- 'https://github.com/deton24/Colab-for-new-MDX_UVR_models/releases/download/v1.0.0/config_mel_band_roformer_karaoke.yaml',
682
- 'https://huggingface.co/GaboxR67/MelBandRoformers/blob/main/melbandroformers/experimental/KaraokeGabox.ckpt'
683
  ],
684
  'needs_conf_edit': True
685
  }
@@ -764,23 +1661,220 @@ MODEL_CONFIGS = {
764
  'https://huggingface.co/SYH99999/MelBandRoformerSYHFTB1/resolve/main/model3.ckpt'
765
  ],
766
  'needs_conf_edit': True
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
767
  }
768
  }
769
  }
770
 
771
  def get_model_config(clean_model=None, chunk_size=None, overlap=None):
772
- """Returns model type, config path, and checkpoint path for a given model name, downloading files if needed."""
 
 
 
 
 
 
 
773
  if clean_model is None:
774
- return {model_name for category in MODEL_CONFIGS.values() for model_name in category.keys()}
 
 
 
 
775
 
 
776
  for category in MODEL_CONFIGS.values():
777
  if clean_model in category:
778
  config = category[clean_model]
779
- for url in config['download_urls']:
780
- download_file(url)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
781
  if config['needs_conf_edit'] and chunk_size is not None and overlap is not None:
782
  conf_edit(config['config_path'], chunk_size, overlap)
783
  return config['model_type'], config['config_path'], config['start_check_point']
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
784
  return "", "", ""
785
 
786
- get_model_config.keys = lambda: {model_name for category in MODEL_CONFIGS.values() for model_name in category.keys()}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os
2
  import yaml
3
+ import json
4
+ import re
5
+ import shutil
6
+ from urllib.parse import quote, urlparse
7
  from pathlib import Path
8
 
9
# The base directory and the checkpoint directory are defined as fixed constants.
# BASE_DIR is the folder containing this module; everything else hangs off it.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
# Checkpoints and their YAML configs are stored side by side in this folder.
CHECKPOINT_DIR = os.path.join(BASE_DIR, 'ckpts')
# JSON registry holding the user-added (custom) model definitions.
CUSTOM_MODELS_FILE = os.path.join(BASE_DIR, 'assets', 'custom_models.json')
13
 
14
+
15
def fix_huggingface_url(url):
    """Convert a Hugging Face ``/blob/`` page URL into a ``/resolve/`` download URL.

    Hugging Face serves two URL flavours for a repository file:
    - ``/blob/`` renders the HTML file viewer (wrong for downloading)
    - ``/resolve/`` returns the raw file content (correct for downloading)

    Only the first ``/blob/`` segment is rewritten. Any later occurrence is
    part of the file's own path inside the repository and must be kept.

    Args:
        url: The URL to fix. Falsy values (``None``, ``''``) are returned as-is.

    Returns:
        The corrected URL, or the original value when it is not a
        Hugging Face ``/blob/`` URL.
    """
    if not url:
        return url

    if 'huggingface.co' in url and '/blob/' in url:
        # count=1: a repo file stored under e.g. ".../main/blob/x.ckpt" keeps its path.
        return url.replace('/blob/', '/resolve/', 1)

    return url
39
+
40
+
41
def validate_yaml_content(content, filepath=None):
    """Reject downloaded "YAML" that is really an HTML page.

    The check is a case-insensitive substring scan for a handful of HTML
    markers; it does not parse the YAML itself.

    Args:
        content: File content as ``str`` or ``bytes`` (bytes are decoded as
            UTF-8 with undecodable bytes ignored).
        filepath: Optional path, included in the error message when given.

    Returns:
        tuple: ``(True, None)`` when no HTML marker is found, otherwise
        ``(False, message)`` with a multi-line explanation.
    """
    html_markers = (
        '<!DOCTYPE',
        '<html',
        '<head>',
        '<body>',
        '<script>',
        '<link rel=',
        'text/html',
    )

    text = content if isinstance(content, str) else content.decode('utf-8', errors='ignore')
    lowered = text.lower()

    if not any(marker.lower() in lowered for marker in html_markers):
        return True, None

    # NOTE(review): the inner indentation of this message was not recoverable
    # from the scraped diff; the wording is unchanged.
    message = "\n".join((
        "",
        "The downloaded file appears to be an HTML page, not a YAML config file.",
        "File: " + filepath if filepath else "",
        "",
        "This usually happens when using a Hugging Face '/blob/' URL instead of a '/resolve/' URL.",
        "",
        "To fix this:",
        "1. Use the raw file URL with '/resolve/' instead of '/blob/'",
        "   Example: https://huggingface.co/user/repo/resolve/main/file.yaml",
        "",
        "2. Or copy the raw URL from Hugging Face:",
        "   - Go to the file on Hugging Face",
        "   - Click \"Download\" or right-click and \"Copy link address\"",
        "",
    ))
    return False, message
83
+
84
# Model architectures accepted for custom models, whether chosen manually or
# produced by the auto-detection helpers.
SUPPORTED_MODEL_TYPES = [
    'bs_roformer', 'bs_roformer_custom', 'mel_band_roformer',
    'mdx23c', 'bandit_v2', 'scnet', 'htdemucs', 'torchseg',
]
95
+
96
def detect_model_type_from_url(checkpoint_url, config_url=None):
    """Guess the model type from naming patterns in the given URLs.

    The checkpoint URL and, when provided, the config URL are lower-cased and
    searched with an ordered list of regular expressions; the first match wins,
    so more specific patterns are listed before more general ones.

    Args:
        checkpoint_url: URL of the checkpoint file.
        config_url: Optional URL of the config file.

    Returns:
        The detected model type string, or ``None`` when nothing matches.
    """
    candidates = [checkpoint_url] + ([config_url] if config_url else [])
    haystack = ' '.join(candidates).lower()

    # Order matters: 'bs_roformer_custom' must be tested before 'bs_roformer'.
    rules = (
        (r'bs[-_]?roformer[-_]?custom|hyperace', 'bs_roformer_custom'),
        (r'bs[-_]?roformer|bsroformer', 'bs_roformer'),
        (r'mel[-_]?band[-_]?roformer|melbandroformer|mbr', 'mel_band_roformer'),
        (r'mdx23c', 'mdx23c'),
        (r'bandit[-_]?v?2?', 'bandit_v2'),
        (r'scnet', 'scnet'),
        (r'htdemucs|demucs', 'htdemucs'),
        (r'torchseg', 'torchseg'),
    )

    return next((kind for regex, kind in rules if re.search(regex, haystack)), None)
119
+
120
def detect_model_type_from_config(config_url):
    """Try to read the model type out of a remote YAML config.

    The config is fetched with a 10 second timeout and parsed with
    ``yaml.safe_load``. A top-level ``model_type`` key is preferred; otherwise
    ``model.model_type`` is used. This is a best-effort probe: any failure
    (network, HTTP status, parsing, unexpected structure) yields ``None``.

    Args:
        config_url: URL of the YAML config file.

    Returns:
        The model type found in the config, or ``None``.
    """
    try:
        import requests

        reply = requests.get(config_url, timeout=10)
        if reply.status_code != 200:
            return None

        parsed = yaml.safe_load(reply.text)
        if 'model_type' in parsed:
            return parsed['model_type']
        if 'model' in parsed and 'model_type' in parsed['model']:
            return parsed['model']['model_type']
        return None
    except Exception:
        # Detection is optional; callers fall back to manual selection.
        return None
134
+
135
def load_custom_models():
    """Load the custom-model registry from ``CUSTOM_MODELS_FILE``.

    Returns:
        dict: Mapping of model name to its stored settings. An empty dict is
        returned when the file is missing, unreadable, not valid UTF-8/JSON,
        or does not contain a JSON object at the top level.
    """
    if not os.path.exists(CUSTOM_MODELS_FILE):
        return {}
    try:
        with open(CUSTOM_MODELS_FILE, 'r', encoding='utf-8') as f:
            models = json.load(f)
    except (ValueError, OSError):
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        return {}
    # Callers index by name and call .items(); anything but a dict would break them.
    return models if isinstance(models, dict) else {}
144
+
145
def save_custom_models(models):
    """Persist the custom-model registry to ``CUSTOM_MODELS_FILE``.

    The JSON is written to a sibling temporary file and then moved into place,
    so a serialization error or an interrupted write cannot leave a truncated
    registry behind.

    Args:
        models: Mapping of model name to settings; must be JSON-serializable.

    Raises:
        TypeError / ValueError: If ``models`` cannot be serialized.
        OSError: If the file cannot be written or replaced.
    """
    os.makedirs(os.path.dirname(CUSTOM_MODELS_FILE), exist_ok=True)
    tmp_path = CUSTOM_MODELS_FILE + '.tmp'
    try:
        with open(tmp_path, 'w', encoding='utf-8') as f:
            json.dump(models, f, indent=2, ensure_ascii=False)
        os.replace(tmp_path, CUSTOM_MODELS_FILE)
    except Exception:
        # Do not leave the half-written temporary file around.
        if os.path.exists(tmp_path):
            try:
                os.remove(tmp_path)
            except OSError:
                pass
        raise
150
+
151
def add_custom_model(model_name, model_type, checkpoint_url, config_url, custom_model_url=None, auto_detect=True):
    """Register a new custom model in the custom-model registry.

    Args:
        model_name: Display name; also used to derive the local config filename.
        model_type: One of ``SUPPORTED_MODEL_TYPES``, or empty / ``"auto"`` to
            request auto-detection.
        checkpoint_url: URL of the checkpoint file (required).
        config_url: URL of the YAML config (required).
        custom_model_url: Optional URL stored alongside the model.
        auto_detect: When true and no explicit type is given, detect the type
            from the URLs first and from the remote config second.

    Returns:
        tuple: ``(success: bool, message: str)``.
    """
    if not model_name or not model_name.strip():
        return False, "Model name is required"
    if not checkpoint_url or not checkpoint_url.strip():
        return False, "Checkpoint URL is required"
    if not config_url or not config_url.strip():
        return False, "Config URL is required"

    model_name = model_name.strip()
    checkpoint_url = checkpoint_url.strip()
    config_url = config_url.strip()
    # A whitespace-only optional URL is treated as "not provided".
    custom_model_url = (custom_model_url.strip() or None) if custom_model_url else None

    # Auto-fix Hugging Face URLs
    checkpoint_url = fix_huggingface_url(checkpoint_url)
    config_url = fix_huggingface_url(config_url)
    if custom_model_url:
        custom_model_url = fix_huggingface_url(custom_model_url)

    if auto_detect and (not model_type or model_type == "auto"):
        detected_type = detect_model_type_from_url(checkpoint_url, config_url)
        if not detected_type:
            detected_type = detect_model_type_from_config(config_url)
        if not detected_type:
            return False, "Could not auto-detect model type. Please select manually."
        model_type = detected_type

    if model_type not in SUPPORTED_MODEL_TYPES:
        return False, f"Unsupported model type: {model_type}"

    # Drop query string and fragment before taking the last path segment.
    checkpoint_filename = os.path.basename(checkpoint_url.split('?')[0].split('#')[0])
    if not checkpoint_filename:
        return False, "Checkpoint URL must point to a file"

    # The name comes from user input: neutralize path separators and characters
    # that are illegal in filenames so the config always lands inside the
    # checkpoint directory.
    safe_name = re.sub(r'[\\/:*?"<>|\x00-\x1f]', '_', model_name.replace(' ', '_').lower())
    config_filename = f"config_{safe_name}.yaml"

    models = load_custom_models()
    if model_name in models:
        return False, f"Model '{model_name}' already exists"

    models[model_name] = {
        'model_type': model_type,
        'checkpoint_url': checkpoint_url,
        'config_url': config_url,
        'custom_model_url': custom_model_url,
        'checkpoint_filename': checkpoint_filename,
        'config_filename': config_filename,
        'needs_conf_edit': True
    }
    save_custom_models(models)
    return True, f"Model '{model_name}' added successfully"
201
+
202
def delete_custom_model(model_name):
    """Remove a custom model from the registry and delete its local files.

    File removal is best-effort and handled per file: a failure to delete the
    checkpoint does not prevent the config from being deleted, and neither
    prevents the registry entry from being removed.

    Args:
        model_name: Name of the custom model to delete.

    Returns:
        tuple: ``(success: bool, message: str)``.
    """
    models = load_custom_models()
    if model_name not in models:
        return False, f"Model '{model_name}' not found"

    model_config = models[model_name]
    for key in ('checkpoint_filename', 'config_filename'):
        # basename() keeps the deletion inside CHECKPOINT_DIR; an empty name
        # would otherwise resolve to the directory itself.
        filename = os.path.basename(model_config.get(key) or '')
        if not filename:
            continue
        file_path = os.path.join(CHECKPOINT_DIR, filename)
        try:
            if os.path.isfile(file_path):
                os.remove(file_path)
        except OSError as e:
            print(f"Warning: could not remove '{file_path}': {e}")

    del models[model_name]
    save_custom_models(models)
    return True, f"Model '{model_name}' deleted successfully"
223
+
224
def get_custom_models_list():
    """Return ``(name, model_type)`` pairs for every registered custom model.

    Entries without a ``model_type`` are reported as ``'unknown'``.
    """
    pairs = []
    for name, settings in load_custom_models().items():
        pairs.append((name, settings.get('model_type', 'unknown')))
    return pairs
228
+
229
def preprocess_yaml_content(content):
    """Pre-process YAML text to fix common issues before parsing.

    Fixes:
    - Replaces tabs with spaces.
    - Double-quotes unquoted ``key: value`` values that contain a colon or a
      backslash (URLs, Windows paths), escaping backslashes and double quotes
      so the result is a valid YAML double-quoted scalar.

    Left untouched: blank lines, comment lines, values that are already
    quoted, and values that start a YAML construct (flow list/mapping, block
    scalar, anchor, alias, tag), since quoting those would turn them into
    plain strings.

    Args:
        content: Raw YAML text.

    Returns:
        The pre-processed YAML text.
    """
    # NOTE(review): the exact width of the tab replacement was not recoverable
    # from the scraped diff; two spaces assumed - confirm against upstream.
    if '\t' in content:
        content = content.replace('\t', '  ')

    structural_starts = ('[', '{', '|', '>', '&', '*', '!')
    fixed_lines = []

    for line in content.split('\n'):
        stripped = line.strip()
        if not stripped or stripped.startswith('#'):
            fixed_lines.append(line)
            continue

        # Match: "  key: value"
        match = re.match(r'^(\s*)([^:#]+?):\s+(.+)$', line)
        if match:
            indent, key, value = match.groups()
            # Drop trailing whitespace (e.g. the CR of a CRLF line) so it is
            # neither quoted into the value nor hides a closing quote.
            value = value.rstrip()
            needs_quoting = ':' in value or '\\' in value
            already_quoted = (
                (value.startswith('"') and value.endswith('"'))
                or (value.startswith("'") and value.endswith("'"))
            )
            if needs_quoting and not already_quoted and not value.startswith(structural_starts):
                # Backslash is the escape character inside double quotes, so it
                # must be doubled before the quotes themselves are escaped.
                escaped_value = value.replace('\\', '\\\\').replace('"', '\\"')
                fixed_lines.append(f'{indent}{key}: "{escaped_value}"')
                continue

        fixed_lines.append(line)

    return '\n'.join(fixed_lines)
270
+
271
+
272
+ def get_yaml_error_context(content, line_num, column=None):
273
+ """Get context around a YAML error for better debugging."""
274
+ lines = content.split('\n')
275
+ if line_num < 1 or line_num > len(lines):
276
+ return "Could not extract error context"
277
+
278
+ context_lines = []
279
+ start = max(0, line_num - 3)
280
+ end = min(len(lines), line_num + 2)
281
+
282
+ for i in range(start, end):
283
+ line_indicator = ">>> " if i == line_num - 1 else " "
284
+ context_lines.append(f"{line_indicator}{i + 1}: {lines[i]}")
285
+
286
+ # Add column indicator for the error line
287
+ if i == line_num - 1 and column:
288
+ pointer = " " * (len(str(i + 1)) + 6 + column - 1) + "^"
289
+ context_lines.append(pointer)
290
+
291
+ return '\n'.join(context_lines)
292
+
293
+
294
def conf_edit(config_path, chunk_size, overlap, model_name=None):
    """Update the overlap and related flags in a model's YAML config.

    The model's native ``audio.chunk_size`` from the YAML is preserved
    unchanged. The file is parsed in memory and only rewritten (atomically,
    via a temporary file) once the new content has been fully serialized, so
    a failure at any step leaves the existing config untouched.

    Changes applied:
    - ``training.use_amp`` defaults to ``True`` when the config does not set it.
    - an ``audio`` section is created when absent.
    - ``inference.num_overlap`` is set to ``overlap``.
    - ``inference.batch_size`` is raised to 2 when missing or equal to 1.

    Args:
        config_path: Path to the config file; only its basename is used, the
            file is looked up in ``CHECKPOINT_DIR``.
        chunk_size: Unused - kept for API compatibility (native YAML value is used instead).
        overlap: Overlap between chunks.
        model_name: Unused here - kept for API compatibility.

    Raises:
        FileNotFoundError: If the config file does not exist.
        ValueError: If the file is an HTML page or not a YAML mapping.
        yaml.YAMLError: If the YAML cannot be parsed (message includes context).
    """
    full_config_path = os.path.join(CHECKPOINT_DIR, os.path.basename(config_path))
    if not os.path.exists(full_config_path):
        raise FileNotFoundError(f"Configuration file not found: {full_config_path}")

    with open(full_config_path, 'r', encoding='utf-8') as f:
        original_content = f.read()

    # An HTML page here means a wrong (e.g. '/blob/') URL was downloaded.
    is_valid, html_error = validate_yaml_content(original_content, full_config_path)
    if not is_valid:
        raise ValueError(html_error)

    content = preprocess_yaml_content(original_content)

    try:
        data = yaml.load(content, Loader=yaml.SafeLoader)
    except yaml.YAMLError as e:
        line_num = None
        column = None
        if hasattr(e, 'problem_mark') and e.problem_mark:
            line_num = e.problem_mark.line + 1
            column = e.problem_mark.column + 1

        context = get_yaml_error_context(content, line_num, column) if line_num else ""

        error_details = "\n".join((
            "",
            f"YAML Parsing Error in config file: {full_config_path}",
            "",
            f"Error: {e}",
            "",
            "Error Context:" + chr(10) + context if context else "",
            "",
            "Possible causes:",
            "1. Unquoted string containing a colon (e.g., URLs like https://...)",
            "2. Unquoted Windows path with backslashes (e.g., C:\\path\\to\\file)",
            "3. Malformed YAML structure",
            "4. File corruption from previous processing",
            "",
            "Suggested fixes:",
            f"1. Delete the config file and let it re-download: {full_config_path}",
            "2. Manually edit the file to quote problematic values",
            "3. Check if the source config URL provides valid YAML",
            "",
        ))
        raise yaml.YAMLError(error_details) from e

    if not isinstance(data, dict):
        raise ValueError(f"Config file does not contain a valid YAML dictionary: {full_config_path}")

    # use_amp lives under 'training'; only supply a default, never override an
    # explicit setting. (A top-level 'use_amp' still suppresses the default.)
    if 'use_amp' not in data:
        training = data.get('training')
        if not isinstance(training, dict):
            training = data['training'] = {}
        training.setdefault('use_amp', True)

    # Do NOT overwrite audio.chunk_size - keep the model's native value from the YAML.
    if 'audio' not in data:
        data['audio'] = {}

    inference = data.get('inference')
    if not isinstance(inference, dict):
        inference = data['inference'] = {}
    inference['num_overlap'] = overlap
    if inference.get('batch_size', 1) == 1:
        inference['batch_size'] = 2

    # Serialize first so a dump failure cannot truncate the config on disk.
    serialized = yaml.dump(data, default_flow_style=False, sort_keys=False, Dumper=yaml.Dumper)

    tmp_path = full_config_path + '.tmp'
    try:
        with open(tmp_path, 'w', encoding='utf-8') as f:
            f.write(serialized)
        os.replace(tmp_path, full_config_path)
    except Exception:
        if os.path.exists(tmp_path):
            try:
                os.remove(tmp_path)
            except OSError:
                pass
        raise
422
+
423
+
424
def redownload_config(model_name):
    """Re-download a corrupted config file for a custom model.

    The existing local config is deleted first, then fetched again from the
    model's stored ``config_url`` with HTML validation enabled.

    Args:
        model_name: Name of the custom model.

    Returns:
        tuple: ``(success: bool, message: str)``. Success is only reported
        when the config file actually exists after the download.
    """
    custom_models = load_custom_models()
    if model_name not in custom_models:
        return False, f"Model '{model_name}' not found in custom models"

    config = custom_models[model_name]
    config_url = config.get('config_url')
    config_filename = config.get('config_filename')

    if not config_url or not config_filename:
        return False, f"Config URL or filename not found for model '{model_name}'"

    config_path = os.path.join(CHECKPOINT_DIR, config_filename)

    # Auto-fix URL before re-downloading
    config_url = fix_huggingface_url(config_url)

    # Delete existing config so download_file does not skip it as "already exists".
    if os.path.exists(config_path):
        try:
            os.remove(config_path)
        except Exception as e:
            return False, f"Could not delete config file: {e}"

    # Re-download with validation
    try:
        download_file(config_url, target_filename=config_filename, validate_yaml=True)
    except Exception as e:
        return False, f"Failed to re-download config: {e}"

    # download_file only prints on a non-200 response, so confirm the result.
    if not os.path.exists(config_path):
        return False, f"Failed to re-download config: no file was saved for '{config_filename}'"
    return True, f"Config file re-downloaded successfully: {config_filename}"
462
+
463
def download_file(url, path=None, target_filename=None, validate_yaml=True):
    """Download a file from a URL with progress reporting.

    Data is first written to ``<file>.part`` and moved to its final name only
    after the transfer completes, so an interrupted download can never be
    mistaken for a finished file by the "already exists" check.

    Progress for non-YAML files is printed as ``[SESA_DOWNLOAD]START:<name>``,
    ``[SESA_DOWNLOAD]<name>:<percent>`` and ``[SESA_DOWNLOAD]END:<name>``.

    Args:
        url: The URL to download from. Hugging Face ``/blob/`` URLs are fixed automatically.
        path: The directory to save the file to. Defaults to CHECKPOINT_DIR.
        target_filename: Optional custom filename to save as. If None, uses filename from URL.
        validate_yaml: If True and file is .yaml/.yml, validate it's not HTML.

    Returns:
        None. A non-200 response is reported with a printed message only.

    Raises:
        ValueError: If a YAML download turns out to be an HTML page.
        Exception: Any network or file error is printed and re-raised.
    """
    import requests

    # Auto-fix Hugging Face URLs
    url = fix_huggingface_url(url)

    encoded_url = quote(url, safe=':/')
    if path is None:
        path = CHECKPOINT_DIR
    os.makedirs(path, exist_ok=True)
    # Use custom target filename if provided, otherwise extract from URL
    filename = target_filename if target_filename else os.path.basename(encoded_url)
    file_path = os.path.join(path, filename)
    if os.path.exists(file_path):
        print(f"File '{filename}' already exists at '{path}'.")
        return

    partial_path = file_path + '.part'
    try:
        # (connect, read) timeouts: fail instead of hanging on a dead connection.
        with requests.get(url, stream=True, timeout=(10, 60)) as response:
            if response.status_code != 200:
                print(f"Error downloading '{filename}': Status code {response.status_code}")
                return

            # Total size drives the percentage output; 0 means "unknown".
            total_size = int(response.headers.get('content-length', 0))
            is_yaml_file = filename.lower().endswith(('.yaml', '.yml'))

            if is_yaml_file and validate_yaml:
                # Configs are small: read fully and validate before saving.
                content = response.content
                is_valid, error_msg = validate_yaml_content(content, file_path)
                if not is_valid:
                    print(f"ERROR: Downloaded file is not valid YAML!")
                    print(error_msg)
                    raise ValueError(f"Downloaded file is HTML, not YAML. URL may be incorrect: {url}")

                with open(partial_path, 'wb') as f:
                    f.write(content)
                os.replace(partial_path, file_path)
            else:
                downloaded_size = 0
                last_percent = -1
                print(f"[SESA_DOWNLOAD]START:{filename}", flush=True)

                with open(partial_path, 'wb') as f:
                    for chunk in response.iter_content(chunk_size=8192):
                        if not chunk:
                            continue
                        f.write(chunk)
                        downloaded_size += len(chunk)

                        if total_size > 0:
                            percent = int((downloaded_size / total_size) * 100)
                            # Emit each percentage value once.
                            if percent != last_percent:
                                last_percent = percent
                                # Format: [SESA_DOWNLOAD]filename:percent
                                print(f"[SESA_DOWNLOAD]{filename}:{percent}", flush=True)

                os.replace(partial_path, file_path)
                print(f"[SESA_DOWNLOAD]END:{filename}", flush=True)
    except Exception as e:
        print(f"Error downloading file '{filename}' from '{url}': {e}")
        # Remove the incomplete download so the next attempt starts clean.
        if os.path.exists(partial_path):
            try:
                os.remove(partial_path)
            except OSError:
                pass
        raise
531
 
532
  # Model konfigurasyonlarını kategorize bir sözlükte tut
533
  MODEL_CONFIGS = {
534
  "Vocal Models": {
535
+ # === NEW MODELS (en üstte) ===
536
+ 'bs_roformer_voc_hyperacev2 (by unwa)': {
537
+ 'model_type': 'bs_roformer_custom',
538
+ 'config_path': os.path.join(CHECKPOINT_DIR, 'config_hyperacev2_voc.yaml'),
539
+ 'start_check_point': os.path.join(CHECKPOINT_DIR, 'bs_roformer_voc_hyperacev2.ckpt'),
540
+ 'download_urls': [
541
+ ('https://huggingface.co/pcunwa/BS-Roformer-HyperACE/resolve/main/v2_voc/config.yaml', 'config_hyperacev2_voc.yaml'),
542
+ 'https://huggingface.co/pcunwa/BS-Roformer-HyperACE/resolve/main/v2_voc/bs_roformer_voc_hyperacev2.ckpt'
543
+ ],
544
+ 'custom_model_url': 'https://huggingface.co/pcunwa/BS-Roformer-HyperACE/resolve/main/v2_voc/bs_roformer.py',
545
+ 'needs_conf_edit': True
546
+ },
547
+ 'BS-Roformer-Resurrection (by unwa)': {
548
+ 'model_type': 'bs_roformer',
549
+ 'config_path': os.path.join(CHECKPOINT_DIR, 'BS-Roformer-Resurrection-Config.yaml'),
550
+ 'start_check_point': os.path.join(CHECKPOINT_DIR, 'BS-Roformer-Resurrection.ckpt'),
551
+ 'download_urls': [
552
+ 'https://huggingface.co/pcunwa/BS-Roformer-Resurrection/resolve/main/BS-Roformer-Resurrection-Config.yaml',
553
+ 'https://huggingface.co/pcunwa/BS-Roformer-Resurrection/resolve/main/BS-Roformer-Resurrection.ckpt'
554
+ ],
555
+ 'needs_conf_edit': True
556
+ },
557
+ 'bs_roformer_revive3e (by unwa)': {
558
+ 'model_type': 'bs_roformer',
559
+ 'config_path': os.path.join(CHECKPOINT_DIR, 'config_revive.yaml'),
560
+ 'start_check_point': os.path.join(CHECKPOINT_DIR, 'bs_roformer_revive3e.ckpt'),
561
+ 'download_urls': [
562
+ ('https://huggingface.co/pcunwa/BS-Roformer-Revive/resolve/main/config.yaml', 'config_revive.yaml'),
563
+ 'https://huggingface.co/pcunwa/BS-Roformer-Revive/resolve/main/bs_roformer_revive3e.ckpt'
564
+ ],
565
+ 'needs_conf_edit': True
566
+ },
567
+ 'bs_roformer_revive2 (by unwa)': {
568
+ 'model_type': 'bs_roformer',
569
+ 'config_path': os.path.join(CHECKPOINT_DIR, 'config_revive.yaml'),
570
+ 'start_check_point': os.path.join(CHECKPOINT_DIR, 'bs_roformer_revive2.ckpt'),
571
+ 'download_urls': [
572
+ ('https://huggingface.co/pcunwa/BS-Roformer-Revive/resolve/main/config.yaml', 'config_revive.yaml'),
573
+ 'https://huggingface.co/pcunwa/BS-Roformer-Revive/resolve/main/bs_roformer_revive2.ckpt'
574
+ ],
575
+ 'needs_conf_edit': True
576
+ },
577
+ 'bs_roformer_revive (by unwa)': {
578
+ 'model_type': 'bs_roformer',
579
+ 'config_path': os.path.join(CHECKPOINT_DIR, 'config_revive.yaml'),
580
+ 'start_check_point': os.path.join(CHECKPOINT_DIR, 'bs_roformer_revive.ckpt'),
581
+ 'download_urls': [
582
+ ('https://huggingface.co/pcunwa/BS-Roformer-Revive/resolve/main/config.yaml', 'config_revive.yaml'),
583
+ 'https://huggingface.co/pcunwa/BS-Roformer-Revive/resolve/main/bs_roformer_revive.ckpt'
584
+ ],
585
+ 'needs_conf_edit': True
586
+ },
587
+ 'karaoke_bs_roformer_anvuew (by anvuew)': {
588
+ 'model_type': 'bs_roformer',
589
+ 'config_path': os.path.join(CHECKPOINT_DIR, 'karaoke_bs_roformer_anvuew.yaml'),
590
+ 'start_check_point': os.path.join(CHECKPOINT_DIR, 'karaoke_bs_roformer_anvuew.ckpt'),
591
+ 'download_urls': [
592
+ 'https://huggingface.co/anvuew/karaoke_bs_roformer/resolve/main/karaoke_bs_roformer_anvuew.yaml',
593
+ 'https://huggingface.co/anvuew/karaoke_bs_roformer/resolve/main/karaoke_bs_roformer_anvuew.ckpt'
594
+ ],
595
+ 'needs_conf_edit': True
596
+ },
597
+ # === EXISTING MODELS ===
598
  'VOCALS-big_beta6X (by Unwa)': {
599
  'model_type': 'mel_band_roformer',
600
  'config_path': os.path.join(CHECKPOINT_DIR, 'big_beta6x.yaml'),
 
814
  'https://huggingface.co/Aname-Tommy/MelBandRoformers/blob/main/FullnessVocalModel.ckpt'
815
  ],
816
  'needs_conf_edit': True
817
+ },
818
+ 'voc_fv4 (by Gabox)': {
819
+ 'model_type': 'mel_band_roformer',
820
+ 'config_path': os.path.join(CHECKPOINT_DIR, 'voc_gabox.yaml'),
821
+ 'start_check_point': os.path.join(CHECKPOINT_DIR, 'voc_fv4.ckpt'),
822
+ 'download_urls': [
823
+ 'https://huggingface.co/GaboxR67/MelBandRoformers/resolve/main/melbandroformers/vocals/voc_gabox.yaml',
824
+ 'https://huggingface.co/GaboxR67/MelBandRoformers/resolve/main/melbandroformers/vocals/voc_fv4.ckpt'
825
+ ],
826
+ 'needs_conf_edit': True
827
+ },
828
+ 'voc_fv5 (by Gabox)': {
829
+ 'model_type': 'mel_band_roformer',
830
+ 'config_path': os.path.join(CHECKPOINT_DIR, 'voc_gabox.yaml'),
831
+ 'start_check_point': os.path.join(CHECKPOINT_DIR, 'voc_fv5.ckpt'),
832
+ 'download_urls': [
833
+ 'https://huggingface.co/GaboxR67/MelBandRoformers/resolve/main/melbandroformers/vocals/voc_gabox.yaml',
834
+ 'https://huggingface.co/GaboxR67/MelBandRoformers/resolve/main/melbandroformers/vocals/voc_fv5.ckpt'
835
+ ],
836
+ 'needs_conf_edit': True
837
+ },
838
+ 'voc_fv6 (by Gabox)': {
839
+ 'model_type': 'mel_band_roformer',
840
+ 'config_path': os.path.join(CHECKPOINT_DIR, 'voc_gabox.yaml'),
841
+ 'start_check_point': os.path.join(CHECKPOINT_DIR, 'voc_fv6.ckpt'),
842
+ 'download_urls': [
843
+ 'https://huggingface.co/GaboxR67/MelBandRoformers/resolve/main/melbandroformers/vocals/voc_gabox.yaml',
844
+ 'https://huggingface.co/GaboxR67/MelBandRoformers/resolve/main/melbandroformers/vocals/voc_fv6.ckpt'
845
+ ],
846
+ 'needs_conf_edit': True
847
+ },
848
+ 'voc_fv7 (by Gabox)': {
849
+ 'model_type': 'mel_band_roformer',
850
+ 'config_path': os.path.join(CHECKPOINT_DIR, 'v7.yaml'),
851
+ 'start_check_point': os.path.join(CHECKPOINT_DIR, 'voc_fv7.ckpt'),
852
+ 'download_urls': [
853
+ 'https://huggingface.co/GaboxR67/MelBandRoformers/resolve/main/melbandroformers/vocals/v7.yaml',
854
+ 'https://huggingface.co/GaboxR67/MelBandRoformers/resolve/main/melbandroformers/vocals/voc_fv7.ckpt'
855
+ ],
856
+ 'needs_conf_edit': True
857
+ },
858
+ 'vocfv7beta1 (by Gabox)': {
859
+ 'model_type': 'mel_band_roformer',
860
+ 'config_path': os.path.join(CHECKPOINT_DIR, 'voc_gabox.yaml'),
861
+ 'start_check_point': os.path.join(CHECKPOINT_DIR, 'vocfv7beta1.ckpt'),
862
+ 'download_urls': [
863
+ 'https://huggingface.co/GaboxR67/MelBandRoformers/resolve/main/melbandroformers/vocals/voc_gabox.yaml',
864
+ 'https://huggingface.co/GaboxR67/MelBandRoformers/resolve/main/melbandroformers/experimental/vocfv7beta1.ckpt'
865
+ ],
866
+ 'needs_conf_edit': True
867
+ },
868
+ 'vocfv7beta2 (by Gabox)': {
869
+ 'model_type': 'mel_band_roformer',
870
+ 'config_path': os.path.join(CHECKPOINT_DIR, 'voc_gabox.yaml'),
871
+ 'start_check_point': os.path.join(CHECKPOINT_DIR, 'vocfv7beta2.ckpt'),
872
+ 'download_urls': [
873
+ 'https://huggingface.co/GaboxR67/MelBandRoformers/resolve/main/melbandroformers/vocals/voc_gabox.yaml',
874
+ 'https://huggingface.co/GaboxR67/MelBandRoformers/resolve/main/melbandroformers/experimental/vocfv7beta2.ckpt'
875
+ ],
876
+ 'needs_conf_edit': True
877
+ },
878
+ 'vocfv7beta3 (by Gabox)': {
879
+ 'model_type': 'mel_band_roformer',
880
+ 'config_path': os.path.join(CHECKPOINT_DIR, 'voc_gabox.yaml'),
881
+ 'start_check_point': os.path.join(CHECKPOINT_DIR, 'vocfv7beta3.ckpt'),
882
+ 'download_urls': [
883
+ 'https://huggingface.co/GaboxR67/MelBandRoformers/resolve/main/melbandroformers/vocals/voc_gabox.yaml',
884
+ 'https://huggingface.co/GaboxR67/MelBandRoformers/resolve/main/melbandroformers/experimental/vocfv7beta3.ckpt'
885
+ ],
886
+ 'needs_conf_edit': True
887
+ },
888
+ 'MelBandRoformerSYHFTV3Epsilon (by SYH99999)': {
889
+ 'model_type': 'mel_band_roformer',
890
+ 'config_path': os.path.join(CHECKPOINT_DIR, 'config_vocals_mel_band_roformer_ft.yaml'),
891
+ 'start_check_point': os.path.join(CHECKPOINT_DIR, 'MelBandRoformerSYHFTV3Epsilon.ckpt'),
892
+ 'download_urls': [
893
+ 'https://huggingface.co/SYH99999/MelBandRoformerSYHFT/resolve/main/config_vocals_mel_band_roformer_ft.yaml',
894
+ 'https://huggingface.co/SYH99999/MelBandRoformerSYHFTV3Epsilon/resolve/main/MelBandRoformerSYHFTV3Epsilon.ckpt'
895
+ ],
896
+ 'needs_conf_edit': True
897
+ },
898
+ 'MelBandRoformerBigSYHFTV1 (by SYH99999)': {
899
+ 'model_type': 'mel_band_roformer',
900
+ 'config_path': os.path.join(CHECKPOINT_DIR, 'config_big_syhft.yaml'),
901
+ 'start_check_point': os.path.join(CHECKPOINT_DIR, 'MelBandRoformerBigSYHFTV1.ckpt'),
902
+ 'download_urls': [
903
+ 'https://huggingface.co/SYH99999/MelBandRoformerBigSYHFTV1Fast/resolve/main/config.yaml',
904
+ 'https://huggingface.co/SYH99999/MelBandRoformerBigSYHFTV1Fast/resolve/main/MelBandRoformerBigSYHFTV1.ckpt'
905
+ ],
906
+ 'needs_conf_edit': True
907
+ },
908
+ 'model_chorus_bs_roformer_ep_146 (by Sucial)': {
909
+ 'model_type': 'bs_roformer',
910
+ 'config_path': os.path.join(CHECKPOINT_DIR, 'config_chorus_male_female_bs_roformer.yaml'),
911
+ 'start_check_point': os.path.join(CHECKPOINT_DIR, 'model_chorus_bs_roformer_ep_146_sdr_23.8613.ckpt'),
912
+ 'download_urls': [
913
+ 'https://huggingface.co/Sucial/Chorus_Male_Female_BS_Roformer/resolve/main/config_chorus_male_female_bs_roformer.yaml',
914
+ 'https://huggingface.co/Sucial/Chorus_Male_Female_BS_Roformer/resolve/main/model_chorus_bs_roformer_ep_146_sdr_23.8613.ckpt'
915
+ ],
916
+ 'needs_conf_edit': True
917
+ },
918
+ 'model_chorus_bs_roformer_ep_267 (by Sucial)': {
919
+ 'model_type': 'bs_roformer',
920
+ 'config_path': os.path.join(CHECKPOINT_DIR, 'config_chorus_male_female_bs_roformer.yaml'),
921
+ 'start_check_point': os.path.join(CHECKPOINT_DIR, 'model_chorus_bs_roformer_ep_267_sdr_24.1275.ckpt'),
922
+ 'download_urls': [
923
+ 'https://huggingface.co/Sucial/Chorus_Male_Female_BS_Roformer/resolve/main/config_chorus_male_female_bs_roformer.yaml',
924
+ 'https://huggingface.co/Sucial/Chorus_Male_Female_BS_Roformer/resolve/main/model_chorus_bs_roformer_ep_267_sdr_24.1275.ckpt'
925
+ ],
926
+ 'needs_conf_edit': True
927
+ },
928
+ 'BS-Rofo-SW-Fixed (by jarredou)': {
929
+ 'model_type': 'bs_roformer',
930
+ 'config_path': os.path.join(CHECKPOINT_DIR, 'BS-Rofo-SW-Fixed.yaml'),
931
+ 'start_check_point': os.path.join(CHECKPOINT_DIR, 'BS-Rofo-SW-Fixed.ckpt'),
932
+ 'download_urls': [
933
+ 'https://huggingface.co/jarredou/BS-ROFO-SW-Fixed/resolve/main/BS-Rofo-SW-Fixed.yaml',
934
+ 'https://huggingface.co/jarredou/BS-ROFO-SW-Fixed/resolve/main/BS-Rofo-SW-Fixed.ckpt'
935
+ ],
936
+ 'needs_conf_edit': True
937
+ },
938
+ 'BS_ResurrectioN (by Gabox)': {
939
+ 'model_type': 'bs_roformer',
940
+ 'config_path': os.path.join(CHECKPOINT_DIR, 'BS-Roformer-Resurrection-Inst-Config.yaml'),
941
+ 'start_check_point': os.path.join(CHECKPOINT_DIR, 'BS_ResurrectioN.ckpt'),
942
+ 'download_urls': [
943
+ 'https://huggingface.co/pcunwa/BS-Roformer-Resurrection/resolve/main/BS-Roformer-Resurrection-Inst-Config.yaml',
944
+ 'https://huggingface.co/GaboxR67/MelBandRoformers/resolve/main/melbandroformers/experimental/BS_ResurrectioN.ckpt'
945
+ ],
946
+ 'needs_conf_edit': True
947
  }
948
  },
949
  "Instrumental Models": {
950
+ # === NEW MODELS (listed first) ===
951
+ 'Neo_InstVFX (by natanworkspace)': {
952
+ 'model_type': 'mel_band_roformer',
953
+ 'config_path': os.path.join(CHECKPOINT_DIR, 'config_neo_inst.yaml'),
954
+ 'start_check_point': os.path.join(CHECKPOINT_DIR, 'Neo_InstVFX.ckpt'),
955
+ 'download_urls': [
956
+ 'https://huggingface.co/natanworkspace/melband_roformer/resolve/main/config_neo_inst.yaml',
957
+ 'https://huggingface.co/natanworkspace/melband_roformer/resolve/main/Neo_InstVFX.ckpt'
958
+ ],
959
+ 'needs_conf_edit': True
960
+ },
961
+ 'BS-Roformer-Resurrection-Inst (by unwa)': {
962
+ 'model_type': 'bs_roformer',
963
+ 'config_path': os.path.join(CHECKPOINT_DIR, 'BS-Roformer-Resurrection-Inst-Config.yaml'),
964
+ 'start_check_point': os.path.join(CHECKPOINT_DIR, 'BS-Roformer-Resurrection-Inst.ckpt'),
965
+ 'download_urls': [
966
+ 'https://huggingface.co/pcunwa/BS-Roformer-Resurrection/resolve/main/BS-Roformer-Resurrection-Inst-Config.yaml',
967
+ 'https://huggingface.co/pcunwa/BS-Roformer-Resurrection/resolve/main/BS-Roformer-Resurrection-Inst.ckpt'
968
+ ],
969
+ 'needs_conf_edit': True
970
+ },
971
+ 'bs_roformer_inst_hyperacev2 (by unwa)': {
972
+ 'model_type': 'bs_roformer_custom',
973
+ 'config_path': os.path.join(CHECKPOINT_DIR, 'config_hyperacev2_inst.yaml'),
974
+ 'start_check_point': os.path.join(CHECKPOINT_DIR, 'bs_roformer_inst_hyperacev2.ckpt'),
975
+ 'download_urls': [
976
+ ('https://huggingface.co/pcunwa/BS-Roformer-HyperACE/resolve/main/v2_inst/config.yaml', 'config_hyperacev2_inst.yaml'),
977
+ 'https://huggingface.co/pcunwa/BS-Roformer-HyperACE/resolve/main/v2_inst/bs_roformer_inst_hyperacev2.ckpt'
978
+ ],
979
+ 'custom_model_url': 'https://huggingface.co/pcunwa/BS-Roformer-HyperACE/resolve/main/v2_inst/bs_roformer.py',
980
+ 'needs_conf_edit': True
981
+ },
982
+ 'BS-Roformer-Large-Inst (by unwa)': {
983
+ 'model_type': 'bs_roformer_custom',
984
+ 'config_path': os.path.join(CHECKPOINT_DIR, 'config_bs_large_inst.yaml'),
985
+ 'start_check_point': os.path.join(CHECKPOINT_DIR, 'bs_large_v2_inst.ckpt'),
986
+ 'download_urls': [
987
+ ('https://huggingface.co/pcunwa/BS-Roformer-Large-Inst/resolve/main/config.yaml', 'config_bs_large_inst.yaml'),
988
+ 'https://huggingface.co/pcunwa/BS-Roformer-Large-Inst/resolve/main/bs_large_v2_inst.ckpt'
989
+ ],
990
+ 'custom_model_url': 'https://huggingface.co/pcunwa/BS-Roformer-Large-Inst/resolve/main/bs_roformer.py',
991
+ 'needs_conf_edit': True
992
+ },
993
+ 'bs_roformer_fno (by unwa)': {
994
+ 'model_type': 'bs_roformer_custom',
995
+ 'config_path': os.path.join(CHECKPOINT_DIR, 'bsrofo_fno.yaml'),
996
+ 'start_check_point': os.path.join(CHECKPOINT_DIR, 'bs_roformer_fno.ckpt'),
997
+ 'download_urls': [
998
+ 'https://huggingface.co/pcunwa/BS-Roformer-Inst-FNO/resolve/main/bsrofo_fno.yaml',
999
+ 'https://huggingface.co/pcunwa/BS-Roformer-Inst-FNO/resolve/main/bs_roformer_fno.ckpt'
1000
+ ],
1001
+ 'custom_model_url': 'https://huggingface.co/listra92/MyModels/resolve/main/misc/bs_roformer.py',
1002
+ 'needs_conf_edit': True
1003
+ },
1004
+ 'Rifforge_final_sdr_14.24 (by meskvlla33)': {
1005
+ 'model_type': 'mel_band_roformer',
1006
+ 'config_path': os.path.join(CHECKPOINT_DIR, 'config_rifforge_full_mesk.yaml'),
1007
+ 'start_check_point': os.path.join(CHECKPOINT_DIR, 'rifforge_full_sdr_14.2436.ckpt'),
1008
+ 'download_urls': [
1009
+ 'https://huggingface.co/meskvlla33/rifforge/resolve/main/config_rifforge_full_mesk.yaml',
1010
+ 'https://huggingface.co/meskvlla33/rifforge/resolve/main/rifforge_full_sdr_14.2436.ckpt'
1011
+ ],
1012
+ 'needs_conf_edit': True
1013
+ },
1014
+ # === EXISTING MODELS ===
1015
  'Inst_GaboxFv8 (by Gabox)': {
1016
  'model_type': 'mel_band_roformer',
1017
  'config_path': os.path.join(CHECKPOINT_DIR, 'inst_gabox.yaml'),
 
1241
  'https://huggingface.co/GaboxR67/MelBandRoformers/resolve/main/melbandroformers/instrumental/Inst_GaboxV7.ckpt'
1242
  ],
1243
  'needs_conf_edit': True
1244
+ },
1245
+ 'inst_Fv4 (by Gabox)': {
1246
+ 'model_type': 'mel_band_roformer',
1247
+ 'config_path': os.path.join(CHECKPOINT_DIR, 'inst_gabox.yaml'),
1248
+ 'start_check_point': os.path.join(CHECKPOINT_DIR, 'inst_Fv4.ckpt'),
1249
+ 'download_urls': [
1250
+ 'https://huggingface.co/GaboxR67/MelBandRoformers/resolve/main/melbandroformers/instrumental/inst_gabox.yaml',
1251
+ 'https://huggingface.co/GaboxR67/MelBandRoformers/resolve/main/melbandroformers/instrumental/inst_Fv4.ckpt'
1252
+ ],
1253
+ 'needs_conf_edit': True
1254
+ },
1255
+ 'INSTV7N (by Gabox)': {
1256
+ 'model_type': 'mel_band_roformer',
1257
+ 'config_path': os.path.join(CHECKPOINT_DIR, 'inst_gabox.yaml'),
1258
+ 'start_check_point': os.path.join(CHECKPOINT_DIR, 'INSTV7N.ckpt'),
1259
+ 'download_urls': [
1260
+ 'https://huggingface.co/GaboxR67/MelBandRoformers/resolve/main/melbandroformers/instrumental/inst_gabox.yaml',
1261
+ 'https://huggingface.co/GaboxR67/MelBandRoformers/resolve/main/melbandroformers/instrumental/INSTV7N.ckpt'
1262
+ ],
1263
+ 'needs_conf_edit': True
1264
+ },
1265
+ 'inst_fv7b (by Gabox)': {
1266
+ 'model_type': 'mel_band_roformer',
1267
+ 'config_path': os.path.join(CHECKPOINT_DIR, 'inst_gabox.yaml'),
1268
+ 'start_check_point': os.path.join(CHECKPOINT_DIR, 'inst_fv7b.ckpt'),
1269
+ 'download_urls': [
1270
+ 'https://huggingface.co/GaboxR67/MelBandRoformers/resolve/main/melbandroformers/instrumental/inst_gabox.yaml',
1271
+ 'https://huggingface.co/GaboxR67/MelBandRoformers/resolve/main/melbandroformers/experimental/inst_fv7b.ckpt'
1272
+ ],
1273
+ 'needs_conf_edit': True
1274
+ },
1275
+ 'inst_fv7z (by Gabox)': {
1276
+ 'model_type': 'mel_band_roformer',
1277
+ 'config_path': os.path.join(CHECKPOINT_DIR, 'inst_gabox.yaml'),
1278
+ 'start_check_point': os.path.join(CHECKPOINT_DIR, 'Inst_GaboxFv7z.ckpt'),
1279
+ 'download_urls': [
1280
+ 'https://huggingface.co/GaboxR67/MelBandRoformers/resolve/main/melbandroformers/instrumental/inst_gabox.yaml',
1281
+ 'https://huggingface.co/GaboxR67/MelBandRoformers/resolve/main/melbandroformers/instrumental/Inst_GaboxFv7z.ckpt'
1282
+ ],
1283
+ 'needs_conf_edit': True
1284
+ },
1285
+ 'Inst_GaboxFv9 (by Gabox)': {
1286
+ 'model_type': 'mel_band_roformer',
1287
+ 'config_path': os.path.join(CHECKPOINT_DIR, 'inst_gabox.yaml'),
1288
+ 'start_check_point': os.path.join(CHECKPOINT_DIR, 'Inst_GaboxFv9.ckpt'),
1289
+ 'download_urls': [
1290
+ 'https://huggingface.co/GaboxR67/MelBandRoformers/resolve/main/melbandroformers/instrumental/inst_gabox.yaml',
1291
+ 'https://huggingface.co/GaboxR67/MelBandRoformers/resolve/main/melbandroformers/instrumental/Inst_GaboxFv9.ckpt'
1292
+ ],
1293
+ 'needs_conf_edit': True
1294
+ },
1295
+ 'inst_gaboxFlowersV10 (by Gabox)': {
1296
+ 'model_type': 'mel_band_roformer',
1297
+ 'config_path': os.path.join(CHECKPOINT_DIR, 'v10.yaml'),
1298
+ 'start_check_point': os.path.join(CHECKPOINT_DIR, 'inst_gaboxFlowersV10.ckpt'),
1299
+ 'download_urls': [
1300
+ 'https://huggingface.co/GaboxR67/MelBandRoformers/resolve/main/melbandroformers/instrumental/v10.yaml',
1301
+ 'https://huggingface.co/GaboxR67/MelBandRoformers/resolve/main/melbandroformers/instrumental/inst_gaboxFlowersV10.ckpt'
1302
+ ],
1303
+ 'needs_conf_edit': True
1304
+ },
1305
+ 'Inst_FV8b (by Gabox)': {
1306
+ 'model_type': 'mel_band_roformer',
1307
+ 'config_path': os.path.join(CHECKPOINT_DIR, 'inst_gabox.yaml'),
1308
+ 'start_check_point': os.path.join(CHECKPOINT_DIR, 'Inst_FV8b.ckpt'),
1309
+ 'download_urls': [
1310
+ 'https://huggingface.co/GaboxR67/MelBandRoformers/resolve/main/melbandroformers/instrumental/inst_gabox.yaml',
1311
+ 'https://huggingface.co/GaboxR67/MelBandRoformers/resolve/main/melbandroformers/experimental/Inst_FV8b.ckpt'
1312
+ ],
1313
+ 'needs_conf_edit': True
1314
+ },
1315
+ 'Inst_Fv8 (by Gabox)': {
1316
+ 'model_type': 'mel_band_roformer',
1317
+ 'config_path': os.path.join(CHECKPOINT_DIR, 'inst_gabox.yaml'),
1318
+ 'start_check_point': os.path.join(CHECKPOINT_DIR, 'Inst_Fv8.ckpt'),
1319
+ 'download_urls': [
1320
+ 'https://huggingface.co/GaboxR67/MelBandRoformers/resolve/main/melbandroformers/instrumental/inst_gabox.yaml',
1321
+ 'https://huggingface.co/GaboxR67/MelBandRoformers/resolve/main/melbandroformers/experimental/Inst_Fv8.ckpt'
1322
+ ],
1323
+ 'needs_conf_edit': True
1324
  }
1325
  },
1326
  "4-Stem Models": {
 
1477
  'https://huggingface.co/anvuew/dereverb_mel_band_roformer/resolve/main/dereverb_mel_band_roformer_mono_anvuew_sdr_20.4029.ckpt'
1478
  ],
1479
  'needs_conf_edit': True
1480
+ },
1481
+ 'dereverb-echo_128_4_4 (by Sucial)': {
1482
+ 'model_type': 'mel_band_roformer',
1483
+ 'config_path': os.path.join(CHECKPOINT_DIR, 'config_dereverb-echo_128_4_4_mel_band_roformer.yaml'),
1484
+ 'start_check_point': os.path.join(CHECKPOINT_DIR, 'dereverb-echo_128_4_4_mel_band_roformer_sdr_dry_12.4235.ckpt'),
1485
+ 'download_urls': [
1486
+ 'https://huggingface.co/Sucial/Dereverb-Echo_Mel_Band_Roformer/resolve/main/config_dereverb-echo_128_4_4_mel_band_roformer.yaml',
1487
+ 'https://huggingface.co/Sucial/Dereverb-Echo_Mel_Band_Roformer/resolve/main/dereverb-echo_128_4_4_mel_band_roformer_sdr_dry_12.4235.ckpt'
1488
+ ],
1489
+ 'needs_conf_edit': True
1490
+ },
1491
+ 'dereverb_echo_mbr_v2 (by Sucial)': {
1492
+ 'model_type': 'mel_band_roformer',
1493
+ 'config_path': os.path.join(CHECKPOINT_DIR, 'config_dereverb_echo_mbr_v2.yaml'),
1494
+ 'start_check_point': os.path.join(CHECKPOINT_DIR, 'dereverb_echo_mbr_v2_sdr_dry_13.4843.ckpt'),
1495
+ 'download_urls': [
1496
+ 'https://huggingface.co/Sucial/Dereverb-Echo_Mel_Band_Roformer/resolve/main/config_dereverb_echo_mbr_v2.yaml',
1497
+ 'https://huggingface.co/Sucial/Dereverb-Echo_Mel_Band_Roformer/resolve/main/dereverb_echo_mbr_v2_sdr_dry_13.4843.ckpt'
1498
+ ],
1499
+ 'needs_conf_edit': True
1500
+ },
1501
+ 'de_big_reverb_mbr_ep_362 (by Sucial)': {
1502
+ 'model_type': 'mel_band_roformer',
1503
+ 'config_path': os.path.join(CHECKPOINT_DIR, 'config_dereverb_echo_mbr_v2.yaml'),
1504
+ 'start_check_point': os.path.join(CHECKPOINT_DIR, 'de_big_reverb_mbr_ep_362.ckpt'),
1505
+ 'download_urls': [
1506
+ 'https://huggingface.co/Sucial/Dereverb-Echo_Mel_Band_Roformer/resolve/main/config_dereverb_echo_mbr_v2.yaml',
1507
+ 'https://huggingface.co/Sucial/Dereverb-Echo_Mel_Band_Roformer/resolve/main/de_big_reverb_mbr_ep_362.ckpt'
1508
+ ],
1509
+ 'needs_conf_edit': True
1510
+ },
1511
+ 'de_super_big_reverb_mbr_ep_346 (by Sucial)': {
1512
+ 'model_type': 'mel_band_roformer',
1513
+ 'config_path': os.path.join(CHECKPOINT_DIR, 'config_dereverb_echo_mbr_v2.yaml'),
1514
+ 'start_check_point': os.path.join(CHECKPOINT_DIR, 'de_super_big_reverb_mbr_ep_346.ckpt'),
1515
+ 'download_urls': [
1516
+ 'https://huggingface.co/Sucial/Dereverb-Echo_Mel_Band_Roformer/resolve/main/config_dereverb_echo_mbr_v2.yaml',
1517
+ 'https://huggingface.co/Sucial/Dereverb-Echo_Mel_Band_Roformer/resolve/main/de_super_big_reverb_mbr_ep_346.ckpt'
1518
+ ],
1519
+ 'needs_conf_edit': True
1520
+ },
1521
+ 'dereverb_room (by anvuew)': {
1522
+ 'model_type': 'bs_roformer',
1523
+ 'config_path': os.path.join(CHECKPOINT_DIR, 'dereverb_room_anvuew.yaml'),
1524
+ 'start_check_point': os.path.join(CHECKPOINT_DIR, 'dereverb_room_anvuew_sdr_13.7432.ckpt'),
1525
+ 'download_urls': [
1526
+ 'https://huggingface.co/anvuew/dereverb_room/resolve/main/dereverb_room_anvuew.yaml',
1527
+ 'https://huggingface.co/anvuew/dereverb_room/resolve/main/dereverb_room_anvuew_sdr_13.7432.ckpt'
1528
+ ],
1529
+ 'needs_conf_edit': True
1530
  }
1531
  },
1532
  "Karaoke": {
 
1540
  ],
1541
  'needs_conf_edit': True
1542
  },
1543
+ 'KaraokeGabox (by Gabox)': {
1544
  'model_type': 'mel_band_roformer',
1545
+ 'config_path': os.path.join(CHECKPOINT_DIR, 'karaokegabox_1750911344.yaml'),
1546
+ 'start_check_point': os.path.join(CHECKPOINT_DIR, 'Karaoke_GaboxV1.ckpt'),
1547
+ 'download_urls': [
1548
+ 'https://huggingface.co/GaboxR67/MelBandRoformers/resolve/main/melbandroformers/karaoke/karaokegabox_1750911344.yaml',
1549
+ 'https://huggingface.co/GaboxR67/MelBandRoformers/resolve/main/melbandroformers/karaoke/Karaoke_GaboxV1.ckpt'
1550
+ ],
1551
+ 'needs_conf_edit': True
1552
+ },
1553
+ 'bs_karaoke_gabox_IS (by Gabox)': {
1554
+ 'model_type': 'bs_roformer',
1555
+ 'config_path': os.path.join(CHECKPOINT_DIR, 'karaoke_bs_roformer.yaml'),
1556
+ 'start_check_point': os.path.join(CHECKPOINT_DIR, 'bs_karaoke_gabox_IS.ckpt'),
1557
+ 'download_urls': [
1558
+ 'https://huggingface.co/GaboxR67/MelBandRoformers/resolve/main/bsroformers/karaoke_bs_roformer.yaml',
1559
+ 'https://huggingface.co/GaboxR67/MelBandRoformers/resolve/main/bsroformers/bs_karaoke_gabox_IS.ckpt'
1560
+ ],
1561
+ 'needs_conf_edit': True
1562
+ },
1563
+ 'bs_roformer_karaoke_frazer_becruily': {
1564
+ 'model_type': 'bs_roformer',
1565
+ 'config_path': os.path.join(CHECKPOINT_DIR, 'config_karaoke_frazer_becruily.yaml'),
1566
+ 'start_check_point': os.path.join(CHECKPOINT_DIR, 'bs_roformer_karaoke_frazer_becruily.ckpt'),
1567
+ 'download_urls': [
1568
+ 'https://huggingface.co/becruily/bs-roformer-karaoke/resolve/main/config_karaoke_frazer_becruily.yaml',
1569
+ 'https://huggingface.co/becruily/bs-roformer-karaoke/resolve/main/bs_roformer_karaoke_frazer_becruily.ckpt'
1570
+ ],
1571
+ 'needs_conf_edit': True
1572
+ },
1573
+ 'mel_band_roformer_karaoke_becruily': {
1574
+ 'model_type': 'mel_band_roformer',
1575
+ 'config_path': os.path.join(CHECKPOINT_DIR, 'config_karaoke_becruily.yaml'),
1576
+ 'start_check_point': os.path.join(CHECKPOINT_DIR, 'mel_band_roformer_karaoke_becruily.ckpt'),
1577
  'download_urls': [
1578
+ 'https://huggingface.co/becruily/mel-band-roformer-karaoke/resolve/main/config_karaoke_becruily.yaml',
1579
+ 'https://huggingface.co/becruily/mel-band-roformer-karaoke/resolve/main/mel_band_roformer_karaoke_becruily.ckpt'
1580
  ],
1581
  'needs_conf_edit': True
1582
  }
 
1661
  'https://huggingface.co/SYH99999/MelBandRoformerSYHFTB1/resolve/main/model3.ckpt'
1662
  ],
1663
  'needs_conf_edit': True
1664
+ },
1665
+ 'bs_hyperace (by unwa)': {
1666
+ 'model_type': 'bs_roformer_custom',
1667
+ 'config_path': os.path.join(CHECKPOINT_DIR, 'config_hyperace.yaml'),
1668
+ 'start_check_point': os.path.join(CHECKPOINT_DIR, 'bs_hyperace.ckpt'),
1669
+ 'download_urls': [
1670
+ ('https://huggingface.co/pcunwa/BS-Roformer-HyperACE/resolve/main/config.yaml', 'config_hyperace.yaml'),
1671
+ 'https://huggingface.co/pcunwa/BS-Roformer-HyperACE/resolve/main/bs_hyperace.ckpt'
1672
+ ],
1673
+ 'custom_model_url': 'https://huggingface.co/pcunwa/BS-Roformer-HyperACE/resolve/main/bs_roformer.py',
1674
+ 'needs_conf_edit': True
1675
+ },
1676
+ 'becruily_deux (by becruily)': {
1677
+ 'model_type': 'mel_band_roformer',
1678
+ 'config_path': os.path.join(CHECKPOINT_DIR, 'config_deux_becruily.yaml'),
1679
+ 'start_check_point': os.path.join(CHECKPOINT_DIR, 'becruily_deux.ckpt'),
1680
+ 'download_urls': [
1681
+ 'https://huggingface.co/becruily/mel-band-roformer-deux/resolve/main/config_deux_becruily.yaml',
1682
+ 'https://huggingface.co/becruily/mel-band-roformer-deux/resolve/main/becruily_deux.ckpt'
1683
+ ],
1684
+ 'needs_conf_edit': True
1685
+ },
1686
+ 'becruily_guitar (by becruily)': {
1687
+ 'model_type': 'mel_band_roformer',
1688
+ 'config_path': os.path.join(CHECKPOINT_DIR, 'config_guitar_becruily.yaml'),
1689
+ 'start_check_point': os.path.join(CHECKPOINT_DIR, 'becruily_guitar.ckpt'),
1690
+ 'download_urls': [
1691
+ 'https://huggingface.co/becruily/mel-band-roformer-guitar/resolve/main/config_guitar_becruily.yaml',
1692
+ 'https://huggingface.co/becruily/mel-band-roformer-guitar/resolve/main/becruily_guitar.ckpt'
1693
+ ],
1694
+ 'needs_conf_edit': True
1695
+ },
1696
+ 'aspiration_mel_band_roformer (by Sucial)': {
1697
+ 'model_type': 'mel_band_roformer',
1698
+ 'config_path': os.path.join(CHECKPOINT_DIR, 'config_aspiration_mel_band_roformer.yaml'),
1699
+ 'start_check_point': os.path.join(CHECKPOINT_DIR, 'aspiration_mel_band_roformer_sdr_18.9845.ckpt'),
1700
+ 'download_urls': [
1701
+ 'https://huggingface.co/Sucial/Aspiration_Mel_Band_Roformer/resolve/main/config_aspiration_mel_band_roformer.yaml',
1702
+ 'https://huggingface.co/Sucial/Aspiration_Mel_Band_Roformer/resolve/main/aspiration_mel_band_roformer_sdr_18.9845.ckpt'
1703
+ ],
1704
+ 'needs_conf_edit': True
1705
+ },
1706
+ 'dereverb_echo_mbr_v2 (by Sucial)': {
1707
+ 'model_type': 'mel_band_roformer',
1708
+ 'config_path': os.path.join(CHECKPOINT_DIR, 'config_dereverb_echo_mbr_v2.yaml'),
1709
+ 'start_check_point': os.path.join(CHECKPOINT_DIR, 'dereverb_echo_mbr_v2_sdr_dry_13.4843.ckpt'),
1710
+ 'download_urls': [
1711
+ 'https://huggingface.co/Sucial/Dereverb-Echo_Mel_Band_Roformer/resolve/main/config_dereverb_echo_mbr_v2.yaml',
1712
+ 'https://huggingface.co/Sucial/Dereverb-Echo_Mel_Band_Roformer/resolve/main/dereverb_echo_mbr_v2_sdr_dry_13.4843.ckpt'
1713
+ ],
1714
+ 'needs_conf_edit': True
1715
+ },
1716
+ 'mdx23c_similarity (by ZFTurbo)': {
1717
+ 'model_type': 'mdx23c',
1718
+ 'config_path': os.path.join(CHECKPOINT_DIR, 'config_mdx23c_similarity.yaml'),
1719
+ 'start_check_point': os.path.join(CHECKPOINT_DIR, 'model_mdx23c_ep_271_l1_freq_72.2383.ckpt'),
1720
+ 'download_urls': [
1721
+ 'https://github.com/ZFTurbo/Music-Source-Separation-Training/releases/download/v1.0.10/config_mdx23c_similarity.yaml',
1722
+ 'https://github.com/ZFTurbo/Music-Source-Separation-Training/releases/download/v1.0.10/model_mdx23c_ep_271_l1_freq_72.2383.ckpt'
1723
+ ],
1724
+ 'needs_conf_edit': False
1725
+ },
1726
+ 'mel_band_roformer_Lead_Rhythm_Guitar (by listra92)': {
1727
+ 'model_type': 'mel_band_roformer',
1728
+ 'config_path': os.path.join(CHECKPOINT_DIR, 'config_mel_band_roformer_Lead_Rhythm_Guitar.yaml'),
1729
+ 'start_check_point': os.path.join(CHECKPOINT_DIR, 'model_mel_band_roformer_ep_72_sdr_3.2232.ckpt'),
1730
+ 'download_urls': [
1731
+ 'https://huggingface.co/listra92/MyModels/resolve/main/misc/config_mel_band_roformer_Lead_Rhythm_Guitar.yaml',
1732
+ 'https://huggingface.co/listra92/MyModels/resolve/main/misc/model_mel_band_roformer_ep_72_sdr_3.2232.ckpt'
1733
+ ],
1734
+ 'needs_conf_edit': True
1735
+ },
1736
+ 'last_bs_roformer_4stem (by Amane)': {
1737
+ 'model_type': 'bs_roformer',
1738
+ 'config_path': os.path.join(CHECKPOINT_DIR, 'config_last_bs.yaml'),
1739
+ 'start_check_point': os.path.join(CHECKPOINT_DIR, 'last_bs_roformer.ckpt'),
1740
+ 'download_urls': [
1741
+ 'https://huggingface.co/listra92/MyModels/resolve/main/misc/config.yaml',
1742
+ 'https://huggingface.co/listra92/MyModels/resolve/main/misc/last_bs_roformer.ckpt'
1743
+ ],
1744
+ 'needs_conf_edit': True
1745
+ },
1746
+ 'bs_roformer_4stems_ft (by SYH99999)': {
1747
+ 'model_type': 'bs_roformer',
1748
+ 'config_path': os.path.join(CHECKPOINT_DIR, 'config_bs_4stems_ft.yaml'),
1749
+ 'start_check_point': os.path.join(CHECKPOINT_DIR, 'bs_roformer_4stems_ft.pth'),
1750
+ 'download_urls': [
1751
+ 'https://huggingface.co/SYH99999/bs_roformer_4stems_ft/resolve/main/config.yaml',
1752
+ 'https://huggingface.co/SYH99999/bs_roformer_4stems_ft/resolve/main/bs_roformer_4stems_ft.pth'
1753
+ ],
1754
+ 'needs_conf_edit': True
1755
+ },
1756
+ 'CINEMATIC-BandIt_v2_Eng (by kwatcharasupat)': {
1757
+ 'model_type': 'bandit_v2',
1758
+ 'config_path': os.path.join(CHECKPOINT_DIR, 'config_dnr_bandit_v2_mus64.yaml'),
1759
+ 'start_check_point': os.path.join(CHECKPOINT_DIR, 'checkpoint-eng_state_dict.ckpt'),
1760
+ 'download_urls': [
1761
+ 'https://huggingface.co/jarredou/banditv2_state_dicts_only/resolve/main/config_dnr_bandit_v2_mus64.yaml',
1762
+ 'https://huggingface.co/jarredou/banditv2_state_dicts_only/resolve/main/checkpoint-eng_state_dict.ckpt'
1763
+ ],
1764
+ 'needs_conf_edit': True
1765
  }
1766
  }
1767
  }
1768
 
1769
  def get_model_config(clean_model=None, chunk_size=None, overlap=None):
1770
+ """Returns model type, config path, and checkpoint path for a given model name, downloading files if needed.
1771
+
1772
+ download_urls can contain:
1773
+ - Simple strings: 'url' - downloads with filename from URL
1774
+ - Tuples: ('url', 'target_filename') - downloads with custom filename
1775
+
1776
+ Also handles custom models loaded from custom_models.json
1777
+ """
1778
  if clean_model is None:
1779
+ all_models = {model_name for category in MODEL_CONFIGS.values() for model_name in category.keys()}
1780
+ # Add custom models
1781
+ custom_models = load_custom_models()
1782
+ all_models.update(custom_models.keys())
1783
+ return all_models
1784
 
1785
+ # First check built-in models
1786
  for category in MODEL_CONFIGS.values():
1787
  if clean_model in category:
1788
  config = category[clean_model]
1789
+ for url_entry in config['download_urls']:
1790
+ # Handle both simple URL strings and (url, target_filename) tuples
1791
+ if isinstance(url_entry, tuple):
1792
+ url, target_filename = url_entry
1793
+ download_file(url, target_filename=target_filename)
1794
+ else:
1795
+ download_file(url_entry)
1796
+ if config.get('custom_model_url'):
1797
+ custom_path = os.path.join(BASE_DIR, 'models', 'bs_roformer', 'bs_roformer_custom')
1798
+ os.makedirs(custom_path, exist_ok=True)
1799
+ # Create __init__.py for Python import support
1800
+ init_file = os.path.join(custom_path, '__init__.py')
1801
+ if not os.path.exists(init_file):
1802
+ with open(init_file, 'w') as f:
1803
+ f.write('# Auto-generated for custom BSRoformer models\n')
1804
+ download_file(config['custom_model_url'], path=custom_path)
1805
  if config['needs_conf_edit'] and chunk_size is not None and overlap is not None:
1806
  conf_edit(config['config_path'], chunk_size, overlap)
1807
  return config['model_type'], config['config_path'], config['start_check_point']
1808
+
1809
+ # Then check custom models
1810
+ custom_models = load_custom_models()
1811
+ if clean_model in custom_models:
1812
+ config = custom_models[clean_model]
1813
+ checkpoint_path = os.path.join(CHECKPOINT_DIR, config['checkpoint_filename'])
1814
+ config_path = os.path.join(CHECKPOINT_DIR, config['config_filename'])
1815
+
1816
+ # Download checkpoint
1817
+ download_file(config['checkpoint_url'], target_filename=config['checkpoint_filename'])
1818
+ # Download config with custom filename
1819
+ download_file(config['config_url'], target_filename=config['config_filename'])
1820
+
1821
+ # Handle custom model URL if present
1822
+ if config.get('custom_model_url'):
1823
+ custom_path = os.path.join(BASE_DIR, 'models', 'bs_roformer', 'bs_roformer_custom')
1824
+ os.makedirs(custom_path, exist_ok=True)
1825
+ init_file = os.path.join(custom_path, '__init__.py')
1826
+ if not os.path.exists(init_file):
1827
+ with open(init_file, 'w') as f:
1828
+ f.write('# Auto-generated for custom BSRoformer models\n')
1829
+ download_file(config['custom_model_url'], path=custom_path)
1830
+
1831
+ # Apply config edits if needed
1832
+ if config.get('needs_conf_edit', True) and chunk_size is not None and overlap is not None:
1833
+ conf_edit(config_path, chunk_size, overlap, model_name=clean_model)
1834
+
1835
+ return config['model_type'], config_path, checkpoint_path
1836
+
1837
  return "", "", ""
1838
 
1839
def get_all_model_configs_with_custom():
    """Return a shallow copy of MODEL_CONFIGS, extended with a "Custom Models" category.

    The extra category is only added when ``load_custom_models()`` returns a
    non-empty mapping. Each custom entry is converted to the same key layout the
    built-in entries use.
    """
    merged = dict(MODEL_CONFIGS)
    user_models = load_custom_models()
    if not user_models:
        return merged

    custom_category = {}
    for name, cfg in user_models.items():
        entry = {
            'model_type': cfg['model_type'],
            'config_path': os.path.join(CHECKPOINT_DIR, cfg['config_filename']),
            'start_check_point': os.path.join(CHECKPOINT_DIR, cfg['checkpoint_filename']),
            'download_urls': [cfg['checkpoint_url'], cfg['config_url']],
            'custom_model_url': cfg.get('custom_model_url'),
            'needs_conf_edit': cfg.get('needs_conf_edit', True)
        }
        custom_category[name] = entry

    merged["Custom Models"] = custom_category
    return merged
1856
+
1857
# Attach a dict-like ``keys()`` to the function: it returns the set of all
# built-in model names united with the names from load_custom_models().
get_model_config.keys = lambda: set(
    model_name
    for category in MODEL_CONFIGS.values()
    for model_name in category.keys()
).union(load_custom_models().keys())
1858
+
1859
+
1860
def get_model_chunk_size(model_name):
    """Look up ``audio.chunk_size`` in the local YAML config of ``model_name``.

    Only config files that already exist on disk are read; nothing is downloaded
    here. Any error while reading or parsing is swallowed on purpose, so the
    lookup is best-effort.

    Returns:
        The chunk size as ``int``, or ``None`` when the model is unknown, its
        config file is missing or unreadable, or the value is absent/falsy.
    """
    for category in get_all_model_configs_with_custom().values():
        if model_name not in category:
            continue

        yaml_path = category[model_name].get('config_path', '')
        if not yaml_path or not os.path.exists(yaml_path):
            continue

        try:
            with open(yaml_path, 'r', encoding='utf-8') as handle:
                parsed = yaml.safe_load(handle)
            if not isinstance(parsed, dict):
                continue
            native_size = parsed.get('audio', {}).get('chunk_size')
            if native_size:
                return int(native_size)
        except Exception:
            # Best-effort: fall through and keep looking / return None.
            pass

    return None
models/bs_roformer/attend_sage.py ADDED
@@ -0,0 +1,145 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from functools import wraps
2
+ from packaging import version
3
+ from collections import namedtuple
4
+
5
+ import os
6
+ import torch
7
+ from torch import nn, einsum
8
+ import torch.nn.functional as F
9
+
10
+ from einops import rearrange, reduce
11
+
12
def _print_once(msg):
    """Return a zero-argument callable that prints ``msg`` at most once per process.

    Call sites in this module build the callable and invoke it immediately, so a
    flag stored in the closure would be recreated on every call and never
    suppress anything. The set of already printed messages is therefore kept on
    the function object and shared by every closure created here.
    """
    seen = _print_once.__dict__.setdefault('_seen', set())

    def inner():
        if msg not in seen:
            seen.add(msg)
            print(msg)

    return inner
21
+
22
+ try:
23
+ from sageattention import sageattn
24
+ _has_sage_attention = True
25
+ # _print_sage_found = _print_once("SageAttention found. Will be used when flash=True.")
26
+ # _print_sage_found()
27
+ except ImportError:
28
+ _has_sage_attention = False
29
+ _print_sage_not_found = _print_once("SageAttention not found. Will fall back to PyTorch SDPA (if available) or manual einsum.")
30
+ _print_sage_not_found()
31
+
32
+ # helpers
33
def exists(val):
    """Return True for any value other than None (falsy values such as 0 or '' count as existing)."""
    if val is None:
        return False
    return True
35
+
36
def default(v, d):
    """Return ``v`` unless it is None, in which case return the fallback ``d``."""
    if exists(v):
        return v
    return d
38
+
39
+ # main class
40
class Attend(nn.Module):
    """Attention with a chain of backends: SageAttention -> PyTorch SDPA -> einsum.

    With ``flash=True`` the module uses SageAttention when the module-level
    import succeeded, otherwise PyTorch's ``F.scaled_dot_product_attention``
    when it is available. If a backend raises at call time it is disabled on
    this instance and the next one in the chain is used. With ``flash=False``
    only the plain einsum implementation runs.

    Args:
        dropout: Attention dropout probability. Applied by the einsum path via
            ``nn.Dropout`` and by the SDPA path via ``dropout_p`` (training
            only). It is not passed to SageAttention.
        flash: If True, try the accelerated backends before einsum.
        scale: Optional softmax scale; defaults to ``dim_head ** -0.5``.
    """

    def __init__(
        self,
        dropout = 0.,
        flash = False,
        scale = None
    ):
        super().__init__()
        self.scale = scale
        self.dropout = dropout

        # Backend selection; both flags may be flipped off later by forward().
        self.use_sage = flash and _has_sage_attention
        self.use_pytorch_sdpa = False
        self._sdpa_checked = False  # SDPA availability is probed only once

        if flash and not self.use_sage:
            self._select_sdpa(
                "Using PyTorch SDPA backend (FlashAttention-2, Memory-Efficient, or Math).",
                "Flash attention requested but Pytorch < 2.0 and SageAttention not found. Falling back to einsum.",
            )

        # Used by the einsum path only.
        self.attn_dropout = nn.Dropout(dropout)

    def _select_sdpa(self, sdpa_msg, einsum_msg):
        """Enable the SDPA backend if this PyTorch build provides it; report the choice once."""
        if self._sdpa_checked:
            return
        # F.scaled_dot_product_attention was introduced with PyTorch 2.0, so
        # feature detection replaces the former version-string comparison.
        if hasattr(F, 'scaled_dot_product_attention'):
            self.use_pytorch_sdpa = True
            _print_once(sdpa_msg)()
        else:
            _print_once(einsum_msg)()
        self._sdpa_checked = True

    def forward(self, q, k, v):
        """
        einstein notation
        b - batch
        h - heads
        n, i, j - sequence length (base sequence length, source, target)
        d - feature dimension

        Input tensors q, k, v expected in shape: (batch, heads, seq_len, dim_head) -> HND layout
        """
        # The accelerated calls below do not receive a scale argument. SDPA then
        # applies dim_head ** -0.5; sageattn is assumed to do the same (TODO confirm).
        # A custom scale is folded into q so every backend computes the same
        # attention as the einsum path. The unscaled q is kept for einsum.
        fast_q = q
        if self.scale is not None and (self.use_sage or self.use_pytorch_sdpa):
            fast_q = q * (self.scale / (q.shape[-1] ** -0.5))

        # --- Priority 1: SageAttention ---
        if self.use_sage:
            # Assumes q, k, v are FP16/BF16 (handled by autocast upstream) and
            # that the kernel applies no dropout. Non-causal attention.
            try:
                return sageattn(fast_q, k, v, tensor_layout='HND', is_causal=False)
            except Exception as e:
                print(f"SageAttention failed with error: {e}. Falling back.")
                self.use_sage = False  # don't try Sage again if it failed once
                self._select_sdpa("Falling back to PyTorch SDPA.", "Falling back to einsum.")

        # --- Priority 2: PyTorch SDPA ---
        if self.use_pytorch_sdpa:
            try:
                # Let PyTorch choose among the flash, memory-efficient and math kernels.
                with torch.backends.cuda.sdp_kernel(enable_flash=True, enable_math=True, enable_mem_efficient=True):
                    return F.scaled_dot_product_attention(
                        fast_q, k, v,
                        attn_mask=None,
                        dropout_p = self.dropout if self.training else 0.,
                        is_causal=False
                    )
            except Exception as e:
                print(f"PyTorch SDPA failed with error: {e}. Falling back to einsum.")
                self.use_pytorch_sdpa = False

        # --- Priority 3: manual einsum attention ---
        scale = self.scale if self.scale is not None else q.shape[-1] ** -0.5

        # similarity
        sim = einsum("b h i d, b h j d -> b h i j", q, k) * scale

        # attention
        attn = sim.softmax(dim=-1)
        attn = self.attn_dropout(attn)

        # aggregate values
        out = einsum("b h i j, b h j d -> b h i d", attn, v)

        return out
models/bs_roformer/bs_roformer_experimental.py ADDED
@@ -0,0 +1,686 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from functools import partial
2
+
3
+ import torch
4
+ from torch import nn, einsum, Tensor
5
+ from torch.nn import Module, ModuleList
6
+ import torch.nn.functional as F
7
+
8
+ from models.bs_roformer.attend import Attend
9
+ from torch.utils.checkpoint import checkpoint
10
+
11
+ from beartype.typing import Tuple, Optional, List, Callable
12
+ from beartype import beartype
13
+
14
+ from rotary_embedding_torch import RotaryEmbedding
15
+
16
+ from einops import rearrange, pack, unpack
17
+ from einops.layers.torch import Rearrange
18
+
19
+ from hyper_connections import get_init_and_expand_reduce_stream_functions
20
+
21
+ # helper functions
22
+
23
def exists(val):
    """Return True when `val` is anything other than None (falsy values still count)."""
    return not (val is None)
25
+
26
+
27
def default(v, d):
    """Return `v` unless it is None, in which case return the fallback `d`."""
    if v is None:
        return d
    return v
29
+
30
+
31
def pack_one(t, pattern):
    """Pack the single tensor `t` with einops `pack`.

    Returns whatever `pack` returns for a one-element list; callers in this
    file destructure it as `(packed, packed_shapes)`.
    """
    singleton = [t]
    return pack(singleton, pattern)
33
+
34
+
35
def unpack_one(t, ps, pattern):
    """Inverse of `pack_one`: run einops `unpack` and return its first piece."""
    pieces = unpack(t, ps, pattern)
    return pieces[0]
37
+
38
+
39
+ # norm
40
+
41
def l2norm(t):
    """Scale `t` to unit L2 norm along its last dimension."""
    return F.normalize(t, p=2, dim=-1)
43
+
44
+
45
class RMSNorm(Module):
    """Unit-normalise the last dimension, then rescale by sqrt(dim) and a learned gain."""

    def __init__(self, dim):
        super().__init__()
        # Learned per-feature gain, initialised to 1.
        self.gamma = nn.Parameter(torch.ones(dim))
        # Constant sqrt(dim) factor applied after unit-normalisation.
        self.scale = dim ** 0.5

    def forward(self, x):
        unit = F.normalize(x, dim=-1)
        return unit * self.scale * self.gamma
53
+
54
+
55
+ # attention
56
+
57
class FeedForward(Module):
    """Pre-normalised MLP block: RMSNorm -> Linear -> GELU -> Dropout -> Linear -> Dropout.

    No residual connection is added here; the caller is responsible for it.
    """

    def __init__(self, dim, mult=4, dropout=0.):
        super().__init__()
        hidden = int(dim * mult)  # width of the expanded hidden layer
        stages = [
            RMSNorm(dim),
            nn.Linear(dim, hidden),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(hidden, dim),
            nn.Dropout(dropout),
        ]
        self.net = nn.Sequential(*stages)

    def forward(self, x):
        return self.net(x)
77
+
78
+
79
class Attention(Module):
    """Gated multi-head self-attention with optional rotary embeddings and value-residual mixing.

    `forward` returns `(output, values)`, where `values` is this layer's value
    tensor taken before any mixing with `value_residual`.
    """

    def __init__(
            self,
            dim,
            heads=8,
            dim_head=64,
            dropout=0.,
            rotary_embed=None,
            flash=True,
            learned_value_residual_mix=False,
    ):
        super().__init__()
        inner_width = heads * dim_head

        self.heads = heads
        self.scale = dim_head ** -0.5
        self.rotary_embed = rotary_embed

        # Sub-modules are created in a fixed order so seeded initialisation stays stable.
        self.attend = Attend(flash=flash, dropout=dropout)
        self.norm = RMSNorm(dim)
        self.to_qkv = nn.Linear(dim, inner_width * 3, bias=False)

        # Per-head mixing weights between own values and the supplied residual values.
        if learned_value_residual_mix:
            self.to_value_residual_mix = nn.Linear(dim, heads)
        else:
            self.to_value_residual_mix = None

        # Per-head sigmoid gates applied to the attention output.
        self.to_gates = nn.Linear(dim, heads)

        self.to_out = nn.Sequential(
            nn.Linear(inner_width, dim, bias=False),
            nn.Dropout(dropout),
        )

    def forward(self, x, value_residual=None):
        x = self.norm(x)

        qkv = rearrange(self.to_qkv(x), 'b n (qkv h d) -> qkv b h n d', qkv=3, h=self.heads)
        q, k, v = qkv

        # Returned to the caller untouched by the residual mix below.
        own_values = v

        mixer = self.to_value_residual_mix
        if mixer is not None:
            blend = rearrange(mixer(x), 'b n h -> b h n 1').sigmoid()

            # A layer built with learned mixing must be given residual values.
            assert exists(value_residual)
            v = v.lerp(value_residual, blend)

        rotary = self.rotary_embed
        if rotary is not None:
            q = rotary.rotate_queries_or_keys(q)
            k = rotary.rotate_queries_or_keys(k)

        attended = self.attend(q, k, v)

        gate = rearrange(self.to_gates(x), 'b n h -> b h n 1').sigmoid()
        attended = attended * gate

        merged = rearrange(attended, 'b h n d -> b n (h d)')
        return self.to_out(merged), own_values
136
+
137
+
138
class LinearAttention(Module):
    """
    Linear attention in the flavor proposed in https://arxiv.org/abs/2106.09681 by El-Nouby et al.

    q/k/v are rearranged to 'b h d n' before being handed to `Attend`, q and k
    are L2-normalised, and q is scaled by a learned per-head temperature.
    `forward` takes only `x` and returns a single tensor.
    """

    @beartype
    def __init__(
            self,
            *,
            dim,
            dim_head=32,
            heads=8,
            scale=8,
            flash=False,
            dropout=0.
    ):
        super().__init__()
        inner_width = dim_head * heads
        self.norm = RMSNorm(dim)

        qkv_proj = nn.Linear(dim, inner_width * 3, bias=False)
        split_heads = Rearrange('b n (qkv h d) -> qkv b h d n', qkv=3, h=heads)
        self.to_qkv = nn.Sequential(qkv_proj, split_heads)

        # Learned per-head temperature; applied as exp(temperature) to the queries.
        self.temperature = nn.Parameter(torch.ones(heads, 1, 1))

        self.attend = Attend(scale=scale, dropout=dropout, flash=flash)

        merge_heads = Rearrange('b h d n -> b n (h d)')
        self.to_out = nn.Sequential(
            merge_heads,
            nn.Linear(inner_width, dim, bias=False),
        )

    def forward(self, x):
        normed = self.norm(x)

        q, k, v = self.to_qkv(normed)

        q = l2norm(q)
        k = l2norm(k)
        q = q * self.temperature.exp()

        attended = self.attend(q, k, v)
        return self.to_out(attended)
190
+
191
+
192
class Transformer(Module):
    """Stack of `depth` (attention, feed-forward) pairs followed by an optional RMSNorm.

    Args:
        dim: feature width of the token stream.
        depth: number of (attention, feed-forward) pairs.
        dim_head, heads: attention head width and count.
        attn_dropout, ff_dropout: dropout rates for the attention and feed-forward blocks.
        ff_mult: hidden-width multiplier of the feed-forward block.
        norm_output: apply a final RMSNorm when True, otherwise identity.
        rotary_embed: optional rotary embedding passed to each `Attention`.
        flash_attn: forwarded to the attention blocks as `flash`.
        linear_attn: build `LinearAttention` layers instead of `Attention`.
        add_value_residual: build `Attention` with learned value-residual mixing.
        num_residual_streams: when not 1, attention and feed-forward blocks are
            wrapped with the hyper-connection initialiser.

    `forward` returns `(hidden, first_values)`; `first_values` is the value tensor
    reported by the first attention layer, or None for linear-attention stacks.
    """

    def __init__(
            self,
            *,
            dim,
            depth,
            dim_head=64,
            heads=8,
            attn_dropout=0.,
            ff_dropout=0.,
            ff_mult=4,
            norm_output=True,
            rotary_embed=None,
            flash_attn=True,
            linear_attn=False,
            add_value_residual=False,
            num_residual_streams=1,
    ):
        super().__init__()
        self.layers = ModuleList([])

        init_hyper_conn, *_ = get_init_and_expand_reduce_stream_functions(num_residual_streams, disable=num_residual_streams == 1)

        for _ in range(depth):
            if linear_attn:
                attn = LinearAttention(dim=dim, dim_head=dim_head, heads=heads, dropout=attn_dropout, flash=flash_attn)
            else:
                if num_residual_streams != 1:
                    attn = init_hyper_conn(dim=dim, branch=Attention(dim=dim, dim_head=dim_head, heads=heads, dropout=attn_dropout,
                                     rotary_embed=rotary_embed, flash=flash_attn, learned_value_residual_mix=add_value_residual))
                else:
                    attn = Attention(
                        dim=dim, dim_head=dim_head, heads=heads, dropout=attn_dropout,
                        rotary_embed=rotary_embed, flash=flash_attn, learned_value_residual_mix=add_value_residual
                    )
            if num_residual_streams != 1:
                ff = init_hyper_conn(dim=dim, branch=FeedForward(dim=dim, mult=ff_mult, dropout=ff_dropout))
            else:
                ff = FeedForward(dim=dim, mult=ff_mult, dropout=ff_dropout)

            self.layers.append(ModuleList([
                attn,
                ff
            ]))

        self.norm = RMSNorm(dim) if norm_output else nn.Identity()

    def forward(self, x, value_residual=None):
        first_values = None

        # Without a value residual the explicit `+ x` skip connections are applied
        # (compatibility with old weights); with one, the layers are called bare.
        # NOTE(review): the residual add is keyed on `value_residual`, not on whether
        # the layers are hyper-connection wrapped - confirm this is intended.
        add_residual = value_residual is None

        for attn, ff in self.layers:
            if isinstance(attn, LinearAttention):
                # LinearAttention.forward accepts only `x` and returns a bare tensor,
                # so it must not receive `value_residual` or be tuple-unpacked.
                attn_out, next_values = attn(x), None
            else:
                attn_out, next_values = attn(x, value_residual=value_residual)

            # Keep the values reported by the first attention layer only.
            first_values = default(first_values, next_values)

            if add_residual:
                x = attn_out + x
                x = ff(x) + x
            else:
                x = attn_out
                x = ff(x)

        return self.norm(x), first_values
256
+
257
+
258
+ # bandsplit module
259
+
260
class BandSplit(Module):
    """Project each band (a slice of the last dimension) to a `dim`-wide feature vector.

    `dim_inputs` gives the slice width of every band; the per-band outputs are
    stacked on a new second-to-last axis.
    """

    @beartype
    def __init__(
            self,
            dim,
            dim_inputs: Tuple[int, ...]
    ):
        super().__init__()
        self.dim_inputs = dim_inputs
        # One RMSNorm + Linear projection per band, in band order.
        self.to_features = ModuleList([
            nn.Sequential(RMSNorm(width), nn.Linear(width, dim))
            for width in dim_inputs
        ])

    def forward(self, x):
        bands = x.split(self.dim_inputs, dim=-1)
        projected = [project(band) for band, project in zip(bands, self.to_features)]
        return torch.stack(projected, dim=-2)
288
+
289
+
290
def MLP(
        dim_in,
        dim_out,
        dim_hidden=None,
        depth=1,
        activation=nn.Tanh
):
    """Build an `nn.Sequential` of `depth` Linear layers with `activation` between them.

    There are `depth - 1` hidden layers of width `dim_hidden` (defaults to
    `dim_in`); no activation follows the final Linear.
    """
    hidden = dim_in if dim_hidden is None else dim_hidden
    widths = [dim_in] + [hidden] * (depth - 1) + [dim_out]

    layers = []
    for width_in, width_out in zip(widths, widths[1:]):
        # An activation separates consecutive Linear layers, never trailing the last.
        if layers:
            layers.append(activation())
        layers.append(nn.Linear(width_in, width_out))

    return nn.Sequential(*layers)
313
+
314
+
315
class MaskEstimator(Module):
    """Per-band mask heads: one MLP + GLU per band, concatenated on the last axis.

    Args:
        dim: width of the incoming per-band features.
        dim_inputs: output width of every band, in band order.
        depth: depth passed to each band's `MLP`.
        mlp_expansion_factor: hidden width of each MLP is `dim * mlp_expansion_factor`.
    """

    @beartype
    def __init__(
            self,
            dim,
            dim_inputs: Tuple[int, ...],
            depth,
            mlp_expansion_factor=4
    ):
        super().__init__()
        self.dim_inputs = dim_inputs
        self.to_freqs = ModuleList([])
        dim_hidden = dim * mlp_expansion_factor

        for dim_in in dim_inputs:
            # The MLP emits 2 * dim_in features; GLU halves that back to dim_in.
            mlp = nn.Sequential(
                MLP(dim, dim_in * 2, dim_hidden=dim_hidden, depth=depth),
                nn.GLU(dim=-1)
            )

            self.to_freqs.append(mlp)

    def forward(self, x):
        # Bands live on the second-to-last axis; each goes through its own head.
        x = x.unbind(dim=-2)

        outs = [mlp(band_features) for band_features, mlp in zip(x, self.to_freqs)]

        return torch.cat(outs, dim=-1)
349
+
350
+
351
+ # main class
352
+
353
# Default band layout: number of STFT frequency bins assigned to each band.
# 62 bands totalling 1025 bins.
DEFAULT_FREQS_PER_BANDS = (
    (2,) * 24
    + (4,) * 12
    + (12,) * 8
    + (24,) * 8
    + (48,) * 8
    + (128, 129)
)
363
+
364
+
365
class BSRoformer(Module):
    """Band-split transformer that predicts complex STFT masks for one or more stems.

    The input audio is converted to an STFT, split into frequency bands, passed
    through `depth` blocks of (optional linear, time-axis, frequency-axis)
    transformers, and turned into one complex mask per stem. The masked STFT is
    inverted back to audio. When a `target` is supplied to `forward`, an L1 loss
    plus a multi-resolution STFT loss is returned instead of audio.
    """

    @beartype
    def __init__(
            self,
            dim,
            *,
            depth,
            stereo=False,
            num_stems=1,
            time_transformer_depth=2,
            freq_transformer_depth=2,
            linear_transformer_depth=0,
            freqs_per_bands: Tuple[int, ...] = DEFAULT_FREQS_PER_BANDS,
            # in the paper, they divide into ~60 bands, test with 1 for starters
            dim_head=64,
            heads=8,
            attn_dropout=0.,
            ff_dropout=0.,
            flash_attn=True,
            dim_freqs_in=1025,
            stft_n_fft=2048,
            stft_hop_length=512,
            # 10ms at 44100Hz, from sections 4.1, 4.4 in the paper - @faroit recommends // 2 or // 4 for better reconstruction
            stft_win_length=2048,
            stft_normalized=False,
            stft_window_fn: Optional[Callable] = None,
            mask_estimator_depth=2,
            multi_stft_resolution_loss_weight=1.,
            multi_stft_resolutions_window_sizes: Tuple[int, ...] = (4096, 2048, 1024, 512, 256),
            multi_stft_hop_size=147,
            multi_stft_normalized=False,
            multi_stft_window_fn: Callable = torch.hann_window,
            mlp_expansion_factor=4,
            use_torch_checkpoint=False,
            skip_connection=False,
            use_value_residual_learning=False,
            num_residual_streams=1,  # set to 1. to disable hyper connections (Default in original is 4)
    ):
        super().__init__()

        self.stereo = stereo
        self.audio_channels = 2 if stereo else 1
        self.num_stems = num_stems
        self.use_torch_checkpoint = use_torch_checkpoint
        self.skip_connection = skip_connection
        self.num_residual_streams = num_residual_streams

        _, self.expand_stream, self.reduce_stream = get_init_and_expand_reduce_stream_functions(num_residual_streams, disable=num_residual_streams == 1)

        self.layers = ModuleList([])

        transformer_kwargs = dict(
            dim=dim,
            heads=heads,
            dim_head=dim_head,
            attn_dropout=attn_dropout,
            ff_dropout=ff_dropout,
            flash_attn=flash_attn,
            norm_output=False,
            num_residual_streams=num_residual_streams,
        )

        # One rotary embedding per axis, shared by every block.
        time_rotary_embed = RotaryEmbedding(dim=dim_head)
        freq_rotary_embed = RotaryEmbedding(dim=dim_head)

        for layer_index in range(depth):
            # With value-residual learning, every block after the first mixes in
            # the values reported by earlier blocks; otherwise none do.
            if use_value_residual_learning:
                is_first = layer_index == 0
            else:
                is_first = True

            tran_modules = []
            if linear_transformer_depth > 0:
                tran_modules.append(Transformer(depth=linear_transformer_depth, linear_attn=True, **transformer_kwargs))
            tran_modules.append(
                Transformer(depth=time_transformer_depth, rotary_embed=time_rotary_embed, add_value_residual=not is_first, **transformer_kwargs)
            )
            tran_modules.append(
                Transformer(depth=freq_transformer_depth, rotary_embed=freq_rotary_embed, add_value_residual=not is_first, **transformer_kwargs)
            )
            self.layers.append(nn.ModuleList(tran_modules))

        self.final_norm = RMSNorm(dim)

        self.stft_kwargs = dict(
            n_fft=stft_n_fft,
            hop_length=stft_hop_length,
            win_length=stft_win_length,
            normalized=stft_normalized
        )

        self.stft_window_fn = partial(default(stft_window_fn, torch.hann_window), stft_win_length)

        # Probe the STFT once to learn how many frequency bins these settings produce.
        freqs = torch.stft(torch.randn(1, 4096), **self.stft_kwargs, window=torch.ones(stft_win_length), return_complex=True).shape[1]

        assert len(freqs_per_bands) > 1
        assert sum(
            freqs_per_bands) == freqs, f'the number of freqs in the bands must equal {freqs} based on the STFT settings, but got {sum(freqs_per_bands)}'

        # Each bin contributes (real, imag) for every audio channel.
        freqs_per_bands_with_complex = tuple(2 * f * self.audio_channels for f in freqs_per_bands)

        self.band_split = BandSplit(
            dim=dim,
            dim_inputs=freqs_per_bands_with_complex
        )

        self.mask_estimators = nn.ModuleList([])

        for _ in range(num_stems):
            mask_estimator = MaskEstimator(
                dim=dim,
                dim_inputs=freqs_per_bands_with_complex,
                depth=mask_estimator_depth,
                mlp_expansion_factor=mlp_expansion_factor,
            )

            self.mask_estimators.append(mask_estimator)

        # for the multi-resolution stft loss

        self.multi_stft_resolution_loss_weight = multi_stft_resolution_loss_weight
        self.multi_stft_resolutions_window_sizes = multi_stft_resolutions_window_sizes
        self.multi_stft_n_fft = stft_n_fft
        self.multi_stft_window_fn = multi_stft_window_fn

        self.multi_stft_kwargs = dict(
            hop_length=multi_stft_hop_size,
            normalized=multi_stft_normalized
        )

    def forward(
            self,
            raw_audio,
            target=None,
            return_loss_breakdown=False
    ):
        """
        Separate `raw_audio` into stems, or compute the training loss when `target` is given.

        einops

        b - batch
        f - freq
        t - time
        s - audio channel (1 for mono, 2 for stereo)
        n - number of 'stems'
        c - complex (2)
        d - feature dimension

        Args:
            raw_audio: 'b t' or 'b s t'; the channel count must match `stereo`.
            target: optional reference audio; when given, a loss is returned.
            return_loss_breakdown: also return the individual loss terms.

        Returns:
            Without `target`: reconstructed audio, 'b n s t' (or 'b s t' for a single stem).
            With `target`: the total loss, or `(total_loss, (l1_loss, multi_stft_resolution_loss))`
            when `return_loss_breakdown` is True.
        """

        device = raw_audio.device

        # defining whether model is loaded on MPS (MacOS GPU accelerator)
        x_is_mps = device.type == "mps"

        if raw_audio.ndim == 2:
            raw_audio = rearrange(raw_audio, 'b t -> b 1 t')

        channels = raw_audio.shape[1]
        assert (not self.stereo and channels == 1) or (self.stereo and channels == 2), 'stereo needs to be set to True if passing in audio signal that is stereo (channel dimension of 2). also need to be False if mono (channel dimension of 1)'

        # to stft

        raw_audio, batch_audio_channel_packed_shape = pack_one(raw_audio, '* t')

        stft_window = self.stft_window_fn(device=device)

        # RuntimeError: FFT operations are only supported on MacOS 14+
        # Since it's tedious to define whether we're on correct MacOS version - simple try-catch is used.
        # `Exception` (not a bare except) so KeyboardInterrupt / SystemExit still propagate.
        try:
            stft_repr = torch.stft(raw_audio, **self.stft_kwargs, window=stft_window, return_complex=True)
        except Exception:
            stft_repr = torch.stft(raw_audio.cpu() if x_is_mps else raw_audio, **self.stft_kwargs,
                                   window=stft_window.cpu() if x_is_mps else stft_window, return_complex=True).to(
                device)
        stft_repr = torch.view_as_real(stft_repr)

        stft_repr = unpack_one(stft_repr, batch_audio_channel_packed_shape, '* f t c')

        # merge stereo / mono into the frequency, with frequency leading dimension, for band splitting
        stft_repr = rearrange(stft_repr, 'b s f t c -> b (f s) t c')

        x = rearrange(stft_repr, 'b f t c -> b t (f c)')

        if self.use_torch_checkpoint:
            x = checkpoint(self.band_split, x, use_reentrant=False)
        else:
            x = self.band_split(x)

        # value residuals

        time_v_residual = None
        freq_v_residual = None

        # maybe expand residual streams
        if self.num_residual_streams != 1:
            x = self.expand_stream(x)

        # axial / hierarchical attention

        store = [None] * len(self.layers)
        for i, transformer_block in enumerate(self.layers):

            if len(transformer_block) == 3:
                linear_transformer, time_transformer, freq_transformer = transformer_block

                x, ft_ps = pack([x], 'b * d')
                if self.use_torch_checkpoint:
                    linear_out = checkpoint(linear_transformer, x, use_reentrant=False)
                else:
                    linear_out = linear_transformer(x)
                # Transformer.forward returns (hidden, first_values); only the hidden
                # states continue through the network.
                x = linear_out[0] if isinstance(linear_out, tuple) else linear_out
                x, = unpack(x, ft_ps, 'b * d')
            else:
                time_transformer, freq_transformer = transformer_block

            if self.skip_connection:
                # Sum all previous
                for j in range(i):
                    x = x + store[j]

            # attend along time: fold frequency bands into the batch
            x = rearrange(x, 'b t f d -> b f t d')
            x, ps = pack([x], '* t d')

            if self.use_torch_checkpoint:
                x, next_time_v_residual = checkpoint(time_transformer, x, time_v_residual, use_reentrant=False)
            else:
                x, next_time_v_residual = time_transformer(x, value_residual=time_v_residual)
            # Only the first block's values are kept as the residual source.
            time_v_residual = default(time_v_residual, next_time_v_residual)

            # attend along frequency: fold time steps into the batch
            x, = unpack(x, ps, '* t d')
            x = rearrange(x, 'b f t d -> b t f d')
            x, ps = pack([x], '* f d')

            if self.use_torch_checkpoint:
                x, next_freq_v_residual = checkpoint(freq_transformer, x, freq_v_residual, use_reentrant=False)
            else:
                x, next_freq_v_residual = freq_transformer(x, value_residual=freq_v_residual)
            freq_v_residual = default(freq_v_residual, next_freq_v_residual)

            x, = unpack(x, ps, '* f d')

            if self.skip_connection:
                store[i] = x

        # maybe reduce residual streams
        if self.num_residual_streams != 1:
            x = self.reduce_stream(x)

        x = self.final_norm(x)

        num_stems = len(self.mask_estimators)

        if self.use_torch_checkpoint:
            mask = torch.stack([checkpoint(fn, x, use_reentrant=False) for fn in self.mask_estimators], dim=1)
        else:
            mask = torch.stack([fn(x) for fn in self.mask_estimators], dim=1)
        mask = rearrange(mask, 'b n t (f c) -> b n f t c', c=2)

        # modulate frequency representation

        stft_repr = rearrange(stft_repr, 'b f t c -> b 1 f t c')

        # complex number multiplication

        stft_repr = torch.view_as_complex(stft_repr)
        mask = torch.view_as_complex(mask)

        stft_repr = stft_repr * mask

        # istft

        stft_repr = rearrange(stft_repr, 'b n (f s) t -> (b n s) f t', s=self.audio_channels)

        # same as torch.stft() fix for MacOS MPS above
        try:
            recon_audio = torch.istft(stft_repr, **self.stft_kwargs, window=stft_window, return_complex=False, length=raw_audio.shape[-1])
        except Exception:
            recon_audio = torch.istft(stft_repr.cpu() if x_is_mps else stft_repr, **self.stft_kwargs, window=stft_window.cpu() if x_is_mps else stft_window, return_complex=False, length=raw_audio.shape[-1]).to(device)

        recon_audio = rearrange(recon_audio, '(b n s) t -> b n s t', s=self.audio_channels, n=num_stems)

        if num_stems == 1:
            recon_audio = rearrange(recon_audio, 'b 1 s t -> b s t')

        # if a target is passed in, calculate loss for learning

        if not exists(target):
            return recon_audio

        if self.num_stems > 1:
            assert target.ndim == 4 and target.shape[1] == self.num_stems

        if target.ndim == 2:
            target = rearrange(target, '... t -> ... 1 t')

        target = target[..., :recon_audio.shape[-1]]  # protect against lost length on istft

        loss = F.l1_loss(recon_audio, target)

        multi_stft_resolution_loss = 0.

        for window_size in self.multi_stft_resolutions_window_sizes:
            res_stft_kwargs = dict(
                n_fft=max(window_size, self.multi_stft_n_fft),  # not sure what n_fft is across multi resolution stft
                win_length=window_size,
                return_complex=True,
                window=self.multi_stft_window_fn(window_size, device=device),
                **self.multi_stft_kwargs,
            )

            recon_Y = torch.stft(rearrange(recon_audio, '... s t -> (... s) t'), **res_stft_kwargs)
            target_Y = torch.stft(rearrange(target, '... s t -> (... s) t'), **res_stft_kwargs)

            multi_stft_resolution_loss = multi_stft_resolution_loss + F.l1_loss(recon_Y, target_Y)

        weighted_multi_resolution_loss = multi_stft_resolution_loss * self.multi_stft_resolution_loss_weight

        total_loss = loss + weighted_multi_resolution_loss

        if not return_loss_breakdown:
            return total_loss

        return total_loss, (loss, multi_stft_resolution_loss)
models/bs_roformer/mel_band_roformer_experimental.py ADDED
@@ -0,0 +1,733 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from functools import partial
2
+
3
+ import torch
4
+ from torch import nn, einsum, Tensor
5
+ from torch.nn import Module, ModuleList
6
+ import torch.nn.functional as F
7
+
8
+ from models.bs_roformer.attend import Attend
9
+ from torch.utils.checkpoint import checkpoint
10
+
11
+ from beartype.typing import Tuple, Optional, List, Callable
12
+ from beartype import beartype
13
+
14
+ from rotary_embedding_torch import RotaryEmbedding
15
+
16
+ from einops import rearrange, pack, unpack, reduce, repeat
17
+ from einops.layers.torch import Rearrange
18
+
19
+ from hyper_connections import get_init_and_expand_reduce_stream_functions
20
+
21
+ from librosa import filters
22
+
23
+
24
+ # helper functions
25
+
26
def exists(val):
    """Return True when `val` is anything other than None (falsy values still count)."""
    return not (val is None)
28
+
29
+
30
def default(v, d):
    """Return `v` unless it is None, in which case return the fallback `d`."""
    if v is None:
        return d
    return v
32
+
33
+
34
def pack_one(t, pattern):
    """Pack the single tensor `t` with einops `pack` and return `pack`'s result unchanged."""
    singleton = [t]
    return pack(singleton, pattern)
36
+
37
+
38
def unpack_one(t, ps, pattern):
    """Inverse of `pack_one`: run einops `unpack` and return its first piece."""
    pieces = unpack(t, ps, pattern)
    return pieces[0]
40
+
41
+
42
def pad_at_dim(t, pad, dim=-1, value=0.):
    """Pad tensor `t` along a single dimension.

    `pad` is the `(before, after)` pair for dimension `dim`; every dimension to
    its right receives `(0, 0)` because `F.pad` lists pads starting from the last
    dimension.
    """
    if dim < 0:
        trailing_dims = -dim - 1
    else:
        trailing_dims = t.ndim - dim - 1

    no_padding = (0, 0) * trailing_dims
    return F.pad(t, (*no_padding, *pad), value=value)
46
+
47
+
48
def l2norm(t):
    """Scale `t` to unit L2 norm along its last dimension."""
    return F.normalize(t, p=2, dim=-1)
50
+
51
+
52
+ # norm
53
+
54
class RMSNorm(Module):
    """Unit-normalise the last dimension, then rescale by sqrt(dim) and a learned gain."""

    def __init__(self, dim):
        super().__init__()
        # Learned per-feature gain, initialised to 1.
        self.gamma = nn.Parameter(torch.ones(dim))
        # Constant sqrt(dim) factor applied after unit-normalisation.
        self.scale = dim ** 0.5

    def forward(self, x):
        unit = F.normalize(x, dim=-1)
        return unit * self.scale * self.gamma
62
+
63
+
64
+ # attention
65
+
66
class FeedForward(Module):
    """Pre-normalised MLP block: RMSNorm -> Linear -> GELU -> Dropout -> Linear -> Dropout.

    No residual connection is added here; the caller is responsible for it.
    """

    def __init__(self, dim, mult=4, dropout=0.):
        super().__init__()
        hidden = int(dim * mult)  # width of the expanded hidden layer
        stages = [
            RMSNorm(dim),
            nn.Linear(dim, hidden),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(hidden, dim),
            nn.Dropout(dropout),
        ]
        self.net = nn.Sequential(*stages)

    def forward(self, x):
        return self.net(x)
86
+
87
+
88
class Attention(Module):
    """Gated multi-head self-attention with optional rotary embeddings and value-residual mixing.

    `forward` returns `(output, values)`, where `values` is this layer's value
    tensor taken before any mixing with `value_residual`.
    """

    def __init__(
            self,
            dim,
            heads=8,
            dim_head=64,
            dropout=0.,
            rotary_embed=None,
            flash=True,
            learned_value_residual_mix=False,
    ):
        super().__init__()
        inner_width = heads * dim_head

        self.heads = heads
        self.scale = dim_head ** -0.5
        self.rotary_embed = rotary_embed

        # Sub-modules are created in a fixed order so seeded initialisation stays stable.
        self.attend = Attend(flash=flash, dropout=dropout)
        self.norm = RMSNorm(dim)
        self.to_qkv = nn.Linear(dim, inner_width * 3, bias=False)

        # Per-head mixing weights between own values and the supplied residual values.
        if learned_value_residual_mix:
            self.to_value_residual_mix = nn.Linear(dim, heads)
        else:
            self.to_value_residual_mix = None

        # Per-head sigmoid gates applied to the attention output.
        self.to_gates = nn.Linear(dim, heads)

        self.to_out = nn.Sequential(
            nn.Linear(inner_width, dim, bias=False),
            nn.Dropout(dropout),
        )

    def forward(self, x, value_residual=None):
        x = self.norm(x)

        qkv = rearrange(self.to_qkv(x), 'b n (qkv h d) -> qkv b h n d', qkv=3, h=self.heads)
        q, k, v = qkv

        # Returned to the caller untouched by the residual mix below.
        own_values = v

        mixer = self.to_value_residual_mix
        if mixer is not None:
            blend = rearrange(mixer(x), 'b n h -> b h n 1').sigmoid()

            # A layer built with learned mixing must be given residual values.
            assert exists(value_residual)
            v = v.lerp(value_residual, blend)

        rotary = self.rotary_embed
        if rotary is not None:
            q = rotary.rotate_queries_or_keys(q)
            k = rotary.rotate_queries_or_keys(k)

        attended = self.attend(q, k, v)

        gate = rearrange(self.to_gates(x), 'b n h -> b h n 1').sigmoid()
        attended = attended * gate

        merged = rearrange(attended, 'b h n d -> b n (h d)')
        return self.to_out(merged), own_values
145
+
146
+
147
class LinearAttention(Module):
    """
    Linear attention in the flavor proposed in https://arxiv.org/abs/2106.09681 by El-Nouby et al.

    q/k/v are rearranged to 'b h d n' before being handed to `Attend`, q and k
    are L2-normalised, and q is scaled by a learned per-head temperature.
    `forward` takes only `x` and returns a single tensor.
    """

    @beartype
    def __init__(
            self,
            *,
            dim,
            dim_head=32,
            heads=8,
            scale=8,
            flash=False,
            dropout=0.
    ):
        super().__init__()
        inner_width = dim_head * heads
        self.norm = RMSNorm(dim)

        qkv_proj = nn.Linear(dim, inner_width * 3, bias=False)
        split_heads = Rearrange('b n (qkv h d) -> qkv b h d n', qkv=3, h=heads)
        self.to_qkv = nn.Sequential(qkv_proj, split_heads)

        # Learned per-head temperature, initialised to 0 so exp(temperature) starts at 1.
        self.temperature = nn.Parameter(torch.zeros(heads, 1, 1))

        self.attend = Attend(scale=scale, dropout=dropout, flash=flash)

        merge_heads = Rearrange('b h d n -> b n (h d)')
        self.to_out = nn.Sequential(
            merge_heads,
            nn.Linear(inner_width, dim, bias=False),
        )

    def forward(self, x):
        normed = self.norm(x)

        q, k, v = self.to_qkv(normed)

        q = l2norm(q)
        k = l2norm(k)
        q = q * self.temperature.exp()

        attended = self.attend(q, k, v)
        return self.to_out(attended)
199
+
200
+
201
class Transformer(Module):
    """Stack of `depth` (attention, feed-forward) pairs followed by an optional RMSNorm.

    Args:
        dim: feature width of the token stream.
        depth: number of (attention, feed-forward) pairs.
        dim_head, heads: attention head width and count.
        attn_dropout, ff_dropout: dropout rates for the attention and feed-forward blocks.
        ff_mult: hidden-width multiplier of the feed-forward block.
        norm_output: apply a final RMSNorm when True, otherwise identity.
        rotary_embed: optional rotary embedding passed to each `Attention`.
        flash_attn: forwarded to the attention blocks as `flash`.
        linear_attn: build `LinearAttention` layers instead of `Attention`.
        add_value_residual: build `Attention` with learned value-residual mixing.
        num_residual_streams: when not 1, attention and feed-forward blocks are
            wrapped with the hyper-connection initialiser.

    `forward` returns `(hidden, first_values)`; `first_values` is the value tensor
    reported by the first attention layer, or None for linear-attention stacks.
    """

    def __init__(
            self,
            *,
            dim,
            depth,
            dim_head=64,
            heads=8,
            attn_dropout=0.,
            ff_dropout=0.,
            ff_mult=4,
            norm_output=True,
            rotary_embed=None,
            flash_attn=True,
            linear_attn=False,
            add_value_residual=False,
            num_residual_streams=1,
    ):
        super().__init__()
        self.layers = ModuleList([])

        init_hyper_conn, *_ = get_init_and_expand_reduce_stream_functions(num_residual_streams, disable=num_residual_streams == 1)

        for _ in range(depth):
            if linear_attn:
                attn = LinearAttention(dim=dim, dim_head=dim_head, heads=heads, dropout=attn_dropout, flash=flash_attn)
            else:
                if num_residual_streams != 1:
                    attn = init_hyper_conn(dim=dim, branch=Attention(dim=dim, dim_head=dim_head, heads=heads,
                                                                     dropout=attn_dropout,
                                                                     rotary_embed=rotary_embed, flash=flash_attn,
                                                                     learned_value_residual_mix=add_value_residual))
                else:
                    attn = Attention(
                        dim=dim, dim_head=dim_head, heads=heads, dropout=attn_dropout,
                        rotary_embed=rotary_embed, flash=flash_attn, learned_value_residual_mix=add_value_residual
                    )

            if num_residual_streams != 1:
                ff = init_hyper_conn(dim=dim, branch=FeedForward(dim=dim, mult=ff_mult, dropout=ff_dropout))
            else:
                ff = FeedForward(dim=dim, mult=ff_mult, dropout=ff_dropout)
            self.layers.append(ModuleList([
                attn,
                ff,
            ]))

        self.norm = RMSNorm(dim) if norm_output else nn.Identity()

    def forward(self, x, value_residual=None):
        first_values = None

        # Without a value residual the explicit `+ x` skip connections are applied
        # (compatibility with old weights); with one, the layers are called bare.
        # NOTE(review): the residual add is keyed on `value_residual`, not on whether
        # the layers are hyper-connection wrapped - confirm this is intended.
        add_residual = value_residual is None

        for attn, ff in self.layers:
            if isinstance(attn, LinearAttention):
                # LinearAttention.forward accepts only `x` and returns a bare tensor,
                # so it must not receive `value_residual` or be tuple-unpacked.
                attn_out, next_values = attn(x), None
            else:
                attn_out, next_values = attn(x, value_residual=value_residual)

            # Keep the values reported by the first attention layer only.
            first_values = default(first_values, next_values)

            if add_residual:
                x = attn_out + x
                x = ff(x) + x
            else:
                x = attn_out
                x = ff(x)

        return self.norm(x), first_values
267
+
268
+
269
+ # bandsplit module
270
+
271
class BandSplit(Module):
    """Project each band (a slice of the last dimension) to a `dim`-wide feature vector.

    `dim_inputs` gives the slice width of every band; the per-band outputs are
    stacked on a new second-to-last axis.
    """

    @beartype
    def __init__(
            self,
            dim,
            dim_inputs: Tuple[int, ...]
    ):
        super().__init__()
        self.dim_inputs = dim_inputs
        # One RMSNorm + Linear projection per band, in band order.
        self.to_features = ModuleList([
            nn.Sequential(RMSNorm(width), nn.Linear(width, dim))
            for width in dim_inputs
        ])

    def forward(self, x):
        bands = x.split(self.dim_inputs, dim=-1)
        projected = [project(band) for band, project in zip(bands, self.to_features)]
        return torch.stack(projected, dim=-2)
299
+
300
+
301
def MLP(
        dim_in,
        dim_out,
        dim_hidden=None,
        depth=1,
        activation=nn.Tanh
):
    """Build an `nn.Sequential` of `depth + 1` Linear layers with `activation` between them.

    There are `depth` hidden layers of width `dim_hidden` (defaults to `dim_in`);
    no activation follows the final Linear.
    """
    hidden = dim_in if dim_hidden is None else dim_hidden
    widths = [dim_in] + [hidden] * depth + [dim_out]

    layers = []
    for width_in, width_out in zip(widths, widths[1:]):
        # An activation separates consecutive Linear layers, never trailing the last.
        if layers:
            layers.append(activation())
        layers.append(nn.Linear(width_in, width_out))

    return nn.Sequential(*layers)
324
+
325
+
326
class MaskEstimator(Module):
    """Per-band mask heads: one MLP + GLU per band, concatenated on the last axis.

    Args:
        dim: width of the incoming per-band features.
        dim_inputs: output width of every band, in band order.
        depth: depth passed to each band's `MLP`.
        mlp_expansion_factor: hidden width of each MLP is `dim * mlp_expansion_factor`.
    """

    @beartype
    def __init__(
            self,
            dim,
            dim_inputs: Tuple[int, ...],
            depth,
            mlp_expansion_factor=4
    ):
        super().__init__()
        self.dim_inputs = dim_inputs
        self.to_freqs = ModuleList([])
        dim_hidden = dim * mlp_expansion_factor

        for dim_in in dim_inputs:
            # The MLP emits 2 * dim_in features; GLU halves that back to dim_in.
            mlp = nn.Sequential(
                MLP(dim, dim_in * 2, dim_hidden=dim_hidden, depth=depth),
                nn.GLU(dim=-1)
            )

            self.to_freqs.append(mlp)

    def forward(self, x):
        # Bands live on the second-to-last axis; each goes through its own head.
        x = x.unbind(dim=-2)

        outs = [mlp(band_features) for band_features, mlp in zip(x, self.to_freqs)]

        return torch.cat(outs, dim=-1)
360
+
361
+
362
+ # main class
363
+
364
class MelBandRoformer(Module):
    """Mel-band-split transformer that separates audio with complex STFT masks.

    The forward pass computes an STFT of the input, gathers the frequency bins
    of every (overlapping) mel band, embeds each band with ``BandSplit``, runs
    alternating time-axis / frequency-axis ``Transformer`` blocks, predicts
    one complex mask per stem with ``MaskEstimator``, averages the mask values
    of bins shared by several bands, applies the masks to the mixture
    spectrogram and inverts it with iSTFT.  When ``target`` is passed, the
    L1 + multi-resolution STFT loss is returned instead of the audio.
    """

    @beartype
    def __init__(
            self,
            dim,
            *,
            depth,
            stereo=False,
            num_stems=1,
            time_transformer_depth=2,
            freq_transformer_depth=2,
            linear_transformer_depth=0,
            num_bands=60,
            dim_head=64,
            heads=8,
            attn_dropout=0.1,
            ff_dropout=0.1,
            flash_attn=True,
            dim_freqs_in=1025,
            sample_rate=44100,  # needed for mel filter bank from librosa
            stft_n_fft=2048,
            stft_hop_length=512,
            # 10ms at 44100Hz, from sections 4.1, 4.4 in the paper - @faroit recommends // 2 or // 4 for better reconstruction
            stft_win_length=2048,
            stft_normalized=False,
            stft_window_fn: Optional[Callable] = None,
            mask_estimator_depth=1,
            multi_stft_resolution_loss_weight=1.,
            multi_stft_resolutions_window_sizes: Tuple[int, ...] = (4096, 2048, 1024, 512, 256),
            multi_stft_hop_size=147,
            multi_stft_normalized=False,
            multi_stft_window_fn: Callable = torch.hann_window,
            match_input_audio_length=False,  # if True, pad output tensor to match length of input tensor
            mlp_expansion_factor=4,
            use_torch_checkpoint=False,
            skip_connection=False,
            use_value_residual_learning=False,
            num_residual_streams=1,  # set to 1. to disable hyper connections (Default in original is 4)
    ):
        """Build the band split, the axial transformer stack and the mask heads.

        ``dim_freqs_in`` is accepted but not read by this constructor; the
        number of frequency bins is derived from the STFT settings instead.
        """
        super().__init__()

        self.stereo = stereo
        self.audio_channels = 2 if stereo else 1
        self.num_stems = num_stems
        self.use_torch_checkpoint = use_torch_checkpoint
        self.skip_connection = skip_connection
        self.num_residual_streams = num_residual_streams

        _, self.expand_stream, self.reduce_stream = get_init_and_expand_reduce_stream_functions(num_residual_streams, disable=num_residual_streams == 1)

        self.layers = ModuleList([])

        transformer_kwargs = dict(
            dim=dim,
            heads=heads,
            dim_head=dim_head,
            attn_dropout=attn_dropout,
            ff_dropout=ff_dropout,
            flash_attn=flash_attn,
            num_residual_streams=num_residual_streams,
        )

        # The same rotary embedding objects are shared by all blocks of an axis.
        time_rotary_embed = RotaryEmbedding(dim=dim_head)
        freq_rotary_embed = RotaryEmbedding(dim=dim_head)

        for layer_index in range(depth):
            # With value-residual learning only the first block is "first";
            # without it every block is built with add_value_residual=False.
            if use_value_residual_learning:
                is_first = layer_index == 0
            else:
                is_first = True

            tran_modules = []
            if linear_transformer_depth > 0:
                tran_modules.append(Transformer(depth=linear_transformer_depth, linear_attn=True, **transformer_kwargs))
            tran_modules.append(
                Transformer(depth=time_transformer_depth, rotary_embed=time_rotary_embed, add_value_residual=not is_first, **transformer_kwargs)
            )
            tran_modules.append(
                Transformer(depth=freq_transformer_depth, rotary_embed=freq_rotary_embed, add_value_residual=not is_first, **transformer_kwargs)
            )
            self.layers.append(nn.ModuleList(tran_modules))

        self.stft_window_fn = partial(default(stft_window_fn, torch.hann_window), stft_win_length)

        self.stft_kwargs = dict(
            n_fft=stft_n_fft,
            hop_length=stft_hop_length,
            win_length=stft_win_length,
            normalized=stft_normalized
        )

        # Probe the number of frequency bins with a dry-run STFT.  The dummy
        # window must be ``stft_win_length`` long: torch.stft rejects a window
        # whose size differs from ``win_length``, so the former
        # ``torch.ones(stft_n_fft)`` failed whenever the two settings differed.
        freqs = torch.stft(torch.randn(1, 4096), **self.stft_kwargs, window=torch.ones(stft_win_length), return_complex=True).shape[1]

        # create mel filter bank
        # with librosa.filters.mel as in section 2 of paper

        mel_filter_bank_numpy = filters.mel(sr=sample_rate, n_fft=stft_n_fft, n_mels=num_bands)

        mel_filter_bank = torch.from_numpy(mel_filter_bank_numpy)

        # for some reason, it doesn't include the first freq? just force a value for now

        mel_filter_bank[0][0] = 1.

        # In some systems/envs we get 0.0 instead of ~1.9e-18 in the last position,
        # so let's force a positive value

        mel_filter_bank[-1, -1] = 1.

        # binary as in paper (then estimated masks are averaged for overlapping regions)

        freqs_per_band = mel_filter_bank > 0
        assert freqs_per_band.any(dim=0).all(), 'all frequencies need to be covered by all bands for now'

        # Flat list of bin indices, band after band (bins may repeat across bands).
        repeated_freq_indices = repeat(torch.arange(freqs), 'f -> b f', b=num_bands)
        freq_indices = repeated_freq_indices[freqs_per_band]

        if stereo:
            # Channels are interleaved on the frequency axis in forward(), so
            # bin f maps to positions 2f and 2f + 1.
            freq_indices = repeat(freq_indices, 'f -> f s', s=2)
            freq_indices = freq_indices * 2 + torch.arange(2)
            freq_indices = rearrange(freq_indices, 'f s -> (f s)')

        self.register_buffer('freq_indices', freq_indices, persistent=False)
        self.register_buffer('freqs_per_band', freqs_per_band, persistent=False)

        num_freqs_per_band = reduce(freqs_per_band, 'b f -> b', 'sum')
        num_bands_per_freq = reduce(freqs_per_band, 'b f -> f', 'sum')

        self.register_buffer('num_freqs_per_band', num_freqs_per_band, persistent=False)
        self.register_buffer('num_bands_per_freq', num_bands_per_freq, persistent=False)

        # band split and mask estimator

        # per band: bins * audio channels * 2 (real and imaginary parts)
        freqs_per_bands_with_complex = tuple(2 * f * self.audio_channels for f in num_freqs_per_band.tolist())

        self.band_split = BandSplit(
            dim=dim,
            dim_inputs=freqs_per_bands_with_complex
        )

        self.mask_estimators = nn.ModuleList([])

        for _ in range(num_stems):
            mask_estimator = MaskEstimator(
                dim=dim,
                dim_inputs=freqs_per_bands_with_complex,
                depth=mask_estimator_depth,
                mlp_expansion_factor=mlp_expansion_factor,
            )

            self.mask_estimators.append(mask_estimator)

        # for the multi-resolution stft loss

        self.multi_stft_resolution_loss_weight = multi_stft_resolution_loss_weight
        self.multi_stft_resolutions_window_sizes = multi_stft_resolutions_window_sizes
        self.multi_stft_n_fft = stft_n_fft
        self.multi_stft_window_fn = multi_stft_window_fn

        self.multi_stft_kwargs = dict(
            hop_length=multi_stft_hop_size,
            normalized=multi_stft_normalized
        )

        self.match_input_audio_length = match_input_audio_length

    def forward(
            self,
            raw_audio,
            target=None,
            return_loss_breakdown=False
    ):
        """
        Separate ``raw_audio`` or, when ``target`` is given, compute the loss.

        Returns the reconstructed audio when ``target`` is None; otherwise the
        total loss, or ``(total_loss, (l1_loss, multi_stft_resolution_loss))``
        when ``return_loss_breakdown`` is True.

        einops

        b - batch
        f - freq
        t - time
        s - audio channel (1 for mono, 2 for stereo)
        n - number of 'stems'
        c - complex (2)
        d - feature dimension
        """

        device = raw_audio.device

        if raw_audio.ndim == 2:
            raw_audio = rearrange(raw_audio, 'b t -> b 1 t')

        batch, channels, raw_audio_length = raw_audio.shape

        istft_length = raw_audio_length if self.match_input_audio_length else None

        assert (not self.stereo and channels == 1) or (
                    self.stereo and channels == 2), 'stereo needs to be set to True if passing in audio signal that is stereo (channel dimension of 2). also need to be False if mono (channel dimension of 1)'

        # to stft

        raw_audio, batch_audio_channel_packed_shape = pack_one(raw_audio, '* t')

        stft_window = self.stft_window_fn(device=device)

        stft_repr = torch.stft(raw_audio, **self.stft_kwargs, window=stft_window, return_complex=True)
        stft_repr = torch.view_as_real(stft_repr)

        stft_repr = unpack_one(stft_repr, batch_audio_channel_packed_shape, '* f t c')

        # merge stereo / mono into the frequency, with frequency leading dimension, for band splitting
        stft_repr = rearrange(stft_repr, 'b s f t c -> b (f s) t c')

        # index out all frequencies for all frequency ranges across bands ascending in one go

        batch_arange = torch.arange(batch, device=device)[..., None]

        # account for stereo

        x = stft_repr[batch_arange, self.freq_indices]

        # fold the complex (real and imag) into the frequencies dimension

        x = rearrange(x, 'b f t c -> b t (f c)')

        if self.use_torch_checkpoint:
            x = checkpoint(self.band_split, x, use_reentrant=False)
        else:
            x = self.band_split(x)

        # value residuals
        time_v_residual = None
        freq_v_residual = None

        # maybe expand residual streams
        if self.num_residual_streams != 1:
            x = self.expand_stream(x)

        # axial / hierarchical attention

        store = [None] * len(self.layers)
        for i, transformer_block in enumerate(self.layers):

            if len(transformer_block) == 3:
                linear_transformer, time_transformer, freq_transformer = transformer_block

                x, ft_ps = pack([x], 'b * d')
                if self.use_torch_checkpoint:
                    linear_out = checkpoint(linear_transformer, x, use_reentrant=False)
                else:
                    linear_out = linear_transformer(x)
                # The Transformer in this file ends with
                # `return self.norm(x), first_values`; only the features are
                # needed here.  A bare tensor is accepted as well.
                x = linear_out[0] if isinstance(linear_out, tuple) else linear_out
                x, = unpack(x, ft_ps, 'b * d')
            else:
                time_transformer, freq_transformer = transformer_block

            if self.skip_connection:
                # Sum all previous
                for j in range(i):
                    x = x + store[j]

            # attention along the time axis, one sequence per band
            x = rearrange(x, 'b t f d -> b f t d')
            x, ps = pack([x], '* t d')

            if self.use_torch_checkpoint:
                x, next_time_v_residual = checkpoint(time_transformer, x, time_v_residual, use_reentrant=False)
            else:
                x, next_time_v_residual = time_transformer(x, time_v_residual)
            # keep the values from the first block that produced them
            time_v_residual = default(time_v_residual, next_time_v_residual)

            # attention along the band axis, one sequence per time frame
            x, = unpack(x, ps, '* t d')
            x = rearrange(x, 'b f t d -> b t f d')
            x, ps = pack([x], '* f d')

            if self.use_torch_checkpoint:
                x, next_freq_v_residual = checkpoint(freq_transformer, x, freq_v_residual, use_reentrant=False)
            else:
                x, next_freq_v_residual = freq_transformer(x, value_residual=freq_v_residual)
            freq_v_residual = default(freq_v_residual, next_freq_v_residual)

            x, = unpack(x, ps, '* f d')

            if self.skip_connection:
                store[i] = x

        # maybe reduce residual streams
        if self.num_residual_streams != 1:
            x = self.reduce_stream(x)

        num_stems = len(self.mask_estimators)
        if self.use_torch_checkpoint:
            masks = torch.stack([checkpoint(fn, x, use_reentrant=False) for fn in self.mask_estimators], dim=1)
        else:
            masks = torch.stack([fn(x) for fn in self.mask_estimators], dim=1)
        masks = rearrange(masks, 'b n t (f c) -> b n f t c', c=2)

        # modulate frequency representation

        stft_repr = rearrange(stft_repr, 'b f t c -> b 1 f t c')

        # complex number multiplication

        stft_repr = torch.view_as_complex(stft_repr)
        masks = torch.view_as_complex(masks)

        masks = masks.type(stft_repr.dtype)

        # need to average the estimated mask for the overlapped frequencies

        scatter_indices = repeat(self.freq_indices, 'f -> b n f t', b=batch, n=num_stems, t=stft_repr.shape[-1])

        stft_repr_expanded_stems = repeat(stft_repr, 'b 1 ... -> b n ...', n=num_stems)
        masks_summed = torch.zeros_like(stft_repr_expanded_stems).scatter_add_(2, scatter_indices, masks)

        denom = repeat(self.num_bands_per_freq, 'f -> (f r) 1', r=channels)

        masks_averaged = masks_summed / denom.clamp(min=1e-8)

        # modulate stft repr with estimated mask

        stft_repr = stft_repr * masks_averaged

        # istft

        stft_repr = rearrange(stft_repr, 'b n (f s) t -> (b n s) f t', s=self.audio_channels)

        recon_audio = torch.istft(stft_repr, **self.stft_kwargs, window=stft_window, return_complex=False,
                                  length=istft_length)

        recon_audio = rearrange(recon_audio, '(b n s) t -> b n s t', b=batch, s=self.audio_channels, n=num_stems)

        if num_stems == 1:
            recon_audio = rearrange(recon_audio, 'b 1 s t -> b s t')

        # if a target is passed in, calculate loss for learning

        if not exists(target):
            return recon_audio

        if self.num_stems > 1:
            assert target.ndim == 4 and target.shape[1] == self.num_stems

        if target.ndim == 2:
            target = rearrange(target, '... t -> ... 1 t')

        target = target[..., :recon_audio.shape[-1]]  # protect against lost length on istft

        loss = F.l1_loss(recon_audio, target)

        multi_stft_resolution_loss = 0.

        for window_size in self.multi_stft_resolutions_window_sizes:
            res_stft_kwargs = dict(
                n_fft=max(window_size, self.multi_stft_n_fft),  # not sure what n_fft is across multi resolution stft
                win_length=window_size,
                return_complex=True,
                window=self.multi_stft_window_fn(window_size, device=device),
                **self.multi_stft_kwargs,
            )

            recon_Y = torch.stft(rearrange(recon_audio, '... s t -> (... s) t'), **res_stft_kwargs)
            target_Y = torch.stft(rearrange(target, '... s t -> (... s) t'), **res_stft_kwargs)

            multi_stft_resolution_loss = multi_stft_resolution_loss + F.l1_loss(recon_Y, target_Y)

        weighted_multi_resolution_loss = multi_stft_resolution_loss * self.multi_stft_resolution_loss_weight

        total_loss = loss + weighted_multi_resolution_loss

        if not return_loss_breakdown:
            return total_loss

        return total_loss, (loss, multi_stft_resolution_loss)
models/conformer_model.py ADDED
@@ -0,0 +1,188 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.nn.functional as F
4
+ from typing import Optional
5
+ from conformer import Conformer
6
+
7
class NeuralModel(nn.Module):
    """
    Takes an STFT magnitude |X| of shape (B, C, F, T_spec) and predicts complex
    masks in folded form: (B, 2 * (sources*channels), F, T_spec),
    where the factor 2 stands for [real, imag].
    """
    def __init__(
        self,
        in_channels: int = 2,
        sources: int = 2,
        freq_bins: int = 2049,
        embed_dim: int = 512,
        depth: int = 8,
        dim_head: int = 64,
        heads: int = 8,
        ff_mult: int = 4,
        conv_expansion_factor: int = 2,
        conv_kernel_size: int = 31,
        attn_dropout: float = 0.1,
        ff_dropout: float = 0.1,
        conv_dropout: float = 0.1,
    ):
        super().__init__()
        self.freq_bins = freq_bins
        self.in_channels = in_channels
        self.sources = sources
        # One mask per (source, channel) pair.
        self.out_masks = sources * in_channels
        self.embed_dim = embed_dim

        # Each time frame (all channels and bins flattened) -> embedding.
        self.input_proj_stft = nn.Linear(freq_bins * in_channels, embed_dim)

        conformer_config = dict(
            dim=embed_dim,
            depth=depth,
            dim_head=dim_head,
            heads=heads,
            ff_mult=ff_mult,
            conv_expansion_factor=conv_expansion_factor,
            conv_kernel_size=conv_kernel_size,
            attn_dropout=attn_dropout,
            ff_dropout=ff_dropout,
            conv_dropout=conv_dropout,
        )
        self.model = Conformer(**conformer_config)

        # Trailing factor 2 = [real, imag].
        self.output_proj = nn.Linear(embed_dim, freq_bins * self.out_masks * 2)

    def forward(self, x_stft_mag: torch.Tensor) -> torch.Tensor:
        """
        x_stft_mag: (B, C, F, T_spec)
        returns: (B, 2 * (sources*channels), F, T_spec) - real/imag parts of the masks
        """
        assert x_stft_mag.dim() == 4, f"Expected (B,C,F,T), got {tuple(x_stft_mag.shape)}"
        n_batch, n_chan, n_freq, n_frames = x_stft_mag.shape

        # (B, C, F, T_spec) -> (B, T_spec, C*F): one feature vector per frame.
        frames = x_stft_mag.permute(0, 3, 1, 2).reshape(n_batch, n_frames, n_chan * n_freq)

        hidden = self.input_proj_stft(frames)   # (B, T_spec, E)
        hidden = self.model(hidden)             # (B, T_spec, E)
        hidden = hidden.tanh()                  # bound the activations for stability
        flat_masks = self.output_proj(hidden)   # (B, T_spec, F * out_masks * 2)

        # Unfold to (B, T_spec, 2*out_masks, F), then move time to the last axis.
        unfolded = flat_masks.reshape(n_batch, n_frames, self.out_masks * 2, n_freq)
        return unfolded.permute(0, 2, 3, 1).contiguous()
70
+
71
+
72
class ConformerMSS(nn.Module):
    """
    Waveform-to-waveform wrapper compatible with the training loop:
        forward(x: (B, C, T)) -> y_hat: (B, S, C, T)
    where S is the number of sources.
    Internally: STFT -> NeuralModel -> complex masks -> iSTFT.
    """
    def __init__(
        self,
        core: NeuralModel,
        n_fft: int = 4096,
        hop_length: int = 1024,
        win_length: Optional[int] = None,
        center: bool = True,
    ):
        super().__init__()
        self.core = core
        self.n_fft = n_fft
        self.hop_length = hop_length
        # The window length falls back to n_fft when not given.
        self.win_length = n_fft if win_length is None else win_length
        self.center = center

        # Kept as a (non-persistent) buffer so the window follows .to(device).
        self.register_buffer("window", torch.hann_window(self.win_length), persistent=False)

        # Sanity check: the core's freq_bins must equal n_fft // 2 + 1.
        expected_bins = n_fft // 2 + 1
        assert core.freq_bins == expected_bins, (
            f"NeuralModel.freq_bins={core.freq_bins} != n_fft//2+1={expected_bins}. "
            f"Поставь freq_bins={expected_bins} при создании core."
        )

    def _stft(self, x: torch.Tensor) -> torch.Tensor:
        """
        x: (B, C, T) -> spec: complex (B, C, F, TT)
        """
        assert x.dim() == 3, f"Expected (B,C,T), got {tuple(x.shape)}"
        n_batch, n_chan, n_samples = x.shape

        # torch.stft works on (N, T), so batch and channel are merged first.
        flat_spec = torch.stft(
            x.reshape(n_batch * n_chan, n_samples),
            n_fft=self.n_fft,
            hop_length=self.hop_length,
            win_length=self.win_length,
            window=self.window.to(x.device),
            center=self.center,
            return_complex=True,
        )  # (B*C, F, TT)
        n_freq, n_frames = flat_spec.shape[-2:]
        return flat_spec.reshape(n_batch, n_chan, n_freq, n_frames)

    def _istft(self, spec: torch.Tensor, length: int) -> torch.Tensor:
        """
        spec: complex (B, C, F, TT) -> audio: (B, C, T)
        """
        n_batch, n_chan, n_freq, n_frames = spec.shape
        flat_wave = torch.istft(
            spec.reshape(n_batch * n_chan, n_freq, n_frames),
            n_fft=self.n_fft,
            hop_length=self.hop_length,
            win_length=self.win_length,
            window=self.window.to(spec.device),
            center=self.center,
            length=length,
        )
        return flat_wave.reshape(n_batch, n_chan, -1)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        x: (B, C, T) (mixture waveform)
        returns y_hat: (B, S, C, T) - predicted source waveforms
        """
        B, C, T = x.shape

        # 1) STFT of the mixture; the core only sees the magnitude.
        mix_spec = self._stft(x)                     # (B, C, F, TT)

        # 2) Core predicts the real/imag parts of the masks.
        mask_ri = self.core(mix_spec.abs())          # (B, 2*(S*C), F, TT2)
        _, two_sc, n_freq, mask_frames = mask_ri.shape

        S = self.core.sources
        assert two_sc == 2 * (S * C), (
            f"core вернул {two_sc} каналов масок, ожидалось {2*(S*C)} "
            f"(2*[real/imag]*[sources*channels]). Проверь in_channels/sources."
        )

        # 3) Align the time axes in case the two frame counts differ.
        spec_frames = mix_spec.shape[-1]
        n_frames = min(spec_frames, mask_frames)
        if spec_frames != n_frames:
            mix_spec = mix_spec[..., :n_frames]
        if mask_frames != n_frames:
            mask_ri = mask_ri[..., :n_frames]

        # 4) Unfold to (B, 2, S, C, F, TT) and build the complex masks.
        mask_ri = mask_ri.view(B, 2, S, C, n_freq, n_frames).contiguous()
        mask_real, mask_imag = mask_ri.unbind(dim=1)  # each (B, S, C, F, TT)
        masks_c = torch.complex(mask_real, mask_imag)

        # 5) Apply the masks to the complex mixture spectrum
        #    (broadcast over the source axis).
        est_specs = masks_c * mix_spec.unsqueeze(1)   # (B, S, C, F, TT)

        # 6) iSTFT for every source, stacked on a new source axis.
        source_waves = [
            self._istft(est_specs[:, src], length=T)  # (B, C, T)
            for src in range(S)
        ]
        return torch.stack(source_waves, dim=1)       # (B, S, C, T)
models/mel_band_conformer.py ADDED
@@ -0,0 +1,424 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from functools import partial
2
+
3
+ import torch
4
+ from torch import nn
5
+ import torch.nn.functional as F
6
+ from conformer import Conformer
7
+ from torch.nn import Module, ModuleList
8
+ from librosa import filters
9
+ from beartype.typing import Tuple, Optional, List, Callable
10
+ from beartype import beartype
11
+ from einops import rearrange, pack, unpack, reduce, repeat
12
+
13
+ # helper functions
14
+
15
def exists(val):
    """Return True for any value other than ``None`` (falsy values count as existing)."""
    return not (val is None)
17
+
18
+
19
def default(v, d):
    """Return ``v`` unless it is ``None``, in which case return the fallback ``d``."""
    if v is None:
        return d
    return v
21
+
22
+
23
class RMSNorm(Module):
    """L2-normalise the last axis, then rescale by ``sqrt(dim)`` and a learned gain."""

    def __init__(self, dim):
        """
        Args:
            dim: size of the last axis of the inputs; also the length of ``gamma``.
        """
        super().__init__()
        self.scale = dim ** 0.5
        # Learned per-feature gain, initialised to ones.
        self.gamma = nn.Parameter(torch.ones(dim))

    def forward(self, x):
        unit_norm = F.normalize(x, dim=-1)
        return unit_norm * self.scale * self.gamma
31
+
32
+
33
+ # MLP, mask-estimator and band-split building blocks
34
+
35
def MLP(
        dim_in,
        dim_out,
        dim_hidden=None,
        depth=1,
        activation=nn.Tanh
):
    """Assemble a feed-forward network of ``depth + 1`` linear layers.

    Args:
        dim_in: input feature size.
        dim_out: output feature size.
        dim_hidden: hidden layer width; ``dim_in`` is used when it is None.
        depth: number of hidden layers.
        activation: zero-argument factory for the module inserted after each
            linear layer except the final one.

    Returns:
        The layers wrapped in an ``nn.Sequential``.
    """
    if dim_hidden is None:
        dim_hidden = dim_in

    sizes = (dim_in,) + (dim_hidden,) * depth + (dim_out,)
    num_linear = len(sizes) - 1

    layers = []
    for position, (fan_in, fan_out) in enumerate(zip(sizes, sizes[1:]), start=1):
        layers.append(nn.Linear(fan_in, fan_out))
        # Every linear layer but the last is followed by the activation.
        if position < num_linear:
            layers.append(activation())

    return nn.Sequential(*layers)
58
+
59
+
60
class MaskEstimator(Module):
    """Per-band MLP heads that turn band features into mask values.

    For every band an ``MLP`` maps ``dim`` features to ``2 * dim_in`` values
    and a ``GLU`` over the last axis gates them back down to ``dim_in``.
    The per-band outputs are concatenated along the last axis.
    """

    @beartype
    def __init__(
            self,
            dim,
            dim_inputs: Tuple[int, ...],
            depth,
            mlp_expansion_factor=4
    ):
        """
        Args:
            dim: feature size of each band embedding fed to the heads.
            dim_inputs: number of output values for each band.
            depth: number of hidden layers in every band MLP.
            mlp_expansion_factor: hidden width of the MLPs as a multiple of ``dim``.
        """
        super().__init__()
        self.dim_inputs = dim_inputs
        self.to_freqs = ModuleList([])
        dim_hidden = dim * mlp_expansion_factor

        for dim_in in dim_inputs:
            # The MLP emits twice the needed width because GLU halves the
            # last dimension (one half gates the other).
            mlp = nn.Sequential(
                MLP(dim, dim_in * 2, dim_hidden=dim_hidden, depth=depth),
                nn.GLU(dim=-1)
            )

            self.to_freqs.append(mlp)

    def forward(self, x):
        """Split ``x`` along the band axis (second to last) and run each band's head."""
        per_band = x.unbind(dim=-2)

        outs = [mlp(band_features) for band_features, mlp in zip(per_band, self.to_freqs)]

        return torch.cat(outs, dim=-1)
95
+
96
+
97
class BandSplit(Module):
    """Embed every frequency band with its own ``RMSNorm -> Linear`` pair.

    The last input axis is divided into consecutive chunks of the sizes listed
    in ``dim_inputs``; each chunk is projected to ``dim`` features.
    """

    @beartype
    def __init__(
            self,
            dim,
            dim_inputs: Tuple[int, ...]
    ):
        """
        Args:
            dim: feature size produced for each band.
            dim_inputs: width of each band's chunk on the last input axis.
        """
        super().__init__()
        self.dim_inputs = dim_inputs

        per_band_nets = []
        for chunk_width in dim_inputs:
            per_band_nets.append(
                nn.Sequential(RMSNorm(chunk_width), nn.Linear(chunk_width, dim))
            )
        self.to_features = ModuleList(per_band_nets)

    def forward(self, x):
        """Cut ``x`` into the predefined band chunks, embed them, and stack on a new band axis."""
        chunks = x.split(self.dim_inputs, dim=-1)
        embedded = [net(chunk) for chunk, net in zip(chunks, self.to_features)]
        # The bands become the second-to-last axis.
        return torch.stack(embedded, dim=-2)
127
+
128
+
129
class MelBandConformer(nn.Module):
    """Mel-band-split separator that uses Conformer blocks on both axes.

    The forward pass computes an STFT, gathers the frequency bins of every
    (overlapping) mel band, embeds each band with ``BandSplit``, runs
    alternating time-axis / band-axis ``Conformer`` blocks, predicts one
    complex mask per stem with ``MaskEstimator``, averages the mask values of
    bins shared by several bands, applies the masks to the mixture spectrogram
    and inverts it with iSTFT.  When ``target`` is passed, the
    L1 + multi-resolution STFT loss is returned instead of the audio.
    """

    def __init__(
        self,
        dim: int,
        *,
        depth: int,
        stereo: bool = False,
        num_stems: int = 1,
        time_conformer_depth: int = 2,
        freq_conformer_depth: int = 2,
        num_bands: int = 60,
        dim_head: int = 64,
        heads: int = 8,
        # Conformer params
        ff_mult: int = 4,
        conv_expansion_factor: int = 2,
        conv_kernel_size: int = 31,
        attn_dropout: float = 0.0,
        ff_dropout: float = 0.0,
        conv_dropout: float = 0.0,
        # STFT
        dim_freqs_in: int = 1025,
        sample_rate: int = 44100,
        stft_n_fft: int = 2048,
        stft_hop_length: int = 512,
        stft_win_length: int = 2048,
        stft_normalized: bool = False,
        stft_window_fn: Optional[Callable] = None,
        # Loss
        mask_estimator_depth: int = 1,
        multi_stft_resolution_loss_weight: float = 1.0,
        multi_stft_resolutions_window_sizes: Tuple[int, ...] = (4096, 2048, 1024, 512, 256),
        multi_stft_hop_size: int = 147,
        multi_stft_normalized: bool = False,
        multi_stft_window_fn: Callable = torch.hann_window,
        match_input_audio_length: bool = False,

        use_torch_checkpoint: bool = False,
        skip_connection: bool = False,
        # Hidden width of the mask-estimator MLPs as a multiple of ``dim``.
        mlp_expansion_factor: int = 4,
    ):
        """Build the band split, the axial Conformer stack and the mask heads.

        ``dim_freqs_in`` is accepted but not read by this constructor; the
        number of frequency bins is derived from the STFT settings instead.
        """
        super().__init__()

        self.stereo = stereo
        self.audio_channels = 2 if stereo else 1
        self.num_stems = num_stems
        self.use_torch_checkpoint = use_torch_checkpoint
        self.skip_connection = skip_connection

        self.layers = nn.ModuleList([])

        # Layers per block: [ time-Conformer, freq-Conformer ]
        conformer_kwargs = dict(
            dim=dim,
            dim_head=dim_head,
            heads=heads,
            ff_mult=ff_mult,
            conv_expansion_factor=conv_expansion_factor,
            conv_kernel_size=conv_kernel_size,
            attn_dropout=attn_dropout,
            ff_dropout=ff_dropout,
            conv_dropout=conv_dropout,
        )

        for _ in range(depth):
            time_block = Conformer(depth=time_conformer_depth, **conformer_kwargs)
            freq_block = Conformer(depth=freq_conformer_depth, **conformer_kwargs)
            self.layers.append(nn.ModuleList([time_block, freq_block]))

        self.stft_window_fn = partial(stft_window_fn or torch.hann_window, stft_win_length)

        self.stft_kwargs = dict(
            n_fft=stft_n_fft,
            hop_length=stft_hop_length,
            win_length=stft_win_length,
            normalized=stft_normalized
        )

        # Number of frequency bins produced by the STFT, probed with a dry
        # run.  The dummy window must be ``stft_win_length`` long: torch.stft
        # rejects a window whose size differs from ``win_length``, so the
        # former ``torch.ones(stft_n_fft)`` failed whenever the two differed.
        freqs = torch.stft(
            torch.randn(1, 4096),
            **self.stft_kwargs,
            window=torch.ones(stft_win_length),
            return_complex=True
        ).shape[1]

        # build mel filter bank to define band grouping
        mel_filter_bank_numpy = filters.mel(sr=sample_rate, n_fft=stft_n_fft, n_mels=num_bands)
        mel_filter_bank = torch.from_numpy(mel_filter_bank_numpy)
        # ensure coverage at the boundaries
        mel_filter_bank[0][0] = 1.0
        mel_filter_bank[-1, -1] = 1.0

        # A bin belongs to a band wherever the filter weight is positive.
        freqs_per_band = mel_filter_bank > 0
        assert freqs_per_band.any(dim=0).all(), 'all frequency bins must be covered by bands'

        # Flat list of bin indices, band after band (bins may repeat across bands).
        repeated_freq_indices = repeat(torch.arange(freqs), 'f -> b f', b=num_bands)
        freq_indices = repeated_freq_indices[freqs_per_band]

        if stereo:
            # duplicate indices for stereo by interleaving channels along the freq axis
            freq_indices = repeat(freq_indices, 'f -> f s', s=2)
            freq_indices = freq_indices * 2 + torch.arange(2)
            freq_indices = rearrange(freq_indices, 'f s -> (f s)')

        self.register_buffer('freq_indices', freq_indices, persistent=False)
        self.register_buffer('freqs_per_band', freqs_per_band, persistent=False)

        num_freqs_per_band = reduce(freqs_per_band, 'b f -> b', 'sum')
        num_bands_per_freq = reduce(freqs_per_band, 'b f -> f', 'sum')

        self.register_buffer('num_freqs_per_band', num_freqs_per_band, persistent=False)
        self.register_buffer('num_bands_per_freq', num_bands_per_freq, persistent=False)

        # BandSplit and MaskEstimator
        # per band: bins * audio channels * 2 (real and imaginary parts)
        freqs_per_bands_with_complex = tuple(2 * f * self.audio_channels for f in num_freqs_per_band.tolist())

        self.band_split = BandSplit(
            dim=dim,
            dim_inputs=freqs_per_bands_with_complex
        )

        self.mask_estimators = nn.ModuleList([
            MaskEstimator(
                dim=dim,
                dim_inputs=freqs_per_bands_with_complex,
                depth=mask_estimator_depth,
                mlp_expansion_factor=mlp_expansion_factor,
            )
            for _ in range(num_stems)
        ])

        # multi-resolution STFT loss setup
        self.multi_stft_resolution_loss_weight = multi_stft_resolution_loss_weight
        self.multi_stft_resolutions_window_sizes = multi_stft_resolutions_window_sizes
        self.multi_stft_n_fft = stft_n_fft
        self.multi_stft_window_fn = multi_stft_window_fn

        self.multi_stft_kwargs = dict(
            hop_length=multi_stft_hop_size,
            normalized=multi_stft_normalized
        )

        self.match_input_audio_length = match_input_audio_length

    def forward(
        self,
        raw_audio: torch.Tensor,
        target: Optional[torch.Tensor] = None,
        return_loss_breakdown: bool = False
    ):
        """
        Separate ``raw_audio`` or, when ``target`` is given, compute the loss.

        Returns the reconstructed audio when ``target`` is None; otherwise the
        total loss, or ``(total_loss, (l1_loss, multi_stft_resolution_loss))``
        when ``return_loss_breakdown`` is True.

        b - batch
        f - freq
        t - time
        s - audio channel (1 mono / 2 stereo)
        n - stems
        c - complex (2)
        d - feature dim
        """
        device = raw_audio.device

        if raw_audio.ndim == 2:
            raw_audio = rearrange(raw_audio, 'b t -> b 1 t')

        batch, channels, raw_audio_length = raw_audio.shape
        istft_length = raw_audio_length if self.match_input_audio_length else None

        assert (not self.stereo and channels == 1) or (self.stereo and channels == 2), \
            'set stereo=True for stereo input (C=2), stereo=False for mono (C=1)'

        # --- STFT ---
        # torch.stft works on (N, T), so batch and channel are merged first.
        raw_audio_flat, packed_shape = raw_audio.reshape(-1, raw_audio.shape[-1]), raw_audio.shape[:2]
        stft_window = self.stft_window_fn(device=device)

        stft_repr = torch.stft(raw_audio_flat, **self.stft_kwargs, window=stft_window, return_complex=True)
        stft_repr = torch.view_as_real(stft_repr)  # (B*C, F, T, 2)
        stft_repr = stft_repr.view(*packed_shape, *stft_repr.shape[1:])  # (b, s, f, t, c)

        # fold channel into frequency axis
        stft_repr_fs = rearrange(stft_repr, 'b s f t c -> b (f s) t c')

        # index frequencies by mel bands
        b_idx = torch.arange(batch, device=device)[..., None]
        x = stft_repr_fs[b_idx, self.freq_indices]  # (b, sum(freqs_in_bands), t, c)
        x = rearrange(x, 'b f t c -> b t (f c)')  # flatten complex axis into features

        # --- BandSplit -> (b, t, bands, dim) ---
        if self.use_torch_checkpoint:
            x = torch.utils.checkpoint.checkpoint(self.band_split, x, use_reentrant=False)
        else:
            x = self.band_split(x)

        # --- Axial Conformer (time, then freq) ---
        # NOTE(review): `store` is only written below and never read, so
        # skip_connection=True currently has no effect on the output.  Left
        # unchanged so existing checkpoints behave the same - confirm intent.
        store = [None] * len(self.layers)

        for i, (time_conf, freq_conf) in enumerate(self.layers):
            # Time axis: (b, t, bands, d) -> ((b*bands), t, d)
            bsz, tlen, bands, d = x.shape
            x_time = rearrange(x, 'b t f d -> (b f) t d')

            if self.use_torch_checkpoint:
                x_time = torch.utils.checkpoint.checkpoint(time_conf, x_time, use_reentrant=False)
            else:
                x_time = time_conf(x_time)

            x = rearrange(x_time, '(b f) t d -> b t f d', b=bsz, f=bands)

            # Freq axis: (b, t, f, d) -> ((b*t), f, d)
            bsz, tlen, bands, d = x.shape
            x_freq = rearrange(x, 'b t f d -> (b t) f d')

            if self.use_torch_checkpoint:
                x_freq = torch.utils.checkpoint.checkpoint(freq_conf, x_freq, use_reentrant=False)
            else:
                x_freq = freq_conf(x_freq)

            x = rearrange(x_freq, '(b t) f d -> b t f d', b=bsz, t=tlen)

            if self.skip_connection:
                store[i] = x if store[i] is None else store[i] + x

        # --- Mask estimation ---
        # (b, t, f_bands, d) -> per-stem MLP over bands
        if self.use_torch_checkpoint:
            masks = torch.stack([torch.utils.checkpoint.checkpoint(fn, x, use_reentrant=False)
                                 for fn in self.mask_estimators], dim=1)
        else:
            masks = torch.stack([fn(x) for fn in self.mask_estimators], dim=1)
        masks = rearrange(masks, 'b n t (f c) -> b n f t c', c=2)

        # --- Complex modulation ---
        stft_repr_c = rearrange(stft_repr, 'b s f t c -> b 1 (f s) t c')
        stft_repr_c = torch.view_as_complex(stft_repr_c)  # (b, 1, F*S, T)
        masks_c = torch.view_as_complex(masks)

        masks_c = masks_c.type(stft_repr_c.dtype)

        # Sum the mask values of every bin over all bands that contain it,
        # then divide by the number of such bands to get the average.
        scatter_idx = repeat(self.freq_indices, 'f -> b n f t', b=batch, n=self.num_stems, t=stft_repr_c.shape[-1])
        stft_repr_expanded = repeat(stft_repr_c, 'b 1 ... -> b n ...', n=self.num_stems)

        masks_summed = torch.zeros_like(stft_repr_expanded).scatter_add_(2, scatter_idx, masks_c)
        denom = repeat(self.num_bands_per_freq, 'f -> (f r) 1', r=self.audio_channels)

        masks_averaged = masks_summed / denom.clamp(min=1e-8)
        stft_mod = stft_repr_c * masks_averaged

        # --- iSTFT ---
        stft_mod = rearrange(stft_mod, 'b n (f s) t -> (b n s) f t', s=self.audio_channels)

        recon_audio = torch.istft(
            stft_mod,
            **self.stft_kwargs,
            window=stft_window,
            return_complex=False,
            length=istft_length
        )
        recon_audio = rearrange(recon_audio, '(b n s) t -> b n s t', b=batch, s=self.audio_channels, n=self.num_stems)

        if self.num_stems == 1:
            recon_audio = rearrange(recon_audio, 'b 1 s t -> b s t')

        # Loss
        if target is None:
            return recon_audio

        if self.num_stems > 1:
            assert target.ndim == 4 and target.shape[1] == self.num_stems

        if target.ndim == 2:
            target = rearrange(target, '... t -> ... 1 t')

        # crop the target to the reconstructed length
        target = target[..., :recon_audio.shape[-1]]

        loss = F.l1_loss(recon_audio, target)

        multi_stft_resolution_loss = 0.0
        for window_size in self.multi_stft_resolutions_window_sizes:
            res_stft_kwargs = dict(
                n_fft=max(window_size, self.multi_stft_n_fft),
                win_length=window_size,
                return_complex=True,
                window=self.multi_stft_window_fn(window_size, device=device),
                **self.multi_stft_kwargs,
            )

            recon_Y = torch.stft(rearrange(recon_audio, '... s t -> (... s) t'), **res_stft_kwargs)
            target_Y = torch.stft(rearrange(target, '... s t -> (... s) t'), **res_stft_kwargs)

            multi_stft_resolution_loss += F.l1_loss(recon_Y, target_Y)

        total_loss = loss + self.multi_stft_resolution_loss_weight * multi_stft_resolution_loss

        if not return_loss_breakdown:
            return total_loss

        return total_loss, (loss, multi_stft_resolution_loss)
models/scnet/scnet_masked.py ADDED
@@ -0,0 +1,419 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.nn.functional as F
4
+ from collections import deque
5
+ from .separation import SeparationNet
6
+ import typing as tp
7
+ import math
8
+
9
+
10
class Swish(nn.Module):
    """Swish activation: returns ``x * sigmoid(x)`` element-wise."""

    def forward(self, x):
        return x * x.sigmoid()
13
+
14
+
15
class ConvolutionModule(nn.Module):
    """
    Convolution Module in SD block.

    Holds ``abs(depth)`` residual layers; ``forward`` applies them in order as
    ``x = x + layer(x)``.

    Args:
        channels (int): input/output channels.
        depth (int): number of layers in the residual branch. Each layer has its own
            parameters. The absolute value of ``depth`` is used.
        compress (float): amount of channel compression
            (hidden size is ``int(channels / compress)``).
        kernel (int): kernel size for the convolutions. Must be odd.
    """

    def __init__(self, channels, depth=2, compress=4, kernel=3):
        super().__init__()
        assert kernel % 2 == 1
        self.depth = abs(depth)
        hidden_size = int(channels / compress)
        norm = lambda d: nn.GroupNorm(1, d)
        self.layers = nn.ModuleList([])
        for _ in range(self.depth):
            # "same" padding for an odd kernel with stride 1.
            padding = (kernel // 2)
            mods = [
                norm(channels),
                # Twice the hidden size because the following GLU halves dim 1.
                nn.Conv1d(channels, hidden_size * 2, kernel, padding=padding),
                nn.GLU(1),
                # groups == channels: one filter per channel (depthwise).
                nn.Conv1d(hidden_size, hidden_size, kernel, padding=padding, groups=hidden_size),
                norm(hidden_size),
                Swish(),
                nn.Conv1d(hidden_size, channels, 1),
            ]
            layer = nn.Sequential(*mods)
            self.layers.append(layer)

    def forward(self, x):
        for layer in self.layers:
            x = x + layer(x)
        return x
51
+
52
+
53
class FusionLayer(nn.Module):
    """
    A FusionLayer within the decoder.

    Adds an optional skip tensor to the input, duplicates the result along the
    channel dimension, applies a 2-D convolution and gates it with GLU over
    dim 1, so the output has ``channels`` channels again.

    Args:
    - channels (int): Number of input channels.
    - kernel_size (int, optional): Kernel size for the convolutional layer, defaults to 3.
    - stride (int, optional): Stride for the convolutional layer, defaults to 1.
    - padding (int, optional): Padding for the convolutional layer, defaults to 1.
    """

    def __init__(self, channels, kernel_size=3, stride=1, padding=1):
        super(FusionLayer, self).__init__()
        self.conv = nn.Conv2d(channels * 2, channels * 2, kernel_size, stride=stride, padding=padding)

    def forward(self, x, skip=None):
        if skip is not None:
            # Out-of-place add: `x += skip` would overwrite the caller's tensor
            # (and can invalidate values autograd saved for the backward pass).
            x = x + skip
        x = x.repeat(1, 2, 1, 1)
        x = self.conv(x)
        x = F.glu(x, dim=1)
        return x
75
+
76
+
77
class SDlayer(nn.Module):
    """
    Implements a Sparse Down-sample Layer for processing different frequency bands separately.

    Args:
    - channels_in (int): Input channel count.
    - channels_out (int): Output channel count.
    - band_configs (dict): A dictionary containing configuration for each frequency band.
                           Keys are 'low', 'mid', 'high' for each band, and values are
                           dictionaries with keys 'SR', 'stride', and 'kernel' for proportion,
                           stride, and kernel size, respectively.

    ``forward`` returns ``(outputs, original_lengths)``: the list of per-band
    convolution outputs and the list of per-band lengths along dim 2 before
    padding.
    """

    def __init__(self, channels_in, channels_out, band_configs):
        super(SDlayer, self).__init__()

        # One convolution per band, in the iteration order of band_configs.
        # The kernel/stride act on dim 2 only; dim 3 uses size 1 / stride 1.
        self.convs = nn.ModuleList()
        self.strides = []
        self.kernels = []
        for config in band_configs.values():
            self.convs.append(
                nn.Conv2d(channels_in, channels_out, (config['kernel'], 1), (config['stride'], 1), (0, 0)))
            self.strides.append(config['stride'])
            self.kernels.append(config['kernel'])

        # Proportions used to compute the split points; the last band takes the rest.
        self.SR_low = band_configs['low']['SR']
        self.SR_mid = band_configs['mid']['SR']

    def forward(self, x):
        B, C, Fr, T = x.shape
        # Split points along dim 2, derived from the band proportions.
        low_end = math.ceil(Fr * self.SR_low)
        mid_end = math.ceil(Fr * (self.SR_low + self.SR_mid))
        splits = [
            (0, low_end),
            (low_end, mid_end),
            (mid_end, Fr)
        ]

        # Processing each band with the corresponding convolution
        outputs = []
        original_lengths = []
        for conv, stride, kernel, (start, end) in zip(self.convs, self.strides, self.kernels, splits):
            extracted = x[:, :, start:end, :]
            original_lengths.append(end - start)
            current_length = extracted.shape[2]

            # stride 1: pad by kernel - 1 so the convolution keeps the band length;
            # otherwise: pad the band up to the next multiple of the stride.
            if stride == 1:
                total_padding = kernel - stride
            else:
                total_padding = (stride - current_length % stride) % stride
            pad_left = total_padding // 2
            pad_right = total_padding - pad_left

            # F.pad pads from the last dim backwards: (0, 0) for dim 3, then dim 2.
            padded = F.pad(extracted, (0, 0, pad_left, pad_right))

            output = conv(padded)
            outputs.append(output)

        return outputs, original_lengths
138
+
139
+
140
class SUlayer(nn.Module):
    """
    Implements a Sparse Up-sample Layer in decoder.

    Args:
    - channels_in: The number of input channels.
    - channels_out: The number of output channels.
    - band_configs: Dictionary containing the per-band configurations; the
                    'kernel' and 'stride' entries of each band configure its
                    transposed convolution.
    """

    def __init__(self, channels_in, channels_out, band_configs):
        super(SUlayer, self).__init__()

        # One transposed convolution per band, in the iteration order of band_configs.
        self.convtrs = nn.ModuleList([
            nn.ConvTranspose2d(channels_in, channels_out, [config['kernel'], 1], [config['stride'], 1])
            for _, config in band_configs.items()
        ])

    def forward(self, x, lengths, origin_lengths):
        """
        Up-sample each band of ``x`` and concatenate the results along dim 2.

        Args:
        - x: input tensor; bands are laid out consecutively along dim 2.
        - lengths: per-band lengths of ``x`` along dim 2 (only the first two are
                   read; the last band takes the remainder).
        - origin_lengths: per-band target lengths along dim 2 after up-sampling.
        """
        # Define splitting points based on input lengths
        splits = [
            (0, lengths[0]),
            (lengths[0], lengths[0] + lengths[1]),
            (lengths[0] + lengths[1], None)
        ]
        # Processing each band with the corresponding convolution
        outputs = []
        for idx, (convtr, (start, end)) in enumerate(zip(self.convtrs, splits)):
            out = convtr(x[:, :, start:end, :])
            # Calculate the distance to trim the output symmetrically to original length
            current_Fr_length = out.shape[2]
            dist = abs(origin_lengths[idx] - current_Fr_length) // 2

            # Trim the output to the original length symmetrically
            trimmed_out = out[:, :, dist:dist + origin_lengths[idx], :]

            outputs.append(trimmed_out)

        # Concatenate trimmed outputs along the frequency dimension to return the final tensor
        x = torch.cat(outputs, dim=2)

        return x
184
+
185
+
186
class SDblock(nn.Module):
    """
    Implements a simplified Sparse Down-sample block in encoder.

    Args:
    - channels_in (int): Number of input channels.
    - channels_out (int): Number of output channels.
    - band_configs (dict): Configuration for the SDlayer specifying band splits and convolutions.
    - conv_config (dict): Configuration for convolution modules applied to each band
                          (forwarded as keyword arguments to ConvolutionModule).
    - depths (list of int): List specifying the convolution depths for low, mid, and high frequency bands.
    - kernel_size (int): Kernel size of the final convolution over the
                         concatenated bands; should be odd.

    ``forward`` returns ``(output, skip, lengths, original_lengths)``.
    """

    # The mutable defaults are kept for interface compatibility; they are only read, never mutated.
    def __init__(self, channels_in, channels_out, band_configs={}, conv_config={}, depths=[3, 2, 1], kernel_size=3):
        super(SDblock, self).__init__()
        self.SDlayer = SDlayer(channels_in, channels_out, band_configs)

        # Dynamically create convolution modules for each band based on depths
        self.conv_modules = nn.ModuleList([
            ConvolutionModule(channels_out, depth, **conv_config) for depth in depths
        ])
        # Set the kernel_size to an odd number.
        self.globalconv = nn.Conv2d(channels_out, channels_out, kernel_size, 1, (kernel_size - 1) // 2)

    def forward(self, x):
        bands, original_lengths = self.SDlayer(x)
        # B, C, f, T = band.shape
        # Fold dim 2 into the batch so the 1-D convolution module runs over the
        # last dim for every position of dim 2, then restore the 4-D layout.
        bands = [
            F.gelu(
                conv(band.permute(0, 2, 1, 3).reshape(-1, band.shape[1], band.shape[3]))
                .view(band.shape[0], band.shape[2], band.shape[1], band.shape[3])
                .permute(0, 2, 1, 3)
            )
            for conv, band in zip(self.conv_modules, bands)
        ]
        lengths = [band.size(-2) for band in bands]
        full_band = torch.cat(bands, dim=2)
        # The skip connection is taken before the final convolution.
        skip = full_band

        output = self.globalconv(full_band)

        return output, skip, lengths, original_lengths
228
+
229
+
230
class SCNet(nn.Module):
    """
    The implementation of SCNet: Sparse Compression Network for Music Source Separation. Paper: https://arxiv.org/abs/2401.13276.pdf

    In this variant the decoder output is passed through ``mask_layer`` and the
    result is multiplied, as a complex mask, with the mixture STFT before the
    inverse STFT.

    Args:
    - sources (List[str]): List of sources to be separated.
    - audio_channels (int): Number of audio channels.
    - nfft (int): Number of FFTs to determine the frequency dimension of the input.
    - hop_size (int): Hop size for the STFT.
    - win_size (int): Window size for STFT.
    - normalized (bool): Whether to normalize the STFT.
    - dims (List[int]): List of channel dimensions for each block. ``dims[0]`` must
      equal ``2 * audio_channels`` (real and imaginary part per audio channel).
    - band_SR (List[float]): The proportion of each frequency band.
    - band_stride (List[int]): The down-sampling ratio of each frequency band.
    - band_kernel (List[int]): The kernel sizes for down-sampling convolution in each frequency band
    - conv_depths (List[int]): List specifying the number of convolution modules in each SD block.
    - compress (int): Compression factor for convolution module.
    - conv_kernel (int): Kernel size for convolution layer in convolution module.
    - num_dplayer (int): Number of dual-path layers.
    - expand (int): Expansion factor in the dual-path RNN, default is 1.

    """

    # The list defaults are kept for interface compatibility; they are only read, never mutated.
    def __init__(self,
                 sources=['drums', 'bass', 'other', 'vocals'],
                 audio_channels=2,
                 # Main structure
                 dims=[4, 32, 64, 128],  # dims = [4, 64, 128, 256] in SCNet-large
                 # STFT
                 nfft=4096,
                 hop_size=1024,
                 win_size=4096,
                 normalized=True,
                 # SD/SU layer
                 band_SR=[0.175, 0.392, 0.433],
                 band_stride=[1, 4, 16],
                 band_kernel=[3, 4, 16],
                 # Convolution Module
                 conv_depths=[3, 2, 1],
                 compress=4,
                 conv_kernel=3,
                 # Dual-path RNN
                 num_dplayer=6,
                 expand=1,
                 ):
        super().__init__()
        self.sources = sources
        self.audio_channels = audio_channels
        self.dims = dims
        band_keys = ['low', 'mid', 'high']
        self.band_configs = {band_keys[i]: {'SR': band_SR[i], 'stride': band_stride[i], 'kernel': band_kernel[i]} for i
                             in range(len(band_keys))}
        self.hop_length = hop_size
        self.conv_config = {
            'compress': compress,
            'kernel': conv_kernel,
        }

        self.embed_dim = dims[0]
        self.max_f = nfft // 2 + 1
        # Learned embedding added along the frequency dimension of the input.
        self.pos_embed_f = nn.Parameter(torch.zeros(1, self.embed_dim, self.max_f, 1))
        nn.init.trunc_normal_(self.pos_embed_f, std=.02)

        # torch.stft / torch.istft require the window length to equal win_length,
        # so the window is sized by win_size (identical to nfft at the defaults).
        window = torch.hann_window(window_length=win_size, periodic=True)
        self.register_buffer('window', window, persistent=False)

        self.stft_config = {
            'n_fft': nfft,
            'hop_length': hop_size,
            'win_length': win_size,
            'center': True,
            'normalized': normalized
        }

        self.encoder = nn.ModuleList()
        self.decoder = nn.ModuleList()

        for index in range(len(dims) - 1):
            enc = SDblock(
                channels_in=dims[index],
                channels_out=dims[index + 1],
                band_configs=self.band_configs,
                conv_config=self.conv_config,
                depths=conv_depths
            )
            self.encoder.append(enc)

            dec = nn.Sequential(
                FusionLayer(channels=dims[index + 1]),
                SUlayer(
                    channels_in=dims[index + 1],
                    # The last decoder stage emits dims[0] channels per source.
                    channels_out=dims[index] if index != 0 else dims[index] * len(sources),
                    band_configs=self.band_configs,
                )
            )
            # Decoder stages are stored in reverse encoder order.
            self.decoder.insert(0, dec)

        self.separation_net = SeparationNet(
            channels=dims[-1],
            expand=expand,
            num_layers=num_dplayer,
        )

        # Channel count follows the decoder output (dims[0] per source) rather
        # than a hard-coded 4, so configurations with dims[0] != 4 also work.
        mask_channels = dims[0] * len(self.sources)
        self.mask_layer = nn.Sequential(
            nn.Conv2d(
                mask_channels,
                64,
                kernel_size=3,
                padding="same"
            ),
            nn.GELU(),
            nn.Conv2d(
                64,
                mask_channels,
                kernel_size=1,
                padding="same",
            ),
            nn.Tanh()
        )

    def forward(self, x):
        """
        Separate the input waveform.

        The reshapes below treat ``x`` as ``(batch, audio_channels, samples)``;
        the result is reshaped to ``(batch, len(sources), audio_channels, samples)``.
        """
        # B, C, L = x.shape
        B = x.shape[0]
        # In the initial padding, ensure that the number of frames after the STFT (the length of the T dimension) is even,
        # so that the RFFT operation can be used in the separation network.
        padding = self.hop_length - x.shape[-1] % self.hop_length
        if (x.shape[-1] + padding) // self.hop_length % 2 == 0:
            padding += self.hop_length
        x = F.pad(x, (0, padding))

        # STFT
        L = x.shape[-1]
        x = x.reshape(-1, L)
        stft_opts = {**self.stft_config, 'window': self.window.to(x.device)}
        x = torch.stft(x, **stft_opts, return_complex=True)
        x = torch.view_as_real(x)
        # Move real/imag in front of (freq, time) and fold the audio channels
        # into the channel dim: channels = 2 * audio_channels.
        x = x.permute(0, 3, 1, 2).reshape(x.shape[0] // self.audio_channels, x.shape[3] * self.audio_channels, x.shape[1], x.shape[2])

        B, C, Fr, T = x.shape

        assert C == self.embed_dim, f"Input channel dimension {C} after STFT/reshape doesn't match self.embed_dim {self.embed_dim}"
        # Copy of the (un-embedded) mixture spectrogram, one per source, for masking.
        mixture = x.repeat(1, len(self.sources), 1, 1)

        if Fr > self.max_f:
            print(f"Warning: Input frequency dim {Fr} > max_f {self.max_f}. Positional embedding will be truncated/repeated.")
            repeats = math.ceil(Fr / self.max_f)
            pos_f = self.pos_embed_f.repeat(1, 1, repeats, 1)[:, :, :Fr, :]
        else:
            pos_f = self.pos_embed_f[:, :, :Fr, :]
        x = x + pos_f

        save_skip = deque()
        save_lengths = deque()
        save_original_lengths = deque()
        # encoder
        for sd_layer in self.encoder:
            x, skip, lengths, original_lengths = sd_layer(x)
            save_skip.append(skip)
            save_lengths.append(lengths)
            save_original_lengths.append(original_lengths)

        # separation
        x = self.separation_net(x)

        # decoder (pops consume the saved encoder state in reverse order)
        for fusion_layer, su_layer in self.decoder:
            x = fusion_layer(x, save_skip.pop())
            x = su_layer(x, save_lengths.pop(), save_original_lengths.pop())

        mask = self.mask_layer(x)

        # output
        n = self.dims[0]

        # Regroup channels into (real, imag) pairs and view them as complex.
        mixture = mixture.view(B, n, -1, Fr, T)
        mixture = mixture.reshape(-1, 2, Fr, T).permute(0, 2, 3, 1)
        mixture = torch.view_as_complex(mixture.contiguous())

        mask = mask.view(B, n, -1, Fr, T)
        mask = mask.reshape(-1, 2, Fr, T).permute(0, 2, 3, 1)
        mask = torch.view_as_complex(mask.contiguous())

        x = mixture * mask

        x = torch.istft(x, **stft_opts)
        x = x.reshape(B, len(self.sources), self.audio_channels, -1)

        # Drop the samples added by the initial padding (padding is always >= 1).
        x = x[:, :, :, :-padding]

        return x
models/scnet/scnet_tran.py ADDED
@@ -0,0 +1,668 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from collections import deque
3
+ import typing as tp
4
+ import math
5
+ import torch
6
+ import torch.nn as nn
7
+ from torch.nn.modules.rnn import LSTM
8
+ from torch.nn import Module, ModuleList
9
+ from einops import rearrange, pack, unpack, reduce, repeat
10
+ from einops.layers.torch import Rearrange
11
+ import torch.nn.functional as F
12
+ from models.bs_roformer.attend import Attend
13
+ from rotary_embedding_torch import RotaryEmbedding
14
+
15
+
16
+ # helper functions
17
+
18
def exists(val):
    """Return True when ``val`` is not None."""
    return val is not None


def default(v, d):
    """Return ``v`` unless it is None, in which case return ``d``."""
    return v if exists(v) else d
24
+
25
+
26
def pack_one(t, pattern):
    """Call einops ``pack`` on the single tensor ``t`` and return its result unchanged."""
    return pack([t], pattern)


def unpack_one(t, ps, pattern):
    """Call einops ``unpack`` on ``t`` and return the first unpacked tensor."""
    return unpack(t, ps, pattern)[0]
32
+
33
+
34
def pad_at_dim(t, pad, dim=-1, value=0.):
    """
    Pad tensor ``t`` along a single dimension.

    Args:
        t: tensor to pad.
        pad: ``(before, after)`` amounts for dimension ``dim``.
        dim: dimension to pad; negative values count from the end.
        value: fill value passed to ``F.pad``.
    """
    # F.pad takes pairs starting from the last dimension, so emit a (0, 0)
    # pair for every dimension to the right of ``dim`` first.
    dims_from_right = (- dim - 1) if dim < 0 else (t.ndim - dim - 1)
    zeros = ((0, 0) * dims_from_right)
    return F.pad(t, (*zeros, *pad), value=value)
38
+
39
+
40
def l2norm(t):
    """L2-normalize ``t`` along its last dimension."""
    return F.normalize(t, dim=-1, p=2)
42
+
43
+
44
+ # norm
45
+
46
class RMSNorm(Module):
    """
    Normalization with a learned per-feature gain.

    The input is L2-normalized over its last dimension, then multiplied by
    ``sqrt(dim)`` and by the learnable ``gamma`` (initialized to ones).

    Args:
        dim (int): size of the last dimension of the input.
    """

    def __init__(self, dim):
        super().__init__()
        self.scale = dim ** 0.5
        self.gamma = nn.Parameter(torch.ones(dim))

    def forward(self, x):
        return F.normalize(x, dim=-1) * self.scale * self.gamma
54
+
55
+
56
+ # attention
57
+
58
class FeedForward(Module):
    """
    Pre-normalized two-layer feed-forward network:
    RMSNorm -> Linear -> GELU -> Dropout -> Linear -> Dropout.

    Args:
        dim (int): input/output feature size.
        mult (int | float): the hidden size is ``int(dim * mult)``.
        dropout (float): dropout probability used after the activation and
            after the output projection.
    """

    def __init__(
            self,
            dim,
            mult=4,
            dropout=0.
    ):
        super().__init__()
        dim_inner = int(dim * mult)
        self.net = nn.Sequential(
            RMSNorm(dim),
            nn.Linear(dim, dim_inner),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(dim_inner, dim),
            nn.Dropout(dropout)
        )

    def forward(self, x):
        return self.net(x)
78
+
79
+
80
class Attention(Module):
    """
    Pre-normalized multi-head self-attention with a per-head sigmoid output gate.

    Args:
        dim (int): input/output feature size.
        heads (int): number of attention heads.
        dim_head (int): feature size per head.
        dropout (float): dropout probability, passed to ``Attend`` and applied
            after the output projection.
        rotary_embed: optional object exposing ``rotate_queries_or_keys``; when
            given it is applied to the queries and keys.
        flash (bool): forwarded to ``Attend`` as ``flash``.
    """

    def __init__(
            self,
            dim,
            heads=8,
            dim_head=64,
            dropout=0.,
            rotary_embed=None,
            flash=True
    ):
        super().__init__()
        self.heads = heads
        # Stored but not used by forward() in this class.
        self.scale = dim_head ** -0.5
        dim_inner = heads * dim_head

        self.rotary_embed = rotary_embed

        self.attend = Attend(flash=flash, dropout=dropout)

        self.norm = RMSNorm(dim)
        self.to_qkv = nn.Linear(dim, dim_inner * 3, bias=False)

        # One gate logit per head, computed from the normalized input.
        self.to_gates = nn.Linear(dim, heads)

        self.to_out = nn.Sequential(
            nn.Linear(dim_inner, dim, bias=False),
            nn.Dropout(dropout)
        )

    def forward(self, x):
        x = self.norm(x)

        # Split the joint projection into q, k, v, each laid out as (b, h, n, d).
        q, k, v = rearrange(self.to_qkv(x), 'b n (qkv h d) -> qkv b h n d', qkv=3, h=self.heads)

        if exists(self.rotary_embed):
            q = self.rotary_embed.rotate_queries_or_keys(q)
            k = self.rotary_embed.rotate_queries_or_keys(k)

        out = self.attend(q, k, v)

        # Scale each head's output by its sigmoid gate.
        gates = self.to_gates(x)
        out = out * rearrange(gates, 'b n h -> b h n 1').sigmoid()

        out = rearrange(out, 'b h n d -> b n (h d)')
        return self.to_out(out)
125
+
126
+
127
class Transformer(Module):
    """
    Stack of ``depth`` (Attention, FeedForward) pairs, each applied with a
    residual connection, followed by an optional final RMSNorm.

    Args (keyword-only):
        dim (int): feature size.
        depth (int): number of (Attention, FeedForward) pairs.
        dim_head (int): feature size per attention head.
        heads (int): number of attention heads.
        attn_dropout (float): dropout passed to Attention.
        ff_dropout (float): dropout passed to FeedForward.
        ff_mult (int): hidden-size multiplier passed to FeedForward.
        norm_output (bool): apply RMSNorm to the output when True, identity otherwise.
        rotary_embed: optional rotary embedding passed to every Attention layer.
        flash_attn (bool): passed to Attention as ``flash``.
        linear_attn (bool): accepted but not used by this implementation.
    """

    def __init__(
            self,
            *,
            dim,
            depth,
            dim_head=64,
            heads=8,
            attn_dropout=0.,
            ff_dropout=0.,
            ff_mult=4,
            norm_output=True,
            rotary_embed=None,
            flash_attn=True,
            linear_attn=False
    ):
        super().__init__()
        self.layers = ModuleList([])

        for _ in range(depth):
            attn = Attention(dim=dim, dim_head=dim_head, heads=heads, dropout=attn_dropout, rotary_embed=rotary_embed, flash=flash_attn)

            self.layers.append(ModuleList([
                attn,
                FeedForward(dim=dim, mult=ff_mult, dropout=ff_dropout)
            ]))

        self.norm = RMSNorm(dim) if norm_output else nn.Identity()

    def forward(self, x):

        for attn, ff in self.layers:
            x = attn(x) + x
            x = ff(x) + x

        return self.norm(x)
163
+
164
+
165
class FeatureConversion(nn.Module):
    """
    Integrates into the adjacent Dual-Path layer.

    Forward direction (``inverse=False``): real FFT along dim 3, with the real
    and imaginary parts concatenated along dim 1 (doubling the channel count).
    Inverse direction (``inverse=True``): the first and second halves of the
    channels are taken as real and imaginary parts and an inverse real FFT is
    applied along dim 3. Both directions use ``norm="ortho"`` and cast the
    input with ``x.float()`` first.

    Args:
        channels (int): Number of input channels seen by the inverse direction
            (it is split in half into real and imaginary parts).
        inverse (bool): If True, uses irfft; otherwise, uses rfft.
    """

    def __init__(self, channels, inverse):
        super().__init__()
        self.inverse = inverse
        self.channels = channels

    def forward(self, x):
        # B, C, F, T = x.shape
        if self.inverse:
            x = x.float()
            x_r = x[:, :self.channels // 2, :, :]
            x_i = x[:, self.channels // 2:, :, :]
            x = torch.complex(x_r, x_i)
            x = torch.fft.irfft(x, dim=3, norm="ortho")
        else:
            x = x.float()
            x = torch.fft.rfft(x, dim=3, norm="ortho")
            x_real = x.real
            x_imag = x.imag
            x = torch.cat([x_real, x_imag], dim=1)
        return x
194
+
195
+
196
class DualPathTran(nn.Module):
    """
    Dual-Path Transformer in Separation Network.

    Applies one Transformer along dim 2 (frequency path) and one along dim 3
    (time path) of a 4-D input, each preceded by GroupNorm and wrapped in a
    residual connection.

    Args:
        d_model (int): The number of expected features in the input (dim 1).
        time_rotary_embed: rotary embedding passed to the time-path Transformer.
        freq_rotary_embed: rotary embedding passed to the frequency-path Transformer.
        tran_params (dict): Transformer settings; the keys 'heads', 'dim_head',
            'attn_dropout', 'ff_dropout', 'flash_attn' and 'depth' are read.
    """

    def __init__(self, d_model, time_rotary_embed, freq_rotary_embed, tran_params):
        super(DualPathTran, self).__init__()

        self.d_model = d_model

        transformer_kwargs = dict(
            dim=d_model,
            heads=tran_params['heads'],
            dim_head=tran_params['dim_head'],
            attn_dropout=tran_params['attn_dropout'],
            ff_dropout=tran_params['ff_dropout'],
            flash_attn=tran_params['flash_attn']
        )
        self.norm_layers = nn.ModuleList([nn.GroupNorm(1, d_model) for _ in range(2)])
        self.time_layer = Transformer(depth=tran_params['depth'], rotary_embed=time_rotary_embed, **transformer_kwargs)
        self.freq_layer = Transformer(depth=tran_params['depth'], rotary_embed=freq_rotary_embed, **transformer_kwargs)

    def forward(self, x):
        # Named Fr (not F) so the module-level alias for torch.nn.functional is not shadowed.
        B, C, Fr, T = x.shape

        original_x = x
        # Frequency-path: (B, C, Fr, T) -> (B * T, Fr, C), attend over Fr.
        x = self.norm_layers[0](x)
        x = x.transpose(1, 3).contiguous().view(B * T, Fr, C)
        x = self.freq_layer(x)
        x = x.view(B, T, Fr, C).transpose(1, 3)
        x = x + original_x

        original_x = x
        # Time-path: (B, C, Fr, T) -> (B * Fr, T, C), attend over T.
        x = self.norm_layers[1](x)
        x = x.transpose(1, 2).contiguous().view(B * Fr, C, T).transpose(1, 2)
        x = self.time_layer(x)
        x = x.transpose(1, 2).contiguous().view(B, Fr, C, T).transpose(1, 2)
        x = x + original_x

        return x
246
+
247
+
248
class SeparationNetTran(nn.Module):
    """
    Separation network built from alternating DualPathTran and FeatureConversion layers.

    Layer ``i`` runs a DualPathTran followed by a FeatureConversion. Even layers
    use ``channels`` features and the forward (rfft) conversion; odd layers use
    ``2 * channels`` features and the inverse (irfft) conversion.

    Args:
    - channels (int): Number input channels.
    - expand (int): Accepted for interface compatibility; not used by this class.
    - num_layers (int): Number of dual-path layers.
    - tran_params (dict): Transformer settings. 'rotary_embedding_dim' is read
      here and the dict is passed on to every DualPathTran. Required despite
      the None default.

    Raises:
        TypeError: if ``tran_params`` is None.
    """

    def __init__(self, channels, expand=1, num_layers=6, tran_params=None):
        super(SeparationNetTran, self).__init__()

        if tran_params is None:
            # Same exception type the subscript below would raise, with a usable message.
            raise TypeError("tran_params must be a dict of transformer settings, got None")

        self.num_layers = num_layers

        # One rotary embedding per path, shared by all layers.
        time_rotary_embed = RotaryEmbedding(dim=tran_params['rotary_embedding_dim'])
        freq_rotary_embed = RotaryEmbedding(dim=tran_params['rotary_embedding_dim'])

        modules = []
        for i in range(num_layers):
            # Odd layers run after a forward conversion, which doubles the channels.
            m = DualPathTran(channels * (2 if i % 2 == 1 else 1), time_rotary_embed, freq_rotary_embed, tran_params)
            modules.append(m)
        self.dp_modules = nn.ModuleList(modules)

        self.feature_conversion = nn.ModuleList([
            FeatureConversion(channels * 2, inverse=False if i % 2 == 0 else True) for i in range(num_layers)
        ])

    def forward(self, x):
        for i in range(self.num_layers):
            x = self.dp_modules[i](x)
            x = self.feature_conversion[i](x)
        return x
281
+
282
+
283
class Swish(nn.Module):
    """Swish activation: returns ``x * sigmoid(x)`` element-wise."""

    def forward(self, x):
        return x * x.sigmoid()
286
+
287
+
288
class ConvolutionModule(nn.Module):
    """
    Convolution Module in SD block.

    Holds ``abs(depth)`` residual layers; ``forward`` applies them in order as
    ``x = x + layer(x)``.

    Args:
        channels (int): input/output channels.
        depth (int): number of layers in the residual branch. Each layer has its own
            parameters. The absolute value of ``depth`` is used.
        compress (float): amount of channel compression
            (hidden size is ``int(channels / compress)``).
        kernel (int): kernel size for the convolutions. Must be odd.
    """

    def __init__(self, channels, depth=2, compress=4, kernel=3):
        super().__init__()
        assert kernel % 2 == 1
        self.depth = abs(depth)
        hidden_size = int(channels / compress)
        norm = lambda d: nn.GroupNorm(1, d)
        self.layers = nn.ModuleList([])
        for _ in range(self.depth):
            # "same" padding for an odd kernel with stride 1.
            padding = (kernel // 2)
            mods = [
                norm(channels),
                # Twice the hidden size because the following GLU halves dim 1.
                nn.Conv1d(channels, hidden_size * 2, kernel, padding=padding),
                nn.GLU(1),
                # groups == channels: one filter per channel (depthwise).
                nn.Conv1d(hidden_size, hidden_size, kernel, padding=padding, groups=hidden_size),
                norm(hidden_size),
                Swish(),
                nn.Conv1d(hidden_size, channels, 1),
            ]
            layer = nn.Sequential(*mods)
            self.layers.append(layer)

    def forward(self, x):
        for layer in self.layers:
            x = x + layer(x)
        return x
324
+
325
+
326
class FusionLayer(nn.Module):
    """
    A FusionLayer within the decoder.

    Adds an optional skip tensor to the input, duplicates the result along the
    channel dimension, applies a 2-D convolution and gates it with GLU over
    dim 1, so the output has ``channels`` channels again.

    Args:
    - channels (int): Number of input channels.
    - kernel_size (int, optional): Kernel size for the convolutional layer, defaults to 3.
    - stride (int, optional): Stride for the convolutional layer, defaults to 1.
    - padding (int, optional): Padding for the convolutional layer, defaults to 1.
    """

    def __init__(self, channels, kernel_size=3, stride=1, padding=1):
        super(FusionLayer, self).__init__()
        self.conv = nn.Conv2d(channels * 2, channels * 2, kernel_size, stride=stride, padding=padding)

    def forward(self, x, skip=None):
        if skip is not None:
            # Out-of-place add: `x += skip` would overwrite the caller's tensor
            # (and can invalidate values autograd saved for the backward pass).
            x = x + skip
        x = x.repeat(1, 2, 1, 1)
        x = self.conv(x)
        x = F.glu(x, dim=1)
        return x
348
+
349
+
350
class SDlayer(nn.Module):
    """
    Implements a Sparse Down-sample Layer for processing different frequency bands separately.

    Args:
    - channels_in (int): Input channel count.
    - channels_out (int): Output channel count.
    - band_configs (dict): A dictionary containing configuration for each frequency band.
                           Keys are 'low', 'mid', 'high' for each band, and values are
                           dictionaries with keys 'SR', 'stride', and 'kernel' for proportion,
                           stride, and kernel size, respectively.

    ``forward`` returns ``(outputs, original_lengths)``: the list of per-band
    convolution outputs and the list of per-band lengths along dim 2 before
    padding.
    """

    def __init__(self, channels_in, channels_out, band_configs):
        super(SDlayer, self).__init__()

        # One convolution per band, in the iteration order of band_configs.
        # The kernel/stride act on dim 2 only; dim 3 uses size 1 / stride 1.
        self.convs = nn.ModuleList()
        self.strides = []
        self.kernels = []
        for config in band_configs.values():
            self.convs.append(
                nn.Conv2d(channels_in, channels_out, (config['kernel'], 1), (config['stride'], 1), (0, 0)))
            self.strides.append(config['stride'])
            self.kernels.append(config['kernel'])

        # Proportions used to compute the split points; the last band takes the rest.
        self.SR_low = band_configs['low']['SR']
        self.SR_mid = band_configs['mid']['SR']

    def forward(self, x):
        B, C, Fr, T = x.shape
        # Split points along dim 2, derived from the band proportions.
        low_end = math.ceil(Fr * self.SR_low)
        mid_end = math.ceil(Fr * (self.SR_low + self.SR_mid))
        splits = [
            (0, low_end),
            (low_end, mid_end),
            (mid_end, Fr)
        ]

        # Processing each band with the corresponding convolution
        outputs = []
        original_lengths = []
        for conv, stride, kernel, (start, end) in zip(self.convs, self.strides, self.kernels, splits):
            extracted = x[:, :, start:end, :]
            original_lengths.append(end - start)
            current_length = extracted.shape[2]

            # stride 1: pad by kernel - 1 so the convolution keeps the band length;
            # otherwise: pad the band up to the next multiple of the stride.
            if stride == 1:
                total_padding = kernel - stride
            else:
                total_padding = (stride - current_length % stride) % stride
            pad_left = total_padding // 2
            pad_right = total_padding - pad_left

            # F.pad pads from the last dim backwards: (0, 0) for dim 3, then dim 2.
            padded = F.pad(extracted, (0, 0, pad_left, pad_right))

            output = conv(padded)
            outputs.append(output)

        return outputs, original_lengths
411
+
412
+
413
class SUlayer(nn.Module):
    """
    Implements a Sparse Up-sample Layer in decoder.

    Args:
    - channels_in: The number of input channels.
    - channels_out: The number of output channels.
    - band_configs: Dictionary containing the per-band configurations; the
                    'kernel' and 'stride' entries of each band configure its
                    transposed convolution.
    """

    def __init__(self, channels_in, channels_out, band_configs):
        super(SUlayer, self).__init__()

        # One transposed convolution per band, in the iteration order of band_configs.
        self.convtrs = nn.ModuleList([
            nn.ConvTranspose2d(channels_in, channels_out, [config['kernel'], 1], [config['stride'], 1])
            for _, config in band_configs.items()
        ])

    def forward(self, x, lengths, origin_lengths):
        """
        Up-sample each band of ``x`` and concatenate the results along dim 2.

        Args:
        - x: input tensor; bands are laid out consecutively along dim 2.
        - lengths: per-band lengths of ``x`` along dim 2 (only the first two are
                   read; the last band takes the remainder).
        - origin_lengths: per-band target lengths along dim 2 after up-sampling.
        """
        # Define splitting points based on input lengths
        splits = [
            (0, lengths[0]),
            (lengths[0], lengths[0] + lengths[1]),
            (lengths[0] + lengths[1], None)
        ]
        # Processing each band with the corresponding convolution
        outputs = []
        for idx, (convtr, (start, end)) in enumerate(zip(self.convtrs, splits)):
            out = convtr(x[:, :, start:end, :])
            # Calculate the distance to trim the output symmetrically to original length
            current_Fr_length = out.shape[2]
            dist = abs(origin_lengths[idx] - current_Fr_length) // 2

            # Trim the output to the original length symmetrically
            trimmed_out = out[:, :, dist:dist + origin_lengths[idx], :]

            outputs.append(trimmed_out)

        # Concatenate trimmed outputs along the frequency dimension to return the final tensor
        x = torch.cat(outputs, dim=2)

        return x
457
+
458
+
459
class SDblock(nn.Module):
    """
    Implements a simplified Sparse Down-sample block in encoder.

    Args:
    - channels_in (int): Number of input channels.
    - channels_out (int): Number of output channels.
    - band_configs (dict): Configuration for the SDlayer specifying band splits and convolutions.
    - conv_config (dict): Configuration for convolution modules applied to each band
                          (forwarded as keyword arguments to ConvolutionModule).
    - depths (list of int): List specifying the convolution depths for low, mid, and high frequency bands.
    - kernel_size (int): Kernel size of the final convolution over the
                         concatenated bands; should be odd.

    ``forward`` returns ``(output, skip, lengths, original_lengths)``.
    """

    # The mutable defaults are kept for interface compatibility; they are only read, never mutated.
    def __init__(self, channels_in, channels_out, band_configs={}, conv_config={}, depths=[3, 2, 1], kernel_size=3):
        super(SDblock, self).__init__()
        self.SDlayer = SDlayer(channels_in, channels_out, band_configs)

        # Dynamically create convolution modules for each band based on depths
        self.conv_modules = nn.ModuleList([
            ConvolutionModule(channels_out, depth, **conv_config) for depth in depths
        ])
        # Set the kernel_size to an odd number.
        self.globalconv = nn.Conv2d(channels_out, channels_out, kernel_size, 1, (kernel_size - 1) // 2)

    def forward(self, x):
        bands, original_lengths = self.SDlayer(x)
        # B, C, f, T = band.shape
        # Fold dim 2 into the batch so the 1-D convolution module runs over the
        # last dim for every position of dim 2, then restore the 4-D layout.
        bands = [
            F.gelu(
                conv(band.permute(0, 2, 1, 3).reshape(-1, band.shape[1], band.shape[3]))
                .view(band.shape[0], band.shape[2], band.shape[1], band.shape[3])
                .permute(0, 2, 1, 3)
            )
            for conv, band in zip(self.conv_modules, bands)
        ]
        lengths = [band.size(-2) for band in bands]
        full_band = torch.cat(bands, dim=2)
        # The skip connection is taken before the final convolution.
        skip = full_band

        output = self.globalconv(full_band)

        return output, skip, lengths, original_lengths
501
+
502
+
503
class SCNet_Tran(nn.Module):
    """
    SCNet (Sparse Compression Network for Music Source Separation,
    paper: https://arxiv.org/abs/2401.13276.pdf) with the LSTM layers replaced
    by transformer layers.

    Args:
    - sources (List[str]): List of sources to be separated.
    - audio_channels (int): Number of audio channels.
    - nfft (int): Number of FFTs to determine the frequency dimension of the input.
    - hop_size (int): Hop size for the STFT.
    - win_size (int): Window size for STFT.
    - normalized (bool): Whether to normalize the STFT.
    - dims (List[int]): List of channel dimensions for each block.
    - band_SR (List[float]): The proportion of each frequency band.
    - band_stride (List[int]): The down-sampling ratio of each frequency band.
    - band_kernel (List[int]): The kernel sizes for down-sampling convolution in each frequency band
    - conv_depths (List[int]): List specifying the number of convolution modules in each SD block.
    - compress (int): Compression factor for convolution module.
    - conv_kernel (int): Kernel size for convolution layer in convolution module.
    - num_dplayer (int): Number of dual-path layers (passed to the separation network as ``num_layers``).
    - expand (int): Expansion factor passed to the separation network, default is 1.
    - tran_rotary_embedding_dim, tran_depth, tran_heads, tran_dim_head,
      tran_attn_dropout, tran_ff_dropout, tran_flash_attn: collected into the
      ``tran_params`` dict that is handed to ``SeparationNetTran``.
    """

    def __init__(
            self,
            sources=('drums', 'bass', 'other', 'vocals'),
            audio_channels=2,
            # Main structure
            dims=(4, 32, 64, 128),  # dims = [4, 64, 128, 256] in SCNet-large
            # STFT
            nfft=4096,
            hop_size=1024,
            win_size=4096,
            normalized=True,
            # SD/SU layer
            band_SR=(0.175, 0.392, 0.433),
            band_stride=(1, 4, 16),
            band_kernel=(3, 4, 16),
            # Convolution Module
            conv_depths=(3, 2, 1),
            compress=4,
            conv_kernel=3,
            # Separation network
            num_dplayer=6,
            expand=1,
            tran_rotary_embedding_dim=64,
            tran_depth=1,
            tran_heads=8,
            tran_dim_head=64,
            tran_attn_dropout=0.0,
            tran_ff_dropout=0.0,
            tran_flash_attn=False,
    ):
        super().__init__()
        self.sources = sources
        self.audio_channels = audio_channels
        self.dims = dims

        # One configuration entry per frequency band, indexed positionally.
        self.band_configs = {}
        for position, band_name in enumerate(['low', 'mid', 'high']):
            self.band_configs[band_name] = {
                'SR': band_SR[position],
                'stride': band_stride[position],
                'kernel': band_kernel[position],
            }

        self.hop_length = hop_size
        self.conv_config = dict(compress=compress, kernel=conv_kernel)
        self.tran_params = dict(
            rotary_embedding_dim=tran_rotary_embedding_dim,
            depth=tran_depth,
            heads=tran_heads,
            dim_head=tran_dim_head,
            attn_dropout=tran_attn_dropout,
            ff_dropout=tran_ff_dropout,
            flash_attn=tran_flash_attn,
        )
        self.stft_config = dict(
            n_fft=nfft,
            hop_length=hop_size,
            win_length=win_size,
            center=True,
            normalized=normalized,
        )

        self.first_conv = nn.Conv2d(dims[0], dims[0], 1, 1, 0, bias=False)

        self.encoder = nn.ModuleList()
        self.decoder = nn.ModuleList()

        # Encoder stages are appended in order; each matching decoder stage is
        # inserted at the front so the decoder runs in the reverse order.
        for level in range(len(dims) - 1):
            narrow, wide = dims[level], dims[level + 1]

            self.encoder.append(
                SDblock(
                    channels_in=narrow,
                    channels_out=wide,
                    band_configs=self.band_configs,
                    conv_config=self.conv_config,
                    depths=conv_depths
                )
            )

            # The outermost decoder stage emits one channel group per source.
            if level != 0:
                decoder_out = narrow
            else:
                decoder_out = narrow * len(sources)
            self.decoder.insert(
                0,
                nn.Sequential(
                    FusionLayer(channels=wide),
                    SUlayer(
                        channels_in=wide,
                        channels_out=decoder_out,
                        band_configs=self.band_configs,
                    )
                )
            )

        self.separation_net = SeparationNetTran(
            channels=dims[-1],
            expand=expand,
            num_layers=num_dplayer,
            tran_params=self.tran_params
        )

    def forward(self, x):
        """
        Separate a waveform batch (B, C, L = x.shape) into its sources.

        Returns a tensor reshaped to (B, len(sources), audio_channels, -1) with
        the padding added at the start of this method trimmed off again.
        """
        hop = self.hop_length
        n_samples = x.shape[-1]

        # Pad so that the number of STFT frames (the T dimension) is even,
        # so that the RFFT operation can be used in the separation network.
        padding = hop - n_samples % hop
        if (n_samples + padding) // hop % 2 == 0:
            padding += hop
        x = F.pad(x, (0, padding))

        # STFT: flatten batch and channels, then expose real/imag as a trailing axis.
        spec = torch.stft(x.reshape(-1, x.shape[-1]), **self.stft_config, return_complex=True)
        spec = torch.view_as_real(spec)
        rows, n_freq, n_frames, n_parts = spec.shape
        x = spec.permute(0, 3, 1, 2).reshape(
            rows // self.audio_channels, n_parts * self.audio_channels, n_freq, n_frames
        )

        B, C, Fr, T = x.shape

        # Stacks filled by the encoder and consumed last-in-first-out by the decoder.
        skip_stack = []
        length_stack = []
        original_length_stack = []

        # encoder
        for sd_layer in self.encoder:
            x, skip, lengths, original_lengths = sd_layer(x)
            skip_stack.append(skip)
            length_stack.append(lengths)
            original_length_stack.append(original_lengths)

        # separation
        x = self.separation_net(x)

        # decoder
        for stage in self.decoder:
            fusion_layer, su_layer = stage
            x = fusion_layer(x, skip_stack.pop())
            x = su_layer(x, length_stack.pop(), original_length_stack.pop())

        # output: regroup into (real, imag) pairs and invert the STFT
        x = x.view(B, self.dims[0], -1, Fr, T)
        x = x.reshape(-1, 2, Fr, T).permute(0, 2, 3, 1)
        x = torch.view_as_complex(x.contiguous())
        x = torch.istft(x, **self.stft_config)
        x = x.reshape(B, len(self.sources), self.audio_channels, -1)

        return x[:, :, :, :-padding]
phase_fixer.py ADDED
@@ -0,0 +1,139 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torchaudio
3
+ import os
4
+ import gc
5
+
6
def frequency_blend_phases(phase1, phase2, freq_bins, low_cutoff=500, high_cutoff=5000, base_factor=0.25, scale_factor=1.85):
    """
    Blend two phase tensors with a weight that depends on frequency.

    For every frequency bin a blend factor ``w`` is computed:
      * ``base_factor`` below ``low_cutoff``,
      * ``base_factor + scale_factor`` above ``high_cutoff``,
      * a linear ramp between those two values inside ``[low_cutoff, high_cutoff]``.
    The result is ``(1 - w) * phase1 + w * phase2`` wrapped into ``[-pi, pi)``.

    Args:
        phase1 (torch.Tensor): First phase tensor; dim 0 indexes frequency bins.
        phase2 (torch.Tensor): Second phase tensor, same shape as ``phase1``.
        freq_bins (torch.Tensor or sequence): Frequency of each bin, one value per
            row of ``phase1``. Any numeric dtype is accepted.
        low_cutoff (float): Frequency below which the blend factor is ``base_factor``.
        high_cutoff (float): Frequency above which the blend factor is
            ``base_factor + scale_factor``.
        base_factor (float): Blend factor at and below ``low_cutoff``.
        scale_factor (float): Amount added to the blend factor across the ramp.

    Returns:
        torch.Tensor: Blended, wrapped phase with the same shape as ``phase1``.

    Raises:
        ValueError: If the phase shapes differ, ``freq_bins`` has the wrong length,
            or ``low_cutoff >= high_cutoff``.
    """
    if phase1.shape != phase2.shape:
        raise ValueError("phase1 and phase2 must have the same shape.")
    if len(freq_bins) != phase1.shape[0]:
        raise ValueError("freq_bins must have the same length as the number of frequency bins in phase1 and phase2.")
    if low_cutoff >= high_cutoff:
        raise ValueError("low_cutoff must be less than high_cutoff.")

    # Compute the weights in a floating dtype on the phase's device. Deriving the
    # dtype from freq_bins would truncate fractional factors for integer bins.
    weight_dtype = phase1.dtype if phase1.is_floating_point() else torch.get_default_dtype()
    freqs = torch.as_tensor(freq_bins).to(device=phase1.device, dtype=weight_dtype)

    blend_factors = torch.zeros_like(freqs)
    blend_factors[freqs < low_cutoff] = base_factor
    blend_factors[freqs > high_cutoff] = base_factor + scale_factor

    in_range_mask = (freqs >= low_cutoff) & (freqs <= high_cutoff)
    blend_factors[in_range_mask] = base_factor + scale_factor * (
        (freqs[in_range_mask] - low_cutoff) / (high_cutoff - low_cutoff)
    )

    # Broadcast the per-bin weight over all trailing dimensions instead of
    # looping over rows in Python.
    weights = blend_factors.reshape(-1, *([1] * (phase1.dim() - 1)))
    blended_phase = (1 - weights) * phase1 + weights * phase2

    # Wrap into [-pi, pi).
    blended_phase = torch.remainder(blended_phase + torch.pi, 2 * torch.pi) - torch.pi

    return blended_phase
31
+
32
def transfer_magnitude_phase(source_file, target_file, output_folder, transfer_magnitude=False, transfer_phase=True,
                             low_cutoff=500, high_cutoff=9000, scale_factor=1.4, output_format='flac'):
    """
    Rewrite the target audio using magnitude and/or phase taken from the source audio.

    Both files are loaded, transformed with an STFT (n_fft=2048, hop=512, Hann
    window) and processed channel by channel. With ``transfer_magnitude`` the
    target magnitude is replaced by the source magnitude; with ``transfer_phase``
    the target phase is replaced by a frequency-dependent blend of target and
    source phase (see ``frequency_blend_phases``). The result is saved as
    ``"<target name> (Fixed Instrumental).<ext>"`` inside ``output_folder``.

    Args:
        source_file (str): Path of the audio whose magnitude/phase is borrowed.
        target_file (str): Path of the audio that is being corrected.
        output_folder (str): Directory the result is written to.
        transfer_magnitude (bool): Replace the target magnitude with the source magnitude.
        transfer_phase (bool): Blend the source phase into the target phase.
        low_cutoff (float): Lower cutoff forwarded to ``frequency_blend_phases``.
        high_cutoff (float): Upper cutoff forwarded to ``frequency_blend_phases``.
        scale_factor (float): Scale factor forwarded to ``frequency_blend_phases``.
        output_format (str): ``'flac'`` writes 16-bit FLAC; anything else writes ``.wav``.

    Returns:
        str: Path of the written file.

    Raises:
        ValueError: If sample rates or channel counts of the two files differ
            (or, from ``frequency_blend_phases``, if their STFT shapes differ).
    """
    target_name = os.path.splitext(os.path.basename(target_file))[0]

    # Strip stem suffixes (lower-case variants first, then capitalised) from the name.
    for suffix in ("_other", "_vocals", "_instrumental", "_Other", "_Vocals", "_Instrumental"):
        target_name = target_name.replace(suffix, "")
    target_name = target_name.strip()

    ext = '.flac' if output_format == 'flac' else '.wav'
    output_file = os.path.join(output_folder, f"{target_name} (Fixed Instrumental){ext}")

    print(f"Phase Fixing: {os.path.basename(target_file)}...")
    source_waveform, source_sr = torchaudio.load(source_file)
    target_waveform, target_sr = torchaudio.load(target_file)

    if source_sr != target_sr:
        raise ValueError("Sample rates of source and target audio files must match.")
    # zip() below would otherwise silently drop the extra channels.
    if source_waveform.size(0) != target_waveform.size(0):
        raise ValueError("Channel counts of source and target audio files must match.")

    n_fft = 2048
    hop_length = 512
    window = torch.hann_window(n_fft)

    source_stfts = torch.stft(source_waveform, n_fft=n_fft, hop_length=hop_length, window=window, return_complex=True, pad_mode="reflect")
    target_stfts = torch.stft(target_waveform, n_fft=n_fft, hop_length=hop_length, window=window, return_complex=True, pad_mode="reflect")

    # Frequency of every STFT bin, from 0 up to source_sr // 2.
    freqs = torch.linspace(0, source_sr // 2, steps=n_fft // 2 + 1)

    modified_stfts = []
    for source_stft, target_stft in zip(source_stfts, target_stfts):
        modified_stft = target_stft.clone()
        if transfer_magnitude:
            # Source magnitude combined with the (current) target phase.
            modified_stft = torch.abs(source_stft) * torch.exp(1j * torch.angle(modified_stft))

        if transfer_phase:
            blended_phase = frequency_blend_phases(
                torch.angle(target_stft), torch.angle(source_stft), freqs,
                low_cutoff, high_cutoff, scale_factor=scale_factor
            )
            modified_stft = torch.abs(modified_stft) * torch.exp(1j * blended_phase)

        modified_stfts.append(modified_stft)

    # The output is the corrected *target*, so it must keep the target's length.
    modified_waveform = torch.istft(
        torch.stack(modified_stfts),
        n_fft=n_fft,
        hop_length=hop_length,
        window=window,
        length=target_waveform.size(1)
    )

    if output_format == 'flac':
        torchaudio.save(output_file, modified_waveform, target_sr, format="flac", bits_per_sample=16)
    else:
        torchaudio.save(output_file, modified_waveform, target_sr)

    print(f"Saved: {output_file}")
    return output_file
88
+
89
def process_phase_fix(source_file, target_file, output_folder, low_cutoff=500, high_cutoff=9000,
                      scale_factor=1.4, output_format='flac'):
    """
    Run a phase-only fix of ``target_file`` against ``source_file``.

    Creates ``output_folder`` if needed, calls ``transfer_magnitude_phase`` with
    magnitude transfer disabled and phase transfer enabled, then triggers garbage
    collection and, when CUDA is available, empties the CUDA cache.

    Returns:
        tuple: ``(output_path, message)`` on success, or ``(None, message)`` with
        the error text when any exception is raised along the way.
    """
    os.makedirs(output_folder, exist_ok=True)

    try:
        settings = dict(
            source_file=source_file,
            target_file=target_file,
            output_folder=output_folder,
            transfer_magnitude=False,
            transfer_phase=True,
            low_cutoff=low_cutoff,
            high_cutoff=high_cutoff,
            scale_factor=scale_factor,
            output_format=output_format,
        )
        fixed_path = transfer_magnitude_phase(**settings)

        # Release memory held by the intermediate tensors.
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

        return fixed_path, "Phase fix completed successfully!"
    except Exception as err:
        return None, f"Error during phase fix: {str(err)}"
110
+
111
# Display names of the models offered as the phase *source*.
# NOTE(review): all but the last entry carry a "VOCALS-" prefix, so these are
# presumably vocal-separation models -- confirm against the code that consumes this list.
SOURCE_MODELS = [
    'VOCALS-MelBand-Roformer (by Becruily)',
    'VOCALS-Mel-Roformer big beta 4 (by unwa)',
    'VOCALS-Melband-Roformer BigBeta5e (by unwa)',
    'VOCALS-big_beta6 (by Unwa)',
    'VOCALS-big_beta6X (by Unwa)',
    'VOCALS-MelBand-Roformer (by KimberleyJSN)',
    'VOCALS-MelBand-Roformer Kim FT (by Unwa)',
    'VOCALS-MelBand-Roformer Kim FT 2 (by Unwa)',
    'VOCALS-MelBand-Roformer Kim FT 2 Blendless (by unwa)',
    'VOCALS-Mel-Roformer FT 3 Preview (by unwa)',
    'VOCALS-BS-Roformer_1296 (by viperx)',
    'VOCALS-BS-Roformer_1297 (by viperx)',
    'VOCALS-BS-RoformerLargev1 (by unwa)',
    'bs_roformer_revive (by unwa)'
]

# Display names of the models offered as the phase-fix *target*.
# NOTE(review): the names suggest instrumental-separation models whose output is
# the file being corrected -- confirm against the code that consumes this list.
TARGET_MODELS = [
    'INST-MelBand-Roformer (by Becruily)',
    'INST-Mel-Roformer v1 (by unwa)',
    'INST-Mel-Roformer v2 (by unwa)',
    'inst_v1e (by unwa)',
    'INST-Mel-Roformer v1e+ (by unwa)',
    'Inst_GaboxV7 (by Gabox)',
    'INST-VOC-Mel-Roformer a.k.a. duality (by unwa)',
    'INST-VOC-Mel-Roformer a.k.a. duality v2 (by unwa)',
    'inst_gabox (by Gabox)',
    'inst_gaboxFlowersV10 (by Gabox)'
]
processing.py CHANGED
@@ -13,9 +13,16 @@ sys.path.append(current_dir)
13
 
14
  from datetime import datetime
15
  from helpers import INPUT_DIR, OLD_OUTPUT_DIR, ENSEMBLE_DIR, AUTO_ENSEMBLE_TEMP, move_old_files, clear_directory, BASE_DIR, clean_model, extract_model_name_from_checkpoint, sanitize_filename, find_clear_segment, save_segment, run_matchering, clamp_percentage
16
- from model import get_model_config
17
  from apollo_processing import process_with_apollo # Import Apollo processing
18
  import torch
 
 
 
 
 
 
 
19
  import yaml
20
  import gradio as gr
21
  import threading
@@ -29,15 +36,25 @@ import locale
29
  import re
30
  import psutil
31
  import concurrent.futures
32
- from google.oauth2.credentials import Credentials
 
 
 
 
 
 
 
 
 
33
  import tempfile
34
- import traceback
35
  from urllib.parse import urlparse, quote
36
  try:
37
  from google.colab import drive
 
38
  IS_COLAB = True
39
  except ImportError:
40
  IS_COLAB = False
 
41
  import matchering as mg
42
 
43
  import warnings
@@ -61,14 +78,24 @@ def setup_directories():
61
  """Create necessary directories and check Google Drive access."""
62
  if IS_COLAB:
63
  try:
64
- if not os.path.exists('/content/drive/MyDrive'):
 
 
 
65
  print("Mounting Google Drive...")
66
- from google.colab import drive
67
- drive.mount('/content/drive', force_remount=True)
68
- if not os.path.exists('/content/drive/MyDrive'):
69
- raise RuntimeError("Google Drive mount failed. Please mount manually with 'from google.colab import drive; drive.mount('/content/drive', force_remount=True)'.")
 
 
 
 
 
 
70
  except Exception as e:
71
- raise RuntimeError(f"Failed to mount Google Drive: {str(e)}")
 
72
  os.makedirs(OUTPUT_DIR, exist_ok=True)
73
  os.makedirs(INPUT_DIR, exist_ok=True)
74
  os.makedirs(OLD_OUTPUT_DIR, exist_ok=True)
@@ -85,25 +112,47 @@ def refresh_auto_output():
85
  except Exception as e:
86
  return None, f"Error refreshing output: {str(e)}"
87
 
88
- import logging
89
-
90
- # Loglama ayarları
91
- logging.basicConfig(
92
- level=logging.DEBUG,
93
- filename='utils.log',
94
- filemode='a',
95
- format='%(asctime)s - %(levelname)s - %(message)s'
96
- )
97
-
98
- def update_progress_html(progress_label, progress_percent):
99
- """Gradio arayüzü için ilerleme çubuğu HTML'si oluşturur."""
100
- progress_percent = min(max(round(progress_percent), 0), 100) # %1 hassasiyet
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
101
  return f"""
 
 
 
 
 
 
102
  <div id="custom-progress" style="margin-top: 10px;">
103
  <div style="font-size: 1rem; color: #C0C0C0; margin-bottom: 5px;" id="progress-label">{progress_label}</div>
104
  <div style="width: 100%; background-color: #444; border-radius: 5px; overflow: hidden;">
105
- <div id="progress-bar" style="width: {progress_percent}%; height: 20px; background-color: #6e8efb; transition: width 0.3s; max-width: 100%;"></div>
106
  </div>
 
107
  </div>
108
  """
109
 
@@ -112,18 +161,38 @@ def extract_model_name_from_checkpoint(checkpoint_path):
112
  return "Unknown"
113
  base_name = os.path.basename(checkpoint_path)
114
  model_name = os.path.splitext(base_name)[0]
115
- print(f"Original checkpoint path: {checkpoint_path}, extracted model_name: {model_name}")
116
  return model_name.strip()
117
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
118
  def run_command_and_process_files(
119
  model_type,
120
  config_path,
121
  start_check_point,
122
  INPUT_DIR,
123
  OUTPUT_DIR,
124
- extract_instrumental=False,
125
- use_tta=False,
126
- demud_phaseremix_inst=False,
127
  progress=None,
128
  use_apollo=True,
129
  apollo_normal_model="Apollo Universal Model",
@@ -133,52 +202,113 @@ def run_command_and_process_files(
133
  apollo_overlap=2,
134
  apollo_method="normal_method",
135
  apollo_midside_model=None,
136
- output_format="wav"
 
 
 
 
137
  ):
 
 
 
 
138
  try:
139
- logging.info(f"Starting run_command_and_process_files: model_type={model_type}, config_path={config_path}, inference_chunk_size={inference_chunk_size}, inference_overlap={inference_overlap}")
140
-
141
- # Doğrulama
142
- for path, name in [
143
- (config_path, "Configuration file"),
144
- (start_check_point, "Checkpoint file"),
145
- (INPUT_DIR, "Input directory")
146
- ]:
147
- if not path:
148
- raise ValueError(f"{name} is empty")
149
- if not os.path.exists(path):
150
- raise FileNotFoundError(f"{name} not found: {path}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
151
 
152
- os.makedirs(OUTPUT_DIR, exist_ok=True)
153
 
154
- # Komut oluştur
155
- INFERENCE_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), "inference.py")
156
  cmd_parts = [
157
- "python", INFERENCE_PATH,
158
- "--model_type", str(model_type),
159
- "--config_path", str(config_path),
160
- "--start_check_point", str(start_check_point),
161
- "--input_folder", str(INPUT_DIR),
162
- "--store_dir", str(OUTPUT_DIR),
163
  "--chunk_size", str(inference_chunk_size),
164
  "--overlap", str(inference_overlap),
165
  "--export_format", f"{output_format} FLOAT"
166
  ]
167
- for flag, value in [
168
- ("--extract_instrumental", extract_instrumental),
169
- ("--use_tta", use_tta),
170
- ("--demud_phaseremix_inst", demud_phaseremix_inst)
171
- ]:
172
- if value:
173
- cmd_parts.append(flag)
174
-
175
- logging.info(f"Executing command: {' '.join(cmd_parts)}")
176
- start_time = time.time()
177
-
178
- # Subprocess başlat
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
179
  process = subprocess.Popen(
180
  cmd_parts,
181
- cwd=os.path.dirname(os.path.abspath(__file__)),
182
  stdout=subprocess.PIPE,
183
  stderr=subprocess.PIPE,
184
  text=True,
@@ -186,156 +316,188 @@ def run_command_and_process_files(
186
  universal_newlines=True
187
  )
188
 
189
- # İlerleme takibi
190
- mixture_paths = sorted(glob.glob(os.path.join(INPUT_DIR, '*.*')))
191
- total_files = len(mixture_paths)
192
- processed_files = 0
193
- base_progress_per_file = 80 / total_files if total_files > 0 else 80 # 0-80% ayrıştırma
194
-
195
  stderr_output = ""
196
- stdout_output = ""
197
- while process.poll() is None:
198
- line = process.stdout.readline().strip()
199
- if line:
200
- stdout_output += line + "\n"
201
- if i18n("loaded_audio").lower() in line.lower():
202
- processed_files += 1
203
- progress_value = round(processed_files * base_progress_per_file)
204
- if progress is not None and callable(getattr(progress, '__call__', None)):
205
- progress(progress_value / 100, desc=i18n("running_separation").format(processed_files, total_files))
206
- update_progress_html(i18n("running_separation").format(processed_files, total_files), progress_value)
207
- logging.debug(line)
208
-
209
- err_line = process.stderr.readline().strip()
210
- if err_line:
211
- stderr_output += err_line + "\n"
212
- logging.error(err_line)
213
-
214
- # Kalan çıktıları topla
215
- stdout, stderr = process.communicate()
216
- stdout_output += stdout
217
- stderr_output += stderr
218
- logging.debug(f"Subprocess stdout: {stdout_output}")
219
- if stderr_output:
220
- logging.error(f"Subprocess stderr: {stderr_output}")
221
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
222
  if process.returncode != 0:
223
- raise RuntimeError(f"Subprocess failed with code {process.returncode}: {stderr_output}")
224
-
225
- elapsed_time = time.time() - start_time
226
- logging.info(f"Subprocess completed in {elapsed_time:.2f} seconds")
227
 
228
- # Çıktıları işle
229
  filename_model = extract_model_name_from_checkpoint(start_check_point)
230
- output_files = sorted(os.listdir(OUTPUT_DIR))
231
  if not output_files:
232
- raise FileNotFoundError(i18n("no_output_files").format(OUTPUT_DIR))
233
-
234
- # Dosya yeniden adlandırma: 80-90%
235
- total_output_files = len(output_files)
236
- renamed_files = 0
237
- for filename in output_files:
238
- file_path = os.path.join(OUTPUT_DIR, filename)
239
- if not any(filename.lower().endswith(ext) for ext in ['.mp3', '.wav', '.flac', '.aac', '.ogg', '.m4a']):
240
- continue
241
- base, ext = os.path.splitext(filename)
242
- detected_type = None
243
- for type_key in ['vocals', 'instrumental', 'phaseremix', 'drum', 'bass', 'other', 'effects', 'speech', 'music', 'dry', 'male', 'female', 'bleed', 'karaoke']:
244
- if type_key.lower() in base.lower():
245
- detected_type = type_key
246
- break
247
- type_suffix = detected_type.capitalize() if detected_type else 'Processed'
248
- clean_base = sanitize_filename(base.split('_')[0]).rsplit('.', 1)[0]
249
- new_filename = f"{clean_base}_{type_suffix}_{filename_model}{ext}"
250
- new_file_path = os.path.join(OUTPUT_DIR, new_filename)
251
- try:
252
- os.rename(file_path, new_file_path)
253
- renamed_files += 1
254
- progress_value = round(80 + (renamed_files / total_output_files) * 10)
255
- if progress is not None and callable(getattr(progress, '__call__', None)):
256
- progress(progress_value / 100, desc=i18n("renaming_files").format(renamed_files, total_output_files))
257
- update_progress_html(i18n("renaming_files").format(renamed_files, total_output_files), progress_value)
258
- except Exception as e:
259
- logging.error(f"Could not rename {file_path} to {new_file_path}: {e}")
260
 
261
- output_files = sorted(os.listdir(OUTPUT_DIR))
 
 
262
  if not output_files:
263
- raise FileNotFoundError(i18n("no_output_files_after_rename").format(OUTPUT_DIR))
264
 
265
- # Çıktıları eşleştir
266
- def find_file(keyword):
 
 
267
  matching_files = [
268
- os.path.join(OUTPUT_DIR, f) for f in output_files
269
- if keyword.lower() in f.lower()
270
  ]
271
  return matching_files[0] if matching_files else None
272
 
273
  output_list = [
274
- find_file('vocals'), find_file('instrumental'), find_file('phaseremix'),
275
  find_file('drum'), find_file('bass'), find_file('other'), find_file('effects'),
276
  find_file('speech'), find_file('music'), find_file('dry'), find_file('male'),
277
- find_file('female'), find_file('bleed'), find_file('karaoke')
 
278
  ]
279
 
280
- # Normalizasyon: 90-95%
281
  normalized_outputs = []
282
- for i, output_file in enumerate(output_list):
283
  if output_file and os.path.exists(output_file):
284
  normalized_file = os.path.join(OUTPUT_DIR, f"{sanitize_filename(os.path.splitext(os.path.basename(output_file))[0])}.{output_format}")
285
- try:
286
- if output_file.endswith(f".{output_format}") and output_file != normalized_file:
287
- shutil.copy(output_file, normalized_file)
288
- elif output_file != normalized_file:
289
- audio, sr = librosa.load(output_file, sr=None, mono=False)
290
- sf.write(normalized_file, audio.T if audio.ndim > 1 else audio, sr)
291
- else:
292
- normalized_file = output_file
293
- normalized_outputs.append(normalized_file)
294
-
295
- progress_value = round(90 + (i + 1) / len(output_list) * 5)
296
- if progress is not None and callable(getattr(progress, '__call__', None)):
297
- progress(progress_value / 100, desc=i18n("normalizing_output").format(i + 1, len(output_list)))
298
- update_progress_html(i18n("normalizing_output").format(i + 1, len(output_list), progress_value))
299
- except Exception as e:
300
- logging.error(f"Normalization failed for {output_file}: {e}")
301
- normalized_outputs.append(None)
302
  else:
303
- normalized_outputs.append(None)
304
 
305
- # Apollo işlemi: 95-100%
306
  if use_apollo:
307
- try:
308
- from apollo_processing import process_with_apollo
309
- normalized_outputs = process_with_apollo(
310
- output_files=normalized_outputs,
311
- output_dir=OUTPUT_DIR,
312
- apollo_chunk_size=apollo_chunk_size,
313
- apollo_overlap=apollo_overlap,
314
- apollo_method=apollo_method,
315
- apollo_normal_model=apollo_normal_model,
316
- apollo_midside_model=apollo_midside_model,
317
- output_format=output_format,
318
- progress=lambda p, desc: progress((95 + p * 5) / 100, desc=desc) if progress else None,
319
- total_progress_start=95,
320
- total_progress_end=100
321
- )
322
- except ImportError:
323
- logging.warning("apollo_processing module not found, skipping Apollo processing")
324
- except Exception as e:
325
- logging.error(f"Apollo processing failed: {e}")
326
-
327
- # Tamamlandı
328
- if progress is not None and callable(getattr(progress, '__call__', None)):
329
- progress(1.0, desc=i18n("separation_complete"))
330
- update_progress_html(i18n("separation_complete"), 100)
331
 
332
- logging.info(f"Processing completed successfully. Outputs: {normalized_outputs}")
333
- return tuple(normalized_outputs)
334
 
 
 
 
335
  except Exception as e:
336
- logging.error(f"run_command_and_process_files error: {str(e)}")
337
- traceback.print_exc(file=sys.stderr)
338
- return (None,) * 14
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
339
 
340
  def process_audio(
341
  input_audio_file,
@@ -343,6 +505,10 @@ def process_audio(
343
  chunk_size,
344
  overlap,
345
  export_format,
 
 
 
 
346
  use_tta,
347
  demud_phaseremix_inst,
348
  extract_instrumental,
@@ -352,12 +518,16 @@ def process_audio(
352
  apollo_method,
353
  apollo_normal_model,
354
  apollo_midside_model,
355
- use_matchering=False,
356
- matchering_passes=1,
357
  progress=gr.Progress(track_tqdm=True),
358
  *args,
359
  **kwargs
360
  ):
 
 
 
 
361
  try:
362
  # Check Google Drive connection
363
  setup_directories()
@@ -365,29 +535,29 @@ def process_audio(
365
  if input_audio_file is not None:
366
  audio_path = input_audio_file.name if hasattr(input_audio_file, 'name') else input_audio_file
367
  else:
368
- return (
369
- None, None, None, None, None, None, None, None, None, None, None, None, None, None,
370
  "No audio file provided",
371
  update_progress_html("No input provided", 0)
372
  )
 
373
 
374
  os.makedirs(OUTPUT_DIR, exist_ok=True)
375
  os.makedirs(OLD_OUTPUT_DIR, exist_ok=True)
376
  move_old_files(OUTPUT_DIR)
377
 
378
- print(f"process_audio: model parameter received: {model}")
379
  # Clean model name, remove ⭐ and other unwanted characters
380
  clean_model_name = clean_model(model) if not model.startswith("/") else extract_model_name_from_checkpoint(model)
381
- print(f"Processing audio: {audio_path}, model: {clean_model_name}")
382
-
383
- print(f"Raw UI inputs - chunk_size: {chunk_size}, overlap: {overlap}, apollo_chunk_size: {apollo_chunk_size}, apollo_overlap: {apollo_overlap}, apollo_method: {apollo_method}")
384
 
385
  # Validate inference parameters
 
386
  try:
387
- inference_chunk_size = int(chunk_size)
388
  except (TypeError, ValueError):
389
  print(f"Invalid chunk_size: {chunk_size}. Defaulting to: 352800.")
390
  inference_chunk_size = 352800
 
391
 
392
  try:
393
  inference_overlap = int(overlap)
@@ -416,21 +586,32 @@ def process_audio(
416
  else:
417
  print(f"Invalid apollo_method: {apollo_method}. Defaulting to: normal_method.")
418
  apollo_method = "normal_method"
419
- print(f"Parsed apollo_method: {apollo_method}")
420
-
421
- print(f"Corrected values - inference_chunk_size: {inference_chunk_size}, inference_overlap: {inference_overlap}, apollo_chunk_size: {apollo_chunk_size}, apollo_overlap: {apollo_overlap}")
422
-
423
  # Copy input file to INPUT_DIR
424
  input_filename = os.path.basename(audio_path)
425
  dest_path = os.path.join(INPUT_DIR, input_filename)
426
  shutil.copy(audio_path, dest_path)
427
- print(f"Input file copied: {dest_path}")
428
 
429
- # Get model configuration with cleaned model name
 
 
 
 
 
 
 
430
  model_type, config_path, start_check_point = get_model_config(clean_model_name, inference_chunk_size, inference_overlap)
431
- print(f"Model configuration: model_type={model_type}, config_path={config_path}, start_check_point={start_check_point}")
432
 
433
- outputs = run_command_and_process_files(
 
 
 
 
 
 
 
 
 
 
434
  model_type=model_type,
435
  config_path=config_path,
436
  start_check_point=start_check_point,
@@ -448,17 +629,32 @@ def process_audio(
448
  apollo_overlap=apollo_overlap,
449
  apollo_method=apollo_method,
450
  apollo_midside_model=apollo_midside_model,
451
- output_format=export_format.split()[0].lower()
452
- )
 
 
 
 
 
 
 
 
 
 
 
 
453
 
454
  if outputs is None or all(output is None for output in outputs):
455
  raise ValueError("run_command_and_process_files returned None or all None outputs")
456
 
457
  # Apply Matchering (if enabled)
458
  if use_matchering:
459
- # Progress update for Matchering
460
- if progress is not None and callable(getattr(progress, '__call__', None)):
461
- progress(90, desc="Applying Matchering")
 
 
 
462
 
463
  # Find clean segment from original audio
464
  segment_start, segment_end, segment_audio = find_clear_segment(audio_path)
@@ -488,12 +684,11 @@ def process_audio(
488
 
489
  outputs = tuple(mastered_outputs)
490
 
491
- if progress is not None and callable(getattr(progress, '__call__', None)):
492
- progress(100, desc="Processing complete")
493
-
494
- return (
495
  outputs[0], outputs[1], outputs[2], outputs[3], outputs[4], outputs[5], outputs[6],
496
  outputs[7], outputs[8], outputs[9], outputs[10], outputs[11], outputs[12], outputs[13],
 
497
  "Audio processing completed",
498
  update_progress_html("Audio processing completed", 100)
499
  )
@@ -502,8 +697,8 @@ def process_audio(
502
  print(f"process_audio error: {str(e)}")
503
  import traceback
504
  traceback.print_exc()
505
- return (
506
- None, None, None, None, None, None, None, None, None, None, None, None, None, None,
507
  f"Error occurred: {str(e)}",
508
  update_progress_html("Error occurred", 0)
509
  )
@@ -534,23 +729,63 @@ def ensemble_audio_fn(files, method, weights, progress=gr.Progress()):
534
  ensemble_args += ["--weights", *weights_list]
535
 
536
  progress(0, desc="Starting ensemble process", total=100)
537
- result = subprocess.run(
 
 
538
  ["python", "ensemble.py"] + ensemble_args,
539
- capture_output=True,
540
- text=True
 
 
 
541
  )
542
 
543
- start_time = time.time()
544
- total_estimated_time = 10.0 # Adjust based on actual ensemble duration
545
- elapsed_time = 0
546
- while elapsed_time < total_estimated_time:
547
- elapsed_time = time.time() - start_time
548
- progress_value = (elapsed_time / total_estimated_time) * 100
549
- progress_value = clamp_percentage(progress_value)
550
- progress(progress_value, desc=f"Ensembling progress: {progress_value}%")
551
- time.sleep(0.1)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
552
 
553
- progress(100, desc="Finalizing ensemble output")
 
 
 
554
  log = f"Success: {result.stdout}" if not result.stderr else f"Error: {result.stderr}"
555
  return output_path, log
556
 
@@ -602,18 +837,15 @@ def auto_ensemble_process(
602
  input_filename = os.path.basename(audio_path)
603
  dest_path = os.path.join(INPUT_DIR, input_filename)
604
  shutil.copy(audio_path, dest_path)
605
- print(f"Input file copied: {dest_path}")
606
 
607
  # Parse apollo method
608
  if auto_apollo_method in ["2", 2]:
609
  auto_apollo_method = "mid_side_method"
610
  elif auto_apollo_method in ["1", 1]:
611
  auto_apollo_method = "normal_method"
612
- print(f"Parsed auto_apollo_method: {auto_apollo_method}")
613
 
614
  corrected_auto_chunk_size = int(auto_apollo_chunk_size)
615
  corrected_auto_overlap = int(auto_apollo_overlap)
616
- print(f"Corrected values - auto_apollo_chunk_size: {corrected_auto_chunk_size}, auto_apollo_overlap: {corrected_auto_overlap}")
617
 
618
  # Setup temporary directories
619
  auto_ensemble_temp = os.path.join(BASE_DIR, "auto_ensemble_temp")
@@ -627,7 +859,6 @@ def auto_ensemble_process(
627
 
628
  for i, model in enumerate(selected_models):
629
  clean_model_name = clean_model(model)
630
- print(f"Processing model {i+1}/{total_models}: Original={model}, Cleaned={clean_model_name}")
631
  model_output_dir = os.path.join(auto_ensemble_temp, clean_model_name)
632
  os.makedirs(model_output_dir, exist_ok=True)
633
 
@@ -639,7 +870,12 @@ def auto_ensemble_process(
639
  )
640
 
641
  model_type, config_path, start_check_point = get_model_config(clean_model_name, auto_chunk_size, auto_overlap)
642
- print(f"Model configuration: model_type={model_type}, config_path={config_path}, start_check_point={start_check_point}")
 
 
 
 
 
643
 
644
  cmd = [
645
  "python", INFERENCE_PATH,
@@ -648,7 +884,7 @@ def auto_ensemble_process(
648
  "--start_check_point", start_check_point,
649
  "--input_folder", INPUT_DIR,
650
  "--store_dir", model_output_dir,
651
- "--chunk_size", str(auto_chunk_size),
652
  "--overlap", str(auto_overlap),
653
  "--export_format", f"{export_format.split()[0].lower()} FLOAT"
654
  ]
@@ -668,20 +904,83 @@ def auto_ensemble_process(
668
  )
669
 
670
  stderr_output = ""
 
 
 
671
  for line in process.stdout:
672
- print(line.strip())
673
- if "Progress:" in line:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
674
  try:
675
- percentage = float(re.search(r"Progress: (\d+\.\d+)%", line).group(1))
 
 
 
 
676
  model_percentage = (percentage / 100) * model_progress_per_step
677
  current_progress = (i * model_progress_per_step) + model_percentage
678
  current_progress = clamp_percentage(current_progress)
679
- yield None, i18n("loading_model").format(i+1, total_models, clean_model_name), update_progress_html(
680
- i18n("loading_model_progress").format(i+1, total_models, clean_model_name, current_progress),
681
- current_progress
682
- )
683
- except (AttributeError, ValueError) as e:
684
- print(f"Progress parsing error: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
685
 
686
  for line in process.stderr:
687
  stderr_output += line
@@ -868,6 +1167,8 @@ def auto_ensemble_process(
868
  print(f"File not found in Google Drive, copying from local path: {output_path}")
869
  shutil.copy(output_path, drive_output_path)
870
  print(f"Copied to Google Drive: {drive_output_path}")
 
 
871
 
872
  yield output_path, i18n("success_output_created") + f" Saved to {drive_output_path if IS_COLAB else output_path}", update_progress_html(
873
  i18n("ensemble_completed"), 100
 
13
 
14
  from datetime import datetime
15
  from helpers import INPUT_DIR, OLD_OUTPUT_DIR, ENSEMBLE_DIR, AUTO_ENSEMBLE_TEMP, move_old_files, clear_directory, BASE_DIR, clean_model, extract_model_name_from_checkpoint, sanitize_filename, find_clear_segment, save_segment, run_matchering, clamp_percentage
16
+ from model import get_model_config, get_model_chunk_size
17
  from apollo_processing import process_with_apollo # Import Apollo processing
18
  import torch
19
+
20
+ # PyTorch optimized backend (optional: PYTORCH_OPTIMIZED_AVAILABLE is False when the import fails)
21
+ try:
22
+ from pytorch_backend import PyTorchBackend
23
+ PYTORCH_OPTIMIZED_AVAILABLE = True
24
+ except ImportError:
25
+ PYTORCH_OPTIMIZED_AVAILABLE = False
26
  import yaml
27
  import gradio as gr
28
  import threading
 
36
  import re
37
  import psutil
38
  import concurrent.futures
39
+ from tqdm import tqdm
40
+
41
+ # Google OAuth imports (optional - for Colab/Google Drive support)
42
+ try:
43
+ from google.oauth2.credentials import Credentials
44
+ GOOGLE_OAUTH_AVAILABLE = True
45
+ except ImportError:
46
+ GOOGLE_OAUTH_AVAILABLE = False
47
+ Credentials = None
48
+
49
  import tempfile
 
50
  from urllib.parse import urlparse, quote
51
  try:
52
  from google.colab import drive
53
+ # The google.colab import succeeded, so treat this as a Colab environment
54
  IS_COLAB = True
55
  except ImportError:
56
  IS_COLAB = False
57
+ drive = None
58
  import matchering as mg
59
 
60
  import warnings
 
78
  """Create necessary directories and check Google Drive access."""
79
  if IS_COLAB:
80
  try:
81
+ # Check if Google Drive is already mounted
82
+ if os.path.exists('/content/drive/MyDrive'):
83
+ pass # Already mounted, no action needed
84
+ else:
85
  print("Mounting Google Drive...")
86
+ try:
87
+ from google.colab import drive
88
+ drive.mount('/content/drive', force_remount=True)
89
+ except AttributeError as ae:
90
+ # Handle 'NoneType' object has no attribute 'kernel' error
91
+ print(f"Warning: Google Drive mount skipped (Colab kernel issue): {str(ae)}")
92
+ print("Continuing with local storage...")
93
+ except Exception as mount_error:
94
+ print(f"Warning: Google Drive mount failed: {str(mount_error)}")
95
+ print("Continuing with local storage...")
96
  except Exception as e:
97
+ print(f"Warning: Google Drive setup error: {str(e)}")
98
+ print("Continuing without Google Drive...")
99
  os.makedirs(OUTPUT_DIR, exist_ok=True)
100
  os.makedirs(INPUT_DIR, exist_ok=True)
101
  os.makedirs(OLD_OUTPUT_DIR, exist_ok=True)
 
112
  except Exception as e:
113
  return None, f"Error refreshing output: {str(e)}"
114
 
115
def update_progress_html(progress_label, progress_percent, download_info=None):
    """Build the HTML snippet for the custom progress bar.

    Args:
        progress_label: Text shown above the main progress bar. It is
            HTML-escaped before insertion, because callers forward error
            messages and subprocess stderr, which can contain ``<``/``>``
            (for example the ``<module>`` frames of a traceback).
        progress_percent: Overall progress percentage; passed through
            ``clamp_percentage`` before use (presumably clamps to 0-100 —
            the helper is defined in ``helpers``, confirm there).
        download_info: Optional dict with ``'filename'`` and ``'percent'``
            keys. When it is a non-empty dict, a secondary download bar is
            rendered below the main bar.

    Returns:
        str: An HTML fragment containing a ``<style>`` block with the pulse
        keyframes, the main progress bar, and the optional download sub-bar.
    """
    # Local import keeps this block self-contained; html is standard library.
    from html import escape

    progress_percent = clamp_percentage(progress_percent)

    # str() mirrors what f-string interpolation did for non-string labels.
    safe_label = escape(str(progress_label))

    # The pulse animation only runs while work is in flight (strictly between 0 and 100).
    is_active = 0 < progress_percent < 100
    pulse_style = "animation: progress-pulse 1.5s ease-in-out infinite;" if is_active else ""

    # Build the download sub-bar only when download details were supplied.
    download_html = ""
    if download_info and isinstance(download_info, dict):
        # The filename originates from subprocess output, so escape it as well.
        dl_filename = escape(str(download_info.get('filename', '')))
        dl_percent = clamp_percentage(download_info.get('percent', 0))
        download_html = f"""
        <div style="margin-top: 8px; padding: 8px; background: rgba(0,0,0,0.3); border-radius: 5px;">
            <div style="font-size: 0.85rem; color: #a0a0a0; margin-bottom: 4px;">{dl_filename} - %{int(dl_percent)}</div>
            <div style="width: 100%; background-color: #333; border-radius: 4px; overflow: hidden;">
                <div style="width: {dl_percent}%; height: 14px; background: linear-gradient(90deg, #4ade80, #22d3ee); transition: width 0.3s ease-out; border-radius: 4px;"></div>
            </div>
        </div>
        """

    return f"""
    <style>
    @keyframes progress-pulse {{
        0%, 100% {{ opacity: 1; }}
        50% {{ opacity: 0.85; }}
    }}
    </style>
    <div id="custom-progress" style="margin-top: 10px;">
        <div style="font-size: 1rem; color: #C0C0C0; margin-bottom: 5px;" id="progress-label">{safe_label}</div>
        <div style="width: 100%; background-color: #444; border-radius: 5px; overflow: hidden;">
            <div id="progress-bar" style="width: {progress_percent}%; height: 20px; background: linear-gradient(90deg, #6e8efb, #a855f7); transition: width 0.5s ease-out; max-width: 100%; {pulse_style}"></div>
        </div>
        {download_html}
    </div>
    """
158
 
 
161
  return "Unknown"
162
  base_name = os.path.basename(checkpoint_path)
163
  model_name = os.path.splitext(base_name)[0]
 
164
  return model_name.strip()
165
 
166
+
167
+
168
+
169
+
170
+
171
+
172
+
173
+
174
+
175
+
176
+
177
+
178
+
179
+
180
+
181
+
182
+
183
+
184
+
185
+
186
+
187
  def run_command_and_process_files(
188
  model_type,
189
  config_path,
190
  start_check_point,
191
  INPUT_DIR,
192
  OUTPUT_DIR,
193
+ extract_instrumental,
194
+ use_tta,
195
+ demud_phaseremix_inst,
196
  progress=None,
197
  use_apollo=True,
198
  apollo_normal_model="Apollo Universal Model",
 
202
  apollo_overlap=2,
203
  apollo_method="normal_method",
204
  apollo_midside_model=None,
205
+ output_format="wav",
206
+ optimize_mode='channels_last',
207
+ enable_amp=True,
208
+ enable_tf32=True,
209
+ enable_cudnn_benchmark=True
210
  ):
211
+ """
212
+ Run inference.py with specified parameters and process output files.
213
+ This is a generator function that yields progress updates for real-time UI feedback.
214
+ """
215
  try:
216
+ # Create directories and check Google Drive access
217
+ setup_directories()
218
+
219
+ if not config_path:
220
+ raise ValueError(f"Configuration path is empty: model_type: {model_type}")
221
+ if not os.path.exists(config_path):
222
+ raise FileNotFoundError(f"Configuration file not found: {config_path}")
223
+ if not start_check_point or not os.path.exists(start_check_point):
224
+ raise FileNotFoundError(f"Checkpoint file not found: {start_check_point}")
225
+
226
+ # Validate inference parameters
227
+ try:
228
+ inference_chunk_size = int(inference_chunk_size)
229
+ inference_overlap = int(inference_overlap)
230
+ except (TypeError, ValueError) as e:
231
+ print(f"Invalid inference_chunk_size or inference_overlap: {e}. Defaulting to: inference_chunk_size=352800, inference_overlap=2")
232
+ inference_chunk_size = 352800
233
+ inference_overlap = 2
234
+
235
+ # Validate Apollo parameters
236
+ try:
237
+ apollo_chunk_size = int(apollo_chunk_size)
238
+ apollo_overlap = int(apollo_overlap)
239
+ except (TypeError, ValueError) as e:
240
+ print(f"Invalid apollo_chunk_size or apollo_overlap: {e}. Defaulting to: apollo_chunk_size=19, apollo_overlap=2")
241
+ apollo_chunk_size = 19
242
+ apollo_overlap = 2
243
+
244
+ # Initial progress yield
245
+ yield {"progress": 0, "status": "Starting audio separation...", "outputs": None}
246
+
247
+ # Always use optimized PyTorch backend
248
+ python_exe = "python"
249
+
250
+ if PYTORCH_OPTIMIZED_AVAILABLE:
251
+ from inference_pytorch import INFERENCE_PATH as PYTORCH_INFERENCE_PATH
252
+ inference_script = PYTORCH_INFERENCE_PATH if os.path.exists(PYTORCH_INFERENCE_PATH) else INFERENCE_PATH
253
+ print(f"Using PyTorch backend (mode: {optimize_mode})")
254
+ print(f" AMP: {enable_amp} | TF32: {enable_tf32} | cuDNN: {enable_cudnn_benchmark}")
255
+ else:
256
+ inference_script = INFERENCE_PATH
257
+ print("Warning: PyTorch optimized backend not available, using standard inference")
258
+
259
+
260
+
261
+
262
+
263
+
264
 
 
265
 
 
 
266
  cmd_parts = [
267
+ python_exe, inference_script,
268
+ "--model_type", model_type,
269
+ "--config_path", config_path,
270
+ "--start_check_point", start_check_point,
271
+ "--input_folder", INPUT_DIR,
272
+ "--store_dir", OUTPUT_DIR,
273
  "--chunk_size", str(inference_chunk_size),
274
  "--overlap", str(inference_overlap),
275
  "--export_format", f"{output_format} FLOAT"
276
  ]
277
+
278
+
279
+
280
+
281
+
282
+
283
+
284
+
285
+
286
+
287
+ # Add optimized backend arguments (always enabled)
288
+ if PYTORCH_OPTIMIZED_AVAILABLE:
289
+ cmd_parts.extend([
290
+ "--optimize_mode", optimize_mode
291
+ ])
292
+ if enable_amp:
293
+ cmd_parts.append("--enable_amp")
294
+ if enable_tf32:
295
+ cmd_parts.append("--enable_tf32")
296
+ if enable_cudnn_benchmark:
297
+ cmd_parts.append("--enable_cudnn_benchmark")
298
+
299
+ if extract_instrumental:
300
+ cmd_parts.append("--extract_instrumental")
301
+ if use_tta:
302
+ cmd_parts.append("--use_tta")
303
+ if demud_phaseremix_inst:
304
+ cmd_parts.append("--demud_phaseremix_inst")
305
+
306
+ print(f"Running command: {' '.join(cmd_parts)}")
307
+
308
+ # Use subprocess.Popen for real-time progress capture
309
  process = subprocess.Popen(
310
  cmd_parts,
311
+ cwd=BASE_DIR,
312
  stdout=subprocess.PIPE,
313
  stderr=subprocess.PIPE,
314
  text=True,
 
316
  universal_newlines=True
317
  )
318
 
 
 
 
 
 
 
319
  stderr_output = ""
320
+ last_yield_percent = -1
321
+ downloading_file = None
322
+
323
+ # Read stdout line-by-line for real-time progress updates
324
+ for line in process.stdout:
325
+ line_stripped = line.strip()
326
+
327
+ # Check for download progress [SESA_DOWNLOAD]
328
+ if line_stripped.startswith("[SESA_DOWNLOAD]"):
329
+ try:
330
+ dl_info = line_stripped.replace("[SESA_DOWNLOAD]", "")
331
+ if dl_info.startswith("START:"):
332
+ downloading_file = dl_info.replace("START:", "")
333
+ yield {"progress": 0, "status": i18n("downloading_model_file").format(downloading_file), "outputs": None}
334
+ elif dl_info.startswith("END:"):
335
+ downloading_file = None
336
+ elif ":" in dl_info:
337
+ parts = dl_info.rsplit(":", 1)
338
+ if len(parts) == 2:
339
+ filename, percent_str = parts
340
+ download_percent = int(percent_str)
341
+ yield {"progress": 0, "status": i18n("downloading_file_progress").format(filename, download_percent), "outputs": None}
342
+ except (ValueError, TypeError):
343
+ pass
344
+ # Check for [SESA_PROGRESS] prefix from inference script
345
+ elif line_stripped.startswith("[SESA_PROGRESS]"):
346
+ try:
347
+ percentage_str = line_stripped.replace("[SESA_PROGRESS]", "").strip()
348
+ percentage = float(percentage_str) if percentage_str else 0
349
+ percentage = min(max(percentage, 0), 100)
350
+
351
+ # Scale progress to 0-80% range (saving 80-100% for Apollo)
352
+ scaled_progress = int(percentage * 0.8)
353
+
354
+ # Yield on every percent change for smooth updates
355
+ if int(percentage) != last_yield_percent:
356
+ last_yield_percent = int(percentage)
357
+ yield {"progress": scaled_progress, "status": f"Separating audio... {int(percentage)}%", "outputs": None}
358
+ except (ValueError, TypeError):
359
+ pass
360
+ else:
361
+ # Only print important non-progress lines (errors, warnings, key info)
362
+ if line_stripped and not line_stripped.startswith((" ", " ")):
363
+ print(line_stripped)
364
+
365
+ # Capture stderr (only print errors)
366
+ for line in process.stderr:
367
+ stderr_output += line
368
+ line_s = line.strip()
369
+ if line_s and ("error" in line_s.lower() or "warning" in line_s.lower() or "traceback" in line_s.lower()):
370
+ print(f"Warning: {line_s}")
371
+
372
+ process.wait()
373
+
374
  if process.returncode != 0:
375
+ raise subprocess.CalledProcessError(process.returncode, cmd_parts, stderr=stderr_output)
376
+
377
+ yield {"progress": 80, "status": "Separation complete, processing outputs...", "outputs": None}
 
378
 
379
+ # Check if output files were created
380
  filename_model = extract_model_name_from_checkpoint(start_check_point)
381
+ output_files = os.listdir(OUTPUT_DIR)
382
  if not output_files:
383
+ raise FileNotFoundError("No output files created in OUTPUT_DIR")
384
+
385
+ def rename_files_with_model(folder, filename_model):
386
+ timestamp = datetime.now().strftime("%d-%m-%Y_%H-%M")
387
+ for filename in sorted(os.listdir(folder)):
388
+ file_path = os.path.join(folder, filename)
389
+ if not any(filename.lower().endswith(ext) for ext in ['.mp3', '.wav', '.flac', '.aac', '.ogg', '.m4a']):
390
+ continue
391
+ base, ext = os.path.splitext(filename)
392
+ detected_type = None
393
+ for type_key in ['vocals', 'instrumental', 'instrument', 'phaseremix', 'drum', 'bass', 'other', 'effects', 'speech', 'music', 'dry', 'male', 'female', 'bleed', 'karaoke', 'mid', 'side']:
394
+ if type_key in base.lower():
395
+ detected_type = type_key
396
+ break
397
+ # Normalize 'instrument' to 'Instrumental' for consistency
398
+ type_suffix = 'Instrumental' if detected_type == 'instrument' else (detected_type.capitalize() if detected_type else "Processed")
399
+ clean_base = sanitize_filename(base.split('_')[0]).rsplit('.', 1)[0]
400
+ new_filename = f"{timestamp}_{clean_base}_{type_suffix}_{filename_model}{ext}"
401
+ new_file_path = os.path.join(folder, new_filename)
402
+ try:
403
+ os.rename(file_path, new_file_path)
404
+ except Exception as e:
405
+ print(f"Could not rename file: {os.path.basename(file_path)} -> {os.path.basename(new_file_path)}: {str(e)}")
 
 
 
 
 
406
 
407
+ rename_files_with_model(OUTPUT_DIR, filename_model)
408
+
409
+ output_files = os.listdir(OUTPUT_DIR)
410
  if not output_files:
411
+ raise FileNotFoundError("No output files in OUTPUT_DIR after renaming")
412
 
413
+ def find_file(keywords):
414
+ """Find file matching any of the keywords (can be single keyword or list)."""
415
+ if isinstance(keywords, str):
416
+ keywords = [keywords]
417
  matching_files = [
418
+ os.path.join(OUTPUT_DIR, f) for f in output_files
419
+ if any(kw in f.lower() for kw in keywords)
420
  ]
421
  return matching_files[0] if matching_files else None
422
 
423
  output_list = [
424
+ find_file('vocals'), find_file(['instrumental', 'instrument']), find_file('phaseremix'),
425
  find_file('drum'), find_file('bass'), find_file('other'), find_file('effects'),
426
  find_file('speech'), find_file('music'), find_file('dry'), find_file('male'),
427
+ find_file('female'), find_file('bleed'), find_file('karaoke'),
428
+ find_file('mid'), find_file('side')
429
  ]
430
 
 
431
  normalized_outputs = []
432
+ for output_file in output_list:
433
  if output_file and os.path.exists(output_file):
434
  normalized_file = os.path.join(OUTPUT_DIR, f"{sanitize_filename(os.path.splitext(os.path.basename(output_file))[0])}.{output_format}")
435
+ if output_file.endswith(f".{output_format}") and output_file != normalized_file:
436
+ shutil.copy(output_file, normalized_file)
437
+ elif output_file != normalized_file:
438
+ audio, sr = librosa.load(output_file, sr=None, mono=False)
439
+ sf.write(normalized_file, audio.T if audio.ndim > 1 else audio, sr)
440
+ else:
441
+ normalized_file = output_file
442
+ normalized_outputs.append(normalized_file)
 
 
 
 
 
 
 
 
 
443
  else:
444
+ normalized_outputs.append(output_file)
445
 
446
+ # Apollo processing
447
  if use_apollo:
448
+ yield {"progress": 80, "status": "Enhancing with Apollo...", "outputs": None}
449
+ normalized_outputs = process_with_apollo(
450
+ output_files=normalized_outputs,
451
+ output_dir=OUTPUT_DIR,
452
+ apollo_chunk_size=apollo_chunk_size,
453
+ apollo_overlap=apollo_overlap,
454
+ apollo_method=apollo_method,
455
+ apollo_normal_model=apollo_normal_model,
456
+ apollo_midside_model=apollo_midside_model,
457
+ output_format=output_format,
458
+ progress=progress,
459
+ total_progress_start=80,
460
+ total_progress_end=100
461
+ )
 
 
 
 
 
 
 
 
 
 
462
 
463
+ # Final yield with outputs
464
+ yield {"progress": 100, "status": "Separation complete", "outputs": tuple(normalized_outputs)}
465
 
466
+ except subprocess.CalledProcessError as e:
467
+ print(f"Subprocess failed, code: {e.returncode}: {e.stderr}")
468
+ yield {"progress": 0, "status": f"Error: {e.stderr}", "outputs": (None,) * 16}
469
  except Exception as e:
470
+ print(f"run_command_and_process_files error: {str(e)}")
471
+ import traceback
472
+ traceback.print_exc()
473
+ yield {"progress": 0, "status": f"Error: {str(e)}", "outputs": (None,) * 16}
474
+
475
+
476
+
477
+
478
+
479
+
480
+
481
+
482
+
483
+
484
+
485
+
486
+
487
+
488
+
489
+
490
+
491
+
492
+
493
+
494
+
495
+
496
+
497
+
498
+
499
+
500
+
501
 
502
  def process_audio(
503
  input_audio_file,
 
505
  chunk_size,
506
  overlap,
507
  export_format,
508
+ optimize_mode,
509
+ enable_amp,
510
+ enable_tf32,
511
+ enable_cudnn_benchmark,
512
  use_tta,
513
  demud_phaseremix_inst,
514
  extract_instrumental,
 
518
  apollo_method,
519
  apollo_normal_model,
520
  apollo_midside_model,
521
+ use_matchering,
522
+ matchering_passes,
523
  progress=gr.Progress(track_tqdm=True),
524
  *args,
525
  **kwargs
526
  ):
527
+ """
528
+ Process audio with the selected model. This is a generator function that yields
529
+ progress updates for real-time UI feedback.
530
+ """
531
  try:
532
  # Check Google Drive connection
533
  setup_directories()
 
535
  if input_audio_file is not None:
536
  audio_path = input_audio_file.name if hasattr(input_audio_file, 'name') else input_audio_file
537
  else:
538
+ yield (
539
+ None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None,
540
  "No audio file provided",
541
  update_progress_html("No input provided", 0)
542
  )
543
+ return
544
 
545
  os.makedirs(OUTPUT_DIR, exist_ok=True)
546
  os.makedirs(OLD_OUTPUT_DIR, exist_ok=True)
547
  move_old_files(OUTPUT_DIR)
548
 
 
549
  # Clean model name, remove ⭐ and other unwanted characters
550
  clean_model_name = clean_model(model) if not model.startswith("/") else extract_model_name_from_checkpoint(model)
551
+ print(f"Processing: {os.path.basename(audio_path)} | Model: {clean_model_name}")
 
 
552
 
553
  # Validate inference parameters
554
+ _use_yaml_chunk = (chunk_size == "yaml")
555
  try:
556
+ inference_chunk_size = 352800 if _use_yaml_chunk else int(chunk_size)
557
  except (TypeError, ValueError):
558
  print(f"Invalid chunk_size: {chunk_size}. Defaulting to: 352800.")
559
  inference_chunk_size = 352800
560
+ _use_yaml_chunk = True # fallback: read from YAML
561
 
562
  try:
563
  inference_overlap = int(overlap)
 
586
  else:
587
  print(f"Invalid apollo_method: {apollo_method}. Defaulting to: normal_method.")
588
  apollo_method = "normal_method"
 
 
 
 
589
  # Copy input file to INPUT_DIR
590
  input_filename = os.path.basename(audio_path)
591
  dest_path = os.path.join(INPUT_DIR, input_filename)
592
  shutil.copy(audio_path, dest_path)
 
593
 
594
+ # Yield status for model loading
595
+ yield (
596
+ None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None,
597
+ f"Loading model: {clean_model_name}...",
598
+ update_progress_html(f"Loading model: {clean_model_name}", 0)
599
+ )
600
+
601
+ # Get model configuration with cleaned model name (downloads if needed)
602
  model_type, config_path, start_check_point = get_model_config(clean_model_name, inference_chunk_size, inference_overlap)
 
603
 
604
+ # Read the model's native chunk_size from its YAML (now guaranteed to be downloaded)
605
+ native_chunk = get_model_chunk_size(clean_model_name)
606
+ if _use_yaml_chunk and native_chunk:
607
+ print(f"Using model's native chunk_size from YAML: {native_chunk}")
608
+ inference_chunk_size = native_chunk
609
+ elif not _use_yaml_chunk:
610
+ print(f"Using user-selected chunk_size: {inference_chunk_size}")
611
+
612
+ # Iterate over the generator and yield progress updates
613
+ outputs = None
614
+ for update in run_command_and_process_files(
615
  model_type=model_type,
616
  config_path=config_path,
617
  start_check_point=start_check_point,
 
629
  apollo_overlap=apollo_overlap,
630
  apollo_method=apollo_method,
631
  apollo_midside_model=apollo_midside_model,
632
+ output_format=export_format.split()[0].lower(),
633
+ optimize_mode=optimize_mode,
634
+ enable_amp=enable_amp,
635
+ enable_tf32=enable_tf32,
636
+ enable_cudnn_benchmark=enable_cudnn_benchmark
637
+ ):
638
+ if update.get("outputs") is not None:
639
+ outputs = update["outputs"]
640
+ # Yield progress update to Gradio
641
+ yield (
642
+ None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None,
643
+ update["status"],
644
+ update_progress_html(update["status"], update["progress"])
645
+ )
646
 
647
  if outputs is None or all(output is None for output in outputs):
648
  raise ValueError("run_command_and_process_files returned None or all None outputs")
649
 
650
  # Apply Matchering (if enabled)
651
  if use_matchering:
652
+ # Yield progress update for Matchering
653
+ yield (
654
+ None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None,
655
+ "Applying Matchering...",
656
+ update_progress_html("Applying Matchering...", 90)
657
+ )
658
 
659
  # Find clean segment from original audio
660
  segment_start, segment_end, segment_audio = find_clear_segment(audio_path)
 
684
 
685
  outputs = tuple(mastered_outputs)
686
 
687
+ # Final yield with all outputs
688
+ yield (
 
 
689
  outputs[0], outputs[1], outputs[2], outputs[3], outputs[4], outputs[5], outputs[6],
690
  outputs[7], outputs[8], outputs[9], outputs[10], outputs[11], outputs[12], outputs[13],
691
+ outputs[14], outputs[15],
692
  "Audio processing completed",
693
  update_progress_html("Audio processing completed", 100)
694
  )
 
697
  print(f"process_audio error: {str(e)}")
698
  import traceback
699
  traceback.print_exc()
700
+ yield (
701
+ None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None,
702
  f"Error occurred: {str(e)}",
703
  update_progress_html("Error occurred", 0)
704
  )
 
729
  ensemble_args += ["--weights", *weights_list]
730
 
731
  progress(0, desc="Starting ensemble process", total=100)
732
+
733
+ # Run ensemble subprocess with real-time output capture
734
+ process = subprocess.Popen(
735
  ["python", "ensemble.py"] + ensemble_args,
736
+ stdout=subprocess.PIPE,
737
+ stderr=subprocess.PIPE,
738
+ text=True,
739
+ bufsize=1,
740
+ universal_newlines=True
741
  )
742
 
743
+ stdout_output = ""
744
+ stderr_output = ""
745
+
746
+ # Read output in real-time and capture actual progress
747
+ for line in process.stdout:
748
+ stdout_output += line
749
+ line_stripped = line.strip()
750
+
751
+ # Capture real progress percentage from ensemble.py with new format
752
+ if line_stripped.startswith("[SESA_PROGRESS]"):
753
+ try:
754
+ percent_str = line_stripped.replace("[SESA_PROGRESS]", "").strip()
755
+ percent = int(float(percent_str)) if percent_str else 0
756
+ percent = min(max(percent, 0), 100)
757
+ progress(percent, desc=f"Ensemble progress: {percent}%")
758
+ except (ValueError, TypeError):
759
+ pass
760
+ # Legacy format support
761
+ elif line_stripped.startswith("Progress:"):
762
+ try:
763
+ percent = int(line_stripped.split(":")[1].strip().replace("%", ""))
764
+ percent = min(max(percent, 0), 100)
765
+ progress(percent, desc=f"Ensemble progress: {percent}%")
766
+ except (ValueError, IndexError):
767
+ pass
768
+ elif "loading" in line.lower():
769
+ print(f"Ensemble: {line_stripped}")
770
+ progress(5, desc="Loading audio files for ensemble...")
771
+ elif "processing ensemble" in line.lower():
772
+ print(f"Ensemble: {line_stripped}")
773
+ progress(10, desc="Starting ensemble processing...")
774
+ elif "saving" in line.lower():
775
+ print(f"Ensemble: {line_stripped}")
776
+ progress(95, desc="Saving ensemble output...")
777
+ elif line_stripped and not line_stripped.startswith("[SESA_PROGRESS]") and not line_stripped.startswith("Progress:"):
778
+ # Only print non-progress messages
779
+ print(f"Ensemble: {line_stripped}")
780
+
781
+ for line in process.stderr:
782
+ stderr_output += line
783
+ print(f"Ensemble stderr: {line.strip()}")
784
 
785
+ process.wait()
786
+ result = type('Result', (), {'stdout': stdout_output, 'stderr': stderr_output, 'returncode': process.returncode})()
787
+
788
+ progress(100, desc="Ensemble complete")
789
  log = f"Success: {result.stdout}" if not result.stderr else f"Error: {result.stderr}"
790
  return output_path, log
791
 
 
837
  input_filename = os.path.basename(audio_path)
838
  dest_path = os.path.join(INPUT_DIR, input_filename)
839
  shutil.copy(audio_path, dest_path)
 
840
 
841
  # Parse apollo method
842
  if auto_apollo_method in ["2", 2]:
843
  auto_apollo_method = "mid_side_method"
844
  elif auto_apollo_method in ["1", 1]:
845
  auto_apollo_method = "normal_method"
 
846
 
847
  corrected_auto_chunk_size = int(auto_apollo_chunk_size)
848
  corrected_auto_overlap = int(auto_apollo_overlap)
 
849
 
850
  # Setup temporary directories
851
  auto_ensemble_temp = os.path.join(BASE_DIR, "auto_ensemble_temp")
 
859
 
860
  for i, model in enumerate(selected_models):
861
  clean_model_name = clean_model(model)
 
862
  model_output_dir = os.path.join(auto_ensemble_temp, clean_model_name)
863
  os.makedirs(model_output_dir, exist_ok=True)
864
 
 
870
  )
871
 
872
  model_type, config_path, start_check_point = get_model_config(clean_model_name, auto_chunk_size, auto_overlap)
873
+
874
+ # Read the model's native chunk_size from its YAML after download
875
+ native_chunk = get_model_chunk_size(clean_model_name)
876
+ effective_chunk_size = native_chunk if native_chunk else auto_chunk_size
877
+ if native_chunk:
878
+ print(f"Using model's native chunk_size from YAML: {native_chunk} (UI value was: {auto_chunk_size})")
879
 
880
  cmd = [
881
  "python", INFERENCE_PATH,
 
884
  "--start_check_point", start_check_point,
885
  "--input_folder", INPUT_DIR,
886
  "--store_dir", model_output_dir,
887
+ "--chunk_size", str(effective_chunk_size),
888
  "--overlap", str(auto_overlap),
889
  "--export_format", f"{export_format.split()[0].lower()} FLOAT"
890
  ]
 
904
  )
905
 
906
  stderr_output = ""
907
+ last_yield_percent = -1
908
+ downloading_file = None
909
+
910
  for line in process.stdout:
911
+ line_stripped = line.strip()
912
+
913
+ # Check for download progress [SESA_DOWNLOAD]
914
+ if line_stripped.startswith("[SESA_DOWNLOAD]"):
915
+ try:
916
+ dl_info = line_stripped.replace("[SESA_DOWNLOAD]", "")
917
+ if dl_info.startswith("START:"):
918
+ downloading_file = dl_info.replace("START:", "")
919
+ yield None, i18n("downloading_model_file").format(downloading_file), update_progress_html(
920
+ i18n("downloading_model_file").format(downloading_file),
921
+ i * model_progress_per_step,
922
+ download_info={"filename": downloading_file, "percent": 0}
923
+ )
924
+ elif dl_info.startswith("END:"):
925
+ downloading_file = None
926
+ elif ":" in dl_info:
927
+ parts = dl_info.rsplit(":", 1)
928
+ if len(parts) == 2:
929
+ filename, percent_str = parts
930
+ download_percent = int(percent_str)
931
+ yield None, i18n("downloading_file_progress").format(filename, download_percent), update_progress_html(
932
+ i18n("downloading_model_file").format(filename),
933
+ i * model_progress_per_step,
934
+ download_info={"filename": filename, "percent": download_percent}
935
+ )
936
+ except (ValueError, TypeError):
937
+ pass
938
+ # Check for unique progress prefix [SESA_PROGRESS]
939
+ elif line_stripped.startswith("[SESA_PROGRESS]"):
940
  try:
941
+ # Extract percentage from [SESA_PROGRESS]XX format
942
+ percentage_str = line_stripped.replace("[SESA_PROGRESS]", "").strip()
943
+ percentage = float(percentage_str) if percentage_str else 0
944
+ percentage = min(max(percentage, 0), 100) # Clamp to 0-100
945
+
946
  model_percentage = (percentage / 100) * model_progress_per_step
947
  current_progress = (i * model_progress_per_step) + model_percentage
948
  current_progress = clamp_percentage(current_progress)
949
+
950
+ # Yield on every percent change for smooth updates
951
+ if int(percentage) != last_yield_percent:
952
+ last_yield_percent = int(percentage)
953
+ yield None, i18n("loading_model_progress_label").format(i+1, total_models, clean_model_name, int(percentage)), update_progress_html(
954
+ f"Model {i+1}/{total_models}: {clean_model_name} - {int(percentage)}%",
955
+ current_progress
956
+ )
957
+ except (ValueError, TypeError):
958
+ # Silently ignore parsing errors for progress lines
959
+ pass
960
+ # Also support legacy "Progress: XX%" format for backwards compatibility
961
+ elif line_stripped.startswith("Progress:"):
962
+ try:
963
+ match = re.search(r"Progress:\s*(\d+(?:\.\d+)?)%?", line_stripped)
964
+ if match:
965
+ percentage = float(match.group(1))
966
+ percentage = min(max(percentage, 0), 100)
967
+
968
+ model_percentage = (percentage / 100) * model_progress_per_step
969
+ current_progress = (i * model_progress_per_step) + model_percentage
970
+ current_progress = clamp_percentage(current_progress)
971
+
972
+ if int(percentage) != last_yield_percent:
973
+ last_yield_percent = int(percentage)
974
+ yield None, i18n("loading_model_progress_label").format(i+1, total_models, clean_model_name, int(percentage)), update_progress_html(
975
+ f"Model {i+1}/{total_models}: {clean_model_name} - {int(percentage)}%",
976
+ current_progress
977
+ )
978
+ except (ValueError, TypeError):
979
+ pass
980
+ else:
981
+ # Print non-progress lines
982
+ if line_stripped:
983
+ print(line_stripped)
984
 
985
  for line in process.stderr:
986
  stderr_output += line
 
1167
  print(f"File not found in Google Drive, copying from local path: {output_path}")
1168
  shutil.copy(output_path, drive_output_path)
1169
  print(f"Copied to Google Drive: {drive_output_path}")
1170
+ else:
1171
+ drive_output_path = output_path
1172
 
1173
  yield output_path, i18n("success_output_created") + f" Saved to {drive_output_path if IS_COLAB else output_path}", update_progress_html(
1174
  i18n("ensemble_completed"), 100
pytorch_backend.py ADDED
@@ -0,0 +1,621 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # coding: utf-8
2
+ __author__ = 'PyTorch Backend Implementation'
3
+
4
+ import os
5
+ import pickle
6
+ import numpy as np
7
+ import torch
8
+ import torch.nn as nn
9
+ from typing import Dict, Tuple, Optional, Any
10
+ import warnings
11
+ import hashlib
12
+ import time
13
+
14
+ # Suppress channels_last warnings for 3D audio tensors
15
+ warnings.filterwarnings("ignore", message=".*channels_last.*")
16
+ warnings.filterwarnings("ignore", message=".*rank 3.*")
17
+
18
+
19
+ class PyTorchBackend:
20
+ """
21
+ ULTRA-OPTIMIZED PyTorch backend for model inference.
22
+ Provides various optimization techniques for maximum speed.
23
+ """
24
+
25
+ def __init__(self, device='cuda:0', optimize_mode='channels_last'):
26
+ """
27
+ Initialize ULTRA-OPTIMIZED PyTorch backend.
28
+
29
+ Parameters:
30
+ ----------
31
+ device : str
32
+ Device to use for inference (cuda:0, cpu, mps, etc.)
33
+ optimize_mode : str
34
+ Optimization mode: 'channels_last' (recommended), 'compile', 'jit', or 'default'
35
+ """
36
+ self.device = device
37
+ self.optimize_mode = optimize_mode
38
+ self.model = None
39
+ self.compiled_model = None
40
+
41
+ # Check device availability
42
+ if device.startswith('cuda') and not torch.cuda.is_available():
43
+ warnings.warn("CUDA not available, falling back to CPU")
44
+ self.device = 'cpu'
45
+ elif device == 'mps' and not torch.backends.mps.is_available():
46
+ warnings.warn("MPS not available, falling back to CPU")
47
+ self.device = 'cpu'
48
+
49
+ # Apply ultra optimization settings
50
+ self._apply_ultra_optimizations()
51
+
52
+ def _apply_ultra_optimizations(self):
53
+ """Apply ultra-speed optimizations globally."""
54
+ if self.device.startswith('cuda'):
55
+ # Enable all CUDA optimizations
56
+ torch.backends.cudnn.benchmark = True
57
+ torch.backends.cuda.matmul.allow_tf32 = True
58
+ torch.backends.cudnn.allow_tf32 = True
59
+
60
+ # Set optimal CUDA settings
61
+ torch.backends.cudnn.deterministic = False
62
+ torch.backends.cudnn.enabled = True
63
+
64
+ # Enable cuBLAS optimizations
65
+ os.environ['CUBLAS_WORKSPACE_CONFIG'] = ':4096:8'
66
+
67
+ # Optimize CPU inference
68
+ if self.device == 'cpu':
69
+ import multiprocessing
70
+ num_threads = multiprocessing.cpu_count()
71
+ torch.set_num_threads(num_threads)
72
+ torch.set_num_interop_threads(num_threads)
73
+ print(f"CPU threads set to {num_threads}")
74
+
75
+ def optimize_model(
76
+ self,
77
+ model: nn.Module,
78
+ example_input: Optional[torch.Tensor] = None,
79
+ use_amp: bool = True,
80
+ use_channels_last: bool = True
81
+ ) -> nn.Module:
82
+ """
83
+ Optimize PyTorch model for inference.
84
+
85
+ Parameters:
86
+ ----------
87
+ model : nn.Module
88
+ PyTorch model to optimize
89
+ example_input : Optional[torch.Tensor]
90
+ Example input for optimization (required for some modes)
91
+ use_amp : bool
92
+ Use automatic mixed precision (AMP)
93
+ use_channels_last : bool
94
+ Use channels-last memory format
95
+
96
+ Returns:
97
+ -------
98
+ nn.Module
99
+ Optimized model
100
+ """
101
+ print(f"Optimizing model with mode: {self.optimize_mode}")
102
+
103
+ self.model = model.eval().to(self.device)
104
+ self.use_amp = use_amp
105
+
106
+ # Disable gradients for all parameters (inference only)
107
+ for param in self.model.parameters():
108
+ param.requires_grad = False
109
+
110
+ # Apply memory format optimization (default: channels_last for CUDA)
111
+ # Note: Audio models use 3D tensors, so channels_last is applied only where beneficial
112
+ if use_channels_last and self.device.startswith('cuda'):
113
+ print(" Using channels-last optimization")
114
+ # Only apply to model if it has 4D conv layers, otherwise skip silently
115
+ try:
116
+ with warnings.catch_warnings():
117
+ warnings.simplefilter("ignore")
118
+ self.model = self.model.to(memory_format=torch.channels_last)
119
+ except Exception:
120
+ pass # Silently skip for models that don't support channels_last
121
+
122
+ # Set model to inference mode
123
+ torch.set_grad_enabled(False)
124
+
125
+ # Apply optimization based on mode
126
+ if self.optimize_mode == 'compile':
127
+ self.compiled_model = self._compile_model(self.model)
128
+ elif self.optimize_mode == 'jit':
129
+ if example_input is None:
130
+ raise ValueError("example_input required for JIT optimization")
131
+ self.compiled_model = self._jit_trace_model(self.model, example_input)
132
+ elif self.optimize_mode == 'channels_last':
133
+ self.compiled_model = self.model
134
+ else:
135
+ print(" Using default optimization")
136
+ self.compiled_model = self.model
137
+
138
+ # Apply fusion optimizations if possible
139
+ try:
140
+ if hasattr(torch.nn.utils, 'fusion'):
141
+ self.compiled_model = torch.nn.utils.fusion.fuse_conv_bn_eval(self.compiled_model)
142
+ print(" Conv-BN fusion applied")
143
+ except:
144
+ pass
145
+
146
+ print("Optimization complete")
147
+ return self.compiled_model
148
+
149
+ def _compile_model(self, model: nn.Module) -> nn.Module:
150
+ """
151
+ Compile model using torch.compile (PyTorch 2.0+) with ULTRA optimization.
152
+
153
+ Parameters:
154
+ ----------
155
+ model : nn.Module
156
+ Model to compile
157
+
158
+ Returns:
159
+ -------
160
+ nn.Module
161
+ Compiled model
162
+ """
163
+ try:
164
+ if hasattr(torch, 'compile'):
165
+ print(" Compiling model with torch.compile")
166
+ # Try max-autotune for best performance
167
+ try:
168
+ compiled = torch.compile(model, mode='max-autotune', fullgraph=True)
169
+ print(" Using max-autotune mode")
170
+ return compiled
171
+ except:
172
+ # Fallback to reduce-overhead
173
+ compiled = torch.compile(model, mode='reduce-overhead')
174
+ print(" Using reduce-overhead mode")
175
+ return compiled
176
+ else:
177
+ print(" torch.compile not available (requires PyTorch 2.0+)")
178
+ return model
179
+ except Exception as e:
180
+ print(f" Compilation failed: {e}")
181
+ return model
182
+
183
+ def _jit_trace_model(self, model: nn.Module, example_input: torch.Tensor) -> nn.Module:
184
+ """
185
+ Trace model using TorchScript JIT.
186
+
187
+ Parameters:
188
+ ----------
189
+ model : nn.Module
190
+ Model to trace
191
+ example_input : torch.Tensor
192
+ Example input for tracing
193
+
194
+ Returns:
195
+ -------
196
+ nn.Module
197
+ Traced model
198
+ """
199
+ try:
200
+ print(" → Tracing model with TorchScript JIT")
201
+ with torch.no_grad():
202
+ traced = torch.jit.trace(model, example_input)
203
+ traced = torch.jit.optimize_for_inference(traced)
204
+ return traced
205
+ except Exception as e:
206
+ print(f" JIT tracing failed: {e}")
207
+ return model
208
+
209
+ def save_optimized_model(self, save_path: str):
210
+ """
211
+ Save optimized model to file.
212
+
213
+ Parameters:
214
+ ----------
215
+ save_path : str
216
+ Path to save the model
217
+ """
218
+ if self.compiled_model is None:
219
+ raise RuntimeError("No model has been optimized yet")
220
+
221
+ try:
222
+ # Save based on optimization mode
223
+ if self.optimize_mode == 'jit':
224
+ torch.jit.save(self.compiled_model, save_path)
225
+ else:
226
+ torch.save(self.compiled_model.state_dict(), save_path)
227
+ print(f"✓ Model saved to: {save_path}")
228
+ except Exception as e:
229
+ print(f"✗ Failed to save model: {e}")
230
+
231
+ def load_optimized_model(self, load_path: str, model_template: nn.Module) -> nn.Module:
232
+ """
233
+ Load optimized model from file.
234
+
235
+ Parameters:
236
+ ----------
237
+ load_path : str
238
+ Path to the saved model
239
+ model_template : nn.Module
240
+ Model template for loading state dict
241
+
242
+ Returns:
243
+ -------
244
+ nn.Module
245
+ Loaded model
246
+ """
247
+ try:
248
+ if self.optimize_mode == 'jit':
249
+ self.compiled_model = torch.jit.load(load_path, map_location=self.device)
250
+ else:
251
+ model_template.load_state_dict(torch.load(load_path, map_location=self.device, weights_only=False))
252
+ self.compiled_model = model_template.eval()
253
+
254
+ print(f"✓ Model loaded from: {load_path}")
255
+ return self.compiled_model
256
+ except (pickle.UnpicklingError, RuntimeError, EOFError) as e:
257
+ error_details = f"""
258
+ CHECKPOINT FILE CORRUPTED
259
+
260
+ Error: {str(e)}
261
+
262
+ The checkpoint file appears to be corrupted or was not downloaded correctly.
263
+ File: {load_path}
264
+
265
+ Common causes:
266
+ - File is an HTML page (wrong download URL, e.g., HuggingFace /blob/ instead of /resolve/)
267
+ - Incomplete or interrupted download
268
+ - Network issues during download
269
+ - File system corruption
270
+
271
+ Solution:
272
+ 1. Delete the corrupted checkpoint file:
273
+ {load_path}
274
+ 2. Re-run the application - it will automatically re-download the model
275
+ 3. If the problem persists, check that your model URL uses /resolve/ not /blob/
276
+ Example: https://huggingface.co/user/repo/resolve/main/model.ckpt
277
+ """
278
+ print(error_details)
279
+ raise
280
+ except Exception as e:
281
+ print(f"✗ Failed to load model: {e}")
282
+ raise
283
+
284
+ def __call__(self, x: torch.Tensor) -> torch.Tensor:
285
+ """
286
+ Run inference with optimized model.
287
+
288
+ Parameters:
289
+ ----------
290
+ x : torch.Tensor
291
+ Input tensor
292
+
293
+ Returns:
294
+ -------
295
+ torch.Tensor
296
+ Model output
297
+ """
298
+ if self.compiled_model is None:
299
+ raise RuntimeError("No model has been optimized yet")
300
+
301
+ # Apply memory format if needed (only for 4D tensors - images)
302
+ # Audio models typically use 3D tensors, so we silently skip channels_last for them
303
+ if self.optimize_mode == 'channels_last' and x.dim() == 4:
304
+ x = x.to(memory_format=torch.channels_last)
305
+
306
+ # Run inference with AMP if enabled
307
+ try:
308
+ if self.use_amp and self.device.startswith('cuda'):
309
+ with torch.cuda.amp.autocast():
310
+ with torch.no_grad():
311
+ return self.compiled_model(x)
312
+ else:
313
+ with torch.no_grad():
314
+ return self.compiled_model(x)
315
+ except Exception as e:
316
+ # Fallback to non-compiled model if torch.compile fails at runtime
317
+ # This can happen with rotary embeddings that mutate class variables
318
+ if self.optimize_mode == 'compile' and self.model is not None:
319
+ print(f" ⚠️ torch.compile runtime error: {type(e).__name__}")
320
+ print(f" 🔄 Falling back to non-compiled model...")
321
+ self.compiled_model = self.model
322
+ self.optimize_mode = 'fallback'
323
+ # Retry with non-compiled model
324
+ if self.use_amp and self.device.startswith('cuda'):
325
+ with torch.cuda.amp.autocast():
326
+ with torch.no_grad():
327
+ return self.compiled_model(x)
328
+ else:
329
+ with torch.no_grad():
330
+ return self.compiled_model(x)
331
+ else:
332
+ raise
333
+
334
+
335
class PyTorchOptimizer:
    """
    Helper class for various PyTorch optimization techniques.

    All methods are static and change process-wide PyTorch settings.
    """

    @staticmethod
    def enable_cudnn_benchmark():
        """Enable cuDNN benchmark mode (no-op when CUDA is unavailable)."""
        if torch.cuda.is_available():
            torch.backends.cudnn.benchmark = True
            torch.backends.cudnn.deterministic = False
            print("cuDNN benchmark enabled")

    @staticmethod
    def enable_cudnn_deterministic():
        """Enable cuDNN deterministic mode for reproducible results."""
        if torch.cuda.is_available():
            torch.backends.cudnn.deterministic = True
            torch.backends.cudnn.benchmark = False
            print("✓ cuDNN deterministic mode enabled")

    @staticmethod
    def enable_tf32():
        """Enable TF32 for Ampere GPUs (RTX 30xx+)."""
        if torch.cuda.is_available():
            torch.backends.cuda.matmul.allow_tf32 = True
            torch.backends.cudnn.allow_tf32 = True
            # 'high' lets float32 matmuls use TF32. 'highest' is the
            # full-precision (slowest) setting, while 'medium' trades more
            # precision for speed.
            if hasattr(torch, 'set_float32_matmul_precision'):
                torch.set_float32_matmul_precision('high')
            print("TF32 enabled")

    @staticmethod
    def set_num_threads(num_threads: int):
        """Set number of threads for CPU inference."""
        torch.set_num_threads(num_threads)
        print(f"✓ Number of threads set to: {num_threads}")

    @staticmethod
    def optimize_for_inference(model: nn.Module) -> nn.Module:
        """
        Prepare a model for inference.

        Switches the model to eval mode, disables autograd globally via
        ``torch.set_grad_enabled(False)`` and clears ``requires_grad`` on
        every parameter. The model is modified in place.

        Parameters:
        ----------
        model : nn.Module
            Model to optimize

        Returns:
        -------
        nn.Module
            The same model instance, ready for inference
        """
        model.eval()
        torch.set_grad_enabled(False)

        # Disable gradient computation for all parameters
        for param in model.parameters():
            param.requires_grad = False

        # NOTE: the earlier "fusion" attempts were removed because they could
        # never succeed. torch.quantization.fuse_modules() requires a
        # modules_to_fuse list and fuse_conv_bn_eval() requires a (conv, bn)
        # pair, so both calls always raised a TypeError that a bare except
        # swallowed. Real fusion needs per-architecture module names.

        return model
411
+
412
+
413
def benchmark_pytorch_optimizations(
    model: nn.Module,
    input_shape: Tuple[int, ...],
    device: str = 'cuda:0',
    num_iterations: int = 100,
    warmup_iterations: int = 10
) -> Dict[str, Optional[float]]:
    """
    Benchmark different PyTorch optimization techniques.

    The same ``model`` instance is optimized in place for every mode, so
    settings applied by an earlier mode (eval mode, memory format) persist
    into later ones.

    Parameters:
    ----------
    model : nn.Module
        Model to benchmark
    input_shape : Tuple[int, ...]
        Input tensor shape
    device : str
        Device to use; the backend may fall back to CPU if it is unavailable
    num_iterations : int
        Number of benchmark iterations
    warmup_iterations : int
        Number of warmup iterations

    Returns:
    -------
    Dict[str, Optional[float]]
        Average inference time in milliseconds per mode, or None for a
        mode that failed
    """
    results = {}
    optimization_modes = ['default', 'compile', 'channels_last']

    for mode in optimization_modes:
        print(f"\n{'='*60}")
        print(f"Benchmarking: {mode}")
        print('='*60)

        try:
            backend = PyTorchBackend(device=device, optimize_mode=mode)

            # Use the device the backend actually resolved to: it falls back
            # to CPU when the requested accelerator is missing, and both the
            # input placement and the CUDA synchronization must follow it.
            run_device = backend.device
            on_cuda = run_device.startswith('cuda')
            dummy_input = torch.randn(*input_shape).to(run_device)

            # Optimize model
            if mode == 'compile':
                backend.optimize_model(model, use_amp=True)
            else:
                backend.optimize_model(
                    model,
                    example_input=dummy_input,
                    use_amp=True,
                    use_channels_last=(mode == 'channels_last')
                )

            # Warmup
            for _ in range(warmup_iterations):
                _ = backend(dummy_input)

            # Drain queued GPU work so it is not counted in the timed region
            if on_cuda:
                torch.cuda.synchronize()

            # perf_counter is monotonic and high-resolution, unlike time.time()
            start = time.perf_counter()
            for _ in range(num_iterations):
                _ = backend(dummy_input)

            if on_cuda:
                torch.cuda.synchronize()

            elapsed = (time.perf_counter() - start) / num_iterations
            results[mode] = elapsed * 1000  # Convert to ms

            print(f" Average time: {results[mode]:.2f} ms")

        except Exception as e:
            print(f" Failed: {e}")
            results[mode] = None

    return results
490
+
491
+
492
def create_inference_session(
    model: nn.Module,
    device: str = 'cuda:0',
    optimize_mode: str = 'default',
    enable_amp: bool = True,
    enable_tf32: bool = True,
    enable_cudnn_benchmark: bool = True,
    example_input: Optional[torch.Tensor] = None
) -> PyTorchBackend:
    """
    Create an optimized inference session.

    Parameters:
    ----------
    model : nn.Module
        Model to use for inference
    device : str
        Device to use
    optimize_mode : str
        Optimization mode
    enable_amp : bool
        Enable automatic mixed precision
    enable_tf32 : bool
        Enable TF32 (for Ampere GPUs)
    enable_cudnn_benchmark : bool
        Enable cuDNN benchmark
    example_input : Optional[torch.Tensor]
        Example input forwarded to the backend; required when
        optimize_mode is 'jit'

    Returns:
    -------
    PyTorchBackend
        Configured inference session

    Raises:
    ------
    ValueError
        If optimize_mode is 'jit' and example_input is not provided.
    """
    # Apply global optimizations
    optimizer = PyTorchOptimizer()

    if enable_cudnn_benchmark:
        optimizer.enable_cudnn_benchmark()

    if enable_tf32 and device.startswith('cuda'):
        optimizer.enable_tf32()

    # The backend constructor switches cuDNN benchmark and TF32 on
    # unconditionally for CUDA devices. Snapshot the current flags so a
    # caller who passed enable_*=False gets their previous settings back.
    prev_benchmark = torch.backends.cudnn.benchmark
    prev_matmul_tf32 = torch.backends.cuda.matmul.allow_tf32
    prev_cudnn_tf32 = torch.backends.cudnn.allow_tf32

    # Create backend
    backend = PyTorchBackend(device=device, optimize_mode=optimize_mode)

    if not enable_cudnn_benchmark:
        torch.backends.cudnn.benchmark = prev_benchmark
    if not enable_tf32:
        torch.backends.cuda.matmul.allow_tf32 = prev_matmul_tf32
        torch.backends.cudnn.allow_tf32 = prev_cudnn_tf32

    backend.optimize_model(model, example_input=example_input, use_amp=enable_amp)

    return backend
537
+
538
+
539
def convert_model_to_onnx(
    model: nn.Module,
    input_shape: Tuple[int, ...],
    output_path: str,
    opset_version: int = 14
):
    """
    Convert PyTorch model to ONNX format.

    Failures are reported on stdout rather than raised. The model is
    switched to eval mode as a side effect.

    Parameters:
    ----------
    model : nn.Module
        Model to convert
    input_shape : Tuple[int, ...]
        Input tensor shape
    output_path : str
        Path to save ONNX model
    opset_version : int
        ONNX opset version

    Returns:
    -------
    bool
        True when the model was exported and passed the ONNX checker,
        False otherwise
    """
    # Import separately so that only a genuinely missing `onnx` package
    # produces the install hint; import errors raised inside the exporter
    # are reported as conversion failures with their real message.
    try:
        import onnx
    except ImportError:
        print("✗ ONNX not available. Install with: pip install onnx")
        return False

    try:
        model.eval()

        # Trace with an input on the model's own device; a CPU tensor fed to
        # a GPU-resident model fails with a device mismatch.
        dummy_input = torch.randn(*input_shape)
        first_param = next(model.parameters(), None)
        if first_param is not None:
            dummy_input = dummy_input.to(first_param.device)

        print(f"Converting model to ONNX (opset {opset_version})...")
        torch.onnx.export(
            model,
            dummy_input,
            output_path,
            export_params=True,
            opset_version=opset_version,
            do_constant_folding=True,
            input_names=['input'],
            output_names=['output'],
            # Only the batch dimension is exported as dynamic
            dynamic_axes={
                'input': {0: 'batch_size'},
                'output': {0: 'batch_size'}
            }
        )

        # Verify ONNX model
        onnx_model = onnx.load(output_path)
        onnx.checker.check_model(onnx_model)

        print(f"✓ ONNX model saved to: {output_path}")
        return True

    except Exception as e:
        print(f"✗ ONNX conversion failed: {e}")
        return False
591
+
592
+
593
def get_model_info(model: nn.Module) -> Dict[str, Any]:
    """
    Get information about a PyTorch model.

    Parameters:
    ----------
    model : nn.Module
        Model to analyze

    Returns:
    -------
    Dict[str, Any]
        Model information with keys 'total_parameters',
        'trainable_parameters', 'model_size_mb', 'device' and 'dtype'.
        'device' and 'dtype' are taken from the first parameter and are
        None for a model without parameters.
    """
    total_params = sum(p.numel() for p in model.parameters())
    trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)

    # Estimate model size from parameter and buffer storage
    param_size = sum(p.nelement() * p.element_size() for p in model.parameters())
    buffer_size = sum(b.nelement() * b.element_size() for b in model.buffers())
    size_mb = (param_size + buffer_size) / (1024 ** 2)

    # A parameter-less module (e.g. nn.Identity) has no first parameter; a bare
    # next() would raise StopIteration, so fall back to None.
    first_param = next(model.parameters(), None)

    return {
        'total_parameters': total_params,
        'trainable_parameters': trainable_params,
        'model_size_mb': size_mb,
        'device': first_param.device if first_param is not None else None,
        'dtype': first_param.dtype if first_param is not None else None
    }
requirements.txt CHANGED
@@ -5,6 +5,7 @@ numpy
5
  pandas
6
  scipy
7
  torch
 
8
  torchvision
9
  torchmetrics
10
  ml_collections
@@ -17,7 +18,7 @@ demucs
17
  audiomentations
18
  torch_audiomentations
19
  httpx
20
- gradio
21
  google-api-python-client
22
  pytube
23
  tqdm
@@ -37,4 +38,10 @@ pyloudnorm
37
  gdown
38
  validators
39
  requests
40
- spaces
 
 
 
 
 
 
 
5
  pandas
6
  scipy
7
  torch
8
+ torchaudio
9
  torchvision
10
  torchmetrics
11
  ml_collections
 
18
  audiomentations
19
  torch_audiomentations
20
  httpx
21
+ gradio>=4.0
22
  google-api-python-client
23
  pytube
24
  tqdm
 
38
  gdown
39
  validators
40
  requests
41
+ sympy
42
+ typing_extensions
43
+ packaging
44
+ opencv-python-headless
45
+ pypresence
46
+ mpmath
47
+ pillow
utils.py CHANGED
@@ -13,13 +13,39 @@ from omegaconf import OmegaConf
13
  from tqdm.auto import tqdm
14
  from typing import Dict, List, Tuple, Any, Union
15
  import loralib as lora
16
- import gc # For garbage collection
17
- import logging # Hata takibi için
18
 
19
- # Log ayarları
20
- logging.basicConfig(level=logging.INFO, filename='utils.log', format='%(asctime)s - %(message)s')
 
 
 
 
 
 
21
 
22
  def load_config(model_type: str, config_path: str) -> Union[ConfigDict, OmegaConf]:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  try:
24
  with open(config_path, 'r') as f:
25
  if model_type == 'htdemucs':
@@ -32,6 +58,7 @@ def load_config(model_type: str, config_path: str) -> Union[ConfigDict, OmegaCon
32
  except Exception as e:
33
  raise ValueError(f"Error loading configuration: {e}")
34
 
 
35
  def get_model_from_config(model_type: str, config_path: str) -> Tuple:
36
  """
37
  Load the model specified by the model type and configuration file.
@@ -101,97 +128,251 @@ def get_model_from_config(model_type: str, config_path: str) -> Tuple:
101
  elif model_type == 'experimental_mdx23c_stht':
102
  from models.mdx23c_tfc_tdf_v3_with_STHT import TFC_TDF_net
103
  model = TFC_TDF_net(config)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
104
  else:
105
  raise ValueError(f"Unknown model type: {model_type}")
106
 
107
  return model, config
108
 
 
109
  def read_audio_transposed(path: str, instr: str = None, skip_err: bool = False) -> Tuple[np.ndarray, int]:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
110
  try:
111
  mix, sr = sf.read(path)
112
- if len(mix.shape) == 1: # Mono audio
113
- mix = np.expand_dims(mix, axis=-1)
114
- return mix.T, sr
115
  except Exception as e:
116
  if skip_err:
117
  print(f"No stem {instr}: skip!")
118
  return None, None
119
- raise RuntimeError(f"Error reading the file at {path}: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
120
 
121
- def normalize_audio(audio: np.ndarray) -> Tuple[np.ndarray, Dict[str, float]]:
122
  mono = audio.mean(0)
123
  mean, std = mono.mean(), mono.std()
124
- return (audio - mean) / (std + 1e-8), {"mean": mean, "std": std}
 
125
 
126
  def denormalize_audio(audio: np.ndarray, norm_params: Dict[str, float]) -> np.ndarray:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
127
  return audio * norm_params["std"] + norm_params["mean"]
128
 
 
129
  def apply_tta(
130
- config,
131
- model: nn.Module,
132
- mix: torch.Tensor,
133
- waveforms_orig: Dict[str, torch.Tensor],
134
- device: str,
135
- model_type: str,
136
- progress=None # Gradio progress nesnesi
137
  ) -> Dict[str, torch.Tensor]:
138
- track_proc_list = [mix[::-1].clone(), -mix.clone()]
139
- total_steps = len(track_proc_list)
140
- processed_steps = 0
 
 
 
141
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
142
  for i, augmented_mix in enumerate(track_proc_list):
143
- # TTA adımı için ilerleme güncellemesi
144
- processed_steps += 1
145
- progress_value = round((processed_steps / total_steps) * 50) # TTA için 0-50% aralığı
146
- if progress is not None and callable(getattr(progress, '__call__', None)):
147
- progress(progress_value / 100, desc=f"Applying TTA step {processed_steps}/{total_steps}")
148
- update_progress_html(f"Applying TTA step {processed_steps}/{total_steps}", progress_value)
149
-
150
- waveforms = demix(config, model, augmented_mix, device, model_type=model_type, pbar=False, progress=progress)
151
  for el in waveforms:
152
  if i == 0:
153
- waveforms_orig[el] += waveforms[el][::-1].clone()
154
  else:
155
  waveforms_orig[el] -= waveforms[el]
156
- del waveforms, augmented_mix
157
- gc.collect()
158
- if device.startswith('cuda'):
159
- torch.cuda.empty_cache()
160
 
 
161
  for el in waveforms_orig:
162
- waveforms_orig[el] /= (len(track_proc_list) + 1)
163
-
164
- # TTA tamamlandı
165
- if progress is not None and callable(getattr(progress, '__call__', None)):
166
- progress(0.5, desc="TTA completed")
167
- update_progress_html("TTA completed", 50)
168
 
169
  return waveforms_orig
170
 
 
171
  def _getWindowingArray(window_size: int, fade_size: int) -> torch.Tensor:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
172
  fadein = torch.linspace(0, 1, fade_size)
173
  fadeout = torch.linspace(1, 0, fade_size)
 
174
  window = torch.ones(window_size)
175
  window[-fade_size:] = fadeout
176
  window[:fade_size] = fadein
177
  return window
178
 
 
179
  def demix(
180
- config: ConfigDict,
181
- model: nn.Module,
182
- mix: torch.Tensor,
183
- device: str,
184
- model_type: str,
185
- pbar: bool = False,
186
- progress=None # Gradio progress nesnesi
187
- ) -> Dict[str, np.ndarray]:
188
- logging.info(f"Starting demix for model_type: {model_type}, chunk_size: {config.audio.chunk_size}")
189
-
190
- # CPU'da FP16 ile başla
191
- mix = torch.tensor(mix, dtype=torch.float16, device='cpu')
192
- mode = 'demucs' if model_type == 'htdemucs' else 'generic'
193
 
194
- # İşlem parametreleri
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
195
  if mode == 'demucs':
196
  chunk_size = config.training.samplerate * config.training.segment
197
  num_instruments = len(config.training.instruments)
@@ -201,146 +382,272 @@ def demix(
201
  chunk_size = config.audio.chunk_size
202
  num_instruments = len(prefer_target_instrument(config))
203
  num_overlap = config.inference.num_overlap
 
204
  fade_size = chunk_size // 10
205
  step = chunk_size // num_overlap
206
  border = chunk_size - step
207
  length_init = mix.shape[-1]
208
- windowing_array = _getWindowingArray(chunk_size, fade_size).to('cpu', dtype=torch.float16)
 
209
  if length_init > 2 * border and border > 0:
210
  mix = nn.functional.pad(mix, (border, border), mode="reflect")
211
 
212
- batch_size = getattr(config.inference, 'batch_size', 1) # Düşük bellek için varsayılan 1
213
-
214
- # Modeli cihaza taşı (ZeroGPU için cuda:0)
215
- model = model.to(device)
216
- model.eval()
217
 
218
- # Toplam chunk sayısını hesapla
219
- total_chunks = (mix.shape[1] + step - 1) // step
220
- processed_chunks = 0
221
 
222
- with torch.no_grad(): # Çıkarım için gradyan yok
223
- with torch.cuda.amp.autocast(enabled=device.startswith('cuda'), dtype=torch.float16):
 
224
  req_shape = (num_instruments,) + mix.shape
225
- result = torch.zeros(req_shape, dtype=torch.float16, device='cpu')
226
- counter = torch.zeros(req_shape, dtype=torch.float16, device='cpu')
227
 
228
  i = 0
229
  batch_data = []
230
  batch_locations = []
231
- start_time = time.time()
 
 
232
 
233
  while i < mix.shape[1]:
234
- part = mix[:, i:i + chunk_size]
 
235
  chunk_len = part.shape[-1]
236
- pad_mode = "reflect" if mode == "generic" and chunk_len > chunk_size // 2 else "constant"
 
 
 
237
  part = nn.functional.pad(part, (0, chunk_size - chunk_len), mode=pad_mode, value=0)
238
 
239
  batch_data.append(part)
240
  batch_locations.append((i, chunk_len))
241
  i += step
242
 
 
243
  if len(batch_data) >= batch_size or i >= mix.shape[1]:
244
- # Veriyi GPU'ya taşı
245
- arr = torch.stack(batch_data, dim=0).to(device, non_blocking=True)
246
- x = model(arr) # Model çıkarımı GPU'da
247
-
248
- # Sonuçları hemen CPU'ya taşı
249
- x = x.cpu()
250
 
251
  if mode == "generic":
252
- window = windowing_array.clone()
253
- if i - step == 0:
254
  window[:fade_size] = 1
255
- elif i >= mix.shape[1]:
256
  window[-fade_size:] = 1
257
 
258
  for j, (start, seg_len) in enumerate(batch_locations):
259
  if mode == "generic":
260
- result[..., start:start + seg_len] += (x[j, ..., :seg_len] * window[..., :seg_len])
261
  counter[..., start:start + seg_len] += window[..., :seg_len]
262
  else:
263
- result[..., start:start + seg_len] += x[j, ..., :seg_len]
264
  counter[..., start:start + seg_len] += 1.0
265
 
266
- # İlerleme güncellemesi
267
- processed_chunks += len(batch_data)
268
- progress_value = min(round((processed_chunks / total_chunks) * 100), 100) # %1 hassasiyet
269
- if progress is not None and callable(getattr(progress, '__call__', None)):
270
- progress(progress_value / 100, desc=f"Processing chunk {processed_chunks}/{total_chunks}")
271
- update_progress_html(f"Processing chunk {processed_chunks}/{total_chunks}", progress_value)
272
-
273
- del arr, x
274
  batch_data.clear()
275
  batch_locations.clear()
276
- gc.collect()
277
- if device.startswith('cuda'):
278
- torch.cuda.empty_cache()
279
- logging.info("Cleared CUDA cache")
280
 
281
- elapsed_time = time.time() - start_time
282
- logging.info(f"Demix completed in {elapsed_time:.2f} seconds")
 
 
 
283
 
284
- estimated_sources = result / (counter + 1e-8)
285
- estimated_sources = estimated_sources.numpy().astype(np.float32)
 
286
  np.nan_to_num(estimated_sources, copy=False, nan=0.0)
287
 
288
- if mode == "generic" and length_init > 2 * border and border > 0:
289
- estimated_sources = estimated_sources[..., border:-border]
 
 
 
 
 
 
 
 
290
 
291
- instruments = config.training.instruments if mode == "demucs" else prefer_target_instrument(config)
292
  ret_data = {k: v for k, v in zip(instruments, estimated_sources)}
293
- logging.info("Demix completed successfully")
294
 
295
- # Son ilerleme güncellemesi
296
- if progress is not None and callable(getattr(progress, '__call__', None)):
297
- progress(1.0, desc="Demix completed")
298
- update_progress_html("Demix completed", 100)
299
 
300
- return ret_data
301
 
302
  def prefer_target_instrument(config: ConfigDict) -> List[str]:
303
- return [config.training.target_instrument] if getattr(config.training, 'target_instrument', None) else config.training.instruments
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
304
 
305
- def load_not_compatible_weights(model: nn.Module, weights: str, verbose: bool = False) -> None:
306
  new_model = model.state_dict()
307
- old_model = torch.load(weights, map_location='cpu')
308
  if 'state' in old_model:
 
309
  old_model = old_model['state']
310
  if 'state_dict' in old_model:
 
311
  old_model = old_model['state_dict']
 
312
  for el in new_model:
313
- if el in old_model and new_model[el].shape == old_model[el].shape:
314
- new_model[el] = old_model[el]
315
- model.load_state_dict(new_model)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
316
 
317
- def load_lora_weights(model: nn.Module, lora_path: str, device: str = 'cpu') -> None:
318
- lora_state_dict = torch.load(lora_path, map_location=device)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
319
  model.load_state_dict(lora_state_dict, strict=False)
320
 
321
- def load_start_checkpoint(args: argparse.Namespace, model: nn.Module, type_='train') -> None:
 
 
 
 
 
 
 
 
 
 
322
  print(f'Start from checkpoint: {args.start_check_point}')
323
- device = 'cpu'
324
- state_dict = torch.load(args.start_check_point, map_location=device, weights_only=True)
325
- if args.model_type in ['htdemucs', 'apollo'] and isinstance(state_dict, dict):
326
- state_dict = state_dict.get('state', state_dict.get('state_dict', state_dict))
327
- model.load_state_dict(state_dict)
328
- if args.lora_checkpoint:
329
- print(f"Loading LoRA weights from: {args.lora_checkpoint}")
330
- load_lora_weights(model, args.lora_checkpoint, device)
 
 
 
 
 
 
 
 
 
 
 
331
 
332
  def bind_lora_to_model(config: Dict[str, Any], model: nn.Module) -> nn.Module:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
333
  if 'lora' not in config:
334
  raise ValueError("Configuration must contain the 'lora' key with parameters for LoRA.")
335
- replaced_layers = 0
 
 
336
  for name, module in model.named_modules():
337
  hierarchy = name.split('.')
338
  layer_name = hierarchy[-1]
 
 
339
  if isinstance(module, nn.Linear):
340
  try:
 
341
  parent_module = model
342
  for submodule_name in hierarchy[:-1]:
343
  parent_module = getattr(parent_module, submodule_name)
 
 
344
  setattr(
345
  parent_module,
346
  layer_name,
@@ -351,23 +658,41 @@ def bind_lora_to_model(config: Dict[str, Any], model: nn.Module) -> nn.Module:
351
  **config['lora']
352
  )
353
  )
354
- replaced_layers += 1
 
355
  except Exception as e:
356
  print(f"Error replacing layer {name}: {e}")
357
- print(f"Number of layers replaced with LoRA: {replaced_layers}")
 
 
 
 
 
358
  return model
359
 
 
360
  def draw_spectrogram(waveform, sample_rate, length, output_file):
 
 
 
 
361
  import librosa.display
 
 
362
  x = waveform[:int(length * sample_rate), :]
363
- X = librosa.stft(x.mean(axis=-1))
364
- Xdb = librosa.amplitude_to_db(np.abs(X), ref=np.max)
365
  fig, ax = plt.subplots()
 
366
  img = librosa.display.specshow(
367
- Xdb, cmap='plasma', sr=sample_rate, x_axis='time', y_axis='linear', ax=ax
 
 
 
 
 
368
  )
369
  ax.set(title='File: ' + os.path.basename(output_file))
370
  fig.colorbar(img, ax=ax, format="%+2.f dB")
371
- if output_file:
372
  plt.savefig(output_file)
373
- plt.close()
 
13
  from tqdm.auto import tqdm
14
  from typing import Dict, List, Tuple, Any, Union
15
  import loralib as lora
 
 
16
 
17
+ # Fix matplotlib backend for isolated Python environments
18
+ try:
19
+ import matplotlib
20
+ matplotlib.use('Agg') # Non-interactive backend
21
+ import matplotlib.pyplot as plt
22
+ except ImportError:
23
+ plt = None
24
+
25
 
26
  def load_config(model_type: str, config_path: str) -> Union[ConfigDict, OmegaConf]:
27
+ """
28
+ Load the configuration from the specified path based on the model type.
29
+
30
+ Parameters:
31
+ ----------
32
+ model_type : str
33
+ The type of model to load (e.g., 'htdemucs', 'mdx23c', etc.).
34
+ config_path : str
35
+ The path to the YAML or OmegaConf configuration file.
36
+
37
+ Returns:
38
+ -------
39
+ config : Any
40
+ The loaded configuration, which can be in different formats (e.g., OmegaConf or ConfigDict).
41
+
42
+ Raises:
43
+ ------
44
+ FileNotFoundError:
45
+ If the configuration file at `config_path` is not found.
46
+ ValueError:
47
+ If there is an error loading the configuration file.
48
+ """
49
  try:
50
  with open(config_path, 'r') as f:
51
  if model_type == 'htdemucs':
 
58
  except Exception as e:
59
  raise ValueError(f"Error loading configuration: {e}")
60
 
61
+
62
  def get_model_from_config(model_type: str, config_path: str) -> Tuple:
63
  """
64
  Load the model specified by the model type and configuration file.
 
128
  elif model_type == 'experimental_mdx23c_stht':
129
  from models.mdx23c_tfc_tdf_v3_with_STHT import TFC_TDF_net
130
  model = TFC_TDF_net(config)
131
+ elif model_type == 'mel_band_roformer_experimental':
132
+ from models.bs_roformer.mel_band_roformer_experimental import MelBandRoformer
133
+ model = MelBandRoformer(**dict(config.model))
134
+ elif model_type == 'bs_roformer_experimental':
135
+ from models.bs_roformer.bs_roformer_experimental import BSRoformer
136
+ model = BSRoformer(**dict(config.model))
137
+ elif model_type == 'bs_roformer_custom':
138
+ from models.bs_roformer.bs_roformer_custom.bs_roformer import BSRoformer
139
+ model = BSRoformer(**dict(config.model))
140
+ elif model_type == 'scnet_tran':
141
+ from models.scnet.scnet_tran import SCNet_Tran
142
+ model = SCNet_Tran(**config.model)
143
+ elif model_type == 'scnet_masked':
144
+ from models.scnet.scnet_masked import SCNet
145
+ model = SCNet(**config.model)
146
+ elif model_type == 'conformer':
147
+ from models.conformer_model import ConformerMSS, NeuralModel
148
+ model = ConformerMSS(
149
+ core=NeuralModel(**config.model),
150
+ n_fft=config.stft.n_fft,
151
+ hop_length=config.stft.hop_length,
152
+ win_length=getattr(config.stft, 'win_length', config.stft.n_fft),
153
+ center=config.stft.center
154
+ )
155
+ elif model_type == 'mel_band_conformer':
156
+ from models.mel_band_conformer import MelBandConformer
157
+ model = MelBandConformer(**config.model)
158
  else:
159
  raise ValueError(f"Unknown model type: {model_type}")
160
 
161
  return model, config
162
 
163
+
164
def read_audio_transposed(
        path: str,
        instr: Union[str, None] = None,
        skip_err: bool = False
) -> Tuple[Union[np.ndarray, None], Union[int, None]]:
    """
    Read an audio file and return its samples with channels as the first dimension.

    Mono audio (a one-dimensional array) is first expanded to two dimensions so
    that the returned array is always two-dimensional.

    Parameters:
    ----------
    path : str
        Path to the audio file.
    instr : str, optional
        Name of the instrument (stem). Only used in the message printed when the
        file cannot be read and `skip_err` is True.
    skip_err : bool
        If True, a read failure is reported on stdout and `(None, None)` is
        returned instead of raising.

    Returns:
    -------
    Tuple[np.ndarray, int]
        - Transposed audio data with shape (channels, length);
          for mono audio the shape is (1, length).
        - Sampling rate, e.g. 44100.
        Both elements are None when reading failed and `skip_err` is True.

    Raises:
    ------
    RuntimeError:
        If the file cannot be read and `skip_err` is False. The original
        exception is attached as the cause.
    """
    try:
        mix, sr = sf.read(path)
    except Exception as e:
        if skip_err:
            print(f"No stem {instr}: skip!")
            return None, None
        # Chain explicitly so the underlying reader error stays visible in tracebacks.
        raise RuntimeError(f"Error reading the file at {path}: {e}") from e

    if mix.ndim == 1:  # For mono audio
        mix = np.expand_dims(mix, axis=-1)
    return mix.T, sr
197
+
198
+
199
def normalize_audio(audio: np.ndarray) -> Tuple[np.ndarray, Dict[str, float]]:
    """
    Normalize an audio signal by subtracting the mean and dividing by the standard deviation.

    The statistics are computed on the channel-averaged (mono) signal and then
    applied to every channel.

    Parameters:
    ----------
    audio : np.ndarray
        Input audio array with shape (channels, time) or (time,).

    Returns:
    -------
    tuple[np.ndarray, dict[str, float]]
        - Normalized audio array with the same shape as the input.
        - Dictionary with the 'mean' and 'std' that were applied, suitable for
          `denormalize_audio`. When the signal has zero standard deviation
          (silent or constant input) 'std' is reported as 1 so that the
          normalization stays finite and remains exactly reversible.
    """
    # A 1-D signal is already mono; averaging over axis 0 would collapse it to a
    # single scalar whose standard deviation is always zero.
    mono = audio.mean(0) if audio.ndim > 1 else audio
    mean, std = mono.mean(), mono.std()
    if std == 0:
        # Avoid dividing by zero: fall back to a unit scale of the same numeric type.
        std = type(std)(1)
    return (audio - mean) / std, {"mean": mean, "std": std}
218
+
219
 
220
def denormalize_audio(audio: np.ndarray, norm_params: Dict[str, float]) -> np.ndarray:
    """
    Undo a mean/std normalization: scale by the stored standard deviation, then
    shift by the stored mean.

    Parameters:
    ----------
    audio : np.ndarray
        Normalized audio array to be denormalized.
    norm_params : dict[str, float]
        Dictionary holding the 'std' and 'mean' values used for normalization.

    Returns:
    -------
    np.ndarray
        Denormalized audio array with the same shape as the input.
    """
    scale = norm_params["std"]
    rescaled = audio * scale
    offset = norm_params["mean"]
    return rescaled + offset
239
 
240
+
241
def apply_tta(
        config,
        model: torch.nn.Module,
        mix: np.ndarray,
        waveforms_orig: Dict[str, np.ndarray],
        device: torch.device,
        model_type: str
) -> Dict[str, np.ndarray]:
    """
    Apply Test-Time Augmentation (TTA) for source separation.

    The mixture is separated two more times, once with its first axis reversed
    (channel swap for a (channels, time) array) and once with inverted polarity.
    Each extra estimate is mapped back (reversed again / subtracted) and
    accumulated onto the original estimate; the sum is then divided by the
    number of passes (augmentations + 1).

    Parameters:
    ----------
    config : Any
        Configuration object containing model and processing parameters.
    model : torch.nn.Module
        The trained model used for source separation.
    mix : np.ndarray
        The mixed audio with shape (channels, time). It must be a NumPy array:
        the augmentations rely on `.copy()` and negative-step slicing.
    waveforms_orig : Dict[str, np.ndarray]
        Separated waveforms (before TTA) for each instrument.
        NOTE: the arrays in this dictionary are updated in place.
    device : torch.device
        Device (CPU or CUDA) on which the model will be executed.
    model_type : str
        Type of the model being used; forwarded to `demix`.

    Returns:
    -------
    Dict[str, np.ndarray]
        The same `waveforms_orig` dictionary, now holding the averaged results.
    """
    # Create augmentations: channel inversion and polarity inversion
    track_proc_list = [mix[::-1].copy(), -1.0 * mix.copy()]

    # Process each augmented mixture
    for i, augmented_mix in enumerate(track_proc_list):
        waveforms = demix(config, model, augmented_mix, device, model_type=model_type)
        for el in waveforms:
            if i == 0:
                # Undo the channel reversal before accumulating.
                waveforms_orig[el] += waveforms[el][::-1].copy()
            else:
                # Undo the polarity inversion: subtracting adds the re-inverted estimate.
                waveforms_orig[el] -= waveforms[el]

    # Average the results across the original pass and all augmentations
    for el in waveforms_orig:
        waveforms_orig[el] /= len(track_proc_list) + 1

    return waveforms_orig
293
 
294
+
295
  def _getWindowingArray(window_size: int, fade_size: int) -> torch.Tensor:
296
+ """
297
+ Generate a windowing array with a linear fade-in at the beginning and a fade-out at the end.
298
+
299
+ This function creates a window of size `window_size` where the first `fade_size` elements
300
+ linearly increase from 0 to 1 (fade-in) and the last `fade_size` elements linearly decrease
301
+ from 1 to 0 (fade-out). The middle part of the window is filled with ones.
302
+
303
+ Parameters:
304
+ ----------
305
+ window_size : int
306
+ The total size of the window.
307
+ fade_size : int
308
+ The size of the fade-in and fade-out regions.
309
+
310
+ Returns:
311
+ -------
312
+ torch.Tensor
313
+ A tensor of shape (window_size,) containing the generated windowing array.
314
+
315
+ Example:
316
+ -------
317
+ If `window_size=10` and `fade_size=3`, the output will be:
318
+ tensor([0.0000, 0.5000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 0.5000, 0.0000])
319
+ """
320
+
321
  fadein = torch.linspace(0, 1, fade_size)
322
  fadeout = torch.linspace(1, 0, fade_size)
323
+
324
  window = torch.ones(window_size)
325
  window[-fade_size:] = fadeout
326
  window[:fade_size] = fadein
327
  return window
328
 
329
+
330
  def demix(
331
+ config: ConfigDict,
332
+ model: torch.nn.Module,
333
+ mix: torch.Tensor,
334
+ device: torch.device,
335
+ model_type: str,
336
+ pbar: bool = False
337
+ ) -> Tuple[List[Dict[str, np.ndarray]], np.ndarray]:
338
+ """
339
+ Unified function for audio source separation with support for multiple processing modes.
340
+
341
+ This function separates audio into its constituent sources using either a generic custom logic
342
+ or a Demucs-specific logic. It supports batch processing and overlapping window-based chunking
343
+ for efficient and artifact-free separation.
344
 
345
+ Parameters:
346
+ ----------
347
+ config : ConfigDict
348
+ Configuration object containing audio and inference settings.
349
+ model : torch.nn.Module
350
+ The trained model used for audio source separation.
351
+ mix : torch.Tensor
352
+ Input audio tensor with shape (channels, time).
353
+ device : torch.device
354
+ The computation device (CPU or CUDA).
355
+ model_type : str, optional
356
+ Processing mode:
357
+ - "demucs" for logic specific to the Demucs model.
358
+ Default is "generic".
359
+ pbar : bool, optional
360
+ If True, displays a progress bar during chunk processing. Default is False.
361
+
362
+ Returns:
363
+ -------
364
+ Union[Dict[str, np.ndarray], np.ndarray]
365
+ - A dictionary mapping target instruments to separated audio sources if multiple instruments are present.
366
+ - A numpy array of the separated source if only one instrument is present.
367
+ """
368
+
369
+ mix = torch.tensor(mix, dtype=torch.float32)
370
+
371
+ if model_type == 'htdemucs':
372
+ mode = 'demucs'
373
+ else:
374
+ mode = 'generic'
375
+ # Define processing parameters based on the mode
376
  if mode == 'demucs':
377
  chunk_size = config.training.samplerate * config.training.segment
378
  num_instruments = len(config.training.instruments)
 
382
  chunk_size = config.audio.chunk_size
383
  num_instruments = len(prefer_target_instrument(config))
384
  num_overlap = config.inference.num_overlap
385
+
386
  fade_size = chunk_size // 10
387
  step = chunk_size // num_overlap
388
  border = chunk_size - step
389
  length_init = mix.shape[-1]
390
+ windowing_array = _getWindowingArray(chunk_size, fade_size)
391
+ # Add padding for generic mode to handle edge artifacts
392
  if length_init > 2 * border and border > 0:
393
  mix = nn.functional.pad(mix, (border, border), mode="reflect")
394
 
395
+ batch_size = config.inference.batch_size
 
 
 
 
396
 
397
+ use_amp = getattr(config.training, 'use_amp', True)
 
 
398
 
399
+ with torch.cuda.amp.autocast(enabled=use_amp):
400
+ with torch.inference_mode():
401
+ # Initialize result and counter tensors
402
  req_shape = (num_instruments,) + mix.shape
403
+ result = torch.zeros(req_shape, dtype=torch.float32)
404
+ counter = torch.zeros(req_shape, dtype=torch.float32)
405
 
406
  i = 0
407
  batch_data = []
408
  batch_locations = []
409
+ progress_bar = tqdm(
410
+ total=mix.shape[1], desc="Processing audio chunks", leave=False
411
+ ) if pbar else None
412
 
413
  while i < mix.shape[1]:
414
+ # Extract chunk and apply padding if necessary
415
+ part = mix[:, i:i + chunk_size].to(device)
416
  chunk_len = part.shape[-1]
417
+ if mode == "generic" and chunk_len > chunk_size // 2:
418
+ pad_mode = "reflect"
419
+ else:
420
+ pad_mode = "constant"
421
  part = nn.functional.pad(part, (0, chunk_size - chunk_len), mode=pad_mode, value=0)
422
 
423
  batch_data.append(part)
424
  batch_locations.append((i, chunk_len))
425
  i += step
426
 
427
+ # Process batch if it's full or the end is reached
428
  if len(batch_data) >= batch_size or i >= mix.shape[1]:
429
+ arr = torch.stack(batch_data, dim=0)
430
+ x = model(arr)
 
 
 
 
431
 
432
  if mode == "generic":
433
+ window = windowing_array.clone() # using clone() fixes the clicks at chunk edges when using batch_size=1
434
+ if i - step == 0: # First audio chunk, no fadein
435
  window[:fade_size] = 1
436
+ elif i >= mix.shape[1]: # Last audio chunk, no fadeout
437
  window[-fade_size:] = 1
438
 
439
  for j, (start, seg_len) in enumerate(batch_locations):
440
  if mode == "generic":
441
+ result[..., start:start + seg_len] += x[j, ..., :seg_len].cpu() * window[..., :seg_len]
442
  counter[..., start:start + seg_len] += window[..., :seg_len]
443
  else:
444
+ result[..., start:start + seg_len] += x[j, ..., :seg_len].cpu()
445
  counter[..., start:start + seg_len] += 1.0
446
 
 
 
 
 
 
 
 
 
447
  batch_data.clear()
448
  batch_locations.clear()
 
 
 
 
449
 
450
+ if progress_bar:
451
+ progress_bar.update(step)
452
+
453
+ if progress_bar:
454
+ progress_bar.close()
455
 
456
+ # Compute final estimated sources
457
+ estimated_sources = result / counter
458
+ estimated_sources = estimated_sources.cpu().numpy()
459
  np.nan_to_num(estimated_sources, copy=False, nan=0.0)
460
 
461
+ # Remove padding for generic mode
462
+ if mode == "generic":
463
+ if length_init > 2 * border and border > 0:
464
+ estimated_sources = estimated_sources[..., border:-border]
465
+
466
+ # Return the result as a dictionary or a single array
467
+ if mode == "demucs":
468
+ instruments = config.training.instruments
469
+ else:
470
+ instruments = prefer_target_instrument(config)
471
 
 
472
  ret_data = {k: v for k, v in zip(instruments, estimated_sources)}
 
473
 
474
+ if mode == "demucs" and num_instruments <= 1:
475
+ return estimated_sources
476
+ else:
477
+ return ret_data
478
 
 
479
 
480
def prefer_target_instrument(config: ConfigDict) -> List[str]:
    """
    Pick the instruments the model is expected to output.

    When `config.training.target_instrument` is set to a truthy value, only that
    instrument is returned (wrapped in a list); otherwise the complete
    `config.training.instruments` list is returned.

    Parameters:
    ----------
    config : ConfigDict
        Configuration object containing the list of instruments or the target instrument.

    Returns:
    -------
    List[str]
        A list of target instruments.
    """
    training_cfg = config.training
    target = getattr(training_cfg, 'target_instrument', None)
    if not target:
        return training_cfg.instruments
    return [target]
500
+
501
+
502
def _fit_tensor_to_shape(source: torch.Tensor, target_shape) -> torch.Tensor:
    """Crop and/or zero-pad `source` (same rank as `target_shape`) so the result has `target_shape`."""
    padded_shape = [max(t, s) for t, s in zip(target_shape, source.shape)]
    padded = torch.zeros(padded_shape, dtype=source.dtype)
    # Place the old values in the leading corner, then cut out the leading
    # corner of the requested size: overlapping entries are kept, the rest is 0.
    padded[tuple(slice(0, s) for s in source.shape)] = source.cpu()
    return padded[tuple(slice(0, t) for t in target_shape)]


def load_not_compatible_weights(model: torch.nn.Module, weights: str, verbose: bool = False) -> None:
    """
    Load weights into a model, handling mismatched shapes and dimensions.

    For every entry of the model's state dict:
      - same name and same shape in the checkpoint: the checkpoint tensor is used;
      - same name, same number of dimensions but different sizes: the overlapping
        leading region is copied and the remainder is filled with zeros;
      - same name but a different number of dimensions, or no match at all:
        the model's current values are kept.

    Args:
        model: PyTorch model into which the weights will be loaded.
        weights: Path to the weights file.
        verbose: If True, prints detailed information about matching and mismatched layers.
    """
    new_model = model.state_dict()
    # SECURITY NOTE: weights_only=False unpickles arbitrary objects; only use
    # checkpoints from trusted sources.
    # map_location='cpu' lets checkpoints saved on a GPU load on CPU-only hosts;
    # load_state_dict copies the values onto the model's own device afterwards.
    old_model = torch.load(weights, map_location='cpu', weights_only=False)
    if 'state' in old_model:
        # Fix for htdemucs weights loading
        old_model = old_model['state']
    if 'state_dict' in old_model:
        # Fix for apollo weights loading
        old_model = old_model['state_dict']

    for el in new_model:
        if el not in old_model:
            if verbose:
                print(f'Match not found for {el}!')
            continue

        if verbose:
            print(f'Match found for {el}!')
        new_shape = new_model[el].shape
        old_shape = old_model[el].shape

        if new_shape == old_shape:
            if verbose:
                print('Action: Just copy weights!')
            new_model[el] = old_model[el]
        elif len(new_shape) != len(old_shape):
            # Rank mismatch: keep the model's own initialization for this entry.
            if verbose:
                print('Action: Different dimension! Too lazy to write the code... Skip it')
        else:
            if verbose:
                print(f'Shape is different: {tuple(new_shape)} != {tuple(old_shape)}')
            new_model[el] = _fit_tensor_to_shape(old_model[el], new_shape)

    model.load_state_dict(new_model)
559
 
560
+
561
def load_lora_weights(model: torch.nn.Module, lora_path: str, device: str = 'cpu') -> None:
    """
    Apply the weights stored in a LoRA checkpoint to a model.

    The checkpoint is loaded non-strictly, so it does not have to cover the
    model's full state dictionary: entries present in the checkpoint are
    written into the model, all other parameters are left untouched.

    Parameters:
    ----------
    model : Module
        The PyTorch model into which the LoRA weights will be loaded.
    lora_path : str
        Path to the LoRA checkpoint file.
    device : str, optional
        The device to load the weights onto, by default 'cpu'. Common values are 'cpu' or 'cuda'.

    Returns:
    -------
    None
        The model is updated in place.
    """
    checkpoint = torch.load(
        lora_path,
        map_location=device,
        weights_only=False,
    )
    # strict=False: keys missing from the checkpoint are not an error.
    model.load_state_dict(checkpoint, strict=False)
583
 
584
+
585
def load_start_checkpoint(args: argparse.Namespace, model: torch.nn.Module, type_='train') -> None:
    """
    Load the starting checkpoint for a model.

    Args:
        args: Parsed command-line arguments. `args.start_check_point` is the
            checkpoint path; `args.model_type` is read when `type_` is not 'train'.
        model: PyTorch model to load the checkpoint into.
        type_: How to load weights. With 'train' the checkpoint may be only
            partially compatible (handled by `load_not_compatible_weights`);
            any other value loads the state dict strictly, mapped to the CPU.

    NOTE(review): this function does not consult any LoRA checkpoint argument;
    LoRA weights, if used, have to be applied separately.
    """
    print(f'Start from checkpoint: {args.start_check_point}')

    if type_ == 'train':
        load_not_compatible_weights(model, args.start_check_point, verbose=False)
        return

    device = 'cpu'
    if args.model_type in ('htdemucs', 'apollo'):
        # These checkpoints wrap the weights in a container dict, so they are
        # loaded with weights_only=False. SECURITY NOTE: that unpickles
        # arbitrary objects; only use checkpoints from trusted sources.
        state_dict = torch.load(args.start_check_point, map_location=device, weights_only=False)
        # Fix for htdemucs pretrained models
        if 'state' in state_dict:
            state_dict = state_dict['state']
        # Fix for apollo pretrained models
        if 'state_dict' in state_dict:
            state_dict = state_dict['state_dict']
    else:
        state_dict = torch.load(args.start_check_point, map_location=device, weights_only=True)
    model.load_state_dict(state_dict)
614
+
615
 
616
  def bind_lora_to_model(config: Dict[str, Any], model: nn.Module) -> nn.Module:
617
+ """
618
+ Replaces specific layers in the model with LoRA-extended versions.
619
+
620
+ Parameters:
621
+ ----------
622
+ config : Dict[str, Any]
623
+ Configuration containing parameters for LoRA. It should include a 'lora' key with parameters for `MergedLinear`.
624
+ model : nn.Module
625
+ The original model in which the layers will be replaced.
626
+
627
+ Returns:
628
+ -------
629
+ nn.Module
630
+ The modified model with the replaced layers.
631
+ """
632
+
633
  if 'lora' not in config:
634
  raise ValueError("Configuration must contain the 'lora' key with parameters for LoRA.")
635
+
636
+ replaced_layers = 0 # Counter for replaced layers
637
+
638
  for name, module in model.named_modules():
639
  hierarchy = name.split('.')
640
  layer_name = hierarchy[-1]
641
+
642
+ # Check if this is the target layer to replace (and layer_name == 'to_qkv')
643
  if isinstance(module, nn.Linear):
644
  try:
645
+ # Get the parent module
646
  parent_module = model
647
  for submodule_name in hierarchy[:-1]:
648
  parent_module = getattr(parent_module, submodule_name)
649
+
650
+ # Replace the module with LoRA-enabled layer
651
  setattr(
652
  parent_module,
653
  layer_name,
 
658
  **config['lora']
659
  )
660
  )
661
+ replaced_layers += 1 # Increment the counter
662
+
663
  except Exception as e:
664
  print(f"Error replacing layer {name}: {e}")
665
+
666
+ if replaced_layers == 0:
667
+ print("Warning: No layers were replaced. Check the model structure and configuration.")
668
+ else:
669
+ print(f"Number of layers replaced with LoRA: {replaced_layers}")
670
+
671
  return model
672
 
673
+
674
  def draw_spectrogram(waveform, sample_rate, length, output_file):
675
+ if plt is None:
676
+ print("Warning: matplotlib not available, skipping spectrogram")
677
+ return
678
+
679
  import librosa.display
680
+
681
+ # Cut only required part of spectorgram
682
  x = waveform[:int(length * sample_rate), :]
683
+ X = librosa.stft(x.mean(axis=-1)) # perform short-term fourier transform on mono signal
684
+ Xdb = librosa.amplitude_to_db(np.abs(X), ref=np.max) # convert an amplitude spectrogram to dB-scaled spectrogram.
685
  fig, ax = plt.subplots()
686
+ # plt.figure(figsize=(30, 10)) # initialize the fig size
687
  img = librosa.display.specshow(
688
+ Xdb,
689
+ cmap='plasma',
690
+ sr=sample_rate,
691
+ x_axis='time',
692
+ y_axis='linear',
693
+ ax=ax
694
  )
695
  ax.set(title='File: ' + os.path.basename(output_file))
696
  fig.colorbar(img, ax=ax, format="%+2.f dB")
697
+ if output_file is not None:
698
  plt.savefig(output_file)