Spaces:

Lollikit
/

TranskunGUI

Running

App Files Community

Lollikit committed on Sep 9, 2025

Commit

108a341

1 Parent(s): 5e1d30c

transkun

Browse files

Files changed (4) hide show

app.py +20 -53
models/2.0.conf +0 -37
models/2.0.pt +0 -3
models/__init__.py +0 -0

app.py CHANGED Viewed

@@ -296,6 +296,8 @@ def trim_midi_silence(mid, debug=False):
         if debug:
             print(f"MIDI裁剪失败: {e}")
 # 核心转换函数
 def process_audio(input_file, use_cuda=True, use_quantize=True, progress=gr.Progress(), file_progress_offset=0.0, file_progress_scale=1.0):
     """
@@ -311,76 +313,42 @@ def process_audio(input_file, use_cuda=True, use_quantize=True, progress=gr.Prog
     """
     temp_dir = None
     try:
-        # The fix: create a temporary directory to store all output files
-        # 修复：创建一个临时目录来存储所有的输出文件
         temp_dir = tempfile.mkdtemp()
-        # Get a meaningful filename from the input file
         # 从输入文件中获取一个有意义的文件名
         input_name = Path(input_file).stem
-        # Create the path for the non-quantized MIDI file inside the temp directory
-        # 在临时目录中创建非量化MIDI文件的路径
         output_file = Path(temp_dir) / f"{input_name}.mid"
         quantized_output_file = None
-        device = "cuda" if use_cuda and cuda_available else "cpu"
         start_time = time.time()
-        progress(file_progress_offset, desc="准备模型...")
-        # 加载模型和配置
-        default_weight = os.path.join(current_dir, "models\\2.0.pt")
-        default_conf = os.path.join(current_dir, "models\\2.0.conf")
-        # 检查模型文件是否存在
-        if not os.path.exists(default_weight) or not os.path.exists(default_conf):
-            raise FileNotFoundError(
-                f"找不到模型文件！请确保以下文件存在：\n"
-                f"{default_weight}\n"
-                f"{default_conf}"
-            )
-        # 加载配置
-        conf_manager = moduleconf.parseFromFile(default_conf)
-        TransKun = conf_manager["Model"].module.TransKun
-        conf = conf_manager["Model"].config
-        # 加载模型
-        checkpoint = torch.load(default_weight, map_location=device)
-        model = TransKun(conf=conf).to(device)
-        if "best_state_dict" not in checkpoint:
-            model.load_state_dict(checkpoint["state_dict"], strict=False)
-        else:
-            model.load_state_dict(checkpoint["best_state_dict"], strict=False)
-        model.eval()
-        progress(file_progress_offset + 0.2 * file_progress_scale, desc="读取音频...")
-        # 读取并处理音频
-        fs, audio = transkun.transcribe.readAudio(input_file)
-        if fs != model.fs:
-            import soxr
-            audio = soxr.resample(audio, fs, model.fs)
-        x = torch.from_numpy(audio).to(device)
-        progress(file_progress_offset + 0.4 * file_progress_scale, desc="转录中...")
-        # 转录
-        with torch.no_grad():
-            notes_est = model.transcribe(x)
         progress(file_progress_offset + 0.7 * file_progress_scale, desc="保存MIDI...")
-        # 保存MIDI到临时目录，将 Path 对象转换为字符串
-        output_midi = transkun.transcribe.writeMidi(notes_est)
-        output_midi.write(str(output_file))
         # 如果勾选了规整化选项，则进行MIDI规整化
         if use_quantize:
             progress(file_progress_offset + 0.8 * file_progress_scale, desc="规整化MIDI...")
             try:
-                # The midi_quantize function will now write the output file with the expected name
-                # midi_quantize函数现在将以预期的名称写入输出文件
                 quantized_output_file = midi_quantize(str(output_file), debug=False, optimize_bpm=True)
             except Exception as e:
                 print(f"规整化处理失败: {str(e)}")
@@ -407,7 +375,6 @@ def process_audio(input_file, use_cuda=True, use_quantize=True, progress=gr.Prog
             "output": f"转换失败: {str(e)}",
             "files": []
         }
-    # Removed the manual cleanup block, Gradio will handle this now.
     # 删除了手动清理代码块，现在由 Gradio 来处理。
 # 创建Gradio界面

         if debug:
             print(f"MIDI裁剪失败: {e}")
+import subprocess
 # 核心转换函数
 def process_audio(input_file, use_cuda=True, use_quantize=True, progress=gr.Progress(), file_progress_offset=0.0, file_progress_scale=1.0):
     """
     """
     temp_dir = None
     try:
+        # 创建一个临时目录来存储所有的输出文件
         temp_dir = tempfile.mkdtemp()
         # 从输入文件中获取一个有意义的文件名
         input_name = Path(input_file).stem
+        # 在临时目录中创建MIDI文件的路径
         output_file = Path(temp_dir) / f"{input_name}.mid"
         quantized_output_file = None
+        # 设置设备参数
+        device_param = "--cuda" if use_cuda and cuda_available else ""
         start_time = time.time()
+        progress(file_progress_offset, desc="准备转录...")
+        # 使用命令行调用transkun
+        progress(file_progress_offset + 0.3 * file_progress_scale, desc="转录中...")
+        cmd = ["transkun", input_file, str(output_file), device_param]
+        # 执行命令
+        process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
+        stdout, stderr = process.communicate()
+        # 检查命令是否成功执行
+        if process.returncode != 0:
+            raise Exception(f"transkun命令执行失败: {stderr}")
         progress(file_progress_offset + 0.7 * file_progress_scale, desc="保存MIDI...")
         # 如果勾选了规整化选项，则进行MIDI规整化
         if use_quantize:
             progress(file_progress_offset + 0.8 * file_progress_scale, desc="规整化MIDI...")
             try:
+                # midi_quantize函数将以预期的名称写入输出文件
                 quantized_output_file = midi_quantize(str(output_file), debug=False, optimize_bpm=True)
             except Exception as e:
                 print(f"规整化处理失败: {str(e)}")
             "output": f"转换失败: {str(e)}",
             "files": []
         }
     # 删除了手动清理代码块，现在由 Gradio 来处理。
 # 创建Gradio界面

models/2.0.conf DELETED Viewed

@@ -1,37 +0,0 @@
-{
-	"Model": {
-		"module": "transkun.ModelTransformer",
-		"configClassName": "Config",
-		"config": {
-			"f_min": 30,
-			"f_max": 8000,
-			"n_mels": 229,
-			"segmentHopSizeInSecond": 8,
-			"segmentSizeInSecond": 16,
-			"hopSize": 1024,
-			"windowSize": 4096,
-			"fs": 44100,
-			"nExtraWins": 5,
-			"baseSize": 64,
-			"downsampleF": true,
-			"posEmbedInitGamma": 1,
-			"nHead": 8,
-			"fourierSize": 64,
-			"nLayers": 6,
-			"enabledAttn": [
-				"F",
-				"T"
-			],
-			"hiddenFactorAttn": 1,
-			"hiddenFactor": 4,
-			"velocityPredictorHiddenSize": 512,
-			"refinedOFPredictorHiddenSize": 512,
-			"scoringExpansionFactor": 4,
-			"useInnerProductScorer": true,
-			"scoreDropoutProb": 0.1,
-			"contextDropoutProb": 0.0,
-			"velocityDropoutProb": 0.1,
-			"refinedOFDropoutProb": 0.1
-		}
-	}
-}

models/2.0.pt DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:50a80010effc2a59ffcd068a95cd2b29bd7f23a27a3515bc3ccd209c89a3d44c
-size 56408978

models/__init__.py DELETED Viewed

File without changes