Nx-Neuralon committed on
Commit
3988574
·
verified ·
1 Parent(s): 0d68d47

Update app/audio_utils.py

Browse files
Files changed (1) hide show
  1. app/audio_utils.py +95 -114
app/audio_utils.py CHANGED
@@ -1,114 +1,95 @@
1
- from __future__ import annotations
2
-
3
- import base64
4
- import os
5
- import shutil
6
- import subprocess
7
- from dataclasses import dataclass
8
- from typing import List
9
-
10
-
11
- @dataclass
12
- class AudioChunk:
13
- path: str
14
- start_sec: float
15
- end_sec: float
16
-
17
-
18
- def check_ffmpeg_available() -> bool:
19
- return shutil.which("ffmpeg") is not None and shutil.which("ffprobe") is not None
20
-
21
-
22
- def ensure_dir(path: str) -> None:
23
- os.makedirs(path, exist_ok=True)
24
-
25
-
26
- def get_media_duration(path: str) -> float:
27
- cmd = [
28
- "ffprobe",
29
- "-v", "error",
30
- "-show_entries", "format=duration",
31
- "-of", "default=noprint_wrappers=1:nokey=1",
32
- path,
33
- ]
34
- proc = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
35
- if proc.returncode != 0:
36
- raise RuntimeError(f"ffprobe 获取时长失败: {proc.stderr}")
37
- return float(proc.stdout.strip())
38
-
39
-
40
- def extract_audio_from_video(
41
- video_path: str,
42
- output_audio_path: str,
43
- bitrate: str = "64k",
44
- ) -> str:
45
- """
46
- 从视频中抽取音频,转成 16k 单声道 mp3,便于后续 ASR。
47
- """
48
- if not os.path.exists(video_path):
49
- raise FileNotFoundError(f"视频不存在: {video_path}")
50
- if not check_ffmpeg_available():
51
- raise RuntimeError("未检测到 ffmpeg/ffprobe,请先安装 ffmpeg。")
52
-
53
- ensure_dir(os.path.dirname(output_audio_path))
54
-
55
- cmd = [
56
- "ffmpeg",
57
- "-y",
58
- "-i", video_path,
59
- "-vn",
60
- "-ac", "1",
61
- "-ar", "16000",
62
- "-c:a", "mp3",
63
- "-b:a", bitrate,
64
- output_audio_path,
65
- ]
66
- proc = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
67
- if proc.returncode != 0:
68
- raise RuntimeError(f"抽取音频失败:\n{proc.stderr}")
69
- return output_audio_path
70
-
71
-
72
- def split_audio_to_chunks(
73
- audio_path: str,
74
- output_dir: str,
75
- chunk_seconds: int = 290,
76
- ) -> List[AudioChunk]:
77
- """
78
- 按固定时长切音频,避免超过 qwen3-asr-flash 的单次时长限制。
79
- """
80
- ensure_dir(output_dir)
81
- duration = get_media_duration(audio_path)
82
- chunks: List[AudioChunk] = []
83
-
84
- start = 0.0
85
- idx = 0
86
- while start < duration:
87
- end = min(duration, start + chunk_seconds)
88
- chunk_path = os.path.join(output_dir, f"audio_chunk_{idx:03d}.mp3")
89
-
90
- cmd = [
91
- "ffmpeg",
92
- "-y",
93
- "-i", audio_path,
94
- "-ss", str(start),
95
- "-t", str(end - start),
96
- "-acodec", "copy",
97
- chunk_path,
98
- ]
99
- proc = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
100
- if proc.returncode != 0:
101
- raise RuntimeError(f"切分音频失败:\n{proc.stderr}")
102
-
103
- chunks.append(AudioChunk(path=chunk_path, start_sec=start, end_sec=end))
104
- start = end
105
- idx += 1
106
-
107
- return chunks
108
-
109
-
110
- def audio_file_to_data_uri(audio_path: str, mime_type: str = "audio/mpeg") -> str:
111
- with open(audio_path, "rb") as f:
112
- b64 = base64.b64encode(f.read()).decode("utf-8")
113
- return f"data:{mime_type};base64,{b64}"
114
-
 
1
+ from __future__ import annotations
2
+
3
+ import base64
4
+ import os
5
+ import shutil
6
+ import subprocess
7
+ from dataclasses import dataclass
8
+ from typing import List
9
+
10
+ @dataclass
11
+ class AudioChunk:
12
+ path: str
13
+ start_sec: float
14
+ end_sec: float
15
+
16
+ def check_ffmpeg_available() -> bool:
17
+ return shutil.which("ffmpeg") is not None and shutil.which("ffprobe") is not None
18
+
19
+ def ensure_dir(path: str) -> None:
20
+ os.makedirs(path, exist_ok=True)
21
+
22
+ def get_media_duration(path: str) -> float:
23
+ cmd = [
24
+ "ffprobe",
25
+ "-v", "error",
26
+ "-show_entries", "format=duration",
27
+ "-of", "default=noprint_wrappers=1:nokey=1",
28
+ path,
29
+ ]
30
+ proc = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
31
+ # if proc.returncode != 0:
32
+ # print(f"获取时长失败: {proc.stderr}")
33
+ return float(proc.stdout.strip())
34
+
35
+ def extract_audio_from_video(
36
+ video_path: str,
37
+ output_audio_path: str,
38
+ bitrate: str = "64k",
39
+ ) -> str:
40
+ ensure_dir(os.path.dirname(output_audio_path))
41
+
42
+ cmd = [
43
+ "ffmpeg",
44
+ "-y",
45
+ "-i", video_path,
46
+ "-vn",
47
+ "-ac", "1",
48
+ "-ar", "16000",
49
+ "-c:a", "mp3",
50
+ "-b:a", bitrate,
51
+ output_audio_path,
52
+ ]
53
+ proc = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
54
+
55
+ return output_audio_path
56
+
57
+ def split_audio_to_chunks(
58
+ audio_path: str,
59
+ output_dir: str,
60
+ chunk_seconds: int = 290,
61
+ ) -> List[AudioChunk]:
62
+ ensure_dir(output_dir)
63
+ duration = get_media_duration(audio_path)
64
+ chunks: List[AudioChunk] = []
65
+
66
+ start = 0.0
67
+ idx = 0
68
+ while start < duration:
69
+ end = min(duration, start + chunk_seconds)
70
+ chunk_path = os.path.join(output_dir, f"audio_chunk_{idx:03d}.mp3")
71
+
72
+ cmd = [
73
+ "ffmpeg",
74
+ "-y",
75
+ "-i", audio_path,
76
+ "-ss", str(start),
77
+ "-t", str(end - start),
78
+ "-acodec", "copy",
79
+ chunk_path,
80
+ ]
81
+ proc = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
82
+ if proc.returncode != 0:
83
+ raise RuntimeError(f"切分音频失败:\n{proc.stderr}")
84
+
85
+ chunks.append(AudioChunk(path=chunk_path, start_sec=start, end_sec=end))
86
+ start = end
87
+ idx += 1
88
+
89
+ return chunks
90
+
91
+ def audio_file_to_data_uri(audio_path: str, mime_type: str = "audio/mpeg") -> str:
92
+ with open(audio_path, "rb") as f:
93
+ b64 = base64.b64encode(f.read()).decode("utf-8")
94
+ return f"data:{mime_type};base64,{b64}"
95
+