TNOT committed on
Commit
480e0c3
·
1 Parent(s): a01bfee

fix: HuggingFace 镜像源

Browse files
docs/流程文档_AI用.md CHANGED
@@ -155,10 +155,11 @@
155
  - 日文: `japanese_mfa.zip` + `japanese_mfa.dict`
156
 
157
  Silero VAD 下载镜像源(按优先级):
158
- - ghproxy.com 镜像(国内加速)
159
- - mirror.ghproxy.com 镜像
 
 
160
  - jsdelivr CDN
161
- - fastgit 镜像
162
  - GitHub 原始地址(备选)
163
 
164
  MFA 字典文件完整性校验:
 
155
  - 日文: `japanese_mfa.zip` + `japanese_mfa.dict`
156
 
157
  Silero VAD 下载镜像源(按优先级):
158
+ - HuggingFace deepghs/silero-vad-onnx(国内云环境推荐)
159
+ - HuggingFace onnx-community/silero-vad
160
+ - hf-mirror.com 镜像站
161
+ - ghproxy.com 镜像(GitHub 加速)
162
  - jsdelivr CDN
 
163
  - GitHub 原始地址(备选)
164
 
165
  MFA 字典文件完整性校验:
src/silero_vad_downloader.py CHANGED
@@ -24,20 +24,30 @@ SILERO_VAD_CONFIG = {
24
  }
25
 
26
  # 下载镜像源列表(按优先级排序)
27
- # 国内云环境优先使用镜像
28
  DOWNLOAD_MIRRORS = [
29
- # ghproxy 镜像(国内加速)
 
 
 
 
 
 
30
  "https://ghproxy.com/https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data",
31
  # mirror.ghproxy 镜像
32
  "https://mirror.ghproxy.com/https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data",
33
  # jsdelivr CDN(稳定但可能有延迟)
34
  "https://cdn.jsdelivr.net/gh/snakers4/silero-vad@master/src/silero_vad/data",
35
- # fastgit 镜像
36
- "https://raw.fastgit.org/snakers4/silero-vad/master/src/silero_vad/data",
37
  # GitHub 原始地址(作为最后备选)
38
  "https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data",
39
  ]
40
 
 
 
 
 
 
 
41
 
42
  def _download_file_from_url(
43
  url: str,
@@ -144,15 +154,26 @@ def _download_file_with_mirrors(
144
  if progress_callback:
145
  progress_callback(msg)
146
 
 
 
 
147
  for i, base_url in enumerate(mirrors):
148
- url = f"{base_url}/{filename}"
149
- log(f"尝试镜像源 {i + 1}/{len(mirrors)}: {base_url.split('/')[2]}")
 
 
 
 
 
150
 
151
  # 镜像源使用较短超时,快速切换
152
  timeout = 30 if i < len(mirrors) - 1 else 120
153
 
154
- if _download_file_from_url(url, dest_path, timeout, progress_callback):
155
- return True
 
 
 
156
 
157
  if i < len(mirrors) - 1:
158
  log("切换到下一个镜像源...")
 
24
  }
25
 
26
  # 下载镜像源列表(按优先级排序)
27
+ # 国内云环境优先使用 HuggingFace 镜像(魔搭创空间访问 HF 较快)
28
  DOWNLOAD_MIRRORS = [
29
+ # HuggingFace 镜像(国内云环境推荐)
30
+ "https://huggingface.co/deepghs/silero-vad-onnx/resolve/main",
31
+ # HuggingFace onnx-community 镜像
32
+ "https://huggingface.co/onnx-community/silero-vad/resolve/main/onnx",
33
+ # HuggingFace 镜像站(hf-mirror.com)
34
+ "https://hf-mirror.com/deepghs/silero-vad-onnx/resolve/main",
35
+ # ghproxy 镜像(GitHub 加速)
36
  "https://ghproxy.com/https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data",
37
  # mirror.ghproxy 镜像
38
  "https://mirror.ghproxy.com/https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data",
39
  # jsdelivr CDN(稳定但可能有延迟)
40
  "https://cdn.jsdelivr.net/gh/snakers4/silero-vad@master/src/silero_vad/data",
 
 
41
  # GitHub 原始地址(作为最后备选)
42
  "https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data",
43
  ]
44
 
45
+ # HuggingFace 镜像的文件名映射(HF 上的文件名可能不同)
46
+ HF_FILENAME_MAP = {
47
+ "silero_vad.onnx": ["silero_vad.onnx", "model.onnx"],
48
+ "silero_vad.jit": ["silero_vad.jit"],
49
+ }
50
+
51
 
52
  def _download_file_from_url(
53
  url: str,
 
154
  if progress_callback:
155
  progress_callback(msg)
156
 
157
+ # 获取可能的文件名列表(用于 HuggingFace 镜像)
158
+ possible_filenames = HF_FILENAME_MAP.get(filename, [filename])
159
+
160
  for i, base_url in enumerate(mirrors):
161
+ # 提取域名用于日志显示
162
+ try:
163
+ domain = base_url.split('/')[2]
164
+ except:
165
+ domain = base_url[:30]
166
+
167
+ log(f"尝试镜像源 {i + 1}/{len(mirrors)}: {domain}")
168
 
169
  # 镜像源使用较短超时,快速切换
170
  timeout = 30 if i < len(mirrors) - 1 else 120
171
 
172
+ # 尝试不同的文件名(HuggingFace 镜像可能使用 model.onnx)
173
+ for try_filename in possible_filenames:
174
+ url = f"{base_url}/{try_filename}"
175
+ if _download_file_from_url(url, dest_path, timeout, progress_callback):
176
+ return True
177
 
178
  if i < len(mirrors) - 1:
179
  log("切换到下一个镜像源...")