malt666 committed on
Commit
dc9db3e
·
verified ·
1 Parent(s): fdd1fdb

Upload 9 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ build/visqol/visqol_lib_py.so filter=lfs diff=lfs merge=lfs -text
Dockerfile ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Use the official Python 3.8 image so the interpreter matches the prebuilt .so
FROM python:3.8-slim

# Set the working directory
WORKDIR /app

# Install system dependencies (libsndfile1 for soundfile, ffmpeg for conversion)
RUN apt-get update && apt-get install -y --no-install-recommends \
    libsndfile1 \
    ffmpeg \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Install Python dependencies first so this layer stays cached when only the
# application code or the ViSQOL artifacts change
COPY requirements.txt ./
RUN pip install --no-cache-dir -r requirements.txt

# Copy the ViSQOL build artifacts into /app/build
# Note: the source path is relative to the build context (the Dockerfile location)
COPY ./build /app/build

# Copy the application code
COPY app.py ./

# Add the ViSQOL library directory to the dynamic linker search path.
# The ${VAR:+...} form appends the previous value only when one exists; a bare
# "dir:${LD_LIBRARY_PATH}" would leave a trailing ':' (an empty entry, i.e. the
# current directory) whenever the variable is unset in the base image.
ENV LD_LIBRARY_PATH=/app/build/visqol${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}

# Execute permission on the .so is normally not required; kept here, disabled,
# in case it is ever needed:
# RUN chmod +x /app/build/visqol/visqol_lib_py.so

# Expose the port uvicorn listens on (HF Spaces handles the port mapping itself)
EXPOSE 8000

# Start the FastAPI application: run the `app` instance from app.py with uvicorn
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8000"]
README.md CHANGED
@@ -1,11 +1,60 @@
1
- ---
2
- title: Visqol
3
- emoji: 📊
4
- colorFrom: gray
5
- colorTo: pink
6
- sdk: docker
7
- pinned: false
8
- license: mit
9
- ---
10
-
11
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: ViSQOL Audio Quality API
3
+ emoji: 🎧
4
+ colorFrom: blue
5
+ colorTo: green
6
+ sdk: docker
7
+ app_port: 8000
8
+ # pinned: false
9
+ # license: apache-2.0 # 如果你想指定许可证
10
+ ---
11
+
12
+ # ViSQOL 音频质量评估 API
13
+
14
+ 这是一个基于 FastAPI 的 Hugging Face Space,用于提供 ViSQOL 音频质量评估服务。
15
+
16
+ 它使用了 Google 的 ViSQOL 算法 (Linux 编译版本,适用于 Python 3.8) 来计算参考音频和待评估音频之间的感知相似度得分 (MOS-LQO)。
17
+
18
+ ## API 端点
19
+
20
+ * **`POST /evaluate/`**
21
+ * 接收两个音频文件 (`reference` 和 `degraded`) 以及一个模式参数 (`mode`, 'audio' 或 'speech')。
22
+ * 返回包含 MOS-LQO 得分和其他信息的 JSON 响应。
23
+
24
+ ## 如何使用
25
+
26
+ 你可以通过发送 POST 请求到部署后的 Space URL 的 `/evaluate/` 路径来使用此 API。
27
+
28
+ **示例 (Python):**
29
+
30
+ ```python
31
+ import requests
32
+
33
+ # 替换为你的 Space URL
34
+ API_URL = "https://你的用户名-你的spacename.hf.space/evaluate/"
35
+
36
+ files = {
37
+ 'reference': open('path/to/reference.wav', 'rb'),
38
+ 'degraded': open('path/to/degraded.wav', 'rb')
39
+ }
40
+
41
+ params = {'mode': 'audio'} # 或 'speech'
42
+
43
+ response = requests.post(API_URL, files=files, params=params)
44
+
45
+ if response.status_code == 200:
46
+ result = response.json()
47
+ print(f"评估结果: {result}")
48
+ if result['status'] == '处理成功':
49
+ print(f"MOS-LQO: {result['moslqo']}")
50
+ else:
51
+ print(f"处理失败: {result['error_message']}")
52
+ else:
53
+ print(f"API 请求错误: {response.status_code} - {response.text}")
54
+ ```
55
+
56
+ ## 注意
57
+
58
+ * 输入的音频文件推荐使用 WAV 格式。
59
+ * 参考音频和待评估音频的采样率建议匹配;服务端会使用 ffmpeg 将两者统一转换为单声道并重采样到目标采样率 (audio 模式 48000 Hz, speech 模式 16000 Hz)。
60
+ * 此 Space 使用的 ViSQOL 库是为 Python 3.8 编译的。
app.py ADDED
@@ -0,0 +1,263 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, UploadFile, File, HTTPException
2
+ from fastapi.responses import JSONResponse
3
+ import subprocess
4
+ import tempfile
5
+ import os
6
+ import shutil
7
+ from pydantic import BaseModel
8
+ import sys
9
+ import numpy as np # ViSQOL 可能需要 numpy
10
+ import soundfile as sf # 用于读取音频
11
+ from typing import Optional, List # 导入 List
12
+ import librosa # Need librosa for resampling during conversion if soundfile fails
13
+
14
app = FastAPI(title="ViSQOL 音频质量 API")

# --- ViSQOL path configuration ---
# Every path is anchored to the directory that contains this file, so the
# service also starts when it is launched from a working directory other than
# the project root (a bare "./build/visqol" is resolved against the CWD).
_APP_DIR = os.path.dirname(os.path.abspath(__file__))
VISQOL_DIR = os.path.join(_APP_DIR, "build", "visqol")
VISQOL_LIB_PATH = os.path.join(VISQOL_DIR, "visqol_lib_py.so")
PB2_DIR = os.path.join(VISQOL_DIR, "pb2")  # directory holding the generated *_pb2.py modules
MODEL_DIR = os.path.join(VISQOL_DIR, "model")
SPEECH_MODEL_PATH = os.path.join(MODEL_DIR, "libsvm_nu_svr_model.txt")
AUDIO_MODEL_PATH = os.path.join(MODEL_DIR, "lattice_tcditugenmeetpackhref_ls2_nl60_lr12_bs2048_learn.005_ep2400_train1_7_raw.tflite")
# --- end of path configuration ---

# Fail fast at import time if any required artifact is missing.
required_files = [VISQOL_LIB_PATH, SPEECH_MODEL_PATH, AUDIO_MODEL_PATH]
missing = [f for f in required_files if not os.path.exists(f)]
if missing:
    raise FileNotFoundError(f"ViSQOL 必需文件未找到: {', '.join(missing)}")
if not os.path.isdir(PB2_DIR):
    raise FileNotFoundError(f"ViSQOL pb2 目录未找到: {PB2_DIR}")

# Import the ViSQOL extension module and the generated protobuf modules.
try:
    # Put the pb2 directory and the visqol directory at the front of sys.path
    # so the three modules below can be imported by their bare names.
    sys.path.insert(0, os.path.abspath(PB2_DIR))
    sys.path.insert(0, os.path.abspath(VISQOL_DIR))
    # The Dockerfile additionally adds the visqol directory to LD_LIBRARY_PATH.
    import visqol_lib_py
    import similarity_result_pb2
    import visqol_config_pb2
    print("ViSQOL 库和 pb2 文件导入成功。")
except ImportError as e:
    print("错误:无法导入 ViSQOL 库或 pb2 文件。")
    print(f"Python 搜索路径: {sys.path}")
    print(f"错误详情: {e}")
    # Deliberately not re-raised: the server still starts, the failure shows up
    # in the logs, and the endpoints check this sentinel before using the library.
    visqol_lib_py = None
53
+
54
+ # 定义 API 响应模型
55
class VisqolResponse(BaseModel):
    # --- echo of the request ---
    # Client-supplied names of the two uploads and the requested scoring mode.
    reference_filename: str
    degraded_filename: str
    mode: str

    # --- ViSQOL output ---
    # MOS-LQO score; the endpoint reports -1.0 when no score was produced.
    moslqo: float
    # Optional similarity values copied from the ViSQOL result when present.
    vnsim: Optional[float] = None
    fvnsim: Optional[List[float]] = None

    # --- outcome ---
    # Human-readable processing status, plus an error description on failure.
    status: str
    error_message: Optional[str] = None
64
+
65
+ # Function to convert and resample audio using ffmpeg
66
def convert_and_resample_audio(input_path, output_path, target_sr):
    """Convert an audio file to mono audio at ``target_sr`` Hz using ffmpeg.

    The container format of the result is whatever ffmpeg derives from
    ``output_path`` (callers in this file pass a ``.wav`` path).

    Args:
        input_path: Path of the source audio file.
        output_path: Destination path; an existing file is overwritten.
        target_sr: Target sample rate in Hz.

    Returns:
        True when ffmpeg exits with status 0, False otherwise. Failures are
        printed and never raised to the caller.
    """
    cmd = [
        'ffmpeg',
        '-y',  # Overwrite output file if it exists
        '-i', input_path,
        '-ar', str(target_sr),  # Set target sample rate
        '-ac', '1',  # Force mono channel (ViSQOL often expects mono)
        output_path
    ]
    print(f"Running ffmpeg: {' '.join(cmd)}")
    try:
        subprocess.run(
            cmd,
            check=True,
            capture_output=True,
            # ffmpeg never needs interactive input here; detach stdin so it
            # cannot consume or block on the server's stdin.
            stdin=subprocess.DEVNULL,
            text=True,
            encoding='utf-8',
            # ffmpeg echoes file metadata to stderr, which may contain bytes
            # that are not valid UTF-8. Without this, decoding raises
            # UnicodeDecodeError and a successful conversion is reported as
            # a failure by the generic handler below.
            errors='replace',
        )
    except FileNotFoundError:
        print("错误: ffmpeg 未找到,无法转换音频。请确保已在 Docker 环境中安装 ffmpeg。")
        return False
    except subprocess.CalledProcessError as e:
        print(f"错误: ffmpeg 执行失败 (返回码 {e.returncode})。")
        print(f"ffmpeg stderr: {e.stderr}")
        return False
    except Exception as e:
        print(f"转换音频时发生未知错误: {e}")
        return False
    else:
        print("ffmpeg conversion successful.")
        return True
92
+
93
def _upload_suffix(filename):
    """Return a filesystem-safe extension (e.g. ``".mp3"``) from a client-supplied filename.

    Only a short, purely alphanumeric extension is kept; anything else
    (missing name, no extension, odd characters) yields an empty string.
    """
    ext = os.path.splitext(os.path.basename(filename or ""))[1]
    if ext and len(ext) <= 16 and ext[1:].isalnum():
        return ext
    return ""


@app.post("/evaluate/", response_model=VisqolResponse)
async def evaluate_audio(
    reference: UploadFile = File(..., description="参考音频文件"),
    degraded: UploadFile = File(..., description="待评估音频文件"),
    mode: str = "audio"  # 'audio' or 'speech'
):
    """
    使用 ViSQOL 评估两个音频文件之间的感知相似度。
    返回预测的平均意见得分 (MOS-LQO)。
    """
    # NOTE: FastAPI publishes the docstring above as the endpoint description,
    # so its text is kept verbatim. In English: evaluate the perceptual
    # similarity of two audio files with ViSQOL and return the predicted mean
    # opinion score (MOS-LQO).
    #
    # Error contract: the two guard checks below raise HTTPException directly
    # (HTTP 500 / 400). Every failure inside the try block, including the
    # HTTPExceptions raised there, is caught and reported through the `status`
    # and `error_message` fields of a normal VisqolResponse.
    if visqol_lib_py is None:
        raise HTTPException(status_code=500, detail="ViSQOL 库未成功加载。")

    if mode not in ["audio", "speech"]:
        raise HTTPException(status_code=400, detail="模式参数 'mode' 必须是 'audio' 或 'speech'")

    temp_dir = tempfile.mkdtemp()
    # Keep only a sanitized extension as a format hint for ffmpeg. The raw
    # client filename is never used in a path: a name containing "/" would
    # point into a non-existent subdirectory and make the write fail.
    ref_temp_orig = os.path.join(temp_dir, f"ref_upload{_upload_suffix(reference.filename)}")
    deg_temp_orig = os.path.join(temp_dir, f"deg_upload{_upload_suffix(degraded.filename)}")
    # Final WAV paths produced by ffmpeg
    ref_path_wav = os.path.join(temp_dir, "reference.wav")
    deg_path_wav = os.path.join(temp_dir, "degraded.wav")

    # Defaults reported when processing does not complete.
    mos = -1.0
    vnsim_val = None
    fvnsim_val = None
    status_msg = "处理失败"
    error_msg = None

    try:
        # 1. Persist the raw uploads
        ref_content = await reference.read()
        with open(ref_temp_orig, "wb") as f:
            f.write(ref_content)
        deg_content = await degraded.read()
        with open(deg_temp_orig, "wb") as f:
            f.write(deg_content)
        await reference.close()
        await degraded.close()

        # 2. Pick the target sample rate for the mode and convert/resample both files
        target_sr = 48000 if mode == 'audio' else 16000
        print(f"目标采样率: {target_sr} Hz for mode '{mode}'")

        conv_ref_ok = convert_and_resample_audio(ref_temp_orig, ref_path_wav, target_sr)
        conv_deg_ok = convert_and_resample_audio(deg_temp_orig, deg_path_wav, target_sr)

        if not (conv_ref_ok and conv_deg_ok):
            raise HTTPException(status_code=500, detail="使用 ffmpeg 转换或重采样音频文件失败。")

        # 3. Sanity-check the converted WAV files
        try:
            ref_info = sf.info(ref_path_wav)
            deg_info = sf.info(deg_path_wav)
            if ref_info.samplerate != target_sr or deg_info.samplerate != target_sr:
                print(f"警告:ffmpeg 转换后的采样率 ({ref_info.samplerate}/{deg_info.samplerate}) 与目标 ({target_sr}) 不符,可能影响 ViSQOL 结果。")
        except Exception as audio_e:
            # sf.info failing here suggests the ffmpeg output is unusable
            raise HTTPException(status_code=400, detail=f"无法读取转换后的 WAV 文件: {audio_e}")

        # 4. Load the converted audio samples
        try:
            print(f"从 WAV 加载音频数据: {ref_path_wav}, {deg_path_wav}")
            # Read as float64 so the arrays handed to the library are doubles
            ref_data, sr_ref = sf.read(ref_path_wav, dtype='float64')
            deg_data, sr_deg = sf.read(deg_path_wav, dtype='float64')
            # A mismatch is only logged; evaluation continues regardless
            if sr_ref != target_sr or sr_deg != target_sr:
                print(f"警告:读取的 WAV 文件采样率 ({sr_ref}/{sr_deg}) 与目标 ({target_sr}) 不符。")
            print("音频数据加载成功。")
        except Exception as read_e:
            raise HTTPException(status_code=500, detail=f"读取转换后的 WAV 文件时出错: {read_e}")

        # 5. Build the ViSQOL configuration
        config = visqol_config_pb2.VisqolConfig()
        config.audio.sample_rate = target_sr

        # NOTE: the constant names are counter-intuitive. AUDIO_MODEL_PATH is
        # the .tflite lattice model and is the one used for *speech* mode;
        # SPEECH_MODEL_PATH is the libsvm .txt model and is used for *audio*
        # mode. This pairing is intentional.
        if mode == "speech":
            config.options.use_speech_scoring = True
            model_file_to_use = AUDIO_MODEL_PATH  # .tflite model
        else:  # audio mode
            config.options.use_speech_scoring = False
            model_file_to_use = SPEECH_MODEL_PATH  # .txt model (libsvm)

        config.options.svr_model_path = os.path.abspath(model_file_to_use)
        print(f"使用模型: {model_file_to_use} for mode '{mode}'")

        # 6. Create the API instance and run the measurement on the sample
        #    arrays (not on file paths)
        api = visqol_lib_py.VisqolApi()
        api.Create(config)
        similarity_result_msg = api.Measure(ref_data, deg_data)

        # 7. Extract the score and the optional similarity values
        if similarity_result_msg and hasattr(similarity_result_msg, 'moslqo'):
            mos = similarity_result_msg.moslqo
            status_msg = "处理成功"
            print(f"ViSQOL 评估完成: MOS-LQO = {mos}")
            if hasattr(similarity_result_msg, 'vnsim'):
                vnsim_val = similarity_result_msg.vnsim
                print(f"VNSIM = {vnsim_val}")
            else:
                print("ViSQOL 结果中未找到 vnsim 字段。")
            # fvnsim is a repeated field; convert it to a plain list for the response
            if hasattr(similarity_result_msg, 'fvnsim') and similarity_result_msg.fvnsim:
                fvnsim_val = list(similarity_result_msg.fvnsim)
                print(f"FVNSIM (第一个元素): {fvnsim_val[0] if fvnsim_val else 'N/A'}")
            else:
                print("ViSQOL 结果中未找到 fvnsim 字段或为空。")
        else:
            error_msg = "ViSQOL 未返回有效的 MOS-LQO 结果。"
            print(f"错误: {error_msg}")

    except ImportError as e:
        status_msg = "导入错误"
        error_msg = f"无法导入 ViSQOL 库或依赖: {e}"
        print(f"错误: {error_msg}")
    except FileNotFoundError as e:
        status_msg = "文件未找到错误"
        error_msg = f"必需文件丢失: {e}"
        print(f"错误: {error_msg}")
    except HTTPException as e:  # the HTTPExceptions raised inside this try block
        status_msg = "请求错误"
        error_msg = str(e.detail)
        print(f"错误: {error_msg}")
    except Exception as e:
        status_msg = "运行时错误"
        error_msg = f"处理过程中发生错误: {type(e).__name__} - {e}"
        print(f"错误: {error_msg}")
    finally:
        # Best-effort cleanup: a failure to delete the temp directory must not
        # replace the response (or the real error) with a cleanup exception.
        shutil.rmtree(temp_dir, ignore_errors=True)

    return VisqolResponse(
        # The client may omit a filename; the response model requires a string.
        reference_filename=reference.filename or "",
        degraded_filename=degraded.filename or "",
        mode=mode,
        moslqo=mos,
        vnsim=vnsim_val,
        fvnsim=fvnsim_val,
        status=status_msg,
        error_message=error_msg
    )
243
+
244
@app.get("/", include_in_schema=False)
async def root():
    # Minimal landing response for the bare URL; the route is hidden from the
    # generated API schema via include_in_schema=False.
    welcome = "欢迎使用 ViSQOL 音频质量评估 API。请使用 POST 方法访问 /evaluate/ 端点。"
    return {"message": welcome}
248
+
249
+ # 添加健康检查端点
250
@app.get("/healthz", status_code=200)
async def health_check():
    """Hugging Face Spaces health check endpoint."""
    # The HTTP status is 200 in both cases; a failed ViSQOL import is
    # signalled only through the JSON body.
    library_loaded = visqol_lib_py is not None
    if library_loaded:
        return {"status": "ok"}
    return {"status": "error", "detail": "ViSQOL library not loaded"}
257
+
258
+ # 如果直接运行脚本,用于本地测试 (可选)
259
if __name__ == "__main__":
    # Optional local test entry point, used only when the script is run
    # directly. A local run may need LD_LIBRARY_PATH set (or the .so placed
    # somewhere the loader can find it), as the Dockerfile does.
    import uvicorn

    print("运行本地测试服务器: http://127.0.0.1:8000")
    uvicorn.run(
        app,
        host="127.0.0.1",
        port=8000,
    )
build/visqol/model/lattice_tcditugenmeetpackhref_ls2_nl60_lr12_bs2048_learn.005_ep2400_train1_7_raw.tflite ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4bd031d6c95594ae2cd1d471aa6611bff26b3bbf816528431ff1741bcd798dc6
3
+ size 2233840
build/visqol/model/libsvm_nu_svr_model.txt ADDED
The diff for this file is too large to render. See raw diff
 
build/visqol/pb2/similarity_result_pb2.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ # Generated by the protocol buffer compiler. DO NOT EDIT!
3
+ # source: similarity_result.proto
4
+ """Generated protocol buffer code."""
5
+ from google.protobuf import descriptor as _descriptor
6
+ from google.protobuf import descriptor_pool as _descriptor_pool
7
+ from google.protobuf import message as _message
8
+ from google.protobuf import reflection as _reflection
9
+ from google.protobuf import symbol_database as _symbol_database
10
+ # @@protoc_insertion_point(imports)
11
+
12
+ _sym_db = _symbol_database.Default()
13
+
14
+
15
+
16
+
17
+ DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x17similarity_result.proto\x12\x06Visqol\"\xe4\x03\n\x13SimilarityResultMsg\x12\x0e\n\x06moslqo\x18\x01 \x01(\x01\x12\r\n\x05vnsim\x18\x02 \x01(\x01\x12\x0e\n\x06\x66vnsim\x18\x03 \x03(\x01\x12\x10\n\x08\x66vnsim10\x18\x0b \x03(\x01\x12\x10\n\x08\x66stdnsim\x18\x08 \x03(\x01\x12\x13\n\x0b\x66vdegenergy\x18\t \x03(\x01\x12\x19\n\x11\x63\x65nter_freq_bands\x18\x04 \x03(\x01\x12\x42\n\npatch_sims\x18\x05 \x03(\x0b\x32..Visqol.SimilarityResultMsg.PatchSimilarityMsg\x12\x1a\n\x12reference_filepath\x18\x06 \x01(\t\x12\x19\n\x11\x64\x65graded_filepath\x18\x07 \x01(\t\x12\x17\n\x0f\x61lignment_lag_s\x18\n \x01(\x01\x1a\xb5\x01\n\x12PatchSimilarityMsg\x12\x12\n\nsimilarity\x18\x01 \x01(\x01\x12\x17\n\x0f\x66req_band_means\x18\x02 \x03(\x01\x12\x1c\n\x14ref_patch_start_time\x18\x03 \x01(\x01\x12\x1a\n\x12ref_patch_end_time\x18\x04 \x01(\x01\x12\x1c\n\x14\x64\x65g_patch_start_time\x18\x05 \x01(\x01\x12\x1a\n\x12\x64\x65g_patch_end_time\x18\x06 \x01(\x01\x62\x06proto3')
18
+
19
+
20
+
21
+ _SIMILARITYRESULTMSG = DESCRIPTOR.message_types_by_name['SimilarityResultMsg']
22
+ _SIMILARITYRESULTMSG_PATCHSIMILARITYMSG = _SIMILARITYRESULTMSG.nested_types_by_name['PatchSimilarityMsg']
23
+ SimilarityResultMsg = _reflection.GeneratedProtocolMessageType('SimilarityResultMsg', (_message.Message,), {
24
+
25
+ 'PatchSimilarityMsg' : _reflection.GeneratedProtocolMessageType('PatchSimilarityMsg', (_message.Message,), {
26
+ 'DESCRIPTOR' : _SIMILARITYRESULTMSG_PATCHSIMILARITYMSG,
27
+ '__module__' : 'similarity_result_pb2'
28
+ # @@protoc_insertion_point(class_scope:Visqol.SimilarityResultMsg.PatchSimilarityMsg)
29
+ })
30
+ ,
31
+ 'DESCRIPTOR' : _SIMILARITYRESULTMSG,
32
+ '__module__' : 'similarity_result_pb2'
33
+ # @@protoc_insertion_point(class_scope:Visqol.SimilarityResultMsg)
34
+ })
35
+ _sym_db.RegisterMessage(SimilarityResultMsg)
36
+ _sym_db.RegisterMessage(SimilarityResultMsg.PatchSimilarityMsg)
37
+
38
+ if _descriptor._USE_C_DESCRIPTORS == False:
39
+
40
+ DESCRIPTOR._options = None
41
+ _SIMILARITYRESULTMSG._serialized_start=36
42
+ _SIMILARITYRESULTMSG._serialized_end=520
43
+ _SIMILARITYRESULTMSG_PATCHSIMILARITYMSG._serialized_start=339
44
+ _SIMILARITYRESULTMSG_PATCHSIMILARITYMSG._serialized_end=520
45
+ # @@protoc_insertion_point(module_scope)
build/visqol/pb2/visqol_config_pb2.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ # Generated by the protocol buffer compiler. DO NOT EDIT!
3
+ # source: visqol_config.proto
4
+ """Generated protocol buffer code."""
5
+ from google.protobuf import descriptor as _descriptor
6
+ from google.protobuf import descriptor_pool as _descriptor_pool
7
+ from google.protobuf import message as _message
8
+ from google.protobuf import reflection as _reflection
9
+ from google.protobuf import symbol_database as _symbol_database
10
+ # @@protoc_insertion_point(imports)
11
+
12
+ _sym_db = _symbol_database.Default()
13
+
14
+
15
+
16
+
17
+ DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x13visqol_config.proto\x12\x06Visqol\"\xa9\x03\n\x0cVisqolConfig\x12\x33\n\x05\x61udio\x18\x01 \x01(\x0b\x32$.Visqol.VisqolConfig.VisqolAudioInfo\x12\x33\n\x07options\x18\x02 \x01(\x0b\x32\".Visqol.VisqolConfig.VisqolOptions\x1a&\n\x0fVisqolAudioInfo\x12\x13\n\x0bsample_rate\x18\x01 \x01(\x05\x1a\x86\x02\n\rVisqolOptions\x12\x18\n\x10output_mos_score\x18\x01 \x01(\x08\x12\x16\n\x0esvr_model_path\x18\x02 \x01(\t\x12\x1a\n\x12use_speech_scoring\x18\x03 \x01(\x08\x12\x1d\n\x15\x64\x65tect_voice_activity\x18\x04 \x01(\x08\x12&\n\x1e\x61llow_unsupported_sample_rates\x18\x05 \x01(\x08\x12\'\n\x1fuse_unscaled_speech_mos_mapping\x18\x06 \x01(\x08\x12\x1c\n\x14search_window_radius\x18\x07 \x01(\x05\x12\x19\n\x11use_lattice_model\x18\x08 \x01(\x08\x62\x06proto3')
18
+
19
+
20
+
21
+ _VISQOLCONFIG = DESCRIPTOR.message_types_by_name['VisqolConfig']
22
+ _VISQOLCONFIG_VISQOLAUDIOINFO = _VISQOLCONFIG.nested_types_by_name['VisqolAudioInfo']
23
+ _VISQOLCONFIG_VISQOLOPTIONS = _VISQOLCONFIG.nested_types_by_name['VisqolOptions']
24
+ VisqolConfig = _reflection.GeneratedProtocolMessageType('VisqolConfig', (_message.Message,), {
25
+
26
+ 'VisqolAudioInfo' : _reflection.GeneratedProtocolMessageType('VisqolAudioInfo', (_message.Message,), {
27
+ 'DESCRIPTOR' : _VISQOLCONFIG_VISQOLAUDIOINFO,
28
+ '__module__' : 'visqol_config_pb2'
29
+ # @@protoc_insertion_point(class_scope:Visqol.VisqolConfig.VisqolAudioInfo)
30
+ })
31
+ ,
32
+
33
+ 'VisqolOptions' : _reflection.GeneratedProtocolMessageType('VisqolOptions', (_message.Message,), {
34
+ 'DESCRIPTOR' : _VISQOLCONFIG_VISQOLOPTIONS,
35
+ '__module__' : 'visqol_config_pb2'
36
+ # @@protoc_insertion_point(class_scope:Visqol.VisqolConfig.VisqolOptions)
37
+ })
38
+ ,
39
+ 'DESCRIPTOR' : _VISQOLCONFIG,
40
+ '__module__' : 'visqol_config_pb2'
41
+ # @@protoc_insertion_point(class_scope:Visqol.VisqolConfig)
42
+ })
43
+ _sym_db.RegisterMessage(VisqolConfig)
44
+ _sym_db.RegisterMessage(VisqolConfig.VisqolAudioInfo)
45
+ _sym_db.RegisterMessage(VisqolConfig.VisqolOptions)
46
+
47
+ if _descriptor._USE_C_DESCRIPTORS == False:
48
+
49
+ DESCRIPTOR._options = None
50
+ _VISQOLCONFIG._serialized_start=32
51
+ _VISQOLCONFIG._serialized_end=457
52
+ _VISQOLCONFIG_VISQOLAUDIOINFO._serialized_start=154
53
+ _VISQOLCONFIG_VISQOLAUDIOINFO._serialized_end=192
54
+ _VISQOLCONFIG_VISQOLOPTIONS._serialized_start=195
55
+ _VISQOLCONFIG_VISQOLOPTIONS._serialized_end=457
56
+ # @@protoc_insertion_point(module_scope)
build/visqol/visqol_lib_py.so ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a0e70dc0a46db8549b183819ef543d22d665952ebd999d75506c7969d46a05ba
3
+ size 10110248
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn[standard]
3
+ python-multipart
4
+ pydantic
5
+ numpy
6
+ soundfile
7
+ protobuf~=3.20.0
8
+ librosa