Upload 9 files
Browse files- .gitattributes +1 -0
- Dockerfile +36 -0
- README.md +60 -11
- app.py +263 -0
- build/visqol/model/lattice_tcditugenmeetpackhref_ls2_nl60_lr12_bs2048_learn.005_ep2400_train1_7_raw.tflite +3 -0
- build/visqol/model/libsvm_nu_svr_model.txt +0 -0
- build/visqol/pb2/similarity_result_pb2.py +45 -0
- build/visqol/pb2/visqol_config_pb2.py +56 -0
- build/visqol/visqol_lib_py.so +3 -0
- requirements.txt +8 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
build/visqol/visqol_lib_py.so filter=lfs diff=lfs merge=lfs -text
|
Dockerfile
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Use the official Python 3.8 image so the interpreter matches the prebuilt .so
FROM python:3.8-slim

# Set the working directory
WORKDIR /app

# Install system dependencies (libsndfile1 for soundfile, ffmpeg for conversion)
RUN apt-get update && apt-get install -y --no-install-recommends \
    libsndfile1 \
    ffmpeg \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Install Python dependencies before copying the application code so this
# layer is served from cache when only app.py or the ViSQOL build changes.
COPY requirements.txt ./
RUN pip install --no-cache-dir -r requirements.txt

# Copy the ViSQOL build artefacts into /app/build
# (the source path is relative to the build context)
COPY ./build /app/build

# Copy the application code
COPY app.py ./

# Put the ViSQOL directory on the dynamic linker search path.
# The ":+" form appends the previous value only when one exists; a plain
# "${LD_LIBRARY_PATH}" would leave a trailing ':' (an empty entry, which the
# linker interprets as the current directory) when the variable is unset.
ENV LD_LIBRARY_PATH=/app/build/visqol${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}

# Shared objects normally do not need the execute bit; kept disabled.
# RUN chmod +x /app/build/visqol/visqol_lib_py.so

# Port the FastAPI server listens on (matches app_port in README.md)
EXPOSE 8000

# Start the FastAPI application: serve the `app` object from app.py with uvicorn
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8000"]
|
README.md
CHANGED
|
@@ -1,11 +1,60 @@
|
|
| 1 |
-
---
|
| 2 |
-
title:
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom:
|
| 5 |
-
colorTo:
|
| 6 |
-
sdk: docker
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: ViSQOL Audio Quality API
|
| 3 |
+
emoji: 🎧
|
| 4 |
+
colorFrom: blue
|
| 5 |
+
colorTo: green
|
| 6 |
+
sdk: docker
|
| 7 |
+
app_port: 8000
|
| 8 |
+
# pinned: false
|
| 9 |
+
# license: apache-2.0 # 如果你想指定许可证
|
| 10 |
+
---
|
| 11 |
+
|
| 12 |
+
# ViSQOL 音频质量评估 API
|
| 13 |
+
|
| 14 |
+
这是一个基于 FastAPI 的 Hugging Face Space,用于提供 ViSQOL 音频质量评估服务。
|
| 15 |
+
|
| 16 |
+
它使用了 Google 的 ViSQOL 算法 (Linux 编译版本,适用于 Python 3.8) 来计算参考音频和待评估音频之间的感知相似度得分 (MOS-LQO)。
|
| 17 |
+
|
| 18 |
+
## API 端点
|
| 19 |
+
|
| 20 |
+
* **`POST /evaluate/`**
|
| 21 |
+
* 接收两个音频文件 (`reference` 和 `degraded`) 以及一个模式参数 (`mode`, 'audio' 或 'speech')。
|
| 22 |
+
* 返回包含 MOS-LQO 得分和其他信息的 JSON 响应。
|
| 23 |
+
|
| 24 |
+
## 如何使用
|
| 25 |
+
|
| 26 |
+
你可以通过发送 POST 请求到部署后的 Space URL 的 `/evaluate/` 路径来使用此 API。
|
| 27 |
+
|
| 28 |
+
**示例 (Python):**
|
| 29 |
+
|
| 30 |
+
```python
|
| 31 |
+
import requests

# Replace with your Space URL
API_URL = "https://你的用户名-你的spacename.hf.space/evaluate/"

params = {'mode': 'audio'}  # or 'speech'

# Open both uploads in a with-block so the file handles are closed once the
# request has been sent.
with open('path/to/reference.wav', 'rb') as ref_file, \
        open('path/to/degraded.wav', 'rb') as deg_file:
    files = {
        'reference': ref_file,
        'degraded': deg_file
    }
    response = requests.post(API_URL, files=files, params=params)

if response.status_code == 200:
    result = response.json()
    print(f"评估结果: {result}")
    # Processing failures are reported in the JSON body, so check `status`.
    if result['status'] == '处理成功':
        print(f"MOS-LQO: {result['moslqo']}")
    else:
        print(f"处理失败: {result['error_message']}")
else:
    print(f"API 请求错误: {response.status_code} - {response.text}")
|
| 54 |
+
```
|
| 55 |
+
|
| 56 |
+
## 注意
|
| 57 |
+
|
| 58 |
+
* 输入的音频文件推荐使用 WAV 格式。
|
| 59 |
+
* 参考音频和待评估音频的采样率最好一致;服务端会用 ffmpeg 将两个文件统一转换为单声道 WAV 并重采样到目标采样率 (audio 模式 48000 Hz,speech 模式 16000 Hz)。
|
| 60 |
+
* 此 Space 使用的 ViSQOL 库是为 Python 3.8 编译的。
|
app.py
ADDED
|
@@ -0,0 +1,263 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import FastAPI, UploadFile, File, HTTPException
|
| 2 |
+
from fastapi.responses import JSONResponse
|
| 3 |
+
import subprocess
|
| 4 |
+
import tempfile
|
| 5 |
+
import os
|
| 6 |
+
import shutil
|
| 7 |
+
from pydantic import BaseModel
|
| 8 |
+
import sys
|
| 9 |
+
import numpy as np # ViSQOL 可能需要 numpy
|
| 10 |
+
import soundfile as sf # 用于读取音频
|
| 11 |
+
from typing import Optional, List # 导入 List
|
| 12 |
+
import librosa # Need librosa for resampling during conversion if soundfile fails
|
| 13 |
+
|
| 14 |
+
app = FastAPI(title="ViSQOL 音频质量 API")

# --- ViSQOL path configuration ---
# Anchor every path to the directory that contains this file, so the service
# works regardless of the current working directory of the server process.
_APP_DIR = os.path.dirname(os.path.abspath(__file__))
VISQOL_DIR = os.path.join(_APP_DIR, "build", "visqol")
VISQOL_LIB_PATH = os.path.join(VISQOL_DIR, "visqol_lib_py.so")
PB2_DIR = os.path.join(VISQOL_DIR, "pb2")  # directory holding the generated *_pb2.py modules
MODEL_DIR = os.path.join(VISQOL_DIR, "model")
# NOTE: the constant names reflect the file type, not the request mode:
# evaluate_audio passes AUDIO_MODEL_PATH (.tflite) for mode == "speech" and
# SPEECH_MODEL_PATH (libsvm text model) for mode == "audio".
SPEECH_MODEL_PATH = os.path.join(MODEL_DIR, "libsvm_nu_svr_model.txt")
AUDIO_MODEL_PATH = os.path.join(MODEL_DIR, "lattice_tcditugenmeetpackhref_ls2_nl60_lr12_bs2048_learn.005_ep2400_train1_7_raw.tflite")
# --- end of path configuration ---

# Fail fast at import time when a required artefact is absent.
required_files = [VISQOL_LIB_PATH, SPEECH_MODEL_PATH, AUDIO_MODEL_PATH]
_missing_files = [f for f in required_files if not os.path.exists(f)]
if _missing_files:
    raise FileNotFoundError(f"ViSQOL 必需文件未找到: {', '.join(_missing_files)}")
# os.path.isdir() is already False for a non-existent path.
if not os.path.isdir(PB2_DIR):
    raise FileNotFoundError(f"ViSQOL pb2 目录未找到: {PB2_DIR}")
|
| 33 |
+
|
| 34 |
+
# Import the ViSQOL extension module and the generated protobuf modules.
try:
    # Prepend both directories in one step; the resulting order is
    # VISQOL_DIR first, then PB2_DIR, ahead of the existing entries.
    sys.path[:0] = [os.path.abspath(VISQOL_DIR), os.path.abspath(PB2_DIR)]
    import visqol_lib_py
    import similarity_result_pb2
    import visqol_config_pb2
    print("ViSQOL 库和 pb2 文件导入成功。")
except ImportError as import_err:
    print("错误:无法导入 ViSQOL 库或 pb2 文件。")
    print(f"Python 搜索路径: {sys.path}")
    print(f"错误详情: {import_err}")
    # Deliberately not re-raised: the failure is printed and the module keeps
    # loading, with None marking the library as unavailable for later checks.
    visqol_lib_py = None
|
| 53 |
+
|
| 54 |
+
# Response schema of the /evaluate/ endpoint.
class VisqolResponse(BaseModel):
    # Names of the two uploads, echoed back from the request.
    reference_filename: str
    degraded_filename: str
    # Requested scoring mode ('audio' or 'speech').
    mode: str
    # Predicted MOS-LQO; the endpoint initialises it to -1.0 before scoring.
    moslqo: float
    # Optional similarity values copied from the ViSQOL result when present.
    vnsim: Optional[float] = None
    fvnsim: Optional[List[float]] = None
    # Outcome label plus an optional description of what went wrong.
    status: str
    error_message: Optional[str] = None
|
| 64 |
+
|
| 65 |
+
# Function to convert and resample audio using ffmpeg
|
| 66 |
+
def convert_and_resample_audio(input_path, output_path, target_sr):
    """Convert an audio file to mono WAV at ``target_sr`` Hz by running ffmpeg.

    Args:
        input_path: Path of the source audio file, passed to ffmpeg via ``-i``.
        output_path: Destination path; overwritten if it already exists (``-y``).
        target_sr: Output sample rate in Hz (stringified for the command line).

    Returns:
        True when ffmpeg exits with status 0; False when ffmpeg is not
        installed, exits non-zero, or any other exception occurs. Failures
        are printed, never raised.
    """
    cmd = [
        'ffmpeg',
        '-nostdin',  # never read interactive commands from the server's stdin
        '-y',  # overwrite the output file if it exists
        '-i', input_path,
        '-ar', str(target_sr),  # target sample rate
        '-ac', '1',  # force a single (mono) channel
        output_path
    ]
    print(f"Running ffmpeg: {' '.join(cmd)}")
    try:
        subprocess.run(
            cmd,
            check=True,
            capture_output=True,
            stdin=subprocess.DEVNULL,
            text=True,
            encoding='utf-8',
            # ffmpeg echoes file metadata to stderr, which is not guaranteed
            # to be valid UTF-8; without this a decode error would be
            # reported as a failed conversion.
            errors='replace',
        )
        print("ffmpeg conversion successful.")
        return True
    except FileNotFoundError:
        print("错误: ffmpeg 未找到,无法转换音频。请确保已在 Docker 环境中安装 ffmpeg。")
        return False
    except subprocess.CalledProcessError as e:
        print(f"错误: ffmpeg 执行失败 (返回码 {e.returncode})。")
        print(f"ffmpeg stderr: {e.stderr}")
        return False
    except Exception as e:
        print(f"转换音频时发生未知错误: {e}")
        return False
|
| 92 |
+
|
| 93 |
+
def _safe_upload_suffix(filename):
    """Return a harmless file extension (e.g. ".mp3") from a client-supplied name, or ""."""
    # Treat backslashes as separators too, then drop every directory part.
    base = os.path.basename((filename or "").replace("\\", "/"))
    ext = os.path.splitext(base)[1]
    # Keep only short ASCII-alphanumeric extensions; anything else is dropped
    # and ffmpeg has to identify the format from the file content.
    if 1 < len(ext) <= 16 and ext[1:].isascii() and ext[1:].isalnum():
        return ext
    return ""


# POST /evaluate/ — score `degraded` against `reference` with ViSQOL.
#
# Both uploads are written to a private temporary directory, converted by
# ffmpeg to mono WAV at 48000 Hz (mode "audio") or 16000 Hz (mode "speech"),
# loaded as float64 arrays and passed to VisqolApi.Measure.
#
# Raises HTTPException 500 when the ViSQOL library was not imported and 400 for
# an unknown mode. Every later failure is caught and reported inside the
# VisqolResponse (status / error_message, moslqo left at -1.0).
#
# NOTE(review): ffmpeg and ViSQOL run synchronously inside this coroutine, so
# the event loop is blocked while a request is being scored.
#
# The docstring below is kept verbatim because FastAPI publishes it as the
# endpoint description.
@app.post("/evaluate/", response_model=VisqolResponse)
async def evaluate_audio(
    reference: UploadFile = File(..., description="参考音频文件"),
    degraded: UploadFile = File(..., description="待评估音频文件"),
    mode: str = "audio"  # 'audio' or 'speech'
):
    """
    使用 ViSQOL 评估两个音频文件之间的感知相似度。
    返回预测的平均意见得分 (MOS-LQO)。
    """
    if visqol_lib_py is None:
        raise HTTPException(status_code=500, detail="ViSQOL 库未成功加载。")

    if mode not in ["audio", "speech"]:
        raise HTTPException(status_code=400, detail="模式参数 'mode' 必须是 'audio' 或 'speech'")

    temp_dir = tempfile.mkdtemp()
    # Only a sanitised extension of the client-supplied name is used (it helps
    # ffmpeg identify the format); the name itself never reaches the path.
    ref_temp_orig = os.path.join(temp_dir, "ref_upload" + _safe_upload_suffix(reference.filename))
    deg_temp_orig = os.path.join(temp_dir, "deg_upload" + _safe_upload_suffix(degraded.filename))
    # Final WAV paths produced by the conversion step
    ref_path_wav = os.path.join(temp_dir, "reference.wav")
    deg_path_wav = os.path.join(temp_dir, "degraded.wav")

    mos = -1.0
    vnsim_val = None
    fvnsim_val = None
    status_msg = "处理失败"
    error_msg = None

    try:
        # 1. Persist the raw uploads
        ref_content = await reference.read()
        with open(ref_temp_orig, "wb") as f:
            f.write(ref_content)
        deg_content = await degraded.read()
        with open(deg_temp_orig, "wb") as f:
            f.write(deg_content)
        await reference.close()
        await degraded.close()

        # 2. Pick the target sample rate and convert/resample both files
        target_sr = 48000 if mode == 'audio' else 16000
        print(f"目标采样率: {target_sr} Hz for mode '{mode}'")

        conv_ref_ok = convert_and_resample_audio(ref_temp_orig, ref_path_wav, target_sr)
        conv_deg_ok = convert_and_resample_audio(deg_temp_orig, deg_path_wav, target_sr)

        if not (conv_ref_ok and conv_deg_ok):
            raise HTTPException(status_code=500, detail="使用 ffmpeg 转换或重采样音频文件失败。")

        # 3. Sanity-check the converted WAV files (a rate mismatch only warns)
        try:
            ref_info = sf.info(ref_path_wav)
            deg_info = sf.info(deg_path_wav)
            if ref_info.samplerate != target_sr or deg_info.samplerate != target_sr:
                print(f"警告:ffmpeg 转换后的采样率 ({ref_info.samplerate}/{deg_info.samplerate}) 与目标 ({target_sr}) 不符,可能影响 ViSQOL 结果。")
        except Exception as audio_e:
            # sf.info failing here suggests the ffmpeg output is unusable
            raise HTTPException(status_code=400, detail=f"无法读取转换后的 WAV 文件: {audio_e}") from audio_e

        # 4. Load the converted audio samples
        try:
            print(f"从 WAV 加载音频数据: {ref_path_wav}, {deg_path_wav}")
            # Read as float64 (the original comment maps this to a C++ double)
            ref_data, sr_ref = sf.read(ref_path_wav, dtype='float64')
            deg_data, sr_deg = sf.read(deg_path_wav, dtype='float64')
            if sr_ref != target_sr or sr_deg != target_sr:
                print(f"警告:读取的 WAV 文件采样率 ({sr_ref}/{sr_deg}) 与目标 ({target_sr}) 不符。")
            print("音频数据加载成功。")
        except Exception as read_e:
            raise HTTPException(status_code=500, detail=f"读取转换后的 WAV 文件时出错: {read_e}") from read_e

        # 5. Build the ViSQOL configuration
        config = visqol_config_pb2.VisqolConfig()
        config.audio.sample_rate = target_sr

        # The pairing below is intentional despite the constant names: speech
        # mode loads the .tflite model, audio mode the libsvm text model.
        if mode == "speech":
            config.options.use_speech_scoring = True
            model_file_to_use = AUDIO_MODEL_PATH  # .tflite model
        else:  # audio mode
            config.options.use_speech_scoring = False
            model_file_to_use = SPEECH_MODEL_PATH  # .txt model (libsvm)

        config.options.svr_model_path = os.path.abspath(model_file_to_use)
        print(f"使用模型: {model_file_to_use} for mode '{mode}'")

        # 6. Create the API instance and score the in-memory sample arrays
        api = visqol_lib_py.VisqolApi()
        api.Create(config)
        similarity_result_msg = api.Measure(ref_data, deg_data)

        # 7. Extract the score plus the optional vnsim / fvnsim values
        if similarity_result_msg and hasattr(similarity_result_msg, 'moslqo'):
            mos = similarity_result_msg.moslqo
            status_msg = "处理成功"
            print(f"ViSQOL 评估完成: MOS-LQO = {mos}")
            if hasattr(similarity_result_msg, 'vnsim'):
                vnsim_val = similarity_result_msg.vnsim
                print(f"VNSIM = {vnsim_val}")
            else:
                print("ViSQOL 结果中未找到 vnsim 字段。")
            # fvnsim is copied into a plain Python list for the response model
            if hasattr(similarity_result_msg, 'fvnsim') and similarity_result_msg.fvnsim:
                fvnsim_val = list(similarity_result_msg.fvnsim)
                print(f"FVNSIM (第一个元素): {fvnsim_val[0] if fvnsim_val else 'N/A'}")
            else:
                print("ViSQOL 结果中未找到 fvnsim 字段或为空。")
        else:
            error_msg = "ViSQOL 未返回有效的 MOS-LQO 结果。"
            print(f"错误: {error_msg}")

    except ImportError as e:
        status_msg = "导入错误"
        error_msg = f"无法导入 ViSQOL 库或依赖: {e}"
        print(f"错误: {error_msg}")
    except FileNotFoundError as e:
        status_msg = "文件未找到错误"
        error_msg = f"必需文件丢失: {e}"
        print(f"错误: {error_msg}")
    except HTTPException as e:  # HTTP errors raised above are folded into the JSON body
        status_msg = "请求错误"
        error_msg = str(e.detail)
        print(f"错误: {error_msg}")
    except Exception as e:
        status_msg = "运行时错误"
        error_msg = f"处理过程中发生错误: {type(e).__name__} - {e}"
        print(f"错误: {error_msg}")
    finally:
        # Best-effort cleanup: a failure to delete must not replace the response.
        shutil.rmtree(temp_dir, ignore_errors=True)

    return VisqolResponse(
        # UploadFile.filename may be None; the response fields require str.
        reference_filename=reference.filename or "",
        degraded_filename=degraded.filename or "",
        mode=mode,
        moslqo=mos,
        vnsim=vnsim_val,
        fvnsim=fvnsim_val,
        status=status_msg,
        error_message=error_msg
    )
|
| 243 |
+
|
| 244 |
+
@app.get("/", include_in_schema=False)
async def root():
    """Return a short welcome message that points callers at POST /evaluate/."""
    welcome_text = "欢迎使用 ViSQOL 音频质量评估 API。请使用 POST 方法访问 /evaluate/ 端点。"
    return {"message": welcome_text}
|
| 248 |
+
|
| 249 |
+
# Health check endpoint
@app.get("/healthz", status_code=200)
async def health_check():
    """Hugging Face Spaces health check endpoint."""
    # A failed ViSQOL import is reported in the JSON body; both branches
    # return a plain dict, so the declared status code (200) applies to each.
    if visqol_lib_py is not None:
        return {"status": "ok"}
    return {"status": "error", "detail": "ViSQOL library not loaded"}
|
| 257 |
+
|
| 258 |
+
# Optional local test entry point, used only when the script is run directly.
if __name__ == "__main__":
    import uvicorn

    print("运行本地测试服务器: http://127.0.0.1:8000")
    # NOTE: a local run may need LD_LIBRARY_PATH set correctly, or the .so
    # placed somewhere the system can find it.
    uvicorn.run(app, host="127.0.0.1", port=8000)
|
build/visqol/model/lattice_tcditugenmeetpackhref_ls2_nl60_lr12_bs2048_learn.005_ep2400_train1_7_raw.tflite
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4bd031d6c95594ae2cd1d471aa6611bff26b3bbf816528431ff1741bcd798dc6
|
| 3 |
+
size 2233840
|
build/visqol/model/libsvm_nu_svr_model.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
build/visqol/pb2/similarity_result_pb2.py
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# -*- coding: utf-8 -*-
|
| 2 |
+
# Generated by the protocol buffer compiler. DO NOT EDIT!
|
| 3 |
+
# source: similarity_result.proto
|
| 4 |
+
"""Generated protocol buffer code."""
|
| 5 |
+
from google.protobuf import descriptor as _descriptor
|
| 6 |
+
from google.protobuf import descriptor_pool as _descriptor_pool
|
| 7 |
+
from google.protobuf import message as _message
|
| 8 |
+
from google.protobuf import reflection as _reflection
|
| 9 |
+
from google.protobuf import symbol_database as _symbol_database
|
| 10 |
+
# @@protoc_insertion_point(imports)
|
| 11 |
+
|
| 12 |
+
_sym_db = _symbol_database.Default()
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x17similarity_result.proto\x12\x06Visqol\"\xe4\x03\n\x13SimilarityResultMsg\x12\x0e\n\x06moslqo\x18\x01 \x01(\x01\x12\r\n\x05vnsim\x18\x02 \x01(\x01\x12\x0e\n\x06\x66vnsim\x18\x03 \x03(\x01\x12\x10\n\x08\x66vnsim10\x18\x0b \x03(\x01\x12\x10\n\x08\x66stdnsim\x18\x08 \x03(\x01\x12\x13\n\x0b\x66vdegenergy\x18\t \x03(\x01\x12\x19\n\x11\x63\x65nter_freq_bands\x18\x04 \x03(\x01\x12\x42\n\npatch_sims\x18\x05 \x03(\x0b\x32..Visqol.SimilarityResultMsg.PatchSimilarityMsg\x12\x1a\n\x12reference_filepath\x18\x06 \x01(\t\x12\x19\n\x11\x64\x65graded_filepath\x18\x07 \x01(\t\x12\x17\n\x0f\x61lignment_lag_s\x18\n \x01(\x01\x1a\xb5\x01\n\x12PatchSimilarityMsg\x12\x12\n\nsimilarity\x18\x01 \x01(\x01\x12\x17\n\x0f\x66req_band_means\x18\x02 \x03(\x01\x12\x1c\n\x14ref_patch_start_time\x18\x03 \x01(\x01\x12\x1a\n\x12ref_patch_end_time\x18\x04 \x01(\x01\x12\x1c\n\x14\x64\x65g_patch_start_time\x18\x05 \x01(\x01\x12\x1a\n\x12\x64\x65g_patch_end_time\x18\x06 \x01(\x01\x62\x06proto3')
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
_SIMILARITYRESULTMSG = DESCRIPTOR.message_types_by_name['SimilarityResultMsg']
|
| 22 |
+
_SIMILARITYRESULTMSG_PATCHSIMILARITYMSG = _SIMILARITYRESULTMSG.nested_types_by_name['PatchSimilarityMsg']
|
| 23 |
+
SimilarityResultMsg = _reflection.GeneratedProtocolMessageType('SimilarityResultMsg', (_message.Message,), {
|
| 24 |
+
|
| 25 |
+
'PatchSimilarityMsg' : _reflection.GeneratedProtocolMessageType('PatchSimilarityMsg', (_message.Message,), {
|
| 26 |
+
'DESCRIPTOR' : _SIMILARITYRESULTMSG_PATCHSIMILARITYMSG,
|
| 27 |
+
'__module__' : 'similarity_result_pb2'
|
| 28 |
+
# @@protoc_insertion_point(class_scope:Visqol.SimilarityResultMsg.PatchSimilarityMsg)
|
| 29 |
+
})
|
| 30 |
+
,
|
| 31 |
+
'DESCRIPTOR' : _SIMILARITYRESULTMSG,
|
| 32 |
+
'__module__' : 'similarity_result_pb2'
|
| 33 |
+
# @@protoc_insertion_point(class_scope:Visqol.SimilarityResultMsg)
|
| 34 |
+
})
|
| 35 |
+
_sym_db.RegisterMessage(SimilarityResultMsg)
|
| 36 |
+
_sym_db.RegisterMessage(SimilarityResultMsg.PatchSimilarityMsg)
|
| 37 |
+
|
| 38 |
+
if _descriptor._USE_C_DESCRIPTORS == False:
|
| 39 |
+
|
| 40 |
+
DESCRIPTOR._options = None
|
| 41 |
+
_SIMILARITYRESULTMSG._serialized_start=36
|
| 42 |
+
_SIMILARITYRESULTMSG._serialized_end=520
|
| 43 |
+
_SIMILARITYRESULTMSG_PATCHSIMILARITYMSG._serialized_start=339
|
| 44 |
+
_SIMILARITYRESULTMSG_PATCHSIMILARITYMSG._serialized_end=520
|
| 45 |
+
# @@protoc_insertion_point(module_scope)
|
build/visqol/pb2/visqol_config_pb2.py
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# -*- coding: utf-8 -*-
|
| 2 |
+
# Generated by the protocol buffer compiler. DO NOT EDIT!
|
| 3 |
+
# source: visqol_config.proto
|
| 4 |
+
"""Generated protocol buffer code."""
|
| 5 |
+
from google.protobuf import descriptor as _descriptor
|
| 6 |
+
from google.protobuf import descriptor_pool as _descriptor_pool
|
| 7 |
+
from google.protobuf import message as _message
|
| 8 |
+
from google.protobuf import reflection as _reflection
|
| 9 |
+
from google.protobuf import symbol_database as _symbol_database
|
| 10 |
+
# @@protoc_insertion_point(imports)
|
| 11 |
+
|
| 12 |
+
_sym_db = _symbol_database.Default()
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x13visqol_config.proto\x12\x06Visqol\"\xa9\x03\n\x0cVisqolConfig\x12\x33\n\x05\x61udio\x18\x01 \x01(\x0b\x32$.Visqol.VisqolConfig.VisqolAudioInfo\x12\x33\n\x07options\x18\x02 \x01(\x0b\x32\".Visqol.VisqolConfig.VisqolOptions\x1a&\n\x0fVisqolAudioInfo\x12\x13\n\x0bsample_rate\x18\x01 \x01(\x05\x1a\x86\x02\n\rVisqolOptions\x12\x18\n\x10output_mos_score\x18\x01 \x01(\x08\x12\x16\n\x0esvr_model_path\x18\x02 \x01(\t\x12\x1a\n\x12use_speech_scoring\x18\x03 \x01(\x08\x12\x1d\n\x15\x64\x65tect_voice_activity\x18\x04 \x01(\x08\x12&\n\x1e\x61llow_unsupported_sample_rates\x18\x05 \x01(\x08\x12\'\n\x1fuse_unscaled_speech_mos_mapping\x18\x06 \x01(\x08\x12\x1c\n\x14search_window_radius\x18\x07 \x01(\x05\x12\x19\n\x11use_lattice_model\x18\x08 \x01(\x08\x62\x06proto3')
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
_VISQOLCONFIG = DESCRIPTOR.message_types_by_name['VisqolConfig']
|
| 22 |
+
_VISQOLCONFIG_VISQOLAUDIOINFO = _VISQOLCONFIG.nested_types_by_name['VisqolAudioInfo']
|
| 23 |
+
_VISQOLCONFIG_VISQOLOPTIONS = _VISQOLCONFIG.nested_types_by_name['VisqolOptions']
|
| 24 |
+
VisqolConfig = _reflection.GeneratedProtocolMessageType('VisqolConfig', (_message.Message,), {
|
| 25 |
+
|
| 26 |
+
'VisqolAudioInfo' : _reflection.GeneratedProtocolMessageType('VisqolAudioInfo', (_message.Message,), {
|
| 27 |
+
'DESCRIPTOR' : _VISQOLCONFIG_VISQOLAUDIOINFO,
|
| 28 |
+
'__module__' : 'visqol_config_pb2'
|
| 29 |
+
# @@protoc_insertion_point(class_scope:Visqol.VisqolConfig.VisqolAudioInfo)
|
| 30 |
+
})
|
| 31 |
+
,
|
| 32 |
+
|
| 33 |
+
'VisqolOptions' : _reflection.GeneratedProtocolMessageType('VisqolOptions', (_message.Message,), {
|
| 34 |
+
'DESCRIPTOR' : _VISQOLCONFIG_VISQOLOPTIONS,
|
| 35 |
+
'__module__' : 'visqol_config_pb2'
|
| 36 |
+
# @@protoc_insertion_point(class_scope:Visqol.VisqolConfig.VisqolOptions)
|
| 37 |
+
})
|
| 38 |
+
,
|
| 39 |
+
'DESCRIPTOR' : _VISQOLCONFIG,
|
| 40 |
+
'__module__' : 'visqol_config_pb2'
|
| 41 |
+
# @@protoc_insertion_point(class_scope:Visqol.VisqolConfig)
|
| 42 |
+
})
|
| 43 |
+
_sym_db.RegisterMessage(VisqolConfig)
|
| 44 |
+
_sym_db.RegisterMessage(VisqolConfig.VisqolAudioInfo)
|
| 45 |
+
_sym_db.RegisterMessage(VisqolConfig.VisqolOptions)
|
| 46 |
+
|
| 47 |
+
if _descriptor._USE_C_DESCRIPTORS == False:
|
| 48 |
+
|
| 49 |
+
DESCRIPTOR._options = None
|
| 50 |
+
_VISQOLCONFIG._serialized_start=32
|
| 51 |
+
_VISQOLCONFIG._serialized_end=457
|
| 52 |
+
_VISQOLCONFIG_VISQOLAUDIOINFO._serialized_start=154
|
| 53 |
+
_VISQOLCONFIG_VISQOLAUDIOINFO._serialized_end=192
|
| 54 |
+
_VISQOLCONFIG_VISQOLOPTIONS._serialized_start=195
|
| 55 |
+
_VISQOLCONFIG_VISQOLOPTIONS._serialized_end=457
|
| 56 |
+
# @@protoc_insertion_point(module_scope)
|
build/visqol/visqol_lib_py.so
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a0e70dc0a46db8549b183819ef543d22d665952ebd999d75506c7969d46a05ba
|
| 3 |
+
size 10110248
|
requirements.txt
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
fastapi
|
| 2 |
+
uvicorn[standard]
|
| 3 |
+
python-multipart
|
| 4 |
+
pydantic
|
| 5 |
+
numpy
|
| 6 |
+
soundfile
|
| 7 |
+
protobuf~=3.20.0
|
| 8 |
+
librosa
|