Spaces:
Sleeping
Sleeping
Upload 2 files
Browse files
- app.py +21 -4
- requirements.txt +8 -7
app.py
CHANGED
|
@@ -13,7 +13,8 @@ from fastapi import FastAPI, HTTPException, BackgroundTasks, Header, Depends
|
|
| 13 |
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
|
| 14 |
from pydantic import BaseModel, HttpUrl
|
| 15 |
import torch
|
| 16 |
-
import
|
|
|
|
| 17 |
import tempfile
|
| 18 |
import requests
|
| 19 |
from pathlib import Path
|
|
@@ -23,6 +24,7 @@ import uvicorn
|
|
| 23 |
import asyncio
|
| 24 |
from contextlib import asynccontextmanager
|
| 25 |
import socket
|
|
|
|
| 26 |
|
| 27 |
# Global model variable
|
| 28 |
model = None
|
|
@@ -209,14 +211,29 @@ def classify_audio(file_path: str) -> dict:
|
|
| 209 |
if file_size == 0:
|
| 210 |
raise ValueError("Audio file is empty")
|
| 211 |
|
| 212 |
-
# Load audio
|
| 213 |
-
print("๐ Loading audio with
|
| 214 |
-
audio, sr =
|
| 215 |
print(f"🎼 Audio loaded: {len(audio)} samples at {sr}Hz, duration: {len(audio)/sr:.2f}s")
|
| 216 |
|
| 217 |
if len(audio) == 0:
|
| 218 |
raise ValueError("Audio file contains no audio data")
|
| 219 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 220 |
# Convert to tensor and add batch dimension
|
| 221 |
print("🧮 Converting to tensor...")
|
| 222 |
audio_tensor = torch.FloatTensor(audio).unsqueeze(0)
|
|
|
|
| 13 |
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
|
| 14 |
from pydantic import BaseModel, HttpUrl
|
| 15 |
import torch
|
| 16 |
+
import soundfile as sf
|
| 17 |
+
import scipy.signal
|
| 18 |
import tempfile
|
| 19 |
import requests
|
| 20 |
from pathlib import Path
|
|
|
|
| 24 |
import asyncio
|
| 25 |
from contextlib import asynccontextmanager
|
| 26 |
import socket
|
| 27 |
+
import numpy as np
|
| 28 |
|
| 29 |
# Global model variable
|
| 30 |
model = None
|
|
|
|
| 211 |
if file_size == 0:
|
| 212 |
raise ValueError("Audio file is empty")
|
| 213 |
|
| 214 |
+
# Load audio with soundfile
|
| 215 |
+
print("๐ Loading audio with soundfile...")
|
| 216 |
+
audio, sr = sf.read(file_path)
|
| 217 |
print(f"🎼 Audio loaded: {len(audio)} samples at {sr}Hz, duration: {len(audio)/sr:.2f}s")
|
| 218 |
|
| 219 |
if len(audio) == 0:
|
| 220 |
raise ValueError("Audio file contains no audio data")
|
| 221 |
|
| 222 |
+
# Convert to mono if stereo
|
| 223 |
+
if audio.ndim > 1:
|
| 224 |
+
print("๐ Converting stereo to mono...")
|
| 225 |
+
audio = np.mean(audio, axis=1)
|
| 226 |
+
|
| 227 |
+
# Resample to 16kHz if needed (model requirement)
|
| 228 |
+
target_sr = 16000
|
| 229 |
+
if sr != target_sr:
|
| 230 |
+
print(f"๐ Resampling from {sr}Hz to {target_sr}Hz...")
|
| 231 |
+
# Calculate the number of samples after resampling
|
| 232 |
+
num_samples = int(len(audio) * target_sr / sr)
|
| 233 |
+
audio = scipy.signal.resample(audio, num_samples)
|
| 234 |
+
sr = target_sr
|
| 235 |
+
print(f"✅ Resampled: {len(audio)} samples at {sr}Hz")
|
| 236 |
+
|
| 237 |
# Convert to tensor and add batch dimension
|
| 238 |
print("🧮 Converting to tensor...")
|
| 239 |
audio_tensor = torch.FloatTensor(audio).unsqueeze(0)
|
requirements.txt
CHANGED
|
@@ -1,8 +1,9 @@
|
|
| 1 |
-
fastapi==0.104.1
|
| 2 |
-
uvicorn==0.24.0
|
| 3 |
-
streamlit>=1.28.0
|
| 4 |
-
torch>=2.0.0
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
|
|
|
| 8 |
git+https://github.com/awsaf49/sonics.git
|
|
|
|
| 1 |
+
fastapi==0.104.1
|
| 2 |
+
uvicorn==0.24.0
|
| 3 |
+
streamlit>=1.28.0
|
| 4 |
+
torch>=2.0.0
|
| 5 |
+
soundfile>=0.12.1
|
| 6 |
+
scipy>=1.9.0
|
| 7 |
+
requests>=2.25.0
|
| 8 |
+
pydantic>=2.0.0
|
| 9 |
git+https://github.com/awsaf49/sonics.git
|