Spaces:
Sleeping
Sleeping
Commit ·
0073001
1
Parent(s): c9de652
- Dockerfile +3 -0
- app.py +17 -31
- requirements.txt +1 -1
Dockerfile
CHANGED
|
@@ -8,6 +8,9 @@ WORKDIR /app
|
|
| 8 |
COPY requirements.txt .
|
| 9 |
RUN pip install --no-cache-dir -r requirements.txt
|
| 10 |
|
|
|
|
|
|
|
|
|
|
| 11 |
# Copy the application code.
|
| 12 |
COPY . .
|
| 13 |
|
|
|
|
| 8 |
COPY requirements.txt .
|
| 9 |
RUN pip install --no-cache-dir -r requirements.txt
|
| 10 |
|
| 11 |
+
# Install additional system dependencies for soundfile
|
| 12 |
+
RUN apt-get update && apt-get install -y libsndfile1
|
| 13 |
+
|
| 14 |
# Copy the application code.
|
| 15 |
COPY . .
|
| 16 |
|
app.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
from fastapi import FastAPI, File, UploadFile, HTTPException, Form
|
| 2 |
from fastapi.responses import JSONResponse
|
| 3 |
from pydantic import BaseModel
|
| 4 |
-
import torchaudio
|
| 5 |
import numpy as np
|
| 6 |
import tempfile
|
| 7 |
import os
|
|
@@ -12,42 +12,28 @@ warnings.filterwarnings("ignore")
|
|
| 12 |
app = FastAPI()
|
| 13 |
|
| 14 |
def extract_audio_features(audio_file_path):
|
| 15 |
-
# Load the audio file using torchaudio
|
| 16 |
-
waveform, sample_rate = torchaudio.load(audio_file_path)
|
| 17 |
-
|
| 18 |
-
# Ensure waveform is mono by averaging channels if necessary
|
| 19 |
-
if waveform.shape[0] > 1:
|
| 20 |
-
waveform = waveform.mean(dim=0, keepdim=True)
|
| 21 |
-
|
| 22 |
-
waveform = waveform.squeeze() # Remove channel dimension if it's 1
|
| 23 |
-
|
| 24 |
-
# Extract pitch (fundamental frequency)
|
| 25 |
-
pitch_frequencies, voiced_flags, _ = torchaudio.functional.detect_pitch_frequency(
|
| 26 |
-
waveform, sample_rate, frame_time=0.01, win_length=1024
|
| 27 |
-
)
|
| 28 |
-
f0 = pitch_frequencies[voiced_flags > 0]
|
| 29 |
|
| 30 |
-
#
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
|
|
|
| 36 |
|
| 37 |
-
#
|
| 38 |
-
|
| 39 |
-
speech_rate =
|
|
|
|
| 40 |
|
| 41 |
-
return f0
|
| 42 |
|
| 43 |
def analyze_voice_stress(audio_file_path):
|
| 44 |
f0, energy, speech_rate, mfccs, waveform, sample_rate = extract_audio_features(audio_file_path)
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
mean_f0 = np.mean(f0)
|
| 48 |
-
std_f0 = np.std(f0)
|
| 49 |
-
mean_energy = np.mean(energy)
|
| 50 |
-
std_energy = np.std(energy)
|
| 51 |
gender = 'male' if mean_f0 < 165 else 'female'
|
| 52 |
norm_mean_f0 = 110 if gender == 'male' else 220
|
| 53 |
norm_std_f0 = 20
|
|
|
|
| 1 |
from fastapi import FastAPI, File, UploadFile, HTTPException, Form
|
| 2 |
from fastapi.responses import JSONResponse
|
| 3 |
from pydantic import BaseModel
|
| 4 |
+
import soundfile as sf
|
| 5 |
import numpy as np
|
| 6 |
import tempfile
|
| 7 |
import os
|
|
|
|
| 12 |
app = FastAPI()
|
| 13 |
|
| 14 |
def extract_audio_features(audio_file_path):
|
| 15 |
+
# Load the audio file using soundfile
|
| 16 |
+
waveform, sample_rate = sf.read(audio_file_path)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
|
| 18 |
+
# Ensure waveform is a 1D array (mono audio)
|
| 19 |
+
if waveform.ndim > 1:
|
| 20 |
+
waveform = waveform.mean(axis=1)
|
| 21 |
+
|
| 22 |
+
# Calculate basic features (pitch estimation requires a more complex algorithm, but we'll simplify)
|
| 23 |
+
energy = np.mean(waveform ** 2)
|
| 24 |
+
mfccs = np.mean(np.abs(np.fft.fft(waveform)[:13]), axis=0) # Simplified MFCC-like features
|
| 25 |
|
| 26 |
+
# Placeholder for speech rate and fundamental frequency
|
| 27 |
+
# Speech rate and pitch extraction would require more complex DSP techniques or external libraries.
|
| 28 |
+
speech_rate = 4.0 # Arbitrary placeholder value for speech rate
|
| 29 |
+
f0 = np.mean(np.abs(np.diff(waveform))) * sample_rate / (2 * np.pi) # Rough pitch estimate
|
| 30 |
|
| 31 |
+
return f0, energy, speech_rate, mfccs, waveform, sample_rate
|
| 32 |
|
| 33 |
def analyze_voice_stress(audio_file_path):
|
| 34 |
f0, energy, speech_rate, mfccs, waveform, sample_rate = extract_audio_features(audio_file_path)
|
| 35 |
+
mean_f0 = f0
|
| 36 |
+
mean_energy = energy
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
gender = 'male' if mean_f0 < 165 else 'female'
|
| 38 |
norm_mean_f0 = 110 if gender == 'male' else 220
|
| 39 |
norm_std_f0 = 20
|
requirements.txt
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
fastapi
|
| 2 |
uvicorn
|
| 3 |
-
torchaudio
|
| 4 |
numpy
|
| 5 |
pydantic
|
| 6 |
python-multipart
|
|
|
|
| 1 |
fastapi
|
| 2 |
uvicorn
|
| 3 |
+
soundfile
|
| 4 |
numpy
|
| 5 |
pydantic
|
| 6 |
python-multipart
|