Anvit25 committed on
Commit
63e1917
·
1 Parent(s): 42200b5

Initial commit

Browse files
README.md CHANGED
@@ -9,3 +9,9 @@ license: apache-2.0
9
  ---
10
 
11
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
9
  ---
10
 
11
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
12
+
13
+ # Parkinson's Voice Detection API (FastAPI)
14
+
15
+ This Space detects Parkinson's disease from voice recordings using a deep learning model. Upload an audio file and get predictions.
16
+
17
+ Try `/docs` after it's deployed.
app/feature_names.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79ebbaf34c525edfd3174a3fbbd88b2c28109e0d1b1456e3eea806c7e95d371f
3
+ size 224
app/main.py ADDED
@@ -0,0 +1,207 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import contextlib
3
+ import wave
4
+ import librosa
5
+ import numpy as np
6
+ import pandas as pd
7
+ import parselmouth
8
+ import soundfile as sf
9
+ import webrtcvad
10
+ from tensorflow.keras.models import load_model
11
+ import joblib
12
+ import warnings
13
+ import tempfile
14
+
15
+ # --- FastAPI Imports ---
16
+ from fastapi import FastAPI, File, UploadFile, HTTPException
17
+ from fastapi.responses import JSONResponse
18
+
19
# --- Configuration ---
TARGET_SR = 16000

# Resolve the model artifacts relative to this module instead of the process
# working directory: the server may be launched from a different directory
# than the one holding the artifacts, in which case bare file names would
# not be found.
_BASE_DIR = os.path.dirname(os.path.abspath(__file__))
MODEL_PATH = os.path.join(_BASE_DIR, "vocal_model.h5")
SCALER_PATH = os.path.join(_BASE_DIR, "vocal_scaler.joblib")
FEATURES_PATH = os.path.join(_BASE_DIR, "feature_names.joblib")

# --- Suppress Warnings ---
warnings.filterwarnings('ignore')
# NOTE(review): this variable is set after TensorFlow has already been
# imported at the top of the file, so it probably has no effect on the
# native log level. Move it above the TensorFlow import if quieter logs matter.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

# --- Load Models and Scaler at Startup ---
# Loaded once at import time so every request reuses the same objects.
try:
    model = load_model(MODEL_PATH)
    scaler = joblib.load(SCALER_PATH)
    feature_names = joblib.load(FEATURES_PATH)
    print("✅ Model, scaler, and feature list loaded successfully.")
except Exception as e:
    print("❌ FATAL ERROR: Could not load model files. The application will not work.")
    print(f" Details: {e}")
    # The app still starts; the prediction code checks for these None values
    # and reports that the model is unavailable.
    # In a real-world scenario, you might want the app to fail to start here.
    model, scaler, feature_names = None, None, None
41
+
42
+ # --- Feature Extraction Functions (copied from the original script) ---
43
+ # Both helpers are defined in full below:
44
+ # - preprocess_audio: converts the upload to a mono 16-bit WAV at TARGET_SR
45
+ # - extract_features: computes the feature dictionary for one WAV file
46
+ # Add any further helper functions from the original script here as well.
47
def preprocess_audio(input_path, target_sr=TARGET_SR):
    """Convert an audio file to a mono, 16-bit PCM WAV at ``target_sr``.

    The audio is loaded at its native rate, multi-channel input is averaged
    down to one channel, and the signal is resampled only when its rate
    differs from ``target_sr``. The result is written next to the input as
    ``<input path without extension>_processed_for_prediction.wav``.

    Args:
        input_path: Path of the audio file to convert.
        target_sr: Sample rate of the written file, in Hz.

    Returns:
        The path of the written WAV file, or ``None`` if any step raised
        (the error is printed, not propagated).
    """
    try:
        samples, native_sr = librosa.load(input_path, sr=None, mono=False)

        # Multi-channel audio arrives with more than one dimension; average
        # across the first axis to obtain a single channel.
        if samples.ndim > 1:
            samples = samples.mean(axis=0)

        if native_sr != target_sr:
            samples = librosa.resample(samples, orig_sr=native_sr, target_sr=target_sr)

        stem = os.path.splitext(input_path)[0]
        output_path = f"{stem}_processed_for_prediction.wav"
        sf.write(output_path, samples, target_sr, subtype='PCM_16')
    except Exception as e:
        print(f"Error preprocessing {input_path}: {e}")
        return None
    return output_path
59
+
60
def extract_features(file_path):
    """Compute the feature dictionary for a single WAV file.

    The returned dictionary contains ``Duration``, ``Pitch_Mean``,
    ``Pitch_Std``, ``Jitter``, ``Shimmer``, ``Speaking_Rate``,
    ``Silence_Ratio`` and ``MFCC_1`` .. ``MFCC_13``.

    Args:
        file_path: Path of a WAV file. The raw frames are handed to
            ``webrtcvad`` with two bytes per sample, so the file is
            presumably expected to be the mono 16-bit output of
            ``preprocess_audio`` -- confirm before calling with other input.

    Returns:
        The feature dictionary, or ``None`` if any step raised (the error is
        printed, not propagated).
    """
    try:
        # Spectral features from the decoded signal.
        signal, signal_sr = librosa.load(file_path, sr=None)
        duration = librosa.get_duration(y=signal, sr=signal_sr)
        mfcc_means = np.mean(
            librosa.feature.mfcc(y=signal, sr=signal_sr, n_mfcc=13), axis=1
        )

        # Pitch statistics over frames with a non-zero frequency estimate.
        snd = parselmouth.Sound(file_path)
        f0 = snd.to_pitch().selected_array['frequency']
        f0 = f0[f0 != 0]
        if len(f0) > 0:
            pitch_mean = np.mean(f0)
            pitch_std = np.std(f0)
        else:
            pitch_mean = 0
            pitch_std = 0

        # NOTE(review): these Praat queries may return NaN when the recording
        # has too little voiced material -- confirm how downstream code
        # should treat that.
        pulses = parselmouth.praat.call(snd, "To PointProcess (periodic, cc)", 75, 500)
        jitter_local = parselmouth.praat.call(
            pulses, "Get jitter (local)", 0, 0, 0.0001, 0.02, 1.3
        )
        shimmer_local = parselmouth.praat.call(
            [snd, pulses], "Get shimmer (local)", 0, 0, 0.0001, 0.02, 1.3, 1.6
        )

        # Voice-activity detection over consecutive 30 ms frames of raw PCM.
        vad = webrtcvad.Vad(1)
        with contextlib.closing(wave.open(file_path, 'rb')) as wav_file:
            pcm_bytes = wav_file.readframes(wav_file.getnframes())
            pcm_rate = wav_file.getframerate()

        # Bytes per frame: samples in 30 ms times two bytes per sample.
        frame_len = int(pcm_rate * (30 / 1000.0) * 2)

        voiced_seconds = 0
        num_segments = 0
        position = 0
        # NOTE(review): the strict "<" skips a final frame that ends exactly
        # at the end of the buffer. Left as is so the features stay identical
        # to what the original script produced.
        while position + frame_len < len(pcm_bytes):
            chunk = pcm_bytes[position:position + frame_len]
            if vad.is_speech(chunk, pcm_rate):
                voiced_seconds += 0.03  # 30ms frame
                num_segments += 1
            position += frame_len

        if duration > 0:
            silence_ratio = max(0, (duration - voiced_seconds) / duration)
            # num_segments counts voiced frames, so this is voiced frames
            # per second of audio.
            speaking_rate = num_segments / duration
        else:
            silence_ratio = 0
            speaking_rate = 0

        features = dict(
            Duration=duration,
            Pitch_Mean=pitch_mean,
            Pitch_Std=pitch_std,
            Jitter=jitter_local,
            Shimmer=shimmer_local,
            Speaking_Rate=speaking_rate,
            Silence_Ratio=silence_ratio,
        )
        features.update(
            {f'MFCC_{rank}': value for rank, value in enumerate(mfcc_means, start=1)}
        )
        return features

    except Exception as e:
        print(f"Error extracting features from {file_path}: {e}")
        return None
122
+
123
+ # --- Main Prediction Logic (Refactored to return a dictionary) ---
124
+
125
def predict_from_audio_path(file_path):
    """Run the full prediction pipeline on one audio file.

    Steps: preprocess the audio, extract features, order the columns as in
    ``feature_names``, scale them, and run the model. The intermediate
    processed WAV file is removed on every exit path.

    Args:
        file_path: Path of the uploaded audio file.

    Returns:
        A dictionary with ``status``, ``prediction`` (text), ``confidence``
        and ``label``. ``confidence`` is the raw model output, i.e. the score
        for label 1, not the confidence in whichever label was chosen.

    Raises:
        HTTPException: 503 if the model artifacts are not loaded, 400 if
            preprocessing or feature extraction fails, 500 for any other
            failure during prediction (including a non-finite model output).
    """
    # Compare against None explicitly: truth-testing the loaded objects is
    # ambiguous (and raises) for array-like feature lists.
    if model is None or scaler is None or feature_names is None:
        raise HTTPException(status_code=503, detail="Model is not loaded or available.")

    # 1. Preprocess audio
    processed_path = preprocess_audio(file_path)
    if not processed_path:
        raise HTTPException(status_code=400, detail="Audio preprocessing failed.")

    try:
        # 2. Extract features
        features_dict = extract_features(processed_path)
        if not features_dict:
            raise HTTPException(status_code=400, detail="Feature extraction failed.")

        # 3. Convert to DataFrame and ensure correct column order
        feature_df = pd.DataFrame([features_dict])[list(feature_names)]

        # 4. Scale features
        scaled_features = scaler.transform(feature_df)

        # 5. Make a prediction
        prediction_prob = float(model.predict(scaled_features, verbose=0)[0][0])
    except HTTPException:
        # Already carries the intended status code; do not wrap it as a 500.
        raise
    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=f"An error occurred during prediction: {str(e)}",
        ) from e
    finally:
        # Single cleanup point; a failed delete must not mask the real error.
        with contextlib.suppress(OSError):
            os.remove(processed_path)

    # A NaN/inf score would be reported as "Healthy" (NaN > 0.5 is False) and
    # cannot be serialized as JSON, so reject it explicitly.
    if not np.isfinite(prediction_prob):
        raise HTTPException(
            status_code=500,
            detail="An error occurred during prediction: model returned a non-finite probability.",
        )

    # 6. Format the result
    prediction_label = int(prediction_prob > 0.5)
    result_text = "Parkinson's Detected" if prediction_label == 1 else "Healthy"

    return {
        "status": "success",
        "prediction": result_text,
        "confidence": prediction_prob,
        "label": prediction_label,
    }
171
+
172
+
173
+ # --- FastAPI App Definition ---
174
+
175
# ASGI application object; the route decorators below register on it.
app = FastAPI(
    title="Parkinson's Voice Detection API",
    description=(
        "An API that uses a deep learning model to predict the presence of "
        "Parkinson's disease from a voice recording."
    ),
    version="1.0",
)
180
+
181
@app.get("/", tags=["General"])
def read_root():
    """Return a static welcome message, useful as a liveness check."""
    welcome = "Welcome to the Parkinson's Voice Prediction API. Go to /docs for usage."
    return {"message": welcome}
185
+
186
@app.post("/predict/", tags=["Prediction"])
async def create_prediction(file: UploadFile = File(...)):
    """
    Accepts an audio file, processes it, and returns the prediction result.
    The audio file can be in any format that librosa supports (wav, mp3, etc.).

    The upload is written to a temporary file, passed to
    ``predict_from_audio_path``, and the temporary file is always removed
    afterwards.

    Raises:
        HTTPException: 500 if the upload cannot be read or stored; any
            HTTPException raised by ``predict_from_audio_path`` propagates
            unchanged.
    """
    # Imported locally so this handler does not depend on a change to the
    # file's import section.
    from fastapi.concurrency import run_in_threadpool

    # The client may omit the filename; fall back to no suffix.
    suffix = os.path.splitext(file.filename or "")[1]

    tmp_file_path = None
    try:
        # Save the uploaded file to a temporary location on the server
        try:
            content = await file.read()
            with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp_file:
                # Record the path before writing so that a failed write
                # still gets cleaned up in the outer ``finally``.
                tmp_file_path = tmp_file.name
                tmp_file.write(content)
        except Exception as e:
            raise HTTPException(
                status_code=500,
                detail=f"Error handling the uploaded file: {e}",
            ) from e

        # The pipeline is blocking (decoding, feature extraction, inference);
        # run it in a worker thread so the event loop stays responsive.
        result = await run_in_threadpool(predict_from_audio_path, tmp_file_path)
        return JSONResponse(content=result)
    finally:
        # CRITICAL: Always clean up the temporary file
        if tmp_file_path is not None:
            with contextlib.suppress(OSError):
                os.remove(tmp_file_path)
app/requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
fastapi
uvicorn
# Required by FastAPI to parse multipart form uploads (File / UploadFile).
python-multipart
librosa
numpy
pandas
# The Praat bindings imported as `parselmouth` are published on PyPI as
# `praat-parselmouth`; the distribution named `parselmouth` is unrelated.
praat-parselmouth
soundfile
webrtcvad
tensorflow
joblib
# NOTE(review): vocal_scaler.joblib is presumably a scikit-learn scaler, which
# needs scikit-learn installed to be unpickled -- confirm and pin the version
# used for training.
scikit-learn
app/vocal_model.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe5990d6dcdef7a1a81a5ce9f215411c14f990e8e780257b8ce83cd4c26632f7
3
+ size 195048
app/vocal_scaler.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f5c4807143ef823a8cdcad9d302c1d684abf0acb717d9db050ca35bd73191ab
3
+ size 1559
dockerfile ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Use a lightweight Python image
FROM python:3.9-slim

# Set working directory
WORKDIR /code

# webrtcvad is built from source at install time and needs a C compiler,
# which the slim image does not include.
RUN apt-get update \
    && apt-get install -y --no-install-recommends build-essential \
    && rm -rf /var/lib/apt/lists/*

# Install dependencies (requirements.txt lives inside app/ in this repo)
COPY app/requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy app code and models
COPY app/ /code/app

# main.py opens its model files by bare file name, so run from the directory
# that contains them.
WORKDIR /code/app

# Set environment variables for Hugging Face Spaces
ENV HOST=0.0.0.0
ENV PORT=7860

# Expose port
EXPOSE 7860

# Run FastAPI app with Uvicorn (the module is main.py, the ASGI object is `app`)
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]