Dimsumcat
/

breath-server

Model card Files Files and versions

xet

Community

Dimsumcat committed on Dec 1, 2024

Commit

e546b40

verified ·

1 Parent(s): 9d19aca

Update breath-server/app.py

Browse files

Files changed (1) hide show

breath-server/app.py +9 -3

breath-server/app.py CHANGED Viewed

@@ -19,10 +19,12 @@ input_details = interpreter.get_input_details()
 output_details = interpreter.get_output_details()
 def extract_features(y, sr):
     mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
     return np.mean(mfccs.T, axis=0)
 def segment_audio(y, sr, segment_length=2, hop_length=1):
     frames = []
     for start in range(0, len(y) - int(segment_length * sr), int(hop_length * sr)):
         end = start + int(segment_length * sr)
@@ -31,6 +33,7 @@ def segment_audio(y, sr, segment_length=2, hop_length=1):
     return np.array(frames, dtype=np.float32)
 def predict_periods(interpreter, y, sr, segment_length=2, hop_length=1):
     frames = segment_audio(y, sr, segment_length, hop_length)
     predictions = []
@@ -44,6 +47,7 @@ def predict_periods(interpreter, y, sr, segment_length=2, hop_length=1):
     return predicted_labels
 def get_periods_and_durations(predicted_labels, segment_length=2, hop_length=1):
     periods = []
     durations = []
     current_label = None
@@ -66,10 +70,12 @@ def get_periods_and_durations(predicted_labels, segment_length=2, hop_length=1):
     return periods, durations
 def process_audio(audio_path):
     y, sr = librosa.load(audio_path, sr=None)
     return y, sr
 def plot_waveform_with_predictions(y, sr, periods, durations, segment_length=2, hop_length=1):
     # Plot the audio waveform
     plt.figure(figsize=(10, 6))
     librosa.display.waveshow(y, sr=sr, alpha=0.6, label='Audio Waveform')
@@ -120,6 +126,7 @@ def predict():
     print(f'File saved at {file_path}')
     try:
         y, sr = process_audio(file_path)
         predicted_labels = predict_periods(interpreter, y, sr)
         periods, durations = get_periods_and_durations(predicted_labels)
@@ -133,11 +140,11 @@ def predict():
         # Generate the plot and return it as an image
         img_io = plot_waveform_with_predictions(y, sr, periods, durations)
-        # Clean up file
         os.remove(file_path)
         print(f'File removed from {file_path}')
-        # Ensure the image is returned with the correct headers
         return send_file(
             img_io,
             mimetype='image/png',
@@ -150,6 +157,5 @@ def predict():
         os.remove(file_path)  # Clean up file in case of error
         return jsonify({'error': str(e)}), 500
 if __name__ == '__main__':
     app.run(host='0.0.0.0', port=5000, debug=True)

 output_details = interpreter.get_output_details()
 def extract_features(y, sr):
+    """Extract MFCC features from the audio."""
     mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
     return np.mean(mfccs.T, axis=0)
 def segment_audio(y, sr, segment_length=2, hop_length=1):
+    """Segment audio into frames and extract features."""
     frames = []
     for start in range(0, len(y) - int(segment_length * sr), int(hop_length * sr)):
         end = start + int(segment_length * sr)
     return np.array(frames, dtype=np.float32)
 def predict_periods(interpreter, y, sr, segment_length=2, hop_length=1):
+    """Predict inhale/exhale periods based on audio features."""
     frames = segment_audio(y, sr, segment_length, hop_length)
     predictions = []
     return predicted_labels
 def get_periods_and_durations(predicted_labels, segment_length=2, hop_length=1):
+    """Determine the inhale/exhale periods and their durations."""
     periods = []
     durations = []
     current_label = None
     return periods, durations
 def process_audio(audio_path):
+    """Process the uploaded audio file using librosa."""
     y, sr = librosa.load(audio_path, sr=None)
     return y, sr
 def plot_waveform_with_predictions(y, sr, periods, durations, segment_length=2, hop_length=1):
+    """Generate a waveform plot with predicted inhale/exhale periods."""
     # Plot the audio waveform
     plt.figure(figsize=(10, 6))
     librosa.display.waveshow(y, sr=sr, alpha=0.6, label='Audio Waveform')
     print(f'File saved at {file_path}')
     try:
+        # Process the audio and make predictions
         y, sr = process_audio(file_path)
         predicted_labels = predict_periods(interpreter, y, sr)
         periods, durations = get_periods_and_durations(predicted_labels)
         # Generate the plot and return it as an image
         img_io = plot_waveform_with_predictions(y, sr, periods, durations)
+        # Clean up file after processing
         os.remove(file_path)
         print(f'File removed from {file_path}')
+        # Return the plot as an image
         return send_file(
             img_io,
             mimetype='image/png',
         os.remove(file_path)  # Clean up file in case of error
         return jsonify({'error': str(e)}), 500
 if __name__ == '__main__':
     app.run(host='0.0.0.0', port=5000, debug=True)