Spaces:

Mrkomiljon
/

DeepVoiceGuard

Running

App Files Files Community

Mrkomiljon committed on Jan 13, 2025

Commit

59b5f81

verified ·

1 Parent(s): affa9ca

Create app.py

Browse files

Files changed (1) hide show

app.py +90 -0

app.py ADDED Viewed

	@@ -0,0 +1,90 @@

import os
import tempfile
import urllib.request
from collections import Counter

import librosa
import numpy as np
import onnxruntime as ort
import streamlit as st
# Audio padding function
def pad(x, max_len=64600):
    """
    Pad or trim an audio segment to a fixed length by repeating or slicing.

    Args:
        x: NumPy array of samples; its length is read from ``x.shape[0]``.
        max_len: Number of samples the result should contain.

    Returns:
        ``x[:max_len]`` when ``x`` already has at least ``max_len`` samples.
        Otherwise ``x`` repeated end-to-end and cut to ``max_len`` samples.
        An empty ``x`` has nothing to repeat, so ``max_len`` zeros of the
        same dtype are returned instead.
    """
    x_len = x.shape[0]
    if x_len >= max_len:
        return x[:max_len]  # Trim if longer
    if x_len == 0:
        # Guard the division below: an empty segment cannot be tiled.
        return np.zeros(max_len, dtype=x.dtype)
    # One repeat more than the integer quotient guarantees at least max_len
    # samples before the final cut.
    num_repeats = (max_len // x_len) + 1
    return np.tile(x, (1, num_repeats))[0, :max_len]
# Preprocess audio for a single segment
def preprocess_audio_segment(segment, cut=64600):
    """
    Turn one audio segment into a fixed-length, batched float32 array.

    The segment is first brought to ``cut`` samples with ``pad`` (trimmed if
    longer, repeated if shorter), then copied into a ``float32`` array and
    given a new leading axis of size 1.
    """
    fixed_length = pad(segment, max_len=cut)
    as_float32 = np.array(fixed_length, dtype=np.float32)
    # Prepend the batch dimension.
    return as_float32[np.newaxis, ...]
# Sliding window prediction function
def predict_with_sliding_window(
    audio_path,
    onnx_model_url,
    window_size=64600,
    step_size=64600,
    sample_rate=16000,
):
    """
    Use a sliding window to predict if the audio is real or fake over the entire audio.

    Args:
        audio_path: Audio file handed to ``librosa.load``.
        onnx_model_url: Model handed unchanged to ``ort.InferenceSession``;
            despite the name it must be something that constructor can open.
        window_size: Number of samples sliced from the waveform per window.
        step_size: Number of samples between consecutive window starts.
        sample_rate: Sampling rate requested from ``librosa.load``.

    Returns:
        A ``(label, confidence)`` tuple. ``label`` is ``"Real"`` or ``"Fake"``,
        chosen by majority vote over the windows; on a tie the label that
        occurred first in the audio wins. ``confidence`` is the mean, over all
        windows, of each window's score for its own predicted class, times 100.

    Raises:
        ValueError: If no window could be evaluated (for example, the audio
            contains no samples).
    """
    # Load ONNX runtime session
    ort_session = ort.InferenceSession(onnx_model_url)
    input_name = ort_session.get_inputs()[0].name  # same for every window
    # Load audio file
    waveform, _ = librosa.load(audio_path, sr=sample_rate)
    segment_labels = []
    segment_scores = []
    # Sliding window processing
    for start in range(0, len(waveform), step_size):
        segment = waveform[start:start + window_size]
        # NOTE(review): the segment is padded/trimmed to
        # preprocess_audio_segment's default length, not to window_size.
        audio_tensor = preprocess_audio_segment(segment)
        # Perform inference
        outputs = ort_session.run(None, {input_name: audio_tensor})
        # Assumes the model emits log-probabilities, so exp() recovers
        # probabilities -- TODO confirm against the exported model.
        probabilities = np.exp(outputs[0])
        prediction = np.argmax(probabilities)
        # Store the results
        segment_labels.append("Real" if prediction == 1 else "Fake")
        segment_scores.append(probabilities[0][prediction])
    if not segment_labels:
        raise ValueError(f"No audio segments to classify in {audio_path!r}")
    # Final aggregation: Counter.most_common orders equal counts by first
    # occurrence, so a tie is resolved the same way on every run.
    majority_class = Counter(segment_labels).most_common(1)[0][0]
    avg_probability = np.mean(segment_scores) * 100  # Average probability in percentage
    return majority_class, avg_probability
# Streamlit app
st.title("Audio Spoof Detection with ONNX Model")
st.write("Upload an audio file to detect if it is Real or Fake.")
# File uploader
uploaded_file = st.file_uploader("Upload your audio file (WAV or MP3)", type=["wav", "mp3"])
if uploaded_file is not None:
    # The model is loaded from a local file, so it is downloaded once and
    # reused. The "resolve" URL serves the raw file; the "blob" URL serves
    # the repository's HTML page for it.
    onnx_model_url = "https://huggingface.co/Mrkomiljon/DeepVoiceGuard/resolve/main/RawNet_model.onnx"
    onnx_model_path = os.path.join(tempfile.gettempdir(), "RawNet_model.onnx")
    if not os.path.exists(onnx_model_path):
        with st.spinner("Downloading model..."):
            # Download under a unique name and rename when complete, so an
            # interrupted download never leaves a truncated model in place.
            fd, partial_path = tempfile.mkstemp(suffix=".onnx.part")
            os.close(fd)
            try:
                urllib.request.urlretrieve(onnx_model_url, partial_path)
                os.replace(partial_path, onnx_model_path)
            finally:
                if os.path.exists(partial_path):
                    os.remove(partial_path)
    # Save the upload under a unique name so concurrent sessions do not
    # overwrite each other, keeping the upload's own extension.
    suffix = os.path.splitext(uploaded_file.name)[1] or ".wav"
    with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
        tmp.write(uploaded_file.read())
        temp_audio_path = tmp.name
    try:
        # Perform prediction
        with st.spinner("Processing..."):
            result, avg_probability = predict_with_sliding_window(temp_audio_path, onnx_model_path)
    finally:
        # Clean up temporary file even when prediction fails
        os.remove(temp_audio_path)
    # Display results
    st.success(f"Prediction: {result}")
    st.info(f"Confidence: {avg_probability:.2f}%")