# Streamlit app: audio deepfake detection (MFCC + log-mel + breath features, TFLite CNN).
import numpy as np
import librosa
import tensorflow as tf
import streamlit as st
import sounddevice as sd
import wave
import os
# ---- Feature-extraction constants (shared by all functions below) ----
window_length = 0.02 # 20ms window length
hop_length = 0.0025 # 2.5ms hop length
sample_rate = 22050 # Standard audio sample rate
n_mels = 128 # Number of mel filter banks
threshold_zcr = 0.1 # Adjust this threshold to detect breath based on ZCR
threshold_rmse = 0.1 # Adjust this threshold to detect breath based on RMSE
max_len = 500 # Fix length (frames) for feature extraction
# Load the TFLite model once at import time. The .tflite file is resolved
# relative to the working directory — it must sit next to this script.
interpreter = tf.lite.Interpreter(model_path="model_breath_logspec_mfcc_cnn.tflite")
interpreter.allocate_tensors()
# Cache tensor metadata used to feed input and read output during inference.
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
# Function to extract breath features
def extract_breath_features(y, sr):
    """Return a per-frame 0/1 indicator of likely breath activity.

    A frame is flagged as breath when both its zero-crossing rate and its
    RMS energy exceed the module-level thresholds.

    Args:
        y: 1-D audio sample array.
        sr: Sample rate of ``y`` in Hz.

    Returns:
        1-D numpy int array with one element per analysis frame (1 = breath).
    """
    frame_size = int(window_length * sr)
    hop_size = int(hop_length * sr)
    # Frame-wise ZCR and RMS over identical windows, flattened to 1-D so the
    # two series can be compared element-wise.
    zcr_series = librosa.feature.zero_crossing_rate(
        y=y, frame_length=frame_size, hop_length=hop_size
    ).T.flatten()
    rms_series = librosa.feature.rms(
        y=y, frame_length=frame_size, hop_length=hop_size
    ).T.flatten()
    # A breath frame must clear both thresholds simultaneously.
    is_breath = (zcr_series > threshold_zcr) & (rms_series > threshold_rmse)
    return is_breath.astype(int)
# Feature extraction
def extract_features(file_path):
    """Load an audio file and build the stacked feature matrix for the model.

    Rows are 13 MFCCs, ``n_mels`` log-mel bands, and one breath-indicator
    row, each padded/truncated to ``max_len`` frames.

    Args:
        file_path: Path to an audio file readable by librosa.

    Returns:
        numpy array of shape (13 + n_mels + 1, max_len), or ``None`` when
        the file could not be processed (the error is shown in the UI).
    """
    try:
        signal, rate = librosa.load(file_path, sr=None)
        cepstra = librosa.feature.mfcc(y=signal, sr=rate, n_mfcc=13)
        mel_db = librosa.amplitude_to_db(
            librosa.feature.melspectrogram(y=signal, sr=rate, n_mels=n_mels)
        )
        breaths = extract_breath_features(signal, rate)
        # Pad or truncate every feature to a common frame count.
        cepstra = librosa.util.fix_length(cepstra, size=max_len, axis=1)
        mel_db = librosa.util.fix_length(mel_db, size=max_len, axis=1)
        breaths = librosa.util.fix_length(breaths, size=max_len)
        # vstack promotes the 1-D breath vector to a row, yielding 2-D output.
        return np.vstack((cepstra, mel_db, breaths))
    except Exception as exc:
        # Surface the failure in the Streamlit UI instead of crashing the app.
        st.error(f"Error processing audio: {exc}")
        return None
# Prepare input for model
def prepare_single_data(features):
features = librosa.util.fix_length(features, size=max_len, axis=1)
features = features[np.newaxis, ..., np.newaxis] # Add batch and channel dimensions
return features.astype(np.float32) # Convert to FLOAT32
# Predict audio class
def predict_audio(file_path):
features = extract_features(file_path)
if features is not None:
prepared_features = prepare_single_data(features)
interpreter.set_tensor(input_details[0]['index'], prepared_features)
interpreter.invoke()
prediction = interpreter.get_tensor(output_details[0]['index'])
predicted_class = np.argmax(prediction, axis=1)
predicted_prob = prediction[0]
return predicted_class[0], predicted_prob
return None, None
# Record audio function
def record_audio(duration=5, samplerate=22050):
st.info(f"🎤 Recording for {duration} seconds...")
audio_data = sd.rec(int(duration * samplerate), samplerate=samplerate, channels=1, dtype=np.int16)
sd.wait()
st.success("✅ Recording Complete!")
return audio_data, samplerate
# Save recorded audio as .wav
def save_wav(file_path, audio_data, samplerate):
with wave.open(file_path, 'wb') as wf:
wf.setnchannels(1)
wf.setsampwidth(2)
wf.setframerate(samplerate)
wf.writeframes(audio_data.tobytes())
# Streamlit UI
# ---- Streamlit UI (re-executed top-to-bottom on every interaction) ----
st.title('🎙️ Audio Deepfake Detection')
st.write('Upload or record an audio file to classify it as real or fake.')
# File uploader
uploaded_file = st.file_uploader('📂 Upload an audio file', type=['wav', 'mp3'])
recorded_file_path = "recorded_audio.wav"
# Record audio button.
# NOTE(review): the slider is only rendered on the rerun triggered by the
# button press, so this recording always uses the slider default (5 s)
# rather than a value the user chose beforehand — confirm intended UX.
if st.button("🎤 Record Live Audio"):
    duration = st.slider("⏳ Set Duration (seconds)", 1, 10, 5)
    audio_data, samplerate = record_audio(duration)
    save_wav(recorded_file_path, audio_data, samplerate)
    st.audio(recorded_file_path, format="audio/wav")
# Choose the audio to classify: a fresh upload wins; otherwise fall back to
# a recorded file left on disk (possibly from an earlier run of the app).
if uploaded_file is not None:
    # Persist the upload to disk because predict_audio/librosa need a path.
    # NOTE(review): mp3 uploads are also written with a .wav name — librosa
    # sniffs content so decoding works, but the name is misleading.
    with open("uploaded_audio.wav", 'wb') as f:
        f.write(uploaded_file.getbuffer())
    file_path = "uploaded_audio.wav"
    st.audio(file_path, format="audio/wav")
elif os.path.exists(recorded_file_path):
    file_path = recorded_file_path
else:
    file_path = None
# Run prediction on every rerun while an input file is available.
if file_path:
    prediction, probability = predict_audio(file_path)
    if prediction is not None:
        st.write(f'**Predicted Class:** {prediction}')
        # NOTE(review): assumes model output order is [P(real), P(fake)] —
        # verify against the training label encoding.
        st.write(f'**Probability of being Real:** {probability[0]*100:.2f}%')
        st.write(f'**Probability of being Fake:** {probability[1]*100:.2f}%')
    else:
        st.error("❌ Failed to process the audio file.")