Spaces:

msmaje
/

voice_recognition

Sleeping

App Files Files Community

msmaje committed on Jun 4, 2025

Commit

6ff31e7

verified ·

1 Parent(s): 162d28c

Create app.py

Browse files

Files changed (1) hide show

app.py +74 -0

app.py ADDED Viewed

	@@ -0,0 +1,74 @@

+# app.py
+import gradio as gr
+import torch
+import joblib
+import librosa
+import numpy as np
+from sklearn.preprocessing import LabelEncoder
+# Load model and assets
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+label_encoder = joblib.load("label_encoder.joblib")
+feature_params = joblib.load("feature_params.joblib")
+# Load your model architecture (must match training)
+class VoiceModel(torch.nn.Module):
+    def __init__(self, num_classes):
+        super().__init__()
+        # Define your model architecture here (same as training)
+        self.conv1 = torch.nn.Conv2d(1, 32, kernel_size=3, padding=1)
+        # ... rest of your architecture
+    def forward(self, x):
+        # Your forward pass
+        return x
+# Initialize and load weights
+model = VoiceModel(len(label_encoder.classes_)).to(device)
+model.load_state_dict(torch.load("voice_recognition_final.pth", map_location=device))
+model.eval()
+def extract_features(file_path, max_pad_len=174):
+    """Your feature extraction function (simplified for deployment)"""
+    try:
+        audio, sr = librosa.load(file_path, sr=feature_params['sample_rate'])
+        mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=feature_params['n_mfcc'])
+        # Pad/truncate
+        if mfccs.shape[1] < max_pad_len:
+            mfccs = np.pad(mfccs, ((0,0), (0, max_pad_len - mfccs.shape[1])))
+        else:
+            mfccs = mfccs[:, :max_pad_len]
+        return mfccs
+    except Exception as e:
+        print(f"Error processing audio: {e}")
+        return None
+def predict(audio_path):
+    features = extract_features(audio_path)
+    if features is None:
+        return "Error processing audio"
+    # Convert to tensor
+    input_tensor = torch.tensor(features, dtype=torch.float32).unsqueeze(0).unsqueeze(0).to(device)
+    # Predict
+    with torch.no_grad():
+        outputs = model(input_tensor)
+        probs = torch.nn.functional.softmax(outputs, dim=1)
+        confidence, pred = torch.max(probs, 1)
+    predicted_user = label_encoder.inverse_transform([pred.item()])[0]
+    return f"User: {predicted_user} (Confidence: {confidence.item():.2f})"
+# Create Gradio interface
+iface = gr.Interface(
+    fn=predict,
+    inputs=gr.Audio(source="microphone", type="filepath"),
+    outputs="text",
+    title="Voice Recognition Security System",
+    description="Record your voice or upload an audio file for user identification"
+)
+iface.launch()