msmaje committed on
Commit
6ff31e7
·
verified ·
1 Parent(s): 162d28c

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +74 -0
app.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py
2
+ import gradio as gr
3
+ import torch
4
+ import joblib
5
+ import librosa
6
+ import numpy as np
7
+ from sklearn.preprocessing import LabelEncoder
8
+
9
# Inference device: use the GPU when PyTorch can see one, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Artifacts read from the working directory. The label encoder converts
# predicted class indices back into labels; feature_params supplies the
# 'sample_rate' and 'n_mfcc' values used during feature extraction.
label_encoder = joblib.load("label_encoder.joblib")
feature_params = joblib.load("feature_params.joblib")
13
+
14
+ # Load your model architecture (must match training)
15
class VoiceModel(torch.nn.Module):
    """Network skeleton whose weights are restored from a saved checkpoint.

    NOTE(review): only the first convolution is declared and ``forward`` is
    still an identity placeholder; the layer definitions from the training
    script presumably have to be filled in before the checkpoint will load
    and before predictions are meaningful.
    """

    def __init__(self, num_classes):
        """Declare the layers.

        Args:
            num_classes: Number of output classes. Not referenced by the
                layers declared so far.
        """
        super().__init__()
        # Define your model architecture here (same as training)
        # One input channel -> 32 feature maps, 3x3 kernel, padding of 1.
        self.conv1 = torch.nn.Conv2d(
            in_channels=1,
            out_channels=32,
            kernel_size=3,
            padding=1,
        )
        # ... rest of your architecture

    def forward(self, x):
        """Return ``x`` unchanged (placeholder forward pass)."""
        return x
25
+
26
# Build the network with one class per encoder label, move it onto the
# selected device, then restore the saved weights from the checkpoint file.
model = VoiceModel(num_classes=len(label_encoder.classes_))
model.to(device)
model.load_state_dict(
    torch.load("voice_recognition_final.pth", map_location=device)
)
# Put the module into evaluation mode before serving predictions.
model.eval()
30
+
31
def extract_features(file_path, max_pad_len=174):
    """Load an audio file and return its MFCC matrix at a fixed width.

    Args:
        file_path: Path of the audio file to load.
        max_pad_len: Number of columns in the returned matrix. Narrower
            results are zero-padded at the end of the second axis; wider
            ones are cut off after ``max_pad_len`` columns.

    Returns:
        The MFCC array with exactly ``max_pad_len`` columns, or ``None`` if
        anything raised while loading or processing (the error is printed).
    """
    try:
        signal, rate = librosa.load(file_path, sr=feature_params['sample_rate'])
        coeffs = librosa.feature.mfcc(
            y=signal, sr=rate, n_mfcc=feature_params['n_mfcc']
        )

        missing = max_pad_len - coeffs.shape[1]
        if missing > 0:
            # Too narrow: extend the second axis only, leaving rows untouched.
            return np.pad(coeffs, ((0, 0), (0, missing)))
        # Wide enough: keep the leading max_pad_len columns.
        return coeffs[:, :max_pad_len]
    except Exception as e:
        # Best-effort: the caller treats None as "could not process".
        print(f"Error processing audio: {e}")
        return None
47
+
48
def predict(audio_path):
    """Classify an audio file and report the predicted user with confidence.

    Args:
        audio_path: Path of the audio file to classify.

    Returns:
        ``"User: <label> (Confidence: <p>)"`` with ``p`` formatted to two
        decimals, or ``"Error processing audio"`` when feature extraction
        returned ``None``.
    """
    features = extract_features(audio_path)
    if features is None:
        return "Error processing audio"

    # Prepend two singleton axes to the feature matrix
    # (presumably batch and channel -- confirm against the training code).
    batch = torch.tensor(features, dtype=torch.float32)[None, None].to(device)

    # Inference only: no gradients are needed.
    with torch.no_grad():
        logits = model(batch)
        probs = torch.softmax(logits, dim=1)
        confidence, pred = probs.max(dim=1)

    # Map the winning class index back to its original label.
    predicted_user = label_encoder.inverse_transform([pred.item()])[0]
    return f"User: {predicted_user} (Confidence: {confidence.item():.2f})"
64
+
65
# Create Gradio interface
#
# Gradio 4 replaced the Audio component's single `source` string with a
# `sources` list, and the old keyword is rejected there. Pick the keyword
# that matches the installed major version. On Gradio 4+ both microphone and
# upload are enabled so the UI matches the description shown to the user;
# Gradio 3 accepts only one source, so the original microphone choice is kept.
if int(gr.__version__.split(".")[0]) >= 4:
    audio_input = gr.Audio(sources=["microphone", "upload"], type="filepath")
else:
    audio_input = gr.Audio(source="microphone", type="filepath")

iface = gr.Interface(
    fn=predict,
    inputs=audio_input,  # predict() receives the recording as a file path
    outputs="text",
    title="Voice Recognition Security System",
    description="Record your voice or upload an audio file for user identification",
)

# Start the web server for the interface.
iface.launch()