Ashritha27426 committed on
Commit
6b1f3f6
·
verified ·
1 Parent(s): b11a082

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +174 -0
app.py CHANGED
@@ -0,0 +1,174 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ import torch.nn as nn
4
+ import librosa
5
+ import numpy as np
6
+ import whisper
7
+ import pandas as pd
8
+ from datasets import load_dataset
9
+ from sklearn.feature_extraction.text import TfidfVectorizer
10
+ from sklearn.linear_model import LogisticRegression
11
+
12
# Torch device used for the CNN and its input tensors.
device = torch.device("cpu")

# ================= LOAD DATASET =================
# Local CSV corpus; the code below reads its "text" and "label" columns.
data1 = pd.read_csv("spam_dataset.csv")

# Hugging Face SMS spam corpus: take the "train" split as a DataFrame and
# rename its "sms" column to "text" so it lines up with the CSV
# (its "label" column already carries the expected name).
data2 = load_dataset("ucirvine/sms_spam")["train"].to_pandas()
data2 = data2.rename(columns={"sms": "text"})

# combine both
data = pd.concat([data1, data2], ignore_index=True)

# Rows with a missing text or label cannot be used for training, so drop
# them up front instead of letting vectorisation / fitting fail on NaN.
data = data.dropna(subset=["text", "label"]).reset_index(drop=True)

# NOTE(review): both sources are assumed to share one label encoding with
# the scam class sorting last (ml_predict reads class index 1) — confirm
# against the contents of spam_dataset.csv.
texts = data["text"].astype(str)  # guard against non-string cells from the CSV
labels = data["label"]
29
+
30
+ # ================= ML TRAINING =================
31
# Learn the TF-IDF vocabulary from the combined corpus and keep the
# resulting document-term matrix as the training input.
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(texts)

# `fit` returns the fitted estimator, so construction and training are
# chained into a single expression.
ml_model = LogisticRegression().fit(X, labels)
36
+
37
+ # ================= CNN MODEL =================
38
class ScamAudioCNN(nn.Module):
    """Small two-convolution CNN over a single-channel MFCC map.

    Input is expected as ``(batch, 1, n_mfcc, max_len)``; output is a
    ``(batch, num_classes)`` tensor of unnormalised logits.

    Args:
        n_mfcc: Height of the input map (number of MFCC coefficients).
        max_len: Width of the input map (number of frames).
        num_classes: Number of output logits.

    The defaults reproduce the original fixed architecture
    (``fc1`` input size ``32 * 10 * 25``), so existing checkpoints keep
    loading unchanged.
    """

    def __init__(self, n_mfcc=40, max_len=100, num_classes=2):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 16, 3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(16, 32, 3, padding=1)

        # The 3x3 convolutions use padding=1 and keep the spatial size; each
        # of the two 2x2 poolings floor-halves both dimensions.
        pooled_height = n_mfcc // 2 // 2
        pooled_width = max_len // 2 // 2
        self.fc1 = nn.Linear(32 * pooled_height * pooled_width, 128)
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x):
        """Return class logits for a batch of MFCC maps."""
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        # Collapse channel and spatial dims, keeping the batch dimension.
        x = torch.flatten(x, start_dim=1)
        x = torch.relu(self.fc1(x))
        return self.fc2(x)
54
+
55
cnn_model = ScamAudioCNN().to(device)

# ================= LOAD CNN (optional) =================
# The checkpoint is optional: startup continues without it. The failure is
# reported instead of being silently swallowed, because without the
# checkpoint the CNN keeps its freshly initialised (untrained) weights.
try:
    cnn_model.load_state_dict(torch.load("scam_audio_model.pth", map_location=device))
except Exception as err:  # best-effort load; KeyboardInterrupt/SystemExit still propagate
    print(
        f"Warning: could not load 'scam_audio_model.pth' ({err}); "
        "the CNN will run with untrained weights."
    )

# Switch to inference mode.
cnn_model.eval()

# ================= WHISPER =================
whisper_model = whisper.load_model("tiny", device="cpu")
67
+
68
+ # ================= MFCC =================
69
def extract_features(file_path, max_len=100):
    """Turn an audio file into a fixed-width MFCC tensor.

    The audio is loaded at 16 kHz, 40 MFCCs are computed, and the frame
    axis is zero-padded or truncated to exactly ``max_len`` columns.

    Returns:
        A float32 tensor with two leading singleton axes prepended to the
        MFCC matrix (presumably batch and channel, as the CNN consumes it).
    """
    signal, rate = librosa.load(file_path, sr=16000)
    coeffs = librosa.feature.mfcc(y=signal, sr=rate, n_mfcc=40)

    # Positive when the clip is shorter than the target width.
    missing = max_len - coeffs.shape[1]
    if missing > 0:
        coeffs = np.pad(coeffs, ((0, 0), (0, missing)))
    else:
        coeffs = coeffs[:, :max_len]

    batched = coeffs[np.newaxis, np.newaxis, :, :]
    return torch.tensor(batched, dtype=torch.float32)
81
+
82
+ # ================= TRANSCRIPTION =================
83
def transcribe_audio(file_path):
    """Return the lower-cased Whisper transcript of the audio at ``file_path``."""
    transcript = whisper_model.transcribe(file_path)["text"]
    return transcript.lower()
86
+
87
+ # ================= KEYWORDS =================
88
scam_keywords = [
    "otp", "bank", "account", "verify", "urgent", "blocked", "suspend",
    "credit card", "loan", "refund", "investment", "crypto", "kyc",
    "password", "security", "congratulations", "won", "winner", "prize",
    "claim", "fee", "pay", "offer", "lottery", "jackpot", "gift", "free",
]


def keyword_score(text):
    """Score a transcript by how many scam keywords it mentions.

    Keywords are matched as whole words (optionally followed by a common
    inflection such as "s", "ed" or "ing"), case-insensitively. Plain
    substring matching would flag unrelated words, e.g. "coffee" for
    "fee" or "wonderful" / "won't" for "won".

    Args:
        text: Transcript to scan; ``None`` or an empty string scores 0.

    Returns:
        ``(score, found)`` where ``found`` lists the matched keywords in
        ``scam_keywords`` order and ``score`` is ``len(found) / 4`` capped
        at 1.0.
    """
    import re  # local import: `re` is not among this file's top-level imports

    if not text:
        return 0.0, []

    lowered = text.lower()
    # Letters, digits, underscore and apostrophes count as "inside a word",
    # so a contraction like "won't" is not read as the keyword "won".
    word_char = r"[\w'’]"
    inflection = r"(?:s|es|ed|ing|'s|’s)?"

    found = [
        keyword
        for keyword in scam_keywords
        if re.search(
            rf"(?<!{word_char}){re.escape(keyword)}{inflection}(?!{word_char})",
            lowered,
        )
    ]
    score = min(len(found) / 4, 1.0)
    return score, found
99
+
100
+ # ================= ML PREDICTION =================
101
def ml_predict(text):
    """Return the text classifier's probability for class index 1.

    The text is vectorised with the fitted TF-IDF vectoriser and scored by
    the logistic-regression model; index 1 is presumably the spam class —
    verify against the label encoding used for training.
    """
    features = vectorizer.transform([text])
    class_probabilities = ml_model.predict_proba(features)
    return class_probabilities[0][1]
105
+
106
+ # ================= MAIN =================
107
# Weights of the three detectors in the fused score (they sum to 1.0).
KEYWORD_WEIGHT = 0.3
ML_WEIGHT = 0.4
CNN_WEIGHT = 0.3

# Fused scores below these limits are "Low Risk" / "Medium Risk".
LOW_RISK_LIMIT = 0.30
MEDIUM_RISK_LIMIT = 0.60


def _risk_level(final_score):
    """Map a fused score in [0, 1] to its risk label."""
    if final_score < LOW_RISK_LIMIT:
        return "Low Risk"
    if final_score < MEDIUM_RISK_LIMIT:
        return "Medium Risk"
    return "High Scam Risk"


def _format_report(transcript, words, k_score, ml_score, cnn_score, final_score):
    """Build the multi-line text report shown in the UI."""
    verdict = "SPAM" if final_score >= LOW_RISK_LIMIT else "NOT SPAM"
    # Explicit lines keep the output independent of source indentation,
    # unlike a triple-quoted f-string written inside an indented block.
    lines = [
        "",
        f"Transcript: {transcript}",
        "",
        f"Spam Words Found: {', '.join(words) if words else 'None'}",
        "",
        f"Keyword Score: {k_score:.2f}",
        f"ML Score: {ml_score:.2f}",
        f"CNN Score: {cnn_score:.2f}",
        "",
        f"Final Probability: {final_score * 100:.2f}%",
        "",
        f"Risk Level: {_risk_level(final_score)}",
        "",
        f"Final Result: {verdict}",
        "",
    ]
    return "\n".join(lines)


def analyze_audio(audio):
    """Analyse an audio file and return a human-readable scam report.

    Combines three signals: scam keywords in the Whisper transcript, the
    text classifier's probability, and the audio CNN's class-1 probability.

    Args:
        audio: Path to the audio file, or ``None`` when nothing was provided.

    Returns:
        The formatted report, ``"No audio detected."`` when ``audio`` is
        ``None``, or ``"Error: <message>"`` if any stage raises.
    """
    if audio is None:
        return "No audio detected."

    try:
        # Whisper
        transcript = transcribe_audio(audio)

        # Keyword
        k_score, words = keyword_score(transcript)

        # ML
        ml_score = ml_predict(transcript)

        # CNN
        features = extract_features(audio).to(device)
        with torch.no_grad():
            logits = cnn_model(features)
            cnn_score = torch.softmax(logits, dim=1)[0][1].item()

        # FINAL FUSION
        final_score = (
            (KEYWORD_WEIGHT * k_score)
            + (ML_WEIGHT * ml_score)
            + (CNN_WEIGHT * cnn_score)
        )

        return _format_report(
            transcript, words, k_score, ml_score, cnn_score, final_score
        )

    except Exception as e:
        # UI boundary: surface the failure as text instead of crashing the app.
        return f"Error: {e}"
159
+
160
+ # ================= UI =================
161
# Gradio page: title and subtitle, one audio input (microphone or upload,
# handed to the callback as a file path), a text output, and a button
# that runs the analysis.
with gr.Blocks() as demo:
    gr.Markdown("# 🎙️ Hybrid Voice Scam Detection System")
    gr.Markdown("Using speech and content analysis")

    audio_input = gr.Audio(sources=["microphone", "upload"], type="filepath")
    output = gr.Textbox()

    analyze_button = gr.Button("Analyze")
    analyze_button.click(fn=analyze_audio, inputs=audio_input, outputs=output)

demo.launch()