Jacksonnavigator7 committed on
Commit
72a1ea2
·
verified ·
1 Parent(s): 956d7bf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +237 -2
app.py CHANGED
@@ -2,19 +2,197 @@ import gradio as gr
2
  import joblib
3
  import os
4
  import logging
 
 
 
 
5
  from datetime import datetime
 
 
6
 
7
  # Configure logging
8
  logging.basicConfig(level=logging.INFO)
9
  logger = logging.getLogger(__name__)
10
 
11
  class SMSScamDetector:
12
- """Enhanced SMS Scam Detection System"""
13
 
14
  def __init__(self):
15
  self.model = None
16
  self.vectorizer = None
 
 
17
  self.load_models()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
  def load_models(self):
20
  """Load machine learning models with error handling"""
@@ -59,7 +237,7 @@ class SMSScamDetector:
59
  return "Haba (Low)", "🟒"
60
 
61
  def predict_sms(self, text):
62
- """Enhanced prediction function with detailed output"""
63
  # Input validation
64
  if not text or len(text.strip()) == 0:
65
  return "❌ **Kosa**: Tafadhali ingiza ujumbe wa SMS"
@@ -75,6 +253,9 @@ class SMSScamDetector:
75
  # Preprocess text
76
  cleaned_text = self.preprocess_text(text)
77
 
 
 
 
78
  # Vectorize text
79
  text_vector = self.vectorizer.transform([cleaned_text])
80
 
@@ -85,6 +266,9 @@ class SMSScamDetector:
85
  # Get confidence level
86
  confidence, emoji = self.get_confidence_level(prediction_proba)
87
 
 
 
 
88
  # Format prediction
89
  if prediction.lower() == 'scam':
90
  result_text = "**SCAM** 🚨"
@@ -95,6 +279,12 @@ class SMSScamDetector:
95
  result_color = "success"
96
  advice = "Ujumbe huu unaonekana kuwa wa kawaida, lakini bado kuwa makini."
97
 
 
 
 
 
 
 
98
  # Create detailed output
99
  output = f"""
100
  ## Matokeo ya Uchunguzi {emoji}
@@ -106,6 +296,7 @@ class SMSScamDetector:
106
  **Kiwango cha Uhakika**: {confidence}
107
 
108
  **Maoni**: {advice}
 
109
 
110
  ---
111
  *Tarehe*: {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}
@@ -194,6 +385,16 @@ with gr.Blocks(
194
  value="Matokeo yataonyeshwa hapa baada ya kuchunguza ujumbe..."
195
  )
196
 
 
 
 
 
 
 
 
 
 
 
197
  # Information section
198
  with gr.Accordion("ℹ️ Maelezo ya Ziada", open=False):
199
  gr.Markdown("""
@@ -209,11 +410,35 @@ with gr.Blocks(
209
  - πŸ”— Ina viungo vya kugusia (links)
210
  - ⚑ Inadai ni ya dharura
211
  - πŸ“ž Inaomba taarifa za kibinafsi
 
 
 
 
 
 
 
212
 
213
  ### Onyo Muhimu:
214
  Mfumo huu ni wa kusaidia tu. Daima tumia busara zako na usijibu SMS zisizoeleweka.
215
  """)
216
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
217
  # Event handlers
218
  predict_btn.click(
219
  fn=detector.predict_sms,
@@ -221,6 +446,16 @@ with gr.Blocks(
221
  outputs=output_result
222
  )
223
 
 
 
 
 
 
 
 
 
 
 
224
  clear_btn.click(
225
  fn=lambda: ("", "Matokeo yataonyeshwa hapa baada ya kuchunguza ujumbe..."),
226
  outputs=[sms_input, output_result]
 
2
  import joblib
3
  import os
4
  import logging
5
+ import sqlite3
6
+ import hashlib
7
+ import json
8
+ import pandas as pd
9
  from datetime import datetime
10
+ from collections import Counter
11
+ import re
12
 
13
  # Configure logging
14
  logging.basicConfig(level=logging.INFO)
15
  logger = logging.getLogger(__name__)
16
 
17
  class SMSScamDetector:
18
+ """Enhanced SMS Scam Detection System with Analytics and Reporting"""
19
 
20
  def __init__(self):
      """Set up the detector: analytics database, ML models, scam patterns."""
      # Start unset; presumably filled in by load_models() -- its body is
      # not visible in this view, TODO confirm.
      self.model = None
      self.vectorizer = None
      # SQLite file used for analytics; a relative path, so it resolves
      # against the process working directory.
      self.db_path = "sms_analytics.db"
      # Must run after db_path is assigned: init_database() opens that path.
      self.init_database()
      self.load_models()
      # Keyword lists and regexes consumed by analyze_message_patterns().
      self.scam_patterns = self.load_scam_patterns()
27
+
28
def init_database(self):
    """Create the ``sms_logs`` analytics table if it does not exist yet.

    Opens the SQLite database at ``self.db_path`` and issues a
    ``CREATE TABLE IF NOT EXISTS``. Initialization is best-effort: any
    failure is logged (with traceback) and swallowed so that a broken
    analytics store does not prevent the detector from being constructed.
    """
    create_table_sql = '''
        CREATE TABLE IF NOT EXISTS sms_logs (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            message_hash TEXT UNIQUE,
            prediction TEXT,
            confidence REAL,
            timestamp DATETIME,
            message_length INTEGER,
            suspicious_keywords INTEGER
        )
    '''
    conn = None
    try:
        conn = sqlite3.connect(self.db_path)
        # ``with conn`` commits on success and rolls back on error; it does
        # NOT close the connection, hence the explicit ``finally`` below.
        with conn:
            conn.execute(create_table_sql)
    except Exception as e:
        logger.exception("Database initialization error: %s", e)
    else:
        logger.info("Database initialized successfully")
    finally:
        # Close even when execute/commit raised, so the handle never leaks.
        if conn is not None:
            conn.close()
49
+
50
def load_scam_patterns(self):
    """Return the keyword lists and regexes used for heuristic scam checks.

    Returns:
        dict with these keys, each mapping to a list of strings:
        ``prize_keywords`` and ``urgency_keywords`` are plain lowercase
        substrings; ``suspicious_urls``, ``phone_patterns`` and
        ``money_patterns`` are regular expressions.

    The money patterns require at least one digit and, for the currency
    prefixes, a leading word boundary. Without that, ``[\\d,]+`` accepted a
    bare comma (e.g. "call us, please") and the prefix matched inside
    words (e.g. "bonus 500").
    """
    return {
        'prize_keywords': ['ushindi', 'zawadi', 'hongera', 'umeshinda', 'pesa', 'dola'],
        'urgency_keywords': ['haraka', 'sasa hivi', 'urgent', 'muda mchache'],
        'suspicious_urls': [r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+'],
        'phone_patterns': [r'\*\d+#', r'\d{10,}'],
        'money_patterns': [
            r'\btsh?\s*\d[\d,]*',
            r'\busd?\s*\d[\d,]*',
            r'\d[\d,]*\s*shilling',
        ],
    }
59
+
60
def analyze_message_patterns(self, text):
    """Score a message against the heuristic scam patterns.

    Reads the pattern table from ``self.scam_patterns``.

    Scoring: +2 per prize keyword, +1 per urgency keyword, +3 for a URL,
    +2 for a USSD code, +1 if any money pattern matches. Keyword and money
    checks run on the lowercased text.

    Args:
        text: The raw SMS text (must be a ``str``).

    Returns:
        ``(suspicious_score, detected_patterns)``: the score clamped to
        the 0-10 range and a list of human-readable findings in the order
        the checks ran.
    """
    # The score is shown to the user as "<score>/10", so clamp it; the raw
    # sum of all checks can otherwise reach 20.
    max_score = 10
    patterns = self.scam_patterns
    text_lower = text.lower()
    suspicious_score = 0
    detected_patterns = []

    # Check for prize/money keywords
    for keyword in patterns['prize_keywords']:
        if keyword in text_lower:
            suspicious_score += 2
            detected_patterns.append(f"Prize keyword: {keyword}")

    # Check for urgency keywords
    for keyword in patterns['urgency_keywords']:
        if keyword in text_lower:
            suspicious_score += 1
            detected_patterns.append(f"Urgency keyword: {keyword}")

    # Check for URLs; the scheme is case-insensitive ("HTTPS://..." is a URL)
    if re.search(patterns['suspicious_urls'][0], text, re.IGNORECASE):
        suspicious_score += 3
        detected_patterns.append("Contains suspicious URL")

    # Check for USSD codes
    if re.search(patterns['phone_patterns'][0], text):
        suspicious_score += 2
        detected_patterns.append("Contains USSD code")

    # Check for money mentions; counted once however many patterns match
    if any(re.search(pattern, text_lower) for pattern in patterns['money_patterns']):
        suspicious_score += 1
        detected_patterns.append("Contains money amount")

    return min(suspicious_score, max_score), detected_patterns
96
+
97
def log_prediction(self, text, prediction, confidence, suspicious_score):
    """Record one prediction in the ``sms_logs`` table (best-effort).

    The raw message is not stored; only its MD5 hex digest, which serves
    as a de-duplication key. Because ``message_hash`` is UNIQUE and the
    statement is ``INSERT OR REPLACE``, logging the same text again
    replaces the earlier row.

    Args:
        text: The SMS text that was classified.
        prediction: The predicted label.
        confidence: Iterable of probabilities; its maximum is stored.
        suspicious_score: Heuristic score, stored in the
            ``suspicious_keywords`` column.

    Any failure is logged with a traceback and swallowed, so analytics
    problems never break the prediction itself.
    """
    conn = None
    try:
        record = (
            hashlib.md5(text.encode()).hexdigest(),
            prediction,
            float(max(confidence)),
            datetime.now().isoformat(),
            len(text),
            suspicious_score,
        )
        conn = sqlite3.connect(self.db_path)
        # ``with conn`` commits on success and rolls back on error.
        with conn:
            conn.execute('''
                INSERT OR REPLACE INTO sms_logs
                (message_hash, prediction, confidence, timestamp, message_length, suspicious_keywords)
                VALUES (?, ?, ?, ?, ?, ?)
            ''', record)
    except Exception as e:
        logger.exception("Logging error: %s", e)
    finally:
        # The context manager above does not close the connection.
        if conn is not None:
            conn.close()
122
+
123
def get_analytics(self):
    """Build a Markdown summary of the 100 most recent logged predictions.

    Reads ``sms_logs`` from the SQLite database at ``self.db_path``.

    Returns:
        A Markdown string with total, scam and trust counts (with
        percentages), mean confidence and message-length statistics; a
        fixed notice when the table is empty; or an error string prefixed
        with "Kosa la takwimu:" if anything fails.
    """
    try:
        conn = sqlite3.connect(self.db_path)
        try:
            df = pd.read_sql_query(
                "SELECT * FROM sms_logs ORDER BY timestamp DESC LIMIT 100", conn)
        finally:
            # Close even if the query fails (e.g. the table is missing).
            conn.close()

        if df.empty:
            return "Hakuna data ya kutosha kwa takwimu"

        # Labels are compared case-insensitively elsewhere in the app
        # (prediction.lower() == 'scam'), so normalise here too; otherwise
        # a model emitting "Scam" would always be counted as zero.
        labels = df['prediction'].astype(str).str.lower()
        lengths = df['message_length']
        total_messages = len(df)
        scam_count = int((labels == 'scam').sum())
        trust_count = int((labels == 'trust').sum())
        avg_confidence = df['confidence'].mean()

        lines = [
            "",
            "## 📊 Takwimu za Mfumo",
            "",
            f"**Jumla ya Ujumbe**: {total_messages}",
            f"**Scam**: {scam_count} ({scam_count/total_messages*100:.1f}%)",
            f"**Trust**: {trust_count} ({trust_count/total_messages*100:.1f}%)",
            f"**Wastani wa Uhakika**: {avg_confidence:.2f}",
            "",
            "### Takwimu za Wiki Hii",
            f"- Ujumbe mrefu zaidi: {lengths.max()} herufi",
            f"- Ujumbe mfupi zaidi: {lengths.min()} herufi",
            f"- Wastani wa urefu: {lengths.mean():.0f} herufi",
            "",
        ]
        return "\n".join(lines)

    except Exception as e:
        return f"Kosa la takwimu: {str(e)}"
156
+
157
def export_report(self):
    """Write a JSON summary of the 1000 most recent logged predictions.

    Reads ``sms_logs`` from the SQLite database at ``self.db_path`` and
    writes ``sms_report_<YYYYmmdd_HHMMSS>.json`` into the current working
    directory.

    Returns:
        A status string: the saved file name on success, a fixed notice
        when there is nothing to export, or an error string prefixed with
        "Kosa la report:" if anything fails.
    """
    def _native(value):
        # pandas aggregations return numpy scalars; numpy integers are not
        # JSON-serialisable and would be written as strings by the
        # ``default=str`` fallback. ``.item()`` yields the built-in type.
        return value.item() if hasattr(value, "item") else value

    try:
        conn = sqlite3.connect(self.db_path)
        try:
            df = pd.read_sql_query("""
                SELECT prediction, confidence, timestamp, message_length, suspicious_keywords
                FROM sms_logs
                ORDER BY timestamp DESC LIMIT 1000
            """, conn)
        finally:
            # Close even if the query fails (e.g. the table is missing).
            conn.close()

        if df.empty:
            return "Hakuna data ya kuexport"

        # Case-insensitive label match, consistent with how the prediction
        # label is compared elsewhere in the app.
        labels = df['prediction'].astype(str).str.lower()
        lengths = df['message_length']

        # Create summary report
        report = {
            'total_analyzed': len(df),
            'scam_percentage': _native((labels == 'scam').mean() * 100),
            'average_confidence': _native(df['confidence'].mean()),
            'date_range': {
                'from': _native(df['timestamp'].min()),
                'to': _native(df['timestamp'].max()),
            },
            'message_stats': {
                'avg_length': _native(lengths.mean()),
                'max_length': _native(lengths.max()),
                'min_length': _native(lengths.min()),
            },
        }

        # Save to JSON; ``default=str`` remains only as a last-resort
        # fallback for any value that is still not a built-in type.
        report_file = f"sms_report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
        with open(report_file, 'w', encoding='utf-8') as f:
            json.dump(report, f, indent=2, default=str)

        return f"Ripoti imehifadhiwa: {report_file}"

    except Exception as e:
        return f"Kosa la report: {str(e)}"
196
 
197
  def load_models(self):
198
  """Load machine learning models with error handling"""
 
237
  return "Haba (Low)", "🟒"
238
 
239
  def predict_sms(self, text):
240
+ """Enhanced prediction function with detailed output and logging"""
241
  # Input validation
242
  if not text or len(text.strip()) == 0:
243
  return "❌ **Kosa**: Tafadhali ingiza ujumbe wa SMS"
 
253
  # Preprocess text
254
  cleaned_text = self.preprocess_text(text)
255
 
256
+ # Analyze patterns
257
+ suspicious_score, detected_patterns = self.analyze_message_patterns(text)
258
+
259
  # Vectorize text
260
  text_vector = self.vectorizer.transform([cleaned_text])
261
 
 
266
  # Get confidence level
267
  confidence, emoji = self.get_confidence_level(prediction_proba)
268
 
269
+ # Log prediction
270
+ self.log_prediction(text, prediction, prediction_proba, suspicious_score)
271
+
272
  # Format prediction
273
  if prediction.lower() == 'scam':
274
  result_text = "**SCAM** 🚨"
 
279
  result_color = "success"
280
  advice = "Ujumbe huu unaonekana kuwa wa kawaida, lakini bado kuwa makini."
281
 
282
+ # Add pattern analysis to output
283
+ pattern_analysis = ""
284
+ if detected_patterns:
285
+ pattern_analysis = f"\n**Dalili Zilizogunduliwa**:\n" + "\n".join([f"β€’ {pattern}" for pattern in detected_patterns])
286
+ pattern_analysis += f"\n**Alama za Utata**: {suspicious_score}/10"
287
+
288
  # Create detailed output
289
  output = f"""
290
  ## Matokeo ya Uchunguzi {emoji}
 
296
  **Kiwango cha Uhakika**: {confidence}
297
 
298
  **Maoni**: {advice}
299
+ {pattern_analysis}
300
 
301
  ---
302
  *Tarehe*: {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}
 
385
  value="Matokeo yataonyeshwa hapa baada ya kuchunguza ujumbe..."
386
  )
387
 
388
+ with gr.Row():
389
+ with gr.Column(scale=1):
390
+ # Analytics Section
391
+ gr.Markdown("### πŸ“Š Takwimu za Mfumo")
392
+ analytics_btn = gr.Button("πŸ“ˆ Ona Takwimu", variant="outline")
393
+ analytics_output = gr.Markdown("Bonyeza hapo juu kuona takwimu...")
394
+
395
+ export_btn = gr.Button("πŸ“„ Export Ripoti", variant="outline")
396
+ export_output = gr.Markdown("")
397
+
398
  # Information section
399
  with gr.Accordion("ℹ️ Maelezo ya Ziada", open=False):
400
  gr.Markdown("""
 
410
  - πŸ”— Ina viungo vya kugusia (links)
411
  - ⚑ Inadai ni ya dharura
412
  - πŸ“ž Inaomba taarifa za kibinafsi
413
+ - πŸ“± Ina USSD codes (*123#)
414
+
415
+ ### Vipimo Vipya:
416
+ - **Pattern Analysis**: Mfumo unachunguza maneno na michoro ya kawaida
417
+ - **Database Logging**: Kila ujumbe unahifadhiwa kwa takwimu
418
+ - **Confidence Scoring**: Kiwango cha uhakika kinajumuishwa
419
+ - **Analytics Dashboard**: Takwimu za jumla za matumizi
420
 
421
  ### Onyo Muhimu:
422
  Mfumo huu ni wa kusaidia tu. Daima tumia busara zako na usijibu SMS zisizoeleweka.
423
  """)
424
 
425
+ # Advanced Features Section
426
+ with gr.Accordion("πŸ”§ Vipengele vya Kina", open=False):
427
+ gr.Markdown("""
428
+ ### Uchanganuzi wa Kina:
429
+ - **Keyword Detection**: Inachunguza maneno yenye hatari
430
+ - **URL Analysis**: Inaangalia viungo vya web
431
+ - **USSD Detection**: Inagundua nambari za *123#
432
+ - **Money Pattern**: Inatambua maelezo ya pesa
433
+ - **Urgency Detection**: Inagundua maneno ya dharura
434
+
435
+ ### Data Analytics:
436
+ - Takwimu za ujumbe wote uliochunguzwa
437
+ - Asilimia ya scam vs trust
438
+ - Wastani wa uhakika wa mfumo
439
+ - Export ya ripoti za kina
440
+ """)
441
+
442
  # Event handlers
443
  predict_btn.click(
444
  fn=detector.predict_sms,
 
446
  outputs=output_result
447
  )
448
 
449
+ analytics_btn.click(
450
+ fn=detector.get_analytics,
451
+ outputs=analytics_output
452
+ )
453
+
454
+ export_btn.click(
455
+ fn=detector.export_report,
456
+ outputs=export_output
457
+ )
458
+
459
  clear_btn.click(
460
  fn=lambda: ("", "Matokeo yataonyeshwa hapa baada ya kuchunguza ujumbe..."),
461
  outputs=[sms_input, output_result]