meccatronis committed on
Commit
02fa65d
·
verified ·
1 Parent(s): 53fed7b

Upload core/database_recovery.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. core/database_recovery.py +574 -0
core/database_recovery.py ADDED
@@ -0,0 +1,574 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Database Recovery Module
3
+ ========================
4
+
5
+ Handles extraction and parsing of Android SQLite databases.
6
+ Supports contacts, messages, call logs, and app-specific databases.
7
+ """
8
+
9
+ import os
10
+ import logging
11
+ import sqlite3
12
+ import tempfile
13
+ from typing import List, Dict, Optional, Any
14
+ from datetime import datetime
15
+ from pathlib import Path
16
+
17
+ from .adb_manager import ADBManager
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
+
22
class DatabaseRecovery:
    """
    Extracts and parses data from Android SQLite databases.

    Supported databases:
    - Contacts (contacts2.db)
    - Messages (mmssms.db)
    - Call Logs (calllog.db)
    - WhatsApp (msgstore.db, wa.db)
    """

    # Database paths on Android, keyed by the logical database name that
    # callers pass to get_database_info(). Every entry lives under
    # /data/data, so pulling any of them goes through the rooted copy path.
    DATABASE_PATHS = {
        'contacts': '/data/data/com.android.providers.contacts/databases/contacts2.db',
        'messages': '/data/data/com.android.providers.telephony/databases/mmssms.db',
        'call_logs': '/data/data/com.android.providers.contacts/databases/calllog.db',
        'whatsapp_messages': '/data/data/com.whatsapp/databases/msgstore.db',
        'whatsapp_contacts': '/data/data/com.whatsapp/databases/wa.db',
    }
41
+
42
def __init__(self, adb_manager: ADBManager):
    """
    Bind this recovery session to an ADB manager.

    A private scratch directory (prefixed ``android_recovery_``) is created
    right away; its path is kept on the instance as ``_temp_dir``.

    Args:
        adb_manager: ADB Manager instance
    """
    self.adb = adb_manager
    scratch_dir = tempfile.mkdtemp(prefix="android_recovery_")
    self._temp_dir = scratch_dir
51
+
52
def __del__(self):
    """
    Remove the scratch directory that holds pulled database copies.

    Cleanup is best-effort: a finalizer must never raise, and it may run on
    an instance whose ``__init__`` failed before ``_temp_dir`` was set, or
    while the interpreter is shutting down.
    """
    temp_dir = getattr(self, "_temp_dir", None)
    if not temp_dir:
        # __init__ never got as far as creating the directory.
        return

    try:
        # Imported inside the guard: during interpreter teardown the import
        # machinery itself may already be unavailable.
        import shutil
        shutil.rmtree(temp_dir, ignore_errors=True)
    except Exception:
        # Nothing useful can be done from a finalizer; leave the directory
        # for the OS temp cleaner.
        pass
59
+
60
def _pull_database(self, db_path: str) -> Optional[str]:
    """
    Pull a database file from the device into the local scratch directory.

    Paths under ``/data/data`` are first copied (as root) to a temporary
    file on ``/sdcard`` together with any ``-journal``/``-wal``/``-shm``
    companions, pulled from there, and the temporary copies are removed
    again. Other paths are pulled directly.

    Args:
        db_path: Path to database on device

    Returns:
        Local path to pulled database, or None when no device is connected,
        root is required but unavailable, the file does not exist, or a
        copy/pull step fails.
    """
    device_info = self.adb.get_device_info()
    if not device_info:
        logger.error("No device connected")
        return None

    # Check root before probing the file: the existence probe itself runs
    # as root, so on an unrooted device it would otherwise report a
    # misleading "Database not found".
    needs_root = db_path.startswith('/data/data')
    if needs_root and not device_info.is_rooted:
        logger.warning("Root access required to access app databases")
        return None

    # Check if database exists
    if not self.adb.file_exists(db_path, as_root=True):
        logger.warning(f"Database not found: {db_path}")
        return None

    sidecar_suffixes = ('-journal', '-wal', '-shm')

    # Create local path
    db_name = os.path.basename(db_path)
    local_path = os.path.join(self._temp_dir, db_name)

    # Drop leftovers from an earlier pull of the same database. A stale
    # -wal/-journal file next to a freshly pulled main file would be
    # applied by SQLite when the database is opened.
    for suffix in ('',) + sidecar_suffixes:
        stale = local_path + suffix
        if os.path.exists(stale):
            try:
                os.remove(stale)
            except OSError as e:
                logger.warning(f"Could not remove stale file {stale}: {e}")

    if not needs_root:
        # Direct pull for accessible paths
        if not self.adb.pull_file(db_path, local_path):
            logger.error(f"Failed to pull database: {db_path}")
            return None
        return local_path

    # Copy to accessible location first
    temp_device_path = f"/sdcard/.temp_{db_name}"
    try:
        success, _ = self.adb.shell(
            f"cp '{db_path}' '{temp_device_path}'", as_root=True
        )
        if not success:
            logger.error(f"Failed to copy database: {db_path}")
            return None

        # Also copy journal and wal files if they exist
        for suffix in sidecar_suffixes:
            journal_path = db_path + suffix
            if self.adb.file_exists(journal_path, as_root=True):
                self.adb.shell(
                    f"cp '{journal_path}' '{temp_device_path}{suffix}'",
                    as_root=True
                )

        # Pull the database
        if not self.adb.pull_file(temp_device_path, local_path):
            logger.error(f"Failed to pull database: {temp_device_path}")
            return None

        # Pull journal files
        for suffix in sidecar_suffixes:
            temp_journal = temp_device_path + suffix
            if self.adb.file_exists(temp_journal):
                self.adb.pull_file(temp_journal, local_path + suffix)

        return local_path
    finally:
        # Always remove the world-readable copies from /sdcard, including
        # when a step above failed or raised. -f keeps rm quiet when the
        # glob matches nothing.
        self.adb.shell(f"rm -f '{temp_device_path}'*", as_root=True)
128
+
129
def _query_database(self, db_path: str, query: str, params: tuple = ()) -> List[Dict[str, Any]]:
    """
    Execute a query on a SQLite database.

    Errors are logged and never raised; a failed query yields an empty
    list. The connection is closed on every path.

    Args:
        db_path: Path to local database file
        query: SQL query to execute
        params: Optional values bound to ``?`` placeholders in ``query``

    Returns:
        List of result dictionaries, one per row, keyed by column name
    """
    results = []
    conn = None

    try:
        conn = sqlite3.connect(db_path)
        # Row objects expose column names, so each row converts to a dict.
        conn.row_factory = sqlite3.Row
        cursor = conn.cursor()

        cursor.execute(query, params)
        results = [dict(row) for row in cursor.fetchall()]
    except sqlite3.Error as e:
        logger.error(f"Database query error: {e}")
    except Exception as e:
        logger.error(f"Error querying database: {e}")
    finally:
        # Close here, not at the end of the try body, so a failing query
        # does not leak the connection (and its file handle).
        if conn is not None:
            conn.close()

    return results
160
+
161
def extract_contacts(self) -> List[Dict[str, Any]]:
    """
    Extract contacts from the device.

    The query joins each contact with its phone and email data rows, so a
    contact with several numbers/addresses comes back as several rows.
    Rows are merged per contact id: the first row supplies ``name``,
    ``phone`` and ``email``; every distinct non-empty phone/email seen is
    collected in ``phones``/``emails``.

    Returns:
        List of contact dictionaries (empty if the database could not be
        pulled or queried)
    """
    db_path = self._pull_database(self.DATABASE_PATHS['contacts'])
    if not db_path:
        return []

    contacts = []

    try:
        # Query for contacts with phone numbers
        # NOTE(review): this assumes the `contacts` table has display_name
        # and the `data` table has contact_id; some contacts2.db layouts
        # keep these on raw_contacts / view_data instead - confirm against
        # the target devices.
        query = """
            SELECT
                c._id as id,
                c.display_name as name,
                p.data1 as phone,
                p.data2 as phone_type,
                e.data1 as email
            FROM contacts c
            LEFT JOIN data p ON c._id = p.contact_id AND p.mimetype_id = (
                SELECT _id FROM mimetypes WHERE mimetype = 'vnd.android.cursor.item/phone_v2'
            )
            LEFT JOIN data e ON c._id = e.contact_id AND e.mimetype_id = (
                SELECT _id FROM mimetypes WHERE mimetype = 'vnd.android.cursor.item/email_v2'
            )
            WHERE c.display_name IS NOT NULL
            ORDER BY c.display_name
        """

        results = self._query_database(db_path, query)

        # Process and deduplicate. The index maps contact id -> the dict
        # already placed in `contacts`, so merging a duplicate row is a
        # constant-time lookup instead of a scan of the whole list.
        by_id = {}
        for row in results:
            contact_id = row.get('id')
            phone = row.get('phone')
            email = row.get('email')

            if contact_id in by_id:
                # Update existing contact with additional info
                contact = by_id[contact_id]
                if phone and phone not in contact['phones']:
                    contact['phones'].append(phone)
                if email and email not in contact['emails']:
                    contact['emails'].append(email)
                continue

            contact = {
                'id': contact_id,
                'name': row.get('name', ''),
                'phone': row.get('phone', ''),
                'email': row.get('email', ''),
                'phones': [phone] if phone else [],
                'emails': [email] if email else [],
            }
            by_id[contact_id] = contact
            contacts.append(contact)

        logger.info(f"Extracted {len(contacts)} contacts")

    except Exception as e:
        logger.error(f"Error extracting contacts: {e}")

    return contacts
227
+
228
def extract_messages(self) -> List[Dict[str, Any]]:
    """
    Extract text messages from the device's telephony database.

    Only the ``sms`` table is read. Each row becomes a dictionary with the
    raw millisecond ``timestamp``, a formatted local-time ``date`` string,
    and a ``direction`` derived from the SMS type column
    (1 -> received, 2 -> sent, anything else -> unknown).

    Returns:
        List of message dictionaries, newest first; empty if the database
        could not be pulled
    """
    local_db = self._pull_database(self.DATABASE_PATHS['messages'])
    if not local_db:
        return []

    messages = []
    directions = {1: 'received', 2: 'sent'}

    try:
        # Query for SMS messages
        sms_query = """
            SELECT
                _id as id,
                address as phone_number,
                body as message,
                date as timestamp,
                type as message_type,
                read as is_read,
                seen as is_seen
            FROM sms
            ORDER BY date DESC
        """

        for row in self._query_database(local_db, sms_query):
            # Convert timestamp (milliseconds since the epoch); fall back
            # to the raw value if it cannot be interpreted.
            timestamp = row.get('timestamp', 0)
            date_str = ''
            if timestamp:
                try:
                    moment = datetime.fromtimestamp(timestamp / 1000)
                    date_str = moment.strftime('%Y-%m-%d %H:%M:%S')
                except Exception:
                    date_str = str(timestamp)

            # Determine message direction
            direction = directions.get(row.get('message_type', 0), 'unknown')

            messages.append({
                'id': row.get('id'),
                'phone_number': row.get('phone_number', ''),
                'message': row.get('message', ''),
                'date': date_str,
                'timestamp': timestamp,
                'direction': direction,
                'is_read': bool(row.get('is_read', 0)),
            })

        logger.info(f"Extracted {len(messages)} messages")

    except Exception as e:
        logger.error(f"Error extracting messages: {e}")

    return messages
290
+
291
def extract_call_logs(self) -> List[Dict[str, Any]]:
    """
    Extract the call history from the device.

    Each row of the ``calls`` table becomes a dictionary carrying the raw
    millisecond ``timestamp``, a formatted local-time ``date``, the raw
    ``duration_seconds`` plus an ``[h:]m:ss`` ``duration`` string, and a
    textual ``call_type``.

    Returns:
        List of call log dictionaries, newest first; empty if the database
        could not be pulled
    """
    local_db = self._pull_database(self.DATABASE_PATHS['call_logs'])
    if not local_db:
        return []

    call_logs = []

    # Numeric call type -> label; unlisted values map to 'unknown'.
    call_type_names = {
        1: 'incoming',
        2: 'outgoing',
        3: 'missed',
        4: 'voicemail',
        5: 'rejected',
        6: 'blocked',
    }

    try:
        calls_query = """
            SELECT
                _id as id,
                number as phone_number,
                name as contact_name,
                date as timestamp,
                duration as duration_seconds,
                type as call_type
            FROM calls
            ORDER BY date DESC
        """

        for row in self._query_database(local_db, calls_query):
            # Convert timestamp (milliseconds since the epoch); fall back
            # to the raw value if it cannot be interpreted.
            timestamp = row.get('timestamp', 0)
            date_str = ''
            if timestamp:
                try:
                    moment = datetime.fromtimestamp(timestamp / 1000)
                    date_str = moment.strftime('%Y-%m-%d %H:%M:%S')
                except Exception:
                    date_str = str(timestamp)

            # Determine call type
            call_type = call_type_names.get(row.get('call_type', 0), 'unknown')

            # Format duration
            duration = row.get('duration_seconds', 0)
            if not duration:
                duration_str = "0:00"
            else:
                total_minutes, seconds = divmod(duration, 60)
                hours, minutes = divmod(total_minutes, 60)
                if hours:
                    duration_str = f"{hours}:{minutes:02d}:{seconds:02d}"
                else:
                    duration_str = f"{minutes}:{seconds:02d}"

            call_logs.append({
                'id': row.get('id'),
                'phone_number': row.get('phone_number', ''),
                'contact_name': row.get('contact_name', ''),
                'date': date_str,
                'timestamp': timestamp,
                'duration': duration_str,
                'duration_seconds': duration,
                'call_type': call_type,
            })

        logger.info(f"Extracted {len(call_logs)} call logs")

    except Exception as e:
        logger.error(f"Error extracting call logs: {e}")

    return call_logs
372
+
373
def extract_whatsapp_messages(self) -> List[Dict[str, Any]]:
    """
    Extract WhatsApp messages from the device.

    Reads up to 10000 of the newest rows from the ``messages`` table that
    carry either text or media. The part of the chat id before ``@`` is
    reported as ``phone_number``; rows with a NULL chat id get an empty
    ``phone_number`` instead of aborting the extraction.

    Returns:
        List of WhatsApp message dictionaries, newest first; empty if the
        database could not be pulled
    """
    db_path = self._pull_database(self.DATABASE_PATHS['whatsapp_messages'])
    if not db_path:
        return []

    messages = []

    # Numeric media type -> label; unlisted values map to 'unknown'.
    # Built once here, not once per row.
    media_types = {
        0: 'text',
        1: 'image',
        2: 'audio',
        3: 'video',
        4: 'contact',
        5: 'location',
        8: 'document',
        9: 'gif',
        13: 'sticker',
    }

    try:
        # WhatsApp database structure varies by version
        # This is a common query structure
        query = """
            SELECT
                m._id as id,
                m.key_remote_jid as chat_id,
                m.data as message,
                m.timestamp as timestamp,
                m.key_from_me as is_from_me,
                m.media_wa_type as media_type,
                m.media_mime_type as mime_type,
                m.media_name as media_name
            FROM messages m
            WHERE m.data IS NOT NULL OR m.media_wa_type > 0
            ORDER BY m.timestamp DESC
            LIMIT 10000
        """

        results = self._query_database(db_path, query)

        for row in results:
            # Convert timestamp (milliseconds since the epoch); fall back
            # to the raw value if it cannot be interpreted.
            timestamp = row.get('timestamp', 0)
            if timestamp:
                try:
                    date_str = datetime.fromtimestamp(timestamp / 1000).strftime('%Y-%m-%d %H:%M:%S')
                except Exception:
                    date_str = str(timestamp)
            else:
                date_str = ''

            # Parse chat ID to get phone number. The column can be NULL,
            # and the row dict then holds None despite the .get default;
            # without the `or ''` the membership test would raise and the
            # outer handler would drop every remaining row.
            chat_id = row.get('chat_id') or ''
            phone_number = chat_id.split('@')[0] if '@' in chat_id else chat_id

            # Determine media type
            media_type = media_types.get(row.get('media_type', 0), 'unknown')

            message = {
                'id': row.get('id'),
                'phone_number': phone_number,
                'message': row.get('message', ''),
                'date': date_str,
                'timestamp': timestamp,
                'direction': 'sent' if row.get('is_from_me') else 'received',
                'media_type': media_type,
                'media_name': row.get('media_name', ''),
            }
            messages.append(message)

        logger.info(f"Extracted {len(messages)} WhatsApp messages")

    except Exception as e:
        logger.error(f"Error extracting WhatsApp messages: {e}")

    return messages
454
+
455
def extract_whatsapp_contacts(self) -> List[Dict[str, Any]]:
    """
    Extract the WhatsApp address book from the device.

    Only rows of ``wa_contacts`` flagged as WhatsApp users are returned.
    When a row has no phone number, the part of its WhatsApp id before
    ``@`` is used instead.

    Returns:
        List of WhatsApp contact dictionaries ordered by display name;
        empty if the database could not be pulled
    """
    local_db = self._pull_database(self.DATABASE_PATHS['whatsapp_contacts'])
    if not local_db:
        return []

    contacts = []

    try:
        wa_query = """
            SELECT
                _id as id,
                jid as whatsapp_id,
                display_name as name,
                number as phone_number,
                status as status_message
            FROM wa_contacts
            WHERE is_whatsapp_user = 1
            ORDER BY display_name
        """

        for row in self._query_database(local_db, wa_query):
            wa_id = row.get('whatsapp_id', '')
            phone = row.get('phone_number', '')

            # Parse WhatsApp ID to get phone number if not available
            if wa_id and not phone:
                phone = wa_id.split('@')[0]

            contacts.append({
                'id': row.get('id'),
                'name': row.get('name', ''),
                'phone_number': phone,
                'whatsapp_id': wa_id,
                'status': row.get('status_message', ''),
            })

        logger.info(f"Extracted {len(contacts)} WhatsApp contacts")

    except Exception as e:
        logger.error(f"Error extracting WhatsApp contacts: {e}")

    return contacts
505
+
506
def extract_all_data(self) -> Dict[str, List[Dict[str, Any]]]:
    """
    Run every extractor and gather the results in one dictionary.

    Extractors run in a fixed order: contacts, messages, call logs,
    WhatsApp messages, WhatsApp contacts.

    Returns:
        Dictionary mapping each section name to the list returned by the
        matching ``extract_<section>`` method
    """
    sections = (
        'contacts',
        'messages',
        'call_logs',
        'whatsapp_messages',
        'whatsapp_contacts',
    )

    collected = {}
    for section in sections:
        # Each result key matches the suffix of its extractor method.
        extractor = getattr(self, f"extract_{section}")
        collected[section] = extractor()
    return collected
520
+
521
def get_database_info(self, db_name: str) -> Optional[Dict[str, Any]]:
    """
    Get information about a database.

    Pulls the database from the device and lists every table with its row
    count and column names. If inspection fails part-way, the error is
    logged and the tables gathered so far are still returned.

    Args:
        db_name: Database name (a key of DATABASE_PATHS: contacts,
            messages, etc.)

    Returns:
        Dictionary with ``name``, device ``path``, ``local_path``, file
        ``size`` in bytes and ``tables``; None if the name is unknown or
        the database could not be pulled
    """
    if db_name not in self.DATABASE_PATHS:
        return None

    db_path = self._pull_database(self.DATABASE_PATHS[db_name])
    if not db_path:
        return None

    info = {
        'name': db_name,
        'path': self.DATABASE_PATHS[db_name],
        'local_path': db_path,
        'size': os.path.getsize(db_path),
        'tables': [],
    }

    conn = None
    try:
        conn = sqlite3.connect(db_path)
        cursor = conn.cursor()

        # Get table list
        cursor.execute("SELECT name FROM sqlite_master WHERE type='table'")
        tables = cursor.fetchall()

        for (table_name,) in tables:
            # Table names come from the pulled file and cannot be bound as
            # parameters, so quote them as identifiers. Unquoted, a name
            # containing spaces or matching an SQL keyword would raise and
            # cut the listing short.
            quoted = '"' + table_name.replace('"', '""') + '"'

            # Get row count
            cursor.execute(f"SELECT COUNT(*) FROM {quoted}")
            count = cursor.fetchone()[0]

            # Get column info (index 1 of each table_info row is the name)
            cursor.execute(f"PRAGMA table_info({quoted})")
            columns = [col[1] for col in cursor.fetchall()]

            info['tables'].append({
                'name': table_name,
                'row_count': count,
                'columns': columns,
            })

    except Exception as e:
        logger.error(f"Error getting database info: {e}")
    finally:
        # Close on every path so a failed inspection does not leak the
        # connection.
        if conn is not None:
            conn.close()

    return info