mustafa2ak committed on
Commit
96d23c9
·
verified ·
1 Parent(s): 0ace461

Create database.py

Browse files
Files changed (1) hide show
  1. database.py +589 -0
database.py ADDED
@@ -0,0 +1,589 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ database.py - SQLite Database Manager for Dog Monitoring System
3
+ Handles persistent storage of dog data, features, and annotations
4
+ """
5
+ import sqlite3
6
+ import json
7
+ import pickle
8
+ import base64
9
+ import numpy as np
10
+ import cv2
11
+ from datetime import datetime
12
+ from typing import List, Dict, Optional, Tuple, Any
13
+ from pathlib import Path
14
+ import pandas as pd
15
+
16
class DogDatabase:
    """SQLite database manager for the dog monitoring system.

    Owns a single ``sqlite3`` connection and cursor and exposes helpers to
    store dogs, their extracted features, full images, body-part crops and
    sightings, plus query/export/cleanup utilities.

    Images are stored as base64-encoded JPEG text and feature vectors as
    pickled numpy arrays.  Every mutating method commits before returning.

    The instance can be used as a context manager; the connection is closed
    on exit.
    """

    # Body-part labels used for crops and for the export directory layout.
    _PART_TYPES = ('head', 'torso', 'rear')

    def __init__(self, db_path: str = "dog_monitoring.db"):
        """Open (or create) the database at ``db_path`` and ensure the schema.

        Args:
            db_path: Path of the SQLite file, or ``":memory:"``.
        """
        self.db_path = db_path
        # check_same_thread=False lets other threads use this connection;
        # callers are responsible for serialising access themselves.
        self.conn = sqlite3.connect(db_path, check_same_thread=False)
        self.conn.row_factory = sqlite3.Row
        self.cursor = self.conn.cursor()

        # Create tables if they don't exist
        self._create_tables()

    def __enter__(self) -> "DogDatabase":
        """Return ``self`` so the database can be used in a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        """Close the connection when leaving a ``with`` block."""
        self.close()

    def _create_tables(self):
        """Create all tables and indexes if missing and migrate old schemas."""

        # Dogs table - main registry
        self.cursor.execute("""
            CREATE TABLE IF NOT EXISTS dogs (
                dog_id INTEGER PRIMARY KEY,
                name TEXT,
                first_seen TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                last_seen TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                total_sightings INTEGER DEFAULT 1,
                notes TEXT,
                merged_from TEXT,  -- JSON list of merged dog IDs
                status TEXT DEFAULT 'active'  -- active, merged, deleted
            )
        """)

        # Dog features table - stores extracted features
        self.cursor.execute("""
            CREATE TABLE IF NOT EXISTS dog_features (
                feature_id INTEGER PRIMARY KEY AUTOINCREMENT,
                dog_id INTEGER,
                resnet_features BLOB,  -- Pickled numpy array
                color_histogram BLOB,  -- Pickled numpy array
                timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                confidence REAL,
                FOREIGN KEY (dog_id) REFERENCES dogs(dog_id)
            )
        """)

        # Dog images table - stores actual images
        self.cursor.execute("""
            CREATE TABLE IF NOT EXISTS dog_images (
                image_id INTEGER PRIMARY KEY AUTOINCREMENT,
                dog_id INTEGER,
                image_data BLOB,  -- Base64 encoded image
                thumbnail BLOB,  -- Small preview
                width INTEGER,
                height INTEGER,
                timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                frame_number INTEGER,
                video_source TEXT,
                bbox TEXT,  -- JSON [x1, y1, x2, y2]
                confidence REAL,
                pose_keypoints TEXT,  -- JSON list of keypoints (optional)
                is_validated BOOLEAN DEFAULT 0,
                is_discarded BOOLEAN DEFAULT 0,
                FOREIGN KEY (dog_id) REFERENCES dogs(dog_id)
            )
        """)

        # Databases created by the earlier schema have no pose_keypoints
        # column even though save_image()/get_dog_images() use it; add it.
        self.cursor.execute("PRAGMA table_info(dog_images)")
        image_columns = {row[1] for row in self.cursor.fetchall()}
        if 'pose_keypoints' not in image_columns:
            self.cursor.execute(
                "ALTER TABLE dog_images ADD COLUMN pose_keypoints TEXT"
            )

        # Body parts table - stores cropped body parts
        self.cursor.execute("""
            CREATE TABLE IF NOT EXISTS body_parts (
                part_id INTEGER PRIMARY KEY AUTOINCREMENT,
                dog_id INTEGER,
                image_id INTEGER,
                part_type TEXT,  -- 'head', 'torso', 'rear'
                part_image BLOB,  -- Base64 encoded crop
                crop_bbox TEXT,  -- JSON [x1, y1, x2, y2] relative to full image
                confidence REAL,
                is_validated BOOLEAN DEFAULT 0,
                is_discarded BOOLEAN DEFAULT 0,
                FOREIGN KEY (dog_id) REFERENCES dogs(dog_id),
                FOREIGN KEY (image_id) REFERENCES dog_images(image_id)
            )
        """)

        # Sightings table - tracks when/where dogs were seen
        self.cursor.execute("""
            CREATE TABLE IF NOT EXISTS sightings (
                sighting_id INTEGER PRIMARY KEY AUTOINCREMENT,
                dog_id INTEGER,
                timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                position_x REAL,
                position_y REAL,
                video_source TEXT,
                frame_number INTEGER,
                confidence REAL,
                FOREIGN KEY (dog_id) REFERENCES dogs(dog_id)
            )
        """)

        # Processing sessions table
        self.cursor.execute("""
            CREATE TABLE IF NOT EXISTS sessions (
                session_id INTEGER PRIMARY KEY AUTOINCREMENT,
                start_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                end_time TIMESTAMP,
                video_path TEXT,
                total_frames INTEGER,
                dogs_detected INTEGER,
                settings TEXT  -- JSON of processing settings
            )
        """)

        # Create indexes for performance
        self.cursor.execute("CREATE INDEX IF NOT EXISTS idx_dog_features ON dog_features(dog_id)")
        self.cursor.execute("CREATE INDEX IF NOT EXISTS idx_dog_images ON dog_images(dog_id)")
        self.cursor.execute("CREATE INDEX IF NOT EXISTS idx_body_parts ON body_parts(dog_id)")
        self.cursor.execute("CREATE INDEX IF NOT EXISTS idx_sightings ON sightings(dog_id)")

        self.conn.commit()

    # ========== Internal helpers ==========

    @staticmethod
    def _encode_jpeg(image: np.ndarray, quality: Optional[int] = None) -> str:
        """Return ``image`` JPEG-encoded and base64-wrapped as text.

        Raises:
            ValueError: If OpenCV reports that encoding failed.
        """
        params = [] if quality is None else [cv2.IMWRITE_JPEG_QUALITY, quality]
        ok, buffer = cv2.imencode('.jpg', image, params)
        if not ok:
            raise ValueError("Failed to JPEG-encode image")
        return base64.b64encode(buffer).decode('utf-8')

    @staticmethod
    def _decode_image(encoded) -> np.ndarray:
        """Decode a base64 JPEG (as produced by ``_encode_jpeg``) to an image."""
        raw = np.frombuffer(base64.b64decode(encoded), np.uint8)
        return cv2.imdecode(raw, cv2.IMREAD_COLOR)

    def _load_merge_history(self, dog_id: int) -> List[int]:
        """Return the ``merged_from`` list of a dog, or ``[]`` if none/unknown."""
        self.cursor.execute("SELECT merged_from FROM dogs WHERE dog_id = ?", (dog_id,))
        row = self.cursor.fetchone()
        if row and row['merged_from']:
            return json.loads(row['merged_from'])
        return []

    # ========== Dog Management ==========

    def add_dog(self, dog_id: Optional[int] = None, name: Optional[str] = None) -> int:
        """Add a new dog to the database.

        Args:
            dog_id: Explicit id to register.  If a dog with this id already
                exists the insert is ignored.  ``None`` lets SQLite assign one.
            name: Optional display name.

        Returns:
            The id of the dog (the given one, or the newly assigned one).
        """
        # Compare against None so that an explicit id of 0 is honoured.
        if dog_id is not None:
            self.cursor.execute(
                "INSERT OR IGNORE INTO dogs (dog_id, name) VALUES (?, ?)",
                (dog_id, name)
            )
        else:
            self.cursor.execute(
                "INSERT INTO dogs (name) VALUES (?)",
                (name,)
            )
            dog_id = self.cursor.lastrowid

        self.conn.commit()
        return dog_id

    def update_dog_sighting(self, dog_id: int):
        """Update last seen time and increment sighting count."""
        self.cursor.execute("""
            UPDATE dogs
            SET last_seen = CURRENT_TIMESTAMP,
                total_sightings = total_sightings + 1
            WHERE dog_id = ?
        """, (dog_id,))
        self.conn.commit()

    def merge_dogs(self, keep_id: int, merge_id: int) -> bool:
        """Merge two dogs, keeping ``keep_id``.

        All features, images, body parts and sightings of ``merge_id`` are
        re-assigned to ``keep_id``; the merged dog's sighting count and merge
        history are folded into the kept dog, and ``merge_id`` is marked with
        status ``'merged'``.

        Returns:
            ``True`` on success.  ``False`` if ``keep_id == merge_id`` or if
            any step failed (in which case the transaction is rolled back).
        """
        # Merging a dog into itself would double its count and hide it.
        if keep_id == merge_id:
            return False

        try:
            # Update all references
            for table in ('dog_features', 'dog_images', 'body_parts', 'sightings'):
                self.cursor.execute(
                    f"UPDATE {table} SET dog_id = ? WHERE dog_id = ?",
                    (keep_id, merge_id)
                )

            # Preserve what was already merged into either dog, then record
            # this merge.
            merged_history = (
                self._load_merge_history(keep_id)
                + self._load_merge_history(merge_id)
                + [merge_id]
            )

            # COALESCE keeps the count intact when merge_id has no dogs row
            # (an empty subquery yields NULL, and x + NULL is NULL).
            self.cursor.execute("""
                UPDATE dogs
                SET merged_from = ?,
                    total_sightings = total_sightings + COALESCE((
                        SELECT total_sightings FROM dogs WHERE dog_id = ?
                    ), 0)
                WHERE dog_id = ?
            """, (json.dumps(merged_history), merge_id, keep_id))

            # Mark merge_id as merged
            self.cursor.execute(
                "UPDATE dogs SET status = 'merged' WHERE dog_id = ?",
                (merge_id,)
            )

            self.conn.commit()
            return True
        except Exception as e:
            # Best-effort API: report, undo the partial merge, signal failure.
            print(f"Error merging dogs: {e}")
            self.conn.rollback()
            return False

    def delete_dog(self, dog_id: int, hard_delete: bool = False):
        """Delete a dog or mark it as deleted.

        Args:
            dog_id: Dog to remove.
            hard_delete: If true, remove the dog and all of its features,
                images, body parts and sightings.  Otherwise only set the
                dog's status to ``'deleted'``.
        """
        if hard_delete:
            # Hard delete - remove all data (children first, then the dog)
            for table in ('body_parts', 'dog_features', 'dog_images',
                          'sightings', 'dogs'):
                self.cursor.execute(f"DELETE FROM {table} WHERE dog_id = ?", (dog_id,))
        else:
            # Soft delete - mark as deleted
            self.cursor.execute(
                "UPDATE dogs SET status = 'deleted' WHERE dog_id = ?",
                (dog_id,)
            )
        self.conn.commit()

    # ========== Features Management ==========

    def save_features(self, dog_id: int, resnet_features: np.ndarray,
                      color_histogram: np.ndarray, confidence: float):
        """Save one feature observation (pickled arrays) for a dog."""
        resnet_blob = pickle.dumps(resnet_features)
        color_blob = pickle.dumps(color_histogram)

        self.cursor.execute("""
            INSERT INTO dog_features
            (dog_id, resnet_features, color_histogram, confidence)
            VALUES (?, ?, ?, ?)
        """, (dog_id, resnet_blob, color_blob, confidence))

        self.conn.commit()

    def get_features(self, dog_id: int, limit: int = 20) -> List[Dict]:
        """Get the most recent features for a dog, newest first.

        Args:
            dog_id: Dog whose features to load.
            limit: Maximum number of rows returned.

        Returns:
            Dicts with ``resnet_features``, ``color_histogram``,
            ``confidence`` and ``timestamp``.
        """
        # feature_id breaks ties: CURRENT_TIMESTAMP only has second resolution.
        self.cursor.execute("""
            SELECT * FROM dog_features
            WHERE dog_id = ?
            ORDER BY timestamp DESC, feature_id DESC
            LIMIT ?
        """, (dog_id, limit))

        # SECURITY NOTE: pickle.loads executes arbitrary code from the blob.
        # Only open database files that this application wrote itself.
        return [
            {
                'resnet_features': pickle.loads(row['resnet_features']),
                'color_histogram': pickle.loads(row['color_histogram']),
                'confidence': row['confidence'],
                'timestamp': row['timestamp'],
            }
            for row in self.cursor.fetchall()
        ]

    # ========== Images Management ==========

    def save_image(self, dog_id: int, image: np.ndarray,
                   frame_number: int, video_source: str,
                   bbox: List[float], confidence: float,
                   pose_keypoints: Optional[List] = None):
        """Save a dog image (and a 128x128 thumbnail) to the database.

        Args:
            dog_id: Dog the image belongs to.
            image: Image array as accepted by ``cv2.imencode``.
            frame_number: Frame index within ``video_source``.
            video_source: Identifier of the source video.
            bbox: Bounding box, stored as JSON.
            confidence: Detection confidence.
            pose_keypoints: Optional keypoints, stored as JSON; an empty or
                missing value is stored as NULL.

        Returns:
            The ``image_id`` of the inserted row.

        Raises:
            ValueError: If JPEG encoding fails.
        """
        image_data = self._encode_jpeg(image)
        thumb_data = self._encode_jpeg(cv2.resize(image, (128, 128)), quality=70)

        h, w = image.shape[:2]

        self.cursor.execute("""
            INSERT INTO dog_images
            (dog_id, image_data, thumbnail, width, height,
             frame_number, video_source, bbox, confidence, pose_keypoints)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        """, (dog_id, image_data, thumb_data, w, h,
              frame_number, video_source, json.dumps(bbox),
              confidence, json.dumps(pose_keypoints) if pose_keypoints else None))

        # Read the id before committing so nothing can intervene on the cursor.
        image_id = self.cursor.lastrowid
        self.conn.commit()
        return image_id

    def get_dog_images(self, dog_id: int, validated_only: bool = False,
                       include_discarded: bool = False) -> List[Dict]:
        """Get all images for a dog, newest first.

        Args:
            dog_id: Dog whose images to load.
            validated_only: Only return images flagged as validated.
            include_discarded: Also return images flagged as discarded.

        Returns:
            Dicts with the decoded ``image``, the stored (still encoded)
            ``thumbnail``, and the row metadata.
        """
        query = "SELECT * FROM dog_images WHERE dog_id = ?"
        params = [dog_id]

        if validated_only:
            query += " AND is_validated = 1"
        if not include_discarded:
            query += " AND is_discarded = 0"

        # image_id makes the order deterministic within the same second.
        query += " ORDER BY timestamp DESC, image_id DESC"

        self.cursor.execute(query, params)

        images = []
        for row in self.cursor.fetchall():
            images.append({
                'image_id': row['image_id'],
                'image': self._decode_image(row['image_data']),
                'thumbnail': row['thumbnail'],
                'bbox': json.loads(row['bbox']) if row['bbox'] else None,
                'confidence': row['confidence'],
                'frame_number': row['frame_number'],
                'video_source': row['video_source'],
                'is_validated': row['is_validated'],
                'is_discarded': row['is_discarded'],
                'pose_keypoints': json.loads(row['pose_keypoints']) if row['pose_keypoints'] else None
            })

        return images

    def validate_image(self, image_id: int, is_valid: bool = True):
        """Mark an image as validated (``is_valid``) or discarded (otherwise)."""
        column = 'is_validated' if is_valid else 'is_discarded'
        self.cursor.execute(
            f"UPDATE dog_images SET {column} = 1 WHERE image_id = ?",
            (image_id,)
        )
        self.conn.commit()

    # ========== Body Parts Management ==========

    def save_body_parts(self, dog_id: int, image_id: int,
                        head_crop: Optional[np.ndarray],
                        torso_crop: Optional[np.ndarray],
                        rear_crop: Optional[np.ndarray],
                        confidences: Dict[str, float]):
        """Save body part crops to the database.

        Crops that are ``None`` are skipped.  ``confidences`` maps part type
        (``'head'``, ``'torso'``, ``'rear'``) to a confidence; missing
        entries default to ``0.0``.

        Raises:
            ValueError: If JPEG encoding of a crop fails.
        """
        parts = {
            'head': head_crop,
            'torso': torso_crop,
            'rear': rear_crop
        }

        for part_type, crop in parts.items():
            if crop is None:
                continue

            self.cursor.execute("""
                INSERT INTO body_parts
                (dog_id, image_id, part_type, part_image, confidence)
                VALUES (?, ?, ?, ?, ?)
            """, (dog_id, image_id, part_type, self._encode_jpeg(crop),
                  confidences.get(part_type, 0.0)))

        self.conn.commit()

    def get_body_parts(self, dog_id: int, part_type: Optional[str] = None,
                       validated_only: bool = False) -> List[Dict]:
        """Get the non-discarded body part crops for a dog.

        Args:
            dog_id: Dog whose crops to load.
            part_type: Restrict to one part type if given.
            validated_only: Only return crops flagged as validated.

        Returns:
            Dicts with ``part_id``, ``part_type``, decoded ``image``,
            ``confidence``, ``is_validated`` and ``image_id``.
        """
        query = "SELECT * FROM body_parts WHERE dog_id = ?"
        params = [dog_id]

        if part_type:
            query += " AND part_type = ?"
            params.append(part_type)

        if validated_only:
            query += " AND is_validated = 1"

        query += " AND is_discarded = 0"

        self.cursor.execute(query, params)

        return [
            {
                'part_id': row['part_id'],
                'part_type': row['part_type'],
                'image': self._decode_image(row['part_image']),
                'confidence': row['confidence'],
                'is_validated': row['is_validated'],
                'image_id': row['image_id'],
            }
            for row in self.cursor.fetchall()
        ]

    def validate_body_part(self, part_id: int, is_valid: bool = True):
        """Mark a body part as validated (``is_valid``) or discarded (otherwise)."""
        column = 'is_validated' if is_valid else 'is_discarded'
        self.cursor.execute(
            f"UPDATE body_parts SET {column} = 1 WHERE part_id = ?",
            (part_id,)
        )
        self.conn.commit()

    def add_sighting(self, dog_id: int, position: Tuple[float, float],
                     video_source: str, frame_number: int, confidence: float):
        """Record a dog sighting at ``position`` (x, y)."""
        self.cursor.execute("""
            INSERT INTO sightings
            (dog_id, position_x, position_y, video_source, frame_number, confidence)
            VALUES (?, ?, ?, ?, ?, ?)
        """, (dog_id, position[0], position[1], video_source, frame_number, confidence))

        self.conn.commit()

    # ========== Query Methods ==========

    def get_all_dogs(self, active_only: bool = True) -> pd.DataFrame:
        """Get all dogs as a DataFrame ordered by ``dog_id``.

        Args:
            active_only: Only include dogs whose status is ``'active'``.
        """
        query = "SELECT * FROM dogs"
        if active_only:
            query += " WHERE status = 'active'"
        query += " ORDER BY dog_id"

        return pd.read_sql_query(query, self.conn)

    def get_dog_statistics(self) -> Dict:
        """Get overall statistics.

        Returns:
            Dict with ``total_active_dogs``, ``total_images`` (non-discarded),
            ``validated_images``, ``total_sightings`` and, when at least one
            active dog exists, ``most_seen_dog``.
        """
        counts = {
            'total_active_dogs': "SELECT COUNT(*) FROM dogs WHERE status = 'active'",
            'total_images': "SELECT COUNT(*) FROM dog_images WHERE is_discarded = 0",
            'validated_images': "SELECT COUNT(*) FROM dog_images WHERE is_validated = 1",
            'total_sightings': "SELECT COUNT(*) FROM sightings",
        }

        stats = {}
        for key, query in counts.items():
            self.cursor.execute(query)
            stats[key] = self.cursor.fetchone()[0]

        # Most seen dog
        self.cursor.execute("""
            SELECT d.dog_id, d.name, d.total_sightings
            FROM dogs d
            WHERE d.status = 'active'
            ORDER BY d.total_sightings DESC
            LIMIT 1
        """)
        row = self.cursor.fetchone()
        if row:
            stats['most_seen_dog'] = {
                'dog_id': row[0],
                'name': row[1] or f"Dog #{row[0]}",
                'sightings': row[2]
            }

        return stats

    # ========== Export Methods ==========

    @staticmethod
    def _split_dataset(dataset_df: pd.DataFrame) -> Tuple[pd.DataFrame, pd.DataFrame]:
        """Split into train/test (80/20), stratified by dog when possible.

        Fewer than two rows cannot be split: everything goes to train.  If a
        stratified split is impossible (e.g. a dog with a single image), a
        plain random split is used instead.
        """
        if len(dataset_df) < 2:
            return dataset_df, dataset_df.iloc[0:0]

        from sklearn.model_selection import train_test_split

        try:
            train_df, test_df = train_test_split(
                dataset_df, test_size=0.2, stratify=dataset_df['dog_id'])
        except ValueError:
            train_df, test_df = train_test_split(dataset_df, test_size=0.2)
        return train_df, test_df

    def export_training_dataset(self, output_dir: str, validated_only: bool = True) -> Dict:
        """Export a dataset with body parts for fine-tuning.

        Layout written under ``output_dir``::

            images/dog_<id>/{full,head,torso,rear}/img_NNNN.jpg
            dataset.csv, train.csv, test.csv, metadata.json

        Args:
            output_dir: Destination directory (created if needed).
            validated_only: Only export validated images and body parts.

        Returns:
            The metadata dict that is also written to ``metadata.json``.
        """
        output_path = Path(output_dir)
        images_dir = output_path / "images"
        images_dir.mkdir(parents=True, exist_ok=True)

        dataset = []

        dogs = self.get_all_dogs()
        for raw_id in dogs['dog_id']:
            # sqlite3 cannot bind numpy integers; use a plain int.
            dog_id = int(raw_id)

            # Create directories for each dog and its body parts
            dog_dir = images_dir / f"dog_{dog_id}"
            for part in ('full',) + self._PART_TYPES:
                (dog_dir / part).mkdir(parents=True, exist_ok=True)

            # Load and decode the body parts once per dog, grouped by image.
            parts_by_image: Dict[Any, List[Dict]] = {}
            for part_data in self.get_body_parts(dog_id, validated_only=validated_only):
                parts_by_image.setdefault(part_data['image_id'], []).append(part_data)

            images = self.get_dog_images(dog_id, validated_only=validated_only)

            for idx, img_data in enumerate(images):
                filename = f"img_{idx:04d}.jpg"

                # Save full image
                full_path = dog_dir / 'full' / filename
                cv2.imwrite(str(full_path), img_data['image'])

                # Save the body parts cropped from this image
                part_paths = {}
                for part_data in parts_by_image.get(img_data['image_id'], []):
                    part_type = part_data['part_type']
                    part_path = dog_dir / part_type / filename
                    cv2.imwrite(str(part_path), part_data['image'])
                    part_paths[part_type] = str(part_path.relative_to(output_path))

                dataset_entry = {
                    'dog_id': dog_id,
                    'full_image': str(full_path.relative_to(output_path)),
                    'bbox': img_data['bbox'],
                    'confidence': img_data['confidence']
                }

                # Add body part paths if available
                for part_type in self._PART_TYPES:
                    dataset_entry[f'{part_type}_image'] = part_paths.get(part_type, None)

                dataset.append(dataset_entry)

        # Explicit columns keep the CSV header stable even with zero rows.
        columns = ['dog_id', 'full_image', 'bbox', 'confidence'] + [
            f'{part_type}_image' for part_type in self._PART_TYPES
        ]
        dataset_df = pd.DataFrame(dataset, columns=columns)
        dataset_df.to_csv(output_path / "dataset.csv", index=False)

        # Create training splits
        train_df, test_df = self._split_dataset(dataset_df)
        train_df.to_csv(output_path / "train.csv", index=False)
        test_df.to_csv(output_path / "test.csv", index=False)

        metadata = {
            'total_dogs': len(dogs),
            'total_images': len(dataset),
            'export_date': datetime.now().isoformat(),
            'validated_only': validated_only,
            'includes_body_parts': True,
            'train_samples': len(train_df),
            'test_samples': len(test_df)
        }

        # Written last so the file contains the split sizes as well.
        with open(output_path / "metadata.json", 'w') as f:
            json.dump(metadata, f, indent=2)

        return metadata

    # ========== Cleanup Methods ==========

    def reset_database(self, confirm: bool = False):
        """Delete all rows from every table and reset autoincrement counters.

        Args:
            confirm: Must be ``True`` for anything to happen.

        Returns:
            ``True`` if the database was reset, ``False`` if not confirmed.
        """
        if not confirm:
            return False

        tables = ['body_parts', 'sightings', 'dog_images', 'dog_features',
                  'dogs', 'sessions']
        for table in tables:
            self.cursor.execute(f"DELETE FROM {table}")

        # Reset autoincrement
        self.cursor.execute("DELETE FROM sqlite_sequence")

        self.conn.commit()
        return True

    def vacuum(self):
        """Optimize database file size."""
        # VACUUM cannot run inside an open transaction.
        self.conn.commit()
        self.conn.execute("VACUUM")

    def close(self):
        """Close database connection."""
        self.conn.close()

    def __del__(self):
        """Ensure connection is closed."""
        # __init__ may have failed before self.conn was assigned.
        conn = getattr(self, 'conn', None)
        if conn is not None:
            conn.close()