vedaco committed on
Commit
f70ee56
·
verified ·
1 Parent(s): 04ba188

Create database.py

Browse files
Files changed (1) hide show
  1. database.py +368 -0
database.py ADDED
@@ -0,0 +1,368 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """SQLite database handler for storing user interactions"""
2
+
3
+ import sqlite3
4
+ import json
5
+ from datetime import datetime
6
+ from typing import List, Dict, Optional, Tuple
7
+ import os
8
+ from config import DATABASE_PATH
9
+
10
class VedaDatabase:
    """SQLite-backed store for the Veda Programming LLM.

    Manages four tables: ``interactions`` (prompt, generated code and user
    feedback), ``training_history``, ``code_samples`` and the per-day
    ``statistics`` counters.  Every public method opens its own connection
    and always closes it again, even when a statement raises; uncommitted
    changes are discarded in that case.
    """

    # Counter columns of the ``statistics`` table that may be incremented.
    # Column names cannot be bound as SQL parameters, so they are checked
    # against this fixed set before being interpolated into a statement.
    _STAT_COLUMNS = frozenset({
        "total_generations",
        "positive_feedback",
        "negative_feedback",
        "training_runs",
    })

    # Upper bound on "?" placeholders per statement.  Older SQLite builds
    # reject statements with more than 999 bound variables, so large id
    # lists are processed in batches well below that limit.
    _MAX_SQL_VARIABLES = 500

    def __init__(self, db_path: str = DATABASE_PATH):
        """Remember the database location and create any missing tables.

        Args:
            db_path: Path handed to ``sqlite3.connect``.
        """
        self.db_path = db_path
        self._init_database()

    def _get_connection(self) -> sqlite3.Connection:
        """Open a new connection whose rows can be accessed by column name."""
        conn = sqlite3.connect(self.db_path)
        conn.row_factory = sqlite3.Row
        return conn

    def _bump_daily_stat(self, cursor: sqlite3.Cursor, column: str) -> None:
        """Increment today's value of one ``statistics`` counter.

        Creates today's row when it does not exist yet.  The caller is
        responsible for committing.

        Args:
            cursor: Cursor of the transaction the increment belongs to.
            column: One of ``_STAT_COLUMNS``.

        Raises:
            ValueError: If ``column`` is not a known counter column.
        """
        if column not in self._STAT_COLUMNS:
            raise ValueError(f"Unknown statistics column: {column!r}")

        # Bind the date as an explicit ISO string: same stored value as the
        # implicit date adapter, which is deprecated since Python 3.12.
        today = datetime.now().date().isoformat()
        cursor.execute(
            f'''
            INSERT INTO statistics (date, {column})
            VALUES (?, 1)
            ON CONFLICT(date) DO UPDATE SET
                {column} = {column} + 1
            ''',
            (today,),
        )

    def _init_database(self) -> None:
        """Create the tables if they do not exist and report the location."""
        conn = self._get_connection()
        try:
            cursor = conn.cursor()

            # User interactions table
            cursor.execute('''
                CREATE TABLE IF NOT EXISTS interactions (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    timestamp DATETIME DEFAULT CURRENT_TIMESTAMP,
                    prompt TEXT NOT NULL,
                    generated_code TEXT NOT NULL,
                    temperature REAL,
                    max_tokens INTEGER,
                    feedback INTEGER DEFAULT 0,
                    is_approved BOOLEAN DEFAULT 0,
                    is_used_for_training BOOLEAN DEFAULT 0,
                    session_id TEXT,
                    user_edited_code TEXT
                )
            ''')

            # Training history table
            cursor.execute('''
                CREATE TABLE IF NOT EXISTS training_history (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    timestamp DATETIME DEFAULT CURRENT_TIMESTAMP,
                    samples_used INTEGER,
                    epochs INTEGER,
                    final_loss REAL,
                    final_accuracy REAL,
                    model_version TEXT,
                    notes TEXT
                )
            ''')

            # Code samples table (curated training data)
            cursor.execute('''
                CREATE TABLE IF NOT EXISTS code_samples (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    timestamp DATETIME DEFAULT CURRENT_TIMESTAMP,
                    code TEXT NOT NULL,
                    source TEXT,
                    category TEXT,
                    quality_score REAL DEFAULT 0,
                    times_used INTEGER DEFAULT 0
                )
            ''')

            # Statistics table (one row per calendar day)
            cursor.execute('''
                CREATE TABLE IF NOT EXISTS statistics (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    date DATE UNIQUE,
                    total_generations INTEGER DEFAULT 0,
                    positive_feedback INTEGER DEFAULT 0,
                    negative_feedback INTEGER DEFAULT 0,
                    training_runs INTEGER DEFAULT 0
                )
            ''')

            conn.commit()
        finally:
            conn.close()
        print(f"Database initialized at {self.db_path}")

    # ==================== Interactions ====================

    def save_interaction(
        self,
        prompt: str,
        generated_code: str,
        temperature: float = 0.7,
        max_tokens: int = 100,
        session_id: Optional[str] = None
    ) -> int:
        """Store one generation and count it in today's statistics.

        Args:
            prompt: Text the user submitted.
            generated_code: Code produced for that prompt.
            temperature: Sampling temperature recorded with the row.
            max_tokens: Token limit recorded with the row.
            session_id: Optional session identifier recorded with the row.

        Returns:
            The ``id`` of the newly inserted interaction row.
        """
        conn = self._get_connection()
        try:
            cursor = conn.cursor()
            cursor.execute('''
                INSERT INTO interactions
                (prompt, generated_code, temperature, max_tokens, session_id)
                VALUES (?, ?, ?, ?, ?)
            ''', (prompt, generated_code, temperature, max_tokens, session_id))

            # Read the id before the statistics upsert reuses the cursor.
            interaction_id = cursor.lastrowid

            self._bump_daily_stat(cursor, "total_generations")
            conn.commit()
        finally:
            conn.close()

        return interaction_id

    def update_feedback(self, interaction_id: int, feedback: int,
                        user_edited_code: Optional[str] = None):
        """Record feedback for an interaction (1 = positive, -1 = negative).

        A positive value also marks the interaction as approved.  The stored
        ``user_edited_code`` is replaced by the value passed here, including
        ``None``.  Today's feedback counters are only incremented when the
        interaction actually exists.

        Args:
            interaction_id: ``id`` of the interaction row to update.
            feedback: Positive, negative or zero feedback value.
            user_edited_code: Optional corrected code supplied by the user.
        """
        is_approved = int(feedback > 0)

        conn = self._get_connection()
        try:
            cursor = conn.cursor()
            cursor.execute('''
                UPDATE interactions
                SET feedback = ?, is_approved = ?, user_edited_code = ?
                WHERE id = ?
            ''', (feedback, is_approved, user_edited_code, interaction_id))

            # Do not count feedback for an id that matched no row.
            if cursor.rowcount > 0:
                if feedback > 0:
                    self._bump_daily_stat(cursor, "positive_feedback")
                elif feedback < 0:
                    self._bump_daily_stat(cursor, "negative_feedback")

            conn.commit()
        finally:
            conn.close()

    def get_approved_samples(self, limit: Optional[int] = None,
                             not_used: bool = False) -> List[Dict]:
        """Return approved interactions, newest first, for training.

        Args:
            limit: Maximum number of rows; ``None`` or ``0`` means no limit.
            not_used: When true, skip rows already marked as used for
                training.

        Returns:
            Dicts with the keys ``id``, ``prompt`` and ``code``, where
            ``code`` is the user-edited code if present, otherwise the
            generated code.
        """
        query = '''
            SELECT id, prompt,
                   COALESCE(user_edited_code, generated_code) as code
            FROM interactions
            WHERE is_approved = 1
        '''
        params = []

        if not_used:
            query += ' AND is_used_for_training = 0'

        # ``id`` breaks ties between rows created within the same second.
        query += ' ORDER BY timestamp DESC, id DESC'

        if limit:
            # Bound as an integer parameter instead of being formatted into
            # the SQL text.
            query += ' LIMIT ?'
            params.append(int(limit))

        conn = self._get_connection()
        try:
            rows = conn.cursor().execute(query, params).fetchall()
        finally:
            conn.close()

        return [dict(row) for row in rows]

    def mark_as_used_for_training(self, interaction_ids: List[int]):
        """Flag the given interactions as used for training.

        All batches are written in a single transaction.  An empty
        collection is a no-op and opens no connection.

        Args:
            interaction_ids: ``id`` values of the interaction rows to flag.
        """
        ids = list(interaction_ids)
        if not ids:
            return

        conn = self._get_connection()
        try:
            cursor = conn.cursor()
            step = self._MAX_SQL_VARIABLES
            for start in range(0, len(ids), step):
                batch = ids[start:start + step]
                placeholders = ','.join('?' * len(batch))
                cursor.execute(f'''
                    UPDATE interactions
                    SET is_used_for_training = 1
                    WHERE id IN ({placeholders})
                ''', batch)
            conn.commit()
        finally:
            conn.close()

    def get_pending_samples_count(self) -> int:
        """Return how many approved interactions are not yet used for training."""
        conn = self._get_connection()
        try:
            cursor = conn.cursor()
            cursor.execute('''
                SELECT COUNT(*) FROM interactions
                WHERE is_approved = 1 AND is_used_for_training = 0
            ''')
            return cursor.fetchone()[0]
        finally:
            conn.close()

    # ==================== Code Samples ====================

    def add_code_sample(self, code: str, source: str = "user",
                        category: str = "general") -> int:
        """Insert a curated code sample.

        Args:
            code: The sample's source text.
            source: Value stored in the ``source`` column.
            category: Value stored in the ``category`` column.

        Returns:
            The ``id`` of the newly inserted sample row.
        """
        conn = self._get_connection()
        try:
            cursor = conn.cursor()
            cursor.execute('''
                INSERT INTO code_samples (code, source, category)
                VALUES (?, ?, ?)
            ''', (code, source, category))
            sample_id = cursor.lastrowid
            conn.commit()
        finally:
            conn.close()

        return sample_id

    def get_all_code_samples(self) -> List[Dict]:
        """Return every code sample as a dict, highest quality score first."""
        conn = self._get_connection()
        try:
            cursor = conn.cursor()
            cursor.execute('SELECT * FROM code_samples ORDER BY quality_score DESC')
            rows = cursor.fetchall()
        finally:
            conn.close()

        return [dict(row) for row in rows]

    # ==================== Training History ====================

    def save_training_run(
        self,
        samples_used: int,
        epochs: int,
        final_loss: float,
        final_accuracy: float,
        model_version: str,
        notes: str = ""
    ) -> int:
        """Store one training run and count it in today's statistics.

        Args:
            samples_used: Number of samples recorded for the run.
            epochs: Number of epochs recorded for the run.
            final_loss: Loss value recorded for the run.
            final_accuracy: Accuracy value recorded for the run.
            model_version: Version label recorded for the run.
            notes: Free-form notes recorded for the run.

        Returns:
            The ``id`` of the newly inserted training-history row.
        """
        conn = self._get_connection()
        try:
            cursor = conn.cursor()
            cursor.execute('''
                INSERT INTO training_history
                (samples_used, epochs, final_loss, final_accuracy, model_version, notes)
                VALUES (?, ?, ?, ?, ?, ?)
            ''', (samples_used, epochs, final_loss, final_accuracy, model_version, notes))

            # Read the id before the statistics upsert reuses the cursor.
            run_id = cursor.lastrowid

            self._bump_daily_stat(cursor, "training_runs")
            conn.commit()
        finally:
            conn.close()

        return run_id

    def get_training_history(self, limit: int = 10) -> List[Dict]:
        """Return the most recent training runs as dicts, newest first.

        Args:
            limit: Maximum number of rows to return.
        """
        conn = self._get_connection()
        try:
            cursor = conn.cursor()
            # ``id`` breaks ties between runs stored within the same second.
            cursor.execute('''
                SELECT * FROM training_history
                ORDER BY timestamp DESC, id DESC
                LIMIT ?
            ''', (limit,))
            rows = cursor.fetchall()
        finally:
            conn.close()

        return [dict(row) for row in rows]

    # ==================== Statistics ====================

    def get_statistics(self) -> Dict:
        """Return overall totals plus the counters of the last seven days.

        Returns:
            Dict with the keys ``total_interactions``, ``positive_feedback``,
            ``negative_feedback``, ``approved_samples``, ``training_runs``,
            ``code_samples``, ``recent_generations``, ``recent_positive``,
            ``recent_negative`` and ``approval_rate`` (percentage of
            interactions with positive feedback, ``0`` when there are no
            interactions).
        """
        conn = self._get_connection()
        try:
            cursor = conn.cursor()

            def scalar(sql: str) -> int:
                """Run a single-value query and return that value."""
                return cursor.execute(sql).fetchone()[0]

            # Total counts
            total_interactions = scalar('SELECT COUNT(*) FROM interactions')
            positive_count = scalar(
                'SELECT COUNT(*) FROM interactions WHERE feedback > 0')
            negative_count = scalar(
                'SELECT COUNT(*) FROM interactions WHERE feedback < 0')
            approved_count = scalar(
                'SELECT COUNT(*) FROM interactions WHERE is_approved = 1')
            training_runs = scalar('SELECT COUNT(*) FROM training_history')
            code_samples = scalar('SELECT COUNT(*) FROM code_samples')

            # Recent stats (last 7 days).  The statistics rows are keyed by
            # the local calendar date, so the window is computed in local
            # time as well; plain date('now') would be UTC.
            cursor.execute('''
                SELECT SUM(total_generations), SUM(positive_feedback), SUM(negative_feedback)
                FROM statistics
                WHERE date >= date('now', 'localtime', '-7 days')
            ''')
            recent = cursor.fetchone()
        finally:
            conn.close()

        return {
            'total_interactions': total_interactions,
            'positive_feedback': positive_count,
            'negative_feedback': negative_count,
            'approved_samples': approved_count,
            'training_runs': training_runs,
            'code_samples': code_samples,
            # SUM() yields NULL when no row falls inside the window.
            'recent_generations': recent[0] or 0,
            'recent_positive': recent[1] or 0,
            'recent_negative': recent[2] or 0,
            'approval_rate': (positive_count / total_interactions * 100) if total_interactions > 0 else 0
        }

    def get_recent_interactions(self, limit: int = 20) -> List[Dict]:
        """Return the most recent interactions as dicts, newest first.

        Args:
            limit: Maximum number of rows to return.

        Returns:
            Dicts with the keys ``id``, ``timestamp``, ``prompt``,
            ``generated_code``, ``feedback`` and ``is_approved``.
        """
        conn = self._get_connection()
        try:
            cursor = conn.cursor()
            # ``id`` breaks ties between rows created within the same second.
            cursor.execute('''
                SELECT id, timestamp, prompt, generated_code, feedback, is_approved
                FROM interactions
                ORDER BY timestamp DESC, id DESC
                LIMIT ?
            ''', (limit,))
            rows = cursor.fetchall()
        finally:
            conn.close()

        return [dict(row) for row in rows]
365
+
366
+
367
+ # Module-level singleton instance; constructing it here runs _init_database() (table creation + init message) when this module is imported
368
+ db = VedaDatabase()