File size: 11,909 Bytes
f70ee56
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
"""SQLite database handler for storing user interactions"""

import sqlite3
import json
from datetime import datetime
from typing import List, Dict, Optional, Tuple
import os
from config import DATABASE_PATH

class VedaDatabase:
    """Database handler for Veda Programming LLM"""
    
    def __init__(self, db_path: str = DATABASE_PATH):
        self.db_path = db_path
        self._init_database()
    
    def _get_connection(self):
        """Get database connection"""
        conn = sqlite3.connect(self.db_path)
        conn.row_factory = sqlite3.Row
        return conn
    
    def _init_database(self):
        """Initialize database tables"""
        conn = self._get_connection()
        cursor = conn.cursor()
        
        # User interactions table
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS interactions (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                timestamp DATETIME DEFAULT CURRENT_TIMESTAMP,
                prompt TEXT NOT NULL,
                generated_code TEXT NOT NULL,
                temperature REAL,
                max_tokens INTEGER,
                feedback INTEGER DEFAULT 0,
                is_approved BOOLEAN DEFAULT 0,
                is_used_for_training BOOLEAN DEFAULT 0,
                session_id TEXT,
                user_edited_code TEXT
            )
        ''')
        
        # Training history table
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS training_history (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                timestamp DATETIME DEFAULT CURRENT_TIMESTAMP,
                samples_used INTEGER,
                epochs INTEGER,
                final_loss REAL,
                final_accuracy REAL,
                model_version TEXT,
                notes TEXT
            )
        ''')
        
        # Code samples table (curated training data)
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS code_samples (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                timestamp DATETIME DEFAULT CURRENT_TIMESTAMP,
                code TEXT NOT NULL,
                source TEXT,
                category TEXT,
                quality_score REAL DEFAULT 0,
                times_used INTEGER DEFAULT 0
            )
        ''')
        
        # Statistics table
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS statistics (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                date DATE UNIQUE,
                total_generations INTEGER DEFAULT 0,
                positive_feedback INTEGER DEFAULT 0,
                negative_feedback INTEGER DEFAULT 0,
                training_runs INTEGER DEFAULT 0
            )
        ''')
        
        conn.commit()
        conn.close()
        print(f"Database initialized at {self.db_path}")
    
    # ==================== Interactions ====================
    
    def save_interaction(
        self,
        prompt: str,
        generated_code: str,
        temperature: float = 0.7,
        max_tokens: int = 100,
        session_id: str = None
    ) -> int:
        """Save a user interaction"""
        conn = self._get_connection()
        cursor = conn.cursor()
        
        cursor.execute('''
            INSERT INTO interactions 
            (prompt, generated_code, temperature, max_tokens, session_id)
            VALUES (?, ?, ?, ?, ?)
        ''', (prompt, generated_code, temperature, max_tokens, session_id))
        
        interaction_id = cursor.lastrowid
        
        # Update daily statistics
        today = datetime.now().date()
        cursor.execute('''
            INSERT INTO statistics (date, total_generations)
            VALUES (?, 1)
            ON CONFLICT(date) DO UPDATE SET
            total_generations = total_generations + 1
        ''', (today,))
        
        conn.commit()
        conn.close()
        
        return interaction_id
    
    def update_feedback(self, interaction_id: int, feedback: int, 
                        user_edited_code: str = None):
        """Update feedback for an interaction (1 = positive, -1 = negative)"""
        conn = self._get_connection()
        cursor = conn.cursor()
        
        is_approved = feedback > 0
        
        cursor.execute('''
            UPDATE interactions
            SET feedback = ?, is_approved = ?, user_edited_code = ?
            WHERE id = ?
        ''', (feedback, is_approved, user_edited_code, interaction_id))
        
        # Update daily statistics
        today = datetime.now().date()
        if feedback > 0:
            cursor.execute('''
                INSERT INTO statistics (date, positive_feedback)
                VALUES (?, 1)
                ON CONFLICT(date) DO UPDATE SET
                positive_feedback = positive_feedback + 1
            ''', (today,))
        elif feedback < 0:
            cursor.execute('''
                INSERT INTO statistics (date, negative_feedback)
                VALUES (?, 1)
                ON CONFLICT(date) DO UPDATE SET
                negative_feedback = negative_feedback + 1
            ''', (today,))
        
        conn.commit()
        conn.close()
    
    def get_approved_samples(self, limit: int = None, 
                             not_used: bool = False) -> List[Dict]:
        """Get approved samples for training"""
        conn = self._get_connection()
        cursor = conn.cursor()
        
        query = '''
            SELECT id, prompt, 
                   COALESCE(user_edited_code, generated_code) as code
            FROM interactions
            WHERE is_approved = 1
        '''
        
        if not_used:
            query += ' AND is_used_for_training = 0'
        
        query += ' ORDER BY timestamp DESC'
        
        if limit:
            query += f' LIMIT {limit}'
        
        cursor.execute(query)
        rows = cursor.fetchall()
        
        conn.close()
        
        return [dict(row) for row in rows]
    
    def mark_as_used_for_training(self, interaction_ids: List[int]):
        """Mark interactions as used for training"""
        conn = self._get_connection()
        cursor = conn.cursor()
        
        placeholders = ','.join('?' * len(interaction_ids))
        cursor.execute(f'''
            UPDATE interactions
            SET is_used_for_training = 1
            WHERE id IN ({placeholders})
        ''', interaction_ids)
        
        conn.commit()
        conn.close()
    
    def get_pending_samples_count(self) -> int:
        """Get count of approved but unused samples"""
        conn = self._get_connection()
        cursor = conn.cursor()
        
        cursor.execute('''
            SELECT COUNT(*) FROM interactions
            WHERE is_approved = 1 AND is_used_for_training = 0
        ''')
        
        count = cursor.fetchone()[0]
        conn.close()
        
        return count
    
    # ==================== Code Samples ====================
    
    def add_code_sample(self, code: str, source: str = "user",
                        category: str = "general") -> int:
        """Add a curated code sample"""
        conn = self._get_connection()
        cursor = conn.cursor()
        
        cursor.execute('''
            INSERT INTO code_samples (code, source, category)
            VALUES (?, ?, ?)
        ''', (code, source, category))
        
        sample_id = cursor.lastrowid
        conn.commit()
        conn.close()
        
        return sample_id
    
    def get_all_code_samples(self) -> List[Dict]:
        """Get all code samples"""
        conn = self._get_connection()
        cursor = conn.cursor()
        
        cursor.execute('SELECT * FROM code_samples ORDER BY quality_score DESC')
        rows = cursor.fetchall()
        
        conn.close()
        
        return [dict(row) for row in rows]
    
    # ==================== Training History ====================
    
    def save_training_run(
        self,
        samples_used: int,
        epochs: int,
        final_loss: float,
        final_accuracy: float,
        model_version: str,
        notes: str = ""
    ) -> int:
        """Save training run information"""
        conn = self._get_connection()
        cursor = conn.cursor()
        
        cursor.execute('''
            INSERT INTO training_history 
            (samples_used, epochs, final_loss, final_accuracy, model_version, notes)
            VALUES (?, ?, ?, ?, ?, ?)
        ''', (samples_used, epochs, final_loss, final_accuracy, model_version, notes))
        
        run_id = cursor.lastrowid
        
        # Update daily statistics
        today = datetime.now().date()
        cursor.execute('''
            INSERT INTO statistics (date, training_runs)
            VALUES (?, 1)
            ON CONFLICT(date) DO UPDATE SET
            training_runs = training_runs + 1
        ''', (today,))
        
        conn.commit()
        conn.close()
        
        return run_id
    
    def get_training_history(self, limit: int = 10) -> List[Dict]:
        """Get recent training history"""
        conn = self._get_connection()
        cursor = conn.cursor()
        
        cursor.execute('''
            SELECT * FROM training_history
            ORDER BY timestamp DESC
            LIMIT ?
        ''', (limit,))
        
        rows = cursor.fetchall()
        conn.close()
        
        return [dict(row) for row in rows]
    
    # ==================== Statistics ====================
    
    def get_statistics(self) -> Dict:
        """Get overall statistics"""
        conn = self._get_connection()
        cursor = conn.cursor()
        
        # Total counts
        cursor.execute('SELECT COUNT(*) FROM interactions')
        total_interactions = cursor.fetchone()[0]
        
        cursor.execute('SELECT COUNT(*) FROM interactions WHERE feedback > 0')
        positive_count = cursor.fetchone()[0]
        
        cursor.execute('SELECT COUNT(*) FROM interactions WHERE feedback < 0')
        negative_count = cursor.fetchone()[0]
        
        cursor.execute('SELECT COUNT(*) FROM interactions WHERE is_approved = 1')
        approved_count = cursor.fetchone()[0]
        
        cursor.execute('SELECT COUNT(*) FROM training_history')
        training_runs = cursor.fetchone()[0]
        
        cursor.execute('SELECT COUNT(*) FROM code_samples')
        code_samples = cursor.fetchone()[0]
        
        # Recent stats (last 7 days)
        cursor.execute('''
            SELECT SUM(total_generations), SUM(positive_feedback), SUM(negative_feedback)
            FROM statistics
            WHERE date >= date('now', '-7 days')
        ''')
        recent = cursor.fetchone()
        
        conn.close()
        
        return {
            'total_interactions': total_interactions,
            'positive_feedback': positive_count,
            'negative_feedback': negative_count,
            'approved_samples': approved_count,
            'training_runs': training_runs,
            'code_samples': code_samples,
            'recent_generations': recent[0] or 0,
            'recent_positive': recent[1] or 0,
            'recent_negative': recent[2] or 0,
            'approval_rate': (positive_count / total_interactions * 100) if total_interactions > 0 else 0
        }
    
    def get_recent_interactions(self, limit: int = 20) -> List[Dict]:
        """Get recent interactions"""
        conn = self._get_connection()
        cursor = conn.cursor()
        
        cursor.execute('''
            SELECT id, timestamp, prompt, generated_code, feedback, is_approved
            FROM interactions
            ORDER BY timestamp DESC
            LIMIT ?
        ''', (limit,))
        
        rows = cursor.fetchall()
        conn.close()
        
        return [dict(row) for row in rows]


# Singleton instance
db = VedaDatabase()