"""SQLite database handler for storing user interactions""" import sqlite3 import json from datetime import datetime from typing import List, Dict, Optional, Tuple import os from config import DATABASE_PATH class VedaDatabase: """Database handler for Veda Programming LLM""" def __init__(self, db_path: str = DATABASE_PATH): self.db_path = db_path self._init_database() def _get_connection(self): """Get database connection""" conn = sqlite3.connect(self.db_path) conn.row_factory = sqlite3.Row return conn def _init_database(self): """Initialize database tables""" conn = self._get_connection() cursor = conn.cursor() # User interactions table cursor.execute(''' CREATE TABLE IF NOT EXISTS interactions ( id INTEGER PRIMARY KEY AUTOINCREMENT, timestamp DATETIME DEFAULT CURRENT_TIMESTAMP, prompt TEXT NOT NULL, generated_code TEXT NOT NULL, temperature REAL, max_tokens INTEGER, feedback INTEGER DEFAULT 0, is_approved BOOLEAN DEFAULT 0, is_used_for_training BOOLEAN DEFAULT 0, session_id TEXT, user_edited_code TEXT ) ''') # Training history table cursor.execute(''' CREATE TABLE IF NOT EXISTS training_history ( id INTEGER PRIMARY KEY AUTOINCREMENT, timestamp DATETIME DEFAULT CURRENT_TIMESTAMP, samples_used INTEGER, epochs INTEGER, final_loss REAL, final_accuracy REAL, model_version TEXT, notes TEXT ) ''') # Code samples table (curated training data) cursor.execute(''' CREATE TABLE IF NOT EXISTS code_samples ( id INTEGER PRIMARY KEY AUTOINCREMENT, timestamp DATETIME DEFAULT CURRENT_TIMESTAMP, code TEXT NOT NULL, source TEXT, category TEXT, quality_score REAL DEFAULT 0, times_used INTEGER DEFAULT 0 ) ''') # Statistics table cursor.execute(''' CREATE TABLE IF NOT EXISTS statistics ( id INTEGER PRIMARY KEY AUTOINCREMENT, date DATE UNIQUE, total_generations INTEGER DEFAULT 0, positive_feedback INTEGER DEFAULT 0, negative_feedback INTEGER DEFAULT 0, training_runs INTEGER DEFAULT 0 ) ''') conn.commit() conn.close() print(f"Database initialized at {self.db_path}") # ==================== Interactions ==================== def save_interaction( self, prompt: str, generated_code: str, temperature: float = 0.7, max_tokens: int = 100, session_id: str = None ) -> int: """Save a user interaction""" conn = self._get_connection() cursor = conn.cursor() cursor.execute(''' INSERT INTO interactions (prompt, generated_code, temperature, max_tokens, session_id) VALUES (?, ?, ?, ?, ?) ''', (prompt, generated_code, temperature, max_tokens, session_id)) interaction_id = cursor.lastrowid # Update daily statistics today = datetime.now().date() cursor.execute(''' INSERT INTO statistics (date, total_generations) VALUES (?, 1) ON CONFLICT(date) DO UPDATE SET total_generations = total_generations + 1 ''', (today,)) conn.commit() conn.close() return interaction_id def update_feedback(self, interaction_id: int, feedback: int, user_edited_code: str = None): """Update feedback for an interaction (1 = positive, -1 = negative)""" conn = self._get_connection() cursor = conn.cursor() is_approved = feedback > 0 cursor.execute(''' UPDATE interactions SET feedback = ?, is_approved = ?, user_edited_code = ? WHERE id = ? ''', (feedback, is_approved, user_edited_code, interaction_id)) # Update daily statistics today = datetime.now().date() if feedback > 0: cursor.execute(''' INSERT INTO statistics (date, positive_feedback) VALUES (?, 1) ON CONFLICT(date) DO UPDATE SET positive_feedback = positive_feedback + 1 ''', (today,)) elif feedback < 0: cursor.execute(''' INSERT INTO statistics (date, negative_feedback) VALUES (?, 1) ON CONFLICT(date) DO UPDATE SET negative_feedback = negative_feedback + 1 ''', (today,)) conn.commit() conn.close() def get_approved_samples(self, limit: int = None, not_used: bool = False) -> List[Dict]: """Get approved samples for training""" conn = self._get_connection() cursor = conn.cursor() query = ''' SELECT id, prompt, COALESCE(user_edited_code, generated_code) as code FROM interactions WHERE is_approved = 1 ''' if not_used: query += ' AND is_used_for_training = 0' query += ' ORDER BY timestamp DESC' if limit: query += f' LIMIT {limit}' cursor.execute(query) rows = cursor.fetchall() conn.close() return [dict(row) for row in rows] def mark_as_used_for_training(self, interaction_ids: List[int]): """Mark interactions as used for training""" conn = self._get_connection() cursor = conn.cursor() placeholders = ','.join('?' * len(interaction_ids)) cursor.execute(f''' UPDATE interactions SET is_used_for_training = 1 WHERE id IN ({placeholders}) ''', interaction_ids) conn.commit() conn.close() def get_pending_samples_count(self) -> int: """Get count of approved but unused samples""" conn = self._get_connection() cursor = conn.cursor() cursor.execute(''' SELECT COUNT(*) FROM interactions WHERE is_approved = 1 AND is_used_for_training = 0 ''') count = cursor.fetchone()[0] conn.close() return count # ==================== Code Samples ==================== def add_code_sample(self, code: str, source: str = "user", category: str = "general") -> int: """Add a curated code sample""" conn = self._get_connection() cursor = conn.cursor() cursor.execute(''' INSERT INTO code_samples (code, source, category) VALUES (?, ?, ?) ''', (code, source, category)) sample_id = cursor.lastrowid conn.commit() conn.close() return sample_id def get_all_code_samples(self) -> List[Dict]: """Get all code samples""" conn = self._get_connection() cursor = conn.cursor() cursor.execute('SELECT * FROM code_samples ORDER BY quality_score DESC') rows = cursor.fetchall() conn.close() return [dict(row) for row in rows] # ==================== Training History ==================== def save_training_run( self, samples_used: int, epochs: int, final_loss: float, final_accuracy: float, model_version: str, notes: str = "" ) -> int: """Save training run information""" conn = self._get_connection() cursor = conn.cursor() cursor.execute(''' INSERT INTO training_history (samples_used, epochs, final_loss, final_accuracy, model_version, notes) VALUES (?, ?, ?, ?, ?, ?) ''', (samples_used, epochs, final_loss, final_accuracy, model_version, notes)) run_id = cursor.lastrowid # Update daily statistics today = datetime.now().date() cursor.execute(''' INSERT INTO statistics (date, training_runs) VALUES (?, 1) ON CONFLICT(date) DO UPDATE SET training_runs = training_runs + 1 ''', (today,)) conn.commit() conn.close() return run_id def get_training_history(self, limit: int = 10) -> List[Dict]: """Get recent training history""" conn = self._get_connection() cursor = conn.cursor() cursor.execute(''' SELECT * FROM training_history ORDER BY timestamp DESC LIMIT ? ''', (limit,)) rows = cursor.fetchall() conn.close() return [dict(row) for row in rows] # ==================== Statistics ==================== def get_statistics(self) -> Dict: """Get overall statistics""" conn = self._get_connection() cursor = conn.cursor() # Total counts cursor.execute('SELECT COUNT(*) FROM interactions') total_interactions = cursor.fetchone()[0] cursor.execute('SELECT COUNT(*) FROM interactions WHERE feedback > 0') positive_count = cursor.fetchone()[0] cursor.execute('SELECT COUNT(*) FROM interactions WHERE feedback < 0') negative_count = cursor.fetchone()[0] cursor.execute('SELECT COUNT(*) FROM interactions WHERE is_approved = 1') approved_count = cursor.fetchone()[0] cursor.execute('SELECT COUNT(*) FROM training_history') training_runs = cursor.fetchone()[0] cursor.execute('SELECT COUNT(*) FROM code_samples') code_samples = cursor.fetchone()[0] # Recent stats (last 7 days) cursor.execute(''' SELECT SUM(total_generations), SUM(positive_feedback), SUM(negative_feedback) FROM statistics WHERE date >= date('now', '-7 days') ''') recent = cursor.fetchone() conn.close() return { 'total_interactions': total_interactions, 'positive_feedback': positive_count, 'negative_feedback': negative_count, 'approved_samples': approved_count, 'training_runs': training_runs, 'code_samples': code_samples, 'recent_generations': recent[0] or 0, 'recent_positive': recent[1] or 0, 'recent_negative': recent[2] or 0, 'approval_rate': (positive_count / total_interactions * 100) if total_interactions > 0 else 0 } def get_recent_interactions(self, limit: int = 20) -> List[Dict]: """Get recent interactions""" conn = self._get_connection() cursor = conn.cursor() cursor.execute(''' SELECT id, timestamp, prompt, generated_code, feedback, is_approved FROM interactions ORDER BY timestamp DESC LIMIT ? ''', (limit,)) rows = cursor.fetchall() conn.close() return [dict(row) for row in rows] # Singleton instance db = VedaDatabase()