Spaces:

vedaco
/

veda-programming

Sleeping

App Files Files Community

vedaco commited on Jan 9

Commit

f70ee56

verified ·

1 Parent(s): 04ba188

Create database.py

Browse files

Files changed (1) hide show

database.py +368 -0

database.py ADDED Viewed

	@@ -0,0 +1,368 @@

+"""SQLite database handler for storing user interactions"""
+import sqlite3
+import json
+from datetime import datetime
+from typing import List, Dict, Optional, Tuple
+import os
+from config import DATABASE_PATH
+class VedaDatabase:
+    """Database handler for Veda Programming LLM"""
+    def __init__(self, db_path: str = DATABASE_PATH):
+        self.db_path = db_path
+        self._init_database()
+    def _get_connection(self):
+        """Get database connection"""
+        conn = sqlite3.connect(self.db_path)
+        conn.row_factory = sqlite3.Row
+        return conn
+    def _init_database(self):
+        """Initialize database tables"""
+        conn = self._get_connection()
+        cursor = conn.cursor()
+        # User interactions table
+        cursor.execute('''
+            CREATE TABLE IF NOT EXISTS interactions (
+                id INTEGER PRIMARY KEY AUTOINCREMENT,
+                timestamp DATETIME DEFAULT CURRENT_TIMESTAMP,
+                prompt TEXT NOT NULL,
+                generated_code TEXT NOT NULL,
+                temperature REAL,
+                max_tokens INTEGER,
+                feedback INTEGER DEFAULT 0,
+                is_approved BOOLEAN DEFAULT 0,
+                is_used_for_training BOOLEAN DEFAULT 0,
+                session_id TEXT,
+                user_edited_code TEXT
+            )
+        ''')
+        # Training history table
+        cursor.execute('''
+            CREATE TABLE IF NOT EXISTS training_history (
+                id INTEGER PRIMARY KEY AUTOINCREMENT,
+                timestamp DATETIME DEFAULT CURRENT_TIMESTAMP,
+                samples_used INTEGER,
+                epochs INTEGER,
+                final_loss REAL,
+                final_accuracy REAL,
+                model_version TEXT,
+                notes TEXT
+            )
+        ''')
+        # Code samples table (curated training data)
+        cursor.execute('''
+            CREATE TABLE IF NOT EXISTS code_samples (
+                id INTEGER PRIMARY KEY AUTOINCREMENT,
+                timestamp DATETIME DEFAULT CURRENT_TIMESTAMP,
+                code TEXT NOT NULL,
+                source TEXT,
+                category TEXT,
+                quality_score REAL DEFAULT 0,
+                times_used INTEGER DEFAULT 0
+            )
+        ''')
+        # Statistics table
+        cursor.execute('''
+            CREATE TABLE IF NOT EXISTS statistics (
+                id INTEGER PRIMARY KEY AUTOINCREMENT,
+                date DATE UNIQUE,
+                total_generations INTEGER DEFAULT 0,
+                positive_feedback INTEGER DEFAULT 0,
+                negative_feedback INTEGER DEFAULT 0,
+                training_runs INTEGER DEFAULT 0
+            )
+        ''')
+        conn.commit()
+        conn.close()
+        print(f"Database initialized at {self.db_path}")
+    # ==================== Interactions ====================
+    def save_interaction(
+        self,
+        prompt: str,
+        generated_code: str,
+        temperature: float = 0.7,
+        max_tokens: int = 100,
+        session_id: str = None
+    ) -> int:
+        """Save a user interaction"""
+        conn = self._get_connection()
+        cursor = conn.cursor()
+        cursor.execute('''
+            INSERT INTO interactions
+            (prompt, generated_code, temperature, max_tokens, session_id)
+            VALUES (?, ?, ?, ?, ?)
+        ''', (prompt, generated_code, temperature, max_tokens, session_id))
+        interaction_id = cursor.lastrowid
+        # Update daily statistics
+        today = datetime.now().date()
+        cursor.execute('''
+            INSERT INTO statistics (date, total_generations)
+            VALUES (?, 1)
+            ON CONFLICT(date) DO UPDATE SET
+            total_generations = total_generations + 1
+        ''', (today,))
+        conn.commit()
+        conn.close()
+        return interaction_id
+    def update_feedback(self, interaction_id: int, feedback: int,
+                        user_edited_code: str = None):
+        """Update feedback for an interaction (1 = positive, -1 = negative)"""
+        conn = self._get_connection()
+        cursor = conn.cursor()
+        is_approved = feedback > 0
+        cursor.execute('''
+            UPDATE interactions
+            SET feedback = ?, is_approved = ?, user_edited_code = ?
+            WHERE id = ?
+        ''', (feedback, is_approved, user_edited_code, interaction_id))
+        # Update daily statistics
+        today = datetime.now().date()
+        if feedback > 0:
+            cursor.execute('''
+                INSERT INTO statistics (date, positive_feedback)
+                VALUES (?, 1)
+                ON CONFLICT(date) DO UPDATE SET
+                positive_feedback = positive_feedback + 1
+            ''', (today,))
+        elif feedback < 0:
+            cursor.execute('''
+                INSERT INTO statistics (date, negative_feedback)
+                VALUES (?, 1)
+                ON CONFLICT(date) DO UPDATE SET
+                negative_feedback = negative_feedback + 1
+            ''', (today,))
+        conn.commit()
+        conn.close()
+    def get_approved_samples(self, limit: int = None,
+                             not_used: bool = False) -> List[Dict]:
+        """Get approved samples for training"""
+        conn = self._get_connection()
+        cursor = conn.cursor()
+        query = '''
+            SELECT id, prompt,
+                   COALESCE(user_edited_code, generated_code) as code
+            FROM interactions
+            WHERE is_approved = 1
+        '''
+        if not_used:
+            query += ' AND is_used_for_training = 0'
+        query += ' ORDER BY timestamp DESC'
+        if limit:
+            query += f' LIMIT {limit}'
+        cursor.execute(query)
+        rows = cursor.fetchall()
+        conn.close()
+        return [dict(row) for row in rows]
+    def mark_as_used_for_training(self, interaction_ids: List[int]):
+        """Mark interactions as used for training"""
+        conn = self._get_connection()
+        cursor = conn.cursor()
+        placeholders = ','.join('?' * len(interaction_ids))
+        cursor.execute(f'''
+            UPDATE interactions
+            SET is_used_for_training = 1
+            WHERE id IN ({placeholders})
+        ''', interaction_ids)
+        conn.commit()
+        conn.close()
+    def get_pending_samples_count(self) -> int:
+        """Get count of approved but unused samples"""
+        conn = self._get_connection()
+        cursor = conn.cursor()
+        cursor.execute('''
+            SELECT COUNT(*) FROM interactions
+            WHERE is_approved = 1 AND is_used_for_training = 0
+        ''')
+        count = cursor.fetchone()[0]
+        conn.close()
+        return count
+    # ==================== Code Samples ====================
+    def add_code_sample(self, code: str, source: str = "user",
+                        category: str = "general") -> int:
+        """Add a curated code sample"""
+        conn = self._get_connection()
+        cursor = conn.cursor()
+        cursor.execute('''
+            INSERT INTO code_samples (code, source, category)
+            VALUES (?, ?, ?)
+        ''', (code, source, category))
+        sample_id = cursor.lastrowid
+        conn.commit()
+        conn.close()
+        return sample_id
+    def get_all_code_samples(self) -> List[Dict]:
+        """Get all code samples"""
+        conn = self._get_connection()
+        cursor = conn.cursor()
+        cursor.execute('SELECT * FROM code_samples ORDER BY quality_score DESC')
+        rows = cursor.fetchall()
+        conn.close()
+        return [dict(row) for row in rows]
+    # ==================== Training History ====================
+    def save_training_run(
+        self,
+        samples_used: int,
+        epochs: int,
+        final_loss: float,
+        final_accuracy: float,
+        model_version: str,
+        notes: str = ""
+    ) -> int:
+        """Save training run information"""
+        conn = self._get_connection()
+        cursor = conn.cursor()
+        cursor.execute('''
+            INSERT INTO training_history
+            (samples_used, epochs, final_loss, final_accuracy, model_version, notes)
+            VALUES (?, ?, ?, ?, ?, ?)
+        ''', (samples_used, epochs, final_loss, final_accuracy, model_version, notes))
+        run_id = cursor.lastrowid
+        # Update daily statistics
+        today = datetime.now().date()
+        cursor.execute('''
+            INSERT INTO statistics (date, training_runs)
+            VALUES (?, 1)
+            ON CONFLICT(date) DO UPDATE SET
+            training_runs = training_runs + 1
+        ''', (today,))
+        conn.commit()
+        conn.close()
+        return run_id
+    def get_training_history(self, limit: int = 10) -> List[Dict]:
+        """Get recent training history"""
+        conn = self._get_connection()
+        cursor = conn.cursor()
+        cursor.execute('''
+            SELECT * FROM training_history
+            ORDER BY timestamp DESC
+            LIMIT ?
+        ''', (limit,))
+        rows = cursor.fetchall()
+        conn.close()
+        return [dict(row) for row in rows]
+    # ==================== Statistics ====================
+    def get_statistics(self) -> Dict:
+        """Get overall statistics"""
+        conn = self._get_connection()
+        cursor = conn.cursor()
+        # Total counts
+        cursor.execute('SELECT COUNT(*) FROM interactions')
+        total_interactions = cursor.fetchone()[0]
+        cursor.execute('SELECT COUNT(*) FROM interactions WHERE feedback > 0')
+        positive_count = cursor.fetchone()[0]
+        cursor.execute('SELECT COUNT(*) FROM interactions WHERE feedback < 0')
+        negative_count = cursor.fetchone()[0]
+        cursor.execute('SELECT COUNT(*) FROM interactions WHERE is_approved = 1')
+        approved_count = cursor.fetchone()[0]
+        cursor.execute('SELECT COUNT(*) FROM training_history')
+        training_runs = cursor.fetchone()[0]
+        cursor.execute('SELECT COUNT(*) FROM code_samples')
+        code_samples = cursor.fetchone()[0]
+        # Recent stats (last 7 days)
+        cursor.execute('''
+            SELECT SUM(total_generations), SUM(positive_feedback), SUM(negative_feedback)
+            FROM statistics
+            WHERE date >= date('now', '-7 days')
+        ''')
+        recent = cursor.fetchone()
+        conn.close()
+        return {
+            'total_interactions': total_interactions,
+            'positive_feedback': positive_count,
+            'negative_feedback': negative_count,
+            'approved_samples': approved_count,
+            'training_runs': training_runs,
+            'code_samples': code_samples,
+            'recent_generations': recent[0] or 0,
+            'recent_positive': recent[1] or 0,
+            'recent_negative': recent[2] or 0,
+            'approval_rate': (positive_count / total_interactions * 100) if total_interactions > 0 else 0
+        }
+    def get_recent_interactions(self, limit: int = 20) -> List[Dict]:
+        """Get recent interactions"""
+        conn = self._get_connection()
+        cursor = conn.cursor()
+        cursor.execute('''
+            SELECT id, timestamp, prompt, generated_code, feedback, is_approved
+            FROM interactions
+            ORDER BY timestamp DESC
+            LIMIT ?
+        ''', (limit,))
+        rows = cursor.fetchall()
+        conn.close()
+        return [dict(row) for row in rows]
+# Singleton instance
+db = VedaDatabase()