veda-programming / database.py
vedaco's picture
Create database.py
f70ee56 verified
raw
history blame
11.9 kB
"""SQLite database handler for storing user interactions"""
import sqlite3
import json
from datetime import datetime
from typing import List, Dict, Optional, Tuple
import os
from config import DATABASE_PATH
class VedaDatabase:
"""Database handler for Veda Programming LLM"""
def __init__(self, db_path: str = DATABASE_PATH):
self.db_path = db_path
self._init_database()
def _get_connection(self):
"""Get database connection"""
conn = sqlite3.connect(self.db_path)
conn.row_factory = sqlite3.Row
return conn
def _init_database(self):
"""Initialize database tables"""
conn = self._get_connection()
cursor = conn.cursor()
# User interactions table
cursor.execute('''
CREATE TABLE IF NOT EXISTS interactions (
id INTEGER PRIMARY KEY AUTOINCREMENT,
timestamp DATETIME DEFAULT CURRENT_TIMESTAMP,
prompt TEXT NOT NULL,
generated_code TEXT NOT NULL,
temperature REAL,
max_tokens INTEGER,
feedback INTEGER DEFAULT 0,
is_approved BOOLEAN DEFAULT 0,
is_used_for_training BOOLEAN DEFAULT 0,
session_id TEXT,
user_edited_code TEXT
)
''')
# Training history table
cursor.execute('''
CREATE TABLE IF NOT EXISTS training_history (
id INTEGER PRIMARY KEY AUTOINCREMENT,
timestamp DATETIME DEFAULT CURRENT_TIMESTAMP,
samples_used INTEGER,
epochs INTEGER,
final_loss REAL,
final_accuracy REAL,
model_version TEXT,
notes TEXT
)
''')
# Code samples table (curated training data)
cursor.execute('''
CREATE TABLE IF NOT EXISTS code_samples (
id INTEGER PRIMARY KEY AUTOINCREMENT,
timestamp DATETIME DEFAULT CURRENT_TIMESTAMP,
code TEXT NOT NULL,
source TEXT,
category TEXT,
quality_score REAL DEFAULT 0,
times_used INTEGER DEFAULT 0
)
''')
# Statistics table
cursor.execute('''
CREATE TABLE IF NOT EXISTS statistics (
id INTEGER PRIMARY KEY AUTOINCREMENT,
date DATE UNIQUE,
total_generations INTEGER DEFAULT 0,
positive_feedback INTEGER DEFAULT 0,
negative_feedback INTEGER DEFAULT 0,
training_runs INTEGER DEFAULT 0
)
''')
conn.commit()
conn.close()
print(f"Database initialized at {self.db_path}")
# ==================== Interactions ====================
def save_interaction(
self,
prompt: str,
generated_code: str,
temperature: float = 0.7,
max_tokens: int = 100,
session_id: str = None
) -> int:
"""Save a user interaction"""
conn = self._get_connection()
cursor = conn.cursor()
cursor.execute('''
INSERT INTO interactions
(prompt, generated_code, temperature, max_tokens, session_id)
VALUES (?, ?, ?, ?, ?)
''', (prompt, generated_code, temperature, max_tokens, session_id))
interaction_id = cursor.lastrowid
# Update daily statistics
today = datetime.now().date()
cursor.execute('''
INSERT INTO statistics (date, total_generations)
VALUES (?, 1)
ON CONFLICT(date) DO UPDATE SET
total_generations = total_generations + 1
''', (today,))
conn.commit()
conn.close()
return interaction_id
def update_feedback(self, interaction_id: int, feedback: int,
user_edited_code: str = None):
"""Update feedback for an interaction (1 = positive, -1 = negative)"""
conn = self._get_connection()
cursor = conn.cursor()
is_approved = feedback > 0
cursor.execute('''
UPDATE interactions
SET feedback = ?, is_approved = ?, user_edited_code = ?
WHERE id = ?
''', (feedback, is_approved, user_edited_code, interaction_id))
# Update daily statistics
today = datetime.now().date()
if feedback > 0:
cursor.execute('''
INSERT INTO statistics (date, positive_feedback)
VALUES (?, 1)
ON CONFLICT(date) DO UPDATE SET
positive_feedback = positive_feedback + 1
''', (today,))
elif feedback < 0:
cursor.execute('''
INSERT INTO statistics (date, negative_feedback)
VALUES (?, 1)
ON CONFLICT(date) DO UPDATE SET
negative_feedback = negative_feedback + 1
''', (today,))
conn.commit()
conn.close()
def get_approved_samples(self, limit: int = None,
not_used: bool = False) -> List[Dict]:
"""Get approved samples for training"""
conn = self._get_connection()
cursor = conn.cursor()
query = '''
SELECT id, prompt,
COALESCE(user_edited_code, generated_code) as code
FROM interactions
WHERE is_approved = 1
'''
if not_used:
query += ' AND is_used_for_training = 0'
query += ' ORDER BY timestamp DESC'
if limit:
query += f' LIMIT {limit}'
cursor.execute(query)
rows = cursor.fetchall()
conn.close()
return [dict(row) for row in rows]
def mark_as_used_for_training(self, interaction_ids: List[int]):
"""Mark interactions as used for training"""
conn = self._get_connection()
cursor = conn.cursor()
placeholders = ','.join('?' * len(interaction_ids))
cursor.execute(f'''
UPDATE interactions
SET is_used_for_training = 1
WHERE id IN ({placeholders})
''', interaction_ids)
conn.commit()
conn.close()
def get_pending_samples_count(self) -> int:
"""Get count of approved but unused samples"""
conn = self._get_connection()
cursor = conn.cursor()
cursor.execute('''
SELECT COUNT(*) FROM interactions
WHERE is_approved = 1 AND is_used_for_training = 0
''')
count = cursor.fetchone()[0]
conn.close()
return count
# ==================== Code Samples ====================
def add_code_sample(self, code: str, source: str = "user",
category: str = "general") -> int:
"""Add a curated code sample"""
conn = self._get_connection()
cursor = conn.cursor()
cursor.execute('''
INSERT INTO code_samples (code, source, category)
VALUES (?, ?, ?)
''', (code, source, category))
sample_id = cursor.lastrowid
conn.commit()
conn.close()
return sample_id
def get_all_code_samples(self) -> List[Dict]:
"""Get all code samples"""
conn = self._get_connection()
cursor = conn.cursor()
cursor.execute('SELECT * FROM code_samples ORDER BY quality_score DESC')
rows = cursor.fetchall()
conn.close()
return [dict(row) for row in rows]
# ==================== Training History ====================
def save_training_run(
self,
samples_used: int,
epochs: int,
final_loss: float,
final_accuracy: float,
model_version: str,
notes: str = ""
) -> int:
"""Save training run information"""
conn = self._get_connection()
cursor = conn.cursor()
cursor.execute('''
INSERT INTO training_history
(samples_used, epochs, final_loss, final_accuracy, model_version, notes)
VALUES (?, ?, ?, ?, ?, ?)
''', (samples_used, epochs, final_loss, final_accuracy, model_version, notes))
run_id = cursor.lastrowid
# Update daily statistics
today = datetime.now().date()
cursor.execute('''
INSERT INTO statistics (date, training_runs)
VALUES (?, 1)
ON CONFLICT(date) DO UPDATE SET
training_runs = training_runs + 1
''', (today,))
conn.commit()
conn.close()
return run_id
def get_training_history(self, limit: int = 10) -> List[Dict]:
"""Get recent training history"""
conn = self._get_connection()
cursor = conn.cursor()
cursor.execute('''
SELECT * FROM training_history
ORDER BY timestamp DESC
LIMIT ?
''', (limit,))
rows = cursor.fetchall()
conn.close()
return [dict(row) for row in rows]
# ==================== Statistics ====================
def get_statistics(self) -> Dict:
"""Get overall statistics"""
conn = self._get_connection()
cursor = conn.cursor()
# Total counts
cursor.execute('SELECT COUNT(*) FROM interactions')
total_interactions = cursor.fetchone()[0]
cursor.execute('SELECT COUNT(*) FROM interactions WHERE feedback > 0')
positive_count = cursor.fetchone()[0]
cursor.execute('SELECT COUNT(*) FROM interactions WHERE feedback < 0')
negative_count = cursor.fetchone()[0]
cursor.execute('SELECT COUNT(*) FROM interactions WHERE is_approved = 1')
approved_count = cursor.fetchone()[0]
cursor.execute('SELECT COUNT(*) FROM training_history')
training_runs = cursor.fetchone()[0]
cursor.execute('SELECT COUNT(*) FROM code_samples')
code_samples = cursor.fetchone()[0]
# Recent stats (last 7 days)
cursor.execute('''
SELECT SUM(total_generations), SUM(positive_feedback), SUM(negative_feedback)
FROM statistics
WHERE date >= date('now', '-7 days')
''')
recent = cursor.fetchone()
conn.close()
return {
'total_interactions': total_interactions,
'positive_feedback': positive_count,
'negative_feedback': negative_count,
'approved_samples': approved_count,
'training_runs': training_runs,
'code_samples': code_samples,
'recent_generations': recent[0] or 0,
'recent_positive': recent[1] or 0,
'recent_negative': recent[2] or 0,
'approval_rate': (positive_count / total_interactions * 100) if total_interactions > 0 else 0
}
def get_recent_interactions(self, limit: int = 20) -> List[Dict]:
"""Get recent interactions"""
conn = self._get_connection()
cursor = conn.cursor()
cursor.execute('''
SELECT id, timestamp, prompt, generated_code, feedback, is_approved
FROM interactions
ORDER BY timestamp DESC
LIMIT ?
''', (limit,))
rows = cursor.fetchall()
conn.close()
return [dict(row) for row in rows]
# Singleton instance
db = VedaDatabase()