Spaces:

vedaco
/

veda-programming

Running

App Files Files Community

veda-programming / database.py

vedaco

Create database.py

f70ee56 verified 10 days ago

raw

history blame

11.9 kB

	"""SQLite database handler for storing user interactions"""

	import sqlite3
	import json
	from datetime import datetime
	from typing import List, Dict, Optional, Tuple
	import os
	from config import DATABASE_PATH

	class VedaDatabase:
	"""Database handler for Veda Programming LLM"""

	def __init__(self, db_path: str = DATABASE_PATH):
	self.db_path = db_path
	self._init_database()

	def _get_connection(self):
	"""Get database connection"""
	conn = sqlite3.connect(self.db_path)
	conn.row_factory = sqlite3.Row
	return conn

	def _init_database(self):
	"""Initialize database tables"""
	conn = self._get_connection()
	cursor = conn.cursor()

	# User interactions table
	cursor.execute('''
	CREATE TABLE IF NOT EXISTS interactions (
	id INTEGER PRIMARY KEY AUTOINCREMENT,
	timestamp DATETIME DEFAULT CURRENT_TIMESTAMP,
	prompt TEXT NOT NULL,
	generated_code TEXT NOT NULL,
	temperature REAL,
	max_tokens INTEGER,
	feedback INTEGER DEFAULT 0,
	is_approved BOOLEAN DEFAULT 0,
	is_used_for_training BOOLEAN DEFAULT 0,
	session_id TEXT,
	user_edited_code TEXT
	)
	''')

	# Training history table
	cursor.execute('''
	CREATE TABLE IF NOT EXISTS training_history (
	id INTEGER PRIMARY KEY AUTOINCREMENT,
	timestamp DATETIME DEFAULT CURRENT_TIMESTAMP,
	samples_used INTEGER,
	epochs INTEGER,
	final_loss REAL,
	final_accuracy REAL,
	model_version TEXT,
	notes TEXT
	)
	''')

	# Code samples table (curated training data)
	cursor.execute('''
	CREATE TABLE IF NOT EXISTS code_samples (
	id INTEGER PRIMARY KEY AUTOINCREMENT,
	timestamp DATETIME DEFAULT CURRENT_TIMESTAMP,
	code TEXT NOT NULL,
	source TEXT,
	category TEXT,
	quality_score REAL DEFAULT 0,
	times_used INTEGER DEFAULT 0
	)
	''')

	# Statistics table
	cursor.execute('''
	CREATE TABLE IF NOT EXISTS statistics (
	id INTEGER PRIMARY KEY AUTOINCREMENT,
	date DATE UNIQUE,
	total_generations INTEGER DEFAULT 0,
	positive_feedback INTEGER DEFAULT 0,
	negative_feedback INTEGER DEFAULT 0,
	training_runs INTEGER DEFAULT 0
	)
	''')

	conn.commit()
	conn.close()
	print(f"Database initialized at {self.db_path}")

	# ==================== Interactions ====================

	def save_interaction(
	self,
	prompt: str,
	generated_code: str,
	temperature: float = 0.7,
	max_tokens: int = 100,
	session_id: str = None
	) -> int:
	"""Save a user interaction"""
	conn = self._get_connection()
	cursor = conn.cursor()

	cursor.execute('''
	INSERT INTO interactions
	(prompt, generated_code, temperature, max_tokens, session_id)
	VALUES (?, ?, ?, ?, ?)
	''', (prompt, generated_code, temperature, max_tokens, session_id))

	interaction_id = cursor.lastrowid

	# Update daily statistics
	today = datetime.now().date()
	cursor.execute('''
	INSERT INTO statistics (date, total_generations)
	VALUES (?, 1)
	ON CONFLICT(date) DO UPDATE SET
	total_generations = total_generations + 1
	''', (today,))

	conn.commit()
	conn.close()

	return interaction_id

	def update_feedback(self, interaction_id: int, feedback: int,
	user_edited_code: str = None):
	"""Update feedback for an interaction (1 = positive, -1 = negative)"""
	conn = self._get_connection()
	cursor = conn.cursor()

	is_approved = feedback > 0

	cursor.execute('''
	UPDATE interactions
	SET feedback = ?, is_approved = ?, user_edited_code = ?
	WHERE id = ?
	''', (feedback, is_approved, user_edited_code, interaction_id))

	# Update daily statistics
	today = datetime.now().date()
	if feedback > 0:
	cursor.execute('''
	INSERT INTO statistics (date, positive_feedback)
	VALUES (?, 1)
	ON CONFLICT(date) DO UPDATE SET
	positive_feedback = positive_feedback + 1
	''', (today,))
	elif feedback < 0:
	cursor.execute('''
	INSERT INTO statistics (date, negative_feedback)
	VALUES (?, 1)
	ON CONFLICT(date) DO UPDATE SET
	negative_feedback = negative_feedback + 1
	''', (today,))

	conn.commit()
	conn.close()

	def get_approved_samples(self, limit: int = None,
	not_used: bool = False) -> List[Dict]:
	"""Get approved samples for training"""
	conn = self._get_connection()
	cursor = conn.cursor()

	query = '''
	SELECT id, prompt,
	COALESCE(user_edited_code, generated_code) as code
	FROM interactions
	WHERE is_approved = 1
	'''

	if not_used:
	query += ' AND is_used_for_training = 0'

	query += ' ORDER BY timestamp DESC'

	if limit:
	query += f' LIMIT {limit}'

	cursor.execute(query)
	rows = cursor.fetchall()

	conn.close()

	return [dict(row) for row in rows]

	def mark_as_used_for_training(self, interaction_ids: List[int]):
	"""Mark interactions as used for training"""
	conn = self._get_connection()
	cursor = conn.cursor()

	placeholders = ','.join('?' * len(interaction_ids))
	cursor.execute(f'''
	UPDATE interactions
	SET is_used_for_training = 1
	WHERE id IN ({placeholders})
	''', interaction_ids)

	conn.commit()
	conn.close()

	def get_pending_samples_count(self) -> int:
	"""Get count of approved but unused samples"""
	conn = self._get_connection()
	cursor = conn.cursor()

	cursor.execute('''
	SELECT COUNT(*) FROM interactions
	WHERE is_approved = 1 AND is_used_for_training = 0
	''')

	count = cursor.fetchone()[0]
	conn.close()

	return count

	# ==================== Code Samples ====================

	def add_code_sample(self, code: str, source: str = "user",
	category: str = "general") -> int:
	"""Add a curated code sample"""
	conn = self._get_connection()
	cursor = conn.cursor()

	cursor.execute('''
	INSERT INTO code_samples (code, source, category)
	VALUES (?, ?, ?)
	''', (code, source, category))

	sample_id = cursor.lastrowid
	conn.commit()
	conn.close()

	return sample_id

	def get_all_code_samples(self) -> List[Dict]:
	"""Get all code samples"""
	conn = self._get_connection()
	cursor = conn.cursor()

	cursor.execute('SELECT * FROM code_samples ORDER BY quality_score DESC')
	rows = cursor.fetchall()

	conn.close()

	return [dict(row) for row in rows]

	# ==================== Training History ====================

	def save_training_run(
	self,
	samples_used: int,
	epochs: int,
	final_loss: float,
	final_accuracy: float,
	model_version: str,
	notes: str = ""
	) -> int:
	"""Save training run information"""
	conn = self._get_connection()
	cursor = conn.cursor()

	cursor.execute('''
	INSERT INTO training_history
	(samples_used, epochs, final_loss, final_accuracy, model_version, notes)
	VALUES (?, ?, ?, ?, ?, ?)
	''', (samples_used, epochs, final_loss, final_accuracy, model_version, notes))

	run_id = cursor.lastrowid

	# Update daily statistics
	today = datetime.now().date()
	cursor.execute('''
	INSERT INTO statistics (date, training_runs)
	VALUES (?, 1)
	ON CONFLICT(date) DO UPDATE SET
	training_runs = training_runs + 1
	''', (today,))

	conn.commit()
	conn.close()

	return run_id

	def get_training_history(self, limit: int = 10) -> List[Dict]:
	"""Get recent training history"""
	conn = self._get_connection()
	cursor = conn.cursor()

	cursor.execute('''
	SELECT * FROM training_history
	ORDER BY timestamp DESC
	LIMIT ?
	''', (limit,))

	rows = cursor.fetchall()
	conn.close()

	return [dict(row) for row in rows]

	# ==================== Statistics ====================

	def get_statistics(self) -> Dict:
	"""Get overall statistics"""
	conn = self._get_connection()
	cursor = conn.cursor()

	# Total counts
	cursor.execute('SELECT COUNT(*) FROM interactions')
	total_interactions = cursor.fetchone()[0]

	cursor.execute('SELECT COUNT(*) FROM interactions WHERE feedback > 0')
	positive_count = cursor.fetchone()[0]

	cursor.execute('SELECT COUNT(*) FROM interactions WHERE feedback < 0')
	negative_count = cursor.fetchone()[0]

	cursor.execute('SELECT COUNT(*) FROM interactions WHERE is_approved = 1')
	approved_count = cursor.fetchone()[0]

	cursor.execute('SELECT COUNT(*) FROM training_history')
	training_runs = cursor.fetchone()[0]

	cursor.execute('SELECT COUNT(*) FROM code_samples')
	code_samples = cursor.fetchone()[0]

	# Recent stats (last 7 days)
	cursor.execute('''
	SELECT SUM(total_generations), SUM(positive_feedback), SUM(negative_feedback)
	FROM statistics
	WHERE date >= date('now', '-7 days')
	''')
	recent = cursor.fetchone()

	conn.close()

	return {
	'total_interactions': total_interactions,
	'positive_feedback': positive_count,
	'negative_feedback': negative_count,
	'approved_samples': approved_count,
	'training_runs': training_runs,
	'code_samples': code_samples,
	'recent_generations': recent[0] or 0,
	'recent_positive': recent[1] or 0,
	'recent_negative': recent[2] or 0,
	'approval_rate': (positive_count / total_interactions * 100) if total_interactions > 0 else 0
	}

	def get_recent_interactions(self, limit: int = 20) -> List[Dict]:
	"""Get recent interactions"""
	conn = self._get_connection()
	cursor = conn.cursor()

	cursor.execute('''
	SELECT id, timestamp, prompt, generated_code, feedback, is_approved
	FROM interactions
	ORDER BY timestamp DESC
	LIMIT ?
	''', (limit,))

	rows = cursor.fetchall()
	conn.close()

	return [dict(row) for row in rows]


	# Singleton instance
	db = VedaDatabase()