# thadillo
# Add advanced training features and HF deployment guide
# 00aacad
from app import db
from datetime import datetime
import json
class Token(db.Model):
    """Row of the ``tokens`` table: a unique token string with a type and name."""

    __tablename__ = 'tokens'

    id = db.Column(db.Integer, primary_key=True)
    token = db.Column(db.String(50), unique=True, nullable=False)
    # admin, government, community, industry, ngo, academic, other
    type = db.Column(db.String(20), nullable=False)
    name = db.Column(db.String(100), nullable=False)
    created_at = db.Column(db.DateTime, default=datetime.utcnow)

    def to_dict(self):
        """Return this token's columns as a plain dictionary.

        ``created_at`` is emitted as an ISO-8601 string, or ``None`` when
        the column has no value.
        """
        payload = {
            'id': self.id,
            'token': self.token,
            'type': self.type,
            'name': self.name,
        }
        created = self.created_at
        payload['created_at'] = created.isoformat() if created else None
        return payload
class Submission(db.Model):
    """Row of the ``submissions`` table: one contributor message.

    A submission carries a legacy whole-message ``category`` and, once
    ``sentence_analysis_done`` is set, per-sentence categories reachable
    through ``self.sentences`` (the backref declared by
    ``SubmissionSentence.submission``).
    """

    __tablename__ = 'submissions'

    id = db.Column(db.Integer, primary_key=True)
    message = db.Column(db.Text, nullable=False)
    contributor_type = db.Column(db.String(20), nullable=False)
    latitude = db.Column(db.Float, nullable=True)
    longitude = db.Column(db.Float, nullable=True)
    timestamp = db.Column(db.DateTime, default=datetime.utcnow)
    # Vision, Problem, Objectives, Directives, Values, Actions (backward compat)
    category = db.Column(db.String(50), nullable=True)
    flagged_as_offensive = db.Column(db.Boolean, default=False)
    # Track if sentence-level analysis is complete
    sentence_analysis_done = db.Column(db.Boolean, default=False)

    def _sentence_categories(self):
        """Return the non-empty categories of this submission's sentences."""
        return [s.category for s in self.sentences if s.category]

    def get_primary_category(self):
        """Get most frequent category from sentences (or fallback to old category).

        Returns:
            ``self.category`` when the submission has no sentences, ``None``
            when sentences exist but none has a category, otherwise the
            category that occurs most often among the sentences.
        """
        if not self.sentences:
            return self.category  # Fallback to old system

        from collections import Counter

        categories = self._sentence_categories()
        if not categories:
            return None
        return Counter(categories).most_common(1)[0][0]

    def get_category_distribution(self):
        """Get percentage of each category in this submission.

        Returns:
            A dict mapping category -> percentage rounded to one decimal.
            Without sentences this is ``{self.category: 100.0}`` (or ``{}``
            when there is no legacy category either); with sentences but no
            categorised ones it is ``{}``.
        """
        if not self.sentences:
            return {self.category: 100.0} if self.category else {}

        from collections import Counter

        categories = self._sentence_categories()
        total = len(categories)
        if total == 0:
            return {}

        counts = Counter(categories)
        return {cat: round((count / total) * 100, 1) for cat, count in counts.items()}

    def to_dict(self):
        """Convert to dictionary with sentence-level support.

        ``location`` is ``None`` only when either coordinate is missing.
        ``sentences`` and ``categoryDistribution`` are added only when
        sentence analysis is done and at least one sentence exists.
        """
        # Compare against None rather than relying on truthiness: 0.0 is a
        # valid latitude (equator) and longitude (prime meridian).
        has_location = self.latitude is not None and self.longitude is not None

        base_dict = {
            'id': self.id,
            'message': self.message,
            'contributorType': self.contributor_type,
            'location': {
                'lat': self.latitude,
                'lng': self.longitude,
            } if has_location else None,
            'timestamp': self.timestamp.isoformat() if self.timestamp else None,
            'category': self.get_primary_category() if self.sentence_analysis_done else self.category,
            'flaggedAsOffensive': self.flagged_as_offensive,
            'sentenceAnalysisDone': self.sentence_analysis_done,
        }

        # Add sentence-level data if available
        if self.sentence_analysis_done and self.sentences:
            base_dict['sentences'] = [s.to_dict() for s in self.sentences]
            base_dict['categoryDistribution'] = self.get_category_distribution()

        return base_dict
class SubmissionSentence(db.Model):
    """One sentence of a submission together with its category and confidence."""

    __tablename__ = 'submission_sentences'
    # A given sentence_index may appear only once per submission.
    __table_args__ = (
        db.UniqueConstraint('submission_id', 'sentence_index', name='uq_submission_sentence'),
    )

    id = db.Column(db.Integer, primary_key=True)
    submission_id = db.Column(db.Integer, db.ForeignKey('submissions.id'), nullable=False)
    sentence_index = db.Column(db.Integer, nullable=False)  # 0, 1, 2...
    text = db.Column(db.Text, nullable=False)
    category = db.Column(db.String(50), nullable=True)
    confidence = db.Column(db.Float, nullable=True)
    created_at = db.Column(db.DateTime, default=datetime.utcnow)

    # Parent submission; also exposes ``Submission.sentences`` via the backref.
    submission = db.relationship('Submission', backref='sentences')

    def to_dict(self):
        """Return this sentence's columns as a plain dictionary.

        ``created_at`` is emitted as an ISO-8601 string, or ``None`` when
        the column has no value.
        """
        payload = {
            'id': self.id,
            'submission_id': self.submission_id,
            'sentence_index': self.sentence_index,
            'text': self.text,
            'category': self.category,
            'confidence': self.confidence,
        }
        created = self.created_at
        payload['created_at'] = created.isoformat() if created else None
        return payload
class Settings(db.Model):
    """Key/value rows of the ``settings`` table, with lookup and upsert helpers."""

    __tablename__ = 'settings'

    id = db.Column(db.Integer, primary_key=True)
    key = db.Column(db.String(50), unique=True, nullable=False)
    value = db.Column(db.String(100), nullable=False)  # Increased to support model IDs

    @staticmethod
    def get_setting(key, default='true'):
        """Return the stored value for ``key``, or ``default`` if no row exists.

        Note that ``default`` is the string ``'true'`` unless overridden.
        """
        row = Settings.query.filter_by(key=key).first()
        if row is None:
            return default
        return row.value

    @staticmethod
    def set_setting(key, value):
        """Store ``value`` under ``key`` and commit the session.

        An existing row is updated in place; otherwise a new row is added.
        """
        row = Settings.query.filter_by(key=key).first()
        if row is None:
            db.session.add(Settings(key=key, value=value))
        else:
            row.value = value
        db.session.commit()
class TrainingExample(db.Model):
    """Stores admin corrections for model fine-tuning.

    An example may point at a whole submission (``submission_id``), at a
    single sentence (``sentence_id``), or both; each foreign key is nullable.
    """

    __tablename__ = 'training_examples'

    id = db.Column(db.Integer, primary_key=True)
    # Made nullable for sentence-level examples
    submission_id = db.Column(db.Integer, db.ForeignKey('submissions.id'), nullable=True)
    # Link to sentence
    sentence_id = db.Column(db.Integer, db.ForeignKey('submission_sentences.id'), nullable=True)
    message = db.Column(db.Text, nullable=False)  # Snapshot of submission/sentence text
    original_category = db.Column(db.String(50), nullable=True)  # AI's prediction
    corrected_category = db.Column(db.String(50), nullable=False)  # Admin's correction
    contributor_type = db.Column(db.String(20), nullable=False)
    correction_timestamp = db.Column(db.DateTime, default=datetime.utcnow)
    confidence_score = db.Column(db.Float, nullable=True)  # Original prediction confidence
    used_in_training = db.Column(db.Boolean, default=False)
    training_run_id = db.Column(db.Integer, db.ForeignKey('fine_tuning_runs.id'), nullable=True)

    # Relationships
    submission = db.relationship('Submission', backref='training_examples')
    sentence = db.relationship('SubmissionSentence', backref='training_examples')
    training_run = db.relationship('FineTuningRun', backref='training_examples')

    def to_dict(self):
        """Return this example's columns as a plain dictionary.

        Includes ``sentence_id`` so sentence-level examples (whose
        ``submission_id`` may be ``None``) remain traceable to their source.
        ``is_correction`` is ``True`` only when an original category exists
        and differs from the corrected one.
        """
        corrected_at = self.correction_timestamp
        original = self.original_category
        return {
            'id': self.id,
            'submission_id': self.submission_id,
            'sentence_id': self.sentence_id,
            'message': self.message,
            'original_category': original,
            'corrected_category': self.corrected_category,
            'contributor_type': self.contributor_type,
            'correction_timestamp': corrected_at.isoformat() if corrected_at else None,
            'confidence_score': self.confidence_score,
            'used_in_training': self.used_in_training,
            'training_run_id': self.training_run_id,
            # Without an original prediction there is nothing to have corrected.
            'is_correction': original != self.corrected_category if original else False,
        }
class FineTuningRun(db.Model):
    """One fine-tuning run: its dataset sizes, config, results and progress."""

    __tablename__ = 'fine_tuning_runs'

    id = db.Column(db.Integer, primary_key=True)
    created_at = db.Column(db.DateTime, default=datetime.utcnow)
    # preparing, training, evaluating, completed, failed
    status = db.Column(db.String(20), default='preparing')
    num_training_examples = db.Column(db.Integer, nullable=True)
    num_validation_examples = db.Column(db.Integer, nullable=True)
    num_test_examples = db.Column(db.Integer, nullable=True)
    training_config = db.Column(db.Text, nullable=True)  # JSON string
    results = db.Column(db.Text, nullable=True)  # JSON string with metrics
    model_path = db.Column(db.String(255), nullable=True)
    is_active_model = db.Column(db.Boolean, default=False)
    improvement_over_baseline = db.Column(db.Float, nullable=True)
    completed_at = db.Column(db.DateTime, nullable=True)
    error_message = db.Column(db.Text, nullable=True)

    # Progress tracking
    current_epoch = db.Column(db.Integer, default=0)
    total_epochs = db.Column(db.Integer, nullable=True)
    current_step = db.Column(db.Integer, default=0)
    total_steps = db.Column(db.Integer, nullable=True)
    current_loss = db.Column(db.Float, nullable=True)
    progress_message = db.Column(db.String(255), nullable=True)

    def to_dict(self):
        """Return the run's summary columns as a plain dictionary.

        Timestamps are emitted as ISO-8601 strings and the JSON text columns
        are decoded; each of those is ``None`` when the column is empty.
        The progress-tracking columns are not part of this dictionary.
        """
        started = self.created_at
        started_iso = started.isoformat() if started else None

        raw_config = self.training_config
        config = json.loads(raw_config) if raw_config else None

        raw_results = self.results
        metrics = json.loads(raw_results) if raw_results else None

        finished = self.completed_at
        finished_iso = finished.isoformat() if finished else None

        return {
            'id': self.id,
            'created_at': started_iso,
            'status': self.status,
            'num_training_examples': self.num_training_examples,
            'num_validation_examples': self.num_validation_examples,
            'num_test_examples': self.num_test_examples,
            'training_config': config,
            'results': metrics,
            'model_path': self.model_path,
            'is_active_model': self.is_active_model,
            'improvement_over_baseline': self.improvement_over_baseline,
            'completed_at': finished_iso,
            'error_message': self.error_message,
        }

    def set_config(self, config_dict):
        """Serialize ``config_dict`` to JSON and store it in ``training_config``."""
        encoded = json.dumps(config_dict)
        self.training_config = encoded

    def get_config(self):
        """Decode ``training_config``; an empty column yields ``{}``."""
        if not self.training_config:
            return {}
        return json.loads(self.training_config)

    def set_results(self, results_dict):
        """Serialize ``results_dict`` to JSON and store it in ``results``."""
        encoded = json.dumps(results_dict)
        self.results = encoded

    def get_results(self):
        """Decode ``results``; an empty column yields ``{}``."""
        if not self.results:
            return {}
        return json.loads(self.results)