"""Database models: tokens, submissions, sentences, settings and fine-tuning."""

import json
from collections import Counter
from datetime import datetime

from app import db


class Token(db.Model):
    """Row in the ``tokens`` table: a unique token string with a type and name."""

    __tablename__ = 'tokens'

    id = db.Column(db.Integer, primary_key=True)
    token = db.Column(db.String(50), unique=True, nullable=False)
    # admin, government, community, industry, ngo, academic, other
    type = db.Column(db.String(20), nullable=False)
    name = db.Column(db.String(100), nullable=False)
    # Defaults to a naive UTC timestamp taken at insert time.
    created_at = db.Column(db.DateTime, default=datetime.utcnow)

    def to_dict(self):
        """Return the token as a plain dict; ``created_at`` is ISO-8601 or None."""
        return {
            'id': self.id,
            'token': self.token,
            'type': self.type,
            'name': self.name,
            'created_at': self.created_at.isoformat() if self.created_at else None,
        }


class Submission(db.Model):
    """Row in the ``submissions`` table.

    ``self.sentences`` is not declared here; it is the backref created by
    ``SubmissionSentence.submission``.
    """

    __tablename__ = 'submissions'

    id = db.Column(db.Integer, primary_key=True)
    message = db.Column(db.Text, nullable=False)
    contributor_type = db.Column(db.String(20), nullable=False)
    latitude = db.Column(db.Float, nullable=True)
    longitude = db.Column(db.Float, nullable=True)
    timestamp = db.Column(db.DateTime, default=datetime.utcnow)
    # Vision, Problem, Objectives, Directives, Values, Actions (backward compat)
    category = db.Column(db.String(50), nullable=True)
    flagged_as_offensive = db.Column(db.Boolean, default=False)
    # NEW: Track if sentence-level analysis is complete
    sentence_analysis_done = db.Column(db.Boolean, default=False)

    def get_primary_category(self):
        """Get most frequent category from sentences (or fallback to old category).

        Returns:
            ``self.category`` when the submission has no sentences; ``None``
            when sentences exist but none has a category; otherwise the most
            common sentence category.
        """
        if not self.sentences:
            return self.category  # Fallback to old system

        categories = [s.category for s in self.sentences if s.category]
        if not categories:
            return None

        return Counter(categories).most_common(1)[0][0]

    def get_category_distribution(self):
        """Get percentage of each category in this submission.

        Returns:
            A dict mapping category -> percentage rounded to one decimal.
            Without sentences this is ``{self.category: 100.0}`` (or ``{}``
            when ``self.category`` is falsy); with sentences that all lack a
            category it is ``{}``.
        """
        if not self.sentences:
            return {self.category: 100.0} if self.category else {}

        categories = [s.category for s in self.sentences if s.category]
        total = len(categories)
        if total == 0:
            return {}

        counts = Counter(categories)
        return {
            cat: round((count / total) * 100, 1)
            for cat, count in counts.items()
        }

    def to_dict(self):
        """Convert to dictionary with sentence-level support.

        ``location`` is emitted whenever both coordinates are set. The check
        is ``is not None`` rather than truthiness because ``0.0`` is a valid
        latitude (equator) and longitude (prime meridian).
        """
        has_location = self.latitude is not None and self.longitude is not None
        location = (
            {'lat': self.latitude, 'lng': self.longitude}
            if has_location
            else None
        )

        base_dict = {
            'id': self.id,
            'message': self.message,
            'contributorType': self.contributor_type,
            'location': location,
            'timestamp': self.timestamp.isoformat() if self.timestamp else None,
            'category': (
                self.get_primary_category()
                if self.sentence_analysis_done
                else self.category
            ),
            'flaggedAsOffensive': self.flagged_as_offensive,
            'sentenceAnalysisDone': self.sentence_analysis_done,
        }

        # Add sentence-level data if available
        if self.sentence_analysis_done and self.sentences:
            base_dict['sentences'] = [s.to_dict() for s in self.sentences]
            base_dict['categoryDistribution'] = self.get_category_distribution()

        return base_dict


class SubmissionSentence(db.Model):
    """Stores individual sentences from submissions with their categories"""

    __tablename__ = 'submission_sentences'

    id = db.Column(db.Integer, primary_key=True)
    submission_id = db.Column(
        db.Integer, db.ForeignKey('submissions.id'), nullable=False
    )
    sentence_index = db.Column(db.Integer, nullable=False)  # 0, 1, 2...
    text = db.Column(db.Text, nullable=False)
    category = db.Column(db.String(50), nullable=True)
    confidence = db.Column(db.Float, nullable=True)
    created_at = db.Column(db.DateTime, default=datetime.utcnow)

    # Relationships (also exposes ``Submission.sentences`` via the backref)
    submission = db.relationship('Submission', backref='sentences')

    # Composite unique constraint
    __table_args__ = (
        db.UniqueConstraint(
            'submission_id', 'sentence_index', name='uq_submission_sentence'
        ),
    )

    def to_dict(self):
        """Return the sentence as a plain dict; ``created_at`` is ISO-8601 or None."""
        return {
            'id': self.id,
            'submission_id': self.submission_id,
            'sentence_index': self.sentence_index,
            'text': self.text,
            'category': self.category,
            'confidence': self.confidence,
            'created_at': self.created_at.isoformat() if self.created_at else None,
        }


class Settings(db.Model):
    """Key/value rows in the ``settings`` table; values are stored as strings."""

    __tablename__ = 'settings'

    id = db.Column(db.Integer, primary_key=True)
    key = db.Column(db.String(50), unique=True, nullable=False)
    value = db.Column(db.String(100), nullable=False)  # Increased to support model IDs

    @staticmethod
    def get_setting(key, default='true'):
        """Return the stored value for *key*, or *default* when no row exists."""
        setting = Settings.query.filter_by(key=key).first()
        return setting.value if setting else default

    @staticmethod
    def set_setting(key, value):
        """Update the row for *key*, or insert one if missing, then commit."""
        setting = Settings.query.filter_by(key=key).first()
        if setting:
            setting.value = value
        else:
            setting = Settings(key=key, value=value)
            db.session.add(setting)
        db.session.commit()


class TrainingExample(db.Model):
    """Stores admin corrections for model fine-tuning"""

    __tablename__ = 'training_examples'

    id = db.Column(db.Integer, primary_key=True)
    # Made nullable for sentence-level
    submission_id = db.Column(
        db.Integer, db.ForeignKey('submissions.id'), nullable=True
    )
    # NEW: Link to sentence
    sentence_id = db.Column(
        db.Integer, db.ForeignKey('submission_sentences.id'), nullable=True
    )
    message = db.Column(db.Text, nullable=False)  # Snapshot of submission/sentence text
    original_category = db.Column(db.String(50), nullable=True)  # AI's prediction
    corrected_category = db.Column(db.String(50), nullable=False)  # Admin's correction
    contributor_type = db.Column(db.String(20), nullable=False)
    correction_timestamp = db.Column(db.DateTime, default=datetime.utcnow)
    confidence_score = db.Column(db.Float, nullable=True)  # Original prediction confidence
    used_in_training = db.Column(db.Boolean, default=False)
    training_run_id = db.Column(
        db.Integer, db.ForeignKey('fine_tuning_runs.id'), nullable=True
    )

    # Relationships
    submission = db.relationship('Submission', backref='training_examples')
    sentence = db.relationship('SubmissionSentence', backref='training_examples')
    training_run = db.relationship('FineTuningRun', backref='training_examples')

    def to_dict(self):
        """Return the example as a plain dict.

        ``is_correction`` is True only when ``original_category`` is set and
        differs from ``corrected_category``.
        """
        return {
            'id': self.id,
            'submission_id': self.submission_id,
            'message': self.message,
            'original_category': self.original_category,
            'corrected_category': self.corrected_category,
            'contributor_type': self.contributor_type,
            'correction_timestamp': (
                self.correction_timestamp.isoformat()
                if self.correction_timestamp
                else None
            ),
            'confidence_score': self.confidence_score,
            'used_in_training': self.used_in_training,
            'training_run_id': self.training_run_id,
            'is_correction': (
                self.original_category != self.corrected_category
                if self.original_category
                else False
            ),
        }


class FineTuningRun(db.Model):
    """Tracks fine-tuning training runs and their results"""

    __tablename__ = 'fine_tuning_runs'

    id = db.Column(db.Integer, primary_key=True)
    created_at = db.Column(db.DateTime, default=datetime.utcnow)
    # preparing, training, evaluating, completed, failed
    status = db.Column(db.String(20), default='preparing')
    num_training_examples = db.Column(db.Integer, nullable=True)
    num_validation_examples = db.Column(db.Integer, nullable=True)
    num_test_examples = db.Column(db.Integer, nullable=True)
    training_config = db.Column(db.Text, nullable=True)  # JSON string
    results = db.Column(db.Text, nullable=True)  # JSON string with metrics
    model_path = db.Column(db.String(255), nullable=True)
    is_active_model = db.Column(db.Boolean, default=False)
    improvement_over_baseline = db.Column(db.Float, nullable=True)
    completed_at = db.Column(db.DateTime, nullable=True)
    error_message = db.Column(db.Text, nullable=True)

    # Progress tracking
    current_epoch = db.Column(db.Integer, default=0)
    total_epochs = db.Column(db.Integer, nullable=True)
    current_step = db.Column(db.Integer, default=0)
    total_steps = db.Column(db.Integer, nullable=True)
    current_loss = db.Column(db.Float, nullable=True)
    progress_message = db.Column(db.String(255), nullable=True)

    def to_dict(self):
        """Return the run as a plain dict.

        ``training_config`` and ``results`` are decoded from their stored JSON
        strings (None when empty). The progress-tracking columns are not
        included in this dict.
        """
        return {
            'id': self.id,
            'created_at': self.created_at.isoformat() if self.created_at else None,
            'status': self.status,
            'num_training_examples': self.num_training_examples,
            'num_validation_examples': self.num_validation_examples,
            'num_test_examples': self.num_test_examples,
            'training_config': (
                json.loads(self.training_config) if self.training_config else None
            ),
            'results': json.loads(self.results) if self.results else None,
            'model_path': self.model_path,
            'is_active_model': self.is_active_model,
            'improvement_over_baseline': self.improvement_over_baseline,
            'completed_at': (
                self.completed_at.isoformat() if self.completed_at else None
            ),
            'error_message': self.error_message,
        }

    def set_config(self, config_dict):
        """Set training config from dict"""
        self.training_config = json.dumps(config_dict)

    def get_config(self):
        """Get training config as dict"""
        return json.loads(self.training_config) if self.training_config else {}

    def set_results(self, results_dict):
        """Set results from dict"""
        self.results = json.dumps(results_dict)

    def get_results(self):
        """Get results as dict"""
        return json.loads(self.results) if self.results else {}