File size: 10,265 Bytes
23654e5
 
19ce9e8
23654e5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71797a4
23654e5
71797a4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23654e5
 
71797a4
 
23654e5
 
 
 
 
 
 
 
71797a4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23654e5
71797a4
 
23654e5
 
71797a4
23654e5
 
 
 
 
1377fb1
23654e5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19ce9e8
 
 
 
 
 
 
71797a4
 
 
19ce9e8
 
 
 
 
 
 
 
 
 
71797a4
19ce9e8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
00aacad
 
 
 
 
 
 
 
19ce9e8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
from app import db
from datetime import datetime
import json

class Token(db.Model):
    """Row of the ``tokens`` table: a unique token string with a type and a name."""
    __tablename__ = 'tokens'

    id = db.Column(db.Integer, primary_key=True)
    token = db.Column(db.String(50), unique=True, nullable=False)
    # Expected values: admin, government, community, industry, ngo, academic, other
    type = db.Column(db.String(20), nullable=False)
    name = db.Column(db.String(100), nullable=False)
    created_at = db.Column(db.DateTime, default=datetime.utcnow)

    def to_dict(self):
        """Return every column as a plain dict.

        ``created_at`` is rendered with ``isoformat()``, or ``None`` when unset.
        """
        payload = dict(
            id=self.id,
            token=self.token,
            type=self.type,
            name=self.name,
        )
        if self.created_at:
            payload['created_at'] = self.created_at.isoformat()
        else:
            payload['created_at'] = None
        return payload

class Submission(db.Model):
    """A submitted message with optional coordinates and category data.

    Category information lives in two places: the legacy single ``category``
    column on this row, and the per-sentence categories reachable through
    ``self.sentences``. ``sentences`` is not declared here; it is the backref
    created by ``SubmissionSentence.submission``.
    """
    __tablename__ = 'submissions'

    id = db.Column(db.Integer, primary_key=True)
    message = db.Column(db.Text, nullable=False)
    contributor_type = db.Column(db.String(20), nullable=False)
    latitude = db.Column(db.Float, nullable=True)
    longitude = db.Column(db.Float, nullable=True)
    timestamp = db.Column(db.DateTime, default=datetime.utcnow)
    # Vision, Problem, Objectives, Directives, Values, Actions (kept for backward compat)
    category = db.Column(db.String(50), nullable=True)
    flagged_as_offensive = db.Column(db.Boolean, default=False)
    # Tracks whether sentence-level analysis is complete
    sentence_analysis_done = db.Column(db.Boolean, default=False)

    def get_primary_category(self):
        """Return the most frequent sentence category.

        Falls back to the legacy ``category`` column when the submission has
        no sentences. Returns ``None`` when sentences exist but none of them
        carries a category.
        """
        if not self.sentences:
            return self.category  # Fallback to old system

        from collections import Counter
        categories = [s.category for s in self.sentences if s.category]
        if not categories:
            return None
        return Counter(categories).most_common(1)[0][0]

    def get_category_distribution(self):
        """Return ``{category: percentage}`` over the categorised sentences.

        Percentages are rounded to one decimal place. Without sentences the
        legacy ``category`` (if any) is reported as 100.0; if no sentence has
        a category the result is an empty dict.
        """
        if not self.sentences:
            return {self.category: 100.0} if self.category else {}

        from collections import Counter
        categories = [s.category for s in self.sentences if s.category]
        total = len(categories)
        if total == 0:
            return {}

        counts = Counter(categories)
        return {cat: round((count / total) * 100, 1) for cat, count in counts.items()}

    def to_dict(self):
        """Convert to a dictionary, adding sentence-level data when available.

        ``location`` is ``None`` unless both coordinates are set. The keys
        ``sentences`` and ``categoryDistribution`` are only present when
        sentence analysis is done and at least one sentence exists.
        """
        # Compare against None explicitly: 0.0 is a valid latitude/longitude
        # (equator / prime meridian) and must not be treated as "missing".
        has_location = self.latitude is not None and self.longitude is not None

        base_dict = {
            'id': self.id,
            'message': self.message,
            'contributorType': self.contributor_type,
            'location': {
                'lat': self.latitude,
                'lng': self.longitude
            } if has_location else None,
            'timestamp': self.timestamp.isoformat() if self.timestamp else None,
            'category': self.get_primary_category() if self.sentence_analysis_done else self.category,
            'flaggedAsOffensive': self.flagged_as_offensive,
            'sentenceAnalysisDone': self.sentence_analysis_done
        }

        # Add sentence-level data if available
        if self.sentence_analysis_done and self.sentences:
            base_dict['sentences'] = [s.to_dict() for s in self.sentences]
            base_dict['categoryDistribution'] = self.get_category_distribution()

        return base_dict


class SubmissionSentence(db.Model):
    """One sentence of a submission, with its own category and confidence."""
    __tablename__ = 'submission_sentences'

    id = db.Column(db.Integer, primary_key=True)
    submission_id = db.Column(db.Integer, db.ForeignKey('submissions.id'), nullable=False)
    # Position of the sentence inside its submission: 0, 1, 2...
    sentence_index = db.Column(db.Integer, nullable=False)
    text = db.Column(db.Text, nullable=False)
    category = db.Column(db.String(50), nullable=True)
    confidence = db.Column(db.Float, nullable=True)
    created_at = db.Column(db.DateTime, default=datetime.utcnow)

    # Parent link; the backref exposes ``Submission.sentences``
    submission = db.relationship('Submission', backref='sentences')

    # A given (submission, index) pair may appear only once
    __table_args__ = (
        db.UniqueConstraint('submission_id', 'sentence_index', name='uq_submission_sentence'),
    )

    def to_dict(self):
        """Return the sentence's columns as a dict; ``created_at`` is ISO text or ``None``."""
        payload = dict(
            id=self.id,
            submission_id=self.submission_id,
            sentence_index=self.sentence_index,
            text=self.text,
            category=self.category,
            confidence=self.confidence,
        )
        if self.created_at:
            payload['created_at'] = self.created_at.isoformat()
        else:
            payload['created_at'] = None
        return payload


class Settings(db.Model):
    """Key/value settings table with static helpers for reading and writing."""
    __tablename__ = 'settings'

    id = db.Column(db.Integer, primary_key=True)
    key = db.Column(db.String(50), unique=True, nullable=False)
    # 100 characters so that model IDs fit
    value = db.Column(db.String(100), nullable=False)

    @staticmethod
    def get_setting(key, default='true'):
        """Return the stored value for ``key``, or ``default`` if no row exists."""
        row = Settings.query.filter_by(key=key).first()
        if row:
            return row.value
        return default

    @staticmethod
    def set_setting(key, value):
        """Update the row for ``key`` or insert a new one, then commit the session."""
        row = Settings.query.filter_by(key=key).first()
        if not row:
            row = Settings(key=key, value=value)
            db.session.add(row)
        else:
            row.value = value
        db.session.commit()


class TrainingExample(db.Model):
    """Stores admin corrections for model fine-tuning.

    An example may point at a whole submission (``submission_id``), at a
    single sentence (``sentence_id``), or both; each foreign key is nullable.
    ``message`` holds a snapshot of the text the example refers to.
    """
    __tablename__ = 'training_examples'

    id = db.Column(db.Integer, primary_key=True)
    # Nullable so that sentence-level examples are possible
    submission_id = db.Column(db.Integer, db.ForeignKey('submissions.id'), nullable=True)
    # Link to the sentence for sentence-level examples
    sentence_id = db.Column(db.Integer, db.ForeignKey('submission_sentences.id'), nullable=True)
    message = db.Column(db.Text, nullable=False)  # Snapshot of submission/sentence text
    original_category = db.Column(db.String(50), nullable=True)  # AI's prediction
    corrected_category = db.Column(db.String(50), nullable=False)  # Admin's correction
    contributor_type = db.Column(db.String(20), nullable=False)
    correction_timestamp = db.Column(db.DateTime, default=datetime.utcnow)
    confidence_score = db.Column(db.Float, nullable=True)  # Original prediction confidence
    used_in_training = db.Column(db.Boolean, default=False)
    training_run_id = db.Column(db.Integer, db.ForeignKey('fine_tuning_runs.id'), nullable=True)

    # Relationships
    submission = db.relationship('Submission', backref='training_examples')
    sentence = db.relationship('SubmissionSentence', backref='training_examples')
    training_run = db.relationship('FineTuningRun', backref='training_examples')

    def to_dict(self):
        """Return the example as a plain dict.

        Includes both ``submission_id`` and ``sentence_id`` so sentence-level
        examples stay traceable when ``submission_id`` is null.
        ``is_correction`` is True only when an original category exists and
        differs from the corrected one.
        """
        is_correction = (
            bool(self.original_category)
            and self.original_category != self.corrected_category
        )
        return {
            'id': self.id,
            'submission_id': self.submission_id,
            'sentence_id': self.sentence_id,
            'message': self.message,
            'original_category': self.original_category,
            'corrected_category': self.corrected_category,
            'contributor_type': self.contributor_type,
            'correction_timestamp': self.correction_timestamp.isoformat() if self.correction_timestamp else None,
            'confidence_score': self.confidence_score,
            'used_in_training': self.used_in_training,
            'training_run_id': self.training_run_id,
            'is_correction': is_correction
        }


class FineTuningRun(db.Model):
    """One fine-tuning run: its status, dataset sizes, config, results and progress."""
    __tablename__ = 'fine_tuning_runs'

    id = db.Column(db.Integer, primary_key=True)
    created_at = db.Column(db.DateTime, default=datetime.utcnow)
    # preparing, training, evaluating, completed, failed
    status = db.Column(db.String(20), default='preparing')
    num_training_examples = db.Column(db.Integer, nullable=True)
    num_validation_examples = db.Column(db.Integer, nullable=True)
    num_test_examples = db.Column(db.Integer, nullable=True)
    training_config = db.Column(db.Text, nullable=True)  # JSON text
    results = db.Column(db.Text, nullable=True)  # JSON text holding metrics
    model_path = db.Column(db.String(255), nullable=True)
    is_active_model = db.Column(db.Boolean, default=False)
    improvement_over_baseline = db.Column(db.Float, nullable=True)
    completed_at = db.Column(db.DateTime, nullable=True)
    error_message = db.Column(db.Text, nullable=True)

    # Progress columns (not part of ``to_dict``)
    current_epoch = db.Column(db.Integer, default=0)
    total_epochs = db.Column(db.Integer, nullable=True)
    current_step = db.Column(db.Integer, default=0)
    total_steps = db.Column(db.Integer, nullable=True)
    current_loss = db.Column(db.Float, nullable=True)
    progress_message = db.Column(db.String(255), nullable=True)

    def to_dict(self):
        """Return the run as a dict with JSON columns decoded.

        Timestamps become ISO strings; ``training_config`` and ``results`` are
        parsed with ``json.loads``. Any of these is ``None`` when the column
        is empty.
        """
        created_iso = None
        if self.created_at:
            created_iso = self.created_at.isoformat()

        config_obj = None
        if self.training_config:
            config_obj = json.loads(self.training_config)

        results_obj = None
        if self.results:
            results_obj = json.loads(self.results)

        completed_iso = None
        if self.completed_at:
            completed_iso = self.completed_at.isoformat()

        return dict(
            id=self.id,
            created_at=created_iso,
            status=self.status,
            num_training_examples=self.num_training_examples,
            num_validation_examples=self.num_validation_examples,
            num_test_examples=self.num_test_examples,
            training_config=config_obj,
            results=results_obj,
            model_path=self.model_path,
            is_active_model=self.is_active_model,
            improvement_over_baseline=self.improvement_over_baseline,
            completed_at=completed_iso,
            error_message=self.error_message,
        )

    def set_config(self, config_dict):
        """Serialize ``config_dict`` to JSON and store it in ``training_config``."""
        encoded = json.dumps(config_dict)
        self.training_config = encoded

    def get_config(self):
        """Return the decoded training config, or ``{}`` when none is stored."""
        if not self.training_config:
            return {}
        return json.loads(self.training_config)

    def set_results(self, results_dict):
        """Serialize ``results_dict`` to JSON and store it in ``results``."""
        encoded = json.dumps(results_dict)
        self.results = encoded

    def get_results(self):
        """Return the decoded results, or ``{}`` when none are stored."""
        if not self.results:
            return {}
        return json.loads(self.results)