Upload annotation_app.py

#1
by maghwa - opened
Files changed (1) hide show
  1. annotation_app.py +456 -0
annotation_app.py ADDED
@@ -0,0 +1,456 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Web-based Annotation Interface for Privacy Inferences
3
+ Modified for Hugging Face Spaces deployment
4
+
5
+ Run: python annotation_app.py
6
+ Then open: http://localhost:7860
7
+ """
8
+ from flask import Flask, render_template, request, jsonify, send_file, session, redirect, url_for
9
+ import json
10
+ from pathlib import Path
11
+ from datetime import datetime
12
+ import os
13
+ from functools import wraps
14
+ import zipfile
15
+ import io
16
+ from collections import defaultdict
17
+
18
app = Flask(__name__)

# Security configuration
# SECRET_KEY signs the session cookie. A hard-coded fallback committed to the
# repository would let anyone forge a cookie with `logged_in` set, so when the
# variable is unset (or empty) a random per-process key is used instead.
# Consequences of the random fallback: sessions are invalidated on restart,
# and a multi-worker deployment must set SECRET_KEY explicitly so that every
# worker shares the same key.
app.secret_key = os.environ.get('SECRET_KEY') or os.urandom(32)
# NOTE(review): this fallback password is committed to the repository; set
# ADMIN_PASSWORD in the deployment environment instead of relying on it.
ADMIN_PASSWORD = os.environ.get('ADMIN_PASSWORD', 'antoine2025')

# Configuration
# Model outputs are read from RESULTS_DIR; saved annotations are written to
# ANNOTATIONS_DIR, which is created at import time if it does not exist.
RESULTS_DIR = Path("results")
ANNOTATIONS_DIR = Path("annotations")
ANNOTATIONS_DIR.mkdir(exist_ok=True)

# Month names as they appear in result and annotation file names.
MONTHS = [
    'JANUARY', 'FEBRUARY', 'MARCH', 'APRIL', 'MAY', 'JUNE',
    'JULY', 'AUGUST', 'SEPTEMBER', 'OCTOBER', 'NOVEMBER', 'DECEMBER',
]

# Inference categories looked up in each model response.
CATEGORIES = ['health', 'religion', 'family', 'routines', 'work', 'leisure', 'economics']
35
+
36
+
37
+ # ============================================================================
38
+ # AUTHENTICATION
39
+ # ============================================================================
40
+
41
def login_required(f):
    """Return a wrapper around view `f` that only runs it for logged-in sessions.

    When the session has no truthy `logged_in` entry, the wrapper answers with
    a redirect to the `login` endpoint instead of calling `f`.
    """
    @wraps(f)
    def guarded_view(*args, **kwargs):
        if session.get('logged_in'):
            return f(*args, **kwargs)
        return redirect(url_for('login'))
    return guarded_view
49
+
50
+
51
# Page shown after a failed login attempt.
_LOGIN_FAILED_PAGE = '''
<html>
<head>
<title>Erreur</title>
<style>
body {
    font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
    max-width: 400px;
    margin: 100px auto;
    text-align: center;
}
.error { color: #ef4444; margin: 20px 0; }
a { color: #667eea; text-decoration: none; }
</style>
</head>
<body>
<h2>❌ Mot de passe incorrect</h2>
<p class="error">Le mot de passe saisi n'est pas valide.</p>
<a href="/login">← Réessayer</a>
</body>
</html>
'''

# Login form served on GET.
_LOGIN_FORM_PAGE = '''
<html>
<head>
<title>Connexion - Privacy Annotation</title>
<style>
body {
    font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    height: 100vh;
    display: flex;
    align-items: center;
    justify-content: center;
    margin: 0;
}
.login-box {
    background: white;
    padding: 40px;
    border-radius: 12px;
    box-shadow: 0 10px 40px rgba(0,0,0,0.2);
    width: 100%;
    max-width: 400px;
}
h2 {
    margin: 0 0 30px 0;
    color: #333;
    text-align: center;
}
input {
    width: 100%;
    padding: 12px;
    margin: 10px 0;
    border: 2px solid #e5e7eb;
    border-radius: 8px;
    font-size: 16px;
    box-sizing: border-box;
}
input:focus {
    outline: none;
    border-color: #667eea;
}
button {
    width: 100%;
    padding: 12px;
    margin-top: 10px;
    background: #667eea;
    color: white;
    border: none;
    border-radius: 8px;
    font-size: 16px;
    font-weight: 600;
    cursor: pointer;
    transition: background 0.2s;
}
button:hover {
    background: #5568d3;
}
.info {
    text-align: center;
    color: #6b7280;
    font-size: 14px;
    margin-top: 20px;
}
</style>
</head>
<body>
<div class="login-box">
<h2>🔒 Privacy Annotation</h2>
<form method="post">
<input type="password" name="password" placeholder="Mot de passe" required autofocus>
<button type="submit">Se connecter</button>
</form>
<p class="info">Interface d'annotation pour évaluation des modèles LLM</p>
</div>
</body>
</html>
'''


@app.route('/login', methods=['GET', 'POST'])
def login():
    """Serve the login form and check the submitted password.

    GET returns the login form. POST compares the `password` form field with
    ADMIN_PASSWORD: on a match the session is flagged `logged_in` and the
    visitor is redirected to the `index` endpoint; otherwise an error page is
    returned with HTTP status 401.
    """
    # Local import: hmac is not among the module-level imports of this file.
    import hmac

    if request.method == 'POST':
        # A missing field is treated as an empty password instead of None.
        submitted = request.form.get('password', '')
        # Constant-time comparison so response timing does not reveal how
        # much of the password matched. Both sides are encoded to bytes
        # because compare_digest rejects non-ASCII str arguments.
        if hmac.compare_digest(submitted.encode('utf-8'),
                               ADMIN_PASSWORD.encode('utf-8')):
            session['logged_in'] = True
            return redirect(url_for('index'))
        return _LOGIN_FAILED_PAGE, 401

    return _LOGIN_FORM_PAGE
158
+
159
+
160
@app.route('/logout')
def logout():
    """Clear the session's `logged_in` flag and redirect to the login page."""
    session.pop('logged_in', None)
    login_url = url_for('login')
    return redirect(login_url)
165
+
166
+
167
+ # ============================================================================
168
+ # HELPER FUNCTIONS
169
+ # ============================================================================
170
+
171
def get_available_models():
    """Return the names of the model directories found under RESULTS_DIR.

    Hidden entries (names starting with '.') and non-directories are skipped.
    The names are sorted so the result does not depend on the order in which
    the filesystem lists them.

    Returns:
        A sorted list of directory names; empty when RESULTS_DIR is missing
        or is not a directory.
    """
    if not RESULTS_DIR.is_dir():
        return []
    return sorted(
        entry.name
        for entry in RESULTS_DIR.iterdir()
        if entry.is_dir() and not entry.name.startswith('.')
    )
176
+
177
+
178
def load_result(model_name, month):
    """Load the stored result for one model and month.

    The file read is RESULTS_DIR/<model_name>/2017/2017_<month>_P1.json.

    Args:
        model_name: Name of the model directory under RESULTS_DIR.
        month: Month name as used in the result file name.

    Returns:
        The parsed JSON content, or None when either argument is not a plain
        path component or the file does not exist.
    """
    # Both values come from request data (URL segments or a JSON body), so
    # anything that could step outside RESULTS_DIR is refused up front.
    for part in (model_name, month):
        if (not isinstance(part, str) or part in ('', '.', '..')
                or '/' in part or '\\' in part):
            return None

    filepath = RESULTS_DIR / model_name / "2017" / f"2017_{month}_P1.json"
    if not filepath.is_file():
        return None

    # Explicit UTF-8: the default encoding is locale-dependent and can fail
    # on non-ASCII text in a response.
    with open(filepath, 'r', encoding='utf-8') as f:
        return json.load(f)
187
+
188
+
189
def get_annotation_status():
    """Report, per model and per month, whether an annotation file exists.

    Returns:
        A dict keyed by model name; each value is a dict keyed by month name
        whose value is True when
        ANNOTATIONS_DIR/<model>_<month>_annotations.json exists.
    """
    return {
        model_name: {
            month_name: (
                ANNOTATIONS_DIR / f"{model_name}_{month_name}_annotations.json"
            ).exists()
            for month_name in MONTHS
        }
        for model_name in get_available_models()
    }
202
+
203
+
204
def extract_inferences(response, category):
    """Extract the inference lines listed under one category of a response.

    The response is scanned line by line. A line that mentions `category`
    (case-insensitively) and either contains ':' or contains the category
    exactly as given opens the section and is itself skipped. A line that
    mentions any other entry of CATEGORIES closes the section. While the
    section is open, each non-blank line is stripped of leading bullets,
    digits and punctuation and kept when more than 15 characters remain.

    NOTE(review): matching is by substring, so an inference sentence that
    happens to contain a category word is treated as a header.

    Args:
        response: Full text of the model response; None or empty yields [].
        category: Category name to extract.

    Returns:
        A list of cleaned inference strings, in order of appearance.
    """
    if not response:
        return []

    # Loop-invariant values, computed once.
    category_lc = category.lower()
    other_categories = [cat.lower() for cat in CATEGORIES if cat != category]

    inferences = []
    in_category = False

    for line in response.split('\n'):
        line_lc = line.lower()

        # Header of the requested category: open the section, skip the line.
        if category_lc in line_lc and (':' in line or category in line):
            in_category = True
            continue

        # Any mention of another category closes the section.
        if any(cat in line_lc for cat in other_categories):
            in_category = False

        if in_category and line.strip():
            # Drop list markers: bullet, dash, asterisk, numbering.
            cleaned = line.strip().lstrip('•-*0123456789.) ')
            if len(cleaned) > 15:
                inferences.append(cleaned)

    return inferences
227
+
228
+
229
+ # ============================================================================
230
+ # MAIN ROUTES
231
+ # ============================================================================
232
+
233
@app.route('/')
@login_required
def index():
    """Render the dashboard listing models, months and annotation status."""
    context = {
        'models': get_available_models(),
        'months': MONTHS,
        'status': get_annotation_status(),
    }
    return render_template('index.html', **context)
244
+
245
+
246
@app.route('/annotate/<model>/<month>')
@login_required
def annotate(model, month):
    """Render the annotation page for one model and month.

    Args:
        model: Model name taken from the URL.
        month: Month name taken from the URL.

    Returns:
        The rendered `annotate.html` page, or a 404 response when
        `load_result` finds no result for the pair.
    """
    # Local import: html is not among the module-level imports of this file.
    from html import escape

    result = load_result(model, month)

    if not result:
        # A str response is sent as text/html, so the URL-supplied values are
        # escaped before being echoed back.
        return f"Result not found: {escape(model)}/{escape(month)}", 404

    # Load an earlier annotation, if any, so the page can show it.
    ann_file = ANNOTATIONS_DIR / f"{model}_{month}_annotations.json"
    existing_annotation = None

    if ann_file.exists():
        with open(ann_file, 'r', encoding='utf-8') as f:
            existing_annotation = json.load(f)

    # A result without a 'response' entry yields empty inference lists
    # instead of a KeyError.
    response_text = result.get('response') or ''
    category_inferences = {
        category: extract_inferences(response_text, category)
        for category in CATEGORIES
    }

    # 'metadata' may be absent or null in the result file.
    metadata = result.get('metadata') or {}

    return render_template('annotate.html',
                           model=model,
                           month=month,
                           response=response_text,
                           categories=CATEGORIES,
                           category_inferences=category_inferences,
                           existing_annotation=existing_annotation,
                           trajectory_stats=metadata.get('trajectory_stats', {}))
276
+
277
+
278
@app.route('/api/save_annotation', methods=['POST'])
@login_required
def save_annotation():
    """Save the annotations posted as JSON for one model and month.

    The request body must be a JSON object with the keys `model`, `month`
    and `annotations`. The annotations are written, together with the
    original response and some metadata from the result file, to
    ANNOTATIONS_DIR/<model>_<month>_annotations.json.

    Returns:
        JSON `{'success': True, 'file': ...}` on success; a 400 response for
        a malformed request; a 404 response when no result exists for the
        model and month.
    """
    # silent=True yields None instead of raising on a missing or invalid body.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({'success': False,
                        'error': 'Request body must be a JSON object'}), 400

    missing = [key for key in ('model', 'month', 'annotations') if key not in data]
    if missing:
        return jsonify({'success': False,
                        'error': f"Missing field(s): {', '.join(missing)}"}), 400

    model = data['model']
    month = data['month']
    annotations = data['annotations']

    # Both values end up in a file name, so only plain names are accepted.
    for value in (model, month):
        if (not isinstance(value, str) or value in ('', '.', '..')
                or '/' in value or '\\' in value):
            return jsonify({'success': False,
                            'error': 'Invalid model or month'}), 400

    # Load original result
    result = load_result(model, month)

    if not result:
        return jsonify({'success': False, 'error': 'Result not found'}), 404

    # Create annotation data
    ann_data = {
        'model_name': model,
        'month': month,
        'year': 2017,
        'annotated_at': datetime.now().isoformat(),
        'annotation_mode': 'web_interface',
        'original_response': result.get('response'),
        'annotations': annotations,
        'metadata': {
            'trajectory_period': result.get('trajectory_period'),
            'prompt_type': result.get('prompt_type'),
        }
    }

    # Save to file (explicit UTF-8 rather than the locale default)
    ann_file = ANNOTATIONS_DIR / f"{model}_{month}_annotations.json"

    with open(ann_file, 'w', encoding='utf-8') as f:
        json.dump(ann_data, f, indent=2)

    return jsonify({'success': True, 'file': str(ann_file)})
316
+
317
+
318
@app.route('/api/metrics/<model>')
@login_required
def get_metrics(model):
    """Aggregate annotation labels for one model into precision metrics.

    Annotation files are looked up by exact name, one per entry of MONTHS
    (the same naming used by `get_annotation_status`), rather than through a
    glob built from `model`: wildcard characters in the URL value, or a model
    whose name is a prefix of another, could otherwise pull in files that
    belong to a different model. Months are reported in MONTHS order.

    Labels 'TP', 'FP' and 'PA' are counted overall and per category; any
    other label is ignored. Precision is TP / (TP + FP), or 0 when there are
    no TP or FP labels.

    Args:
        model: Model name taken from the URL.

    Returns:
        A JSON response with the totals, overall precision and per-category
        metrics, or `{'success': False, ...}` when the model has no
        annotation file.
    """
    def _precision(tp, fp):
        # Guard against division by zero when nothing was labelled TP/FP.
        return tp / (tp + fp) if (tp + fp) > 0 else 0

    ann_files = [
        (month, ANNOTATIONS_DIR / f"{model}_{month}_annotations.json")
        for month in MONTHS
    ]
    ann_files = [(month, path) for month, path in ann_files if path.is_file()]

    if not ann_files:
        return jsonify({'success': False, 'error': 'No annotations found'})

    totals = {'TP': 0, 'FP': 0, 'PA': 0}
    by_category = {}
    annotated_months = []

    for month, ann_file in ann_files:
        with open(ann_file, 'r', encoding='utf-8') as f:
            data = json.load(f)

        # Prefer the month recorded in the file; fall back to the file name.
        annotated_months.append(data.get('month', month))

        for category, items in (data.get('annotations') or {}).items():
            stats = by_category.setdefault(category, {'TP': 0, 'FP': 0, 'PA': 0})

            for item in items:
                label = item.get('label')
                if label in totals:
                    totals[label] += 1
                    stats[label] += 1

    # Category metrics
    category_metrics = {
        category: {
            'tp': stats['TP'],
            'fp': stats['FP'],
            'pa': stats['PA'],
            'precision': round(_precision(stats['TP'], stats['FP']), 3),
        }
        for category, stats in by_category.items()
    }

    return jsonify({
        'success': True,
        'model': model,
        'months_annotated': len(annotated_months),
        'annotated_months': annotated_months,
        'total_tp': totals['TP'],
        'total_fp': totals['FP'],
        'total_pa': totals['PA'],
        'precision': round(_precision(totals['TP'], totals['FP']), 3),
        'by_category': category_metrics
    })
383
+
384
+
385
@app.route('/metrics')
@login_required
def metrics_page():
    """Render the metrics dashboard with the list of available models."""
    available_models = get_available_models()
    return render_template('metrics.html', models=available_models)
391
+
392
+
393
@app.route('/download-annotations')
@login_required
def download_annotations():
    """Send every JSON file in ANNOTATIONS_DIR as one ZIP attachment.

    Returns a 404 text response when the directory holds no JSON file.
    The archive is built in memory and named with the current date and time.
    """
    annotation_paths = list(ANNOTATIONS_DIR.glob('*.json'))
    if not annotation_paths:
        return "Aucune annotation disponible pour le moment.", 404

    # Build the archive in memory, storing each file under its base name.
    archive_buffer = io.BytesIO()
    with zipfile.ZipFile(archive_buffer, 'w', zipfile.ZIP_DEFLATED) as archive:
        for path in annotation_paths:
            archive.write(path, path.name)

    # Rewind so send_file reads the archive from its first byte.
    archive_buffer.seek(0)

    timestamp = datetime.now().strftime("%Y%m%d_%H%M")
    return send_file(
        archive_buffer,
        mimetype='application/zip',
        as_attachment=True,
        download_name=f'annotations_antoine_{timestamp}.zip'
    )
420
+
421
+
422
+ # ============================================================================
423
+ # MAIN
424
+ # ============================================================================
425
+
426
if __name__ == '__main__':
    # Hugging Face Spaces uses port 7860 by default; PORT overrides it.
    port = int(os.environ.get('PORT', 7860))

    # Create directories if needed
    templates_dir = Path("templates")
    templates_dir.mkdir(exist_ok=True)

    RESULTS_DIR.mkdir(exist_ok=True)
    ANNOTATIONS_DIR.mkdir(exist_ok=True)

    # Startup banner. The check marks were mis-encoded in the original
    # messages and are restored here.
    print("="*70)
    print("PRIVACY INFERENCE ANNOTATION WEB INTERFACE")
    print("="*70)
    print(f"\n✓ Starting server on port {port}...")
    print(f"✓ Results directory: {RESULTS_DIR.absolute()}")
    print(f"✓ Annotations will be saved to: {ANNOTATIONS_DIR.absolute()}")

    # SPACE_ID is set in the environment when running on Hugging Face Spaces.
    if os.environ.get('SPACE_ID'):
        print("✓ Running on Hugging Face Spaces")
        print(f"✓ Space ID: {os.environ.get('SPACE_ID')}")
    else:
        print(f"\n🌐 Open your browser to: http://localhost:{port}")

    print("\nPress Ctrl+C to stop the server")
    print("="*70 + "\n")

    # IMPORTANT: host='0.0.0.0' to be accessible from outside
    # debug=False for production
    app.run(host='0.0.0.0', port=port, debug=False)