File size: 25,593 Bytes
e70050b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8e97fc5
 
 
 
 
ea9303b
 
 
 
 
 
 
 
 
 
8e97fc5
 
 
e70050b
 
 
 
 
 
5692ae0
 
 
 
 
 
e70050b
 
 
 
 
 
5692ae0
e70050b
5692ae0
e70050b
 
 
 
 
 
 
 
 
 
 
5692ae0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e70050b
 
 
 
ea9303b
 
 
 
 
 
 
 
 
 
e70050b
 
 
 
 
 
 
 
 
8e97fc5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e70050b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ea9303b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e70050b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8e97fc5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e70050b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8e97fc5
 
 
 
 
 
 
 
 
e70050b
 
5692ae0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
# -*- coding: utf-8 -*-
"""
SysCRED Backend API - Flask Server
===================================
REST API for the credibility verification system.

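Endpoints:
- GET / - Serve the static frontend (static/index.html)
- POST /api/verify - Verify URL or text credibility
- POST /api/seo - Get SEO analysis only
- GET /api/ontology/graph - Get ontology graph data (nodes and links)
- GET /api/ontology/stats - Get ontology statistics
- GET /api/health - Health check
- GET /api/config - View current configuration (no route for this is defined in this module)
- POST /api/trec/search - Retrieve evidence from the demo corpus
- POST /api/trec/metrics - Compute IR evaluation metrics
- GET /api/trec/corpus - Describe the demo corpus
- GET /api/trec/health - TREC module health check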

(c) Dominique S. Loyer - PhD Thesis Prototype
"""

import sys
import os
import traceback

# Load environment variables from an optional .env file.
from pathlib import Path
try:
    from dotenv import load_dotenv
    # First candidate: two directories above this file (the project root).
    env_path = Path(__file__).resolve().parent.parent / '.env'
    if not env_path.exists():
        # Second candidate: the directory that contains this file.
        env_path = Path(__file__).parent / '.env'
    if not env_path.exists():
        # Neither candidate exists; rely on the process environment as-is.
        print(f"[SysCRED Backend] No .env file found, using system env vars")
    else:
        load_dotenv(env_path)
        print(f"[SysCRED Backend] Loaded .env from {env_path}")
except ImportError:
    # python-dotenv is optional: without it the .env lookup is skipped.
    print("[SysCRED Backend] python-dotenv not installed, using system env vars")

from flask import Flask, request, jsonify, send_from_directory
from flask_cors import CORS

# Put the parent of this file's directory on sys.path so that the
# `syscred.*` imports below resolve when this file is run directly.
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

# Availability flags for the three module groups imported below. Each one
# starts False and is flipped to True only if its whole import group succeeds.
SYSCRED_AVAILABLE = False
TREC_AVAILABLE = False
DB_AVAILABLE = False

# Core modules (required for /api/verify; see initialize_system)
try:
    from syscred.verification_system import CredibilityVerificationSystem
    from syscred.seo_analyzer import SEOAnalyzer
    from syscred.ontology_manager import OntologyManager
    from syscred.config import config, Config
    SYSCRED_AVAILABLE = True
    print("[SysCRED Backend] Core modules imported successfully")
except ImportError as e:
    print(f"[SysCRED Backend] Warning: Core modules failed: {e}")
    # Fallback config so that `config.<ATTR>` lookups still work.
    # NOTE: names whose import did not run (e.g. CredibilityVerificationSystem,
    # SEOAnalyzer) remain undefined in this branch; only Config/config are
    # replaced.
    class Config:
        HOST = "0.0.0.0"
        PORT = 5000
        DEBUG = True
        ONTOLOGY_BASE_PATH = None
        ONTOLOGY_DATA_PATH = None
        LOAD_ML_MODELS = True
        GOOGLE_FACT_CHECK_API_KEY = None
    config = Config()

# Database (optional)
try:
    from syscred.database import init_db, db, AnalysisResult
    DB_AVAILABLE = True
    print("[SysCRED Backend] Database module loaded")
except ImportError as e:
    print(f"[SysCRED Backend] Database disabled: {e}")
    # No-op stand-in so the later init_db(app) call is safe.
    # NOTE: `db` and `AnalysisResult` are NOT defined in this branch.
    def init_db(app): pass

# TREC modules (optional)
try:
    from syscred.trec_retriever import TRECRetriever, Evidence, RetrievalResult
    from syscred.eval_metrics import EvaluationMetrics
    TREC_AVAILABLE = True
    print("[SysCRED Backend] TREC modules loaded")
except ImportError as e:
    # NOTE: TRECRetriever / EvaluationMetrics stay undefined in this branch.
    print(f"[SysCRED Backend] TREC modules disabled: {e}")

# --- Initialize Flask App ---
# Module-level WSGI application; every @app.route below registers on it.
app = Flask(__name__)
# Enable CORS for the frontend. No options are passed, so whatever
# flask-cors applies by default is in effect (NOTE(review): confirm the
# default origin policy is acceptable for deployment).
CORS(app)

# Framing policy: let the syscred.uqam.ca mirror (and other UQAM hosts)
# embed this app in an iframe.
@app.after_request
def add_security_headers(response):
    """Stamp the iframe-embedding headers on every outgoing response.

    Args:
        response: The Flask response about to be sent.

    Returns:
        The same response object, with ``X-Frame-Options`` and
        ``Content-Security-Policy`` set (overwriting any existing values).
    """
    framing_headers = (
        ('X-Frame-Options', 'ALLOW-FROM https://syscred.uqam.ca'),
        (
            'Content-Security-Policy',
            "frame-ancestors 'self' https://syscred.uqam.ca https://*.uqam.ca",
        ),
    )
    for header_name, header_value in framing_headers:
        response.headers[header_name] = header_value
    return response

# Initialize Database
# init_db is either the real function from syscred.database or the no-op
# stand-in defined when that import failed. Any failure here is logged and
# the server keeps starting without a working database.
try:
    init_db(app)  # Set up the DB connection for this app
except Exception as e:
    print(f"[SysCRED Backend] Warning: DB init failed: {e}")

# --- Initialize SysCRED System ---
# Lazily created singletons. They stay None until initialize_system() or an
# endpoint builds them on first use.
credibility_system = None
seo_analyzer = None
trec_retriever = None
eval_metrics = None

# Demo corpus for TREC (AP88-90 style documents).
# Maps a document ID to a dict with two string fields: "text" (the body
# used for retrieval and previews) and "title". The TREC endpoints assign
# this dict as the retriever's corpus and report its size.
TREC_DEMO_CORPUS = {
    "AP880101-0001": {
        "text": "Climate change is primarily caused by human activities, particularly the burning of fossil fuels which release greenhouse gases into the atmosphere.",
        "title": "Climate Science Report"
    },
    "AP880101-0002": {
        "text": "The Earth's temperature has risen significantly over the past century due to greenhouse gas emissions from industrial activities and deforestation.",
        "title": "Global Warming Study"
    },
    "AP880102-0001": {
        "text": "Scientists warn that sea levels could rise dramatically if current warming trends continue, threatening coastal cities worldwide.",
        "title": "Sea Level Warning"
    },
    "AP890215-0001": {
        "text": "The presidential election campaign focused on economic policies, healthcare reform, and national security issues.",
        "title": "Election Coverage"
    },
    "AP890216-0001": {
        "text": "Stock markets rose sharply after positive economic indicators were released by the Federal Reserve, signaling economic recovery.",
        "title": "Financial News"
    },
    "AP880201-0001": {
        "text": "Renewable energy sources like solar and wind power are becoming more cost-effective alternatives to fossil fuels.",
        "title": "Green Energy Report"
    },
    "AP890301-0001": {
        "text": "The technology industry continues to grow rapidly, with artificial intelligence and machine learning driving innovation.",
        "title": "Tech Industry Update"
    },
}

def initialize_system():
    """Build the module-level SEO analyzer and credibility system.

    Assigns the ``seo_analyzer`` and ``credibility_system`` globals.

    Returns:
        bool: True when both objects were constructed. False when the core
        modules are flagged unavailable, or when construction raised (the
        exception is printed with its traceback rather than propagated).
    """
    global credibility_system, seo_analyzer

    if not SYSCRED_AVAILABLE:
        print("[SysCRED Backend] Cannot initialize - modules not available")
        return False

    try:
        # The analyzer is assigned first, so it remains set even if the
        # heavier system construction below raises.
        seo_analyzer = SEOAnalyzer()
        print("[SysCRED Backend] SEO Analyzer initialized")

        print("[SysCRED Backend] Initializing credibility system (loading ML models)...")

        base_path = None
        if config.ONTOLOGY_BASE_PATH:
            base_path = str(config.ONTOLOGY_BASE_PATH)

        data_path = None
        if config.ONTOLOGY_DATA_PATH:
            data_path = str(config.ONTOLOGY_DATA_PATH)

        # Only the base ontology path is checked on disk; a missing file is
        # passed as None. The data path is forwarded without a check.
        if not (base_path and os.path.exists(base_path)):
            base_path = None

        credibility_system = CredibilityVerificationSystem(
            ontology_base_path=base_path,
            ontology_data_path=data_path,
            load_ml_models=config.LOAD_ML_MODELS,
            google_api_key=config.GOOGLE_FACT_CHECK_API_KEY
        )
        print("[SysCRED Backend] System initialized successfully!")
        return True

    except Exception as init_error:
        print(f"[SysCRED Backend] Error initializing system: {init_error}")
        traceback.print_exc()
        return False

# --- API Routes ---

@app.route('/')
def index():
    """Return ``index.html`` from the ``static`` directory (the frontend)."""
    static_dir, entry_page = 'static', 'index.html'
    return send_from_directory(static_dir, entry_page)


@app.route('/api/health', methods=['GET'])
def health_check():
    """Report liveness plus which lazily built components exist so far.

    Returns a JSON object with a constant ``status`` of ``'healthy'``, the
    core-module import flag, and booleans telling whether the credibility
    system and the SEO analyzer have been created.
    """
    system_ready = credibility_system is not None
    analyzer_ready = seo_analyzer is not None
    report = {
        'status': 'healthy',
        'syscred_available': SYSCRED_AVAILABLE,
        'system_initialized': system_ready,
        'seo_analyzer_ready': analyzer_ready,
    }
    return jsonify(report)


def _attach_seo_analysis(result, url):
    """Add an SEO summary for *url* to *result* under ``'seoAnalysis'``.

    Best effort: a failure is logged and stored as
    ``result['seoAnalysis'] = {'error': ...}`` instead of being raised.
    Nothing is added when the page fetch reports a falsy ``success``.
    """
    try:
        web_content = credibility_system.api_clients.fetch_web_content(url)
        if not web_content.success:
            return
        seo_result = seo_analyzer.analyze_seo(
            url=url,
            title=web_content.title,
            meta_description=web_content.meta_description,
            text_content=web_content.text_content
        )
        result['seoAnalysis'] = {
            'titleLength': seo_result.title_length,
            'titleHasKeywords': seo_result.title_has_keywords,
            'metaDescriptionLength': seo_result.meta_description_length,
            'wordCount': seo_result.word_count,
            'readabilityScore': round(seo_result.readability_score, 2),
            'seoScore': round(seo_result.seo_score, 2),
            'topKeywords': list(seo_result.keyword_density.keys())
        }
    except Exception as e:
        print(f"[SysCRED Backend] SEO analysis error: {e}")
        result['seoAnalysis'] = {'error': str(e)}


def _attach_pagerank(result, url):
    """Add a PageRank estimate for *url* to *result* under ``'pageRankEstimation'``.

    Best effort: a failure is logged and stored as
    ``result['pageRankEstimation'] = {'error': ...}`` instead of being raised.
    """
    try:
        external_data = credibility_system.api_clients.fetch_external_data(url)
        pr_result = seo_analyzer.estimate_pagerank(
            url=url,
            domain_age_days=external_data.domain_age_days,
            source_reputation=external_data.source_reputation
        )
        result['pageRankEstimation'] = {
            'estimatedPR': round(pr_result.estimated_pr, 3),
            'confidence': round(pr_result.confidence, 2),
            'factors': pr_result.factors,
            'explanation': pr_result.explanation_text
        }
    except Exception as e:
        print(f"[SysCRED Backend] PageRank estimation error: {e}")
        result['pageRankEstimation'] = {'error': str(e)}


def _attach_trec_metrics(result, input_data, input_is_url):
    """Run a BM25 search over the demo corpus and add IR metrics to *result*.

    Writes ``result['trec_metrics']``. Does nothing when the TREC modules are
    unavailable. Any failure is logged and stored as ``{'error': ...}``.
    """
    global trec_retriever, eval_metrics

    try:
        if trec_retriever is None and TREC_AVAILABLE:
            # Build fully before publishing to the module globals, so that
            # another request thread never sees a retriever without a corpus
            # or a retriever without its metrics object.
            retriever = TRECRetriever(use_stemming=True, enable_prf=False)
            retriever.corpus = TREC_DEMO_CORPUS
            metrics = EvaluationMetrics()
            trec_retriever, eval_metrics = retriever, metrics
            print("[SysCRED Backend] TREC Retriever initialized with demo corpus")

        if not (trec_retriever and eval_metrics):
            return

        import time
        start_time = time.time()

        # Query with the raw text, or for URL inputs with the
        # 'informationEntree' field of the result (falling back to the URL).
        # Either way only the first 200 characters are used.
        if input_is_url:
            query_text = result.get('informationEntree', input_data)[:200]
        else:
            query_text = input_data[:200]

        trec_result = trec_retriever.retrieve_evidence(query_text, k=5, model='bm25')
        search_time = (time.time() - start_time) * 1000

        retrieved_ids = [e.doc_id for e in trec_result.evidences]

        # Demo evaluation only: every corpus document is treated as relevant.
        # In production, this would come from qrels files.
        relevant_ids = set(TREC_DEMO_CORPUS.keys())

        if retrieved_ids:
            k = len(retrieved_ids)
            relevance_dict = {doc: 1 for doc in relevant_ids}
            precision = eval_metrics.precision_at_k(retrieved_ids, relevant_ids, k)
            recall = eval_metrics.recall_at_k(retrieved_ids, relevant_ids, k)
            ap = eval_metrics.average_precision(retrieved_ids, relevant_ids)
            mrr = eval_metrics.mrr(retrieved_ids, relevant_ids)
            ndcg = eval_metrics.ndcg_at_k(retrieved_ids, relevance_dict, k)
        else:
            precision = recall = ap = mrr = ndcg = 0

        # Score of the top-ranked result. The response key is 'tfidf_score',
        # although the search above is run with model='bm25'.
        tfidf_score = trec_result.evidences[0].score if trec_result.evidences else 0

        result['trec_metrics'] = {
            'precision': round(precision, 4),
            'recall': round(recall, 4),
            'map': round(ap, 4),
            'ndcg': round(ndcg, 4),
            'tfidf_score': round(tfidf_score, 4),
            'mrr': round(mrr, 4),
            'retrieved_count': len(retrieved_ids),
            'corpus_size': len(TREC_DEMO_CORPUS),
            'search_time_ms': round(search_time, 2)
        }
        print(f"[SysCRED Backend] TREC: P={precision:.3f} R={recall:.3f} MAP={ap:.3f} NDCG={ndcg:.3f} MRR={mrr:.3f}")
    except Exception as e:
        print(f"[SysCRED Backend] TREC metrics error: {e}")
        result['trec_metrics'] = {'error': str(e)}


def _persist_analysis(input_data, result):
    """Store one analysis row in the database (best effort).

    Skipped entirely when the database module failed to import. A failed
    save is logged and the session is rolled back. It never fails the request.
    """
    if not DB_AVAILABLE:
        return

    try:
        # Tolerate a missing, null or empty 'factors' list instead of
        # raising IndexError on factors[0].
        details = result.get('detailsScore') or {}
        factors = details.get('factors') or [{}]
        first_factor = factors[0] if isinstance(factors[0], dict) else {}

        new_analysis = AnalysisResult(
            url=input_data[:500],
            credibility_score=result.get('scoreCredibilite', 0.5),
            summary=result.get('resumeAnalyse', ''),
            source_reputation=first_factor.get('value')
        )
        db.session.add(new_analysis)
        db.session.commit()
        print(f"[SysCRED-DB] Result saved. ID: {new_analysis.id}")
    except Exception as e:
        print(f"[SysCRED-DB] Save failed: {e}")
        # Leave the session usable after a failed add/commit.
        try:
            db.session.rollback()
        except Exception as rollback_error:
            print(f"[SysCRED-DB] Rollback failed: {rollback_error}")


@app.route('/api/verify', methods=['POST'])
def verify_endpoint():
    """
    Main verification endpoint.

    Request JSON:
    {
        "input_data": "URL or text to verify",
        "include_seo": true/false (optional, default true),
        "include_pagerank": true/false (optional, default true)
    }

    Responses:
        200: the verification result, optionally extended with
             'seoAnalysis', 'pageRankEstimation' and 'trec_metrics'.
        400: body is not a JSON object, 'input_data' is missing, empty or
             not a string, or the verification result contains 'error'.
        503: the credibility system could not be initialized.
        500: unexpected error during verification.
    """
    # Lazy initialization
    if credibility_system is None and not initialize_system():
        return jsonify({
            'error': 'System initialization failed. Check server logs.'
        }), 503

    # Validate request
    if not request.is_json:
        return jsonify({'error': 'Request must be JSON'}), 400

    # silent=True: a malformed body yields None here and is rejected below
    # with a JSON error, like any other non-object payload.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({'error': 'Request body must be a JSON object'}), 400

    raw_input = data.get('input_data', '')
    if raw_input is not None and not isinstance(raw_input, str):
        return jsonify({'error': "'input_data' must be a string"}), 400

    input_data = (raw_input or '').strip()
    if not input_data:
        return jsonify({'error': "'input_data' is required"}), 400

    include_seo = data.get('include_seo', True)
    include_pagerank = data.get('include_pagerank', True)

    print(f"[SysCRED Backend] Verifying: {input_data[:100]}...")

    try:
        # Run main verification
        result = credibility_system.verify_information(input_data)

        if 'error' in result:
            return jsonify(result), 400

        input_is_url = credibility_system.is_url(input_data)

        # SEO and PageRank enrichment only apply to URL inputs.
        if include_seo and input_is_url:
            _attach_seo_analysis(result, input_data)

        if include_pagerank and input_is_url:
            _attach_pagerank(result, input_data)

        print(f"[SysCRED Backend] Score: {result.get('scoreCredibilite', 'N/A')}")

        _attach_trec_metrics(result, input_data, input_is_url)
        _persist_analysis(input_data, result)

        return jsonify(result), 200

    except Exception as e:
        print(f"[SysCRED Backend] Error: {e}")
        traceback.print_exc()
        return jsonify({'error': f'Internal error: {str(e)}'}), 500


@app.route('/api/seo', methods=['POST'])
def seo_endpoint():
    """
    SEO-only analysis endpoint (faster, no ML models needed).

    Request JSON:
    {
        "url": "URL to analyze"
    }

    Responses:
        200: JSON with 'url', 'title', 'seo', 'irMetrics', 'pageRank', 'domain'.
        400: body is not a JSON object, 'url' is missing or not an
             http(s) URL, or the page could not be fetched.
        503: the SEO analyzer could not be created.
        500: unexpected error during analysis.
    """
    global seo_analyzer

    if seo_analyzer is None:
        try:
            seo_analyzer = SEOAnalyzer()
        except Exception as e:
            # Includes the NameError raised when the core-module import
            # failed and SEOAnalyzer was never bound.
            print(f"[SysCRED Backend] SEO analyzer unavailable: {e}")
            return jsonify({'error': f'SEO analyzer unavailable: {str(e)}'}), 503

    if not request.is_json:
        return jsonify({'error': 'Request must be JSON'}), 400

    # silent=True: a malformed body yields None and is rejected below.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({'error': 'Request body must be a JSON object'}), 400

    raw_url = data.get('url', '')
    url = raw_url.strip() if isinstance(raw_url, str) else ''

    # Require an explicit http:// or https:// scheme (a bare 'http' prefix
    # would also accept strings such as 'httpfoo').
    if not url.lower().startswith(('http://', 'https://')):
        return jsonify({'error': 'Valid URL is required'}), 400

    try:
        # NOTE(security): `url` is caller-supplied and is fetched from the
        # server side below. Restricting reachable hosts is not handled here.
        from syscred.api_clients import ExternalAPIClients
        api_client = ExternalAPIClients()

        web_content = api_client.fetch_web_content(url)
        if not web_content.success:
            return jsonify({'error': f'Failed to fetch URL: {web_content.error}'}), 400

        # SEO analysis
        seo_result = seo_analyzer.analyze_seo(
            url=url,
            title=web_content.title,
            meta_description=web_content.meta_description,
            text_content=web_content.text_content
        )

        # IR metrics
        ir_metrics = seo_analyzer.get_ir_metrics(web_content.text_content)

        # PageRank estimation
        external_data = api_client.fetch_external_data(url)
        pr_result = seo_analyzer.estimate_pagerank(
            url=url,
            domain_age_days=external_data.domain_age_days,
            source_reputation=external_data.source_reputation
        )

        return jsonify({
            'url': url,
            'title': web_content.title,
            'seo': {
                'titleLength': seo_result.title_length,
                'metaDescriptionLength': seo_result.meta_description_length,
                'wordCount': seo_result.word_count,
                'readabilityScore': round(seo_result.readability_score, 2),
                'seoScore': round(seo_result.seo_score, 2),
                'keywordDensity': seo_result.keyword_density
            },
            'irMetrics': {
                'documentLength': ir_metrics.document_length,
                'topTerms': ir_metrics.top_terms[:5],
                'avgTermFrequency': round(ir_metrics.avg_term_frequency, 4)
            },
            'pageRank': {
                'estimated': round(pr_result.estimated_pr, 3),
                'confidence': round(pr_result.confidence, 2),
                'factors': pr_result.factors
            },
            'domain': {
                'reputation': external_data.source_reputation,
                'ageDays': external_data.domain_age_days
            }
        }), 200

    except Exception as e:
        print(f"[SysCRED Backend] SEO endpoint error: {e}")
        traceback.print_exc()
        return jsonify({'error': str(e)}), 500



@app.route('/api/ontology/graph', methods=['GET'])
def ontology_graph():
    """Return the ontology graph as JSON for the D3.js frontend.

    When the credibility system or its ontology manager is not available,
    an empty graph is returned with status 200 (rather than an error) so
    the frontend does not break.
    """
    manager = credibility_system.ontology_manager if credibility_system else None
    if not manager:
        return jsonify({'nodes': [], 'links': []}), 200

    return jsonify(manager.get_graph_json()), 200


@app.route('/api/ontology/stats', methods=['GET'])
def ontology_stats():
    """Return ontology statistics as JSON.

    When the credibility system or its ontology manager is not available,
    a placeholder with zero triple counts and an 'error' note is returned,
    still with status 200.
    """
    manager = credibility_system.ontology_manager if credibility_system else None
    if manager:
        return jsonify(manager.get_statistics()), 200

    placeholder = {
        'error': 'Ontology not loaded',
        'base_triples': 0,
        'data_triples': 0,
    }
    return jsonify(placeholder), 200


# --- TREC Endpoints ---

@app.route('/api/trec/search', methods=['POST'])
def trec_search():
    """
    Search for evidence using TREC retrieval methods.

    Request JSON:
    {
        "query": "Claim or query to search for",
        "k": 10,              # Number of results (optional, default 10)
        "model": "bm25"       # Retrieval model: bm25, tfidf, qld (optional)
    }

    Response:
    {
        "query": "original query",
        "results": [
            {"doc_id": "AP880101-0001", "score": 6.27, "rank": 1, "text": "...", "title": "..."},
            ...
        ],
        "total": 3,
        "model": "bm25",
        "search_time_ms": 12.5
    }

    Status codes:
        400: body is not a JSON object, 'query' is missing or not a string,
             'k' is not a positive integer, or 'model' is not a string.
        503: the TREC retriever could not be initialized.
        500: unexpected error during retrieval.
    """
    global trec_retriever, eval_metrics

    # Initialize TREC components if needed
    if trec_retriever is None:
        try:
            # Build fully before publishing to the module globals, so that
            # another request thread never sees a retriever without a corpus.
            retriever = TRECRetriever(use_stemming=True, enable_prf=False)
            retriever.corpus = TREC_DEMO_CORPUS
            metrics = EvaluationMetrics()
        except Exception as e:
            # Also covers the NameError raised when the TREC import failed.
            return jsonify({'error': f'TREC initialization failed: {str(e)}'}), 503
        trec_retriever, eval_metrics = retriever, metrics
        print("[SysCRED Backend] TREC Retriever initialized with demo corpus")

    if not request.is_json:
        return jsonify({'error': 'Request must be JSON'}), 400

    # silent=True: a malformed body yields None and is rejected below.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({'error': 'Request body must be a JSON object'}), 400

    raw_query = data.get('query', '')
    query = raw_query.strip() if isinstance(raw_query, str) else ''

    if not query:
        return jsonify({'error': "'query' is required"}), 400

    k = data.get('k', 10)
    # bool is a subclass of int, so exclude it explicitly.
    if isinstance(k, bool) or not isinstance(k, int) or k < 1:
        return jsonify({'error': "'k' must be a positive integer"}), 400

    model = data.get('model', 'bm25')
    if not isinstance(model, str):
        return jsonify({'error': "'model' must be a string"}), 400

    try:
        import time
        start_time = time.time()

        # Retrieve evidence
        result = trec_retriever.retrieve_evidence(query, k=k, model=model)
        search_time_ms = (time.time() - start_time) * 1000

        # Format results. The title comes from the retriever's corpus entry
        # and is empty when the document ID is not found there.
        results = [
            {
                'doc_id': ev.doc_id,
                'score': round(ev.score, 4),
                'rank': ev.rank,
                'text': ev.text,
                'title': trec_retriever.corpus.get(ev.doc_id, {}).get('title', ''),
                'model': ev.retrieval_model
            }
            for ev in result.evidences
        ]

        return jsonify({
            'query': query,
            'results': results,
            'total': len(results),
            'model': model,
            'search_time_ms': round(search_time_ms, 2)
        }), 200

    except Exception as e:
        print(f"[SysCRED Backend] TREC search error: {e}")
        traceback.print_exc()
        return jsonify({'error': str(e)}), 500


@app.route('/api/trec/corpus', methods=['GET'])
def trec_corpus():
    """
    Describe the TREC demo corpus.

    Response:
    {
        "corpus_size": 7,
        "corpus_type": "AP88-90 Demo",
        "documents": [
            {"doc_id": "AP880101-0001", "title": "...", "text_preview": "..."},
            ...
        ]
    }

    Each preview is the document text, cut to its first 150 characters and
    suffixed with '...' when the text is longer than that.
    """
    preview_limit = 150

    def _preview(text):
        if len(text) > preview_limit:
            return text[:preview_limit] + '...'
        return text

    documents = [
        {
            'doc_id': identifier,
            'title': entry.get('title', ''),
            'text_preview': _preview(entry['text']),
        }
        for identifier, entry in TREC_DEMO_CORPUS.items()
    ]

    summary = {
        'corpus_size': len(TREC_DEMO_CORPUS),
        'corpus_type': 'AP88-90 Demo',
        'documents': documents,
    }
    return jsonify(summary), 200


@app.route('/api/trec/metrics', methods=['POST'])
def trec_metrics():
    """
    Calculate IR evaluation metrics for a retrieval result.

    Request JSON:
    {
        "retrieved": ["AP880101-0001", "AP890215-0001", "AP880101-0002"],
        "relevant": ["AP880101-0001", "AP880101-0002", "AP880102-0001"]
    }

    Response:
    {
        "precision_at_3": 0.67,
        "recall_at_3": 0.67,
        "average_precision": 0.81,
        "mrr": 1.0,
        "ndcg_at_3": 0.88
    }

    K is the length of the 'retrieved' list. The response also carries a
    'metrics_explanation' object describing each metric.

    Status codes:
        400: body is not a JSON object, 'retrieved' is not a non-empty list,
             or 'relevant' is not a list of hashable values.
        503: the metrics calculator could not be created.
        500: unexpected error while computing metrics.
    """
    global eval_metrics

    if eval_metrics is None:
        try:
            eval_metrics = EvaluationMetrics()
        except Exception as e:
            # Includes the NameError raised when the TREC import failed and
            # EvaluationMetrics was never bound.
            print(f"[SysCRED Backend] TREC metrics unavailable: {e}")
            return jsonify({'error': f'TREC metrics unavailable: {str(e)}'}), 503

    if not request.is_json:
        return jsonify({'error': 'Request must be JSON'}), 400

    # silent=True: a malformed body yields None and is rejected below.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({'error': 'Request body must be a JSON object'}), 400

    retrieved = data.get('retrieved', [])
    # Require a real list: a string would otherwise be treated as a sequence
    # of single-character document IDs.
    if not isinstance(retrieved, list) or not retrieved:
        return jsonify({'error': "'retrieved' list is required"}), 400

    relevant_raw = data.get('relevant', [])
    if not isinstance(relevant_raw, list):
        return jsonify({'error': "'relevant' must be a list"}), 400
    try:
        relevant = set(relevant_raw)
    except TypeError:
        # Nested lists/objects are unhashable and cannot be document IDs.
        return jsonify({'error': "'relevant' entries must be document IDs"}), 400

    k = len(retrieved)

    try:
        # Calculate metrics
        p_at_k = eval_metrics.precision_at_k(retrieved, relevant, k)
        r_at_k = eval_metrics.recall_at_k(retrieved, relevant, k)
        ap = eval_metrics.average_precision(retrieved, relevant)
        mrr = eval_metrics.mrr(retrieved, relevant)

        # For NDCG, build a binary relevance dict: every relevant doc maps to 1.
        relevance_dict = {doc: 1 for doc in relevant}
        ndcg = eval_metrics.ndcg_at_k(retrieved, relevance_dict, k)

        return jsonify({
            f'precision_at_{k}': round(p_at_k, 4),
            f'recall_at_{k}': round(r_at_k, 4),
            'average_precision': round(ap, 4),
            'mrr': round(mrr, 4),
            f'ndcg_at_{k}': round(ndcg, 4),
            'metrics_explanation': {
                'P@K': 'Proportion de documents pertinents parmi les K premiers récupérés',
                'R@K': 'Proportion de documents pertinents récupérés parmi tous les pertinents',
                'AP': 'Moyenne des précisions à chaque document pertinent trouvé',
                'MRR': 'Rang réciproque du premier document pertinent',
                'NDCG': 'Gain cumulatif normalisé avec décroissance logarithmique'
            }
        }), 200

    except Exception as e:
        print(f"[SysCRED Backend] TREC metrics error: {e}")
        traceback.print_exc()
        return jsonify({'error': str(e)}), 500


@app.route('/api/trec/health', methods=['GET'])
def trec_health():
    """Health check for the TREC module.

    Returns a JSON object with a constant ``status`` of ``'healthy'``,
    whether the TREC modules were imported, whether the retriever has been
    created yet, the demo corpus size and the list of model names.
    """
    return jsonify({
        'status': 'healthy',
        # Read the module-level flag directly. The previous
        # `'TREC_AVAILABLE' in dir()` test looked at this function's local
        # names, never matched, and so always reported True.
        'trec_available': TREC_AVAILABLE,
        'retriever_initialized': trec_retriever is not None,
        'corpus_size': len(TREC_DEMO_CORPUS),
        'models_available': ['bm25', 'tfidf', 'qld']
    }), 200


# --- Main ---
# Script entry point: print a banner, build the system eagerly, list the
# endpoints, then start Flask's built-in server.
if __name__ == '__main__':
    banner_rule = "=" * 60
    for banner_line in (
        banner_rule,
        "SysCRED Backend API Server",
        "(c) Dominique S. Loyer - PhD Thesis Prototype",
        banner_rule,
    ):
        print(banner_line)
    print()

    # Build the system before serving. The return value is ignored here;
    # /api/verify retries initialization while the system is still None.
    print("[SysCRED Backend] Pre-initializing system...")
    initialize_system()

    print()
    for startup_line in (
        "[SysCRED Backend] Starting Flask server...",
        "[SysCRED Backend] Endpoints:",
        "  - POST /api/verify          - Full credibility verification",
        "  - POST /api/seo             - SEO analysis only (faster)",
        "  - GET  /api/ontology/stats  - Ontology statistics",
        "  - GET  /api/health          - Health check",
        "  --- TREC Endpoints ---",
        "  - POST /api/trec/search     - Evidence retrieval (BM25/TF-IDF/QLD)",
        "  - POST /api/trec/metrics    - Calculate IR metrics (MAP, P@K, NDCG)",
        "  - GET  /api/trec/corpus     - Demo corpus info",
        "  - GET  /api/trec/health     - TREC module health",
    ):
        print(startup_line)
    print()

    # Host, port and debug are literals here; they are not read from `config`.
    app.run(host='0.0.0.0', port=5001, debug=False, threaded=True)