# Spaces: DevPatel0611 / TruthLens (Sleeping)
# File size: 3,783 Bytes
# 86b932c

import os
import sys
import argparse
import subprocess
import logging

# Root logging setup: INFO level, pipe-separated "time | logger | level | message" records.
_LOG_FORMAT = "%(asctime)s | %(name)s | %(levelname)s | %(message)s"
logging.basicConfig(format=_LOG_FORMAT, level=logging.INFO)

# Named logger used by the stage runner and the evaluation routine in this script.
logger = logging.getLogger("run_pipeline")

def execute_stage(stage_num):
    """Run the script for one numbered pipeline stage in a child interpreter.

    Args:
        stage_num: Stage identifier; 1, 2 and 3 are the known stages.

    Returns:
        None. An unknown stage number is logged as an error and the function
        returns without running anything.

    Raises:
        SystemExit: With status 1 when the stage script does not exist on
            disk or when the child process ends with a non-zero return code.
    """
    logger.info(f"========== TRIGGERING STAGE {stage_num} ==========")

    # (stage number, script path) pairs; paths are relative to the current
    # working directory.
    known_stages = (
        (1, "src/stage1_ingestion.py"),
        (2, "src/stage2_preprocessing.py"),
        (3, "src/stage3_training.py"),
    )
    stage_script = next(
        (path for number, path in known_stages if stage_num == number),
        None,
    )
    if stage_script is None:
        logger.error(f"Unknown Stage: {stage_num}")
        return

    if not os.path.exists(stage_script):
        logger.error(f"Cannot find script: {stage_script}")
        sys.exit(1)

    # Launch with the same interpreter that is running this script.
    completed = subprocess.run([sys.executable, stage_script])
    if completed.returncode != 0:
        logger.error(f"Stage {stage_num} failed!")
        sys.exit(1)

    logger.info(f"========== STAGE {stage_num} FINISHED ==========\n")


def execute_evaluation():
    """Benchmark the inference pipeline on the stratified hold-out split.

    Reads ``data/splits/df_holdout.csv``, calls ``predict_article`` once per
    row with ``mode="full"`` and ``trigger_rag=False``, collapses each verdict
    to a binary label, and logs the accuracy, classification report and
    confusion matrix against the ``binary_label`` column.

    Returns:
        None. All results are written to the log.

    Raises:
        SystemExit: With status 1 when the hold-out CSV does not exist.
    """
    logger.info("========== TRIGGERING FINAL HOLD-OUT BENCHMARK ==========")

    # Check for the split first so a missing file is reported before the
    # inference module (and whatever it loads) is imported.
    df_path = "data/splits/df_holdout.csv"
    if not os.path.exists(df_path):
        logger.error(f"Holdout file missing at {df_path}. Run Stages 1-3 first.")
        sys.exit(1)

    import pandas as pd
    import numpy as np
    from tqdm import tqdm
    from sklearn.metrics import classification_report, accuracy_score, confusion_matrix

    # Needs to be imported late so it doesn't fail if dependencies aren't setup
    from src.stage4_inference import predict_article

    def _field_or_empty(row, column):
        """Return the row's value for *column*, or "" when absent or NaN."""
        # Empty CSV cells are read back as NaN, which ``row.get``'s default
        # does not cover; treat them like a missing column.
        # NOTE(review): assumes predict_article expects "" for missing
        # fields, as the original defaults suggest - confirm.
        value = row.get(column, "")
        return "" if pd.isna(value) else value

    df = pd.read_csv(df_path)
    logger.info(f"Loaded {len(df)} Stratified Holdout records.")

    y_true = df["binary_label"].values
    y_pred = []

    logger.info("Executing isolated pipeline inference across holdout targets (RAG safely bypassed)...")
    logger.info("NOTE: Since this evaluates the entire heavy 4-model ensemble locally, it may take several minutes.")

    for _, row in tqdm(df.iterrows(), total=len(df), desc="Benchmarking Evaluator"):
        # We manually map the inference parameters directly into the ultimate test pipeline
        res = predict_article(
            title=_field_or_empty(row, "title"),
            text=_field_or_empty(row, "text"),
            source_domain=_field_or_empty(row, "source_domain"),
            published_date=_field_or_empty(row, "published_date"),
            mode="full",
            trigger_rag=False,
        )

        # 4-tier verdict mapping:
        #   TRUE / UNCERTAIN -> 1 (real news)
        #   LIKELY FALSE / FALSE (and anything else) -> 0 (fake news)
        verdict = res["verdict"]
        y_pred.append(1 if verdict in ("TRUE", "UNCERTAIN") else 0)

    y_pred = np.array(y_pred)
    acc = accuracy_score(y_true, y_pred)

    # Pin the label set so the two target names always line up and the
    # confusion matrix is always 2x2, even when only one class appears in
    # the hold-out data or in the predictions.
    class_labels = [0, 1]
    class_names = ["Fake News (0)", "True News (1)"]
    report = classification_report(
        y_true, y_pred, labels=class_labels, target_names=class_names
    )
    matrix = confusion_matrix(y_true, y_pred, labels=class_labels)

    logger.info("\n================ BENCHMARK RESULTS ================")
    logger.info(f"Final Architecture Accuracy: {acc * 100:.2f}%")
    logger.info("\n" + report)
    logger.info(f"Confusion Matrix:\n{matrix}")
    logger.info("===================================================\n")


if __name__ == "__main__":
    # Command-line entry point: run the requested stages in the order given,
    # then the hold-out evaluation if asked; with no options, show usage.
    cli = argparse.ArgumentParser(description="Fake News Detection System Pipeline")
    cli.add_argument(
        "--stage",
        nargs="+",
        type=int,
        choices=[1, 2, 3],
        help="Specify stages to run (e.g. --stage 1 2 3)",
    )
    cli.add_argument(
        "--eval",
        action="store_true",
        help="Evaluate the architecture natively on the stratified holdout benchmark",
    )

    options = cli.parse_args()
    requested_stages = options.stage or []

    # Nothing requested: print the help text instead of silently doing nothing.
    if not requested_stages and not options.eval:
        cli.print_help()

    for stage_id in requested_stages:
        execute_stage(stage_id)

    if options.eval:
        execute_evaluation()