# File size: 7,156 Bytes
# e775b41
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
import gradio as gr
import warnings
from typing import Tuple
from config import MODEL_PATH, REAL_LABEL
import joblib
from helper import _combine
from schemas import PredictIn

# Quiet scikit-learn's version-related warnings for the lifetime of the process.
# The first filter targets UserWarning raised from sklearn modules; the second
# matches on the warning *message* text.
warnings.filterwarnings(action="ignore", module="sklearn", category=UserWarning)
warnings.filterwarnings(action="ignore", message=".*InconsistentVersionWarning.*")

try:
    from sklearn.exceptions import InconsistentVersionWarning
except ImportError:
    # This sklearn build does not expose the category (or sklearn is absent);
    # the two filters above are all we can register.
    pass
else:
    # The dedicated warning category is available, so filter on it directly.
    warnings.filterwarnings(action="ignore", category=InconsistentVersionWarning)

# Deserialize the trained pipeline once at import time.
# Every warning raised while unpickling is muted inside this scope only; the
# previous filter state is restored when the `with` block exits.
print(f"Loading model from: {MODEL_PATH}")
with warnings.catch_warnings():
    warnings.filterwarnings("ignore")
    PIPE = joblib.load(MODEL_PATH)
print("Model loaded successfully")

# Work out which column of `predict_proba` belongs to which class.
# Prefer the class list of the pipeline's "clf" step; if the loaded object has
# no such step, fall back to its own `classes_`, and finally to [0, 1].
try:
    classes = list(PIPE.named_steps["clf"].classes_)
except (AttributeError, KeyError, TypeError):
    classes = list(getattr(PIPE, "classes_", [0, 1]))

print(f"Model classes: {classes}")

# Raises ValueError if REAL_LABEL is not one of the model's classes.
IDX_REAL = classes.index(REAL_LABEL)

# The "fake" column is the class that is not REAL_LABEL. Looking it up this
# way (instead of hard-coding label 0) keeps the two indices distinct when
# REAL_LABEL itself is 0 and also works when labels are not integers.
# For a two-class model whose other label is 0 the result is unchanged.
_fake_indices = [i for i, label in enumerate(classes) if label != REAL_LABEL]
if not _fake_indices:
    raise ValueError(
        f"Model classes {classes} contain no class other than REAL_LABEL={REAL_LABEL!r}"
    )
IDX_FAKE = _fake_indices[0]

def predict_news(title: str, text: str) -> Tuple[str, float, float, str]:
    """
    Classify a news article as real or fake.

    The title and body are merged with `_combine` and scored by the loaded
    pipeline; the class probabilities are then turned into a label and a
    coarse confidence bucket.

    Args:
        title: News article title
        text: News article content

    Returns:
        Tuple of (prediction, real_probability, fake_probability, confidence_level)
        where prediction is "REAL" or "FAKE" and confidence_level is
        "High", "Medium" or "Low".
    """
    document = _combine(title, text)

    # Score a single-document batch and take its only row of probabilities.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        class_probs = PIPE.predict_proba([document])[0]

    p_real = float(class_probs[IDX_REAL])
    p_fake = float(class_probs[IDX_FAKE])

    # A tie at exactly 0.5 is reported as REAL.
    label, score = ("REAL", p_real) if p_real >= 0.5 else ("FAKE", p_fake)

    # Bucket the winning probability: >= 0.8 High, >= 0.6 Medium, else Low.
    for floor, bucket in ((0.8, "High"), (0.6, "Medium")):
        if score >= floor:
            level = bucket
            break
    else:
        level = "Low"

    return label, p_real, p_fake, level

# Sample [title, content] pairs offered to the user as one-click demo inputs.
# Long bodies are split per sentence using implicit string concatenation; the
# resulting strings are unchanged.
examples = [
    [
        "Scientists Discover Breakthrough in Cancer Treatment",
        (
            "Researchers at leading medical institutions have announced a significant breakthrough in cancer treatment methodology. "
            "The new approach shows promising results in early clinical trials, offering hope for millions of patients worldwide. "
            "The research, published in a peer-reviewed journal, demonstrates improved survival rates and reduced side effects compared to traditional treatments."
        ),
    ],
    [
        "SHOCKING: Aliens Found Living Among Us, Government Confirms",
        (
            "In a stunning revelation that changes everything we know about humanity, government officials have finally confirmed that extraterrestrial beings have been living among humans for decades. "
            "Sources close to the matter reveal that these aliens have been secretly controlling world governments and manipulating global events from the shadows."
        ),
    ],
    [
        "Local Community Garden Helps Reduce Food Insecurity",
        (
            "A grassroots initiative in downtown Springfield has transformed an abandoned lot into a thriving community garden that provides fresh produce to local food banks. "
            "The project, started by neighborhood volunteers, has grown to include educational programs and has become a model for similar initiatives in other cities."
        ),
    ],
]

# Build the Gradio interface.
# Layout: intro text, then a two-column row (inputs on the left, results on the
# right), clickable examples, a collapsible model-information panel, and the
# button wiring. All emoji / "≥" characters below are stored as real UTF-8
# text; they had previously been mis-decoded into sequences such as "πŸ“°".
with gr.Blocks(title="SVM Fake News Classifier", theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        """
        # 📰 SVM Fake News Classifier
        
        This application uses a Support Vector Machine (SVM) with TF-IDF features to classify news articles as **real** or **fake**.
        The model has been trained on a large dataset and uses calibrated probabilities for more reliable predictions.
        
        ### How to use:
        1. Enter a news article title
        2. Enter the article content/text
        3. Click "Classify News" to get the prediction
        
        The model will return:
        - **Prediction**: Whether the article is classified as REAL or FAKE
        - **Probabilities**: Confidence scores for both real and fake classifications
        - **Confidence Level**: Overall confidence in the prediction (High/Medium/Low)
        """
    )
    
    with gr.Row():
        # Left column: article inputs and the submit button.
        with gr.Column():
            title_input = gr.Textbox(
                label="📰 News Title",
                placeholder="Enter the news article title...",
                lines=2
            )
            text_input = gr.Textbox(
                label="📄 News Content",
                placeholder="Enter the news article content...",
                lines=8
            )
            classify_btn = gr.Button("🔍 Classify News", variant="primary", size="lg")
        
        # Right column: read-only result widgets.
        with gr.Column():
            with gr.Group():
                prediction_output = gr.Textbox(
                    label="🎯 Prediction",
                    interactive=False
                )
                confidence_output = gr.Textbox(
                    label="📊 Confidence Level",
                    interactive=False
                )
            
            with gr.Row():
                real_prob = gr.Number(
                    label="✅ Real Probability",
                    interactive=False
                )
                fake_prob = gr.Number(
                    label="❌ Fake Probability",
                    interactive=False
                )
    
    # Examples section: selecting a row fills the two input boxes.
    gr.Markdown("### 📚 Try these examples:")
    gr.Examples(
        examples=examples,
        inputs=[title_input, text_input],
        label="Example Articles"
    )
    
    # Information section (collapsed by default).
    with gr.Accordion("ℹ️ Model Information", open=False):
        gr.Markdown(
            """
            **Model Details:**
            - **Algorithm**: Support Vector Machine (SVM) with TF-IDF vectorization
            - **Calibration**: Uses CalibratedClassifierCV for probability estimates
            - **Features**: Text preprocessing, TF-IDF feature extraction
            - **Training**: Trained on labeled real/fake news dataset
            
            **Interpretation:**
            - **Real Probability ≥ 0.5**: Article classified as REAL news
            - **Fake Probability > 0.5**: Article classified as FAKE news
            - **Confidence Level**: Based on the highest probability score
              - High: ≥ 80% confidence
              - Medium: 60-79% confidence  
              - Low: < 60% confidence
            
            **Note**: This is a machine learning model and may not be 100% accurate. 
            Always verify important information through multiple reliable sources.
            """
        )
    
    # Wire the button to the classifier. The output order must match the tuple
    # returned by predict_news: (prediction, real prob, fake prob, confidence level).
    classify_btn.click(
        fn=predict_news,
        inputs=[title_input, text_input],
        outputs=[prediction_output, real_prob, fake_prob, confidence_output]
    )

# Serve the UI only when this file is executed directly (not on import).
if __name__ == "__main__":
    demo.launch(
        server_port=7860,
        server_name="0.0.0.0",
    )