TrBn17 commited on
Commit
e775b41
·
1 Parent(s): 2a4d426
Files changed (18) hide show
  1. .dockerignore +56 -0
  2. .env.example +12 -0
  3. .gitattributes +2 -0
  4. Dockerfile +34 -0
  5. README.md +84 -13
  6. README_HF.md +10 -0
  7. README_gradio.md +47 -0
  8. __init__.py +1 -0
  9. app.py +128 -0
  10. config.py +14 -0
  11. docker-compose.yml +19 -0
  12. fake_news_model.joblib +3 -0
  13. gradio_app.py +188 -0
  14. helper.py +8 -0
  15. requirements.txt +6 -0
  16. requirements_gradio.txt +7 -0
  17. schemas.py +31 -0
  18. start.py +16 -0
.dockerignore ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.so
6
+ .Python
7
+ build/
8
+ develop-eggs/
9
+ dist/
10
+ downloads/
11
+ eggs/
12
+ .eggs/
13
+ lib/
14
+ lib64/
15
+ parts/
16
+ sdist/
17
+ var/
18
+ wheels/
19
+ *.egg-info/
20
+ .installed.cfg
21
+ *.egg
22
+
23
+ # Virtual environments
24
+ venv/
25
+ env/
26
+ ENV/
27
+
28
+ # IDE
29
+ .vscode/
30
+ .idea/
31
+ *.swp
32
+ *.swo
33
+
34
+ # OS
35
+ .DS_Store
36
+ Thumbs.db
37
+
38
+ # Git
39
+ .git/
40
+ .gitignore
41
+
42
+ # Docker
43
+ Dockerfile*
44
+ docker-compose*
45
+ .dockerignore
46
+
47
+ # Logs
48
+ *.log
49
+
50
+ # Temporary files
51
+ *.tmp
52
+ *.temp
53
+
54
+ # Documentation
55
+ README.md
56
+ docs/
.env.example ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Environment Variables for SVM Classifier
2
+ # Copy this file to .env and set your values
3
+
4
+ # API Key for authentication
5
+ API_KEY=my-super-secret-api-key-2024
6
+
7
+ # Path to the model file
8
+ MODEL_PATH=fake_news_model.joblib
9
+
10
+ # Server configuration
11
+ HOST=0.0.0.0
12
+ PORT=8000
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.joblib filter=lfs diff=lfs merge=lfs -text
37
+ *.env
Dockerfile ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Use Python 3.10 slim image
2
+ FROM python:3.10-slim
3
+
4
+ # Set working directory
5
+ WORKDIR /app
6
+
7
+ # Install system dependencies
8
# curl is required by the HEALTHCHECK below (and by the docker-compose
# healthcheck); the python:3.10-slim base image does not include it.
RUN apt-get update && apt-get install -y \
    curl \
    gcc \
    g++ \
    && rm -rf /var/lib/apt/lists/*
12
+
13
+ # Copy requirements first for better caching
14
+ COPY requirements.txt .
15
+
16
+ # Install Python dependencies
17
+ RUN pip install --no-cache-dir -r requirements.txt
18
+
19
+ # Copy application code
20
+ COPY . .
21
+
22
+ # Create non-root user
23
+ RUN useradd -m -u 1000 appuser && chown -R appuser:appuser /app
24
+ USER appuser
25
+
26
+ # Expose port
27
+ EXPOSE 8000
28
+
29
+ # Health check
30
+ HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 \
31
+ CMD curl -f http://localhost:8000/health || exit 1
32
+
33
+ # Run the application
34
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8000"]
README.md CHANGED
@@ -1,13 +1,84 @@
1
- ---
2
- title: Svm Model
3
- emoji: 🔥
4
- colorFrom: purple
5
- colorTo: gray
6
- sdk: gradio
7
- sdk_version: 5.42.0
8
- app_file: app.py
9
- pinned: false
10
- license: apache-2.0
11
- ---
12
-
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # SVM Fake News Classifier
2
+
3
+ FastAPI application for classifying news articles as real or fake using Support Vector Machine with TF-IDF features.
4
+
5
+ ## Features
6
+
7
+ - FastAPI REST API
8
+ - SVM model with TF-IDF vectorization
9
+ - Calibrated probability predictions
10
+ - API key authentication
11
+ - Health check endpoint
12
+ - Docker support
13
+
14
+ ## API Endpoints
15
+
16
+ - `GET /` - API information
17
+ - `GET /health` - Health check
18
+ - `POST /predict` - Single prediction
19
+ - `POST /predict_batch` - Batch predictions
20
+ - `GET /docs` - Interactive API documentation
21
+
22
+ ## Quick Start
23
+
24
+ ### Using Docker
25
+
26
+ ```bash
27
+ # Build and run with docker-compose
28
+ docker-compose up --build
29
+
30
+ # Or build and run manually
31
+ docker build -t svm-classifier .
32
+ docker run -p 8000:8000 svm-classifier
33
+ ```
34
+
35
+ ### Local Development
36
+
37
+ ```bash
38
+ # Install dependencies
39
+ pip install -r requirements.txt
40
+
41
+ # Run the application
42
+ python app.py
43
+ # or
44
+ python start.py
45
+ # or
46
+ uvicorn app:app --host 0.0.0.0 --port 8000
47
+ ```
48
+
49
+ ## Usage
50
+
51
+ ### Single Prediction
52
+
53
+ ```bash
54
+ curl -X POST "http://localhost:8000/predict" \
55
+ -H "Content-Type: application/json" \
56
+ -H "x-api-key: my-super-secret-api-key-2024" \
57
+ -d '{
58
+ "title": "Breaking News",
59
+ "text": "This is a news article text..."
60
+ }'
61
+ ```
62
+
63
+ ### Batch Prediction
64
+
65
+ ```bash
66
+ curl -X POST "http://localhost:8000/predict_batch" \
67
+ -H "Content-Type: application/json" \
68
+ -H "x-api-key: my-super-secret-api-key-2024" \
69
+ -d '{
70
+ "items": [
71
+ {"title": "News 1", "text": "Text 1"},
72
+ {"title": "News 2", "text": "Text 2"}
73
+ ]
74
+ }'
75
+ ```
76
+
77
+ ## Environment Variables
78
+
79
+ - `MODEL_PATH`: Path to the model file (default: `fake_news_model.joblib`)
80
+ - `API_KEY`: API key for authentication (default: `my-super-secret-api-key-2024`; override it in any real deployment)
81
+
82
+ ## License
83
+
84
+ Apache 2.0
README_HF.md ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: SVM Fake News Classifier
3
+ emoji: 📰
4
+ colorFrom: red
5
+ colorTo: blue
6
+ sdk: docker
7
+ app_port: 8000
8
+ pinned: false
9
+ license: apache-2.0
10
+ ---
README_gradio.md ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: SVM Fake News Classifier
3
+ emoji: 📰
4
+ colorFrom: red
5
+ colorTo: blue
6
+ sdk: gradio
7
+ sdk_version: "4.44.0"
8
+ app_file: gradio_app.py
9
+ pinned: false
10
+ license: apache-2.0
11
+ ---
12
+
13
+ # SVM Fake News Classifier
14
+
15
+ An interactive web application for classifying news articles as real or fake using Support Vector Machine with TF-IDF features.
16
+
17
+ ## Features
18
+
19
+ - 🤖 **SVM Model**: Support Vector Machine with TF-IDF vectorization
20
+ - 📊 **Calibrated Probabilities**: Reliable confidence scores using CalibratedClassifierCV
21
+ - 🎯 **Interactive Interface**: User-friendly Gradio web interface
22
+ - 📚 **Example Articles**: Pre-loaded examples to test the model
23
+ - 📈 **Confidence Levels**: High/Medium/Low confidence indicators
24
+
25
+ ## How to Use
26
+
27
+ 1. **Enter Article Details**: Input the news title and content
28
+ 2. **Get Prediction**: Click "Classify News" to analyze the article
29
+ 3. **Review Results**: Check the prediction, probabilities, and confidence level
30
+
31
+ ## Model Information
32
+
33
+ - **Algorithm**: Support Vector Machine (SVM)
34
+ - **Features**: TF-IDF text vectorization
35
+ - **Calibration**: CalibratedClassifierCV for probability estimates
36
+ - **Output**: Binary classification (Real/Fake) with confidence scores
37
+
38
+ ## API Version
39
+
40
+ For programmatic access, a FastAPI version is also available with the following endpoints:
41
+ - `POST /predict` - Single article prediction
42
+ - `POST /predict_batch` - Batch predictions
43
+ - `GET /health` - Health check
44
+
45
+ ## Disclaimer
46
+
47
+ This is a machine learning model for educational and research purposes. Always verify important information through multiple reliable sources.
__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # SVM Fake News Classifier Package
app.py ADDED
@@ -0,0 +1,128 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py
2
+ from __future__ import annotations
3
+
4
+ import warnings
5
+ from typing import List, Literal, Optional, Tuple
6
+ from config import MODEL_PATH, REAL_LABEL, API_KEY
7
+ import joblib
8
+ from fastapi import FastAPI, Header, HTTPException
9
+ from helper import _combine
10
+ from schemas import PredictOut, PredictBatchIn, PredictIn, PredictBatchOut
11
+
12
+ # Suppress sklearn version warnings
13
+ warnings.filterwarnings("ignore", category=UserWarning, module="sklearn")
14
+ warnings.filterwarnings("ignore", message=".*InconsistentVersionWarning.*")
15
+ # =========================
16
+ # Load calibrated model
17
+ # (Pipeline: TF-IDF -> CalibratedClassifierCV(LinearSVC))
18
+ # =========================
19
+ # Additional specific suppression for sklearn version warnings
20
+ try:
21
+ from sklearn.exceptions import InconsistentVersionWarning
22
+ warnings.filterwarnings("ignore", category=InconsistentVersionWarning)
23
+ except ImportError:
24
+ # Fallback for older sklearn versions
25
+ pass
26
+
27
+ # Guard against double loading
28
# Load the serialized pipeline once per module namespace and derive the
# predict_proba column indices for the "real" and "fake" classes.
if 'PIPE' not in globals():
    try:
        print("Loading model from:", MODEL_PATH)
        # NOTE(review): joblib.load unpickles the file, so MODEL_PATH must
        # only ever point at a trusted artifact.
        with warnings.catch_warnings():
            # Silence every warning raised while loading the model.
            warnings.simplefilter("ignore")
            PIPE = joblib.load(MODEL_PATH)
        print("Model loaded successfully")
    except Exception as e:
        # Log and re-raise: the API cannot serve without a model.
        print(f"Error loading model: {e}")
        raise

    # Read the class order from the final estimator so that probabilities
    # are mapped to the right labels.
    try:
        classes = list(PIPE.named_steps["clf"].classes_)
    except Exception:
        # Fallback when there is no "clf" step: use the object's own
        # classes_, or assume [0, 1] if that is missing too.
        classes = list(getattr(PIPE, "classes_", [0, 1]))  # fallback

    print(f"Model classes: {classes}")
    IDX_REAL = classes.index(REAL_LABEL)
    # NOTE(review): the fake label is hard-coded as 0 rather than taken
    # from config; this only stays correct while REAL_LABEL is not 0.
    IDX_FAKE = classes.index(0)
    print(f"Real index: {IDX_REAL}, Fake index: {IDX_FAKE}")
else:
    print("Model already loaded, skipping reload...")
51
+
52
+ # =========================
53
+ # Core inference
54
+ # =========================
55
def infer_one(inp: PredictIn) -> PredictOut:
    """Classify a single article.

    The model input is ``inp.text_all`` (stripped and lower-cased) when it
    contains any non-whitespace text; otherwise ``inp.title`` and
    ``inp.text`` are merged with ``_combine``.

    Args:
        inp: Request payload holding ``title``/``text`` and/or ``text_all``.

    Returns:
        A ``PredictOut`` with the label ("real" when ``prob_real >= 0.5``,
        else "fake") and both class probabilities from ``predict_proba``.
    """
    text_all = (inp.text_all or "").strip().lower()
    if not text_all:
        # A missing, empty, or whitespace-only text_all must not shadow the
        # title/text fields: fall back to combining them instead of scoring
        # an empty string.
        text_all = _combine(inp.title, inp.text)

    # Suppress warnings during prediction
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        probs = PIPE.predict_proba([text_all])[0]

    # Column positions were resolved from the model's class order at load time.
    prob_real = float(probs[IDX_REAL])
    prob_fake = float(probs[IDX_FAKE])

    label = "real" if prob_real >= 0.5 else "fake"

    return PredictOut(
        label=label,
        prob_real=prob_real,
        prob_fake=prob_fake,
    )
73
+
74
+
75
def infer_batch(items: List[PredictIn]) -> List[PredictOut]:
    """Run ``infer_one`` on each item and return the results in input order."""
    results: List[PredictOut] = []
    for item in items:
        results.append(infer_one(item))
    return results
77
+
78
+
79
+ # =========================
80
+ # FastAPI endpoints
81
+ # =========================
82
+ app = FastAPI(
83
+ title="SVM Fake/Real News Classifier",
84
+ description="API for classifying news as real or fake using SVM with TF-IDF features",
85
+ version="1.0.0"
86
+ )
87
+
88
@app.get("/")
def root():
    """Describe the service: its endpoints and basic model metadata."""
    endpoints = {
        "predict": "/predict",
        "predict_batch": "/predict_batch",
        "health": "/health",
    }
    model_info = {
        "classes": ["fake", "real"],
        "model_path": MODEL_PATH,
        "calibrated": True,
    }
    return {
        "message": "SVM Fake/Real News Classifier API",
        "endpoints": endpoints,
        "model_info": model_info,
    }
103
+
104
@app.get("/health")
def health_check():
    """Report liveness and whether the module-level ``PIPE`` object exists."""
    model_loaded = 'PIPE' in globals()
    return {"status": "healthy", "model_loaded": model_loaded}
107
+
108
def _require_api_key(provided: str) -> None:
    """Raise HTTP 401 unless ``provided`` equals the configured ``API_KEY``.

    The comparison uses ``secrets.compare_digest`` so that response timing
    does not reveal how many leading characters of a guessed key are correct.
    Both values are encoded to bytes first because ``compare_digest`` rejects
    non-ASCII ``str`` arguments.
    """
    import secrets  # local import keeps this block self-contained

    if not secrets.compare_digest(provided.encode("utf-8"), API_KEY.encode("utf-8")):
        raise HTTPException(status_code=401, detail="Unauthorized")


@app.post("/predict", response_model=PredictOut)
def predict(payload: PredictIn, x_api_key: str = Header(default="")):
    """Classify one article; requires a valid ``x-api-key`` header.

    Raises:
        HTTPException: 401 when the header does not match ``API_KEY``.
    """
    _require_api_key(x_api_key)
    return infer_one(payload)


@app.post("/predict_batch", response_model=PredictBatchOut)
def predict_batch(payload: PredictBatchIn, x_api_key: str = Header(default="")):
    """Classify every article in ``payload.items``; requires a valid ``x-api-key`` header.

    Raises:
        HTTPException: 401 when the header does not match ``API_KEY``.
    """
    _require_api_key(x_api_key)
    return PredictBatchOut(results=infer_batch(payload.items))
119
+
120
+
121
if __name__ == "__main__":
    import os

    import uvicorn

    # Bind address and port come from the HOST / PORT environment variables
    # listed in .env.example. The default port is 8000, the port used by the
    # README examples, the Dockerfile and start.py.
    host = os.getenv("HOST", "0.0.0.0")
    port = int(os.getenv("PORT", "8000"))

    print("===== Application Ready =====")
    print("FastAPI app initialized successfully")
    print("API endpoints available at /predict and /predict_batch")
    print("API documentation at /docs")
    print("================================")
    uvicorn.run(app, host=host, port=port)
config.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
+ # Model configuration
4
+ MODEL_PATH = os.getenv("MODEL_PATH", "fake_news_model.joblib")
5
+
6
+ # API key configuration - it can be set in several ways:
7
+ # 1. Environment variable: export API_KEY="your-secret-key"
8
+ # 2. Docker: -e API_KEY="your-secret-key"
9
+ # 3. docker-compose.yml: environment section
10
+ # 4. Change the default value below (not recommended: it ends up in version control)
11
+ API_KEY = os.getenv("API_KEY", "my-super-secret-api-key-2024")
12
+
13
+ # Model labels
14
+ REAL_LABEL = 1
docker-compose.yml ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ version: '3.8'
2
+
3
+ services:
4
+ svm-classifier:
5
+ build: .
6
+ ports:
7
+ - "8000:8000"
8
+ environment:
9
+ - MODEL_PATH=fake_news_model.joblib
10
+ - API_KEY=my-super-secret-api-key-2024 # CHANGE THIS!
11
+ volumes:
12
+ - ./fake_news_model.joblib:/app/fake_news_model.joblib:ro
13
+ restart: unless-stopped
14
+ healthcheck:
15
+ test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
16
+ interval: 30s
17
+ timeout: 10s
18
+ retries: 3
19
+ start_period: 40s
fake_news_model.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c754c955fbc75848b30dd061e1fd48ae8d7954c430bbf1e18440e41d8dcd39a8
3
+ size 46836547
gradio_app.py ADDED
@@ -0,0 +1,188 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import warnings
3
+ from typing import Tuple
4
+ from config import MODEL_PATH, REAL_LABEL
5
+ import joblib
6
+ from helper import _combine
7
+ from schemas import PredictIn
8
+
9
+ # Suppress sklearn version warnings
10
+ warnings.filterwarnings("ignore", category=UserWarning, module="sklearn")
11
+ warnings.filterwarnings("ignore", message=".*InconsistentVersionWarning.*")
12
+
13
+ try:
14
+ from sklearn.exceptions import InconsistentVersionWarning
15
+ warnings.filterwarnings("ignore", category=InconsistentVersionWarning)
16
+ except ImportError:
17
+ pass
18
+
19
# Load model
# NOTE(review): joblib.load unpickles the file, so MODEL_PATH must only ever
# point at a trusted artifact.
print("Loading model from:", MODEL_PATH)
with warnings.catch_warnings():
    # Silence every warning raised while loading the model.
    warnings.simplefilter("ignore")
    PIPE = joblib.load(MODEL_PATH)
print("Model loaded successfully")

# Get class indices: the positions of the "real" and "fake" classes in the
# rows returned by predict_proba.
try:
    classes = list(PIPE.named_steps["clf"].classes_)
except Exception:
    # No "clf" step: use the object's own classes_, or assume [0, 1].
    classes = list(getattr(PIPE, "classes_", [0, 1]))

print(f"Model classes: {classes}")
IDX_REAL = classes.index(REAL_LABEL)
# NOTE(review): the fake label is hard-coded as 0; this only stays correct
# while REAL_LABEL is not 0.
IDX_FAKE = classes.index(0)
35
+
36
def predict_news(title: str, text: str) -> Tuple[str, float, float, str]:
    """Classify an article for the Gradio UI.

    Args:
        title: News article title.
        text: News article content.

    Returns:
        ``(prediction, real_probability, fake_probability, confidence_level)``
        where ``prediction`` is "REAL" or "FAKE" and ``confidence_level`` is
        "High" (>= 0.8), "Medium" (>= 0.6) or "Low", judged on the
        probability of the predicted class.
    """
    article = _combine(title, text)

    # Warnings raised by the model during prediction are silenced.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        row = PIPE.predict_proba([article])[0]

    prob_real = float(row[IDX_REAL])
    prob_fake = float(row[IDX_FAKE])

    is_real = prob_real >= 0.5
    prediction = "REAL" if is_real else "FAKE"
    confidence = prob_real if is_real else prob_fake

    # Walk the thresholds from highest to lowest; anything below the last
    # one is "Low".
    confidence_level = "Low"
    for floor, name in ((0.8, "High"), (0.6, "Medium")):
        if confidence >= floor:
            confidence_level = name
            break

    return prediction, prob_real, prob_fake, confidence_level
75
+
76
+ # Example articles for demonstration
77
+ examples = [
78
+ [
79
+ "Scientists Discover Breakthrough in Cancer Treatment",
80
+ "Researchers at leading medical institutions have announced a significant breakthrough in cancer treatment methodology. The new approach shows promising results in early clinical trials, offering hope for millions of patients worldwide. The research, published in a peer-reviewed journal, demonstrates improved survival rates and reduced side effects compared to traditional treatments."
81
+ ],
82
+ [
83
+ "SHOCKING: Aliens Found Living Among Us, Government Confirms",
84
+ "In a stunning revelation that changes everything we know about humanity, government officials have finally confirmed that extraterrestrial beings have been living among humans for decades. Sources close to the matter reveal that these aliens have been secretly controlling world governments and manipulating global events from the shadows."
85
+ ],
86
+ [
87
+ "Local Community Garden Helps Reduce Food Insecurity",
88
+ "A grassroots initiative in downtown Springfield has transformed an abandoned lot into a thriving community garden that provides fresh produce to local food banks. The project, started by neighborhood volunteers, has grown to include educational programs and has become a model for similar initiatives in other cities."
89
+ ]
90
+ ]
91
+
92
+ # Create Gradio interface
93
+ with gr.Blocks(title="SVM Fake News Classifier", theme=gr.themes.Soft()) as demo:
94
+ gr.Markdown(
95
+ """
96
+ # 📰 SVM Fake News Classifier
97
+
98
+ This application uses a Support Vector Machine (SVM) with TF-IDF features to classify news articles as **real** or **fake**.
99
+ The model has been trained on a large dataset and uses calibrated probabilities for more reliable predictions.
100
+
101
+ ### How to use:
102
+ 1. Enter a news article title
103
+ 2. Enter the article content/text
104
+ 3. Click "Classify News" to get the prediction
105
+
106
+ The model will return:
107
+ - **Prediction**: Whether the article is classified as REAL or FAKE
108
+ - **Probabilities**: Confidence scores for both real and fake classifications
109
+ - **Confidence Level**: Overall confidence in the prediction (High/Medium/Low)
110
+ """
111
+ )
112
+
113
+ with gr.Row():
114
+ with gr.Column():
115
+ title_input = gr.Textbox(
116
+ label="📰 News Title",
117
+ placeholder="Enter the news article title...",
118
+ lines=2
119
+ )
120
+ text_input = gr.Textbox(
121
+ label="📄 News Content",
122
+ placeholder="Enter the news article content...",
123
+ lines=8
124
+ )
125
+ classify_btn = gr.Button("🔍 Classify News", variant="primary", size="lg")
126
+
127
+ with gr.Column():
128
+ with gr.Group():
129
+ prediction_output = gr.Textbox(
130
+ label="🎯 Prediction",
131
+ interactive=False
132
+ )
133
+ confidence_output = gr.Textbox(
134
+ label="📊 Confidence Level",
135
+ interactive=False
136
+ )
137
+
138
+ with gr.Row():
139
+ real_prob = gr.Number(
140
+ label="✅ Real Probability",
141
+ interactive=False
142
+ )
143
+ fake_prob = gr.Number(
144
+ label="❌ Fake Probability",
145
+ interactive=False
146
+ )
147
+
148
+ # Examples section
149
+ gr.Markdown("### 📚 Try these examples:")
150
+ gr.Examples(
151
+ examples=examples,
152
+ inputs=[title_input, text_input],
153
+ label="Example Articles"
154
+ )
155
+
156
+ # Information section
157
+ with gr.Accordion("ℹ️ Model Information", open=False):
158
+ gr.Markdown(
159
+ """
160
+ **Model Details:**
161
+ - **Algorithm**: Support Vector Machine (SVM) with TF-IDF vectorization
162
+ - **Calibration**: Uses CalibratedClassifierCV for probability estimates
163
+ - **Features**: Text preprocessing, TF-IDF feature extraction
164
+ - **Training**: Trained on labeled real/fake news dataset
165
+
166
+ **Interpretation:**
167
+ - **Real Probability > 0.5**: Article classified as REAL news
168
+ - **Fake Probability > 0.5**: Article classified as FAKE news
169
+ - **Confidence Level**: Based on the highest probability score
170
+ - High: ≥ 80% confidence
171
+ - Medium: 60-79% confidence
172
+ - Low: < 60% confidence
173
+
174
+ **Note**: This is a machine learning model and may not be 100% accurate.
175
+ Always verify important information through multiple reliable sources.
176
+ """
177
+ )
178
+
179
+ # Set up the prediction function
180
+ classify_btn.click(
181
+ fn=predict_news,
182
+ inputs=[title_input, text_input],
183
+ outputs=[prediction_output, real_prob, fake_prob, confidence_output]
184
+ )
185
+
186
+ # Launch the app
187
+ if __name__ == "__main__":
188
+ demo.launch(server_name="0.0.0.0", server_port=7860)
helper.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ from typing import Optional
2
+ # =========================
3
+ # Helpers
4
+ # =========================
5
def _combine(title: Optional[str], text: Optional[str]) -> str:
    """Merge title and body into one normalized string.

    Falsy parts (``None`` or ``""``) are treated as empty. The two parts are
    joined with a single space, then the result is stripped of surrounding
    whitespace and lower-cased.
    """
    head = title if title else ""
    body = text if text else ""
    merged = "{} {}".format(head, body)
    return merged.strip().lower()
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ fastapi==0.111.0
2
+ uvicorn[standard]==0.30.1
3
+ scikit-learn==1.6.1
4
+ joblib==1.3.2
5
+ pydantic==2.8.2
6
+ numpy==1.26.4
requirements_gradio.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ fastapi==0.111.0
2
+ uvicorn[standard]==0.30.1
3
+ scikit-learn==1.6.1
4
+ joblib==1.3.2
5
+ pydantic==2.8.2
6
+ numpy==1.26.4
7
+ gradio==4.44.0
schemas.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Optional, Literal, List
2
+ from pydantic import BaseModel, Field
3
+ from config import MODEL_PATH
4
+
5
+ # =========================
6
+ # Schemas
7
+ # =========================
8
class PredictIn(BaseModel):
    """Request body for a single prediction.

    All fields are optional and default to ``None``.
    """

    # Article title.
    title: Optional[str] = Field(default=None, description="Tiêu đề bài báo")
    # Article body.
    text: Optional[str] = Field(default=None, description="Nội dung bài báo")
    # Title and text already merged into one string.
    text_all: Optional[str] = Field(default=None, description="title + text đã gộp sẵn")
12
+
13
+
14
class PredictOut(BaseModel):
    """Response body for a single prediction."""

    # Predicted class name.
    label: Literal["real", "fake"]
    # Probability assigned to the "real" class.
    prob_real: float
    # Probability assigned to the "fake" class.
    prob_fake: float
    # Defaults to True; the description says this is because
    # CalibratedClassifierCV(method='sigmoid') was used.
    calibrated: bool = Field(
        default=True,
        description="True vì đã dùng CalibratedClassifierCV(method='sigmoid').",
    )
    # Defaults to the MODEL_PATH value imported from config.
    model_path: str = Field(default=MODEL_PATH)
23
+
24
+
25
class PredictBatchIn(BaseModel):
    """Request body for a batch prediction: a list of ``PredictIn`` items."""

    items: List[PredictIn]
27
+
28
+
29
class PredictBatchOut(BaseModel):
    """Response body for a batch prediction: a list of ``PredictOut`` results."""

    results: List[PredictOut]
31
+
start.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Start script for the SVM Fake News Classifier
4
+ """
5
+ import uvicorn
6
+ from app import app
7
+
8
if __name__ == "__main__":
    import os

    # HOST / PORT are listed as server configuration in .env.example; read
    # them here, keeping the previous hard-coded values as defaults.
    host = os.getenv("HOST", "0.0.0.0")
    port = int(os.getenv("PORT", "8000"))

    print("Starting SVM Fake News Classifier...")
    uvicorn.run(
        app,
        host=host,
        port=port,
        log_level="info",
        reload=False,
    )