TrBn17
committed on
Commit
·
e775b41
1
Parent(s):
2a4d426
app
Browse files- .dockerignore +56 -0
- .env.example +12 -0
- .gitattributes +2 -0
- Dockerfile +34 -0
- README.md +84 -13
- README_HF.md +10 -0
- README_gradio.md +47 -0
- __init__.py +1 -0
- app.py +128 -0
- config.py +14 -0
- docker-compose.yml +19 -0
- fake_news_model.joblib +3 -0
- gradio_app.py +188 -0
- helper.py +8 -0
- requirements.txt +6 -0
- requirements_gradio.txt +7 -0
- schemas.py +31 -0
- start.py +16 -0
.dockerignore
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Python
|
| 2 |
+
__pycache__/
|
| 3 |
+
*.py[cod]
|
| 4 |
+
*$py.class
|
| 5 |
+
*.so
|
| 6 |
+
.Python
|
| 7 |
+
build/
|
| 8 |
+
develop-eggs/
|
| 9 |
+
dist/
|
| 10 |
+
downloads/
|
| 11 |
+
eggs/
|
| 12 |
+
.eggs/
|
| 13 |
+
lib/
|
| 14 |
+
lib64/
|
| 15 |
+
parts/
|
| 16 |
+
sdist/
|
| 17 |
+
var/
|
| 18 |
+
wheels/
|
| 19 |
+
*.egg-info/
|
| 20 |
+
.installed.cfg
|
| 21 |
+
*.egg
|
| 22 |
+
|
| 23 |
+
# Virtual environments
|
| 24 |
+
venv/
|
| 25 |
+
env/
|
| 26 |
+
ENV/
|
| 27 |
+
|
| 28 |
+
# IDE
|
| 29 |
+
.vscode/
|
| 30 |
+
.idea/
|
| 31 |
+
*.swp
|
| 32 |
+
*.swo
|
| 33 |
+
|
| 34 |
+
# OS
|
| 35 |
+
.DS_Store
|
| 36 |
+
Thumbs.db
|
| 37 |
+
|
| 38 |
+
# Git
|
| 39 |
+
.git/
|
| 40 |
+
.gitignore
|
| 41 |
+
|
| 42 |
+
# Docker
|
| 43 |
+
Dockerfile*
|
| 44 |
+
docker-compose*
|
| 45 |
+
.dockerignore
|
| 46 |
+
|
| 47 |
+
# Logs
|
| 48 |
+
*.log
|
| 49 |
+
|
| 50 |
+
# Temporary files
|
| 51 |
+
*.tmp
|
| 52 |
+
*.temp
|
| 53 |
+
|
| 54 |
+
# Documentation
|
| 55 |
+
README.md
|
| 56 |
+
docs/
|
.env.example
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Environment Variables for SVM Classifier
|
| 2 |
+
# Copy this file to .env and set your values
|
| 3 |
+
|
| 4 |
+
# API Key for authentication
|
| 5 |
+
API_KEY=my-super-secret-api-key-2024
|
| 6 |
+
|
| 7 |
+
# Path to the model file
|
| 8 |
+
MODEL_PATH=fake_news_model.joblib
|
| 9 |
+
|
| 10 |
+
# Server configuration
|
| 11 |
+
HOST=0.0.0.0
|
| 12 |
+
PORT=8000
|
.gitattributes
CHANGED
|
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
*.env
|
Dockerfile
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Use Python 3.10 slim image
|
| 2 |
+
FROM python:3.10-slim
|
| 3 |
+
|
| 4 |
+
# Set working directory
|
| 5 |
+
WORKDIR /app
|
| 6 |
+
|
| 7 |
+
# Install system dependencies
|
| 8 |
+
# curl is needed by the HEALTHCHECK instruction further down; the slim base
# image does not include it, so without this the health check always fails.
RUN apt-get update && apt-get install -y \
    gcc \
    g++ \
    curl \
    && rm -rf /var/lib/apt/lists/*
|
| 12 |
+
|
| 13 |
+
# Copy requirements first for better caching
|
| 14 |
+
COPY requirements.txt .
|
| 15 |
+
|
| 16 |
+
# Install Python dependencies
|
| 17 |
+
RUN pip install --no-cache-dir -r requirements.txt
|
| 18 |
+
|
| 19 |
+
# Copy application code
|
| 20 |
+
COPY . .
|
| 21 |
+
|
| 22 |
+
# Create non-root user
|
| 23 |
+
RUN useradd -m -u 1000 appuser && chown -R appuser:appuser /app
|
| 24 |
+
USER appuser
|
| 25 |
+
|
| 26 |
+
# Expose port
|
| 27 |
+
EXPOSE 8000
|
| 28 |
+
|
| 29 |
+
# Health check
|
| 30 |
+
HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 \
|
| 31 |
+
CMD curl -f http://localhost:8000/health || exit 1
|
| 32 |
+
|
| 33 |
+
# Run the application
|
| 34 |
+
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8000"]
|
README.md
CHANGED
|
@@ -1,13 +1,84 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# SVM Fake News Classifier
|
| 2 |
+
|
| 3 |
+
FastAPI application for classifying news articles as real or fake using Support Vector Machine with TF-IDF features.
|
| 4 |
+
|
| 5 |
+
## Features
|
| 6 |
+
|
| 7 |
+
- FastAPI REST API
|
| 8 |
+
- SVM model with TF-IDF vectorization
|
| 9 |
+
- Calibrated probability predictions
|
| 10 |
+
- API key authentication
|
| 11 |
+
- Health check endpoint
|
| 12 |
+
- Docker support
|
| 13 |
+
|
| 14 |
+
## API Endpoints
|
| 15 |
+
|
| 16 |
+
- `GET /` - API information
|
| 17 |
+
- `GET /health` - Health check
|
| 18 |
+
- `POST /predict` - Single prediction
|
| 19 |
+
- `POST /predict_batch` - Batch predictions
|
| 20 |
+
- `GET /docs` - Interactive API documentation
|
| 21 |
+
|
| 22 |
+
## Quick Start
|
| 23 |
+
|
| 24 |
+
### Using Docker
|
| 25 |
+
|
| 26 |
+
```bash
|
| 27 |
+
# Build and run with docker-compose
|
| 28 |
+
docker-compose up --build
|
| 29 |
+
|
| 30 |
+
# Or build and run manually
|
| 31 |
+
docker build -t svm-classifier .
|
| 32 |
+
docker run -p 8000:8000 svm-classifier
|
| 33 |
+
```
|
| 34 |
+
|
| 35 |
+
### Local Development
|
| 36 |
+
|
| 37 |
+
```bash
|
| 38 |
+
# Install dependencies
|
| 39 |
+
pip install -r requirements.txt
|
| 40 |
+
|
| 41 |
+
# Run the application
|
| 42 |
+
python app.py
|
| 43 |
+
# or
|
| 44 |
+
python start.py
|
| 45 |
+
# or
|
| 46 |
+
uvicorn app:app --host 0.0.0.0 --port 8000
|
| 47 |
+
```
|
| 48 |
+
|
| 49 |
+
## Usage
|
| 50 |
+
|
| 51 |
+
### Single Prediction
|
| 52 |
+
|
| 53 |
+
```bash
|
| 54 |
+
curl -X POST "http://localhost:8000/predict" \
|
| 55 |
+
-H "Content-Type: application/json" \
|
| 56 |
+
-H "x-api-key: my-super-secret-api-key-2024" \
|
| 57 |
+
-d '{
|
| 58 |
+
"title": "Breaking News",
|
| 59 |
+
"text": "This is a news article text..."
|
| 60 |
+
}'
|
| 61 |
+
```
|
| 62 |
+
|
| 63 |
+
### Batch Prediction
|
| 64 |
+
|
| 65 |
+
```bash
|
| 66 |
+
curl -X POST "http://localhost:8000/predict_batch" \
|
| 67 |
+
-H "Content-Type: application/json" \
|
| 68 |
+
-H "x-api-key: my-super-secret-api-key-2024" \
|
| 69 |
+
-d '{
|
| 70 |
+
"items": [
|
| 71 |
+
{"title": "News 1", "text": "Text 1"},
|
| 72 |
+
{"title": "News 2", "text": "Text 2"}
|
| 73 |
+
]
|
| 74 |
+
}'
|
| 75 |
+
```
|
| 76 |
+
|
| 77 |
+
## Environment Variables
|
| 78 |
+
|
| 79 |
+
- `MODEL_PATH`: Path to the model file (default: `fake_news_model.joblib`)
|
| 80 |
+
- `API_KEY`: API key for authentication, sent in the `x-api-key` request header (default: `my-super-secret-api-key-2024`; change it before deploying)
|
| 81 |
+
|
| 82 |
+
## License
|
| 83 |
+
|
| 84 |
+
Apache 2.0
|
README_HF.md
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: SVM Fake News Classifier
|
| 3 |
+
emoji: 📰
|
| 4 |
+
colorFrom: red
|
| 5 |
+
colorTo: blue
|
| 6 |
+
sdk: docker
|
| 7 |
+
app_port: 7860
|
| 8 |
+
pinned: false
|
| 9 |
+
license: apache-2.0
|
| 10 |
+
---
|
README_gradio.md
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: SVM Fake News Classifier
|
| 3 |
+
emoji: 📰
|
| 4 |
+
colorFrom: red
|
| 5 |
+
colorTo: blue
|
| 6 |
+
sdk: gradio
|
| 7 |
+
sdk_version: "4.44.0"
|
| 8 |
+
app_file: gradio_app.py
|
| 9 |
+
pinned: false
|
| 10 |
+
license: apache-2.0
|
| 11 |
+
---
|
| 12 |
+
|
| 13 |
+
# SVM Fake News Classifier
|
| 14 |
+
|
| 15 |
+
An interactive web application for classifying news articles as real or fake using Support Vector Machine with TF-IDF features.
|
| 16 |
+
|
| 17 |
+
## Features
|
| 18 |
+
|
| 19 |
+
- 🤖 **SVM Model**: Support Vector Machine with TF-IDF vectorization
|
| 20 |
+
- 📊 **Calibrated Probabilities**: Reliable confidence scores using CalibratedClassifierCV
|
| 21 |
+
- 🎯 **Interactive Interface**: User-friendly Gradio web interface
|
| 22 |
+
- 📚 **Example Articles**: Pre-loaded examples to test the model
|
| 23 |
+
- 📈 **Confidence Levels**: High/Medium/Low confidence indicators
|
| 24 |
+
|
| 25 |
+
## How to Use
|
| 26 |
+
|
| 27 |
+
1. **Enter Article Details**: Input the news title and content
|
| 28 |
+
2. **Get Prediction**: Click "Classify News" to analyze the article
|
| 29 |
+
3. **Review Results**: Check the prediction, probabilities, and confidence level
|
| 30 |
+
|
| 31 |
+
## Model Information
|
| 32 |
+
|
| 33 |
+
- **Algorithm**: Support Vector Machine (SVM)
|
| 34 |
+
- **Features**: TF-IDF text vectorization
|
| 35 |
+
- **Calibration**: CalibratedClassifierCV for probability estimates
|
| 36 |
+
- **Output**: Binary classification (Real/Fake) with confidence scores
|
| 37 |
+
|
| 38 |
+
## API Version
|
| 39 |
+
|
| 40 |
+
For programmatic access, a FastAPI version is also available with the following endpoints:
|
| 41 |
+
- `POST /predict` - Single article prediction
|
| 42 |
+
- `POST /predict_batch` - Batch predictions
|
| 43 |
+
- `GET /health` - Health check
|
| 44 |
+
|
| 45 |
+
## Disclaimer
|
| 46 |
+
|
| 47 |
+
This is a machine learning model for educational and research purposes. Always verify important information through multiple reliable sources.
|
__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
# SVM Fake News Classifier Package
|
app.py
ADDED
|
@@ -0,0 +1,128 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# app.py
|
| 2 |
+
from __future__ import annotations
|
| 3 |
+
|
| 4 |
+
import warnings
|
| 5 |
+
from typing import List, Literal, Optional, Tuple
|
| 6 |
+
from config import MODEL_PATH, REAL_LABEL, API_KEY
|
| 7 |
+
import joblib
|
| 8 |
+
from fastapi import FastAPI, Header, HTTPException
|
| 9 |
+
from helper import _combine
|
| 10 |
+
from schemas import PredictOut, PredictBatchIn, PredictIn, PredictBatchOut
|
| 11 |
+
|
| 12 |
+
# Suppress sklearn version warnings
|
| 13 |
+
warnings.filterwarnings("ignore", category=UserWarning, module="sklearn")
|
| 14 |
+
warnings.filterwarnings("ignore", message=".*InconsistentVersionWarning.*")
|
| 15 |
+
# =========================
|
| 16 |
+
# Load calibrated model
|
| 17 |
+
# (Pipeline: TF-IDF -> CalibratedClassifierCV(LinearSVC))
|
| 18 |
+
# =========================
|
| 19 |
+
# Additional specific suppression for sklearn version warnings
|
| 20 |
+
try:
|
| 21 |
+
from sklearn.exceptions import InconsistentVersionWarning
|
| 22 |
+
warnings.filterwarnings("ignore", category=InconsistentVersionWarning)
|
| 23 |
+
except ImportError:
|
| 24 |
+
# Fallback for older sklearn versions
|
| 25 |
+
pass
|
| 26 |
+
|
| 27 |
+
# Guard against double loading
|
| 28 |
+
if 'PIPE' not in globals():
|
| 29 |
+
try:
|
| 30 |
+
print("Loading model from:", MODEL_PATH)
|
| 31 |
+
with warnings.catch_warnings():
|
| 32 |
+
warnings.simplefilter("ignore")
|
| 33 |
+
PIPE = joblib.load(MODEL_PATH)
|
| 34 |
+
print("Model loaded successfully")
|
| 35 |
+
except Exception as e:
|
| 36 |
+
print(f"Error loading model: {e}")
|
| 37 |
+
raise
|
| 38 |
+
|
| 39 |
+
# Lấy thứ tự class từ estimator cuối để map xác suất cho chắc
|
| 40 |
+
try:
|
| 41 |
+
classes = list(PIPE.named_steps["clf"].classes_)
|
| 42 |
+
except Exception:
|
| 43 |
+
classes = list(getattr(PIPE, "classes_", [0, 1])) # fallback
|
| 44 |
+
|
| 45 |
+
print(f"Model classes: {classes}")
|
| 46 |
+
IDX_REAL = classes.index(REAL_LABEL)
|
| 47 |
+
IDX_FAKE = classes.index(0)
|
| 48 |
+
print(f"Real index: {IDX_REAL}, Fake index: {IDX_FAKE}")
|
| 49 |
+
else:
|
| 50 |
+
print("Model already loaded, skipping reload...")
|
| 51 |
+
|
| 52 |
+
# =========================
|
| 53 |
+
# Core inference
|
| 54 |
+
# =========================
|
| 55 |
+
def infer_one(inp: PredictIn) -> PredictOut:
    """Classify a single article and return its label with both class probabilities.

    ``inp.text_all`` is used (stripped and lower-cased) when it is non-empty;
    otherwise the input text is built from ``inp.title`` and ``inp.text``
    through ``_combine``.
    """
    if inp.text_all:
        document = inp.text_all.strip().lower()
    else:
        document = _combine(inp.title, inp.text)

    # Silence any warnings emitted while the pipeline predicts.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        class_probs = PIPE.predict_proba([document])[0]

    p_real = float(class_probs[IDX_REAL])
    p_fake = float(class_probs[IDX_FAKE])

    # A probability of exactly 0.5 is labelled "real".
    if p_real >= 0.5:
        verdict = "real"
    else:
        verdict = "fake"

    return PredictOut(label=verdict, prob_real=p_real, prob_fake=p_fake)
|
| 73 |
+
|
| 74 |
+
|
| 75 |
+
def infer_batch(items: List[PredictIn]) -> List[PredictOut]:
    """Run ``infer_one`` on every item and return the results in input order."""
    results = []
    for item in items:
        results.append(infer_one(item))
    return results
|
| 77 |
+
|
| 78 |
+
|
| 79 |
+
# =========================
|
| 80 |
+
# FastAPI endpoints
|
| 81 |
+
# =========================
|
| 82 |
+
app = FastAPI(
|
| 83 |
+
title="SVM Fake/Real News Classifier",
|
| 84 |
+
description="API for classifying news as real or fake using SVM with TF-IDF features",
|
| 85 |
+
version="1.0.0"
|
| 86 |
+
)
|
| 87 |
+
|
| 88 |
+
@app.get("/")
def root():
    """Describe the service: its endpoints and basic model metadata."""
    endpoints = {
        "predict": "/predict",
        "predict_batch": "/predict_batch",
        "health": "/health",
    }
    model_info = {
        "classes": ["fake", "real"],
        "model_path": MODEL_PATH,
        "calibrated": True,
    }
    return {
        "message": "SVM Fake/Real News Classifier API",
        "endpoints": endpoints,
        "model_info": model_info,
    }
|
| 103 |
+
|
| 104 |
+
@app.get("/health")
def health_check():
    """Report liveness and whether a module-level ``PIPE`` object exists."""
    model_loaded = "PIPE" in globals()
    return {"status": "healthy", "model_loaded": model_loaded}
|
| 107 |
+
|
| 108 |
+
@app.post("/predict", response_model=PredictOut)
def predict(payload: PredictIn, x_api_key: str = Header(default="")):
    """Classify one article after checking the ``x-api-key`` request header.

    Args:
        payload: Article to classify.
        x_api_key: Key supplied by the client; empty when the header is absent.

    Returns:
        The prediction produced by ``infer_one``.

    Raises:
        HTTPException: 401 when the supplied key does not match ``API_KEY``.
    """
    import secrets  # local import keeps this handler self-contained

    # Constant-time comparison so response timing does not reveal how much of
    # the key matched. Bytes are compared because compare_digest rejects
    # non-ASCII str arguments.
    supplied = x_api_key.encode("utf-8")
    expected = API_KEY.encode("utf-8")
    if not secrets.compare_digest(supplied, expected):
        raise HTTPException(status_code=401, detail="Unauthorized")
    return infer_one(payload)
|
| 113 |
+
|
| 114 |
+
@app.post("/predict_batch", response_model=PredictBatchOut)
def predict_batch(payload: PredictBatchIn, x_api_key: str = Header(default="")):
    """Classify a batch of articles after checking the ``x-api-key`` request header.

    Args:
        payload: Batch whose ``items`` are classified one by one.
        x_api_key: Key supplied by the client; empty when the header is absent.

    Returns:
        A ``PredictBatchOut`` whose ``results`` come from ``infer_batch``.

    Raises:
        HTTPException: 401 when the supplied key does not match ``API_KEY``.
    """
    import secrets  # local import keeps this handler self-contained

    # Constant-time comparison so response timing does not reveal how much of
    # the key matched. Bytes are compared because compare_digest rejects
    # non-ASCII str arguments.
    supplied = x_api_key.encode("utf-8")
    expected = API_KEY.encode("utf-8")
    if not secrets.compare_digest(supplied, expected):
        raise HTTPException(status_code=401, detail="Unauthorized")
    return PredictBatchOut(results=infer_batch(payload.items))
|
| 119 |
+
|
| 120 |
+
|
| 121 |
+
if __name__ == "__main__":
    import os

    import uvicorn

    # Bind address and port come from the HOST / PORT environment variables
    # (the ones listed in .env.example). The defaults are 0.0.0.0:8000 so that
    # `python app.py` listens on the same port as the Dockerfile, start.py and
    # the README examples, instead of a one-off hard-coded port.
    host = os.getenv("HOST", "0.0.0.0")
    port = int(os.getenv("PORT", "8000"))

    print("===== Application Ready =====")
    print("FastAPI app initialized successfully")
    print("API endpoints available at /predict and /predict_batch")
    print("API documentation at /docs")
    print("================================")
    uvicorn.run(app, host=host, port=port)
|
config.py
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
import warnings

# Model configuration
MODEL_PATH = os.getenv("MODEL_PATH", "fake_news_model.joblib")

# API key configuration - it can be supplied in several ways:
# 1. Environment variable: export API_KEY="your-secret-key"
# 2. Docker: -e API_KEY="your-secret-key"
# 3. docker-compose.yml: environment section
# 4. Changing the default value below
_DEFAULT_API_KEY = "my-super-secret-api-key-2024"
API_KEY = os.getenv("API_KEY", _DEFAULT_API_KEY)

# The fallback key is committed to the repository and therefore publicly
# known; make it visible at startup when a deployment is still relying on it.
if API_KEY == _DEFAULT_API_KEY:
    warnings.warn(
        "API_KEY is using the built-in default value; set the API_KEY "
        "environment variable to a private secret before exposing the API.",
        RuntimeWarning,
        stacklevel=2,
    )

# Model labels
REAL_LABEL = 1
|
docker-compose.yml
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version: '3.8'
|
| 2 |
+
|
| 3 |
+
services:
|
| 4 |
+
svm-classifier:
|
| 5 |
+
build: .
|
| 6 |
+
ports:
|
| 7 |
+
- "8000:8000"
|
| 8 |
+
environment:
|
| 9 |
+
- MODEL_PATH=fake_news_model.joblib
|
| 10 |
+
- API_KEY=my-super-secret-api-key-2024 # THAY ĐỔI NÀY!
|
| 11 |
+
volumes:
|
| 12 |
+
- ./fake_news_model.joblib:/app/fake_news_model.joblib:ro
|
| 13 |
+
restart: unless-stopped
|
| 14 |
+
healthcheck:
|
| 15 |
+
test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
|
| 16 |
+
interval: 30s
|
| 17 |
+
timeout: 10s
|
| 18 |
+
retries: 3
|
| 19 |
+
start_period: 40s
|
fake_news_model.joblib
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c754c955fbc75848b30dd061e1fd48ae8d7954c430bbf1e18440e41d8dcd39a8
|
| 3 |
+
size 46836547
|
gradio_app.py
ADDED
|
@@ -0,0 +1,188 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import warnings
|
| 3 |
+
from typing import Tuple
|
| 4 |
+
from config import MODEL_PATH, REAL_LABEL
|
| 5 |
+
import joblib
|
| 6 |
+
from helper import _combine
|
| 7 |
+
from schemas import PredictIn
|
| 8 |
+
|
| 9 |
+
# Suppress sklearn version warnings
|
| 10 |
+
warnings.filterwarnings("ignore", category=UserWarning, module="sklearn")
|
| 11 |
+
warnings.filterwarnings("ignore", message=".*InconsistentVersionWarning.*")
|
| 12 |
+
|
| 13 |
+
try:
|
| 14 |
+
from sklearn.exceptions import InconsistentVersionWarning
|
| 15 |
+
warnings.filterwarnings("ignore", category=InconsistentVersionWarning)
|
| 16 |
+
except ImportError:
|
| 17 |
+
pass
|
| 18 |
+
|
| 19 |
+
# Load model
|
| 20 |
+
print("Loading model from:", MODEL_PATH)
|
| 21 |
+
with warnings.catch_warnings():
|
| 22 |
+
warnings.simplefilter("ignore")
|
| 23 |
+
PIPE = joblib.load(MODEL_PATH)
|
| 24 |
+
print("Model loaded successfully")
|
| 25 |
+
|
| 26 |
+
# Get class indices
|
| 27 |
+
try:
|
| 28 |
+
classes = list(PIPE.named_steps["clf"].classes_)
|
| 29 |
+
except Exception:
|
| 30 |
+
classes = list(getattr(PIPE, "classes_", [0, 1]))
|
| 31 |
+
|
| 32 |
+
print(f"Model classes: {classes}")
|
| 33 |
+
IDX_REAL = classes.index(REAL_LABEL)
|
| 34 |
+
IDX_FAKE = classes.index(0)
|
| 35 |
+
|
| 36 |
+
def predict_news(title: str, text: str) -> Tuple[str, float, float, str]:
    """Classify an article and grade the confidence of the winning class.

    Args:
        title: News article title.
        text: News article content.

    Returns:
        ``(prediction, prob_real, prob_fake, confidence_level)`` where
        ``prediction`` is ``"REAL"`` or ``"FAKE"`` and ``confidence_level``
        is ``"High"``, ``"Medium"`` or ``"Low"``.
    """
    document = _combine(title, text)

    # Silence any warnings emitted while the pipeline predicts.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        class_probs = PIPE.predict_proba([document])[0]

    p_real = float(class_probs[IDX_REAL])
    p_fake = float(class_probs[IDX_FAKE])

    # A probability of exactly 0.5 counts as REAL; confidence is the
    # probability of whichever class was chosen.
    is_real = p_real >= 0.5
    verdict = "REAL" if is_real else "FAKE"
    winning_prob = p_real if is_real else p_fake

    # First threshold reached (highest first) decides the level.
    level = "Low"
    for floor, name in ((0.8, "High"), (0.6, "Medium")):
        if winning_prob >= floor:
            level = name
            break

    return verdict, p_real, p_fake, level
|
| 75 |
+
|
| 76 |
+
# Example articles for demonstration
|
| 77 |
+
examples = [
|
| 78 |
+
[
|
| 79 |
+
"Scientists Discover Breakthrough in Cancer Treatment",
|
| 80 |
+
"Researchers at leading medical institutions have announced a significant breakthrough in cancer treatment methodology. The new approach shows promising results in early clinical trials, offering hope for millions of patients worldwide. The research, published in a peer-reviewed journal, demonstrates improved survival rates and reduced side effects compared to traditional treatments."
|
| 81 |
+
],
|
| 82 |
+
[
|
| 83 |
+
"SHOCKING: Aliens Found Living Among Us, Government Confirms",
|
| 84 |
+
"In a stunning revelation that changes everything we know about humanity, government officials have finally confirmed that extraterrestrial beings have been living among humans for decades. Sources close to the matter reveal that these aliens have been secretly controlling world governments and manipulating global events from the shadows."
|
| 85 |
+
],
|
| 86 |
+
[
|
| 87 |
+
"Local Community Garden Helps Reduce Food Insecurity",
|
| 88 |
+
"A grassroots initiative in downtown Springfield has transformed an abandoned lot into a thriving community garden that provides fresh produce to local food banks. The project, started by neighborhood volunteers, has grown to include educational programs and has become a model for similar initiatives in other cities."
|
| 89 |
+
]
|
| 90 |
+
]
|
| 91 |
+
|
| 92 |
+
# Create Gradio interface
|
| 93 |
+
with gr.Blocks(title="SVM Fake News Classifier", theme=gr.themes.Soft()) as demo:
|
| 94 |
+
gr.Markdown(
|
| 95 |
+
"""
|
| 96 |
+
# 📰 SVM Fake News Classifier
|
| 97 |
+
|
| 98 |
+
This application uses a Support Vector Machine (SVM) with TF-IDF features to classify news articles as **real** or **fake**.
|
| 99 |
+
The model has been trained on a large dataset and uses calibrated probabilities for more reliable predictions.
|
| 100 |
+
|
| 101 |
+
### How to use:
|
| 102 |
+
1. Enter a news article title
|
| 103 |
+
2. Enter the article content/text
|
| 104 |
+
3. Click "Classify News" to get the prediction
|
| 105 |
+
|
| 106 |
+
The model will return:
|
| 107 |
+
- **Prediction**: Whether the article is classified as REAL or FAKE
|
| 108 |
+
- **Probabilities**: Confidence scores for both real and fake classifications
|
| 109 |
+
- **Confidence Level**: Overall confidence in the prediction (High/Medium/Low)
|
| 110 |
+
"""
|
| 111 |
+
)
|
| 112 |
+
|
| 113 |
+
with gr.Row():
|
| 114 |
+
with gr.Column():
|
| 115 |
+
title_input = gr.Textbox(
|
| 116 |
+
label="📰 News Title",
|
| 117 |
+
placeholder="Enter the news article title...",
|
| 118 |
+
lines=2
|
| 119 |
+
)
|
| 120 |
+
text_input = gr.Textbox(
|
| 121 |
+
label="📄 News Content",
|
| 122 |
+
placeholder="Enter the news article content...",
|
| 123 |
+
lines=8
|
| 124 |
+
)
|
| 125 |
+
classify_btn = gr.Button("🔍 Classify News", variant="primary", size="lg")
|
| 126 |
+
|
| 127 |
+
with gr.Column():
|
| 128 |
+
with gr.Group():
|
| 129 |
+
prediction_output = gr.Textbox(
|
| 130 |
+
label="🎯 Prediction",
|
| 131 |
+
interactive=False
|
| 132 |
+
)
|
| 133 |
+
confidence_output = gr.Textbox(
|
| 134 |
+
label="📊 Confidence Level",
|
| 135 |
+
interactive=False
|
| 136 |
+
)
|
| 137 |
+
|
| 138 |
+
with gr.Row():
|
| 139 |
+
real_prob = gr.Number(
|
| 140 |
+
label="✅ Real Probability",
|
| 141 |
+
interactive=False
|
| 142 |
+
)
|
| 143 |
+
fake_prob = gr.Number(
|
| 144 |
+
label="❌ Fake Probability",
|
| 145 |
+
interactive=False
|
| 146 |
+
)
|
| 147 |
+
|
| 148 |
+
# Examples section
|
| 149 |
+
gr.Markdown("### 📚 Try these examples:")
|
| 150 |
+
gr.Examples(
|
| 151 |
+
examples=examples,
|
| 152 |
+
inputs=[title_input, text_input],
|
| 153 |
+
label="Example Articles"
|
| 154 |
+
)
|
| 155 |
+
|
| 156 |
+
# Information section
|
| 157 |
+
with gr.Accordion("ℹ️ Model Information", open=False):
|
| 158 |
+
gr.Markdown(
|
| 159 |
+
"""
|
| 160 |
+
**Model Details:**
|
| 161 |
+
- **Algorithm**: Support Vector Machine (SVM) with TF-IDF vectorization
|
| 162 |
+
- **Calibration**: Uses CalibratedClassifierCV for probability estimates
|
| 163 |
+
- **Features**: Text preprocessing, TF-IDF feature extraction
|
| 164 |
+
- **Training**: Trained on labeled real/fake news dataset
|
| 165 |
+
|
| 166 |
+
**Interpretation:**
|
| 167 |
+
- **Real Probability > 0.5**: Article classified as REAL news
|
| 168 |
+
- **Fake Probability > 0.5**: Article classified as FAKE news
|
| 169 |
+
- **Confidence Level**: Based on the highest probability score
|
| 170 |
+
- High: ≥ 80% confidence
|
| 171 |
+
- Medium: 60-79% confidence
|
| 172 |
+
- Low: < 60% confidence
|
| 173 |
+
|
| 174 |
+
**Note**: This is a machine learning model and may not be 100% accurate.
|
| 175 |
+
Always verify important information through multiple reliable sources.
|
| 176 |
+
"""
|
| 177 |
+
)
|
| 178 |
+
|
| 179 |
+
# Set up the prediction function
|
| 180 |
+
classify_btn.click(
|
| 181 |
+
fn=predict_news,
|
| 182 |
+
inputs=[title_input, text_input],
|
| 183 |
+
outputs=[prediction_output, real_prob, fake_prob, confidence_output]
|
| 184 |
+
)
|
| 185 |
+
|
| 186 |
+
# Launch the app
|
| 187 |
+
if __name__ == "__main__":
|
| 188 |
+
demo.launch(server_name="0.0.0.0", server_port=7860)
|
helper.py
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import Optional
|
| 2 |
+
# =========================
|
| 3 |
+
# Helpers
|
| 4 |
+
# =========================
|
| 5 |
+
def _combine(title: Optional[str], text: Optional[str]) -> str:
|
| 6 |
+
title = title or ""
|
| 7 |
+
text = text or ""
|
| 8 |
+
return f"{title} {text}".strip().lower()
|
requirements.txt
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
fastapi==0.111.0
|
| 2 |
+
uvicorn[standard]==0.30.1
|
| 3 |
+
scikit-learn==1.6.1
|
| 4 |
+
joblib==1.3.2
|
| 5 |
+
pydantic==2.8.2
|
| 6 |
+
numpy==1.26.4
|
requirements_gradio.txt
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
fastapi==0.111.0
|
| 2 |
+
uvicorn[standard]==0.30.1
|
| 3 |
+
scikit-learn==1.6.1
|
| 4 |
+
joblib==1.3.2
|
| 5 |
+
pydantic==2.8.2
|
| 6 |
+
numpy==1.26.4
|
| 7 |
+
gradio==4.44.0
|
schemas.py
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import Optional, Literal, List
|
| 2 |
+
from pydantic import BaseModel, Field
|
| 3 |
+
from config import MODEL_PATH
|
| 4 |
+
|
| 5 |
+
# =========================
|
| 6 |
+
# Schemas
|
| 7 |
+
# =========================
|
| 8 |
+
class PredictIn(BaseModel):
    # Request body for one prediction; every field is optional and defaults to None.

    # Article headline.
    title: Optional[str] = Field(
        default=None,
        description="Tiêu đề bài báo",
    )
    # Article body.
    text: Optional[str] = Field(
        default=None,
        description="Nội dung bài báo",
    )
    # Title and text already merged into one string.
    text_all: Optional[str] = Field(
        default=None,
        description="title + text đã gộp sẵn",
    )
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
class PredictOut(BaseModel):
    # Response body for one prediction.

    # pydantic v2 reserves the "model_" attribute prefix and emits a
    # UserWarning for the `model_path` field below; clearing the protected
    # namespaces keeps the field name (and the response schema) unchanged
    # while removing the warning.
    model_config = {"protected_namespaces": ()}

    # Predicted class.
    label: Literal["real", "fake"]
    # Probability assigned to the "real" class.
    prob_real: float
    # Probability assigned to the "fake" class.
    prob_fake: float
    # Defaults to True; per the description, the model uses
    # CalibratedClassifierCV(method='sigmoid').
    calibrated: bool = Field(
        default=True,
        description="True vì đã dùng CalibratedClassifierCV(method='sigmoid').",
    )
    # Defaults to the configured MODEL_PATH.
    model_path: str = Field(default=MODEL_PATH)
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
class PredictBatchIn(BaseModel):
    # Request body for a batch prediction: a list of single-prediction inputs.
    items: List[PredictIn]
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
class PredictBatchOut(BaseModel):
    # Response body for a batch prediction: a list of single-prediction outputs.
    results: List[PredictOut]
|
| 31 |
+
|
start.py
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Start script for the SVM Fake News Classifier
|
| 4 |
+
"""
|
| 5 |
+
import uvicorn
|
| 6 |
+
from app import app
|
| 7 |
+
|
| 8 |
+
if __name__ == "__main__":
    import os

    # HOST / PORT are the server settings listed in .env.example; the
    # defaults reproduce the previously hard-coded 0.0.0.0:8000.
    host = os.getenv("HOST", "0.0.0.0")
    port = int(os.getenv("PORT", "8000"))

    print("Starting SVM Fake News Classifier...")
    uvicorn.run(
        app,
        host=host,
        port=port,
        log_level="info",
        reload=False,
    )
|