Update api.py
Browse files
api.py
CHANGED
|
@@ -6,37 +6,54 @@ import os
|
|
| 6 |
import re
|
| 7 |
import requests
|
| 8 |
from bs4 import BeautifulSoup
|
|
|
|
| 9 |
|
| 10 |
app = FastAPI(
|
| 11 |
-
title="API Deteksi Hoax
|
| 12 |
-
description="API untuk mendeteksi berita hoax
|
| 13 |
version="1.0.0"
|
| 14 |
)
|
| 15 |
|
| 16 |
-
# --- KONFIGURASI CORS ---
|
| 17 |
app.add_middleware(
|
| 18 |
CORSMiddleware,
|
| 19 |
allow_origins=["https://deteksi-berita-hoax-kappa.vercel.app/"],
|
| 20 |
allow_credentials=True,
|
| 21 |
-
allow_methods=["
|
| 22 |
-
allow_headers=["
|
| 23 |
)
|
| 24 |
-
|
| 25 |
-
#
|
| 26 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
try:
|
| 28 |
-
if os.path.exists(
|
| 29 |
-
|
| 30 |
-
print("Model berhasil dimuat!")
|
| 31 |
-
|
| 32 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
except Exception as e:
|
| 34 |
-
|
| 35 |
-
print(f"Error loading model: {e}")
|
| 36 |
|
| 37 |
-
|
|
|
|
| 38 |
class PredictRequest(BaseModel):
|
| 39 |
input_text: str
|
|
|
|
|
|
|
| 40 |
|
| 41 |
def scrape_berita(url):
|
| 42 |
"""Fungsi pembaca halaman web (Scraper)"""
|
|
@@ -44,26 +61,29 @@ def scrape_berita(url):
|
|
| 44 |
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}
|
| 45 |
response = requests.get(url, headers=headers, timeout=10)
|
| 46 |
response.raise_for_status()
|
| 47 |
-
|
| 48 |
soup = BeautifulSoup(response.content, 'html.parser')
|
| 49 |
paragraf = soup.find_all('p')
|
| 50 |
teks_berita = " ".join([p.get_text() for p in paragraf])
|
| 51 |
-
|
| 52 |
return teks_berita.strip()
|
| 53 |
except Exception as e:
|
| 54 |
return f"GAGAL: {e}"
|
| 55 |
|
| 56 |
-
|
| 57 |
@app.post("/predict")
|
| 58 |
def deteksi_hoax_api(request: PredictRequest):
|
| 59 |
-
|
| 60 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
|
| 62 |
teks_mentah = request.input_text.strip()
|
| 63 |
if not teks_mentah:
|
| 64 |
raise HTTPException(status_code=400, detail="Input tidak boleh kosong.")
|
| 65 |
|
| 66 |
-
# Logika Smart Input: Cek apakah input berupa URL
|
| 67 |
if teks_mentah.startswith("http://") or teks_mentah.startswith("https://"):
|
| 68 |
teks_untuk_dianalisis = scrape_berita(teks_mentah)
|
| 69 |
if teks_untuk_dianalisis.startswith("GAGAL:"):
|
|
@@ -71,21 +91,37 @@ def deteksi_hoax_api(request: PredictRequest):
|
|
| 71 |
else:
|
| 72 |
teks_untuk_dianalisis = teks_mentah
|
| 73 |
|
| 74 |
-
#
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 78 |
|
| 79 |
-
# Proses Ekstraksi Kata untuk Highlight Frontend
|
| 80 |
-
try:
|
| 81 |
-
vec = model[0]
|
| 82 |
-
clf = model[1]
|
| 83 |
-
feature_names = vec.get_feature_names_out()
|
| 84 |
-
log_odds = clf.feature_log_prob_[1] - clf.feature_log_prob_[0]
|
| 85 |
-
kamus_bobot = dict(zip(feature_names, log_odds))
|
| 86 |
-
except Exception:
|
| 87 |
-
kamus_bobot = {}
|
| 88 |
|
|
|
|
| 89 |
kata_kata = teks_untuk_dianalisis.split()
|
| 90 |
teks_highlight = []
|
| 91 |
|
|
@@ -93,7 +129,6 @@ def deteksi_hoax_api(request: PredictRequest):
|
|
| 93 |
kata_bersih = re.sub(r'[^a-z]', '', kata.lower())
|
| 94 |
bobot = float(kamus_bobot.get(kata_bersih, 0))
|
| 95 |
|
| 96 |
-
# Penentuan label per kata berdasarkan threshold
|
| 97 |
if bobot > 0.3:
|
| 98 |
label_kata = "Hoax"
|
| 99 |
elif bobot < -0.3:
|
|
@@ -107,10 +142,10 @@ def deteksi_hoax_api(request: PredictRequest):
|
|
| 107 |
"bobot": round(bobot, 4)
|
| 108 |
})
|
| 109 |
|
| 110 |
-
# Mengembalikan response dalam format JSON
|
| 111 |
return {
|
| 112 |
"status": "success",
|
| 113 |
"hasil_analisis": {
|
|
|
|
| 114 |
"teks_dianalisis": teks_untuk_dianalisis,
|
| 115 |
"prediksi_utama": "HOAX" if prob_hoax > prob_fakta else "FAKTA",
|
| 116 |
"probabilitas": {
|
|
@@ -119,4 +154,4 @@ def deteksi_hoax_api(request: PredictRequest):
|
|
| 119 |
}
|
| 120 |
},
|
| 121 |
"bedah_kata": teks_highlight
|
| 122 |
-
}
|
|
|
|
| 6 |
import re
|
| 7 |
import requests
|
| 8 |
from bs4 import BeautifulSoup
|
| 9 |
+
from tensorflow.keras.models import load_model
|
| 10 |
|
| 11 |
# Application object; title/description/version are passed to FastAPI as the
# API metadata.
app = FastAPI(
    title="API Deteksi Hoax Multi-Model",
    description="API untuk mendeteksi berita hoax menggunakan pilihan model.",
    version="1.0.0"
)

# CORS: only the deployed frontend may call this API from a browser.
# The origin must be written WITHOUT a trailing slash. Browsers send the
# Origin header as scheme://host[:port] with no path, and the middleware
# compares it to this list as an exact string, so ".../app/" never matches.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["https://deteksi-berita-hoax-kappa.vercel.app"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
|
| 24 |
+
|
| 25 |
+
# --- 1. LOAD BOTH MODELS ---
# Registry of the selectable models, keyed by the `model_type` value sent by
# the client. An entry stays None when its file is missing or fails to load;
# the /predict handler turns a None entry into an HTTP 500.
models = {
    "naive_bayes": None,
    "lstm": None
}

PATH_NB = 'model_hoax_complete.pkl'
PATH_LSTM = 'lstm_fake_news_model.h5'


def _muat_model(kunci, path, nama):
    """Load the model stored at *path* into ``models[kunci]``.

    Best-effort: this never raises, so one broken model cannot stop the
    server from starting with the other one.

    Args:
        kunci: Key in ``models`` to fill.
        path: Location of the serialized model file.
        nama: Human-readable model name used in the log messages.
    """
    if not os.path.exists(path):
        # Previously a missing file was skipped without any message, which
        # made "model is None" failures hard to diagnose from the logs.
        print(f"File model {nama} tidak ditemukan: {path}")
        return
    try:
        # joblib.load unpickles the file; only point it at trusted,
        # locally-produced model files.
        models[kunci] = joblib.load(path)
    except Exception as e:
        print(f"Error loading {nama}: {e}")
    else:
        print(f"Model {nama} berhasil dimuat!")


# Naive Bayes model
_muat_model("naive_bayes", PATH_NB, "Naive Bayes")

# LSTM model
# NOTE(review): a ".h5" file is normally a Keras HDF5 checkpoint, which joblib
# presumably cannot read. If this load reports an error, load the file with
# tensorflow.keras.models.load_model(PATH_LSTM) instead - and confirm that the
# prediction code feeds the model tokenized, padded sequences rather than
# calling predict_proba on raw text.
_muat_model("lstm", PATH_LSTM, "LSTM")
|
|
|
|
| 50 |
|
| 51 |
+
|
| 52 |
+
# --- 2. REQUEST SCHEMA ---
|
| 53 |
class PredictRequest(BaseModel):
    """Request body accepted by the ``/predict`` endpoint."""

    # Text to analyse. A value starting with "http://" or "https://" is
    # treated by the handler as a URL whose page is scraped first.
    input_text: str
    # Key of the model to use; the handler rejects values that are not keys
    # of `models`.
    model_type: str = "naive_bayes" # Falls back to naive_bayes when the client omits it
|
| 56 |
+
|
| 57 |
|
| 58 |
def scrape_berita(url):
|
| 59 |
"""Fungsi pembaca halaman web (Scraper)"""
|
|
|
|
| 61 |
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}
|
| 62 |
response = requests.get(url, headers=headers, timeout=10)
|
| 63 |
response.raise_for_status()
|
|
|
|
| 64 |
soup = BeautifulSoup(response.content, 'html.parser')
|
| 65 |
paragraf = soup.find_all('p')
|
| 66 |
teks_berita = " ".join([p.get_text() for p in paragraf])
|
|
|
|
| 67 |
return teks_berita.strip()
|
| 68 |
except Exception as e:
|
| 69 |
return f"GAGAL: {e}"
|
| 70 |
|
| 71 |
+
|
| 72 |
@app.post("/predict")
|
| 73 |
def deteksi_hoax_api(request: PredictRequest):
|
| 74 |
+
# --- 3. PILIH MODEL ---
|
| 75 |
+
jenis_model = request.model_type
|
| 76 |
+
if jenis_model not in models:
|
| 77 |
+
raise HTTPException(status_code=400, detail="Pilihan model tidak valid. Gunakan 'naive_bayes' atau 'lstm'.")
|
| 78 |
+
|
| 79 |
+
aktif_model = models[jenis_model]
|
| 80 |
+
if aktif_model is None:
|
| 81 |
+
raise HTTPException(status_code=500, detail=f"Model {jenis_model} tidak ditemukan di server.")
|
| 82 |
|
| 83 |
teks_mentah = request.input_text.strip()
|
| 84 |
if not teks_mentah:
|
| 85 |
raise HTTPException(status_code=400, detail="Input tidak boleh kosong.")
|
| 86 |
|
|
|
|
| 87 |
if teks_mentah.startswith("http://") or teks_mentah.startswith("https://"):
|
| 88 |
teks_untuk_dianalisis = scrape_berita(teks_mentah)
|
| 89 |
if teks_untuk_dianalisis.startswith("GAGAL:"):
|
|
|
|
| 91 |
else:
|
| 92 |
teks_untuk_dianalisis = teks_mentah
|
| 93 |
|
| 94 |
+
# --- 4. PREDIKSI BERDASARKAN MODEL ---
|
| 95 |
+
kamus_bobot = {}
|
| 96 |
+
|
| 97 |
+
if jenis_model == "naive_bayes":
|
| 98 |
+
# Logika untuk algoritma scikit-learn
|
| 99 |
+
proba = aktif_model.predict_proba([teks_untuk_dianalisis])[0]
|
| 100 |
+
prob_fakta = float(proba[0])
|
| 101 |
+
prob_hoax = float(proba[1])
|
| 102 |
+
|
| 103 |
+
# Ekstraksi kata untuk highlight Frontend (biasanya hanya ada pada model linier/Naive Bayes)
|
| 104 |
+
try:
|
| 105 |
+
vec = aktif_model[0]
|
| 106 |
+
clf = aktif_model[1]
|
| 107 |
+
feature_names = vec.get_feature_names_out()
|
| 108 |
+
log_odds = clf.feature_log_prob_[1] - clf.feature_log_prob_[0]
|
| 109 |
+
kamus_bobot = dict(zip(feature_names, log_odds))
|
| 110 |
+
except Exception:
|
| 111 |
+
pass
|
| 112 |
+
|
| 113 |
+
elif jenis_model == "lstm":
|
| 114 |
+
# Logika untuk Deep Learning
|
| 115 |
+
# (Perhatikan: LSTM umumnya memerlukan padding dan tokenisasi sebelum predict)
|
| 116 |
+
proba = aktif_model.predict_proba([teks_untuk_dianalisis])[0]
|
| 117 |
+
# Jika menggunakan TensorFlow, syntaxnya mungkin berubah jadi -> aktif_model.predict(padded_sequence)[0]
|
| 118 |
+
prob_fakta = float(proba[0])
|
| 119 |
+
prob_hoax = float(proba[1])
|
| 120 |
+
# Model Deep Learning umumnya tidak memiliki "feature_log_prob", jadi highlight kata kita bypass
|
| 121 |
+
# dan kamus_bobot dibiarkan kosong ({})
|
| 122 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 123 |
|
| 124 |
+
# Penentuan Highlight Kata (Akan berfungsi baik di Naive Bayes, dan jadi 'Netral' di LSTM jika tanpa LIME/SHAP)
|
| 125 |
kata_kata = teks_untuk_dianalisis.split()
|
| 126 |
teks_highlight = []
|
| 127 |
|
|
|
|
| 129 |
kata_bersih = re.sub(r'[^a-z]', '', kata.lower())
|
| 130 |
bobot = float(kamus_bobot.get(kata_bersih, 0))
|
| 131 |
|
|
|
|
| 132 |
if bobot > 0.3:
|
| 133 |
label_kata = "Hoax"
|
| 134 |
elif bobot < -0.3:
|
|
|
|
| 142 |
"bobot": round(bobot, 4)
|
| 143 |
})
|
| 144 |
|
|
|
|
| 145 |
return {
|
| 146 |
"status": "success",
|
| 147 |
"hasil_analisis": {
|
| 148 |
+
"model_digunakan": jenis_model,
|
| 149 |
"teks_dianalisis": teks_untuk_dianalisis,
|
| 150 |
"prediksi_utama": "HOAX" if prob_hoax > prob_fakta else "FAKTA",
|
| 151 |
"probabilitas": {
|
|
|
|
| 154 |
}
|
| 155 |
},
|
| 156 |
"bedah_kata": teks_highlight
|
| 157 |
+
}
|