Update api.py
Browse files
api.py
CHANGED
|
@@ -6,15 +6,25 @@ import os
|
|
| 6 |
import re
|
| 7 |
import requests
|
| 8 |
from bs4 import BeautifulSoup
|
| 9 |
-
from tensorflow.keras.models import load_model
|
| 10 |
-
from tensorflow.keras.preprocessing.sequence import pad_sequences
|
| 11 |
import pickle
|
| 12 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
|
| 14 |
app = FastAPI(
|
| 15 |
title="API Deteksi Hoax Multi-Model",
|
| 16 |
description="API untuk mendeteksi berita hoax menggunakan pilihan model.",
|
| 17 |
-
version="1.0.
|
| 18 |
)
|
| 19 |
|
| 20 |
app.add_middleware(
|
|
@@ -25,7 +35,7 @@ app.add_middleware(
|
|
| 25 |
allow_headers=["*"],
|
| 26 |
)
|
| 27 |
|
| 28 |
-
# ---
|
| 29 |
models = {
|
| 30 |
"naive_bayes": None,
|
| 31 |
"lstm": None
|
|
@@ -41,35 +51,36 @@ try:
|
|
| 41 |
except Exception as e:
|
| 42 |
print(f"Error loading Naive Bayes: {e}")
|
| 43 |
|
| 44 |
-
# Load Model LSTM
|
| 45 |
-
PATH_LSTM = 'lstm_fake_news_model.h5'
|
| 46 |
try:
|
| 47 |
if os.path.exists(PATH_LSTM):
|
| 48 |
models["lstm"] = load_model(PATH_LSTM)
|
| 49 |
print("Model LSTM berhasil dimuat!")
|
| 50 |
except Exception as e:
|
| 51 |
print(f"Error loading LSTM: {e}")
|
|
|
|
| 52 |
# Load Tokenizer untuk LSTM
|
| 53 |
PATH_TOKENIZER = 'tokenizer.pkl'
|
| 54 |
try:
|
| 55 |
if os.path.exists(PATH_TOKENIZER):
|
| 56 |
-
#tokenizer
|
| 57 |
-
|
|
|
|
| 58 |
print("Tokenizer LSTM berhasil dimuat!")
|
| 59 |
except Exception as e:
|
| 60 |
print(f"Error loading Tokenizer: {e}")
|
| 61 |
|
| 62 |
|
| 63 |
-
# ---
|
| 64 |
class PredictRequest(BaseModel):
|
| 65 |
input_text: str
|
| 66 |
-
model_type: str = "naive_bayes"
|
| 67 |
-
|
| 68 |
|
| 69 |
def scrape_berita(url):
|
| 70 |
"""Fungsi pembaca halaman web (Scraper)"""
|
| 71 |
try:
|
| 72 |
-
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36
|
| 73 |
response = requests.get(url, headers=headers, timeout=10)
|
| 74 |
response.raise_for_status()
|
| 75 |
soup = BeautifulSoup(response.content, 'html.parser')
|
|
@@ -82,11 +93,11 @@ def scrape_berita(url):
|
|
| 82 |
|
| 83 |
@app.post("/predict")
|
| 84 |
def deteksi_hoax_api(request: PredictRequest):
|
| 85 |
-
# ---
|
| 86 |
jenis_model = request.model_type
|
| 87 |
-
if jenis_model not in models:
|
| 88 |
-
|
| 89 |
-
|
| 90 |
aktif_model = models[jenis_model]
|
| 91 |
if aktif_model is None:
|
| 92 |
raise HTTPException(status_code=500, detail=f"Model {jenis_model} tidak ditemukan di server.")
|
|
@@ -97,21 +108,22 @@ def deteksi_hoax_api(request: PredictRequest):
|
|
| 97 |
|
| 98 |
if teks_mentah.startswith("http://") or teks_mentah.startswith("https://"):
|
| 99 |
teks_untuk_dianalisis = scrape_berita(teks_mentah)
|
| 100 |
-
if teks_untuk_dianalisis.startswith("GAGAL:"):
|
| 101 |
-
|
| 102 |
else:
|
| 103 |
teks_untuk_dianalisis = teks_mentah
|
| 104 |
|
| 105 |
-
# ---
|
| 106 |
kamus_bobot = {}
|
|
|
|
|
|
|
| 107 |
|
| 108 |
if jenis_model == "naive_bayes":
|
| 109 |
-
# Logika untuk algoritma scikit-learn
|
| 110 |
proba = aktif_model.predict_proba([teks_untuk_dianalisis])[0]
|
| 111 |
prob_fakta = float(proba[0])
|
| 112 |
prob_hoax = float(proba[1])
|
| 113 |
|
| 114 |
-
# Ekstraksi kata untuk highlight Frontend
|
| 115 |
try:
|
| 116 |
vec = aktif_model[0]
|
| 117 |
clf = aktif_model[1]
|
|
@@ -122,40 +134,29 @@ def deteksi_hoax_api(request: PredictRequest):
|
|
| 122 |
pass
|
| 123 |
|
| 124 |
elif jenis_model == "lstm":
|
| 125 |
-
# Pastikan tokenizer sudah berhasil dimuat sebelumnya
|
| 126 |
if tokenizer is None:
|
| 127 |
raise HTTPException(status_code=500, detail="Tokenizer model LSTM tidak ditemukan di server.")
|
| 128 |
|
| 129 |
-
# 1. Konversi Teks
|
| 130 |
sequence = tokenizer.texts_to_sequences([teks_untuk_dianalisis])
|
| 131 |
|
| 132 |
-
# 2. Padding
|
| 133 |
-
|
| 134 |
-
# namun Anda HARUS menyesuaikan ini dengan 'maxlen' yang Anda gunakan saat di file Jupyter/Colab waktu training model.
|
| 135 |
-
# Bisa jadi 100, 200, atau 500. Silakan cek ulang notebook Anda jika hasilnya kurang akurat.
|
| 136 |
-
MAX_LEN = 150
|
| 137 |
padded_sequence = pad_sequences(sequence, maxlen=MAX_LEN, padding='post', truncating='post')
|
| 138 |
|
| 139 |
-
# 3. Prediksi
|
| 140 |
-
prediksi_mentah = aktif_model.predict(padded_sequence)[0]
|
| 141 |
|
| 142 |
-
# 4. Pengolahan Output Keras
|
| 143 |
if len(prediksi_mentah) >= 2:
|
| 144 |
-
# Jika model Anda Outputnya Softmax ([Prob_Fakta, Prob_Hoax])
|
| 145 |
prob_fakta = float(prediksi_mentah[0])
|
| 146 |
prob_hoax = float(prediksi_mentah[1])
|
| 147 |
else:
|
| 148 |
-
# Jika model Anda Outputnya Sigmoid (1 nilai saja, misal 0.9 = 90% Hoax)
|
| 149 |
nilai = float(prediksi_mentah[0])
|
| 150 |
prob_hoax = nilai
|
| 151 |
prob_fakta = 1.0 - nilai
|
| 152 |
|
| 153 |
-
|
| 154 |
-
# Model Deep Learning umumnya tidak memiliki log odds per-kata
|
| 155 |
-
# Maka highlight bedah kata kita biarkan kosong di backend
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
# Penentuan Highlight Kata (Akan berfungsi baik di Naive Bayes, dan jadi 'Netral' di LSTM jika tanpa LIME/SHAP)
|
| 159 |
kata_kata = teks_untuk_dianalisis.split()
|
| 160 |
teks_highlight = []
|
| 161 |
|
|
@@ -188,4 +189,4 @@ def deteksi_hoax_api(request: PredictRequest):
|
|
| 188 |
}
|
| 189 |
},
|
| 190 |
"bedah_kata": teks_highlight
|
| 191 |
-
}
|
|
|
|
| 6 |
import re
|
| 7 |
import requests
|
| 8 |
from bs4 import BeautifulSoup
|
|
|
|
|
|
|
| 9 |
import pickle
|
| 10 |
|
| 11 |
+
# --- 1. KERAS 3 COMPATIBILITY PATCH & IMPORTS ---
|
| 12 |
+
import sys
|
| 13 |
+
import keras
|
| 14 |
+
|
| 15 |
+
# Patch darurat: Menjembatani Tokenizer lama (Keras 2) agar bisa dimuat di Keras 3
|
| 16 |
+
# Ini mencegah error "No module named 'keras.src.preprocessing'"
|
| 17 |
+
if 'keras.src.preprocessing' not in sys.modules:
|
| 18 |
+
sys.modules['keras.src.preprocessing'] = keras.preprocessing
|
| 19 |
+
|
| 20 |
+
# Gunakan import standar Keras 3 (Backend Agnostic)
|
| 21 |
+
from keras.models import load_model
|
| 22 |
+
from keras.utils import pad_sequences # Di Keras 3, pad_sequences pindah ke utils
|
| 23 |
|
| 24 |
app = FastAPI(
|
| 25 |
title="API Deteksi Hoax Multi-Model",
|
| 26 |
description="API untuk mendeteksi berita hoax menggunakan pilihan model.",
|
| 27 |
+
version="1.0.1" # Versi update Keras 3
|
| 28 |
)
|
| 29 |
|
| 30 |
app.add_middleware(
|
|
|
|
| 35 |
allow_headers=["*"],
|
| 36 |
)
|
| 37 |
|
| 38 |
+
# --- 2. LOAD KEDUA MODEL ---
|
| 39 |
models = {
|
| 40 |
"naive_bayes": None,
|
| 41 |
"lstm": None
|
|
|
|
| 51 |
except Exception as e:
|
| 52 |
print(f"Error loading Naive Bayes: {e}")
|
| 53 |
|
| 54 |
+
# Load Model LSTM (Format Keras 3)
|
| 55 |
+
PATH_LSTM = 'lstm_fake_news_model.h5' # Atau .keras jika Anda menggunakan format baru
|
| 56 |
try:
|
| 57 |
if os.path.exists(PATH_LSTM):
|
| 58 |
models["lstm"] = load_model(PATH_LSTM)
|
| 59 |
print("Model LSTM berhasil dimuat!")
|
| 60 |
except Exception as e:
|
| 61 |
print(f"Error loading LSTM: {e}")
|
| 62 |
+
|
| 63 |
# Load Tokenizer untuk LSTM
|
| 64 |
PATH_TOKENIZER = 'tokenizer.pkl'
|
| 65 |
try:
|
| 66 |
if os.path.exists(PATH_TOKENIZER):
|
| 67 |
+
# Memuat tokenizer menggunakan pickle bawaan Python
|
| 68 |
+
with open(PATH_TOKENIZER, 'rb') as f:
|
| 69 |
+
tokenizer = pickle.load(f)
|
| 70 |
print("Tokenizer LSTM berhasil dimuat!")
|
| 71 |
except Exception as e:
|
| 72 |
print(f"Error loading Tokenizer: {e}")
|
| 73 |
|
| 74 |
|
| 75 |
+
# --- 3. SKEMA REQUEST & SCRAPER ---
|
| 76 |
class PredictRequest(BaseModel):
|
| 77 |
input_text: str
|
| 78 |
+
model_type: str = "naive_bayes"
|
|
|
|
| 79 |
|
| 80 |
def scrape_berita(url):
|
| 81 |
"""Fungsi pembaca halaman web (Scraper)"""
|
| 82 |
try:
|
| 83 |
+
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'}
|
| 84 |
response = requests.get(url, headers=headers, timeout=10)
|
| 85 |
response.raise_for_status()
|
| 86 |
soup = BeautifulSoup(response.content, 'html.parser')
|
|
|
|
| 93 |
|
| 94 |
@app.post("/predict")
|
| 95 |
def deteksi_hoax_api(request: PredictRequest):
|
| 96 |
+
# --- 4. VALIDASI INPUT ---
|
| 97 |
jenis_model = request.model_type
|
| 98 |
+
if jenis_model not in models:
|
| 99 |
+
raise HTTPException(status_code=400, detail="Pilihan model tidak valid. Gunakan 'naive_bayes' atau 'lstm'.")
|
| 100 |
+
|
| 101 |
aktif_model = models[jenis_model]
|
| 102 |
if aktif_model is None:
|
| 103 |
raise HTTPException(status_code=500, detail=f"Model {jenis_model} tidak ditemukan di server.")
|
|
|
|
| 108 |
|
| 109 |
if teks_mentah.startswith("http://") or teks_mentah.startswith("https://"):
|
| 110 |
teks_untuk_dianalisis = scrape_berita(teks_mentah)
|
| 111 |
+
if teks_untuk_dianalisis.startswith("GAGAL:"):
|
| 112 |
+
raise HTTPException(status_code=400, detail=f"Gagal memproses URL: {teks_untuk_dianalisis}")
|
| 113 |
else:
|
| 114 |
teks_untuk_dianalisis = teks_mentah
|
| 115 |
|
| 116 |
+
# --- 5. PREDIKSI BERDASARKAN MODEL ---
|
| 117 |
kamus_bobot = {}
|
| 118 |
+
prob_fakta = 0.0
|
| 119 |
+
prob_hoax = 0.0
|
| 120 |
|
| 121 |
if jenis_model == "naive_bayes":
|
|
|
|
| 122 |
proba = aktif_model.predict_proba([teks_untuk_dianalisis])[0]
|
| 123 |
prob_fakta = float(proba[0])
|
| 124 |
prob_hoax = float(proba[1])
|
| 125 |
|
| 126 |
+
# Ekstraksi kata untuk highlight Frontend
|
| 127 |
try:
|
| 128 |
vec = aktif_model[0]
|
| 129 |
clf = aktif_model[1]
|
|
|
|
| 134 |
pass
|
| 135 |
|
| 136 |
elif jenis_model == "lstm":
|
|
|
|
| 137 |
if tokenizer is None:
|
| 138 |
raise HTTPException(status_code=500, detail="Tokenizer model LSTM tidak ditemukan di server.")
|
| 139 |
|
| 140 |
+
# 1. Konversi Teks ke Sequence Angka
|
| 141 |
sequence = tokenizer.texts_to_sequences([teks_untuk_dianalisis])
|
| 142 |
|
| 143 |
+
# 2. Padding
|
| 144 |
+
MAX_LEN = 150 # Sesuaikan dengan panjang saat training
|
|
|
|
|
|
|
|
|
|
| 145 |
padded_sequence = pad_sequences(sequence, maxlen=MAX_LEN, padding='post', truncating='post')
|
| 146 |
|
| 147 |
+
# 3. Prediksi (Keras 3 mengembalikan array numpy standar)
|
| 148 |
+
prediksi_mentah = aktif_model.predict(padded_sequence, verbose=0)[0]
|
| 149 |
|
| 150 |
+
# 4. Pengolahan Output Keras 3
|
| 151 |
if len(prediksi_mentah) >= 2:
|
|
|
|
| 152 |
prob_fakta = float(prediksi_mentah[0])
|
| 153 |
prob_hoax = float(prediksi_mentah[1])
|
| 154 |
else:
|
|
|
|
| 155 |
nilai = float(prediksi_mentah[0])
|
| 156 |
prob_hoax = nilai
|
| 157 |
prob_fakta = 1.0 - nilai
|
| 158 |
|
| 159 |
+
# --- 6. PENENTUAN HIGHLIGHT KATA ---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 160 |
kata_kata = teks_untuk_dianalisis.split()
|
| 161 |
teks_highlight = []
|
| 162 |
|
|
|
|
| 189 |
}
|
| 190 |
},
|
| 191 |
"bedah_kata": teks_highlight
|
| 192 |
+
}
|