import json
import os
import re

import joblib
import requests
from bs4 import BeautifulSoup
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel

# --- 1. KERAS 3 IMPORTS ---
from keras.models import load_model
from keras.utils import pad_sequences
# The legacy TensorFlow preprocessing module is used only to rebuild the
# tokenizer from its JSON dump.
from tensorflow.keras.preprocessing.text import tokenizer_from_json

app = FastAPI(
    title="API Deteksi Hoax Multi-Model",
    description="API untuk mendeteksi berita hoax menggunakan pilihan model.",
    version="1.0.2",  # Keras 3 + JSON tokenizer release
)

app.add_middleware(
    CORSMiddleware,
    # The origin must be scheme + host only. Browsers never send a trailing
    # slash in the Origin header and the middleware compares strings exactly,
    # so an entry ending in "/" would never match.
    allow_origins=["https://deteksi-berita-hoax-kappa.vercel.app"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# --- 2. LOAD BOTH MODELS ---
# Registry of the selectable models. A value of None means the artefact was
# missing or failed to load; the endpoint reports that to the client.
models = {
    "naive_bayes": None,
    "lstm": None,
}
tokenizer = None

# Load the Naive Bayes model.
# NOTE(security): joblib.load unpickles arbitrary objects; only ship model
# files that come from a trusted source.
PATH_NB = 'model_hoax_complete.pkl'
try:
    if os.path.exists(PATH_NB):
        models["naive_bayes"] = joblib.load(PATH_NB)
        print("Model Naive Bayes berhasil dimuat!")
    else:
        print(f"File model Naive Bayes tidak ditemukan: {PATH_NB}")
except Exception as e:
    # Startup must not abort because one model is unusable.
    print(f"Error loading Naive Bayes: {e}")

# Load the LSTM model (Keras 3 format).
PATH_LSTM = 'lstm_fake_news_model.h5'
try:
    if os.path.exists(PATH_LSTM):
        models["lstm"] = load_model(PATH_LSTM)
        print("Model LSTM berhasil dimuat!")
    else:
        print(f"File model LSTM tidak ditemukan: {PATH_LSTM}")
except Exception as e:
    print(f"Error loading LSTM: {e}")

# Load the tokenizer for the LSTM model (JSON format).
PATH_TOKENIZER = 'tokenizer.json'
try:
    if os.path.exists(PATH_TOKENIZER):
        # Explicit encoding: the platform default is not UTF-8 everywhere.
        with open(PATH_TOKENIZER, encoding="utf-8") as f:
            data = json.load(f)
        # tokenizer_from_json expects a JSON *string*. If the file stores the
        # tokenizer config as a plain JSON object, json.load returns a dict,
        # so serialise it back before handing it over.
        if not isinstance(data, str):
            data = json.dumps(data)
        tokenizer = tokenizer_from_json(data)
        print("Tokenizer LSTM (JSON) berhasil dimuat!")
    else:
        print(f"File tokenizer LSTM tidak ditemukan: {PATH_TOKENIZER}")
except Exception as e:
    print(f"Error loading Tokenizer: {e}")

# --- 3. REQUEST SCHEMA & SCRAPER ---
class PredictRequest(BaseModel):
    """Request body accepted by ``POST /predict``."""

    # Raw news text, or an http(s) URL whose page should be scraped.
    input_text: str
    # Key into the module-level ``models`` registry.
    model_type: str = "naive_bayes"


# Words whose log-odds exceed +/- this value are labelled Hoax / Fakta.
_AMBANG_BOBOT = 0.3
# Strips everything except lowercase ASCII letters before the weight lookup.
_BUKAN_HURUF = re.compile(r'[^a-z]')
# Padded sequence length fed to the LSTM; must match the training setup.
_LSTM_MAX_LEN = 150


def _ambil_teks_berita(url):
    """Download *url* and return the joined text of its ``<p>`` elements.

    Unlike ``scrape_berita`` this helper lets errors propagate, so a caller
    can tell a failed download apart from page text.

    NOTE(security): *url* is user-supplied and fetched as-is from the server;
    consider restricting it to public hosts to avoid SSRF.
    """
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'}
    response = requests.get(url, headers=headers, timeout=10)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')
    return " ".join(p.get_text() for p in soup.find_all('p')).strip()


def scrape_berita(url):
    """Web page reader (scraper).

    Returns the stripped paragraph text of the page at *url*. On any failure
    it returns a string of the form ``"GAGAL: <error>"`` instead of raising;
    this string contract is kept for existing callers.
    """
    try:
        return _ambil_teks_berita(url)
    except Exception as e:
        return f"GAGAL: {e}"


def _prediksi_naive_bayes(model, teks):
    """Return ``(prob_fakta, prob_hoax, kamus_bobot)`` for the Naive Bayes model."""
    proba = model.predict_proba([teks])[0]
    prob_fakta = float(proba[0])
    prob_hoax = float(proba[1])

    # Per-word log-odds, used by the frontend to highlight words.
    # Presumably the model is a two-step pipeline (vectorizer, classifier);
    # verify against the training code.
    kamus_bobot = {}
    try:
        vec = model[0]
        clf = model[1]
        feature_names = vec.get_feature_names_out()
        log_odds = clf.feature_log_prob_[1] - clf.feature_log_prob_[0]
        kamus_bobot = dict(zip(feature_names, log_odds))
    except Exception as e:
        # Highlighting is optional: keep the prediction, but report the
        # failure instead of swallowing it silently.
        print(f"Bobot kata Naive Bayes tidak tersedia: {e}")
    return prob_fakta, prob_hoax, kamus_bobot


def _prediksi_lstm(model, teks):
    """Return ``(prob_fakta, prob_hoax)`` for the LSTM model.

    Raises:
        HTTPException: 500 when the module-level tokenizer is not loaded.
    """
    if tokenizer is None:
        raise HTTPException(status_code=500, detail="Tokenizer model LSTM tidak ditemukan di server.")

    # Text -> integer sequence -> fixed-length padded sequence.
    sequence = tokenizer.texts_to_sequences([teks])
    padded_sequence = pad_sequences(sequence, maxlen=_LSTM_MAX_LEN, padding='post', truncating='post')
    prediksi_mentah = model.predict(padded_sequence, verbose=0)[0]

    # Two or more outputs: index 0 is read as "fakta" and index 1 as "hoax".
    # A single output is read as the hoax probability.
    # NOTE(review): confirm this class order against the trained model.
    if len(prediksi_mentah) >= 2:
        return float(prediksi_mentah[0]), float(prediksi_mentah[1])
    nilai = float(prediksi_mentah[0])
    return 1.0 - nilai, nilai


def _bedah_kata(teks, kamus_bobot):
    """Label every whitespace-separated word of *teks* using *kamus_bobot*."""
    teks_highlight = []
    for kata in teks.split():
        kata_bersih = _BUKAN_HURUF.sub('', kata.lower())
        bobot = float(kamus_bobot.get(kata_bersih, 0))
        if bobot > _AMBANG_BOBOT:
            label_kata = "Hoax"
        elif bobot < -_AMBANG_BOBOT:
            label_kata = "Fakta"
        else:
            label_kata = "Netral"
        teks_highlight.append({
            "kata": kata,
            "label": label_kata,
            "bobot": round(bobot, 4),
        })
    return teks_highlight


@app.post("/predict")
def deteksi_hoax_api(request: PredictRequest):
    """Classify a news text, or the article behind a URL, as hoax or fact.

    Returns a dict with the chosen model, the analysed text, the predicted
    label, both probabilities in percent, and a per-word breakdown.

    Raises:
        HTTPException: 400 for an unknown model, empty input, a URL that
            cannot be fetched, or a page with no paragraph text; 500 when
            the requested model or the LSTM tokenizer is not loaded.
    """
    # --- 4. INPUT VALIDATION ---
    jenis_model = request.model_type
    if jenis_model not in models:
        raise HTTPException(status_code=400, detail="Pilihan model tidak valid. Gunakan 'naive_bayes' atau 'lstm'.")

    aktif_model = models[jenis_model]
    if aktif_model is None:
        raise HTTPException(status_code=500, detail=f"Model {jenis_model} tidak ditemukan di server.")

    teks_mentah = request.input_text.strip()
    if not teks_mentah:
        raise HTTPException(status_code=400, detail="Input tidak boleh kosong.")

    if teks_mentah.startswith(("http://", "https://")):
        # Call the raising helper directly, so that article text which itself
        # begins with "GAGAL:" is not mistaken for a scrape failure.
        try:
            teks_untuk_dianalisis = _ambil_teks_berita(teks_mentah)
        except Exception as e:
            raise HTTPException(status_code=400, detail=f"Gagal memproses URL: GAGAL: {e}") from e
        # A page without any <p> text gives the models nothing to analyse.
        if not teks_untuk_dianalisis:
            raise HTTPException(status_code=400, detail="Tidak ada teks yang dapat diekstrak dari URL tersebut.")
    else:
        teks_untuk_dianalisis = teks_mentah

    # --- 5. PREDICTION BY MODEL ---
    kamus_bobot = {}
    prob_fakta = 0.0
    prob_hoax = 0.0

    if jenis_model == "naive_bayes":
        prob_fakta, prob_hoax, kamus_bobot = _prediksi_naive_bayes(aktif_model, teks_untuk_dianalisis)
    elif jenis_model == "lstm":
        prob_fakta, prob_hoax = _prediksi_lstm(aktif_model, teks_untuk_dianalisis)

    # --- 6. WORD HIGHLIGHTING ---
    # Only the Naive Bayes branch supplies weights; for the LSTM the dict is
    # empty and every word is labelled "Netral".
    teks_highlight = _bedah_kata(teks_untuk_dianalisis, kamus_bobot)

    return {
        "status": "success",
        "hasil_analisis": {
            "model_digunakan": jenis_model,
            "teks_dianalisis": teks_untuk_dianalisis,
            "prediksi_utama": "HOAX" if prob_hoax > prob_fakta else "FAKTA",
            "probabilitas": {
                "fakta": round(prob_fakta * 100, 2),
                "hoax": round(prob_hoax * 100, 2),
            },
        },
        "bedah_kata": teks_highlight,
    }