File size: 6,126 Bytes
6f020f2
 
 
 
 
 
 
 
5c4c049
b086d17
5c4c049
c39208b
6702d0f
5c4c049
 
6f020f2
 
139c84f
 
5c4c049
6f020f2
 
 
 
 
 
139c84f
 
6f020f2
139c84f
c39208b
139c84f
 
 
 
b086d17
139c84f
 
 
6f020f2
139c84f
 
 
 
 
 
c39208b
5c4c049
139c84f
 
c229061
139c84f
6f020f2
139c84f
c39208b
5c4c049
 
b086d17
 
5c4c049
 
 
 
b086d17
 
6f020f2
c39208b
6f020f2
 
c39208b
6f020f2
 
 
 
c39208b
6f020f2
 
 
 
 
 
 
 
 
 
 
c39208b
139c84f
c39208b
 
 
139c84f
 
 
6f020f2
 
 
 
 
 
 
c39208b
 
6f020f2
 
 
c39208b
139c84f
c39208b
 
139c84f
 
 
 
 
 
c39208b
139c84f
 
 
 
 
 
 
 
 
 
b086d17
 
 
c39208b
b086d17
975c61d
c39208b
 
b086d17
975c61d
c39208b
 
b086d17
c39208b
975c61d
 
 
 
 
 
 
b086d17
c39208b
6f020f2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
139c84f
6f020f2
 
 
 
 
 
 
 
c39208b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
import joblib
import os
import re
import requests
from bs4 import BeautifulSoup
import json

# --- 1. KERAS 3 IMPORTS ---
from keras.models import load_model
from keras.utils import pad_sequences
# Menggunakan modul legacy bawaan TensorFlow untuk memuat JSON
from tensorflow.keras.preprocessing.text import tokenizer_from_json 

# ASGI application object; the route handlers in this file register on it.
app = FastAPI(
    title="API Deteksi Hoax Multi-Model",
    description="API untuk mendeteksi berita hoax menggunakan pilihan model.",
    version="1.0.2"  # Keras 3 + JSON tokenizer release
)

# Allow the deployed frontend to call this API from the browser.
# The Origin header a browser sends is "scheme://host[:port]" with no trailing
# slash, and the middleware matches it against this list as an exact string,
# so the entry must not end with "/" or every cross-origin request is refused.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["https://deteksi-berita-hoax-kappa.vercel.app"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# --- 2. LOAD BOTH MODELS ---
# Registry of the selectable models, keyed by the name clients send.
# Every entry starts out as None, meaning "not loaded".
models = dict.fromkeys(("naive_bayes", "lstm"))
# Text tokenizer used together with the LSTM model; None means "not loaded".
tokenizer = None

# Load the Naive Bayes model.
# On any failure the "naive_bayes" entry of `models` is left untouched and the
# problem is printed, so a missing or broken file is visible at startup.
PATH_NB = 'model_hoax_complete.pkl'
try:
    if os.path.exists(PATH_NB):
        # NOTE: joblib.load unpickles the file, which can execute arbitrary
        # code — only ever point PATH_NB at a trusted, locally produced file.
        models["naive_bayes"] = joblib.load(PATH_NB)
        print("Model Naive Bayes berhasil dimuat!")
    else:
        print(f"File model Naive Bayes tidak ditemukan: {PATH_NB}")
except Exception as e:
    print(f"Error loading Naive Bayes: {e}")

# Load the LSTM model (Keras 3 format).
# On any failure the "lstm" entry of `models` is left untouched and the
# problem is printed, so a missing or broken file is visible at startup.
PATH_LSTM = 'lstm_fake_news_model.h5'
try:
    if os.path.exists(PATH_LSTM):
        models["lstm"] = load_model(PATH_LSTM)
        print("Model LSTM berhasil dimuat!")
    else:
        print(f"File model LSTM tidak ditemukan: {PATH_LSTM}")
except Exception as e:
    print(f"Error loading LSTM: {e}")

# Load the tokenizer for the LSTM model (JSON format).
# On any failure `tokenizer` keeps its previous value and the problem is
# printed, so a missing or broken file is visible at startup.
PATH_TOKENIZER = 'tokenizer.json'
try:
    if os.path.exists(PATH_TOKENIZER):
        # JSON files are UTF-8; do not depend on the platform default encoding.
        with open(PATH_TOKENIZER, encoding="utf-8") as f:
            data = json.load(f)
        # tokenizer_from_json() parses a JSON *string*. The file may hold that
        # string JSON-encoded a second time (json.load then yields a str) or
        # the JSON object itself (json.load yields a dict); re-serialize the
        # latter so both layouts are accepted.
        if not isinstance(data, str):
            data = json.dumps(data)
        tokenizer = tokenizer_from_json(data)
        print("Tokenizer LSTM (JSON) berhasil dimuat!")
    else:
        print(f"File tokenizer tidak ditemukan: {PATH_TOKENIZER}")
except Exception as e:
    print(f"Error loading Tokenizer: {e}")

# --- 3. REQUEST SCHEMA & SCRAPER ---
# Request body accepted by the POST /predict endpoint.
class PredictRequest(BaseModel):
    # Raw news text, or an http:// / https:// URL whose page is scraped first.
    input_text: str
    # Name of the model to use; the handler checks it against the keys of
    # `models` ("naive_bayes" or "lstm"). Defaults to Naive Bayes.
    model_type: str = "naive_bayes"  

def scrape_berita(url: str) -> str:
    """Download a web page and return the text of its ``<p>`` elements.

    Failures are reported in-band instead of being raised: the caller must
    check whether the result starts with ``"GAGAL:"``.

    Args:
        url: Address of the page to download.

    Returns:
        The paragraph texts joined with single spaces and stripped, or a
        string starting with ``"GAGAL:"`` when the request fails, the server
        answers with an error status, parsing fails, or the page contains no
        paragraph text at all.
    """
    # NOTE(review): if `url` comes straight from a client, the server will
    # fetch whatever address it is given (SSRF risk) — consider restricting
    # the allowed targets.
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'}
    try:
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')
        teks_berita = " ".join(p.get_text() for p in soup.find_all('p')).strip()
    except Exception as e:
        # Best-effort boundary: every failure becomes the sentinel string.
        return f"GAGAL: {e}"

    # A page without any paragraph text is a failure too; returning "" would
    # make the caller classify an empty document as if it were real content.
    if not teks_berita:
        return "GAGAL: Tidak ada teks paragraf yang ditemukan pada halaman."
    return teks_berita

# A word is labelled "Hoax" above +threshold and "Fakta" below -threshold.
_AMBANG_BOBOT = 0.3
# Length the LSTM input is padded/truncated to; keep in sync with training.
_LSTM_MAX_LEN = 150
# Strips everything except lowercase ASCII letters from a lowercased word.
_BUKAN_HURUF = re.compile(r'[^a-z]')


def _siapkan_teks(teks_mentah):
    """Return the text to classify, scraping it first when the input is a URL.

    Raises:
        HTTPException: 400 when the URL cannot be scraped or yields no text.
    """
    if not teks_mentah.startswith(("http://", "https://")):
        return teks_mentah

    teks = scrape_berita(teks_mentah)
    if teks.startswith("GAGAL:"):
        raise HTTPException(status_code=400, detail=f"Gagal memproses URL: {teks}")
    # Guard against an empty scrape result so the model never sees "".
    if not teks.strip():
        raise HTTPException(status_code=400, detail="Tidak ada teks yang dapat dianalisis dari URL tersebut.")
    return teks


def _prediksi_naive_bayes(model, teks):
    """Return ``(prob_fakta, prob_hoax, kamus_bobot)`` from the Naive Bayes model.

    ``kamus_bobot`` maps each feature name to its log-odds (row 1 minus row 0
    of ``feature_log_prob_``); it is empty when the weights cannot be read.
    """
    # Index 0 is treated as "fakta" and index 1 as "hoax".
    # NOTE(review): assumes the class order used at training time — confirm.
    proba = model.predict_proba([teks])[0]
    prob_fakta = float(proba[0])
    prob_hoax = float(proba[1])

    # Word weights only feed the frontend highlight, so this part is
    # best-effort: a failure is reported but must not fail the prediction.
    kamus_bobot = {}
    try:
        # NOTE(review): assumes step 0 is the vectorizer and step 1 the
        # classifier of a two-step pipeline — confirm against training code.
        vec = model[0]
        clf = model[1]
        feature_names = vec.get_feature_names_out()
        log_odds = clf.feature_log_prob_[1] - clf.feature_log_prob_[0]
        kamus_bobot = dict(zip(feature_names, log_odds))
    except Exception as e:
        print(f"Gagal mengekstrak bobot kata Naive Bayes: {e}")
    return prob_fakta, prob_hoax, kamus_bobot


def _prediksi_lstm(model, teks):
    """Return ``(prob_fakta, prob_hoax)`` from the LSTM model.

    Raises:
        HTTPException: 500 when the tokenizer was not loaded.
    """
    if tokenizer is None:
        raise HTTPException(status_code=500, detail="Tokenizer model LSTM tidak ditemukan di server.")

    # Text -> integer sequence -> fixed-length padded sequence.
    sequence = tokenizer.texts_to_sequences([teks])
    padded_sequence = pad_sequences(sequence, maxlen=_LSTM_MAX_LEN, padding='post', truncating='post')
    prediksi_mentah = model.predict(padded_sequence, verbose=0)[0]

    # Two or more outputs: read them as [fakta, hoax]. A single output is
    # read as the hoax probability.
    if len(prediksi_mentah) >= 2:
        return float(prediksi_mentah[0]), float(prediksi_mentah[1])
    nilai = float(prediksi_mentah[0])
    return 1.0 - nilai, nilai


def _bedah_kata(teks, kamus_bobot):
    """Label every whitespace-separated word of ``teks`` using ``kamus_bobot``."""
    teks_highlight = []
    for kata in teks.split():
        # Look the word up in its lowercase, letters-only form.
        kata_bersih = _BUKAN_HURUF.sub('', kata.lower())
        bobot = float(kamus_bobot.get(kata_bersih, 0))

        if bobot > _AMBANG_BOBOT:
            label_kata = "Hoax"
        elif bobot < -_AMBANG_BOBOT:
            label_kata = "Fakta"
        else:
            label_kata = "Netral"

        teks_highlight.append({
            "kata": kata,
            "label": label_kata,
            "bobot": round(bobot, 4)
        })
    return teks_highlight


@app.post("/predict")
def deteksi_hoax_api(request: PredictRequest):
    """Classify a news text (or the page behind a URL) as hoax or fact.

    Returns a dict with the model used, the analysed text, the predicted
    label, both probabilities in percent, and a per-word breakdown.

    Raises:
        HTTPException: 400 for an unknown model name, empty input, or a URL
            that cannot be processed; 500 when the chosen model or the LSTM
            tokenizer is not loaded.
    """
    # --- 4. INPUT VALIDATION ---
    jenis_model = request.model_type
    if jenis_model not in models:
        raise HTTPException(status_code=400, detail="Pilihan model tidak valid. Gunakan 'naive_bayes' atau 'lstm'.")

    aktif_model = models[jenis_model]
    if aktif_model is None:
        raise HTTPException(status_code=500, detail=f"Model {jenis_model} tidak ditemukan di server.")

    teks_mentah = request.input_text.strip()
    if not teks_mentah:
        raise HTTPException(status_code=400, detail="Input tidak boleh kosong.")

    teks_untuk_dianalisis = _siapkan_teks(teks_mentah)

    # --- 5. PREDICTION WITH THE CHOSEN MODEL ---
    kamus_bobot = {}
    prob_fakta = 0.0
    prob_hoax = 0.0

    if jenis_model == "naive_bayes":
        prob_fakta, prob_hoax, kamus_bobot = _prediksi_naive_bayes(aktif_model, teks_untuk_dianalisis)
    elif jenis_model == "lstm":
        prob_fakta, prob_hoax = _prediksi_lstm(aktif_model, teks_untuk_dianalisis)

    # --- 6. PER-WORD HIGHLIGHT ---
    # Only the Naive Bayes path fills kamus_bobot; with the LSTM every word
    # comes out as "Netral" with weight 0.
    teks_highlight = _bedah_kata(teks_untuk_dianalisis, kamus_bobot)

    return {
        "status": "success",
        "hasil_analisis": {
            "model_digunakan": jenis_model,
            "teks_dianalisis": teks_untuk_dianalisis,
            "prediksi_utama": "HOAX" if prob_hoax > prob_fakta else "FAKTA",
            "probabilitas": {
                "fakta": round(prob_fakta * 100, 2),
                "hoax": round(prob_hoax * 100, 2)
            }
        },
        "bedah_kata": teks_highlight
    }