DINO00 committed on
Commit
c39208b
·
verified ·
1 Parent(s): ef2f1d5

Update api.py

Browse files
Files changed (1) hide show
  1. api.py +41 -40
api.py CHANGED
@@ -6,15 +6,25 @@ import os
6
  import re
7
  import requests
8
  from bs4 import BeautifulSoup
9
- from tensorflow.keras.models import load_model
10
- from tensorflow.keras.preprocessing.sequence import pad_sequences
11
  import pickle
12
 
 
 
 
 
 
 
 
 
 
 
 
 
13
 
14
  app = FastAPI(
15
  title="API Deteksi Hoax Multi-Model",
16
  description="API untuk mendeteksi berita hoax menggunakan pilihan model.",
17
- version="1.0.0"
18
  )
19
 
20
  app.add_middleware(
@@ -25,7 +35,7 @@ app.add_middleware(
25
  allow_headers=["*"],
26
  )
27
 
28
- # --- 1. LOAD KEDUA MODEL ---
29
  models = {
30
  "naive_bayes": None,
31
  "lstm": None
@@ -41,35 +51,36 @@ try:
41
  except Exception as e:
42
  print(f"Error loading Naive Bayes: {e}")
43
 
44
- # Load Model LSTM
45
- PATH_LSTM = 'lstm_fake_news_model.h5'
46
  try:
47
  if os.path.exists(PATH_LSTM):
48
  models["lstm"] = load_model(PATH_LSTM)
49
  print("Model LSTM berhasil dimuat!")
50
  except Exception as e:
51
  print(f"Error loading LSTM: {e}")
 
52
  # Load Tokenizer untuk LSTM
53
  PATH_TOKENIZER = 'tokenizer.pkl'
54
  try:
55
  if os.path.exists(PATH_TOKENIZER):
56
- #tokenizer = joblib.load(PATH_TOKENIZER)
57
- tokenizer = pickle.load(open(PATH_TOKENIZER, 'rb'))
 
58
  print("Tokenizer LSTM berhasil dimuat!")
59
  except Exception as e:
60
  print(f"Error loading Tokenizer: {e}")
61
 
62
 
63
- # --- 2. UPDATE SKEMA REQUEST ---
64
  class PredictRequest(BaseModel):
65
  input_text: str
66
- model_type: str = "naive_bayes" # Default pakai naive_bayes jika tidak dikirim
67
-
68
 
69
  def scrape_berita(url):
70
  """Fungsi pembaca halaman web (Scraper)"""
71
  try:
72
- headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}
73
  response = requests.get(url, headers=headers, timeout=10)
74
  response.raise_for_status()
75
  soup = BeautifulSoup(response.content, 'html.parser')
@@ -82,11 +93,11 @@ def scrape_berita(url):
82
 
83
  @app.post("/predict")
84
  def deteksi_hoax_api(request: PredictRequest):
85
- # --- 3. PILIH MODEL ---
86
  jenis_model = request.model_type
87
- if jenis_model not in models:
88
- raise HTTPException(status_code=400, detail="Pilihan model tidak valid. Gunakan 'naive_bayes' atau 'lstm'.")
89
-
90
  aktif_model = models[jenis_model]
91
  if aktif_model is None:
92
  raise HTTPException(status_code=500, detail=f"Model {jenis_model} tidak ditemukan di server.")
@@ -97,21 +108,22 @@ def deteksi_hoax_api(request: PredictRequest):
97
 
98
  if teks_mentah.startswith("http://") or teks_mentah.startswith("https://"):
99
  teks_untuk_dianalisis = scrape_berita(teks_mentah)
100
- if teks_untuk_dianalisis.startswith("GAGAL:"):
101
- raise HTTPException(status_code=400, detail=f"Gagal memproses URL: {teks_untuk_dianalisis}")
102
  else:
103
  teks_untuk_dianalisis = teks_mentah
104
 
105
- # --- 4. PREDIKSI BERDASARKAN MODEL ---
106
  kamus_bobot = {}
 
 
107
 
108
  if jenis_model == "naive_bayes":
109
- # Logika untuk algoritma scikit-learn
110
  proba = aktif_model.predict_proba([teks_untuk_dianalisis])[0]
111
  prob_fakta = float(proba[0])
112
  prob_hoax = float(proba[1])
113
 
114
- # Ekstraksi kata untuk highlight Frontend (biasanya hanya ada pada model linier/Naive Bayes)
115
  try:
116
  vec = aktif_model[0]
117
  clf = aktif_model[1]
@@ -122,40 +134,29 @@ def deteksi_hoax_api(request: PredictRequest):
122
  pass
123
 
124
  elif jenis_model == "lstm":
125
- # Pastikan tokenizer sudah berhasil dimuat sebelumnya
126
  if tokenizer is None:
127
  raise HTTPException(status_code=500, detail="Tokenizer model LSTM tidak ditemukan di server.")
128
 
129
- # 1. Konversi Teks Teks ke Sequence Angka
130
  sequence = tokenizer.texts_to_sequences([teks_untuk_dianalisis])
131
 
132
- # 2. Padding (Menyeragamkan panjang kalimat)
133
- # PENTING: maxlen=100 adalah angka standar,
134
- # namun Anda HARUS menyesuaikan ini dengan 'maxlen' yang Anda gunakan saat di file Jupyter/Colab waktu training model.
135
- # Bisa jadi 100, 200, atau 500. Silakan cek ulang notebook Anda jika hasilnya kurang akurat.
136
- MAX_LEN = 150
137
  padded_sequence = pad_sequences(sequence, maxlen=MAX_LEN, padding='post', truncating='post')
138
 
139
- # 3. Prediksi Menggunakan .predict() BUKAN .predict_proba()
140
- prediksi_mentah = aktif_model.predict(padded_sequence)[0]
141
 
142
- # 4. Pengolahan Output Keras/TensorFlow
143
  if len(prediksi_mentah) >= 2:
144
- # Jika model Anda Outputnya Softmax ([Prob_Fakta, Prob_Hoax])
145
  prob_fakta = float(prediksi_mentah[0])
146
  prob_hoax = float(prediksi_mentah[1])
147
  else:
148
- # Jika model Anda Outputnya Sigmoid (1 nilai saja, misal 0.9 = 90% Hoax)
149
  nilai = float(prediksi_mentah[0])
150
  prob_hoax = nilai
151
  prob_fakta = 1.0 - nilai
152
 
153
-
154
- # Model Deep Learning umumnya tidak memiliki log odds per-kata
155
- # Maka highlight bedah kata kita biarkan kosong di backend
156
-
157
-
158
- # Penentuan Highlight Kata (Akan berfungsi baik di Naive Bayes, dan jadi 'Netral' di LSTM jika tanpa LIME/SHAP)
159
  kata_kata = teks_untuk_dianalisis.split()
160
  teks_highlight = []
161
 
@@ -188,4 +189,4 @@ def deteksi_hoax_api(request: PredictRequest):
188
  }
189
  },
190
  "bedah_kata": teks_highlight
191
- }
 
6
  import re
7
  import requests
8
  from bs4 import BeautifulSoup
 
 
9
  import pickle
10
 
11
+ # --- 1. KERAS 3 COMPATIBILITY PATCH & IMPORTS ---
12
+ import sys
13
+ import keras
14
+
15
+ # Patch darurat: Menjembatani Tokenizer lama (Keras 2) agar bisa dimuat di Keras 3
16
+ # Ini mencegah error "No module named 'keras.src.preprocessing'"
17
+ if 'keras.src.preprocessing' not in sys.modules:
18
+ sys.modules['keras.src.preprocessing'] = keras.preprocessing
19
+
20
+ # Gunakan import standar Keras 3 (Backend Agnostic)
21
+ from keras.models import load_model
22
+ from keras.utils import pad_sequences # Di Keras 3, pad_sequences pindah ke utils
23
 
24
  app = FastAPI(
25
  title="API Deteksi Hoax Multi-Model",
26
  description="API untuk mendeteksi berita hoax menggunakan pilihan model.",
27
+ version="1.0.1" # Versi update Keras 3
28
  )
29
 
30
  app.add_middleware(
 
35
  allow_headers=["*"],
36
  )
37
 
38
+ # --- 2. LOAD KEDUA MODEL ---
39
  models = {
40
  "naive_bayes": None,
41
  "lstm": None
 
51
  except Exception as e:
52
  print(f"Error loading Naive Bayes: {e}")
53
 
54
+ # Load Model LSTM (Format Keras 3)
55
+ PATH_LSTM = 'lstm_fake_news_model.h5' # Atau .keras jika Anda menggunakan format baru
56
  try:
57
  if os.path.exists(PATH_LSTM):
58
  models["lstm"] = load_model(PATH_LSTM)
59
  print("Model LSTM berhasil dimuat!")
60
  except Exception as e:
61
  print(f"Error loading LSTM: {e}")
62
+
63
  # Load Tokenizer untuk LSTM
64
  PATH_TOKENIZER = 'tokenizer.pkl'
65
  try:
66
  if os.path.exists(PATH_TOKENIZER):
67
+ # Memuat tokenizer menggunakan pickle bawaan Python
68
+ with open(PATH_TOKENIZER, 'rb') as f:
69
+ tokenizer = pickle.load(f)
70
  print("Tokenizer LSTM berhasil dimuat!")
71
  except Exception as e:
72
  print(f"Error loading Tokenizer: {e}")
73
 
74
 
75
+ # --- 3. SKEMA REQUEST & SCRAPER ---
76
  class PredictRequest(BaseModel):
77
  input_text: str
78
+ model_type: str = "naive_bayes"
 
79
 
80
  def scrape_berita(url):
81
  """Fungsi pembaca halaman web (Scraper)"""
82
  try:
83
+ headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'}
84
  response = requests.get(url, headers=headers, timeout=10)
85
  response.raise_for_status()
86
  soup = BeautifulSoup(response.content, 'html.parser')
 
93
 
94
  @app.post("/predict")
95
  def deteksi_hoax_api(request: PredictRequest):
96
+ # --- 4. VALIDASI INPUT ---
97
  jenis_model = request.model_type
98
+ if jenis_model not in models:
99
+ raise HTTPException(status_code=400, detail="Pilihan model tidak valid. Gunakan 'naive_bayes' atau 'lstm'.")
100
+
101
  aktif_model = models[jenis_model]
102
  if aktif_model is None:
103
  raise HTTPException(status_code=500, detail=f"Model {jenis_model} tidak ditemukan di server.")
 
108
 
109
  if teks_mentah.startswith("http://") or teks_mentah.startswith("https://"):
110
  teks_untuk_dianalisis = scrape_berita(teks_mentah)
111
+ if teks_untuk_dianalisis.startswith("GAGAL:"):
112
+ raise HTTPException(status_code=400, detail=f"Gagal memproses URL: {teks_untuk_dianalisis}")
113
  else:
114
  teks_untuk_dianalisis = teks_mentah
115
 
116
+ # --- 5. PREDIKSI BERDASARKAN MODEL ---
117
  kamus_bobot = {}
118
+ prob_fakta = 0.0
119
+ prob_hoax = 0.0
120
 
121
  if jenis_model == "naive_bayes":
 
122
  proba = aktif_model.predict_proba([teks_untuk_dianalisis])[0]
123
  prob_fakta = float(proba[0])
124
  prob_hoax = float(proba[1])
125
 
126
+ # Ekstraksi kata untuk highlight Frontend
127
  try:
128
  vec = aktif_model[0]
129
  clf = aktif_model[1]
 
134
  pass
135
 
136
  elif jenis_model == "lstm":
 
137
  if tokenizer is None:
138
  raise HTTPException(status_code=500, detail="Tokenizer model LSTM tidak ditemukan di server.")
139
 
140
+ # 1. Konversi Teks ke Sequence Angka
141
  sequence = tokenizer.texts_to_sequences([teks_untuk_dianalisis])
142
 
143
+ # 2. Padding
144
+ MAX_LEN = 150 # Sesuaikan dengan panjang saat training
 
 
 
145
  padded_sequence = pad_sequences(sequence, maxlen=MAX_LEN, padding='post', truncating='post')
146
 
147
+ # 3. Prediksi (Keras 3 mengembalikan array numpy standar)
148
+ prediksi_mentah = aktif_model.predict(padded_sequence, verbose=0)[0]
149
 
150
+ # 4. Pengolahan Output Keras 3
151
  if len(prediksi_mentah) >= 2:
 
152
  prob_fakta = float(prediksi_mentah[0])
153
  prob_hoax = float(prediksi_mentah[1])
154
  else:
 
155
  nilai = float(prediksi_mentah[0])
156
  prob_hoax = nilai
157
  prob_fakta = 1.0 - nilai
158
 
159
+ # --- 6. PENENTUAN HIGHLIGHT KATA ---
 
 
 
 
 
160
  kata_kata = teks_untuk_dianalisis.split()
161
  teks_highlight = []
162
 
 
189
  }
190
  },
191
  "bedah_kata": teks_highlight
192
+ }