DINO00 committed on
Commit
139c84f
·
verified ·
1 Parent(s): 6acc8ac

Update api.py

Browse files
Files changed (1) hide show
  1. api.py +73 -38
api.py CHANGED
@@ -6,37 +6,54 @@ import os
6
  import re
7
  import requests
8
  from bs4 import BeautifulSoup
 
9
 
10
  app = FastAPI(
11
- title="API Deteksi Hoax Naive Bayes",
12
- description="API untuk mendeteksi berita hoax dari teks atau URL portal berita.",
13
  version="1.0.0"
14
  )
15
 
16
- # --- KONFIGURASI CORS ---
17
  app.add_middleware(
18
  CORSMiddleware,
19
  allow_origins=["https://deteksi-berita-hoax-kappa.vercel.app/"],
20
  allow_credentials=True,
21
- allow_methods=["https://deteksi-berita-hoax-kappa.vercel.app/"],
22
- allow_headers=["https://deteksi-berita-hoax-kappa.vercel.app/"],
23
  )
24
- # Load Model saat server menyala
25
- #MODEL_PATH = 'model_hoax_complete.pkl'
26
- MODEL_PATH = 'lstm_fake_news_model.h5'
 
 
 
 
 
 
27
  try:
28
- if os.path.exists(MODEL_PATH):
29
- model = joblib.load(MODEL_PATH)
30
- print("Model berhasil dimuat!")
31
- else:
32
- model = None
 
 
 
 
 
 
 
 
 
33
  except Exception as e:
34
- model = None
35
- print(f"Error loading model: {e}")
36
 
37
- # Skema Request dari Client
 
38
  class PredictRequest(BaseModel):
39
  input_text: str
 
 
40
 
41
  def scrape_berita(url):
42
  """Fungsi pembaca halaman web (Scraper)"""
@@ -44,26 +61,29 @@ def scrape_berita(url):
44
  headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}
45
  response = requests.get(url, headers=headers, timeout=10)
46
  response.raise_for_status()
47
-
48
  soup = BeautifulSoup(response.content, 'html.parser')
49
  paragraf = soup.find_all('p')
50
  teks_berita = " ".join([p.get_text() for p in paragraf])
51
-
52
  return teks_berita.strip()
53
  except Exception as e:
54
  return f"GAGAL: {e}"
55
 
56
- # Endpoint Utama untuk Prediksi
57
  @app.post("/predict")
58
  def deteksi_hoax_api(request: PredictRequest):
59
- if model is None:
60
- raise HTTPException(status_code=500, detail="Model tidak ditemukan di server.")
 
 
 
 
 
 
61
 
62
  teks_mentah = request.input_text.strip()
63
  if not teks_mentah:
64
  raise HTTPException(status_code=400, detail="Input tidak boleh kosong.")
65
 
66
- # Logika Smart Input: Cek apakah input berupa URL
67
  if teks_mentah.startswith("http://") or teks_mentah.startswith("https://"):
68
  teks_untuk_dianalisis = scrape_berita(teks_mentah)
69
  if teks_untuk_dianalisis.startswith("GAGAL:"):
@@ -71,21 +91,37 @@ def deteksi_hoax_api(request: PredictRequest):
71
  else:
72
  teks_untuk_dianalisis = teks_mentah
73
 
74
- # Prediksi Probabilitas
75
- proba = model.predict_proba([teks_untuk_dianalisis])[0]
76
- prob_fakta = float(proba[0])
77
- prob_hoax = float(proba[1])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
 
79
- # Proses Ekstraksi Kata untuk Highlight Frontend
80
- try:
81
- vec = model[0]
82
- clf = model[1]
83
- feature_names = vec.get_feature_names_out()
84
- log_odds = clf.feature_log_prob_[1] - clf.feature_log_prob_[0]
85
- kamus_bobot = dict(zip(feature_names, log_odds))
86
- except Exception:
87
- kamus_bobot = {}
88
 
 
89
  kata_kata = teks_untuk_dianalisis.split()
90
  teks_highlight = []
91
 
@@ -93,7 +129,6 @@ def deteksi_hoax_api(request: PredictRequest):
93
  kata_bersih = re.sub(r'[^a-z]', '', kata.lower())
94
  bobot = float(kamus_bobot.get(kata_bersih, 0))
95
 
96
- # Penentuan label per kata berdasarkan threshold
97
  if bobot > 0.3:
98
  label_kata = "Hoax"
99
  elif bobot < -0.3:
@@ -107,10 +142,10 @@ def deteksi_hoax_api(request: PredictRequest):
107
  "bobot": round(bobot, 4)
108
  })
109
 
110
- # Mengembalikan response dalam format JSON
111
  return {
112
  "status": "success",
113
  "hasil_analisis": {
 
114
  "teks_dianalisis": teks_untuk_dianalisis,
115
  "prediksi_utama": "HOAX" if prob_hoax > prob_fakta else "FAKTA",
116
  "probabilitas": {
@@ -119,4 +154,4 @@ def deteksi_hoax_api(request: PredictRequest):
119
  }
120
  },
121
  "bedah_kata": teks_highlight
122
- }
 
6
  import re
7
  import requests
8
  from bs4 import BeautifulSoup
9
+ from tensorflow.keras.models import load_model
10
 
11
  app = FastAPI(
12
+ title="API Deteksi Hoax Multi-Model",
13
+ description="API untuk mendeteksi berita hoax menggunakan pilihan model.",
14
  version="1.0.0"
15
  )
16
 
 
17
  app.add_middleware(
18
  CORSMiddleware,
19
  allow_origins=["https://deteksi-berita-hoax-kappa.vercel.app/"],
20
  allow_credentials=True,
21
+ allow_methods=["*"],
22
+ allow_headers=["*"],
23
  )
24
+
25
+ # --- 1. LOAD KEDUA MODEL ---
26
+ models = {
27
+ "naive_bayes": None,
28
+ "lstm": None
29
+ }
30
+
31
+ # Load Model Naive Bayes
32
+ PATH_NB = 'model_hoax_complete.pkl'
33
  try:
34
+ if os.path.exists(PATH_NB):
35
+ models["naive_bayes"] = joblib.load(PATH_NB)
36
+ print("Model Naive Bayes berhasil dimuat!")
37
+ except Exception as e:
38
+ print(f"Error loading Naive Bayes: {e}")
39
+
40
+ # Load Model LSTM
41
+ PATH_LSTM = 'lstm_fake_news_model.h5'
42
+ try:
43
+ if os.path.exists(PATH_LSTM):
44
+ # Gunakan joblib sesuai skrip Anda, atau load_model Keras jika error
45
+ models["lstm"] = joblib.load(PATH_LSTM)
46
+ # models["lstm"] = load_model(PATH_LSTM)
47
+ print("Model LSTM berhasil dimuat!")
48
  except Exception as e:
49
+ print(f"Error loading LSTM: {e}")
 
50
 
51
+
52
+ # --- 2. UPDATE SKEMA REQUEST ---
53
  class PredictRequest(BaseModel):
54
  input_text: str
55
+ model_type: str = "naive_bayes" # Default pakai naive_bayes jika tidak dikirim
56
+
57
 
58
  def scrape_berita(url):
59
  """Fungsi pembaca halaman web (Scraper)"""
 
61
  headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}
62
  response = requests.get(url, headers=headers, timeout=10)
63
  response.raise_for_status()
 
64
  soup = BeautifulSoup(response.content, 'html.parser')
65
  paragraf = soup.find_all('p')
66
  teks_berita = " ".join([p.get_text() for p in paragraf])
 
67
  return teks_berita.strip()
68
  except Exception as e:
69
  return f"GAGAL: {e}"
70
 
71
+
72
  @app.post("/predict")
73
  def deteksi_hoax_api(request: PredictRequest):
74
+ # --- 3. PILIH MODEL ---
75
+ jenis_model = request.model_type
76
+ if jenis_model not in models:
77
+ raise HTTPException(status_code=400, detail="Pilihan model tidak valid. Gunakan 'naive_bayes' atau 'lstm'.")
78
+
79
+ aktif_model = models[jenis_model]
80
+ if aktif_model is None:
81
+ raise HTTPException(status_code=500, detail=f"Model {jenis_model} tidak ditemukan di server.")
82
 
83
  teks_mentah = request.input_text.strip()
84
  if not teks_mentah:
85
  raise HTTPException(status_code=400, detail="Input tidak boleh kosong.")
86
 
 
87
  if teks_mentah.startswith("http://") or teks_mentah.startswith("https://"):
88
  teks_untuk_dianalisis = scrape_berita(teks_mentah)
89
  if teks_untuk_dianalisis.startswith("GAGAL:"):
 
91
  else:
92
  teks_untuk_dianalisis = teks_mentah
93
 
94
+ # --- 4. PREDIKSI BERDASARKAN MODEL ---
95
+ kamus_bobot = {}
96
+
97
+ if jenis_model == "naive_bayes":
98
+ # Logika untuk algoritma scikit-learn
99
+ proba = aktif_model.predict_proba([teks_untuk_dianalisis])[0]
100
+ prob_fakta = float(proba[0])
101
+ prob_hoax = float(proba[1])
102
+
103
+ # Ekstraksi kata untuk highlight Frontend (biasanya hanya ada pada model linier/Naive Bayes)
104
+ try:
105
+ vec = aktif_model[0]
106
+ clf = aktif_model[1]
107
+ feature_names = vec.get_feature_names_out()
108
+ log_odds = clf.feature_log_prob_[1] - clf.feature_log_prob_[0]
109
+ kamus_bobot = dict(zip(feature_names, log_odds))
110
+ except Exception:
111
+ pass
112
+
113
+ elif jenis_model == "lstm":
114
+ # Logika untuk Deep Learning
115
+ # (Perhatikan: LSTM umumnya memerlukan padding dan tokenisasi sebelum predict)
116
+ proba = aktif_model.predict_proba([teks_untuk_dianalisis])[0]
117
+ # Jika menggunakan TensorFlow, syntaxnya mungkin berubah jadi -> aktif_model.predict(padded_sequence)[0]
118
+ prob_fakta = float(proba[0])
119
+ prob_hoax = float(proba[1])
120
+ # Model Deep Learning umumnya tidak memiliki "feature_log_prob", jadi highlight kata kita bypass
121
+ # dan kamus_bobot dibiarkan kosong ({})
122
 
 
 
 
 
 
 
 
 
 
123
 
124
+ # Penentuan Highlight Kata (Akan berfungsi baik di Naive Bayes, dan jadi 'Netral' di LSTM jika tanpa LIME/SHAP)
125
  kata_kata = teks_untuk_dianalisis.split()
126
  teks_highlight = []
127
 
 
129
  kata_bersih = re.sub(r'[^a-z]', '', kata.lower())
130
  bobot = float(kamus_bobot.get(kata_bersih, 0))
131
 
 
132
  if bobot > 0.3:
133
  label_kata = "Hoax"
134
  elif bobot < -0.3:
 
142
  "bobot": round(bobot, 4)
143
  })
144
 
 
145
  return {
146
  "status": "success",
147
  "hasil_analisis": {
148
+ "model_digunakan": jenis_model,
149
  "teks_dianalisis": teks_untuk_dianalisis,
150
  "prediksi_utama": "HOAX" if prob_hoax > prob_fakta else "FAKTA",
151
  "probabilitas": {
 
154
  }
155
  },
156
  "bedah_kata": teks_highlight
157
+ }