# NOTE(review): removed non-Python artifacts (Hugging Face Spaces page header,
# commit-hash列 and line-number gutter) that were pasted above the imports and
# made this file syntactically invalid.
from flask import Flask, request, jsonify, send_from_directory
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import numpy as np
import os
import re
from scipy.stats import mode
import requests
from bs4 import BeautifulSoup
import traceback # Impor untuk melacak error detail
# --- 1. Flask application initialization ---
# Static assets (index.html etc.) are served from the local "frontend" folder.
app = Flask(__name__, static_folder='frontend')
# --- 2. Model configuration and loading ---
# Root directory on persistent storage holding the fine-tuned checkpoints.
MODELS_DIR = "/data/models"
# Display name -> local checkpoint directory for each ensemble member.
MODEL_CONFIG = {
    "BERT": os.path.join(MODELS_DIR, "bert"),
    "RoBERTa": os.path.join(MODELS_DIR, "roberta"),
    "ELECTRA": os.path.join(MODELS_DIR, "electra"),
    "XLNet": os.path.join(MODELS_DIR, "xlnet")
}
# Cache of loaded models: name -> (model, tokenizer), filled by load_all_models().
models_cache = {}
# The Hugging Face server is CPU-only, so inference is pinned to the CPU device.
device = torch.device("cpu")
print(f"Perangkat komputasi diatur ke: {device}")
def scrape_news_from_url(url):
    """Fetch *url* and extract the article text as "<title>. <body>".

    Returns a ``(text, None)`` tuple on success, or ``(None, error_message)``
    when the page cannot be fetched or no article content is found.
    """
    browser_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                      'AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/91.0.4472.124 Safari/537.36'
    }
    try:
        response = requests.get(url, headers=browser_headers, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')

        heading = soup.find('h1')
        title = heading.get_text(strip=True) if heading else ""

        # Prefer a dedicated article container; fall back to common
        # content-ish <div> class names, then to <main>.
        container = (
            soup.find('article')
            or soup.find('div', class_=re.compile(r'content|body|main|post|article|detail', re.I))
            or soup.find('main')
        )
        body = ""
        if container:
            body = " ".join(p.get_text(strip=True) for p in container.find_all('p'))
        # A very short extraction usually means the container guess was wrong;
        # retry over every <p> on the page.
        if not body or len(body) < 150:
            body = " ".join(p.get_text(strip=True) for p in soup.find_all('p'))

        full_text = f"{title}. {body}"
        if not full_text.strip() or full_text.strip() == ".":
            return None, "Gagal mengekstrak konten artikel."
        return full_text, None
    except Exception as e:
        return None, f"Gagal mengakses atau memproses link: {e}"
def clean_text_for_prediction(text_input):
    """Normalize raw article text before tokenization.

    Strips fact-checker markers ("[salah]" / "(salah)"), HTML tags, URLs,
    @mentions, #hashtags, non-ASCII characters, and any symbol outside the
    allowed alphanumeric/punctuation set, then lowercases and collapses
    runs of whitespace. Non-string input yields an empty string.
    """
    if not isinstance(text_input, str):
        return ""
    noise = re.compile(
        r'\[\s*salah\s*\]|\(\s*salah\s*\)|<.*?>|http\S+|www\S+|https\S+'
        r'|\@\w+|\#\w+|[^\x00-\x7F]+|[^a-zA-Z0-9\s\.\,\?\!]',
        flags=re.IGNORECASE | re.MULTILINE,
    )
    lowered = noise.sub(' ', text_input).strip().lower()
    return re.sub(r'\s+', ' ', lowered).strip()
def load_all_models():
    """Populate ``models_cache`` with every configured model and tokenizer.

    Each checkpoint listed in MODEL_CONFIG is loaded from disk, moved to the
    CPU device, and switched to eval mode. Missing directories and load
    failures are logged and skipped so one bad model never blocks the rest.
    """
    print("*" * 50)
    print("Memuat semua model AI dari persistent storage...")
    for model_name, model_path in MODEL_CONFIG.items():
        if not os.path.exists(model_path):
            print(f"  PERINGATAN: Direktori model untuk {model_name} tidak ditemukan di {model_path}")
            continue
        print(f"  > Memuat {model_name} dari {model_path}...")
        try:
            tokenizer = AutoTokenizer.from_pretrained(model_path)
            model = AutoModelForSequenceClassification.from_pretrained(model_path)
            # Explicitly pin to CPU at load time and freeze for inference.
            model.to(device)
            model.eval()
            models_cache[model_name] = (model, tokenizer)
            print(f"  > {model_name} berhasil dikonfigurasi dan dipindahkan ke CPU.")
        except Exception as e:
            print(f"  ERROR saat memuat model {model_name}: {e}")
    print("Semua model yang tersedia telah dimuat.")
    print("*" * 50)
@app.route('/predict', methods=['POST'])
def predict():
    """Scrape a news URL, run every loaded model on it, and return the
    per-model predictions plus a majority-vote ensemble verdict as JSON.

    Request body: ``{"url": "<article url>"}``.
    Returns HTTP 400 for a missing/empty URL or a scraping failure, and
    HTTP 500 (with a traceback in the server log) on unexpected errors.
    """
    print("\n[LOG] Menerima permintaan di /predict")
    try:
        # FIX: get_json() returns None when the body is absent or not valid
        # JSON, which previously crashed data.get() with AttributeError and
        # surfaced as a 500. Treat a missing body the same as a missing URL.
        data = request.get_json(silent=True) or {}
        url_input = data.get('url', '')
        print(f"[LOG] URL yang diterima: {url_input}")
        if not url_input or not url_input.strip():
            return jsonify({"error": "URL tidak boleh kosong"}), 400
        print("[LOG] Memulai proses scraping...")
        text_from_url, error_message = scrape_news_from_url(url_input)
        if error_message:
            return jsonify({"error": error_message}), 400
        print("[LOG] Scraping berhasil.")
        cleaned_text = clean_text_for_prediction(text_from_url)
        print("[LOG] Teks berhasil dibersihkan.")
        all_predictions = {}
        individual_preds_list = []
        for model_name, (model, tokenizer) in models_cache.items():
            print(f"[LOG] Melakukan prediksi dengan {model_name}...")
            try:
                inputs = tokenizer.encode_plus(
                    cleaned_text, add_special_tokens=True, max_length=256,
                    padding='max_length', truncation=True,
                    return_attention_mask=True, return_tensors='pt')
                input_ids = inputs['input_ids'].to(device)
                attention_mask = inputs['attention_mask'].to(device)
                with torch.no_grad():
                    outputs = model(input_ids, attention_mask=attention_mask)
                probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
                confidence, predicted_class_idx = torch.max(probs, dim=1)
                # Label convention: class index 1 == "Hoax", 0 == "Fakta".
                predicted_class = "Hoax" if predicted_class_idx.item() == 1 else "Fakta"
                individual_preds_list.append(predicted_class_idx.item())
                all_predictions[model_name] = {
                    "prediction": predicted_class,
                    "confidence": f"{confidence.item():.2%}",
                }
                print(f"[LOG] Prediksi {model_name} berhasil: {predicted_class}")
            except Exception as e:
                # One failing model must not abort the whole ensemble.
                print(f"[ERROR] Prediksi dengan {model_name} gagal: {e}")
                all_predictions[model_name] = {"prediction": "Error", "confidence": "N/A"}
        if individual_preds_list:
            print("[LOG] Melakukan ensemble voting...")
            ensemble_vote_result = mode(np.array(individual_preds_list))
            # scipy >= 1.9 returns a scalar .mode; older releases an ndarray.
            final_prediction_idx = (
                ensemble_vote_result.mode[0]
                if isinstance(ensemble_vote_result.mode, np.ndarray)
                else ensemble_vote_result.mode
            )
            final_prediction = "Hoax" if final_prediction_idx == 1 else "Fakta"
            # Ensemble "confidence" = fraction of models agreeing with the vote.
            agreement = np.mean([p == final_prediction_idx for p in individual_preds_list])
            all_predictions["Bagging (Ensemble)"] = {
                "prediction": final_prediction,
                "confidence": f"{agreement:.2%}",
            }
            print("[LOG] Ensemble voting selesai.")
        print("[LOG] Mengirimkan hasil ke frontend.")
        return jsonify(all_predictions)
    except Exception:
        print(f"[FATAL ERROR] Terjadi error tak terduga di rute /predict:")
        # Print the full traceback for detailed server-side debugging.
        traceback.print_exc()
        return jsonify({"error": "Kesalahan internal server."}), 500
@app.route('/')
def serve_index():
    """Serve the frontend's entry page from the static folder."""
    return send_from_directory('frontend', 'index.html')
if __name__ == '__main__':
    # Warm the model cache before accepting any traffic, then bind on all
    # interfaces; 7860 is the default port expected by Hugging Face Spaces.
    load_all_models()
    app.run(host="0.0.0.0", port=7860, debug=False)