Spaces: Sleeping
Refactor: Add detailed logging and preload flag for stability
Browse files
- Dockerfile +3 -2
- app.py +30 -9
Dockerfile
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
# Dockerfile
|
| 2 |
|
| 3 |
# 1. Gunakan base image Python yang stabil
|
| 4 |
FROM python:3.11-slim
|
|
@@ -27,4 +27,5 @@ EXPOSE 7860
|
|
| 27 |
|
| 28 |
# 9. Perintah untuk menjalankan aplikasi
|
| 29 |
# Jalankan setup.sh untuk mengunduh model, LALU jalankan server Gunicorn.
|
| 30 |
-
|
|
|
|
|
|
| 1 |
+
# Dockerfile (Perbaikan Izin)
|
| 2 |
|
| 3 |
# 1. Gunakan base image Python yang stabil
|
| 4 |
FROM python:3.11-slim
|
|
|
|
| 27 |
|
| 28 |
# 9. Perintah untuk menjalankan aplikasi
|
| 29 |
# Jalankan setup.sh untuk mengunduh model, LALU jalankan server Gunicorn.
|
| 30 |
+
# Semua akan berjalan sebagai root, yang akan menyelesaikan masalah izin.
|
| 31 |
+
CMD ["/bin/bash", "-c", "./setup.sh && gunicorn --bind 0.0.0.0:7860 --timeout 600 --preload app:app"]
|
app.py
CHANGED
|
@@ -7,11 +7,13 @@ import re
|
|
| 7 |
from scipy.stats import mode
|
| 8 |
import requests
|
| 9 |
from bs4 import BeautifulSoup
|
|
|
|
| 10 |
|
|
|
|
| 11 |
app = Flask(__name__, static_folder='frontend')
|
| 12 |
|
| 13 |
-
#
|
| 14 |
-
MODELS_DIR = "/data/models"
|
| 15 |
MODEL_CONFIG = {
|
| 16 |
"BERT": os.path.join(MODELS_DIR, "bert"),
|
| 17 |
"RoBERTa": os.path.join(MODELS_DIR, "roberta"),
|
|
@@ -20,7 +22,9 @@ MODEL_CONFIG = {
|
|
| 20 |
}
|
| 21 |
|
| 22 |
models_cache = {}
|
| 23 |
-
|
|
|
|
|
|
|
| 24 |
|
| 25 |
def scrape_news_from_url(url):
|
| 26 |
try:
|
|
@@ -51,6 +55,7 @@ def clean_text_for_prediction(text_input):
|
|
| 51 |
return text
|
| 52 |
|
| 53 |
def load_all_models():
|
|
|
|
| 54 |
print("*" * 50)
|
| 55 |
print("Memuat semua model AI dari persistent storage...")
|
| 56 |
for model_name, model_path in MODEL_CONFIG.items():
|
|
@@ -59,9 +64,11 @@ def load_all_models():
|
|
| 59 |
try:
|
| 60 |
tokenizer = AutoTokenizer.from_pretrained(model_path)
|
| 61 |
model = AutoModelForSequenceClassification.from_pretrained(model_path)
|
|
|
|
|
|
|
| 62 |
model.eval()
|
| 63 |
models_cache[model_name] = (model, tokenizer)
|
| 64 |
-
print(f" > {model_name} berhasil dikonfigurasi.")
|
| 65 |
except Exception as e: print(f" ERROR saat memuat model {model_name}: {e}")
|
| 66 |
else:
|
| 67 |
print(f" PERINGATAN: Direktori model untuk {model_name} tidak ditemukan di {model_path}")
|
|
@@ -71,24 +78,31 @@ def load_all_models():
|
|
| 71 |
|
| 72 |
@app.route('/predict', methods=['POST'])
|
| 73 |
def predict():
|
|
|
|
|
|
|
| 74 |
try:
|
| 75 |
data = request.get_json()
|
| 76 |
url_input = data.get('url', '')
|
|
|
|
| 77 |
if not url_input or not url_input.strip(): return jsonify({"error": "URL tidak boleh kosong"}), 400
|
| 78 |
|
|
|
|
| 79 |
text_from_url, error_message = scrape_news_from_url(url_input)
|
| 80 |
if error_message: return jsonify({"error": error_message}), 400
|
|
|
|
| 81 |
|
| 82 |
cleaned_text = clean_text_for_prediction(text_from_url)
|
|
|
|
| 83 |
|
| 84 |
all_predictions = {}
|
| 85 |
individual_preds_list = []
|
| 86 |
|
| 87 |
for model_name, (model, tokenizer) in models_cache.items():
|
| 88 |
-
|
| 89 |
try:
|
| 90 |
inputs = tokenizer.encode_plus(cleaned_text, add_special_tokens=True, max_length=256, padding='max_length', truncation=True, return_attention_mask=True, return_tensors='pt')
|
| 91 |
-
input_ids = inputs['input_ids'].to(device)
|
|
|
|
| 92 |
with torch.no_grad():
|
| 93 |
outputs = model(input_ids, attention_mask=attention_mask)
|
| 94 |
probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
|
|
@@ -96,24 +110,31 @@ def predict():
|
|
| 96 |
predicted_class = "Hoax" if predicted_class_idx.item() == 1 else "Fakta"
|
| 97 |
individual_preds_list.append(predicted_class_idx.item())
|
| 98 |
all_predictions[model_name] = {"prediction": predicted_class, "confidence": f"{confidence.item():.2%}"}
|
|
|
|
| 99 |
except Exception as e:
|
| 100 |
-
print(f"
|
| 101 |
all_predictions[model_name] = {"prediction": "Error", "confidence": "N/A"}
|
| 102 |
|
| 103 |
if individual_preds_list:
|
|
|
|
| 104 |
ensemble_vote_result = mode(np.array(individual_preds_list))
|
| 105 |
final_prediction_idx = ensemble_vote_result.mode[0] if isinstance(ensemble_vote_result.mode, np.ndarray) else ensemble_vote_result.mode
|
| 106 |
final_prediction = "Hoax" if final_prediction_idx == 1 else "Fakta"
|
| 107 |
agreement = np.mean([p == final_prediction_idx for p in individual_preds_list])
|
| 108 |
all_predictions["Bagging (Ensemble)"] = {"prediction": final_prediction, "confidence": f"{agreement:.2%}"}
|
|
|
|
| 109 |
|
|
|
|
| 110 |
return jsonify(all_predictions)
|
| 111 |
except Exception as e:
|
| 112 |
-
print(f"[FATAL ERROR]
|
|
|
|
|
|
|
|
|
|
| 113 |
|
| 114 |
@app.route('/')
|
| 115 |
def serve_index(): return send_from_directory('frontend', 'index.html')
|
| 116 |
|
| 117 |
if __name__ == '__main__':
|
| 118 |
load_all_models()
|
| 119 |
-
app.run(host="0.0.0.0", port=7860, debug=False)
|
|
|
|
| 7 |
from scipy.stats import mode
|
| 8 |
import requests
|
| 9 |
from bs4 import BeautifulSoup
|
| 10 |
+
import traceback # Impor untuk melacak error detail
|
| 11 |
|
| 12 |
+
# --- 1. Inisialisasi Aplikasi Flask ---
|
| 13 |
app = Flask(__name__, static_folder='frontend')
|
| 14 |
|
| 15 |
+
# --- 2. Konfigurasi dan Pemuatan Model ---
|
| 16 |
+
MODELS_DIR = "/data/models"
|
| 17 |
MODEL_CONFIG = {
|
| 18 |
"BERT": os.path.join(MODELS_DIR, "bert"),
|
| 19 |
"RoBERTa": os.path.join(MODELS_DIR, "roberta"),
|
|
|
|
| 22 |
}
|
| 23 |
|
| 24 |
models_cache = {}
|
| 25 |
+
# Di server Hugging Face (CPU), kita akan selalu menggunakan CPU.
|
| 26 |
+
device = torch.device("cpu")
|
| 27 |
+
print(f"Perangkat komputasi diatur ke: {device}")
|
| 28 |
|
| 29 |
def scrape_news_from_url(url):
|
| 30 |
try:
|
|
|
|
| 55 |
return text
|
| 56 |
|
| 57 |
def load_all_models():
|
| 58 |
+
"""Memuat semua model dan tokenizer ke memori dan secara eksplisit memindahkannya ke CPU."""
|
| 59 |
print("*" * 50)
|
| 60 |
print("Memuat semua model AI dari persistent storage...")
|
| 61 |
for model_name, model_path in MODEL_CONFIG.items():
|
|
|
|
| 64 |
try:
|
| 65 |
tokenizer = AutoTokenizer.from_pretrained(model_path)
|
| 66 |
model = AutoModelForSequenceClassification.from_pretrained(model_path)
|
| 67 |
+
# PERBAIKAN: Langsung pindahkan model ke CPU saat dimuat
|
| 68 |
+
model.to(device)
|
| 69 |
model.eval()
|
| 70 |
models_cache[model_name] = (model, tokenizer)
|
| 71 |
+
print(f" > {model_name} berhasil dikonfigurasi dan dipindahkan ke CPU.")
|
| 72 |
except Exception as e: print(f" ERROR saat memuat model {model_name}: {e}")
|
| 73 |
else:
|
| 74 |
print(f" PERINGATAN: Direktori model untuk {model_name} tidak ditemukan di {model_path}")
|
|
|
|
| 78 |
|
| 79 |
@app.route('/predict', methods=['POST'])
|
| 80 |
def predict():
|
| 81 |
+
# Tambahkan logging untuk setiap langkah di dalam fungsi ini
|
| 82 |
+
print("\n[LOG] Menerima permintaan di /predict")
|
| 83 |
try:
|
| 84 |
data = request.get_json()
|
| 85 |
url_input = data.get('url', '')
|
| 86 |
+
print(f"[LOG] URL yang diterima: {url_input}")
|
| 87 |
if not url_input or not url_input.strip(): return jsonify({"error": "URL tidak boleh kosong"}), 400
|
| 88 |
|
| 89 |
+
print("[LOG] Memulai proses scraping...")
|
| 90 |
text_from_url, error_message = scrape_news_from_url(url_input)
|
| 91 |
if error_message: return jsonify({"error": error_message}), 400
|
| 92 |
+
print("[LOG] Scraping berhasil.")
|
| 93 |
|
| 94 |
cleaned_text = clean_text_for_prediction(text_from_url)
|
| 95 |
+
print("[LOG] Teks berhasil dibersihkan.")
|
| 96 |
|
| 97 |
all_predictions = {}
|
| 98 |
individual_preds_list = []
|
| 99 |
|
| 100 |
for model_name, (model, tokenizer) in models_cache.items():
|
| 101 |
+
print(f"[LOG] Melakukan prediksi dengan {model_name}...")
|
| 102 |
try:
|
| 103 |
inputs = tokenizer.encode_plus(cleaned_text, add_special_tokens=True, max_length=256, padding='max_length', truncation=True, return_attention_mask=True, return_tensors='pt')
|
| 104 |
+
input_ids = inputs['input_ids'].to(device)
|
| 105 |
+
attention_mask = inputs['attention_mask'].to(device)
|
| 106 |
with torch.no_grad():
|
| 107 |
outputs = model(input_ids, attention_mask=attention_mask)
|
| 108 |
probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
|
|
|
|
| 110 |
predicted_class = "Hoax" if predicted_class_idx.item() == 1 else "Fakta"
|
| 111 |
individual_preds_list.append(predicted_class_idx.item())
|
| 112 |
all_predictions[model_name] = {"prediction": predicted_class, "confidence": f"{confidence.item():.2%}"}
|
| 113 |
+
print(f"[LOG] Prediksi {model_name} berhasil: {predicted_class}")
|
| 114 |
except Exception as e:
|
| 115 |
+
print(f"[ERROR] Prediksi dengan {model_name} gagal: {e}")
|
| 116 |
all_predictions[model_name] = {"prediction": "Error", "confidence": "N/A"}
|
| 117 |
|
| 118 |
if individual_preds_list:
|
| 119 |
+
print("[LOG] Melakukan ensemble voting...")
|
| 120 |
ensemble_vote_result = mode(np.array(individual_preds_list))
|
| 121 |
final_prediction_idx = ensemble_vote_result.mode[0] if isinstance(ensemble_vote_result.mode, np.ndarray) else ensemble_vote_result.mode
|
| 122 |
final_prediction = "Hoax" if final_prediction_idx == 1 else "Fakta"
|
| 123 |
agreement = np.mean([p == final_prediction_idx for p in individual_preds_list])
|
| 124 |
all_predictions["Bagging (Ensemble)"] = {"prediction": final_prediction, "confidence": f"{agreement:.2%}"}
|
| 125 |
+
print("[LOG] Ensemble voting selesai.")
|
| 126 |
|
| 127 |
+
print("[LOG] Mengirimkan hasil ke frontend.")
|
| 128 |
return jsonify(all_predictions)
|
| 129 |
except Exception as e:
|
| 130 |
+
print(f"[FATAL ERROR] Terjadi error tak terduga di rute /predict:")
|
| 131 |
+
# PERBAIKAN: Cetak traceback error untuk debugging yang lebih detail
|
| 132 |
+
traceback.print_exc()
|
| 133 |
+
return jsonify({"error": "Kesalahan internal server."}), 500
|
| 134 |
|
| 135 |
@app.route('/')
|
| 136 |
def serve_index(): return send_from_directory('frontend', 'index.html')
|
| 137 |
|
| 138 |
if __name__ == '__main__':
|
| 139 |
load_all_models()
|
| 140 |
+
app.run(host="0.0.0.0", port=7860, debug=False)
|