"""Gradio demo: embed an essay and score it with pre-trained regressors.

At import time the module makes sure the NLTK English stopword list is
available, loads a SentenceTransformer encoder and loads the object stored
in ``xgb_models_all.joblib`` (used below as a mapping of score name to a
fitted model exposing ``predict``).
"""
import re

import gradio as gr
import joblib
import nltk
import numpy as np
from nltk.corpus import stopwords
from sentence_transformers import SentenceTransformer

# === Make sure the stopword corpus is available ===
try:
    stopwords.words("english")
except LookupError:
    nltk.download("stopwords")
stop_words = set(stopwords.words("english"))

# === Load SentenceTransformer ===
st_model = SentenceTransformer("sentence-transformers/all-mpnet-base-v2")

# === Load trained XGBoost models ===
# NOTE: joblib files are pickle-based; only load artifacts from a trusted source.
models = joblib.load("xgb_models_all.joblib")

# Patterns used by preprocess_text, compiled once instead of on every call.
_CRLF_RE = re.compile(r"\r\n")
_NON_ALPHA_RE = re.compile(r"[^a-z\s]")


# === Preprocessing function ===
def preprocess_text(text: str) -> str:
    """Return *text* lowercased, letters-only and without English stopwords.

    Non-string or blank input yields an empty string. Otherwise the text is
    lowercased, each CRLF pair becomes a space, every character that is not
    ``a``-``z`` or whitespace is dropped, and the remaining
    whitespace-separated tokens that are not in ``stop_words`` are re-joined
    with single spaces.
    """
    if not (isinstance(text, str) and text.strip()):
        return ""

    lowered = _CRLF_RE.sub(" ", text.lower())
    letters_only = _NON_ALPHA_RE.sub("", lowered)
    return " ".join(
        token for token in letters_only.split() if token not in stop_words
    )


# === Prediction function ===
def predict(text: str, normalize: bool = True):
    """Score *text* with every loaded model.

    Args:
        text: Raw essay text; ``None`` or blank input short-circuits.
        normalize: Forwarded to the encoder as ``normalize_embeddings``.

    Returns:
        A 3-tuple ``(scores, embedding, dimension)``: a dict mapping each key
        of ``models`` to its prediction as a float, the embedding as a list,
        and the embedding length. Blank input returns ``({}, [], 0)``.
    """
    essay = text.strip() if text else ""
    if not essay:
        return {}, [], 0

    # 1. Preprocess, then 2. embed the cleaned text (a batch of one).
    cleaned = preprocess_text(essay)
    embedding = st_model.encode([cleaned], normalize_embeddings=normalize)[0]

    # 3. Append the essay_length feature: the character count of the
    #    stripped raw text (not of the cleaned text).
    essay_length = len(essay)
    features = np.concatenate([embedding, [essay_length]]).reshape(1, -1)

    # 4. Predict with every model; each one sees the same single-row matrix.
    scores = {
        name: float(regressor.predict(features)[0])
        for name, regressor in models.items()
    }
    return scores, embedding.tolist(), int(embedding.shape[0])


# === Gradio UI ===
# NOTE(review): the source arrived with its line structure collapsed, so the
# exact nesting of components inside each gr.Row() is a best reconstruction;
# confirm against the intended layout.
with gr.Blocks() as demo:
    gr.Markdown("# Essay Scoring Demo")
    gr.Markdown("Masukkan teks")

    with gr.Row():
        text_in = gr.Textbox(
            label="Input Kalimat / Essay",
            placeholder="Tulis di sini...",
            lines=5,
        )
    normalize = gr.Checkbox(value=True, label="Normalize embedding (L2)")
    btn = gr.Button("Prediksi", variant="primary")

    with gr.Row():
        pred_out = gr.JSON(label="Prediksi Skor")
    with gr.Row():
        vec_out = gr.JSON(label="Embedding Vector (list of floats)")
        dim_out = gr.Number(label="Dimensi vektor", interactive=False)

    gr.Examples(
        examples=[
            ["Halo dunia!"],
            ["Machine learning is fun."],
            ["This is a sample essay for IELTS task."],
        ],
        inputs=[text_in],
        label="Contoh input",
    )

    btn.click(
        predict,
        inputs=[text_in, normalize],
        outputs=[pred_out, vec_out, dim_out],
    )

demo.queue()

if __name__ == "__main__":
    demo.launch()