# Spaces: Sleeping
| import gradio as gr | |
| import numpy as np | |
| import joblib | |
| import re | |
| import nltk | |
| from nltk.corpus import stopwords | |
| from sentence_transformers import SentenceTransformer | |
# === Make sure the NLTK stopword corpus is available ===
def _load_english_stopwords() -> set:
    """Return the English NLTK stopwords as a set, downloading them if needed.

    The corpus is probed first; a ``LookupError`` from that probe triggers
    ``nltk.download("stopwords")`` before the list is read for real.
    """
    try:
        # Probe only: the result is discarded, we just want the LookupError.
        stopwords.words("english")
    except LookupError:
        nltk.download("stopwords")
    return set(stopwords.words("english"))


# A set, so the membership test in preprocess_text is a hash lookup.
stop_words = _load_english_stopwords()
# === Load the SentenceTransformer embedding model ===
_EMBEDDING_MODEL_NAME = "sentence-transformers/all-mpnet-base-v2"
st_model = SentenceTransformer(_EMBEDDING_MODEL_NAME)

# === Load the trained XGBoost models ===
# `models` is iterated with .items() in predict(), i.e. it is used as a
# mapping from an output name to a fitted model.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code - only ship/load an artifact from a trusted source.
_XGB_MODELS_PATH = "xgb_models_all.joblib"
models = joblib.load(_XGB_MODELS_PATH)
# === Preprocessing function ===
def preprocess_text(text: str) -> str:
    """Reduce raw text to a space-joined string of lowercase non-stopword tokens.

    Steps, in order: lowercase, replace each CRLF pair with a space, delete
    every character that is not ``a``-``z`` or whitespace (digits, punctuation
    and non-ASCII letters are removed, not replaced), split on whitespace and
    drop tokens found in the module-level ``stop_words`` set.

    Args:
        text: Raw input text.

    Returns:
        The cleaned text, or ``""`` when ``text`` is not a string or contains
        only whitespace.
    """
    if not isinstance(text, str):
        return ""
    if not text.strip():
        return ""

    lowered = text.lower()
    without_crlf = re.sub(r"\r\n", " ", lowered)
    letters_only = re.sub(r"[^a-z\s]", "", without_crlf)

    kept_tokens = (tok for tok in letters_only.split() if tok not in stop_words)
    return " ".join(kept_tokens)
# === Prediction function ===
def predict(text: str, normalize: bool = True):
    """Score one text with every model in the module-level ``models`` mapping.

    The feature row given to each model is the sentence embedding of the
    preprocessed text with one extra value appended: the character count of
    the stripped, un-preprocessed input.

    Args:
        text: Raw input text; ``None`` is treated like an empty string.
        normalize: Forwarded to ``st_model.encode`` as ``normalize_embeddings``.

    Returns:
        A 3-tuple ``(scores, embedding, dim)``: ``scores`` maps each key of
        ``models`` to that model's prediction as a float, ``embedding`` is the
        embedding as a plain list (without the length feature), and ``dim`` is
        the embedding length. Empty or whitespace-only input yields
        ``({}, [], 0)`` without calling any model.
    """
    stripped = (text or "").strip()
    if not stripped:
        return {}, [], 0

    # 1-2. Preprocess, then embed; encode() takes a batch, so unwrap the
    # single result.
    embedding = st_model.encode(
        [preprocess_text(stripped)], normalize_embeddings=normalize
    )[0]

    # 3. Append the essay_length feature (length of the raw stripped text,
    # not of the cleaned text) and shape it as a single-row batch.
    feature_row = np.concatenate([embedding, [len(stripped)]]).reshape(1, -1)

    # 4. Collect the prediction from every model.
    scores = {
        col: float(model.predict(feature_row)[0]) for col, model in models.items()
    }
    return scores, embedding.tolist(), int(embedding.shape[0])
# === Gradio UI ===
# NOTE(review): the original indentation was lost, so which components sit
# inside each gr.Row() below is a best guess - confirm against the deployed
# layout. Components, labels, examples and event wiring are unchanged.
_EXAMPLE_INPUTS = [
    ["Halo dunia!"],
    ["Machine learning is fun."],
    ["This is a sample essay for IELTS task."],
]

with gr.Blocks() as demo:
    gr.Markdown("# Essay Scoring Demo")
    gr.Markdown("Masukkan teks")

    # Inputs
    with gr.Row():
        text_in = gr.Textbox(
            label="Input Kalimat / Essay",
            placeholder="Tulis di sini...",
            lines=5,
        )
    normalize = gr.Checkbox(value=True, label="Normalize embedding (L2)")
    btn = gr.Button("Prediksi", variant="primary")

    # Outputs: per-model scores, then the raw embedding and its dimension
    with gr.Row():
        pred_out = gr.JSON(label="Prediksi Skor")
    with gr.Row():
        vec_out = gr.JSON(label="Embedding Vector (list of floats)")
        dim_out = gr.Number(label="Dimensi vektor", interactive=False)

    # Examples fill only the text box; the checkbox keeps its current value.
    gr.Examples(
        examples=_EXAMPLE_INPUTS,
        inputs=[text_in],
        label="Contoh input",
    )

    # predict() returns (scores, embedding, dim), matching the three outputs.
    btn.click(
        fn=predict,
        inputs=[text_in, normalize],
        outputs=[pred_out, vec_out, dim_out],
    )

demo.queue()

if __name__ == "__main__":
    demo.launch()