Spaces:
Sleeping
Sleeping
File size: 2,485 Bytes
0f94aaf 745cc73 87d3f3f 745cc73 87d3f3f 0f94aaf 745cc73 0f94aaf 745cc73 0f94aaf 745cc73 87d3f3f 745cc73 0f94aaf 745cc73 0f94aaf 745cc73 0f94aaf e118a81 0f94aaf 745cc73 0f94aaf 745cc73 0f94aaf e118a81 745cc73 0f94aaf 745cc73 0f94aaf 745cc73 0f94aaf 745cc73 0f94aaf |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 |
import gradio as gr
import numpy as np
import joblib
import re
import nltk
from nltk.corpus import stopwords
from sentence_transformers import SentenceTransformer
# === Ensure NLTK English stopwords are available (download once if missing) ===
try:
    stopwords.words("english")
except LookupError:
    nltk.download("stopwords")
stop_words = set(stopwords.words("english"))

# === Sentence-embedding model used to vectorize essays ===
st_model = SentenceTransformer("sentence-transformers/all-mpnet-base-v2")

# === Pre-trained XGBoost regressors, one per score column ===
# NOTE(review): assumes "xgb_models_all.joblib" sits in the working directory.
models = joblib.load("xgb_models_all.joblib")
# === Preprocessing function ===
def preprocess_text(text: str, stop_words_set=None) -> str:
    """Lowercase *text*, strip non-letters, and drop English stopwords.

    Args:
        text: Raw input text. Non-string or blank input yields "".
        stop_words_set: Optional iterable/set of stopwords to remove.
            Defaults to the module-level NLTK English ``stop_words`` set,
            preserving the original behavior for existing callers.

    Returns:
        The cleaned tokens joined by single spaces.
    """
    if not isinstance(text, str) or text.strip() == "":
        return ""
    # Fall back to the module-level set only when no explicit set is given.
    sw = stop_words if stop_words_set is None else stop_words_set
    text = text.lower()
    text = re.sub(r"\r\n", " ", text)
    # Keep only lowercase letters and whitespace (digits/punctuation removed).
    text = re.sub(r"[^a-z\s]", "", text)
    tokens = [w for w in text.split() if w not in sw]
    return " ".join(tokens)
# === Prediction function ===
def predict(text: str, normalize: bool = True):
    """Score an essay with every loaded model.

    Args:
        text: Raw essay text (may be None or blank).
        normalize: Whether to L2-normalize the sentence embedding.

    Returns:
        A triple ``(scores, embedding, dim)``: per-model float scores keyed
        by column name, the embedding as a list of floats, and the embedding
        dimensionality. Blank input returns ``({}, [], 0)``.
    """
    essay = (text or "").strip()
    if not essay:
        return {}, [], 0

    # Clean text, embed it, then append the raw essay length as one extra
    # feature — presumably the same feature layout used at training time.
    cleaned = preprocess_text(essay)
    embedding = st_model.encode([cleaned], normalize_embeddings=normalize)[0]
    features = np.concatenate([embedding, [len(essay)]]).reshape(1, -1)

    scores = {col: float(mdl.predict(features)[0]) for col, mdl in models.items()}
    return scores, embedding.tolist(), int(embedding.shape[0])
# === Gradio UI ===
# === Gradio interface ===
with gr.Blocks() as demo:
    gr.Markdown("# Essay Scoring Demo")
    gr.Markdown("Masukkan teks")

    with gr.Row():
        text_in = gr.Textbox(
            label="Input Kalimat / Essay",
            placeholder="Tulis di sini...",
            lines=5,
        )
        normalize = gr.Checkbox(value=True, label="Normalize embedding (L2)")

    btn = gr.Button("Prediksi", variant="primary")

    with gr.Row():
        pred_out = gr.JSON(label="Prediksi Skor")
    with gr.Row():
        vec_out = gr.JSON(label="Embedding Vector (list of floats)")
        dim_out = gr.Number(label="Dimensi vektor", interactive=False)

    gr.Examples(
        examples=[
            ["Halo dunia!"],
            ["Machine learning is fun."],
            ["This is a sample essay for IELTS task."],
        ],
        inputs=[text_in],
        label="Contoh input",
    )

    # Wire the button to the scoring function.
    btn.click(predict, inputs=[text_in, normalize], outputs=[pred_out, vec_out, dim_out])

demo.queue()

if __name__ == "__main__":
    demo.launch()
|