farwew committed on
Commit
745cc73
·
verified ·
1 Parent(s): 8c6fa3e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +48 -24
app.py CHANGED
@@ -1,53 +1,77 @@
1
  import gradio as gr
2
- from sentence_transformers import SentenceTransformer
3
  import numpy as np
 
 
 
 
4
 
5
- # Load model once at startup
6
  st_model = SentenceTransformer("sentence-transformers/all-mpnet-base-v2")
7
 
8
- TITLE = "# Text Vector (all-mpnet-base-v2)"
9
- DESC = (
10
- "Masukkan **kalimat** lalu dapatkan **embedding vector** "
11
- "(opsional dinormalisasi L2). Model: `sentence-transformers/all-mpnet-base-v2`."
12
- )
13
 
14
- def embed(text: str, normalize: bool = True):
 
 
 
 
 
 
 
 
 
 
 
15
  text = (text or "").strip()
16
  if not text:
17
- return [], 0
18
- vec = st_model.encode([text], normalize_embeddings=normalize)[0]
19
- return vec.tolist(), int(vec.shape[0])
 
 
 
 
20
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  with gr.Blocks() as demo:
22
- gr.Markdown(TITLE)
23
- gr.Markdown(DESC)
24
 
25
  with gr.Row():
26
- text_in = gr.Textbox(
27
- label="Kalimat",
28
- placeholder="Tulis kalimat di sini...",
29
- lines=3,
30
- )
31
  normalize = gr.Checkbox(value=True, label="Normalize embedding (L2)")
32
- btn = gr.Button("Compute Embedding", variant="primary")
33
 
34
  with gr.Row():
35
- vec_out = gr.JSON(label="Vector (list of floats)")
 
 
36
  dim_out = gr.Number(label="Dimensi vektor", interactive=False)
37
 
38
  gr.Examples(
39
  examples=[
40
  ["Halo dunia!"],
41
  ["Machine learning is fun."],
42
- ["Saya sedang membangun demo embedding sederhana."],
43
  ],
44
  inputs=[text_in],
45
- label="Contoh",
46
  )
47
 
48
- btn.click(embed, inputs=[text_in, normalize], outputs=[vec_out, dim_out])
49
 
50
- # Enable queue for concurrency
51
  demo.queue()
52
 
53
  if __name__ == "__main__":
 
1
  import gradio as gr
 
2
  import numpy as np
3
+ import joblib
4
+ import re
5
+ from sentence_transformers import SentenceTransformer
6
+ from sklearn.feature_extraction.text import ENGLISH_STOP_WORDS
7
 
8
# === Load SentenceTransformer ===
# Module-level load: the encoder is created once when the app is imported and
# reused by every request.
st_model = SentenceTransformer("sentence-transformers/all-mpnet-base-v2")

# === Load trained XGBoost models ===
# `predict` iterates this object with `.items()`, so the artifact must be a
# mapping of output name -> fitted model.
# NOTE(security): joblib.load unpickles the file, which can execute arbitrary
# code. Only load an artifact that was produced by a trusted training run.
# NOTE(review): the relative path is resolved against the current working
# directory -- confirm the app is always started from the repository root.
models = joblib.load("xgb_models_all.joblib")
 
 
 
13
 
14
+ # === Preprocessing function ===
15
def preprocess_text(text: str) -> str:
    """Clean raw text before it is embedded.

    The text is lower-cased, Windows line endings are replaced by a space,
    every character other than ``a``-``z`` and whitespace is deleted, and
    tokens listed in ``ENGLISH_STOP_WORDS`` are dropped.

    Args:
        text: Raw input. Non-string and blank values are tolerated.

    Returns:
        The surviving tokens joined by single spaces, or ``""`` when the
        input is not a string, is blank, or nothing survives the cleaning.
    """
    if not isinstance(text, str) or not text.strip():
        return ""
    text = text.lower()
    text = re.sub(r"\r\n", " ", text)
    # Unwanted characters are deleted rather than replaced by a space, so
    # "don't" becomes "dont" and "well-known" becomes "wellknown".
    # NOTE(review): presumably this mirrors the cleaning used when the models
    # were trained -- confirm before changing it.
    text = re.sub(r"[^a-z\s]", "", text)
    tokens = [w for w in text.split() if w not in ENGLISH_STOP_WORDS]
    return " ".join(tokens)
23
+
24
+ # === Prediction function ===
25
def predict(text: str, normalize: bool = True):
    """Embed an essay and score it with every model in ``models``.

    Args:
        text: Raw essay text; ``None`` is treated as an empty string.
        normalize: Forwarded to ``st_model.encode`` as
            ``normalize_embeddings``.
            NOTE(review): the embedding is also the model input, so this
            flag changes the predictions -- confirm which setting the
            models were trained with.

    Returns:
        A 3-tuple ``(scores, embedding, dim)``: ``scores`` maps each key of
        ``models`` to a float prediction, ``embedding`` is the sentence
        vector as a list, and ``dim`` is its length. When the input is blank,
        or nothing is left after preprocessing, ``({}, [], 0)`` is returned.
    """
    text = (text or "").strip()
    if not text:
        return {}, [], 0

    # 1. Preprocess
    clean_text = preprocess_text(text)
    if not clean_text:
        # Only digits, punctuation or stop words were supplied. Scoring the
        # embedding of an empty string would be meaningless, so answer the
        # same way as for blank input.
        return {}, [], 0

    # 2. Embedding
    vec = st_model.encode([clean_text], normalize_embeddings=normalize)[0]

    # 3. Append the essay_length feature: the character count of the stripped
    #    raw text (not of the cleaned text).
    essay_length = len(text)
    # Build the single-row feature matrix once; it is the same for all models.
    features = np.concatenate([vec, [essay_length]]).reshape(1, -1)

    # 4. Predict with every model
    results = {
        col: float(model.predict(features)[0])
        for col, model in models.items()
    }

    return results, vec.tolist(), int(vec.shape[0])
46
+
47
+ # === Gradio UI ===
48
  with gr.Blocks() as demo:
49
+ gr.Markdown("# Essay Scoring Demo (Embedding + XGBoost)")
50
+ gr.Markdown("Masukkan teks → embedding dengan `all-mpnet-base-v2` → prediksi 4 skor dengan model XGBoost.")
51
 
52
  with gr.Row():
53
+ text_in = gr.Textbox(label="Input Kalimat / Essay", placeholder="Tulis di sini...", lines=5)
 
 
 
 
54
  normalize = gr.Checkbox(value=True, label="Normalize embedding (L2)")
55
+ btn = gr.Button("Prediksi", variant="primary")
56
 
57
  with gr.Row():
58
+ pred_out = gr.JSON(label="Prediksi Skor (XGBoost)")
59
+ with gr.Row():
60
+ vec_out = gr.JSON(label="Embedding Vector (list of floats)")
61
  dim_out = gr.Number(label="Dimensi vektor", interactive=False)
62
 
63
  gr.Examples(
64
  examples=[
65
  ["Halo dunia!"],
66
  ["Machine learning is fun."],
67
+ ["This is a sample essay for IELTS task."],
68
  ],
69
  inputs=[text_in],
70
+ label="Contoh input",
71
  )
72
 
73
+ btn.click(predict, inputs=[text_in, normalize], outputs=[pred_out, vec_out, dim_out])
74
 
 
75
  demo.queue()
76
 
77
  if __name__ == "__main__":