farwew committed on
Commit
87d3f3f
·
verified ·
1 Parent(s): 9acb26f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -3
app.py CHANGED
@@ -2,8 +2,17 @@ import gradio as gr
2
  import numpy as np
3
  import joblib
4
  import re
 
 
5
  from sentence_transformers import SentenceTransformer
6
- from sklearn.feature_extraction.text import ENGLISH_STOP_WORDS
 
 
 
 
 
 
 
7
 
8
  # === Load SentenceTransformer ===
9
  st_model = SentenceTransformer("sentence-transformers/all-mpnet-base-v2")
@@ -18,7 +27,7 @@ def preprocess_text(text: str) -> str:
18
  text = text.lower()
19
  text = re.sub(r"\r\n", " ", text)
20
  text = re.sub(r"[^a-z\s]", "", text)
21
- tokens = [w for w in text.split() if w not in ENGLISH_STOP_WORDS]
22
  return " ".join(tokens)
23
 
24
  # === Prediction function ===
@@ -47,7 +56,7 @@ def predict(text: str, normalize: bool = True):
47
  # === Gradio UI ===
48
  with gr.Blocks() as demo:
49
  gr.Markdown("# Essay Scoring Demo (Embedding + XGBoost)")
50
- gr.Markdown("Masukkan teks → embedding dengan `all-mpnet-base-v2` → prediksi 4 skor dengan model XGBoost.")
51
 
52
  with gr.Row():
53
  text_in = gr.Textbox(label="Input Kalimat / Essay", placeholder="Tulis di sini...", lines=5)
 
2
  import numpy as np
3
  import joblib
4
  import re
5
+ import nltk
6
+ from nltk.corpus import stopwords
7
  from sentence_transformers import SentenceTransformer
8
+
9
+ # === Pastikan stopwords tersedia ===
10
+ try:
11
+ stopwords.words("english")
12
+ except LookupError:
13
+ nltk.download("stopwords")
14
+
15
+ stop_words = set(stopwords.words("english"))
16
 
17
  # === Load SentenceTransformer ===
18
  st_model = SentenceTransformer("sentence-transformers/all-mpnet-base-v2")
 
27
  text = text.lower()
28
  text = re.sub(r"\r\n", " ", text)
29
  text = re.sub(r"[^a-z\s]", "", text)
30
+ tokens = [w for w in text.split() if w not in stop_words]
31
  return " ".join(tokens)
32
 
33
  # === Prediction function ===
 
56
  # === Gradio UI ===
57
  with gr.Blocks() as demo:
58
  gr.Markdown("# Essay Scoring Demo (Embedding + XGBoost)")
59
+ gr.Markdown("Masukkan teks → preprocessing (pakai NLTK stopwords) embedding → prediksi skor.")
60
 
61
  with gr.Row():
62
  text_in = gr.Textbox(label="Input Kalimat / Essay", placeholder="Tulis di sini...", lines=5)