farwew committed on
Commit
6abfa81
·
verified ·
1 Parent(s): ad9cd2f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -15
app.py CHANGED
@@ -3,36 +3,30 @@ from pydantic import BaseModel
3
  import joblib
4
  from sentence_transformers import SentenceTransformer
5
  import re
6
- import nltk
7
  from nltk.corpus import stopwords
8
  import numpy as np
9
 
10
- # Pastikan stopwords ada
11
- try:
12
- stopwords.words("english")
13
- except LookupError:
14
- nltk.download("stopwords")
15
-
16
  # === Preprocessing Function ===
 
 
17
  def preprocess_text(text: str) -> str:
18
  if not isinstance(text, str) or text.strip() == "":
19
  return ""
20
  text = text.lower()
21
  text = re.sub(r"\r\n", " ", text)
22
  text = re.sub(r"[^a-z\s]", "", text)
23
- tokens = text.split()
24
- stop_words = set(stopwords.words("english"))
25
- tokens = [word for word in tokens if word not in stop_words]
26
  return " ".join(tokens)
27
 
28
- # === Load Models ===
29
  print("Loading SentenceTransformer...")
30
- st_model = SentenceTransformer("all-mpnet-base-v2")
31
 
 
32
  print("Loading XGBoost models...")
33
  models = joblib.load("xgb_models_all.joblib")
34
 
35
- # === FastAPI ===
36
  app = FastAPI(title="Essay Scoring API")
37
 
38
  class EssayInput(BaseModel):
@@ -46,11 +40,11 @@ def predict(input_data: EssayInput):
46
  # 2. Embedding
47
  vec = st_model.encode([clean_text], normalize_embeddings=True)
48
 
49
- # 3. Tambah feature essay_length
50
  essay_length = len(input_data.text)
51
  X = np.concatenate([vec, [[essay_length]]], axis=1)
52
 
53
- # 4. Predict dengan setiap model
54
  results = {}
55
  for col, model in models.items():
56
  results[col] = float(model.predict(X)[0])
 
3
  import joblib
4
  from sentence_transformers import SentenceTransformer
5
  import re
 
6
  from nltk.corpus import stopwords
7
  import numpy as np
8
 
 
 
 
 
 
 
9
  # === Preprocessing Function ===
10
+ stop_words = set(stopwords.words("english"))
11
+
12
  def preprocess_text(text: str) -> str:
13
  if not isinstance(text, str) or text.strip() == "":
14
  return ""
15
  text = text.lower()
16
  text = re.sub(r"\r\n", " ", text)
17
  text = re.sub(r"[^a-z\s]", "", text)
18
+ tokens = [w for w in text.split() if w not in stop_words]
 
 
19
  return " ".join(tokens)
20
 
21
+ # === Load SentenceTransformer ===
22
  print("Loading SentenceTransformer...")
23
+ st_model = SentenceTransformer("sentence-transformers/all-mpnet-base-v2")
24
 
25
+ # === Load XGBoost models ===
26
  print("Loading XGBoost models...")
27
  models = joblib.load("xgb_models_all.joblib")
28
 
29
+ # === FastAPI app ===
30
  app = FastAPI(title="Essay Scoring API")
31
 
32
  class EssayInput(BaseModel):
 
40
  # 2. Embedding
41
  vec = st_model.encode([clean_text], normalize_embeddings=True)
42
 
43
+ # 3. Add essay_length feature
44
  essay_length = len(input_data.text)
45
  X = np.concatenate([vec, [[essay_length]]], axis=1)
46
 
47
+ # 4. Predictions from all models
48
  results = {}
49
  for col, model in models.items():
50
  results[col] = float(model.predict(X)[0])