Spaces:

Di12
/

KC_Classifier

Sleeping

App Files Files Community

Di12 committed on Jun 11, 2025

Commit

bddd7a2

1 Parent(s): ca66ee2

Update app.py

Browse files

Files changed (1) hide show

app.py +4 -14

app.py CHANGED Viewed

@@ -1,11 +1,9 @@
-# app.py
 import re
 import unicodedata
 from bs4 import BeautifulSoup
 import joblib
 import gradio as gr
-# —— 1. Preprocess functions —— #
 def clean_html(raw_html: str) -> str:
     """Loại bỏ <img>, <math>, giữ text thuần."""
     soup = BeautifulSoup(raw_html, "html.parser")
@@ -29,22 +27,15 @@ def normalize_text(text: str) -> str:
     # xóa khoảng trắng thừa
     return re.sub(r"\s+", " ", text).strip()
-def preprocess(content_html: str) -> str:
-    """Pipeline: HTML → clean → normalize"""
     text = clean_html(content_html)
     text = normalize_text(text)
     return text
-# —— 2. Load vectorizer & model —— #
 vect = joblib.load("vectorizer.joblib")
 clf  = joblib.load("nbc_model.joblib")
-# —— 3. Inference function —— #
-def predict_kc(content_html: str) -> str:
-    """
-    Nhận HTML content, trả về mã KC dự đoán.
-    Nếu bỏ trống hoặc không parse được, trả về thông báo.
-    """
     if not content_html or not isinstance(content_html, str):
         return "Không có input hợp lệ."
     text = preprocess(content_html)
@@ -54,18 +45,17 @@ def predict_kc(content_html: str) -> str:
     pred = clf.predict(Xv)[0]
     return pred
-# —— 4. Xây dựng giao diện Gradio —— #
 demo = gr.Interface(
     fn=predict_kc,
     inputs=gr.Textbox(
         lines=6,
-        placeholder="Dán HTML Content (có thể kèm <p>, <img>, <math>) vào đây…"
     ),
     outputs=gr.Label(num_top_classes=1, label="Mã KC dự đoán"),
     title="Naive Bayes KC Predictor",
     description="""
         Nhập nội dung câu hỏi (HTML) và nhấn Submit để nhận về
-        mã kiến thức (KC) do model Naive Bayes dự đoán.
     """,
     allow_flagging="never",
 )

 import re
 import unicodedata
 from bs4 import BeautifulSoup
 import joblib
 import gradio as gr
 def clean_html(raw_html: str) -> str:
     """Loại bỏ <img>, <math>, giữ text thuần."""
     soup = BeautifulSoup(raw_html, "html.parser")
     # xóa khoảng trắng thừa
     return re.sub(r"\s+", " ", text).strip()
+def preprocess(content_html: str) -> str:
     text = clean_html(content_html)
     text = normalize_text(text)
     return text
 vect = joblib.load("vectorizer.joblib")
 clf  = joblib.load("nbc_model.joblib")
+def predict_kc(content_html: str) -> str:
     if not content_html or not isinstance(content_html, str):
         return "Không có input hợp lệ."
     text = preprocess(content_html)
     pred = clf.predict(Xv)[0]
     return pred
 demo = gr.Interface(
     fn=predict_kc,
     inputs=gr.Textbox(
         lines=6,
+        placeholder="Dán HTML Content (có thể kèm <p>, <img>, <math>) vào đây"
     ),
     outputs=gr.Label(num_top_classes=1, label="Mã KC dự đoán"),
     title="Naive Bayes KC Predictor",
     description="""
         Nhập nội dung câu hỏi (HTML) và nhấn Submit để nhận về
+        mã kiến thức (KC) do mô hình Naive Bayes dự đoán.
     """,
     allow_flagging="never",
 )