Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import numpy as np, re, os, joblib | |
| from sentence_transformers import SentenceTransformer | |
| from huggingface_hub import hf_hub_download | |
| import fitz, docx, chardet | |
# Hugging Face Hub repository that hosts the trained classifier artifacts.
MODEL_REPO = "kauzan25/pathora-bert-classifier"

# Files inside MODEL_REPO: the scikit-learn classifier, then its label encoder.
_ARTIFACT_FILES = (
    "artifacts/sklearn_model.joblib",
    "artifacts/label_encoder.joblib",
)

print("Loading...")

# Download (or reuse the local cache of) each artifact, in the order above.
model_path, le_path = [
    hf_hub_download(repo_id=MODEL_REPO, filename=artifact, repo_type="model")
    for artifact in _ARTIFACT_FILES
]

# NOTE(security): joblib.load unpickles the file, which can execute arbitrary
# code. Only point MODEL_REPO at a repository you trust.
clf = joblib.load(model_path)
le = joblib.load(le_path)

# Sentence encoder used to turn input text into the features fed to `clf`.
st = SentenceTransformer("all-MiniLM-L6-v2")

print("Loaded!")
def _read_upload(path, ext):
    """Extract the raw text of an uploaded file.

    Args:
        path: Filesystem path of the uploaded file.
        ext: Lower-cased file extension, including the leading dot.

    Returns:
        The extracted text, or ``None`` when the extension is not handled or
        the file cannot be opened as a Word document.
    """
    if ext == ".pdf":
        pdf = fitz.open(path)
        try:
            return "".join(page.get_text() for page in pdf)
        finally:
            # Release the file handle even if text extraction fails.
            pdf.close()

    if ext in (".doc", ".docx"):
        try:
            document = docx.Document(path)
        except Exception:
            # python-docx only reads OOXML (.docx) packages; a legacy binary
            # .doc (or a corrupt file) fails here. Report it as unsupported
            # instead of crashing the UI callback.
            return None
        return "\n".join(par.text for par in document.paragraphs)

    if ext == ".txt":
        with open(path, "rb") as handle:
            raw = handle.read()
        # chardet reports None when it cannot guess (e.g. an empty file).
        encoding = chardet.detect(raw)["encoding"] or "utf-8"
        try:
            return raw.decode(encoding, errors="ignore")
        except LookupError:
            # The detected name is not a codec Python knows; fall back to UTF-8.
            return raw.decode("utf-8", errors="ignore")

    return None


def process(file, text_input):
    """Classify an uploaded document or pasted text and format the top results.

    An uploaded file takes precedence over the text box. The text is stripped
    of HTML-like tags and collapsed whitespace, embedded with the module-level
    sentence encoder ``st``, and scored with the classifier ``clf``.

    Args:
        file: The uploaded file from ``gr.File`` (an object with a ``name``
            path attribute, or a plain path string), or ``None``.
        text_input: Text pasted by the user; may be ``None`` or empty.

    Returns:
        A ``(markdown, payload)`` tuple. On success ``markdown`` lists up to
        five predictions and ``payload`` is ``{"predictions": [...]}`` with
        ``category``/``confidence`` entries. On invalid input ``markdown`` is
        a short message and ``payload`` is ``None``.
    """
    if file is not None:
        # Accept both file-like wrappers (with .name) and plain path strings.
        path = getattr(file, "name", file)
        ext = os.path.splitext(path)[1].lower()
        text = _read_upload(path, ext)
        if text is None:
            return "Unsupported.", None
    elif text_input and text_input.strip():
        text = text_input
    else:
        return "Upload file or enter text.", None

    # Clean BEFORE the length check so tag-only or whitespace-padded input is
    # rejected instead of being embedded as an (almost) empty string.
    text = re.sub(r"<[^>]+>", " ", text)
    text = re.sub(r"\s+", " ", text).strip()
    if len(text) < 20:
        return "Text too short.", None

    emb = st.encode([text], convert_to_numpy=True)
    probs = clf.predict_proba(emb)[0]
    # Indices of the (up to) five most probable classes, best first.
    top = np.argsort(probs)[::-1][:5]
    results = [
        {"category": le.classes_[i], "confidence": float(probs[i])}
        for i in top
    ]

    best = results[0]
    lines = [
        "### Hasil: **{}** ({:.1f}%)".format(
            best["category"], best["confidence"] * 100
        )
    ]
    lines.extend(
        "{}. {} ({:.1f}%)".format(rank, pred["category"], pred["confidence"] * 100)
        for rank, pred in enumerate(results, start=1)
    )
    return "\n".join(lines), {"predictions": results}
# Upload extensions offered by the file picker.
_UPLOAD_TYPES = [".pdf", ".doc", ".docx", ".txt"]

# Single-page UI: a file upload or a text box feeds `process`, whose two return
# values are shown as Markdown and as JSON.
with gr.Blocks(title="PathOra") as demo:
    gr.Markdown("# PathOra - Career Prediction")

    # Inputs.
    f_in = gr.File(label="File", file_types=_UPLOAD_TYPES)
    t_in = gr.Textbox(label="Or paste text", lines=3)
    btn = gr.Button("Analyze")

    # Outputs.
    out_md = gr.Markdown()
    out_json = gr.JSON()

    btn.click(fn=process, inputs=[f_in, t_in], outputs=[out_md, out_json])

# Start the web server; this runs as soon as the module is executed.
demo.launch()