Spaces:

kauzan25
/

pathora-app

Sleeping

App Files Files Community

pathora-app / app.py

kauzan25

Upload app.py with huggingface_hub

382d48f verified 5 days ago

raw

history blame contribute delete

2.42 kB

	import gradio as gr
	import numpy as np, re, os, joblib
	from sentence_transformers import SentenceTransformer
	from huggingface_hub import hf_hub_download
	import fitz, docx, chardet

	# Hub repository that stores the trained classifier and its label encoder.
	MODEL_REPO = "kauzan25/pathora-bert-classifier"

	print("Loading...")

	# Fetch both artifacts first (classifier, then label encoder) so the two
	# downloads happen before anything is deserialized.
	model_path, le_path = (
	    hf_hub_download(repo_id=MODEL_REPO, filename=artifact, repo_type="model")
	    for artifact in (
	        "artifacts/sklearn_model.joblib",
	        "artifacts/label_encoder.joblib",
	    )
	)

	# NOTE(review): joblib.load deserializes whatever the repo serves; confirm
	# that MODEL_REPO is trusted before changing it to point elsewhere.
	clf, le = joblib.load(model_path), joblib.load(le_path)

	# Sentence encoder used to turn input text into the vector passed to `clf`.
	st = SentenceTransformer("all-MiniLM-L6-v2")

	print("Loaded!")

	# Patterns are compiled once at import time instead of on every request.
	_TAG_RE = re.compile(r"<[^>]+>")
	_WS_RE = re.compile(r"\s+")

	# Minimum number of characters (after cleaning) required to run a prediction.
	_MIN_CHARS = 20
	# Number of highest-probability categories reported back to the UI.
	_TOP_K = 5


	def _extract_text(path):
	    """Return the text content of the file at *path*, or None if unsupported.

	    The format is chosen from the file extension (case-insensitive):
	    ``.pdf`` is read with ``fitz``, ``.doc``/``.docx`` with ``docx``, and
	    ``.txt`` is decoded using the encoding guessed by ``chardet``.
	    """
	    ext = os.path.splitext(path)[1].lower()
	    if ext == ".pdf":
	        # The context manager closes the document even if a page fails to
	        # extract, which the previous open()/close() pair did not guarantee.
	        with fitz.open(path) as doc:
	            return "".join(page.get_text() for page in doc)
	    if ext in (".doc", ".docx"):
	        # NOTE(review): python-docx documents support for .docx only; a legacy
	        # binary .doc will presumably raise here. Confirm, then convert such
	        # files or stop advertising .doc in the uploader.
	        return "\n".join(par.text for par in docx.Document(path).paragraphs)
	    if ext == ".txt":
	        with open(path, "rb") as fh:
	            raw = fh.read()
	        encoding = chardet.detect(raw)["encoding"] or "utf-8"
	        try:
	            return raw.decode(encoding, errors="ignore")
	        except LookupError:
	            # The detector named a codec this Python build does not provide.
	            return raw.decode("utf-8", errors="ignore")
	    return None


	def _clean_text(text):
	    """Replace HTML-like tags with spaces and collapse whitespace runs."""
	    return _WS_RE.sub(" ", _TAG_RE.sub(" ", text)).strip()


	def process(file, text_input):
	    """Classify an uploaded document or pasted text.

	    Args:
	        file: Uploaded file, or None. Either an object exposing the path as
	            ``.name`` or the path string itself is accepted. When given, it
	            takes precedence over ``text_input``.
	        text_input: Pasted text; used only when ``file`` is None.

	    Returns:
	        A ``(markdown, payload)`` tuple. On success ``markdown`` lists the top
	        categories with percentages and ``payload`` is
	        ``{"predictions": [{"category": ..., "confidence": ...}, ...]}``.
	        When the input is missing, unsupported, or too short, ``markdown`` is
	        a short message and ``payload`` is None.
	    """
	    if file is not None:
	        text = _extract_text(getattr(file, "name", file))
	        if text is None:
	            return "Unsupported.", None
	    elif text_input and text_input.strip():
	        text = text_input
	    else:
	        return "Upload file or enter text.", None

	    # Clean *before* measuring: otherwise markup-only or whitespace-padded
	    # input passes the length check and an empty string gets embedded.
	    text = _clean_text(text)
	    if len(text) < _MIN_CHARS:
	        return "Text too short.", None

	    # Embed with the module-level encoder and score with the classifier.
	    emb = st.encode([text], convert_to_numpy=True)
	    probs = clf.predict_proba(emb)[0]
	    top = np.argsort(probs)[::-1][:_TOP_K]  # indices by descending probability
	    results = [
	        {"category": le.classes_[i], "confidence": float(probs[i])} for i in top
	    ]

	    best = results[0]
	    lines = [
	        "### Hasil: {} ({:.1f}%)".format(best["category"], best["confidence"] * 100)
	    ]
	    lines.extend(
	        "{}. {} ({:.1f}%)".format(rank, item["category"], item["confidence"] * 100)
	        for rank, item in enumerate(results, start=1)
	    )
	    return "\n".join(lines), {"predictions": results}

	# Single-page interface: a file upload or a text box feeds `process`, whose
	# two return values are shown as Markdown and as JSON respectively.
	with gr.Blocks(title="PathOra") as demo:
	    gr.Markdown("# PathOra - Career Prediction")

	    # Inputs. `process` uses the file when one is given, otherwise the text.
	    f_in = gr.File(
	        label="File",
	        file_types=[".pdf", ".doc", ".docx", ".txt"],
	    )
	    t_in = gr.Textbox(label="Or paste text", lines=3)

	    btn = gr.Button("Analyze")

	    # Outputs, in the same order as the tuple returned by `process`.
	    out_md = gr.Markdown()
	    out_json = gr.JSON()

	    btn.click(
	        fn=process,
	        inputs=[f_in, t_in],
	        outputs=[out_md, out_json],
	    )

	demo.launch()