# app.py — Gradio + AutoGluon book-classification demo.
# Provenance: FaiyazAzam, "Update app.py", commit 34b0951 (verified).
import os
import pandas as pd
import gradio as gr
from huggingface_hub import snapshot_download
from autogluon.tabular import TabularPredictor
# Hugging Face model repo that holds the exported AutoGluon predictor.
MODEL_REPO_ID = "bareethul/AutoML-books-classification"
# Subdirectory of the snapshot containing the predictor artifacts.
PREDICTOR_SUBDIR = None # None to auto-discover
def find_predictor_dir(root: str) -> str:
    """Locate the deepest directory under *root* holding a saved AutoGluon predictor.

    A predictor directory is recognized by containing both ``learner.pkl``
    and ``predictor.pkl``. Among all matches, the longest path (deepest
    nesting) is returned so that nested snapshot layouts resolve to the
    actual model folder rather than a parent.

    Raises:
        FileNotFoundError: if no matching directory exists under *root*.
    """
    required = {"learner.pkl", "predictor.pkl"}
    candidates = [
        dirpath
        for dirpath, _, filenames in os.walk(root)
        if required.issubset(filenames)
    ]
    if not candidates:
        raise FileNotFoundError(
            f"Could not find an AutoGluon predictor folder under {root}. "
            "Repo must contain a directory with learner.pkl and predictor.pkl."
        )
    # Stable sort by path length; the last entry is the deepest match.
    return sorted(candidates, key=len)[-1]
def load_predictor_from_repo(repo_id: str, subdir: str | None = None) -> TabularPredictor:
    """Download a model snapshot from the Hub and load its AutoGluon predictor.

    Args:
        repo_id: Hugging Face model-repo identifier.
        subdir: Path of the predictor directory relative to the snapshot
            root; when ``None``, the directory is auto-discovered.
    """
    snapshot_root = snapshot_download(repo_id=repo_id, repo_type="model")
    if subdir:
        predictor_dir = os.path.join(snapshot_root, subdir)
    else:
        predictor_dir = find_predictor_dir(snapshot_root)
    print("Using predictor dir:", predictor_dir, flush=True)
    # Allow loading artifacts that were saved under a different Python minor version.
    return TabularPredictor.load(predictor_dir, require_py_version_match=False)
# Load the predictor once at import time so requests don't re-load the model.
PREDICTOR = load_predictor_from_repo(MODEL_REPO_ID, PREDICTOR_SUBDIR)
TARGET_COL = PREDICTOR.label  # name of the column the model predicts
ALL_FEATURES = PREDICTOR.feature_metadata.get_features()  # features expected at inference
PROBLEM_TYPE = PREDICTOR.problem_type  # e.g. binary/multiclass — shown in the page header
def safe_get_models(predictor) -> list:
    """Best-effort list of trained model names for *predictor*.

    Tries, in order:
      1. ``predictor.model_names()`` — AutoGluon >= 1.0 API;
      2. ``predictor.get_model_names()`` — legacy API (pre-1.0), which the
         original code relied on exclusively;
      3. the ``model`` column of ``predictor.leaderboard()``, first with the
         legacy ``silent=True`` kwarg, then without it (the kwarg was removed
         in newer AutoGluon releases).

    Returns an empty list when every strategy fails, so the UI can simply
    hide the model-selection dropdown.
    """
    for getter_name in ("model_names", "get_model_names"):
        try:
            return list(getattr(predictor, getter_name)())
        except Exception:
            continue
    for kwargs in ({"silent": True}, {}):
        try:
            lb = predictor.leaderboard(**kwargs)
            return lb["model"].tolist()
        except Exception:
            continue
    return []
# Names of trained base/ensemble models (may be empty if discovery failed).
ALL_MODELS = safe_get_models(PREDICTOR)
HAS_MODEL_CHOICES = len(ALL_MODELS) > 0
# Show the well-known bookshelf columns first in the UI; any remaining model
# features are appended after them in their original order.
PREFERRED_FEATURE_ORDER = ["FictionorNonfiction","NumPages","ThicknessInches","ReadUnfinishedorUnread"]
ORDERED_FEATURES = [c for c in PREFERRED_FEATURE_ORDER if c in ALL_FEATURES] + [c for c in ALL_FEATURES if c not in PREFERRED_FEATURE_ORDER]
# Choice lists for the two categorical widgets.
DD_FICTION = ["Fiction", "Nonfiction"]
DD_READ_STATUS = ["Read", "Unfinished", "Unread"]
# Initial widget values.
DEFAULTS = {"FictionorNonfiction":"Fiction","NumPages":250,"ThicknessInches":0.85,"ReadUnfinishedorUnread":"Unread"}
def build_input_df(values_dict):
    """Assemble a single-row DataFrame in the feature order the model expects.

    Features absent from *values_dict* are filled with ``None``.
    """
    record = {}
    for feature in ORDERED_FEATURES:
        record[feature] = values_dict.get(feature)
    return pd.DataFrame([record], columns=ORDERED_FEATURES)
def predict_one(fiction_nonfiction, num_pages, thickness, read_status, decision_threshold, selected_model):
    """Run a single-record prediction and return (summary table, class probabilities).

    Args:
        fiction_nonfiction: "Fiction" / "Nonfiction" radio value.
        num_pages: page-count slider value (coerced to int).
        thickness: thickness-in-inches slider value (coerced to float).
        read_status: "Read" / "Unfinished" / "Unread" radio value.
        decision_threshold: probability cutoff; applied only when exactly two
            probability columns are returned (binary problems).
        selected_model: optional base-model name; "" or None means auto.

    Returns:
        Tuple of (one-row pd.DataFrame summary, dict of class -> probability,
        or None when probabilities are unavailable).
    """
    # Only pass along the features the loaded model actually expects.
    values = {}
    if "FictionorNonfiction" in ORDERED_FEATURES: values["FictionorNonfiction"] = fiction_nonfiction
    if "NumPages" in ORDERED_FEATURES: values["NumPages"] = int(num_pages)
    if "ThicknessInches" in ORDERED_FEATURES: values["ThicknessInches"] = float(thickness)
    if "ReadUnfinishedorUnread" in ORDERED_FEATURES: values["ReadUnfinishedorUnread"] = read_status
    X = build_input_df(values)
    # probabilities (if available) — any failure falls back to None rather than crashing the UI
    try:
        use_model = selected_model if selected_model else None
        proba = PREDICTOR.predict_proba(X, model=use_model)
        if isinstance(proba, pd.Series): # normalize binary
            proba = proba.to_frame().T
        proba_pretty = {str(k): float(v) for k, v in proba.iloc[0].to_dict().items()}
    except Exception:
        proba, proba_pretty = None, None
    # default predicted label
    pred_series = PREDICTOR.predict(X, model=(selected_model or None))
    final_label = str(pred_series.iloc[0])
    # Optional threshold (binary only): overrides the default label.
    if proba is not None and len(proba.columns) == 2:
        classes = list(map(str, proba.columns))
        # "Yes" is treated as the positive class when present; otherwise the last column.
        positive_class = "Yes" if "Yes" in classes else classes[-1]
        p_pos = float(proba.iloc[0][positive_class])
        final_label = positive_class if p_pos >= decision_threshold else [c for c in classes if c != positive_class][0]
    top_conf = None
    if proba_pretty:
        top_conf = round(100.0 * float(proba_pretty.get(final_label, 0.0)), 2)
    table = pd.DataFrame([{"Predicted": final_label,
                           "Confidence (%)": top_conf if top_conf is not None else "—",
                           "Model (optional)": (selected_model or "(auto)"),
                           "Target": TARGET_COL}])
    return table, proba_pretty
# UI copy shown in the page header.
APP_TITLE = "📚 Book Insights — Will I Recommend It?"
APP_SUBTITLE = "Single-record tabular inference on a bookshelf dataset (AutoGluon + Gradio)"

with gr.Blocks(title=APP_TITLE) as demo:
    gr.Markdown(
        f"# {APP_TITLE}\n{APP_SUBTITLE}\n\n"
        f"**Target:** `{TARGET_COL}` • **Problem:** `{PROBLEM_TYPE}`"
    )
    with gr.Row():
        with gr.Column():
            # Each input widget is hidden when the trained model does not use that feature.
            fiction_nonfiction = gr.Radio(DD_FICTION, value=DEFAULTS["FictionorNonfiction"],
                                          label="Fiction or Nonfiction", visible=("FictionorNonfiction" in ORDERED_FEATURES))
            read_status = gr.Radio(DD_READ_STATUS, value=DEFAULTS["ReadUnfinishedorUnread"],
                                   label="Read / Unfinished / Unread", visible=("ReadUnfinishedorUnread" in ORDERED_FEATURES))
            num_pages = gr.Slider(50, 1200, step=1, value=DEFAULTS["NumPages"],
                                  label="Number of Pages", visible=("NumPages" in ORDERED_FEATURES))
            thickness = gr.Slider(0.2, 3.0, step=0.01, value=DEFAULTS["ThicknessInches"],
                                  label="Thickness (inches)", visible=("ThicknessInches" in ORDERED_FEATURES))
            with gr.Accordion("Inference Parameters", open=False):
                decision_threshold = gr.Slider(0.0, 1.0, value=0.5, step=0.01, label="Decision Threshold (binary only)")
                # "" means "no restriction" — let AutoGluon pick the model.
                base_model = gr.Dropdown(choices=([""] + ALL_MODELS) if HAS_MODEL_CHOICES else [""], value="",
                                         label="Restrict to a specific base model (optional)", visible=HAS_MODEL_CHOICES)
            run_btn = gr.Button("Run Prediction")
        with gr.Column():
            # No height/wrap args for broader Gradio compatibility
            out_table = gr.Dataframe(headers=["Predicted","Confidence (%)","Model (optional)","Target"],
                                     interactive=False, label="Prediction")
            out_probs = gr.Label(num_top_classes=5, label="Class Probabilities (top-k)")
    # When no model choices exist, a hidden State("") stands in for the dropdown
    # so predict_one always receives six inputs in the same order.
    inputs = [fiction_nonfiction, num_pages, thickness, read_status, decision_threshold,
              base_model if HAS_MODEL_CHOICES else gr.State("")]
    run_btn.click(predict_one, inputs=inputs, outputs=[out_table, out_probs])

    # [Fiction/Nonfiction, NumPages, ThicknessInches, ReadStatus, DecisionThreshold, BaseModel(or "")]
    _tail = [""] # placeholder for the optional base-model input (works whether visible or hidden)
    examples = [
        ["Fiction", 211, 0.84, "Unread", 0.50] + _tail,
        ["Fiction", 361, 0.99, "Unfinished", 0.50] + _tail,
        ["Nonfiction", 260, 0.95, "Unfinished", 0.50] + _tail,
        ["Fiction", 402, 1.31, "Read", 0.60] + _tail,
        ["Fiction", 122, 0.52, "Unread", 0.40] + _tail,
    ]
    gr.Examples(
        examples=examples,
        inputs=inputs,
        label="Representative examples",
        examples_per_page=5,
        cache_examples=False, # keep it live with your current model
    )
if __name__ == "__main__":
    # debug=True surfaces tracebacks in the console/UI during development.
    demo.launch(debug=True)