# EricCRX's picture
# Update app.py
# 95d1a83 verified
# -*- coding: utf-8 -*-
"""Sign Identification.ipynb
Automatically generated by Colab.
Original file is located at
https://colab.research.google.com/drive/1FvdIFI2M1fguL5hc3elT9wSKOBYBd3wY
"""
# -*- coding: utf-8 -*-
# HW3: Gradio interface for image classification (sign identification)
import os, shutil, zipfile, pathlib, tempfile
import pandas as pd
import numpy as np
from PIL import Image
import gradio as gr
from huggingface_hub import hf_hub_download
from autogluon.multimodal import MultiModalPredictor
import torchvision.transforms as T
# -------------------------------
# 1. Download and load the model
# -------------------------------
# Hugging Face Hub coordinates of the zipped AutoGluon predictor.
MODEL_REPO_ID = "cassieli226/sign-identification-automl"
ZIP_FILENAME = "autogluon_predictor_dir.zip"

# Local working directories (relative to the current working directory):
# the archive is downloaded into CACHE_DIR and unpacked into EXTRACT_DIR.
CACHE_DIR = pathlib.Path("hf_assets")
EXTRACT_DIR = CACHE_DIR.joinpath("predictor_native")
def _prepare_predictor_dir():
    """Download the zipped predictor from the Hub and unpack it locally.

    The archive ``ZIP_FILENAME`` is fetched from the model repo
    ``MODEL_REPO_ID`` into ``CACHE_DIR``. Any existing ``EXTRACT_DIR`` is
    deleted before unpacking, so every call starts from a fresh extraction.

    Returns:
        The extraction directory as a string.
    """
    CACHE_DIR.mkdir(parents=True, exist_ok=True)

    # NOTE(review): `local_dir_use_symlinks` appears to be deprecated in recent
    # huggingface_hub releases -- confirm against the pinned version.
    archive_path = hf_hub_download(
        repo_id=MODEL_REPO_ID,
        filename=ZIP_FILENAME,
        repo_type="model",
        local_dir=str(CACHE_DIR),
        local_dir_use_symlinks=False,
    )

    target = str(EXTRACT_DIR)

    # Remove leftovers from a previous run so old and new files never mix.
    if EXTRACT_DIR.exists():
        shutil.rmtree(EXTRACT_DIR)

    with zipfile.ZipFile(archive_path, "r") as archive:
        archive.extractall(target)

    return target
# Executed at import time: fetch and unpack the predictor once, then load it
# into a module-level object that predict() reuses for every request.
PREDICTOR_DIR: str = _prepare_predictor_dir()
PREDICTOR = MultiModalPredictor.load(PREDICTOR_DIR)
# -------------------------------
# 2. Preprocessing (resize to 224x224)
# -------------------------------
# Pipeline used to build the "preprocessed" preview: resize to a fixed
# 224x224 and convert the PIL image to a tensor.
# NOTE(review): the predictor itself is handed a file path, not this tensor,
# so this only drives the preview -- confirm it mirrors the model's own
# preprocessing.
transform = T.Compose(
    [
        T.Resize(size=(224, 224)),
        T.ToTensor(),
    ]
)
def preprocess_image(pil_img: Image.Image):
    """Return (preprocessed_image_array, tmp_path_for_model).

    The image is first converted to RGB, matching what ``predict`` does, so
    that inputs in modes PNG cannot encode (for example CMYK) can still be
    saved and the preview always has three channels.

    Args:
        pil_img: Input image in any PIL mode.

    Returns:
        A tuple ``(proc, path)`` where ``proc`` is the ``transform`` output
        rearranged to height x width x channel and rescaled to ``uint8`` for
        display, and ``path`` is the string path of a PNG copy of the RGB
        image.

    Note:
        The PNG lives in a directory created with ``tempfile.mkdtemp()`` that
        this function does not remove; the caller is responsible for deleting
        it once the path is no longer needed.
    """
    rgb_img = pil_img.convert("RGB")

    tmp_path = pathlib.Path(tempfile.mkdtemp()) / "input.png"
    # Explicit format so the encoder does not depend on suffix detection.
    rgb_img.save(tmp_path, format="PNG")

    # Channel-first tensor -> channel-last array, then float -> 0..255 uint8.
    proc = transform(rgb_img).permute(1, 2, 0).numpy()
    proc = (proc * 255).astype(np.uint8)
    return proc, str(tmp_path)
# -------------------------------
# 3. Inference
# -------------------------------
def predict(pil_img: "Image.Image | None", top_k: int = 2):
    """Classify an uploaded image and return everything the UI displays.

    Args:
        pil_img: Image from the Gradio image input, or ``None`` when no image
            is present.
        top_k: How many of the highest-probability entries to report. The
            value is coerced to ``int`` (a slider may deliver a float) and
            clamped to at least 1.

    Returns:
        A 3-tuple ``(original, preprocessed, scores)``:

        * ``original`` -- the input converted to RGB, or ``None`` if no image
          was given.
        * ``preprocessed`` -- the ``transform`` output as a height x width x
          channel ``uint8`` array, or ``None`` when prediction was skipped.
        * ``scores`` -- a dict of the ``top_k`` largest probabilities from the
          predictor (presumably keyed by class label), or a single-entry dict
          carrying an error message.
    """
    if pil_img is None:
        return None, None, {"Error": 1.0}

    pil_img = pil_img.convert("RGB")
    top_k = max(1, int(top_k))

    # The predictor is given a file path, so the image has to be written to
    # disk. A TemporaryDirectory removes the file on every exit path instead
    # of leaving one directory behind per request.
    with tempfile.TemporaryDirectory() as tmpdir:
        path = pathlib.Path(tmpdir) / "input.png"
        pil_img.save(path, format="PNG")

        # The limit applies to the re-encoded PNG on disk, not to the bytes
        # the user originally uploaded.
        size_mb = os.path.getsize(path) / (1024 * 1024)
        if size_mb > 5:
            return pil_img, None, {"File too large (>5MB)": 1.0}

        # One-row frame with an "image" column holding the file path; the
        # first row of the result is turned into a plain dict.
        df = pd.DataFrame({"image": [str(path)]})
        proba = PREDICTOR.predict_proba(df).iloc[0].to_dict()

    # Preview image: channel-first tensor -> channel-last 0..255 uint8 array.
    proc = transform(pil_img).permute(1, 2, 0).numpy()
    proc = (proc * 255).astype(np.uint8)

    ranked = sorted(proba.items(), key=lambda kv: kv[1], reverse=True)
    return pil_img, proc, dict(ranked[:top_k])
# -------------------------------
# 4. Example
# -------------------------------
# Remote sample images offered in the UI. Each example row has exactly one
# value because the examples feed a single input component.
_EXAMPLE_URLS = (
    "https://images.pexels.com/photos/39080/stop-shield-traffic-sign-road-sign-39080.jpeg?auto=compress&cs=tinysrgb&w=1200",
    "https://images.pexels.com/photos/17639322/pexels-photo-17639322.jpeg?auto=compress&cs=tinysrgb&w=1200",
    "https://images.pexels.com/photos/14037751/pexels-photo-14037751.jpeg?auto=compress&cs=tinysrgb&w=1200",
)
EXAMPLES = [[url] for url in _EXAMPLE_URLS]
# -------------------------------
# 5. Gradio UI
# -------------------------------
# Layout: inputs (image + Top-K slider) in one row, the two image previews in
# a second row, then the probability label and the example gallery.
# NOTE(review): the source had lost its indentation; the Row nesting below is
# reconstructed from the order of the components -- confirm against the
# deployed layout.
with gr.Blocks() as demo:
    gr.Markdown("# 🪧 Sign Identification (AutoML)")
    gr.Markdown("Upload a sign image and see both the **original** and the **preprocessed** (224×224) image the model actually sees. Results show class probabilities. Adjust Top-K to control how many labels are shown.")

    with gr.Row():
        image_in = gr.Image(type="pil", label="Upload Image", sources=["upload", "webcam"])
        param_k = gr.Slider(1, 5, value=2, step=1, label="Top-K classes")

    with gr.Row():
        orig = gr.Image(label="Original Image")
        proc = gr.Image(label="Preprocessed (224×224)")

    label_out = gr.Label(label="Class Probabilities")

    # Re-run the prediction when a new image arrives...
    image_in.change(
        fn=predict,
        inputs=[image_in, param_k],
        outputs=[orig, proc, label_out],
    )
    # ...and also when Top-K is adjusted; otherwise moving the slider has no
    # visible effect until another image is uploaded. With no image loaded,
    # predict() returns its "Error" placeholder.
    param_k.change(
        fn=predict,
        inputs=[image_in, param_k],
        outputs=[orig, proc, label_out],
    )

    gr.Examples(
        examples=EXAMPLES,
        inputs=[image_in],
        label="Example Images",
        examples_per_page=3,
        cache_examples=False,
    )

# Start the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()