# hand_reconizer / app.py
import os
import shutil
import zipfile
import pathlib
import tempfile
import gradio
import pandas
import PIL.Image
import huggingface_hub
import autogluon.multimodal
# Model configuration
MODEL_REPO_ID = "Anyuhhh/sign-language-recognition"  # Hugging Face Space repo that hosts the model ZIP
ZIP_FILENAME = "autogluon_image_predictor_dir.zip"
HF_TOKEN = os.getenv("HF_TOKEN", None) # Optional: only if your repo is private
CACHE_DIR = pathlib.Path("hf_assets")
EXTRACT_DIR = CACHE_DIR / "predictor_native"
def _prepare_predictor_dir() -> str:
"""Download and extract the predictor from Hugging Face"""
CACHE_DIR.mkdir(parents=True, exist_ok=True)
print(f"Downloading model from HuggingFace: {MODEL_REPO_ID}/{ZIP_FILENAME}")
    try:
        # Download from the Space repo, where the ZIP was uploaded
        local_zip = huggingface_hub.hf_hub_download(
            repo_id=MODEL_REPO_ID,
            filename=ZIP_FILENAME,
            repo_type="space",
            token=HF_TOKEN,
            local_dir=str(CACHE_DIR),
            local_dir_use_symlinks=False,  # deprecated and ignored in recent huggingface_hub releases
        )
        print(f"Downloaded to: {local_zip}")
    except Exception as e:
        print(f"Error downloading from Space repo: {e}")
        print("Trying as model repo instead...")
        # Fallback: try as a model repo
        local_zip = huggingface_hub.hf_hub_download(
            repo_id=MODEL_REPO_ID,
            filename=ZIP_FILENAME,
            repo_type="model",
            token=HF_TOKEN,
            local_dir=str(CACHE_DIR),
            local_dir_use_symlinks=False,  # deprecated and ignored in recent huggingface_hub releases
        )
# Clean and recreate extraction directory
if EXTRACT_DIR.exists():
shutil.rmtree(EXTRACT_DIR)
EXTRACT_DIR.mkdir(parents=True, exist_ok=True)
# Extract the ZIP file
print(f"Extracting to: {EXTRACT_DIR}")
with zipfile.ZipFile(local_zip, "r") as zf:
zf.extractall(str(EXTRACT_DIR))
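    # Note: extractall() trusts archive member paths; that is acceptable
    # here because the ZIP comes from the author's own repo, but validate
    # member names before extracting untrusted archives.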
# Find the predictor directory
contents = list(EXTRACT_DIR.iterdir())
predictor_root = contents[0] if (len(contents) == 1 and contents[0].is_dir()) else EXTRACT_DIR
print(f"Predictor directory: {predictor_root}")
return str(predictor_root)
# Initialize predictor
print("Initializing predictor...")
PREDICTOR_DIR = _prepare_predictor_dir()
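# A minimal sanity check before loading, assuming a saved MultiModalPredictor
# directory contains config.yaml and assets.json; exact artifact names vary
# across AutoGluon versions, so this only warns rather than failing hard.
for _artifact in ("config.yaml", "assets.json"):  # assumed artifact names
    if not (pathlib.Path(PREDICTOR_DIR) / _artifact).exists():
        print(f"Warning: expected predictor artifact not found: {_artifact}")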
PREDICTOR = autogluon.multimodal.MultiModalPredictor.load(PREDICTOR_DIR)
print("Predictor loaded successfully!")
# Sign language classes A-Z (26 classes)
CLASS_LABELS = {i: chr(65+i) for i in range(26)} # 0='A', 1='B', ..., 25='Z'
def _human_label(c):
"""Convert class index to human-readable label"""
try:
ci = int(c)
return CLASS_LABELS.get(ci, str(c))
except Exception:
return CLASS_LABELS.get(c, str(c))
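# Example: _human_label(0) -> "A", _human_label("7") -> "H"; anything that
# cannot be mapped falls back to its string form.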
def do_predict(pil_img: PIL.Image.Image):
    """Predict the sign language letter shown in an image."""
    if pil_img is None:
        return {}, None
    tmpdir = pathlib.Path(tempfile.mkdtemp())
    try:
        img_path = tmpdir / "input.png"
        # Preprocess: ensure RGB and resize to the model's input size
        if pil_img.mode != "RGB":
            pil_img = pil_img.convert("RGB")
        processed_img = pil_img.resize((224, 224))
        processed_img.save(img_path)
        # AutoGluon expects a DataFrame of image file paths
        df = pandas.DataFrame({"image": [str(img_path)]})
        proba_df = PREDICTOR.predict_proba(df)
        # Map class columns to human-readable labels with probabilities.
        # Column labels may be Python ints, numpy ints, or strings, so
        # coerce with int() instead of an isinstance check.
        pretty_dict = {}
        for col in proba_df.columns:
            try:
                ci = int(col)
            except (TypeError, ValueError):
                ci = None
            if ci is not None and 0 <= ci < 26:
                label = f"Letter {CLASS_LABELS[ci]}"
            else:
                label = str(col)
            pretty_dict[label] = float(proba_df[col].iloc[0])
        # Sort by probability (highest first)
        pretty_dict = dict(sorted(pretty_dict.items(), key=lambda x: x[1], reverse=True))
    finally:
        # Clean up the temp dir even if prediction fails
        shutil.rmtree(tmpdir, ignore_errors=True)
    return pretty_dict, processed_img
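# A hypothetical smoke test, not wired into the UI: runs a blank image
# through do_predict to verify that the model loads and the pipeline works
# end-to-end. Call it manually (e.g. from a REPL) after startup.
def _smoke_test():
    blank = PIL.Image.new("RGB", (224, 224), color="white")
    probs, _ = do_predict(blank)
    print("Smoke test top prediction:", next(iter(probs), None))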
# Example sign language images
EXAMPLES = [
["https://www.signingsavvy.com/images/words/alphabet/2/a1.jpg"],
["https://www.signingsavvy.com/images/words/alphabet/2/b1.jpg"],
["https://www.signingsavvy.com/images/words/alphabet/2/c1.jpg"]
]
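# Note: these remote URLs are fetched when an example is clicked; if the
# host blocks hotlinking or the Space has no outbound network access,
# replace them with paths to local files bundled with the app.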
# Gradio UI
with gradio.Blocks(theme=gradio.themes.Soft()) as demo:
gradio.Markdown("# 🤟 Sign Language Recognition")
    gradio.Markdown("""
    This app uses an AutoGluon multimodal predictor to recognize American Sign Language (ASL) letters.

    **How to use:**
    1. Upload a photo of a hand sign or use your webcam
    2. The model will predict which letter (A-Z) it represents
    3. View the top 5 predictions with confidence scores
    """)
with gradio.Row():
with gradio.Column():
image_in = gradio.Image(
type="pil",
label="Upload hand sign image",
sources=["upload", "webcam"]
)
with gradio.Column():
processed_out = gradio.Image(
type="pil",
                label="Preprocessed image (what the model sees - 224x224)"
)
proba_pretty = gradio.Label(num_top_classes=5, label="Top 5 predictions")
# Update on image change
image_in.change(
fn=do_predict,
inputs=[image_in],
outputs=[proba_pretty, processed_out]
)
gradio.Examples(
examples=EXAMPLES,
inputs=[image_in],
label="Example ASL signs (click to try)",
examples_per_page=3,
cache_examples=False,
)
gradio.Markdown("""
---
**Note:** This model recognizes static ASL letters (A-Z). For best results, use clear images with good lighting.
""")
if __name__ == "__main__":
demo.launch()