"""Gradio app that recognizes ASL letters with an AutoGluon multimodal predictor.

At import time the script downloads a zipped predictor directory from the
Hugging Face Hub, extracts it, loads it, and builds the Gradio ``demo`` UI.
"""

import numbers
import os
import shutil
import zipfile
import pathlib
import tempfile

import gradio
import pandas
import PIL.Image
import huggingface_hub
import autogluon.multimodal

# Model configuration
MODEL_REPO_ID = "Anyuhhh/sign-language-recognition"  # Your Hugging Face Space repo
ZIP_FILENAME = "autogluon_image_predictor_dir.zip"
HF_TOKEN = os.getenv("HF_TOKEN", None)  # Optional: only if your repo is private

CACHE_DIR = pathlib.Path("hf_assets")
EXTRACT_DIR = CACHE_DIR / "predictor_native"


def _download_zip(repo_type: str) -> str:
    """Download ``ZIP_FILENAME`` from ``MODEL_REPO_ID`` and return its local path.

    Args:
        repo_type: Hub repository type passed to ``hf_hub_download``
            (this script uses ``"space"`` and ``"model"``).
    """
    return huggingface_hub.hf_hub_download(
        repo_id=MODEL_REPO_ID,
        filename=ZIP_FILENAME,
        repo_type=repo_type,
        token=HF_TOKEN,
        local_dir=str(CACHE_DIR),
        local_dir_use_symlinks=False,
    )


def _prepare_predictor_dir() -> str:
    """Download and extract the predictor from Hugging Face.

    The archive is first requested from the repo as a Space; if that fails for
    any reason, the same repo id is retried as a model repo (an error from the
    retry propagates to the caller). ``EXTRACT_DIR`` is wiped and recreated on
    every call so stale files from a previous archive never survive.

    Returns:
        Path of the directory to hand to ``MultiModalPredictor.load``: the
        single top-level directory inside the archive if there is exactly
        one, otherwise ``EXTRACT_DIR`` itself.
    """
    CACHE_DIR.mkdir(parents=True, exist_ok=True)

    print(f"Downloading model from HuggingFace: {MODEL_REPO_ID}/{ZIP_FILENAME}")

    try:
        # Download from Hugging Face (the archive is uploaded to the Space repo)
        local_zip = _download_zip("space")
    except Exception as e:
        # Deliberately broad: whatever went wrong, the fallback is the same.
        print(f"Error downloading from Space repo: {e}")
        print("Trying as model repo instead...")
        # Fallback: try as model repo
        local_zip = _download_zip("model")
    else:
        print(f"Downloaded to: {local_zip}")

    # Clean and recreate extraction directory
    if EXTRACT_DIR.exists():
        shutil.rmtree(EXTRACT_DIR)
    EXTRACT_DIR.mkdir(parents=True, exist_ok=True)

    # Extract the ZIP file
    print(f"Extracting to: {EXTRACT_DIR}")
    with zipfile.ZipFile(local_zip, "r") as zf:
        zf.extractall(str(EXTRACT_DIR))

    # Find the predictor directory: archives that wrap everything in a single
    # folder are unwrapped, flat archives are used as-is.
    contents = list(EXTRACT_DIR.iterdir())
    predictor_root = contents[0] if (len(contents) == 1 and contents[0].is_dir()) else EXTRACT_DIR

    print(f"Predictor directory: {predictor_root}")
    return str(predictor_root)


# Initialize predictor
print("Initializing predictor...")
PREDICTOR_DIR = _prepare_predictor_dir()
PREDICTOR = autogluon.multimodal.MultiModalPredictor.load(PREDICTOR_DIR)
print("Predictor loaded successfully!")

# Sign language classes A-Z (26 classes)
CLASS_LABELS = {i: chr(65 + i) for i in range(26)}  # 0='A', 1='B', ..., 25='Z'


def _human_label(c):
    """Convert class index to human-readable label.

    Values that ``int()`` accepts are looked up in ``CLASS_LABELS`` by their
    integer value; anything else is looked up as-is. Unknown values fall back
    to ``str(c)``.
    """
    try:
        ci = int(c)
    except (TypeError, ValueError, OverflowError):
        return CLASS_LABELS.get(c, str(c))
    return CLASS_LABELS.get(ci, str(c))


def _display_label(col) -> str:
    """Return the UI label for one probability column.

    Integer columns (including NumPy integers) that are keys of
    ``CLASS_LABELS`` become ``"Letter X"``; every other column name is shown
    verbatim via ``str``. Out-of-range integers, negative ones included, take
    the verbatim path instead of raising ``KeyError``.
    """
    if isinstance(col, numbers.Integral) and col in CLASS_LABELS:
        return f"Letter {CLASS_LABELS[col]}"
    return str(col)


def do_predict(pil_img: PIL.Image.Image):
    """Predict sign language letter from image.

    Args:
        pil_img: Image delivered by the Gradio input component, or ``None``
            when the input has been cleared.

    Returns:
        A ``(probabilities, processed_image)`` pair. ``probabilities`` maps a
        display label to its probability as ``float``, ordered from highest
        to lowest; ``processed_image`` is the RGB 224x224 image that was
        written to disk for the predictor. Returns ``({}, None)`` when
        ``pil_img`` is ``None``.
    """
    if pil_img is None:
        return {}, None

    # Preprocess image - resize and convert to RGB
    if pil_img.mode != 'RGB':
        pil_img = pil_img.convert('RGB')
    processed_img = pil_img.resize((224, 224))

    # The predictor is fed a file path, so the image goes through a scratch
    # directory. try/finally guarantees the directory is removed even when
    # saving or prediction fails.
    tmpdir = pathlib.Path(tempfile.mkdtemp())
    try:
        img_path = tmpdir / "input.png"
        processed_img.save(img_path)

        # Create DataFrame for prediction
        df = pandas.DataFrame({"image": [str(img_path)]})

        # Get predictions
        proba_df = PREDICTOR.predict_proba(df)
    finally:
        # Cleanup (best effort: a failed delete must not mask the real result)
        shutil.rmtree(tmpdir, ignore_errors=True)

    # Create pretty labels with probabilities (single input row -> row 0)
    pretty_dict = {
        _display_label(col): float(proba_df[col].iloc[0])
        for col in proba_df.columns
    }

    # Sort by probability (highest first)
    pretty_dict = dict(sorted(pretty_dict.items(), key=lambda x: x[1], reverse=True))

    return pretty_dict, processed_img


# Example sign language images
EXAMPLES = [
    ["https://www.signingsavvy.com/images/words/alphabet/2/a1.jpg"],
    ["https://www.signingsavvy.com/images/words/alphabet/2/b1.jpg"],
    ["https://www.signingsavvy.com/images/words/alphabet/2/c1.jpg"]
]

# Gradio UI
with gradio.Blocks(theme=gradio.themes.Soft()) as demo:
    gradio.Markdown("# 🤟 Sign Language Recognition")
    gradio.Markdown("""
    This app uses an AutoGluon multimodal predictor to recognize American Sign Language (ASL) letters.

    **How to use:**
    1. Upload a photo of a hand sign or use your webcam
    2. The model will predict which letter (A-Z) it represents
    3. View the top 5 predictions with confidence scores
    """)

    with gradio.Row():
        with gradio.Column():
            image_in = gradio.Image(
                type="pil",
                label="Upload hand sign image",
                sources=["upload", "webcam"]
            )

        with gradio.Column():
            processed_out = gradio.Image(
                type="pil",
                label="Preprocessed image (what model sees - 224x224)"
            )
            proba_pretty = gradio.Label(num_top_classes=5, label="Top 5 predictions")

    # Update on image change
    image_in.change(
        fn=do_predict,
        inputs=[image_in],
        outputs=[proba_pretty, processed_out]
    )

    gradio.Examples(
        examples=EXAMPLES,
        inputs=[image_in],
        label="Example ASL signs (click to try)",
        examples_per_page=3,
        cache_examples=False,
    )

    gradio.Markdown("""
    ---
    **Note:** This model recognizes static ASL letters (A-Z).
    For best results, use clear images with good lighting.
    """)

if __name__ == "__main__":
    demo.launch()