| """ | |
| ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| β CISC 121 - HAND GESTURE RECOGNITION APP β | |
| β Queen's University β | |
| β β | |
| β PURPOSE: This app uses AI to recognize hand gestures (one, peace, etc.) β | |
| β VERSION: Procedural (step-by-step) - Great for beginners! β | |
| β β | |
| β HOW TO RUN: python app.py β | |
| ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| """ | |
| # ============================================================================== | |
| # SECTION 1: IMPORTS | |
| # ============================================================================== | |
| # What are imports? | |
| # Imports let us use code that other people wrote. | |
| # Instead of writing everything from scratch, we can use "libraries". | |
| # | |
| # Think of it like borrowing tools: | |
| # - gradio = tools for building web pages | |
| # - transformers = tools for AI/machine learning | |
| # - time = tools for measuring how long things take | |
| # - os = tools for working with the operating system (like reading files) | |
| # ============================================================================== | |
| import gradio as gr | |
| # "gr" is a short nickname for "gradio" - it saves us typing! | |
| # Example: instead of gradio.Button(), we can write gr.Button() | |
| from transformers import pipeline | |
| # "pipeline" is a function that makes using AI models easy. | |
| # It handles all the complicated setup for us. | |
| from time import perf_counter | |
| # "perf_counter" is like a stopwatch - it measures time very precisely. | |
| import os | |
| # "os" lets us interact with the operating system | |
| # We use it to read environment variables (like secret tokens) | |
| # ============================================================================== | |
| # SECTION 2: CONFIGURATION (SETTINGS) | |
| # ============================================================================== | |
| # What is configuration? | |
| # These are settings we can change to customize how the app works. | |
| # By putting them at the top, they're easy to find and modify. | |
| # ============================================================================== | |
| # The AI model we will use for hand gesture recognition | |
| # | |
| # MODEL OPTIONS: | |
| # 1. "dima806/hand_gestures_image_detection" (RECOMMENDED) | |
| # - Recognizes: one, two, three, four, fist, ok, like, peace, etc. | |
| # - Trained specifically for hand gestures! | |
| # | |
| # 2. "google/vit-base-patch16-224" (General purpose) | |
| # - Recognizes 1000 everyday objects (cats, cars, etc.) | |
| # - NOT trained for hand gestures - won't work for finger counting | |
| # | |
| # 3. "microsoft/resnet-50" (General purpose, faster) | |
| # - Similar to Google's model, but faster | |
| # | |
| MODEL_NAME = "dima806/hand_gestures_image_detection" | |
| # Hugging Face Token (Optional but recommended) | |
| # Some models require authentication to download. | |
| # Get your free token at: https://huggingface.co/settings/tokens | |
| # | |
| # Option 1: Set as environment variable (recommended for security) | |
| # export HF_TOKEN="your_token_here" | |
| # | |
| # Option 2: Paste directly here (less secure, but okay for learning) | |
| # HF_TOKEN = "hf_xxxxxxxxxxxxxxxxxxxxx" | |
| # | |
| HF_TOKEN = os.environ.get("HF_TOKEN", None) | |
| # os.environ.get() tries to read the HF_TOKEN from environment variables | |
| # If not found, it returns None (which means "no token") | |
| # App title and description | |
| APP_TITLE = "## π CISC 121 - Hand Gesture Recognition App" | |
| APP_DESCRIPTION = """ | |
| Welcome! This app uses AI to recognize **hand gestures**. | |
| **Supported Gestures:** | |
| β one, βοΈ two/peace, π€ three, π four, β fist, π like, π dislike, π ok, π€ stop | |
| **How to use:** | |
| 1. **Upload an image** OR **use your webcam** | |
| 2. Show a hand gesture clearly in frame | |
| 3. Click **"π Analyze Image"** to see the AI's prediction | |
| > π‘ **Tip:** Make sure your hand is well-lit and clearly visible! | |
| """ | |
| # ============================================================================== | |
| # SECTION 3: HELPER FUNCTIONS | |
| # ============================================================================== | |
| # What are functions? | |
| # Functions are reusable blocks of code that do one specific job. | |
| # We give them a name, and then we can "call" them whenever we need them. | |
| # | |
| # Why use functions? | |
| # 1. Reusability - write once, use many times | |
| # 2. Organization - break big problems into small pieces | |
| # 3. Readability - give meaningful names to actions | |
| # ============================================================================== | |
| def create_greeting(name): | |
| """ | |
| Creates a personalized greeting message. | |
| What is a docstring? (This text you're reading!) | |
| A docstring explains what a function does. | |
| It helps other programmers (and future you!) understand the code. | |
| Parameters: | |
| ----------- | |
| name : str | |
| The name of the person to greet. | |
| "str" means "string" - a piece of text. | |
| Returns: | |
| -------- | |
| str | |
| A greeting message as a string. | |
| Example: | |
| -------- | |
| >>> create_greeting("Alice") | |
| "Hello Alice! Welcome to CISC 121!" | |
| """ | |
| # f-strings let us put variables inside text | |
| # The {name} gets replaced with the actual value of 'name' | |
| greeting = f"Hello {name}! Welcome to CISC 121!" | |
| return greeting | |
| def analyze_image(image): | |
| """ | |
| Sends an image to the AI model and gets back predictions. | |
| How does this work? | |
| 1. We send the image to Hugging Face's servers | |
| 2. The AI model analyzes the image | |
| 3. We get back a list of predictions with confidence scores | |
| Parameters: | |
| ----------- | |
| image : PIL.Image or numpy.ndarray | |
| The image to analyze. Gradio handles the format for us. | |
| Returns: | |
| -------- | |
| tuple | |
| A tuple containing: | |
| - results (list): The AI's predictions | |
| - elapsed_time (float): How long the analysis took in seconds | |
| What is a tuple? | |
| A tuple is like a container that holds multiple values. | |
| We use it when a function needs to return more than one thing. | |
| """ | |
| # Safety check: make sure we actually received an image | |
| # "None" means "nothing" - the user might not have taken a photo yet | |
| if image is None: | |
| print("β οΈ No image provided") | |
| return None, 0.0 | |
| # Debug: Print what type of image we received | |
| print(f"π· Received image type: {type(image)}") | |
| print(f"π· Image info: {image if not hasattr(image, 'size') else f'Size: {image.size}'}") | |
| # Start the stopwatch | |
| start_time = perf_counter() | |
| # Create the AI classifier | |
| # "pipeline" sets up everything we need to use the model | |
| try: | |
| print(f"π Loading model: {MODEL_NAME}") | |
| print(f"π HF Token: {'Set' if HF_TOKEN else 'Not set (may limit some models)'}") | |
| # Create the classifier with optional token | |
| classifier = pipeline( | |
| task="image-classification", # What kind of task? | |
| model=MODEL_NAME, # Which AI model to use? | |
| token=HF_TOKEN # Authentication token (optional) | |
| ) | |
| print("π· Analyzing image...") | |
| # Handle different image formats that Gradio might send | |
| # Gradio can send: PIL Image, numpy array, or file path | |
| from PIL import Image | |
| if isinstance(image, str): | |
| # It's a file path - open it | |
| print(" (Converting from file path)") | |
| image = Image.open(image) | |
| elif hasattr(image, 'convert'): | |
| # It's already a PIL Image - ensure it's in RGB format | |
| print(" (Image is PIL format)") | |
| if image.mode != 'RGB': | |
| image = image.convert('RGB') | |
| else: | |
| # It might be a numpy array - convert to PIL | |
| print(" (Converting from numpy array)") | |
| import numpy as np | |
| if isinstance(image, np.ndarray): | |
| image = Image.fromarray(image) | |
| # Send the image to the model and get predictions | |
| results = classifier(image) | |
| print(f"β Analysis complete! Found {len(results)} predictions.") | |
| except Exception as error: | |
| # If something goes wrong, we catch the error | |
| # This prevents the app from crashing | |
| print(f"β Error during image analysis: {error}") | |
| print(f" Error type: {type(error).__name__}") | |
| # Print full traceback for debugging | |
| import traceback | |
| traceback.print_exc() | |
| # Common error explanations | |
| if "401" in str(error) or "unauthorized" in str(error).lower(): | |
| print(" π‘ This might be an authentication issue. Try setting HF_TOKEN.") | |
| elif "connection" in str(error).lower() or "network" in str(error).lower(): | |
| print(" π‘ Check your internet connection.") | |
| elif "memory" in str(error).lower(): | |
| print(" π‘ The model might be too large. Try a smaller model.") | |
| return None, 0.0 | |
| # Stop the stopwatch | |
| end_time = perf_counter() | |
| # Calculate how long it took | |
| elapsed_time = end_time - start_time | |
| return results, elapsed_time | |
| def format_results(results, elapsed_time): | |
| """ | |
| Formats the AI predictions into a readable string. | |
| Why format results? | |
| The raw data from the AI is hard to read. | |
| We transform it into a nice, human-friendly format. | |
| Parameters: | |
| ----------- | |
| results : list or None | |
| The predictions from the AI model. | |
| Each prediction has a 'label' and a 'score' (confidence). | |
| elapsed_time : float | |
| How long the analysis took, in seconds. | |
| Returns: | |
| -------- | |
| str | |
| A formatted string showing the predictions. | |
| """ | |
| # Handle the case where analysis failed | |
| if results is None: | |
| return "β Could not analyze the image. Please try again." | |
| # Start building our output message | |
| output_lines = [] | |
| # Add a header | |
| output_lines.append("## π Analysis Results\n") | |
| output_lines.append(f"β±οΈ *Analysis completed in {elapsed_time:.2f} seconds*\n") | |
| # What does :.2f mean? | |
| # It formats a number to show 2 decimal places. | |
| # Example: 1.23456 becomes "1.23" | |
| output_lines.append("### Top Predictions:\n") | |
| # Loop through the top 5 predictions | |
| # enumerate() gives us both the index (i) and the item (prediction) | |
| for i, prediction in enumerate(results[:5]): | |
| label = prediction['label'] # What the AI thinks it sees | |
| score = prediction['score'] # How confident it is (0 to 1) | |
| percentage = score * 100 # Convert to percentage | |
| # Add a medal emoji for top 3 | |
| if i == 0: | |
| medal = "π₯" | |
| elif i == 1: | |
| medal = "π₯" | |
| elif i == 2: | |
| medal = "π₯" | |
| else: | |
| medal = " " | |
| output_lines.append(f"{medal} **{label}**: {percentage:.1f}%\n") | |
| # Join all lines into one string | |
| # '\n' means "new line" (like pressing Enter) | |
| return ''.join(output_lines) | |
| # ============================================================================== | |
| # SECTION 4: MAIN APPLICATION | |
| # ============================================================================== | |
| # This section builds the actual web interface. | |
| # We use Gradio's "Blocks" system to create a custom layout. | |
| # | |
| # What is gr.Blocks()? | |
| # It's like a container for our app. | |
| # Everything inside the "with" block becomes part of the interface. | |
| # | |
| # What does "with" do? | |
| # "with" creates a context - it's like saying "everything in here belongs together" | |
| # When we exit the "with" block, Gradio knows our app is complete. | |
| # ============================================================================== | |
| def create_app(): | |
| """ | |
| Creates and returns the Gradio application. | |
| Why put this in a function? | |
| 1. It keeps the code organized | |
| 2. We can easily test or modify the app | |
| 3. It's a good habit for larger programs | |
| Returns: | |
| -------- | |
| gr.Blocks | |
| The complete Gradio application, ready to launch. | |
| """ | |
| # Create the app container | |
| # Note: We use try/except for theme to support different Gradio versions | |
| # Older versions don't support the theme parameter the same way | |
| try: | |
| # Try modern Gradio syntax (4.x+) | |
| app = gr.Blocks( | |
| title="CISC 121 Gesture App", # Browser tab title | |
| theme=gr.themes.Soft() # A nice, modern look | |
| ) | |
| except TypeError: | |
| # Fall back for older Gradio versions | |
| app = gr.Blocks(title="CISC 121 Gesture App") | |
| with app: | |
| # ---------------------------------------------------------------------- | |
| # PART A: HEADER SECTION | |
| # ---------------------------------------------------------------------- | |
| # gr.Markdown() lets us add formatted text using Markdown syntax | |
| # Markdown is a simple way to format text (like in README files) | |
| gr.Markdown(APP_TITLE) | |
| gr.Markdown(APP_DESCRIPTION) | |
| # Add a horizontal line for visual separation | |
| gr.Markdown("---") | |
| # ---------------------------------------------------------------------- | |
| # PART B: IMAGE INPUT AND RESULTS SECTION | |
| # ---------------------------------------------------------------------- | |
| # gr.Row() puts components side by side (horizontal layout) | |
| # gr.Column() stacks components on top of each other (vertical layout) | |
| with gr.Row(): | |
| # Left column: Image input | |
| with gr.Column(scale=1): | |
| gr.Markdown("### πΈ Image Input") | |
| # Create tabs for different input methods | |
| # This makes it clearer for users how to provide an image | |
| with gr.Tabs(): | |
| # Tab 1: Upload an image file | |
| with gr.TabItem("π Upload"): | |
| upload_input = gr.Image( | |
| label="Click to upload or drag an image here", | |
| sources=["upload"], | |
| type="pil", | |
| height=250 | |
| ) | |
| # Tab 2: Use webcam (captures on click) | |
| with gr.TabItem("π· Webcam"): | |
| webcam_input = gr.Image( | |
| label="Click the π· button below the preview to capture", | |
| sources=["webcam"], | |
| type="pil", | |
| height=250 | |
| # Note: mirror_webcam removed for compatibility with older Gradio | |
| ) | |
| # Status indicator - shows when image is ready | |
| status_display = gr.Markdown("π *Choose a tab above and provide an image*") | |
| # The submit button | |
| submit_button = gr.Button( | |
| value="π Analyze Image", | |
| variant="primary", | |
| size="lg" | |
| ) | |
| # Right column: Results | |
| with gr.Column(scale=1): | |
| gr.Markdown("### π Results") | |
| # gr.Markdown() can also display dynamic content | |
| # We'll update this when the user clicks the button | |
| results_display = gr.Markdown( | |
| value="*Upload or capture an image, then click 'Analyze Image' to see results.*" | |
| ) | |
| # ---------------------------------------------------------------------- | |
| # PART C: CONNECTING COMPONENTS (EVENT HANDLING) | |
| # ---------------------------------------------------------------------- | |
| # Now we connect the inputs to our functions. | |
| # We have TWO input sources (upload and webcam) that both need to work. | |
| # State variable to store the current image (from either source) | |
| # gr.State() is a special Gradio component that stores data between interactions | |
| current_image = gr.State(value=None) | |
| def on_upload(image): | |
| """Called when user uploads an image.""" | |
| if image is not None: | |
| return image, "β **Image uploaded!** Click 'Analyze Image' to continue." | |
| return None, "π *Choose a tab above and provide an image*" | |
| def on_webcam_capture(image): | |
| """Called when user captures from webcam.""" | |
| if image is not None: | |
| return image, "β **Photo captured!** Click 'Analyze Image' to continue." | |
| return None, "π *Choose a tab above and provide an image*" | |
| def on_submit(stored_image): | |
| """ | |
| This function runs when the user clicks the submit button. | |
| It's called an "event handler" because it handles the click event. | |
| Parameters: | |
| ----------- | |
| stored_image : PIL.Image | |
| The image stored from upload or webcam capture. | |
| Returns: | |
| -------- | |
| str | |
| Formatted results to display. | |
| """ | |
| # Check if we have an image | |
| if stored_image is None: | |
| return "β οΈ **No image detected!**\n\n**To fix this:**\n\nπ **Upload Tab:** Click the upload area and select an image file\n\nπ· **Webcam Tab:** Click the camera button (π·) to capture a photo\n\nThen click 'Analyze Image' again." | |
| # Step 1: Analyze the image | |
| results, elapsed_time = analyze_image(stored_image) | |
| # Step 2: Format the results nicely | |
| formatted = format_results(results, elapsed_time) | |
| # Step 3: Return the formatted text (Gradio displays it) | |
| return formatted | |
| # Connect upload input - when image changes, store it | |
| upload_input.change( | |
| fn=on_upload, | |
| inputs=[upload_input], | |
| outputs=[current_image, status_display] | |
| ) | |
| # Connect webcam input - when image is captured, store it | |
| webcam_input.change( | |
| fn=on_webcam_capture, | |
| inputs=[webcam_input], | |
| outputs=[current_image, status_display] | |
| ) | |
| # Connect the button click to analyze the stored image | |
| submit_button.click( | |
| fn=on_submit, | |
| inputs=[current_image], | |
| outputs=[results_display] | |
| ) | |
| # ---------------------------------------------------------------------- | |
| # PART D: FOOTER | |
| # ---------------------------------------------------------------------- | |
| gr.Markdown("---") | |
| gr.Markdown( | |
| "*Made for CISC 121 at Queen's University* π" | |
| ) | |
| # Return the completed app | |
| return app | |
| # ============================================================================== | |
| # SECTION 5: RUNNING THE APP | |
| # ============================================================================== | |
| # This is where we actually start the application. | |
| # | |
| # What does if __name__ == "__main__" mean? | |
| # This checks if we're running this file directly (not importing it). | |
| # If we run: python hf_gradio_proj.py β this code runs | |
| # If we import: from hf_gradio_proj import create_app β this code doesn't run | |
| # | |
| # Why is this useful? | |
| # It lets us use the same file in two ways: | |
| # 1. As a standalone app (run it directly) | |
| # 2. As a module (import functions into other files) | |
| # ============================================================================== | |
| if __name__ == "__main__": | |
| # Print a welcome message to the terminal | |
| print("=" * 60) | |
| print("π CISC 121 - Gesture Recognition App") | |
| print("=" * 60) | |
| print("Starting the application...") | |
| print("Once ready, open the URL shown below in your browser.") | |
| print("=" * 60) | |
| # Create the app | |
| app = create_app() | |
| # Launch the app | |
| # share=True creates a public URL anyone can access | |
| # This is useful for sharing with classmates or instructors | |
| app.launch(share=True) | |