Spaces:

halictus
/

bee_ml_1

Sleeping

App Files Files Community

JackRabbit committed on Feb 1, 2025

Commit

885f8ec

1 Parent(s): 41e69d7

added app file

Browse files

Files changed (1) hide show

app.py +272 -0

app.py ADDED Viewed

	@@ -0,0 +1,272 @@

+import streamlit as st
+from PIL import Image
+import pandas as pd
+import io
+import os
+import requests
+from autogluon.multimodal import MultiModalPredictor
+from huggingface_hub import snapshot_download
+import logging
+import datetime
+import re
+# Send INFO-level records to a log file (path is relative to the working
+# directory); each record is prefixed with its timestamp.
+log_filename = "model_predictions.log"
+logging.basicConfig(
+    filename=log_filename,
+    format='%(asctime)s - %(message)s',
+    level=logging.INFO,
+)
+# Set the page title; no layout argument is passed here.
+st.set_page_config(page_title="Honey Bee Image Classification")
+# -------------------------
+# MODEL LOADING
+# -------------------------
+@st.cache_resource
+def load_model():
+    """Download the model snapshot from Hugging Face and load the predictor.
+
+    Returns:
+        The object returned by ``MultiModalPredictor.load`` for the
+        downloaded snapshot directory.
+
+    Raises:
+        FileNotFoundError: if ``assets.json`` or ``model.ckpt`` is missing
+            from the downloaded directory.
+    """
+    snapshot_dir = snapshot_download("Honey-Bee-Society/honeybee_ml_v1")
+    # Both files must be present before the directory is handed to AutoGluon.
+    required_files = ("assets.json", "model.ckpt")
+    if not all(os.path.exists(os.path.join(snapshot_dir, name)) for name in required_files):
+        raise FileNotFoundError("Required model files not found in the downloaded directory.")
+    return MultiModalPredictor.load(snapshot_dir)
+# -------------------------
+# HELPER FUNCTIONS
+# -------------------------
+def resize_image_proportionally(image, max_size_mb=1):
+    """Shrink the image when its PNG-encoded size exceeds ``max_size_mb``.
+
+    The size that is measured is the length of the image serialized as PNG,
+    not the size of the decoded pixel data. When it is over the limit, both
+    dimensions are scaled by ``sqrt(max_size_mb / encoded_size)`` so the
+    aspect ratio is kept.
+
+    Args:
+        image: PIL image (needs ``save``, ``width``, ``height``, ``resize``).
+        max_size_mb: size limit in megabytes; must be positive.
+
+    Returns:
+        The original image when it is within the limit, otherwise a resized
+        copy.
+
+    Raises:
+        ValueError: if ``max_size_mb`` is not positive.
+    """
+    if max_size_mb <= 0:
+        raise ValueError("max_size_mb must be positive")
+    png_buffer = io.BytesIO()
+    image.save(png_buffer, format='PNG')
+    encoded_mb = len(png_buffer.getvalue()) / (1024 * 1024)
+    if encoded_mb <= max_size_mb:
+        return image
+    scale_factor = (max_size_mb / encoded_mb) ** 0.5
+    # int() truncates, so a very narrow image could end up with a 0-pixel
+    # side; clamp each side to at least one pixel.
+    new_width = max(1, int(image.width * scale_factor))
+    new_height = max(1, int(image.height * scale_factor))
+    return image.resize((new_width, new_height))
+def predict_image(image, predictor):
+    """Return the predictor's probabilities for a single in-memory PIL image.
+
+    The image is serialized to PNG bytes and placed in a one-row DataFrame
+    under the ``image`` column, which is passed to
+    ``predictor.predict_proba(..., realtime=True)``.
+    """
+    with io.BytesIO() as png_buffer:
+        image.save(png_buffer, format='PNG')
+        frame = pd.DataFrame({"image": [png_buffer.getvalue()]})
+    return predictor.predict_proba(frame, realtime=True)
+def save_image(image, img_name, target_size_kb=500):
+    """Compress the image to JPEG and write it under ``processed_images/``.
+
+    JPEG quality starts at 95 and is lowered in steps of 5 (down to 15)
+    until the encoded size is at most ``target_size_kb`` kilobytes. This is
+    best effort: if even the lowest quality tried is too large, that last
+    encoding is written anyway.
+
+    Args:
+        image: PIL image to save.
+        img_name: file name to use inside ``processed_images``.
+        target_size_kb: desired maximum size of the written file, in KB.
+
+    Returns:
+        The path of the written file.
+    """
+    output_dir = "processed_images"
+    # exist_ok avoids the check-then-create race between concurrent sessions.
+    os.makedirs(output_dir, exist_ok=True)
+    processed_image_path = os.path.join(output_dir, img_name)
+    # JPEG has no alpha channel or palette; convert such images (e.g. RGBA
+    # or P-mode PNG uploads) so the encoder does not raise.
+    if image.mode not in ("RGB", "L", "CMYK"):
+        image = image.convert("RGB")
+    encoded = b""
+    for quality in range(95, 10, -5):
+        # A fresh buffer per attempt: reusing one buffer after seek(0) would
+        # keep the tail of the previous, larger encoding in getvalue().
+        attempt = io.BytesIO()
+        image.save(attempt, format='JPEG', quality=quality)
+        encoded = attempt.getvalue()
+        if len(encoded) / 1024 <= target_size_kb:
+            break
+    with open(processed_image_path, "wb") as f:
+        f.write(encoded)
+    return processed_image_path
+def log_predictions(image_path, honeybee_score, bumblebee_score, vespidae_score):
+    """Log one INFO record with the image path and the three class scores.
+
+    Each score is formatted with two decimals followed by a percent sign.
+    """
+    entry = f"Image Path: {image_path}, Honeybee: {honeybee_score:.2f}%, Bumblebee: {bumblebee_score:.2f}%, Vespidae: {vespidae_score:.2f}%"
+    logging.info(entry)
+def sanitize_filename(filename):
+    """Replace every character other than ASCII letters, digits, ``_``, ``.`` and ``-`` with ``_``."""
+    return re.sub(r'[^A-Za-z0-9_.-]', '_', filename)
+def check_file_size(uploaded_file, max_size_mb=10):
+    """Tell whether the uploaded file is within ``max_size_mb`` megabytes.
+
+    The size is read by seeking to the end of the file object; the position
+    is then reset to the start. When the file is too large an error is shown
+    with ``st.error`` and ``False`` is returned, otherwise ``True``.
+    """
+    bytes_per_mb = 1024 * 1024
+    uploaded_file.seek(0, os.SEEK_END)
+    size_in_mb = uploaded_file.tell() / bytes_per_mb
+    # Rewind so later readers start from the first byte.
+    uploaded_file.seek(0)
+    too_large = size_in_mb > max_size_mb
+    if too_large:
+        st.error(f"File size exceeds {max_size_mb}MB limit. Please upload a smaller file.")
+    return not too_large
+# -------------------------
+# API HANDLER
+# -------------------------
+def run_api(predictor):
+    """
+    A simple 'API-like' endpoint in Streamlit.
+    Usage example:
+    ?api=1&image_url=https://somewhere.com/bee.jpg
+
+    Reads ``image_url`` from the query string, downloads the image, runs the
+    predictor on it and renders the outcome with ``st.json``: either the
+    three scores plus ``prediction_label``, or a dict with an ``error`` key.
+    Nothing is returned.
+    """
+    params = st.query_params  # Replaced st.experimental_get_query_params with st.query_params
+    image_url = params.get("image_url")
+    if not image_url:
+        st.json({"error": "No 'image_url' provided. Example: ?api=1&image_url=<URL>"})
+        return
+    # NOTE(review): image_url is caller-controlled, so this server will fetch
+    # arbitrary URLs (SSRF risk), and the whole body is buffered before the
+    # size check below. Consider an allow-list and a streamed, capped download.
+    # Download the image. The timeout keeps an unresponsive host from hanging
+    # the request; connection errors are reported as JSON instead of crashing.
+    try:
+        response = requests.get(
+            image_url,
+            headers={"User-Agent": "HoneyBeeClassification/1.0 (+https://honeybeeclassification.streamlit.app)"},
+            timeout=10,
+        )
+    except requests.RequestException as e:
+        st.json({"error": f"Failed to retrieve image from {image_url}: {e}"})
+        return
+    if response.status_code != 200:
+        st.json({"error": f"Failed to retrieve image from {image_url}. HTTP {response.status_code}"})
+        return
+    image_bytes = response.content
+    # Check file size (limit 10MB as in the UI)
+    image_size_mb = len(image_bytes)/(1024*1024)
+    if image_size_mb > 10:
+        st.json({"error": f"Image size {image_size_mb:.2f}MB exceeds 10MB limit."})
+        return
+    # Convert to PIL for processing
+    try:
+        image = Image.open(io.BytesIO(image_bytes))
+    except Exception as e:
+        st.json({"error": f"Could not open image: {e}"})
+        return
+    # Predict. The resize (same logic as the UI, keeps memory usage low) is
+    # inside the try because it encodes the image and can therefore fail too.
+    try:
+        image = resize_image_proportionally(image)
+        probabilities = predict_image(image, predictor)
+        # Columns 1, 2 and 3 of the result are read as the honeybee,
+        # bumblebee and vespidae probabilities; scaled to percentages.
+        honeybee_score = float(probabilities[1].iloc[0]) * 100
+        bumblebee_score = float(probabilities[2].iloc[0]) * 100
+        vespidae_score = float(probabilities[3].iloc[0]) * 100
+    except Exception as e:
+        st.json({"error": f"Prediction failed: {e}"})
+        return
+    # Determine highest-scoring label; below 80% nothing is reported as a match
+    highest_score = max(honeybee_score, bumblebee_score, vespidae_score)
+    if highest_score < 80:
+        prediction_label = "No bee detected (scores too low)."
+    else:
+        if honeybee_score == highest_score:
+            prediction_label = "Honey Bee"
+        elif bumblebee_score == highest_score:
+            prediction_label = "Bumblebee"
+        else:
+            prediction_label = "Vespidae (wasp/hornet)"
+    # Return results as JSON
+    st.json({
+        "honeybee_score": honeybee_score,
+        "bumblebee_score": bumblebee_score,
+        "vespidae_score": vespidae_score,
+        "prediction_label": prediction_label
+    })
+# -------------------------
+# UI HANDLER
+# -------------------------
+def run_ui(predictor):
+    """Render the upload page and classify the uploaded image.
+
+    Shows a file uploader and a model-details expander. When a file within
+    the size limit is uploaded, it is opened, resized, classified, saved as a
+    compressed JPEG, logged, and the verdict is displayed. Any exception
+    raised while opening or processing the image is shown with ``st.error``.
+    """
+    st.title("Honey Bee Image Classification")
+    # File uploader
+    uploaded_file = st.file_uploader(
+        "Upload a photo of the suspected bee to see if you have honey bees. :bee:",
+        type=["png", "jpg", "jpeg"]
+    )
+    with st.expander("ML Model Details"):
+        st.write("""
+            We trained a MultiModalPredictor from the AutoGluon library to classify images of bees,
+            focusing primarily on Honey Bees. The model is fine-tuned on a curated dataset from inaturalist
+            images (70k+ images) with an accuracy of ~97.5%. It classifies the image as Honey Bee, Bumblebee,
+            or a Vespidae (wasp/hornet).
+            **Open Source**:
+            [Honey-Bee-Society/honeybee_ml_v1](https://huggingface.co/Honey-Bee-Society/honeybee_ml_v1)
+        """)
+    if uploaded_file is None or not check_file_size(uploaded_file):
+        return
+    progress_bar = st.progress(0)
+    try:
+        # Opening and resizing happen inside the try so that an unreadable
+        # upload is reported to the user instead of raising out of the page.
+        image = Image.open(uploaded_file)
+        image = resize_image_proportionally(image)
+        probabilities = predict_image(image, predictor)
+        progress_bar.progress(100)
+        # Columns 1, 2 and 3 of the result are read as the honeybee,
+        # bumblebee and vespidae probabilities; scaled to percentages.
+        honeybee_score = float(probabilities[1].iloc[0]) * 100
+        bumblebee_score = float(probabilities[2].iloc[0]) * 100
+        vespidae_score = float(probabilities[3].iloc[0]) * 100
+        # Generate a safe and unique filename
+        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
+        sanitized_filename = sanitize_filename(uploaded_file.name)
+        img_name = f"processed_{sanitized_filename}_{timestamp}.jpg"
+        # Save compressed image
+        image_path = save_image(image, img_name)
+        # Log predictions
+        log_predictions(image_path, honeybee_score, bumblebee_score, vespidae_score)
+        # Find highest score
+        highest_score = max(honeybee_score, bumblebee_score, vespidae_score)
+        # Display result; below 80% no class is reported as a match
+        if highest_score < 80:
+            st.warning("We are fairly confident there is no bee in this photo. Try another image.")
+        elif honeybee_score == highest_score:
+            st.success("Yes! This is a honey bee!")
+        elif bumblebee_score == highest_score:
+            st.info("This is likely a bumblebee, not a honey bee.")
+        else:
+            st.info("This is likely a member of the vespidae family (wasp, hornet, etc.).")
+    except Exception as e:
+        st.error(f"An error occurred: {e}")
+    finally:
+        # Remove the progress bar whether or not classification succeeded
+        progress_bar.empty()
+# -------------------------
+# MAIN ENTRY POINT
+# -------------------------
+def main():
+    """Load the model, then run the API handler or the UI handler.
+
+    The API handler is chosen when the query string contains an ``api`` key;
+    otherwise the standard UI is shown.
+    """
+    predictor = load_model()
+    # st.query_params replaced st.experimental_get_query_params
+    handler = run_api if "api" in st.query_params else run_ui
+    handler(predictor)
+if __name__ == '__main__':
+    main()