Upload folder using huggingface_hub

Browse files

Files changed (7) hide show

.DS_Store +0 -0
README.md +92 -0
app.py +210 -0
config.json +38 -0
handler.py +160 -0
model.joblib +3 -0
requirements.txt +4 -0

.DS_Store ADDED Viewed

Binary file (6.15 kB). View file

README.md CHANGED Viewed

@@ -1,3 +1,95 @@
 ---
 license: mit
 ---

 ---
 license: mit
+library_name: sklearn
+tags:
+  - sklearn
+  - classification
+  - random-forest
+  - food-science
+  - milk-quality
+pipeline_tag: tabular-classification
 ---
+# Milk Spoilage Classification Model
+A Random Forest classifier for predicting milk spoilage type based on microbial count data.
+## Model Description
+This model classifies milk samples into three classes (two spoilage types plus a no-spoilage class) based on Standard Plate Count (SPC) and Total Gram-Negative (TGN) bacterial counts measured at days 7, 14, and 21 of shelf life.
+### Classes
+- **PPC**: Post-Pasteurization Contamination
+- **no spoilage**: No spoilage detected
+- **spore spoilage**: Spore-forming bacteria spoilage
+### Input Features
+| Feature | Description |
+|---------|-------------|
+| SPC_D7 | Standard Plate Count at Day 7 (log CFU/mL) |
+| SPC_D14 | Standard Plate Count at Day 14 (log CFU/mL) |
+| SPC_D21 | Standard Plate Count at Day 21 (log CFU/mL) |
+| TGN_D7 | Total Gram-Negative count at Day 7 (log CFU/mL) |
+| TGN_D14 | Total Gram-Negative count at Day 14 (log CFU/mL) |
+| TGN_D21 | Total Gram-Negative count at Day 21 (log CFU/mL) |
+## Performance
+- **Test Accuracy**: 95.76%
+## Usage
+### Using the Inference API
+```python
+import requests
+API_URL = "https://api-inference.huggingface.co/models/chenhaoq87/MilkSpoilageClassifier"
+headers = {"Authorization": "Bearer YOUR_HF_TOKEN"}
+# Input: [SPC_D7, SPC_D14, SPC_D21, TGN_D7, TGN_D14, TGN_D21]
+payload = {"inputs": [[4.5, 5.2, 6.1, 3.2, 4.0, 4.8]]}
+response = requests.post(API_URL, headers=headers, json=payload)
+print(response.json())
+```
+### Local Usage
+```python
+import joblib
+import numpy as np
+# Load the model
+model = joblib.load("model.joblib")
+# Prepare input features
+# [SPC_D7, SPC_D14, SPC_D21, TGN_D7, TGN_D14, TGN_D21]
+features = np.array([[4.5, 5.2, 6.1, 3.2, 4.0, 4.8]])
+# Make prediction
+prediction = model.predict(features)
+probabilities = model.predict_proba(features)
+print(f"Predicted class: {prediction[0]}")
+print(f"Class probabilities: {dict(zip(model.classes_, probabilities[0]))}")
+```
+## Model Details
+- **Model Type**: Random Forest Classifier
+- **Framework**: scikit-learn
+- **Number of Estimators**: 100
+- **Max Depth**: None (unlimited)
+- **Min Samples Split**: 5
+- **Min Samples Leaf**: 1
+## Citation
+If you use this model, please cite the original research on milk spoilage classification.
+## License
+MIT License

app.py ADDED Viewed

	@@ -0,0 +1,210 @@

+"""
+Gradio Web Interface for Milk Spoilage Classification
+This app provides an interactive web interface for predicting
+milk spoilage type based on microbial count data.
+"""
+import gradio as gr
+import joblib
+import numpy as np
+# Load the trained model once at import time; the path is relative to the
+# current working directory.
+# NOTE(review): joblib.load unpickles the file, so only load a model.joblib
+# that comes from a trusted source.
+model = joblib.load("model.joblib")
+# Feature information for the UI, keyed by feature name.
+# Each tuple appears to be (label, unit, minimum, maximum, default value); the
+# last three match the gr.Number arguments used in create_interface().
+# NOTE(review): this mapping is not referenced by the code shown in this file.
+FEATURE_INFO = {
+    "SPC_D7": ("Standard Plate Count - Day 7", "log CFU/mL", 0.0, 10.0, 4.0),
+    "SPC_D14": ("Standard Plate Count - Day 14", "log CFU/mL", 0.0, 10.0, 5.0),
+    "SPC_D21": ("Standard Plate Count - Day 21", "log CFU/mL", 0.0, 10.0, 6.0),
+    "TGN_D7": ("Total Gram-Negative - Day 7", "log CFU/mL", 0.0, 10.0, 3.0),
+    "TGN_D14": ("Total Gram-Negative - Day 14", "log CFU/mL", 0.0, 10.0, 4.0),
+    "TGN_D21": ("Total Gram-Negative - Day 21", "log CFU/mL", 0.0, 10.0, 5.0),
+}
+# Human-readable description for each class label.
+# NOTE(review): also not referenced by the code shown in this file; the UI
+# renders its own class table inside create_interface().
+CLASS_DESCRIPTIONS = {
+    "PPC": "Post-Pasteurization Contamination - Bacteria introduced after pasteurization",
+    "no spoilage": "No significant spoilage detected in the sample",
+    "spore spoilage": "Spoilage caused by spore-forming bacteria"
+}
+def predict_spoilage(spc_d7, spc_d14, spc_d21, tgn_d7, tgn_d14, tgn_d21):
+    """
+    Predict milk spoilage type based on microbial counts.
+    Args:
+        spc_d7: Standard Plate Count at Day 7
+        spc_d14: Standard Plate Count at Day 14
+        spc_d21: Standard Plate Count at Day 21
+        tgn_d7: Total Gram-Negative count at Day 7
+        tgn_d14: Total Gram-Negative count at Day 14
+        tgn_d21: Total Gram-Negative count at Day 21
+    Returns:
+        Dictionary of class probabilities for Gradio Label component
+    """
+    # Prepare input features
+    features = np.array([[spc_d7, spc_d14, spc_d21, tgn_d7, tgn_d14, tgn_d21]])
+    # Get prediction and probabilities
+    prediction = model.predict(features)[0]
+    probabilities = model.predict_proba(features)[0]
+    # Create probability dictionary for Gradio Label
+    prob_dict = {
+        cls: float(prob)
+        for cls, prob in zip(model.classes_, probabilities)
+    }
+    return prob_dict
+def create_interface():
+    """Create and configure the Gradio interface."""
+    # Custom CSS for styling
+    custom_css = """
+    .gradio-container {
+        font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
+    }
+    .feature-group {
+        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+        border-radius: 10px;
+        padding: 15px;
+        margin: 10px 0;
+    }
+    """
+    with gr.Blocks(
+        title="Milk Spoilage Classifier",
+        theme=gr.themes.Soft(
+            primary_hue="indigo",
+            secondary_hue="purple",
+        ),
+        css=custom_css
+    ) as demo:
+        # Header
+        gr.Markdown(
+            """
+            # 🥛 Milk Spoilage Classification Model
+            Predict milk spoilage type based on microbial count data measured at different time points.
+            Enter the Standard Plate Count (SPC) and Total Gram-Negative (TGN) values below.
+            """
+        )
+        with gr.Row():
+            # Input Section
+            with gr.Column(scale=1):
+                gr.Markdown("### 📊 Standard Plate Count (SPC)")
+                gr.Markdown("*Total bacterial count in log CFU/mL*")
+                spc_d7 = gr.Number(
+                    label="Day 7",
+                    value=4.0,
+                    minimum=0.0,
+                    maximum=10.0,
+                    info="SPC measurement at day 7"
+                )
+                spc_d14 = gr.Number(
+                    label="Day 14",
+                    value=5.0,
+                    minimum=0.0,
+                    maximum=10.0,
+                    info="SPC measurement at day 14"
+                )
+                spc_d21 = gr.Number(
+                    label="Day 21",
+                    value=6.0,
+                    minimum=0.0,
+                    maximum=10.0,
+                    info="SPC measurement at day 21"
+                )
+            with gr.Column(scale=1):
+                gr.Markdown("### 🦠 Total Gram-Negative (TGN)")
+                gr.Markdown("*Gram-negative bacterial count in log CFU/mL*")
+                tgn_d7 = gr.Number(
+                    label="Day 7",
+                    value=3.0,
+                    minimum=0.0,
+                    maximum=10.0,
+                    info="TGN measurement at day 7"
+                )
+                tgn_d14 = gr.Number(
+                    label="Day 14",
+                    value=4.0,
+                    minimum=0.0,
+                    maximum=10.0,
+                    info="TGN measurement at day 14"
+                )
+                tgn_d21 = gr.Number(
+                    label="Day 21",
+                    value=5.0,
+                    minimum=0.0,
+                    maximum=10.0,
+                    info="TGN measurement at day 21"
+                )
+        # Predict button
+        predict_btn = gr.Button("🔬 Classify Spoilage Type", variant="primary", size="lg")
+        # Output Section
+        gr.Markdown("### 📋 Prediction Results")
+        output_label = gr.Label(
+            label="Spoilage Classification",
+            num_top_classes=3
+        )
+        # Connect the prediction function
+        predict_btn.click(
+            fn=predict_spoilage,
+            inputs=[spc_d7, spc_d14, spc_d21, tgn_d7, tgn_d14, tgn_d21],
+            outputs=output_label
+        )
+        # Also trigger on any input change
+        for input_component in [spc_d7, spc_d14, spc_d21, tgn_d7, tgn_d14, tgn_d21]:
+            input_component.change(
+                fn=predict_spoilage,
+                inputs=[spc_d7, spc_d14, spc_d21, tgn_d7, tgn_d14, tgn_d21],
+                outputs=output_label
+            )
+        # Information Section
+        gr.Markdown(
+            """
+            ---
+            ### ℹ️ About the Classes
+            | Class | Description |
+            |-------|-------------|
+            | **PPC** | Post-Pasteurization Contamination - Bacteria introduced after pasteurization process |
+            | **no spoilage** | No significant spoilage detected in the sample |
+            | **spore spoilage** | Spoilage caused by spore-forming bacteria that survive pasteurization |
+            ---
+            ### 📖 How to Use
+            1. Enter the microbial count values (in log CFU/mL) for each time point
+            2. Click "Classify Spoilage Type" or wait for automatic prediction
+            3. View the predicted spoilage category and confidence scores
+            ---
+            *Model: Random Forest Classifier trained on milk quality data*
+            """
+        )
+    return demo
+# Create and launch the interface only when this file is executed as a script,
+# so that importing the module does not launch the app.
+if __name__ == "__main__":
+    demo = create_interface()
+    demo.launch()

config.json ADDED Viewed

	@@ -0,0 +1,38 @@

+{
+  "model_type": "RandomForestClassifier",
+  "framework": "sklearn",
+  "task": "classification",
+  "features": [
+    "SPC_D7",
+    "SPC_D14",
+    "SPC_D21",
+    "TGN_D7",
+    "TGN_D14",
+    "TGN_D21"
+  ],
+  "feature_descriptions": {
+    "SPC_D7": "Standard Plate Count at Day 7 (log CFU/mL)",
+    "SPC_D14": "Standard Plate Count at Day 14 (log CFU/mL)",
+    "SPC_D21": "Standard Plate Count at Day 21 (log CFU/mL)",
+    "TGN_D7": "Total Gram-Negative count at Day 7 (log CFU/mL)",
+    "TGN_D14": "Total Gram-Negative count at Day 14 (log CFU/mL)",
+    "TGN_D21": "Total Gram-Negative count at Day 21 (log CFU/mL)"
+  },
+  "classes": [
+    "PPC",
+    "no spoilage",
+    "spore spoilage"
+  ],
+  "class_descriptions": {
+    "PPC": "Post-Pasteurization Contamination",
+    "no spoilage": "No spoilage detected",
+    "spore spoilage": "Spore-forming bacteria spoilage"
+  },
+  "hyperparameters": {
+    "n_estimators": 100,
+    "max_depth": null,
+    "min_samples_split": 5,
+    "min_samples_leaf": 1,
+    "random_state": 42
+  }
+}

handler.py ADDED Viewed

	@@ -0,0 +1,160 @@

+"""
+Custom Inference Handler for Hugging Face Inference Endpoints
+This handler loads the trained RandomForest model and provides
+prediction functionality for the Hugging Face Inference API.
+"""
+import joblib
+import numpy as np
+from typing import Dict, List, Any, Union
+import os
+class EndpointHandler:
+    """
+    Custom handler for Hugging Face Inference Endpoints.
+    This class is automatically instantiated by the Inference API
+    and handles incoming prediction requests.
+    """
+    def __init__(self, path: str = ""):
+        """
+        Initialize the handler by loading the model.
+        Args:
+            path: Path to the model directory (provided by HF Inference API)
+        """
+        model_path = os.path.join(path, "model.joblib") if path else "model.joblib"
+        self.model = joblib.load(model_path)
+        # Feature names in expected order
+        self.feature_names = [
+            "SPC_D7", "SPC_D14", "SPC_D21",
+            "TGN_D7", "TGN_D14", "TGN_D21"
+        ]
+        # Class names from the model
+        self.class_names = list(self.model.classes_)
+    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
+        """
+        Handle prediction requests.
+        Args:
+            data: Input data dictionary. Supports multiple formats:
+                - {"inputs": [[f1, f2, f3, f4, f5, f6], ...]}  # List of feature arrays
+                - {"inputs": {"SPC_D7": 4.5, ...}}  # Dict with feature names
+                - {"inputs": [{"SPC_D7": 4.5, ...}, ...]}  # List of dicts
+        Returns:
+            List of prediction results with labels and probabilities
+        """
+        # Extract inputs from the data
+        inputs = data.get("inputs", data)
+        # Convert inputs to numpy array
+        X = self._process_inputs(inputs)
+        # Make predictions
+        predictions = self.model.predict(X)
+        probabilities = self.model.predict_proba(X)
+        # Format results
+        results = []
+        for pred, probs in zip(predictions, probabilities):
+            result = {
+                "label": str(pred),
+                "score": float(max(probs)),
+                "probabilities": {
+                    cls: float(prob)
+                    for cls, prob in zip(self.class_names, probs)
+                }
+            }
+            results.append(result)
+        return results
+    def _process_inputs(self, inputs: Union[List, Dict]) -> np.ndarray:
+        """
+        Process various input formats into a numpy array.
+        Args:
+            inputs: Input data in various formats
+        Returns:
+            Numpy array of shape (n_samples, n_features)
+        """
+        # Case 1: List of lists/arrays (direct feature values)
+        if isinstance(inputs, list) and len(inputs) > 0:
+            if isinstance(inputs[0], (list, tuple, np.ndarray)):
+                return np.array(inputs).reshape(-1, len(self.feature_names))
+            # Case 2: List of dictionaries with feature names
+            elif isinstance(inputs[0], dict):
+                return np.array([
+                    [sample.get(feat, 0) for feat in self.feature_names]
+                    for sample in inputs
+                ])
+            # Case 3: Single sample as flat list
+            else:
+                return np.array(inputs).reshape(1, -1)
+        # Case 4: Single dictionary with feature names
+        elif isinstance(inputs, dict):
+            return np.array([[
+                inputs.get(feat, 0) for feat in self.feature_names
+            ]])
+        # Fallback: try to convert directly
+        return np.array(inputs).reshape(-1, len(self.feature_names))
+# For local testing
+if __name__ == "__main__":
+    # Test the handler locally
+    print("Testing EndpointHandler locally...")
+    try:
+        handler = EndpointHandler()
+        # Test with list format
+        test_data_list = {
+            "inputs": [[4.5, 5.2, 6.1, 3.2, 4.0, 4.8]]
+        }
+        result = handler(test_data_list)
+        print(f"\nTest 1 (list format):")
+        print(f"  Input: {test_data_list}")
+        print(f"  Output: {result}")
+        # Test with dict format
+        test_data_dict = {
+            "inputs": {
+                "SPC_D7": 4.5, "SPC_D14": 5.2, "SPC_D21": 6.1,
+                "TGN_D7": 3.2, "TGN_D14": 4.0, "TGN_D21": 4.8
+            }
+        }
+        result = handler(test_data_dict)
+        print(f"\nTest 2 (dict format):")
+        print(f"  Input: {test_data_dict}")
+        print(f"  Output: {result}")
+        # Test batch prediction
+        test_data_batch = {
+            "inputs": [
+                [4.5, 5.2, 6.1, 3.2, 4.0, 4.8],
+                [2.0, 2.5, 3.0, 1.5, 2.0, 2.5],
+                [6.0, 7.0, 8.0, 5.0, 6.0, 7.0]
+            ]
+        }
+        result = handler(test_data_batch)
+        print(f"\nTest 3 (batch format):")
+        print(f"  Input: {test_data_batch}")
+        print(f"  Output: {result}")
+        print("\nAll tests passed!")
+    except FileNotFoundError:
+        print("Note: model.joblib not found. Run 'python prepare_model.py' first.")

model.joblib ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f56cb2839629f726b040cf8fa19fbc7a61e5b47a6fdbd414b96cccbc8a83b876
+size 302097

requirements.txt ADDED Viewed

	@@ -0,0 +1,4 @@

+scikit-learn>=1.0
+joblib>=1.0
+numpy>=1.20
+pandas>=1.3