Deepanshu1230 committed on
Commit
1df0cf8
·
1 Parent(s): 9625576

Added the improved pipeline

Browse files
Files changed (1) hide show
  1. pipeline.py +200 -35
pipeline.py CHANGED
@@ -1,41 +1,206 @@
1
  import joblib
2
  import numpy as np
 
 
3
 
4
- # Load the model
5
- model = joblib.load("model.joblib")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
 
7
- # Define the expected columns (must match your training dataset)
8
- FEATURE_COLUMNS = [
9
- "pH",
10
- "Hardness",
11
- "Solids",
12
- "Chloramines",
13
- "Sulfate",
14
- "Conductivity",
15
- "Organic_carbon",
16
- "Trihalomethanes",
17
- "Turbidity"
18
- ]
19
 
20
- def predict(features: dict):
21
- """
22
- Make a prediction using the trained model.
23
- Args:
24
- features (dict): Dictionary of feature_name -> value
25
- Returns:
26
- dict: Prediction result
27
- """
28
- try:
29
- # Ensure feature order matches training
30
- input_data = np.array([[features[col] for col in FEATURE_COLUMNS]])
31
-
32
- # Get prediction
33
- prediction = model.predict(input_data)[0]
34
-
35
- return {"prediction": int(prediction)}
36
- except Exception as e:
37
- return {"error": str(e)}
38
 
39
- # For Hugging Face Inference API compatibility
40
- def __call__(self, inputs):
41
- return predict(inputs)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import joblib
2
  import numpy as np
3
+ import json
4
+ from typing import Dict, Union, List
5
 
6
+ class BioSentinelModel:
7
+ def __init__(self, model_path: str = "model.joblib"):
8
+ """
9
+ Initialize the BioSentinel water quality prediction model.
10
+
11
+ Args:
12
+ model_path (str): Path to the trained model file
13
+ """
14
+ try:
15
+ self.model = joblib.load(model_path)
16
+ print(f"Model loaded successfully from {model_path}")
17
+ except Exception as e:
18
+ print(f"Error loading model: {e}")
19
+ self.model = None
20
+
21
+ # Define the expected feature columns (must match training dataset)
22
+ self.FEATURE_COLUMNS = [
23
+ "pH",
24
+ "Hardness",
25
+ "Solids",
26
+ "Chloramines",
27
+ "Sulfate",
28
+ "Conductivity",
29
+ "Organic_carbon",
30
+ "Trihalomethanes",
31
+ "Turbidity"
32
+ ]
33
+
34
+ # Feature descriptions for better user understanding
35
+ self.feature_descriptions = {
36
+ "pH": "pH level of water (0-14)",
37
+ "Hardness": "Water hardness (mg/L)",
38
+ "Solids": "Total dissolved solids (ppm)",
39
+ "Chloramines": "Chloramine content (ppm)",
40
+ "Sulfate": "Sulfate content (mg/L)",
41
+ "Conductivity": "Electrical conductivity (μS/cm)",
42
+ "Organic_carbon": "Total organic carbon (ppm)",
43
+ "Trihalomethanes": "Trihalomethane content (μg/L)",
44
+ "Turbidity": "Water turbidity (NTU)"
45
+ }
46
+
47
+ def predict(self, features: Dict[str, float]) -> Dict[str, Union[int, str, float]]:
48
+ """
49
+ Make a water quality prediction using the trained model.
50
+
51
+ Args:
52
+ features (dict): Dictionary of feature_name -> value
53
+
54
+ Returns:
55
+ dict: Prediction result with additional metadata
56
+ """
57
+ if self.model is None:
58
+ return {"error": "Model not loaded properly"}
59
+
60
+ try:
61
+ # Validate input features
62
+ missing_features = [col for col in self.FEATURE_COLUMNS if col not in features]
63
+ if missing_features:
64
+ return {"error": f"Missing features: {missing_features}"}
65
+
66
+ # Ensure feature order matches training data
67
+ input_data = np.array([[features[col] for col in self.FEATURE_COLUMNS]])
68
+
69
+ # Get prediction
70
+ prediction = self.model.predict(input_data)[0]
71
+
72
+ # Get prediction probability if available (for classifiers)
73
+ confidence = None
74
+ if hasattr(self.model, 'predict_proba'):
75
+ try:
76
+ proba = self.model.predict_proba(input_data)[0]
77
+ confidence = float(max(proba))
78
+ except:
79
+ pass
80
+
81
+ # Interpret result (assuming binary classification: 0=unsafe, 1=safe)
82
+ quality_status = "Safe" if prediction == 1 else "Unsafe"
83
+
84
+ result = {
85
+ "prediction": int(prediction),
86
+ "quality_status": quality_status,
87
+ "input_features": features
88
+ }
89
+
90
+ if confidence is not None:
91
+ result["confidence"] = round(confidence, 3)
92
+
93
+ return result
94
+
95
+ except Exception as e:
96
+ return {"error": f"Prediction error: {str(e)}"}
97
+
98
+ def __call__(self, inputs: Union[Dict, List[Dict]]) -> Union[Dict, List[Dict]]:
99
+ """
100
+ Hugging Face Inference API compatibility method.
101
+
102
+ Args:
103
+ inputs: Single feature dict or list of feature dicts
104
+
105
+ Returns:
106
+ Prediction result(s)
107
+ """
108
+ if isinstance(inputs, list):
109
+ return [self.predict(inp) for inp in inputs]
110
+ else:
111
+ return self.predict(inputs)
112
+
113
+ def validate_input(self, features: Dict[str, float]) -> Dict[str, Union[bool, List[str]]]:
114
+ """
115
+ Validate input features against expected ranges.
116
+
117
+ Args:
118
+ features (dict): Input features to validate
119
+
120
+ Returns:
121
+ dict: Validation result
122
+ """
123
+ validation_ranges = {
124
+ "pH": (0, 14),
125
+ "Hardness": (0, 500),
126
+ "Solids": (0, 50000),
127
+ "Chloramines": (0, 20),
128
+ "Sulfate": (0, 500),
129
+ "Conductivity": (0, 2000),
130
+ "Organic_carbon": (0, 30),
131
+ "Trihalomethanes": (0, 200),
132
+ "Turbidity": (0, 10)
133
+ }
134
+
135
+ warnings = []
136
+ for feature, value in features.items():
137
+ if feature in validation_ranges:
138
+ min_val, max_val = validation_ranges[feature]
139
+ if not (min_val <= value <= max_val):
140
+ warnings.append(f"{feature}: {value} is outside typical range ({min_val}-{max_val})")
141
+
142
+ return {
143
+ "is_valid": len(warnings) == 0,
144
+ "warnings": warnings
145
+ }
146
 
147
+ def get_feature_info(self) -> Dict[str, str]:
148
+ """
149
+ Get information about required features.
150
+
151
+ Returns:
152
+ dict: Feature descriptions
153
+ """
154
+ return self.feature_descriptions
 
 
 
 
155
 
156
+ # Initialize the global model instance for Hugging Face compatibility
157
+ try:
158
+ biosentinel = BioSentinelModel("model.joblib")
159
+
160
+ # Global functions for Hugging Face API compatibility
161
+ def predict(features: Dict[str, float]) -> Dict:
162
+ """Global predict function for direct API calls."""
163
+ return biosentinel.predict(features)
164
+
165
+ def __call__(inputs: Union[Dict, List[Dict]]) -> Union[Dict, List[Dict]]:
166
+ """Global call function for Hugging Face inference API."""
167
+ return biosentinel(inputs)
168
+
169
+ except Exception as e:
170
+ print(f"Error initializing BioSentinel model: {e}")
 
 
 
171
 
172
+ # Example usage and testing
173
+ if __name__ == "__main__":
174
+ # Example water quality data for testing
175
+ sample_data = {
176
+ "pH": 7.2,
177
+ "Hardness": 180.5,
178
+ "Solids": 15000.0,
179
+ "Chloramines": 8.5,
180
+ "Sulfate": 250.0,
181
+ "Conductivity": 400.0,
182
+ "Organic_carbon": 12.5,
183
+ "Trihalomethanes": 75.0,
184
+ "Turbidity": 3.2
185
+ }
186
+
187
+ # Test the model
188
+ if 'biosentinel' in locals():
189
+ print("Testing BioSentinel Model:")
190
+ print("-" * 40)
191
+
192
+ # Validate input
193
+ validation = biosentinel.validate_input(sample_data)
194
+ print(f"Input validation: {validation}")
195
+
196
+ # Make prediction
197
+ result = biosentinel.predict(sample_data)
198
+ print(f"Prediction result: {result}")
199
+
200
+ # Test Hugging Face API compatibility
201
+ api_result = biosentinel(sample_data)
202
+ print(f"API call result: {api_result}")
203
+
204
+ # Get feature information
205
+ feature_info = biosentinel.get_feature_info()
206
+ print(f"Required features: {list(feature_info.keys())}")