Deepanshu1230 committed on
Commit
1df0cf8
·
1 Parent(s): 9625576

Added the improved pipeline

Browse files
Files changed (1) hide show
  1. pipeline.py +200 -35
pipeline.py CHANGED
@@ -1,41 +1,206 @@
1
  import joblib
2
  import numpy as np
 
 
3
 
4
- # Load the model
5
- model = joblib.load("model.joblib")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
 
7
- # Define the expected columns (must match your training dataset)
8
- FEATURE_COLUMNS = [
9
- "pH",
10
- "Hardness",
11
- "Solids",
12
- "Chloramines",
13
- "Sulfate",
14
- "Conductivity",
15
- "Organic_carbon",
16
- "Trihalomethanes",
17
- "Turbidity"
18
- ]
19
 
20
- def predict(features: dict):
21
- """
22
- Make a prediction using the trained model.
23
- Args:
24
- features (dict): Dictionary of feature_name -> value
25
- Returns:
26
- dict: Prediction result
27
- """
28
- try:
29
- # Ensure feature order matches training
30
- input_data = np.array([[features[col] for col in FEATURE_COLUMNS]])
31
-
32
- # Get prediction
33
- prediction = model.predict(input_data)[0]
34
-
35
- return {"prediction": int(prediction)}
36
- except Exception as e:
37
- return {"error": str(e)}
38
 
39
- # For Hugging Face Inference API compatibility
40
- def __call__(self, inputs):
41
- return predict(inputs)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import joblib
2
  import numpy as np
3
+ import json
4
+ from typing import Dict, Union, List
5
 
6
+ class BioSentinelModel:
7
+ def __init__(self, model_path: str = "model.joblib"):
8
+ """
9
+ Initialize the BioSentinel water quality prediction model.
10
+
11
+ Args:
12
+ model_path (str): Path to the trained model file
13
+ """
14
+ try:
15
+ self.model = joblib.load(model_path)
16
+ print(f"Model loaded successfully from {model_path}")
17
+ except Exception as e:
18
+ print(f"Error loading model: {e}")
19
+ self.model = None
20
+
21
+ # Define the expected feature columns (must match training dataset)
22
+ self.FEATURE_COLUMNS = [
23
+ "pH",
24
+ "Hardness",
25
+ "Solids",
26
+ "Chloramines",
27
+ "Sulfate",
28
+ "Conductivity",
29
+ "Organic_carbon",
30
+ "Trihalomethanes",
31
+ "Turbidity"
32
+ ]
33
+
34
+ # Feature descriptions for better user understanding
35
+ self.feature_descriptions = {
36
+ "pH": "pH level of water (0-14)",
37
+ "Hardness": "Water hardness (mg/L)",
38
+ "Solids": "Total dissolved solids (ppm)",
39
+ "Chloramines": "Chloramine content (ppm)",
40
+ "Sulfate": "Sulfate content (mg/L)",
41
+ "Conductivity": "Electrical conductivity (μS/cm)",
42
+ "Organic_carbon": "Total organic carbon (ppm)",
43
+ "Trihalomethanes": "Trihalomethane content (μg/L)",
44
+ "Turbidity": "Water turbidity (NTU)"
45
+ }
46
+
47
+ def predict(self, features: Dict[str, float]) -> Dict[str, Union[int, str, float]]:
48
+ """
49
+ Make a water quality prediction using the trained model.
50
+
51
+ Args:
52
+ features (dict): Dictionary of feature_name -> value
53
+
54
+ Returns:
55
+ dict: Prediction result with additional metadata
56
+ """
57
+ if self.model is None:
58
+ return {"error": "Model not loaded properly"}
59
+
60
+ try:
61
+ # Validate input features
62
+ missing_features = [col for col in self.FEATURE_COLUMNS if col not in features]
63
+ if missing_features:
64
+ return {"error": f"Missing features: {missing_features}"}
65
+
66
+ # Ensure feature order matches training data
67
+ input_data = np.array([[features[col] for col in self.FEATURE_COLUMNS]])
68
+
69
+ # Get prediction
70
+ prediction = self.model.predict(input_data)[0]
71
+
72
+ # Get prediction probability if available (for classifiers)
73
+ confidence = None
74
+ if hasattr(self.model, 'predict_proba'):
75
+ try:
76
+ proba = self.model.predict_proba(input_data)[0]
77
+ confidence = float(max(proba))
78
+ except:
79
+ pass
80
+
81
+ # Interpret result (assuming binary classification: 0=unsafe, 1=safe)
82
+ quality_status = "Safe" if prediction == 1 else "Unsafe"
83
+
84
+ result = {
85
+ "prediction": int(prediction),
86
+ "quality_status": quality_status,
87
+ "input_features": features
88
+ }
89
+
90
+ if confidence is not None:
91
+ result["confidence"] = round(confidence, 3)
92
+
93
+ return result
94
+
95
+ except Exception as e:
96
+ return {"error": f"Prediction error: {str(e)}"}
97
+
98
+ def __call__(self, inputs: Union[Dict, List[Dict]]) -> Union[Dict, List[Dict]]:
99
+ """
100
+ Hugging Face Inference API compatibility method.
101
+
102
+ Args:
103
+ inputs: Single feature dict or list of feature dicts
104
+
105
+ Returns:
106
+ Prediction result(s)
107
+ """
108
+ if isinstance(inputs, list):
109
+ return [self.predict(inp) for inp in inputs]
110
+ else:
111
+ return self.predict(inputs)
112
+
113
+ def validate_input(self, features: Dict[str, float]) -> Dict[str, Union[bool, List[str]]]:
114
+ """
115
+ Validate input features against expected ranges.
116
+
117
+ Args:
118
+ features (dict): Input features to validate
119
+
120
+ Returns:
121
+ dict: Validation result
122
+ """
123
+ validation_ranges = {
124
+ "pH": (0, 14),
125
+ "Hardness": (0, 500),
126
+ "Solids": (0, 50000),
127
+ "Chloramines": (0, 20),
128
+ "Sulfate": (0, 500),
129
+ "Conductivity": (0, 2000),
130
+ "Organic_carbon": (0, 30),
131
+ "Trihalomethanes": (0, 200),
132
+ "Turbidity": (0, 10)
133
+ }
134
+
135
+ warnings = []
136
+ for feature, value in features.items():
137
+ if feature in validation_ranges:
138
+ min_val, max_val = validation_ranges[feature]
139
+ if not (min_val <= value <= max_val):
140
+ warnings.append(f"{feature}: {value} is outside typical range ({min_val}-{max_val})")
141
+
142
+ return {
143
+ "is_valid": len(warnings) == 0,
144
+ "warnings": warnings
145
+ }
146
 
147
+ def get_feature_info(self) -> Dict[str, str]:
148
+ """
149
+ Get information about required features.
150
+
151
+ Returns:
152
+ dict: Feature descriptions
153
+ """
154
+ return self.feature_descriptions
 
 
 
 
155
 
156
+ # Initialize the global model instance for Hugging Face compatibility
157
+ try:
158
+ biosentinel = BioSentinelModel("model.joblib")
159
+
160
+ # Global functions for Hugging Face API compatibility
161
+ def predict(features: Dict[str, float]) -> Dict:
162
+ """Global predict function for direct API calls."""
163
+ return biosentinel.predict(features)
164
+
165
+ def __call__(inputs: Union[Dict, List[Dict]]) -> Union[Dict, List[Dict]]:
166
+ """Global call function for Hugging Face inference API."""
167
+ return biosentinel(inputs)
168
+
169
+ except Exception as e:
170
+ print(f"Error initializing BioSentinel model: {e}")
 
 
 
171
 
172
+ # Example usage and testing
173
+ if __name__ == "__main__":
174
+ # Example water quality data for testing
175
+ sample_data = {
176
+ "pH": 7.2,
177
+ "Hardness": 180.5,
178
+ "Solids": 15000.0,
179
+ "Chloramines": 8.5,
180
+ "Sulfate": 250.0,
181
+ "Conductivity": 400.0,
182
+ "Organic_carbon": 12.5,
183
+ "Trihalomethanes": 75.0,
184
+ "Turbidity": 3.2
185
+ }
186
+
187
+ # Test the model
188
+ if 'biosentinel' in locals():
189
+ print("Testing BioSentinel Model:")
190
+ print("-" * 40)
191
+
192
+ # Validate input
193
+ validation = biosentinel.validate_input(sample_data)
194
+ print(f"Input validation: {validation}")
195
+
196
+ # Make prediction
197
+ result = biosentinel.predict(sample_data)
198
+ print(f"Prediction result: {result}")
199
+
200
+ # Test Hugging Face API compatibility
201
+ api_result = biosentinel(sample_data)
202
+ print(f"API call result: {api_result}")
203
+
204
+ # Get feature information
205
+ feature_info = biosentinel.get_feature_info()
206
+ print(f"Required features: {list(feature_info.keys())}")