Spaces:
Sleeping
Sleeping
Sahil Garg committed on
Commit ·
d8c5e83
1
Parent(s): a56f3fb
temperature input handling built
Browse files- README.md +5 -1
- app.py +1 -1
- ml/artifacts/lstm_autoencoder.safetensors +2 -2
- ml/artifacts/ml_config.json +5 -2
- ml/artifacts/scaler.json +20 -14
- ml/artifacts/training_data.json +0 -0
- ml/artifacts/xgb_fail.json +0 -0
- ml/artifacts/xgb_ttf.json +0 -0
- ml/features.py +3 -0
- ml/inference.py +4 -2
- src/models.py +1 -0
- src/services.py +3 -3
- src/utils.py +9 -5
README.md
CHANGED
|
@@ -53,6 +53,7 @@ Analyzes sensor data and returns ML predictions with optional AI diagnosis.
|
|
| 53 |
{
|
| 54 |
"vdc1": [600.0, 601.0, 602.0],
|
| 55 |
"idc1": [10.0, 10.1, 10.2],
|
|
|
|
| 56 |
"api_key": "your_google_api_key_here",
|
| 57 |
"asset_id": "PV_INVERTER_001"
|
| 58 |
}
|
|
@@ -60,6 +61,7 @@ Analyzes sensor data and returns ML predictions with optional AI diagnosis.
|
|
| 60 |
|
| 61 |
**Parameters:**
|
| 62 |
- `vdc1`, `idc1`: Voltage and current sensor readings
|
|
|
|
| 63 |
- `api_key`: Optional Google API key for AI diagnosis
|
| 64 |
- `asset_id`: Optional asset identifier (auto-generated if not provided)
|
| 65 |
|
|
@@ -86,11 +88,12 @@ Analyzes sensor data and returns ML predictions with optional AI diagnosis.
|
|
| 86 |
|
| 87 |
1. **Input Validation**: Ensures voltage/current arrays match and contain sufficient data points
|
| 88 |
2. **Data Preparation**: Pads input to 100 data points for consistent processing
|
| 89 |
-
3. **Feature Engineering**: Creates
|
| 90 |
- Voltage mean/standard deviation
|
| 91 |
- Power mean/standard deviation
|
| 92 |
- Power delta and slope
|
| 93 |
- Normalized efficiency
|
|
|
|
| 94 |
4. **ML Inference**: Processes features through anomaly detection and prediction models
|
| 95 |
5. **Agent Analysis**: LLM analyzes ML results for human-readable diagnosis (if API key provided)
|
| 96 |
|
|
@@ -116,6 +119,7 @@ curl -X POST "http://localhost:7860/analyze" \
|
|
| 116 |
-d '{
|
| 117 |
"vdc1": [600.0, 601.0, 602.0],
|
| 118 |
"idc1": [10.0, 10.1, 10.2],
|
|
|
|
| 119 |
"api_key": "your_api_key",
|
| 120 |
"asset_id": "PV_INVERTER_001"
|
| 121 |
}'
|
|
|
|
| 53 |
{
|
| 54 |
"vdc1": [600.0, 601.0, 602.0],
|
| 55 |
"idc1": [10.0, 10.1, 10.2],
|
| 56 |
+
"pvt": [25.0, 25.1, 25.2],
|
| 57 |
"api_key": "your_google_api_key_here",
|
| 58 |
"asset_id": "PV_INVERTER_001"
|
| 59 |
}
|
|
|
|
| 61 |
|
| 62 |
**Parameters:**
|
| 63 |
- `vdc1`, `idc1`: Voltage and current sensor readings
|
| 64 |
+
- `pvt`: PV temperature readings (required)
|
| 65 |
- `api_key`: Optional Google API key for AI diagnosis
|
| 66 |
- `asset_id`: Optional asset identifier (auto-generated if not provided)
|
| 67 |
|
|
|
|
| 88 |
|
| 89 |
1. **Input Validation**: Ensures voltage/current arrays match and contain sufficient data points
|
| 90 |
2. **Data Preparation**: Pads input to 100 data points for consistent processing
|
| 91 |
+
3. **Feature Engineering**: Creates 10 statistical features using rolling window analysis:
|
| 92 |
- Voltage mean/standard deviation
|
| 93 |
- Power mean/standard deviation
|
| 94 |
- Power delta and slope
|
| 95 |
- Normalized efficiency
|
| 96 |
+
- Temperature mean/standard deviation/delta
|
| 97 |
4. **ML Inference**: Processes features through anomaly detection and prediction models
|
| 98 |
5. **Agent Analysis**: LLM analyzes ML results for human-readable diagnosis (if API key provided)
|
| 99 |
|
|
|
|
| 119 |
-d '{
|
| 120 |
"vdc1": [600.0, 601.0, 602.0],
|
| 121 |
"idc1": [10.0, 10.1, 10.2],
|
| 122 |
+
"pvt": [25.0, 25.1, 25.2],
|
| 123 |
"api_key": "your_api_key",
|
| 124 |
"asset_id": "PV_INVERTER_001"
|
| 125 |
}'
|
app.py
CHANGED
|
@@ -14,7 +14,7 @@ service = AnalysisService(config)
|
|
| 14 |
async def analyze_sensor_data(data: SensorData):
|
| 15 |
try:
|
| 16 |
logger.info(f"Processing request with {len(data.vdc1)} voltage and {len(data.idc1)} current data points")
|
| 17 |
-
ml_output, agent_output = service.analyze(data.vdc1, data.idc1, data.api_key, data.asset_id)
|
| 18 |
return AnalysisResponse(ml_output=ml_output, agent_output=agent_output)
|
| 19 |
except HTTPException:
|
| 20 |
raise
|
|
|
|
| 14 |
async def analyze_sensor_data(data: SensorData):
|
| 15 |
try:
|
| 16 |
logger.info(f"Processing request with {len(data.vdc1)} voltage and {len(data.idc1)} current data points")
|
| 17 |
+
ml_output, agent_output = service.analyze(data.vdc1, data.idc1, data.pvt, data.api_key, data.asset_id)
|
| 18 |
return AnalysisResponse(ml_output=ml_output, agent_output=agent_output)
|
| 19 |
except HTTPException:
|
| 20 |
raise
|
ml/artifacts/lstm_autoencoder.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9a766a985bdb1649847bbe1a8025a324fa4361cd0599275ae3400afb13905ecd
|
| 3 |
+
size 30216
|
ml/artifacts/ml_config.json
CHANGED
|
@@ -6,13 +6,16 @@
|
|
| 6 |
"pdc_std",
|
| 7 |
"pdc_delta",
|
| 8 |
"pdc_slope",
|
| 9 |
-
"efficiency_norm"
|
|
|
|
|
|
|
|
|
|
| 10 |
],
|
| 11 |
"window": 50,
|
| 12 |
"seq_len": 30,
|
| 13 |
"downsample": 10,
|
| 14 |
"failure_horizon_days": 30,
|
| 15 |
-
"lstm_input_dim":
|
| 16 |
"lstm_hidden_dim": 32,
|
| 17 |
"design_life_days": 10958
|
| 18 |
}
|
|
|
|
| 6 |
"pdc_std",
|
| 7 |
"pdc_delta",
|
| 8 |
"pdc_slope",
|
| 9 |
+
"efficiency_norm",
|
| 10 |
+
"temp_mean",
|
| 11 |
+
"temp_std",
|
| 12 |
+
"temp_delta"
|
| 13 |
],
|
| 14 |
"window": 50,
|
| 15 |
"seq_len": 30,
|
| 16 |
"downsample": 10,
|
| 17 |
"failure_horizon_days": 30,
|
| 18 |
+
"lstm_input_dim": 10,
|
| 19 |
"lstm_hidden_dim": 32,
|
| 20 |
"design_life_days": 10958
|
| 21 |
}
|
ml/artifacts/scaler.json
CHANGED
|
@@ -1,20 +1,26 @@
|
|
| 1 |
{
|
| 2 |
"mean": [
|
| 3 |
-
|
| 4 |
-
3.
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
1.
|
|
|
|
|
|
|
|
|
|
| 10 |
],
|
| 11 |
"scale": [
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
22.
|
| 17 |
-
3.
|
| 18 |
-
|
|
|
|
|
|
|
|
|
|
| 19 |
]
|
| 20 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"mean": [
|
| 3 |
+
277.14991332828424,
|
| 4 |
+
3.8061906228008,
|
| 5 |
+
1362.8301711669567,
|
| 6 |
+
29.156264912725195,
|
| 7 |
+
-0.0001488146566343459,
|
| 8 |
+
8.639857281699144e-05,
|
| 9 |
+
1.0000000000001135,
|
| 10 |
+
34.75638390717355,
|
| 11 |
+
0.6580879751931045,
|
| 12 |
+
0.00028594267085960774
|
| 13 |
],
|
| 14 |
"scale": [
|
| 15 |
+
17.600415431393053,
|
| 16 |
+
4.996958923838216,
|
| 17 |
+
796.0751430809097,
|
| 18 |
+
68.8226394869352,
|
| 19 |
+
22.78849103980007,
|
| 20 |
+
3.9935425420114656,
|
| 21 |
+
9.124015105734193e-10,
|
| 22 |
+
12.189764634031704,
|
| 23 |
+
0.6327830276832512,
|
| 24 |
+
0.17153519012956447
|
| 25 |
]
|
| 26 |
}
|
ml/artifacts/training_data.json
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
ml/artifacts/xgb_fail.json
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
ml/artifacts/xgb_ttf.json
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
ml/features.py
CHANGED
|
@@ -14,4 +14,7 @@ def build_features(df, window):
|
|
| 14 |
)
|
| 15 |
df["efficiency"] = df["pdc1"] / (df["vdc1"] * df["idc1"] + 1e-6)
|
| 16 |
df["efficiency_norm"] = df["efficiency"] / df["efficiency"].rolling(window).mean()
|
|
|
|
|
|
|
|
|
|
| 17 |
return df
|
|
|
|
| 14 |
)
|
| 15 |
df["efficiency"] = df["pdc1"] / (df["vdc1"] * df["idc1"] + 1e-6)
|
| 16 |
df["efficiency_norm"] = df["efficiency"] / df["efficiency"].rolling(window).mean()
|
| 17 |
+
df["temp_mean"] = df["pvt"].rolling(window).mean()
|
| 18 |
+
df["temp_std"] = df["pvt"].rolling(window).std()
|
| 19 |
+
df["temp_delta"] = df["pvt"].diff()
|
| 20 |
return df
|
ml/inference.py
CHANGED
|
@@ -36,6 +36,8 @@ class MLEngine:
|
|
| 36 |
self.window = config["window"]
|
| 37 |
self.seq_len = config["seq_len"]
|
| 38 |
self.design_life_days = config["design_life_days"]
|
|
|
|
|
|
|
| 39 |
|
| 40 |
def _load_scaler(self):
|
| 41 |
"""Load and reconstruct StandardScaler from JSON."""
|
|
@@ -68,8 +70,8 @@ class MLEngine:
|
|
| 68 |
def _load_lstm_model(self):
|
| 69 |
"""Load LSTM autoencoder from safetensors."""
|
| 70 |
self.lstm = LSTMAutoencoder(
|
| 71 |
-
input_dim=
|
| 72 |
-
hidden_dim=
|
| 73 |
)
|
| 74 |
state_dict = load_file(os.path.join(ARTIFACTS_DIR, "lstm_autoencoder.safetensors"))
|
| 75 |
self.lstm.load_state_dict(state_dict)
|
|
|
|
| 36 |
self.window = config["window"]
|
| 37 |
self.seq_len = config["seq_len"]
|
| 38 |
self.design_life_days = config["design_life_days"]
|
| 39 |
+
self.lstm_input_dim = config.get("lstm_input_dim", len(self.feature_cols))
|
| 40 |
+
self.lstm_hidden_dim = config.get("lstm_hidden_dim", 32)
|
| 41 |
|
| 42 |
def _load_scaler(self):
|
| 43 |
"""Load and reconstruct StandardScaler from JSON."""
|
|
|
|
| 70 |
def _load_lstm_model(self):
|
| 71 |
"""Load LSTM autoencoder from safetensors."""
|
| 72 |
self.lstm = LSTMAutoencoder(
|
| 73 |
+
input_dim=self.lstm_input_dim,
|
| 74 |
+
hidden_dim=self.lstm_hidden_dim
|
| 75 |
)
|
| 76 |
state_dict = load_file(os.path.join(ARTIFACTS_DIR, "lstm_autoencoder.safetensors"))
|
| 77 |
self.lstm.load_state_dict(state_dict)
|
src/models.py
CHANGED
|
@@ -3,6 +3,7 @@ from pydantic import BaseModel
|
|
| 3 |
class SensorData(BaseModel):
|
| 4 |
vdc1: list[float]
|
| 5 |
idc1: list[float]
|
|
|
|
| 6 |
api_key: str = None # Optional Google API key for LLM features
|
| 7 |
asset_id: str = None # Optional asset identifier
|
| 8 |
|
|
|
|
| 3 |
class SensorData(BaseModel):
|
| 4 |
vdc1: list[float]
|
| 5 |
idc1: list[float]
|
| 6 |
+
pvt: list[float]
|
| 7 |
api_key: str = None # Optional Google API key for LLM features
|
| 8 |
asset_id: str = None # Optional asset identifier
|
| 9 |
|
src/services.py
CHANGED
|
@@ -12,11 +12,11 @@ class AnalysisService:
|
|
| 12 |
self.config = config
|
| 13 |
self.ml_engine = MLEngine()
|
| 14 |
|
| 15 |
-
def analyze(self, vdc1: list, idc1: list, api_key: str, asset_id: str = None) -> tuple:
|
| 16 |
"""Analyze sensor data and return ML and agent outputs."""
|
| 17 |
logger.info(f"Complete analysis start - processing {len(vdc1)} data points")
|
| 18 |
-
validate_sensor_data(vdc1, idc1)
|
| 19 |
-
raw_df = prepare_dataframe(vdc1, idc1)
|
| 20 |
ml_output = self.ml_engine.predict_from_raw(raw_df, asset_id)
|
| 21 |
agent_output = self.get_agent_output(api_key, ml_output)
|
| 22 |
|
|
|
|
| 12 |
self.config = config
|
| 13 |
self.ml_engine = MLEngine()
|
| 14 |
|
| 15 |
+
def analyze(self, vdc1: list, idc1: list, pvt: list, api_key: str, asset_id: str = None) -> tuple:
|
| 16 |
"""Analyze sensor data and return ML and agent outputs."""
|
| 17 |
logger.info(f"Complete analysis start - processing {len(vdc1)} data points")
|
| 18 |
+
validate_sensor_data(vdc1, idc1, pvt)
|
| 19 |
+
raw_df = prepare_dataframe(vdc1, idc1, pvt)
|
| 20 |
ml_output = self.ml_engine.predict_from_raw(raw_df, asset_id)
|
| 21 |
agent_output = self.get_agent_output(api_key, ml_output)
|
| 22 |
|
src/utils.py
CHANGED
|
@@ -1,16 +1,20 @@
|
|
| 1 |
from fastapi import HTTPException
|
| 2 |
import pandas as pd
|
| 3 |
|
| 4 |
-
def validate_sensor_data(vdc1: list, idc1: list) -> None:
|
| 5 |
"""Validate sensor data consistency. Raises HTTPException on error."""
|
| 6 |
if len(vdc1) != len(idc1):
|
| 7 |
raise HTTPException(status_code=400, detail="Voltage and current lists must have the same length")
|
|
|
|
|
|
|
| 8 |
if len(vdc1) < 3:
|
| 9 |
raise HTTPException(status_code=400, detail="Need at least 3 data points")
|
| 10 |
|
| 11 |
-
def prepare_dataframe(vdc1: list, idc1: list) -> pd.DataFrame:
|
| 12 |
"""Prepare sensor data for ML inference by padding to 100 points."""
|
| 13 |
-
|
| 14 |
"vdc1": (vdc1 * (100 // len(vdc1) + 1))[:100],
|
| 15 |
-
"idc1": (idc1 * (100 // len(idc1) + 1))[:100]
|
| 16 |
-
|
|
|
|
|
|
|
|
|
| 1 |
from fastapi import HTTPException
|
| 2 |
import pandas as pd
|
| 3 |
|
| 4 |
+
def validate_sensor_data(vdc1: list, idc1: list, pvt: list) -> None:
|
| 5 |
"""Validate sensor data consistency. Raises HTTPException on error."""
|
| 6 |
if len(vdc1) != len(idc1):
|
| 7 |
raise HTTPException(status_code=400, detail="Voltage and current lists must have the same length")
|
| 8 |
+
if len(vdc1) != len(pvt):
|
| 9 |
+
raise HTTPException(status_code=400, detail="Voltage, current, and temperature lists must have the same length")
|
| 10 |
if len(vdc1) < 3:
|
| 11 |
raise HTTPException(status_code=400, detail="Need at least 3 data points")
|
| 12 |
|
| 13 |
+
def prepare_dataframe(vdc1: list, idc1: list, pvt: list) -> pd.DataFrame:
|
| 14 |
"""Prepare sensor data for ML inference by padding to 100 points."""
|
| 15 |
+
df = pd.DataFrame({
|
| 16 |
"vdc1": (vdc1 * (100 // len(vdc1) + 1))[:100],
|
| 17 |
+
"idc1": (idc1 * (100 // len(idc1) + 1))[:100],
|
| 18 |
+
"pvt": (pvt * (100 // len(pvt) + 1))[:100]
|
| 19 |
+
})
|
| 20 |
+
return df
|