Sahil Garg committed on
Commit
7690851
·
0 Parent(s):

Initial commit: Solar PV Predictive Maintenance API

Browse files
.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # Environments
7
+ .env
8
+ .venv/
9
+
10
+ # IDE
11
+ .vscode/
12
+
13
+ # OS
14
+ .DS_Store
15
+ Thumbs.db
Dockerfile ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Minimal production-ready Dockerfile for Solar PV PdM API
2
+ FROM python:3.9-slim
3
+
4
+ # Create non-root user for security
5
+ RUN useradd -m -u 1000 user
6
+ USER user
7
+ ENV PATH="/home/user/.local/bin:$PATH"
8
+
9
+ WORKDIR /app
10
+
11
+ # Install dependencies
12
+ COPY --chown=user ./requirements.txt requirements.txt
13
+ RUN pip install --no-cache-dir -r requirements.txt
14
+
15
+ # Copy app code
16
+ COPY --chown=user . /app
17
+
18
+ EXPOSE 7860
19
+
20
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
README.md ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Solar PV Predictive Maintenance
3
+ emoji: ☀️
4
+ colorFrom: yellow
5
+ colorTo: orange
6
+ sdk: docker
7
+ app_port: 7860
8
+ pinned: false
9
+ ---
10
+
11
+ # Solar PV Predictive Maintenance API
12
+
13
+ AI-powered predictive maintenance for solar PV inverters using ML models and LLM-based diagnosis.
14
+
15
+ ## API Endpoints
16
+
17
+ ### POST /analyze
18
+ Accepts voltage and current sensor data, returns ML predictions and agent diagnosis.
19
+
20
+ **Request:**
21
+ ```json
22
+ {
23
+ "vdc1": [600.0, 601.0, 602.0],
24
+ "idc1": [10.0, 10.1, 10.2]
25
+ }
26
+ ```
27
+
28
+ **Response:**
29
+ ```json
30
+ {
31
+ "ml_output": {
32
+ "asset_id": "PV_INVERTER_001",
33
+ "failure_probability": 0.12,
34
+ "expected_ttf_days": 450.5,
35
+ "expected_rul_days": 9800.0,
36
+ "confidence": 0.85
37
+ },
38
+ "agent_output": {
39
+ "diagnosis": "...",
40
+ "urgency": "Low",
41
+ "recommended_action": "...",
42
+ "justification": ["..."]
43
+ }
44
+ }
45
+ ```
46
+
47
+ ## ML Pipeline
48
+ - **Anomaly Detection**: Isolation Forest + LSTM Autoencoder
49
+ - **Failure Forecasting**: XGBoost (Time-to-Failure + Failure Probability)
50
+ - **Agent Reasoning**: Gemini 2.5 Flash Lite via LangChain
agent/agent.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ from langchain_google_genai import GoogleGenerativeAI
3
+
4
class MaintenanceAgent:
    """LLM-backed decision layer that turns ML pipeline output into a
    maintenance recommendation.

    Wraps a Google Generative AI chat model (via LangChain) and forces it to
    answer in a fixed JSON schema: diagnosis, urgency, recommended_action,
    justification.
    """

    def __init__(self, api_key, model_name="gemini-2.5-flash-lite", temperature=0.0):
        # temperature=0.0 keeps the diagnosis deterministic for identical inputs.
        self.llm = GoogleGenerativeAI(
            model=model_name,
            temperature=temperature,
            google_api_key=api_key
        )

    def run(self, phase2_output: dict) -> dict:
        """Ask the LLM to diagnose the asset described by `phase2_output`.

        Args:
            phase2_output: ML predictions (failure probability, TTF, RUL, ...)
                serialized verbatim into the prompt.

        Returns:
            dict with keys diagnosis / urgency / recommended_action / justification.

        Raises:
            ValueError: if no JSON object can be recovered from the LLM reply.
        """
        prompt = f"""
        You are a maintenance decision AI.
        You must reason ONLY from the provided JSON.
        Do NOT invent data.

        INPUT:
        {json.dumps(phase2_output, indent=2)}

        MANDATORY: Return output strictly in JSON format only. Do not include any markdown, code blocks, or extra text.

        OUTPUT FORMAT:
        {{
          "diagnosis": "...",
          "urgency": "Low | Medium | High",
          "recommended_action": "...",
          "justification": ["...", "..."]
        }}
        """

        response = self.llm.invoke(prompt)
        return self._parse_json(response)

    @staticmethod
    def _parse_json(response: str) -> dict:
        """Best-effort extraction of a JSON object from an LLM reply.

        Tries, in order: the raw text, a fenced ``` code block, and finally the
        outermost brace-delimited span. (The previous implementation could call
        `match.group(1)` on the brace regex — which has no group 1 — whenever the
        reply contained an unclosed ``` fence, raising IndexError instead of
        parsing; each fallback now uses its own match explicitly.)
        """
        import re

        try:
            return json.loads(response)
        except json.JSONDecodeError:
            pass

        # Fallback 1: fenced code block, with or without a "json" language tag.
        fenced = re.search(r'```(?:json)?\s*(.*?)\s*```', response, re.DOTALL)
        if fenced:
            try:
                return json.loads(fenced.group(1))
            except json.JSONDecodeError:
                pass

        # Fallback 2: widest brace-delimited span in the reply.
        braced = re.search(r'\{.*\}', response, re.DOTALL)
        if braced:
            return json.loads(braced.group(0))

        raise ValueError(f"Could not parse LLM response: {response[:200]}")
app.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, HTTPException
2
+ from pydantic import BaseModel
3
+ import pandas as pd
4
+ import os
5
+ import logging
6
+ from dotenv import load_dotenv
7
+ from ml.inference import MLEngine
8
+ from agent.agent import MaintenanceAgent
9
+
10
# Pull GOOGLE_API_KEY (and any other settings) from a local .env file if present.
load_dotenv()
logging.basicConfig(level=logging.INFO)

app = FastAPI(title="Solar PV Predictive Maintenance API", version="1.0.0")

# Load models once on startup for production performance
# (MLEngine loads scaler/IsolationForest/XGBoost/LSTM artifacts from disk,
# so constructing it per-request would be far too slow.)
ml_engine = MLEngine()
# NOTE(review): if GOOGLE_API_KEY is unset, os.getenv returns None and the
# failure surfaces later, on the first LLM call — confirm this is intended.
agent = MaintenanceAgent(
    api_key=os.getenv("GOOGLE_API_KEY"),
    model_name="gemini-2.5-flash-lite",
    temperature=0.0
)
22
+
23
class SensorData(BaseModel):
    """Request body for /analyze: raw DC-side inverter telemetry."""
    vdc1: list[float]  # DC voltage samples; presumably volts — confirm units with data source
    idc1: list[float]  # DC current samples; presumably amps — must match vdc1 in length
26
+
27
class AnalysisResponse(BaseModel):
    """Response body for /analyze."""
    ml_output: dict     # MLEngine.predict_from_raw output (probabilities, TTF/RUL, confidence)
    agent_output: dict  # MaintenanceAgent.run output (diagnosis, urgency, action, justification)
30
+
31
@app.post("/analyze", response_model=AnalysisResponse)
async def analyze_sensor_data(data: SensorData):
    """Run the full PdM pipeline on one batch of sensor readings.

    Validates the paired voltage/current series, runs ML inference, then the
    LLM agent, and returns both outputs.

    Raises:
        HTTPException 400: mismatched lengths or fewer than 3 samples.
        HTTPException 500: any downstream ML/agent failure.
    """
    try:
        logging.info(f"Processing request with {len(data.vdc1)} voltage and {len(data.idc1)} current data points")

        if len(data.vdc1) != len(data.idc1):
            raise HTTPException(status_code=400, detail="Voltage and current lists must have the same length")

        if len(data.vdc1) < 3:
            raise HTTPException(status_code=400, detail="Need at least 3 data points")

        # The ML pipeline needs a minimum history (rolling window + LSTM
        # sequence), so short inputs are tiled up to 100 samples. Inputs that
        # already have >= 100 samples are passed through untouched — the old
        # expression `(xs * (100 // len(xs) + 1))[:100]` silently truncated
        # long series to their first 100 points.
        n = len(data.vdc1)
        if n < 100:
            reps = 100 // n + 1
            vdc = (data.vdc1 * reps)[:100]
            idc = (data.idc1 * reps)[:100]
        else:
            vdc, idc = data.vdc1, data.idc1
        raw_df = pd.DataFrame({"vdc1": vdc, "idc1": idc})

        # ML Inference
        phase2_output = ml_engine.predict_from_raw(raw_df)

        # Agent Reasoning
        agent_output = agent.run(phase2_output)

        return AnalysisResponse(ml_output=phase2_output, agent_output=agent_output)

    except HTTPException:
        # Re-raise our own 400s untouched so they keep their status codes.
        raise
    except Exception as e:
        logging.error(f"Error processing request: {e}")
        raise HTTPException(status_code=500, detail=str(e))
61
+
62
@app.get("/")
async def root():
    """Landing endpoint: identifies the service and points at /analyze."""
    info = {
        "message": "Solar PV Predictive Maintenance API",
        "endpoint": "/analyze (POST)",
    }
    return info
65
+
66
if __name__ == "__main__":
    # Local/dev entry point; in the container uvicorn is launched by CMD instead.
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)
data/phase2_output.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "asset_id": "PV_INVERTER_001",
3
+ "failure_probability": 0.0,
4
+ "expected_ttf_days": 10338.5,
5
+ "expected_rul_days": 10942.0,
6
+ "confidence": 1.0
7
+ }
docker-compose.yml ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ version: '3.8'
2
+
3
+ services:
4
+ pdm-api:
5
+ build: .
6
+ ports:
7
+ - "7860:7860"
8
+ environment:
9
+ - GOOGLE_API_KEY=${GOOGLE_API_KEY}
10
+ volumes:
11
+ - .:/app
12
+ command: uvicorn app:app --host 0.0.0.0 --port 7860 --reload
main.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Smoke-test script: runs the full ML + agent pipeline on synthetic telemetry
# and prints both outputs. Requires GOOGLE_API_KEY (read from .env).
import pandas as pd
import os
from dotenv import load_dotenv
from ml.inference import MLEngine
from agent.agent import MaintenanceAgent
import numpy as np

load_dotenv()

# Synthetic "healthy" inverter data: 200 samples around 600 V / 10 A.
# NOTE(review): unseeded RNG, so ML outputs vary slightly between runs.
raw_df = pd.DataFrame({
    "vdc1": np.random.normal(600, 3, 200),
    "idc1": np.random.normal(10.0, 0.2, 200)
})


engine = MLEngine()
phase2_output = engine.predict_from_raw(raw_df)

print("\n=== ML OUTPUT ===")
print(phase2_output)

# ---- LLM AGENT ----
agent = MaintenanceAgent(
    api_key=os.getenv("GOOGLE_API_KEY"),
    model_name="gemini-2.5-flash-lite",
    temperature=0.0
)

agent_output = agent.run(phase2_output)

print("\n=== AGENT OUTPUT ===")
print(agent_output)
ml/__init__.py ADDED
File without changes
ml/artifacts/lstm_autoencoder.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:82d18871f0809d8d26332184f269943ea757df9529a535fa031314877c7eefb0
3
+ size 26232
ml/artifacts/ml_config.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "feature_cols": [
3
+ "vdc_mean",
4
+ "vdc_std",
5
+ "pdc_mean",
6
+ "pdc_std",
7
+ "pdc_delta",
8
+ "pdc_slope",
9
+ "efficiency_norm"
10
+ ],
11
+ "window": 50,
12
+ "seq_len": 30,
13
+ "downsample": 10,
14
+ "failure_horizon_days": 30,
15
+ "design_life_days": 10958
16
+ }
ml/artifacts/scaler.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "mean": [
3
+ 142.25967157616776,
4
+ 3.5676508560940654,
5
+ 635.9378633904394,
6
+ 16.520853955427857,
7
+ 1.1263708972285581e-07,
8
+ 1.271437542337093e-07,
9
+ 1.0000000011260433
10
+ ],
11
+ "scale": [
12
+ 135.83560758590238,
13
+ 9.960201153946878,
14
+ 867.4447952608547,
15
+ 72.25659454825748,
16
+ 22.465312616553394,
17
+ 3.96862260102056,
18
+ 8.867862393270387e-07
19
+ ]
20
+ }
ml/artifacts/training_data.json ADDED
The diff for this file is too large to render. See raw diff
 
ml/artifacts/xgb_fail.json ADDED
The diff for this file is too large to render. See raw diff
 
ml/artifacts/xgb_ttf.json ADDED
The diff for this file is too large to render. See raw diff
 
ml/features.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import pandas as pd
3
+
4
def build_features(df, window):
    """Derive rolling statistics from raw vdc1/idc1 telemetry.

    Adds, in order: pdc1 (instantaneous DC power), rolling mean/std of voltage
    and power, first difference of power, rolling linear-fit slope of power,
    efficiency and its rolling-mean-normalized form. The first `window - 1`
    rows of each rolling column are NaN.

    Args:
        df: DataFrame with "vdc1" and "idc1" columns (input is not mutated).
        window: rolling window length in samples.

    Returns:
        A copy of `df` with the feature columns appended.
    """
    out = df.copy()

    out["pdc1"] = out["vdc1"] * out["idc1"]

    vdc_roll = out["vdc1"].rolling(window)
    out["vdc_mean"] = vdc_roll.mean()
    out["vdc_std"] = vdc_roll.std()

    pdc_roll = out["pdc1"].rolling(window)
    out["pdc_mean"] = pdc_roll.mean()
    out["pdc_std"] = pdc_roll.std()

    out["pdc_delta"] = out["pdc1"].diff()

    def _window_slope(vals):
        # Slope of a degree-1 least-squares fit over the window.
        return np.polyfit(range(len(vals)), vals, 1)[0]

    out["pdc_slope"] = pdc_roll.apply(_window_slope, raw=False)

    # pdc1 == vdc1 * idc1 by construction, so this is ~1.0; the epsilon only
    # guards against a zero denominator.
    out["efficiency"] = out["pdc1"] / (out["vdc1"] * out["idc1"] + 1e-6)
    out["efficiency_norm"] = (
        out["efficiency"] / out["efficiency"].rolling(window).mean()
    )

    return out
ml/inference.py ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import os
3
+ import joblib
4
+ import torch
5
+ import numpy as np
6
+ import pandas as pd
7
+ from sklearn.preprocessing import StandardScaler
8
+ from sklearn.ensemble import IsolationForest
9
+ from safetensors.torch import load_file
10
+
11
+ from ml.features import build_features
12
+ from ml.lstm_model import LSTMAutoencoder
13
+
14
+ BASE_DIR = os.path.dirname(os.path.abspath(__file__))
15
+ ARTIFACTS_DIR = os.path.join(BASE_DIR, "artifacts")
16
+
17
class MLEngine:
    """Loads all ML artifacts once and scores raw inverter telemetry.

    Pipeline: feature engineering -> StandardScaler -> IsolationForest anomaly
    score -> LSTM autoencoder reconstruction error -> XGBoost TTF regression +
    failure-probability classification -> health/RUL/confidence summary.
    """

    def __init__(self):
        # Shared hyperparameters (feature list, window sizes, design life).
        with open(os.path.join(ARTIFACTS_DIR, "ml_config.json")) as f:
            self.config = json.load(f)

        self.feature_cols = self.config["feature_cols"]
        self.window = self.config["window"]
        self.seq_len = self.config["seq_len"]
        self.design_life_days = self.config["design_life_days"]

        # Load scaler from JSON
        # The fitted StandardScaler is reconstructed by assigning its learned
        # attributes directly instead of unpickling a binary artifact.
        with open(os.path.join(ARTIFACTS_DIR, "scaler.json"), "r") as f:
            params = json.load(f)
        self.scaler = StandardScaler()
        self.scaler.mean_ = np.array(params["mean"])
        self.scaler.scale_ = np.array(params["scale"])
        self.scaler.var_ = self.scaler.scale_ ** 2
        self.scaler.n_features_in_ = len(self.scaler.mean_)

        # Retrain IsolationForest at startup using saved training data
        # (sklearn forests don't round-trip through JSON, so refitting with a
        # fixed random_state reproduces the original model deterministically).
        self.iso = IsolationForest(
            n_estimators=200,
            contamination=0.05,
            random_state=42
        )
        # Load training data (scaled features from Colab) and fit
        train_data = pd.read_json(os.path.join(ARTIFACTS_DIR, "training_data.json"))
        self.iso.fit(train_data[self.feature_cols])

        # Load XGBoost from JSON
        import xgboost as xgb
        self.ttf_model = xgb.XGBRegressor()
        self.ttf_model.load_model(os.path.join(ARTIFACTS_DIR, "xgb_ttf.json"))
        self.fail_model = xgb.XGBClassifier()
        self.fail_model.load_model(os.path.join(ARTIFACTS_DIR, "xgb_fail.json"))

        # Load LSTM from safetensors
        # hidden_dim=32 must match the training-time architecture or
        # load_state_dict will fail on shape mismatch.
        self.lstm = LSTMAutoencoder(
            input_dim=len(self.feature_cols),
            hidden_dim=32
        )
        state_dict = load_file(os.path.join(ARTIFACTS_DIR, "lstm_autoencoder.safetensors"))
        self.lstm.load_state_dict(state_dict)
        self.lstm.eval()

    def predict_from_raw(self, raw_df: pd.DataFrame):
        """Score one raw telemetry batch and return the summary dict.

        Args:
            raw_df: DataFrame with "vdc1"/"idc1" columns; needs enough rows to
                survive the rolling window and still leave `seq_len` samples.

        Returns:
            dict with asset_id, failure_probability, expected_ttf_days,
            expected_rul_days and confidence (all rounded).

        Raises:
            ValueError: if fewer than `seq_len` feature rows remain after the
                rolling-window NaNs are dropped.
        """
        # --- Feature engineering ---
        df = build_features(raw_df, self.window)
        df = df[self.feature_cols].dropna()

        if len(df) < self.seq_len:
            raise ValueError("Not enough data for LSTM sequence")

        # --- Scaling ---
        df_scaled = pd.DataFrame(
            self.scaler.transform(df),
            columns=self.feature_cols,
            index=df.index
        )

        # --- Isolation Forest anomaly ---
        # decision_function is higher for inliers, so negate: bigger = more anomalous.
        df_scaled["anomaly_iforest"] = -self.iso.decision_function(df_scaled)

        # --- LSTM anomaly ---
        # Single sequence of the last seq_len rows, shaped (1, seq_len, n_features).
        X = df_scaled[self.feature_cols].values
        X_seq = np.array([X[-self.seq_len:]])

        with torch.no_grad():
            recon = self.lstm(torch.tensor(X_seq, dtype=torch.float32))

        # Mean squared reconstruction error over the whole sequence.
        anomaly_lstm = float(((recon - torch.tensor(X_seq)) ** 2).mean())

        # --- Health (0–1) ---
        # Normalize anomaly_lstm (assuming max error ~1e6 from training)
        # NOTE(review): the 1e6 ceiling is a training-time assumption — confirm
        # against the autoencoder's actual error distribution.
        anomaly_norm = min(anomaly_lstm / 1e6, 1.0)
        health = max(0.0, 1.0 - anomaly_norm)

        # --- ML predictions ---
        # XGBoost inputs = latest scaled feature row + the two derived signals;
        # validate_features=False because column names aren't checked against
        # the booster's training schema (order must match — TODO confirm).
        latest_features = df_scaled[self.feature_cols].iloc[[-1]].copy()
        latest_features["anomaly_lstm"] = anomaly_lstm
        latest_features["health_index"] = health

        expected_ttf_days = float(
            self.ttf_model.predict(latest_features, validate_features=False)[0]
        )

        failure_probability = float(
            self.fail_model.predict_proba(latest_features, validate_features=False)[0][1]
        )

        # --- RUL ---
        # Remaining useful life as a health-weighted fraction of design life.
        expected_rul_days = float(health * self.design_life_days)

        # --- Confidence ---
        # Heuristic blend: classifier decisiveness (distance from 0.5) and health.
        confidence = round(
            0.5 * abs(failure_probability - 0.5) * 2
            + 0.5 * health,
            2
        )

        return {
            "asset_id": "PV_INVERTER_001",
            "failure_probability": round(failure_probability, 2),
            "expected_ttf_days": round(expected_ttf_days, 1),
            "expected_rul_days": round(expected_rul_days, 1),
            "confidence": confidence
        }
ml/lstm_model.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch.nn as nn
2
+
3
+ class LSTMAutoencoder(nn.Module):
4
+ def __init__(self, input_dim, hidden_dim):
5
+ super().__init__()
6
+ self.encoder = nn.LSTM(input_dim, hidden_dim, batch_first=True)
7
+ self.decoder = nn.LSTM(hidden_dim, input_dim, batch_first=True)
8
+
9
+ def forward(self, x):
10
+ encoded, _ = self.encoder(x)
11
+ decoded, _ = self.decoder(encoded)
12
+ return decoded
requirements.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ langchain-google-genai
2
+ python-dotenv
3
+ joblib
4
+ torch
5
+ numpy
6
+ pandas
7
+ scikit-learn
8
+ xgboost
9
+ fastapi
10
+ uvicorn
11
+ safetensors