abhinavvvvv commited on
Commit
db65b8b
·
1 Parent(s): 3a61d5c

restructured whole model

Browse files
Dockerfile CHANGED
@@ -2,11 +2,10 @@ FROM python:3.10
2
 
3
  WORKDIR /app
4
 
5
- COPY requirements.txt .
6
- RUN pip install --no-cache-dir -r requirements.txt
7
 
8
- COPY app ./app
9
 
10
  EXPOSE 7860
11
 
12
- CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
 
2
 
3
  WORKDIR /app
4
 
5
+ COPY . /app
 
6
 
7
+ RUN pip install --no-cache-dir -r requirements.txt
8
 
9
  EXPOSE 7860
10
 
11
+ CMD ["uvicorn", "api.main:app", "--host", "0.0.0.0", "--port", "7860"]
api/main.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from fastapi import FastAPI, UploadFile, File
import shutil
import os

# NOTE(review): the Dockerfile starts uvicorn with "api.main:app", but these
# are flat imports of sibling modules in api/ — with WORKDIR /app they likely
# need to be "from api.predictor import ..." (or api/ added to sys.path).
# Verify against the actual runtime environment.
from predictor import predict_logs
from shap_explainer import explain_logs

# FastAPI application exposing the failure-prediction endpoints.
app = FastAPI(
    title="RTL Failure Prediction API",
    description="Predict RTL module failure risk from verification logs",
    version="1.0"
)
13
+
14
+
15
@app.get("/")
def health():
    """Liveness probe: confirms the API process is up and serving."""
    status = {"status": "running"}
    return status
18
+
19
+
20
@app.post("/predict_file")
async def predict_file(file: UploadFile = File(...)):
    """Persist an uploaded log file to a temp path, score it, and clean up.

    Returns the JSON produced by predict_logs (summary + per-module risk).
    """
    # basename() strips client-supplied directory components, so a crafted
    # filename like "../../etc/passwd" cannot write outside the workdir.
    safe_name = os.path.basename(file.filename or "upload.log")
    path = f"temp_{safe_name}"

    with open(path, "wb") as buffer:
        shutil.copyfileobj(file.file, buffer)

    try:
        result = predict_logs(path)
    finally:
        # Remove the temp file even if prediction raises, so failed
        # requests do not accumulate files on disk.
        os.remove(path)

    return result
33
+
34
+
35
@app.post("/predict_single")
def predict_single(log_line: str):
    """Score a single raw log line.

    The line is staged through a temp file because predict_logs consumes
    file paths rather than strings.
    """
    path = "temp_single.txt"

    with open(path, "w") as f:
        f.write(log_line)

    try:
        result = predict_logs(path)
    finally:
        # Clean up even when prediction fails, so broken requests don't
        # leave a stale temp file behind.
        os.remove(path)

    return result
48
+
49
+
50
@app.post("/explain")
async def explain(file: UploadFile = File(...)):
    """Persist an uploaded log file, compute SHAP explanations, clean up.

    Returns the JSON produced by explain_logs (top features by impact).
    """
    # basename() strips client-supplied directory components, so a crafted
    # filename cannot escape the working directory.
    safe_name = os.path.basename(file.filename or "upload.log")
    path = f"temp_{safe_name}"

    with open(path, "wb") as buffer:
        shutil.copyfileobj(file.file, buffer)

    try:
        result = explain_logs(path)
    finally:
        # Remove the temp file even if explanation raises.
        os.remove(path)

    return result
api/predictor.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import pandas as pd
import joblib

# Feature pipeline shared with training; regenerates the model's input
# columns from a raw log file.
from features.log_feature_extraction import run_pipeline


# Artifacts produced at training time (stored via git-lfs under models/).
MODEL_PATH = "models/failure_model.pkl"
FEATURE_PATH = "models/feature_columns.pkl"
9
+
10
+
11
def predict_logs(log_file):
    """Score a raw verification log and aggregate failure risk per module.

    Parameters
    ----------
    log_file : str
        Path to a raw RTL verification log understood by run_pipeline.

    Returns
    -------
    dict
        {"summary": {...counts...}, "module_risk": [{module,
        failure_probability, risk}, ...]} sorted by descending risk.
    """
    import os
    import tempfile

    # FIX: write features to a per-call temp file instead of the fixed
    # "temp_features.csv" — concurrent requests no longer clobber each
    # other's features, and the file is always removed afterwards.
    fd, features_path = tempfile.mkstemp(suffix=".csv")
    os.close(fd)
    try:
        run_pipeline(log_file, features_path)
        df = pd.read_csv(features_path)
    finally:
        os.remove(features_path)

    model = joblib.load(MODEL_PATH)
    feature_cols = joblib.load(FEATURE_PATH)

    X = df[feature_cols]

    # Probability of the positive ("failure") class for every log line.
    probs = model.predict_proba(X)[:, 1]
    df["failure_probability"] = probs

    # Average line-level probability per module, riskiest first.
    module_risk = (
        df.groupby("module")["failure_probability"]
        .mean()
        .sort_values(ascending=False)
    )

    module_results = []
    for module, prob in module_risk.items():
        # Thresholds chosen by the author; tune against validation data.
        if prob > 0.75:
            risk = "HIGH"
        elif prob > 0.4:
            risk = "MEDIUM"
        else:
            risk = "LOW"

        module_results.append({
            "module": module,
            "failure_probability": float(prob),
            "risk": risk
        })

    summary = {
        "total_logs": int(len(df)),
        "modules_analyzed": int(df["module"].nunique())
    }

    return {
        "summary": summary,
        "module_risk": module_results
    }
api/shap_explainer.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import pandas as pd
import joblib
import shap

# Same feature pipeline used for prediction and training.
from features.log_feature_extraction import run_pipeline


# Training-time artifacts (stored via git-lfs under models/).
MODEL_PATH = "models/failure_model.pkl"
FEATURE_PATH = "models/feature_columns.pkl"
10
+
11
+
12
def explain_logs(log_file):
    """Compute global SHAP feature importance for one log file.

    Runs the feature pipeline on *log_file*, scores the resulting matrix
    with the trained tree model, and returns the 10 features with the
    largest mean |SHAP| value.
    """

    # NOTE(review): writes to a fixed path that is never deleted;
    # concurrent requests will race on this file — consider tempfile.
    run_pipeline(log_file, "temp_features.csv")

    df = pd.read_csv("temp_features.csv")

    model = joblib.load(MODEL_PATH)
    feature_cols = joblib.load(FEATURE_PATH)

    X = df[feature_cols]

    # `model.estimator`: presumably the saved model is a wrapper whose raw
    # tree booster lives in `.estimator` — TODO confirm against the
    # training script.
    explainer = shap.TreeExplainer(model.estimator)

    shap_values = explainer.shap_values(X)

    # NOTE(review): for binary classifiers some shap versions return a
    # list (one array per class); abs().mean() assumes a single ndarray —
    # verify against the pinned shap version.
    importance = abs(shap_values).mean(axis=0)

    # Top-10 features by mean absolute SHAP contribution.
    feature_importance = sorted(
        zip(feature_cols, importance),
        key=lambda x: x[1],
        reverse=True
    )[:10]

    return {
        "top_features": [
            {"feature": f, "impact": float(v)}
            for f, v in feature_importance
        ]
    }
app/main.py DELETED
@@ -1,28 +0,0 @@
1
from fastapi import FastAPI, UploadFile, File
from fastapi.middleware.cors import CORSMiddleware
import pandas as pd

# Legacy v1 service (removed by this restructure in favour of api/main.py).
app = FastAPI(title="RTL Reliability Engine")

# Wide-open CORS so a browser frontend could call the API from any origin.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

@app.get("/")
def root():
    # Liveness probe.
    return {"status": "running"}


@app.post("/analyze")
async def analyze(file: UploadFile = File(...)):
    # Reads the upload as tab-separated data and echoes its shape.
    # NOTE(review): assumes TSV input — confirm callers' upload format.
    df = pd.read_csv(file.file, sep="\t")

    return {
        "rows": len(df),
        "columns": list(df.columns)
    }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app/parser.py DELETED
@@ -1,32 +0,0 @@
1
- import re
2
- import pandas as pd
3
-
4
# Matches lines like "[1200] [error] message text".
LOG_PATTERN = re.compile(r"\[(\d+)\]\s+\[(\w+)\]\s+(.*)")

def parse_log_file(file_content: str) -> pd.DataFrame:
    """Parse RTL simulation logs into a structured dataframe.

    Non-matching lines are skipped; severity is normalised to upper case.
    Columns: time (int), severity (str), message (str).
    """
    matches = (
        LOG_PATTERN.match(raw.strip())
        for raw in file_content.splitlines()
    )

    rows = [
        {
            "time": int(m.group(1)),
            "severity": m.group(2).upper(),
            "message": m.group(3),
        }
        for m in matches
        if m
    ]

    return pd.DataFrame(rows)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app/reliability_engine.py DELETED
@@ -1,61 +0,0 @@
1
- import numpy as np
2
- import pandas as pd
3
-
4
# Relative weight of each log severity when scoring risk.
SEVERITY_WEIGHTS = {
    "INFO": 1,
    "WARNING": 2,
    "ERROR": 4,
    "CRITICAL": 8,
    "PASS": 0,
    "DRV": 0
}


def compute_metrics(df: pd.DataFrame):
    """Summarise a parsed log dataframe into reliability metrics.

    Returns {} for an empty frame; otherwise a dict with raw counts,
    failure rate, critical ratio, weighted severity score, MTBF (in
    simulation cycles, None when fewer than 2 failures) and a composite
    risk score.
    """
    if df.empty:
        return {}

    total_logs = len(df)
    severity_counts = df["severity"].value_counts().to_dict()

    counts = {
        level: severity_counts.get(level, 0)
        for level in ("INFO", "WARNING", "ERROR", "CRITICAL")
    }
    failures = counts["ERROR"] + counts["CRITICAL"]

    # Weighted sum over the four scored severities.
    severity_score = sum(
        counts[level] * SEVERITY_WEIGHTS[level] for level in counts
    )

    failure_rate = failures / total_logs if total_logs > 0 else 0
    critical_ratio = counts["CRITICAL"] / failures if failures > 0 else 0

    # Mean time between failures, measured on ERROR/CRITICAL timestamps.
    failure_times = df[df["severity"].isin(["ERROR", "CRITICAL"])]["time"]
    mtbf = np.mean(np.diff(failure_times)) if len(failure_times) > 1 else None

    risk_score = (
        0.5 * severity_score
        + 0.3 * failure_rate * 100
        + 0.2 * critical_ratio * 100
    )

    return {
        "total_logs": total_logs,
        "severity_counts": severity_counts,
        "failure_rate": round(failure_rate, 4),
        "critical_ratio": round(critical_ratio, 4),
        "severity_score": severity_score,
        "mtbf_cycles": mtbf,
        "risk_score": round(risk_score, 3)
    }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
features/log_feature_extraction.py ADDED
@@ -0,0 +1,157 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import pandas as pd
import numpy as np
import re
import joblib
from sklearn.feature_extraction.text import TfidfVectorizer


# Look-back window (in log lines) for the rolling count features.
WINDOW = 10
9
+
10
+
11
def parse_log_file(log_file):
    """Parse a raw RTL log file into a dataframe.

    Expected line format: "<time>ns [<SEVERITY>] <module> <message>".
    Lines that do not match are silently dropped.
    Columns: time (int), severity, module, message.
    """
    pattern = re.compile(r"(\d+)ns\s+\[(\w+)\]\s+(\w+)\s+(.*)")

    rows = []
    with open(log_file) as handle:
        for raw in handle:
            hit = pattern.match(raw.strip())
            if hit is None:
                continue
            timestamp, severity, module, message = hit.groups()
            rows.append({
                "time": int(timestamp),
                "severity": severity,
                "module": module,
                "message": message,
            })

    return pd.DataFrame(rows)
33
+
34
+
35
def severity_flags(df):
    """Add binary indicator columns for ERROR / CRITICAL / WARNING rows."""
    # Loop order fixes the column insertion order (error, critical, warning).
    for level in ("error", "critical", "warning"):
        df[f"{level}_flag"] = (df["severity"] == level.upper()).astype(int)

    return df
42
+
43
+
44
def temporal_features(df):
    """Add inter-event gap features and time since the last ERROR/CRITICAL.

    Rows before the first ERROR/CRITICAL get 0 — the same value as "that
    event happened on this very line" — via the trailing fillna(0).
    """
    df = df.sort_values("time")

    df["time_since_last_event"] = df["time"].diff().fillna(0)

    gaps = {}
    for level in ("error", "critical"):
        # Timestamp of the most recent row of this severity (NaN before
        # the first occurrence), forward-filled down the frame.
        marker = df["time"].where(df["severity"] == level.upper())
        gaps[level] = (df["time"] - marker.ffill()).fillna(0)

    df["time_since_last_error"] = gaps["error"]
    df["time_since_last_critical"] = gaps["critical"]

    # log1p compresses long gaps so they don't dominate tree splits.
    df["log_time_since_last_error"] = np.log1p(gaps["error"])
    df["log_time_since_last_critical"] = np.log1p(gaps["critical"])

    return df
64
+
65
+
66
def rolling_features(df):
    """Add rolling counts/rates of recent events over the last WINDOW lines.

    Each short-window statistic is shifted by one row so a line's features
    describe only the lines *before* it (no target leakage).
    """
    # Loop order fixes column insertion order (error, critical, warning).
    for flag in ("error", "critical", "warning"):
        df[f"{flag}_count_last_10"] = (
            df[f"{flag}_flag"].rolling(WINDOW).sum().shift(1).fillna(0)
        )

    df["failure_rate_recent_window"] = (
        df["error_count_last_10"] + df["critical_count_last_10"]
    ) / WINDOW

    # Longer-horizon trend signals (NaN for the first rows by design).
    df["rolling_error_rate_20"] = df["error_flag"].rolling(20).mean().shift(1)
    df["rolling_warning_rate_20"] = df["warning_flag"].rolling(20).mean().shift(1)

    # Net change in error activity over the last 10 lines.
    df["error_acceleration"] = df["error_flag"].diff().rolling(10).sum()

    return df
86
+
87
+
88
def module_features(df):
    """Join per-module historical failure statistics onto every log row."""
    per_module = df.groupby("module").agg(
        total_logs=("severity", "count"),
        error_logs=("error_flag", "sum"),
        critical_logs=("critical_flag", "sum")
    )

    per_module["historical_error_rate"] = (
        per_module["error_logs"] / per_module["total_logs"]
    )
    per_module["historical_critical_ratio"] = (
        per_module["critical_logs"] / per_module["total_logs"]
    )
    per_module["module_failure_density"] = (
        per_module["error_logs"] + per_module["critical_logs"]
    ) / per_module["total_logs"]

    # Broadcast the aggregates back onto the row-level frame.
    return df.merge(per_module, on="module", how="left")
107
+
108
+
109
def text_features(df):
    """Add message-text features: length, keyword flags and TF-IDF vectors.

    FIX: the original always re-fitted a fresh TfidfVectorizer on the
    incoming file, so at inference time the tfidf_* columns did not line
    up with the vocabulary the model was trained on. Reuse the vectorizer
    saved at training time when it is available; otherwise fall back to
    the original fit-and-save behaviour.
    """
    import os

    df["clean_message"] = df["message"].str.lower()
    df["message_length"] = df["clean_message"].str.len()

    # Cheap binary flags for known failure keywords.
    keywords = ["timeout", "overflow", "stall", "violation"]
    for k in keywords:
        df[f"kw_{k}"] = df["clean_message"].str.contains(k).astype(int)

    vectorizer_path = "models/tfidf_vectorizer.pkl"
    if os.path.exists(vectorizer_path):
        # Inference path: transform with the training-time vocabulary so
        # tfidf_i columns match models/feature_columns.pkl.
        vectorizer = joblib.load(vectorizer_path)
        X = vectorizer.transform(df["clean_message"])
    else:
        # Training path: fit, then persist for later inference runs
        # (original behaviour preserved, including the dump path).
        vectorizer = TfidfVectorizer(max_features=300)
        X = vectorizer.fit_transform(df["clean_message"])
        joblib.dump(vectorizer, "tfidf_vectorizer.pkl")

    tfidf = pd.DataFrame(
        X.toarray(),
        columns=[f"tfidf_{i}" for i in range(X.shape[1])]
    )

    df = pd.concat([df.reset_index(drop=True), tfidf], axis=1)

    return df
134
+
135
+
136
def run_pipeline(input_file, output_file):
    """End-to-end feature extraction: raw log file -> feature CSV.

    Stages run in a fixed order because later stages consume columns
    produced by earlier ones (e.g. rolling counts need the flag columns).
    """
    stages = (
        severity_flags,
        temporal_features,
        rolling_features,
        module_features,
        text_features,
    )

    df = parse_log_file(input_file)
    for stage in stages:
        df = stage(df)

    df.to_csv(output_file, index=False)

    print("Feature extraction complete")
153
+
154
+
155
if __name__ == "__main__":
    # FIX: the input path was hard-coded to one developer's Windows
    # machine. Allow both paths to be overridden from the command line;
    # the defaults preserve the original invocation.
    import sys

    source = sys.argv[1] if len(sys.argv) > 1 else "C:/Codes/SanDisk/rtl_logs_with_severity.txt"
    target = sys.argv[2] if len(sys.argv) > 2 else "data/features.csv"
    run_pipeline(source, target)
models/failure_model.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6e7d881191a6708f1597c5f554fe87f5126032168f39c01527f56dc41ff21976
3
+ size 7879632
models/feature_columns.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1de2e899bd0973534279487769ebeff0422283eed3ede266db7c6ad4d50e4dfa
3
+ size 1406
models/tfidf_vectorizer.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c902fd5973c55d702cf0b9390d674236f8c86872d0d1c441f419235c403117fc
3
+ size 1941
requirements.txt CHANGED
@@ -2,4 +2,7 @@ fastapi
2
  uvicorn
3
  pandas
4
  numpy
5
- python-multipart
 
 
 
 
2
  uvicorn
3
  pandas
4
  numpy
5
+ scikit-learn
6
+ lightgbm
7
+ joblib
8
+ shap