Spaces:
Configuration error
Configuration error
File size: 1,763 Bytes
01ca3ba |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 |
# src/feature.py
import pandas as pd
import numpy as np
from typing import List
from src.utils import logger
def engineer_features(df: pd.DataFrame) -> pd.DataFrame:
    """
    Engineer per-node features from raw metrics.

    The caller's frame is left untouched; all work happens on a new frame.
    Rows are ordered by node and timestamp so that every diff, lag and
    rolling window is computed inside a single node, in chronological order.

    Args:
        df (pd.DataFrame): Raw data from the system. Must contain the columns
            "timestamp", "node", "cpu_usage", "rpc_error_rate" and
            "rpc_latency_ms".

    Returns:
        pd.DataFrame: The data sorted by node and timestamp, with the columns
        "cpu_trend", "cpu_rolling_mean", "error_rate_lag1" and
        "latency_rolling_std" added and every missing value replaced by 0.

    Raises:
        KeyError: If a required column is missing (logged, then re-raised).
        Exception: Any other failure is logged and re-raised rather than
            being swallowed, so the function never returns None.
    """
    try:
        # assign() builds a new frame, so the parsed timestamps do not
        # overwrite the column in the caller's DataFrame.
        features = df.assign(timestamp=pd.to_datetime(df["timestamp"]))
        features = features.sort_values(["node", "timestamp"])
        by_node = features.groupby("node")

        # Change in CPU usage versus the previous sample of the same node.
        features["cpu_trend"] = by_node["cpu_usage"].diff()
        # Mean over up to the last 5 samples; defined from the first sample on.
        features["cpu_rolling_mean"] = by_node["cpu_usage"].transform(
            lambda s: s.rolling(window=5, min_periods=1).mean()
        )
        # Error rate of the previous sample of the same node.
        features["error_rate_lag1"] = by_node["rpc_error_rate"].shift(1)
        # NOTE(review): no min_periods here, so the first four samples of
        # each node have no std and end up as 0 after the fillna below.
        features["latency_rolling_std"] = by_node["rpc_latency_ms"].transform(
            lambda s: s.rolling(window=5).std()
        )

        # Fills the gaps created by diff/shift/rolling, and also any missing
        # values that were already present in the raw columns.
        return features.fillna(0)
    except KeyError as e:
        logger.error(f"Missing Column in Data: {e}")
        raise
    except Exception as e:
        logger.error(f"Error engineering features: {e}")
        # Re-raise: falling through here would silently return None.
        raise
def main(
    input_path: str = "data/raw/synthetic_rpc_metrics_realistic.csv",
    output_path: str = "data/processed/engineered_metrics.csv",
) -> None:
    """
    Read the raw metrics CSV, engineer features and write them back out.

    Any exception raised along the way is logged and not propagated.

    Args:
        input_path (str): Path to the raw data CSV.
        output_path (str): Path where the engineered features CSV is saved.
    """
    try:
        raw_metrics = pd.read_csv(input_path)
        # index=False keeps the row index out of the CSV.
        engineer_features(raw_metrics).to_csv(output_path, index=False)
        logger.info(f"Engineered features saved to {output_path}")
    except Exception as exc:
        logger.error(f"Error in main function: {exc}")
# Run the feature-engineering pipeline with its default paths when this
# module is executed as a script.
if __name__ == "__main__":
    main()