File size: 1,763 Bytes
01ca3ba
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
# src/feature.py

import pandas as pd
import numpy as np
from typing import List
from src.utils import logger


def engineer_features(df:pd.DataFrame) -> pd.DataFrame:
    """
    Engineer per-node time-series features from raw metrics.

    Rows are ordered by ``node`` and then ``timestamp``; every feature is
    computed inside each node's group, following that order:

    - ``cpu_trend``: difference between consecutive ``cpu_usage`` values.
    - ``cpu_rolling_mean``: mean of ``cpu_usage`` over the last 5 rows
      (fewer rows are accepted at the start of a group).
    - ``error_rate_lag1``: the previous row's ``rpc_error_rate``.
    - ``latency_rolling_std``: standard deviation of ``rpc_latency_ms`` over
      a full window of 5 rows (undefined until 5 rows are available).

    Every NaN left in the frame afterwards -- including NaNs that were
    already present in the input columns -- is replaced by 0.

    Args:
        df(pd.DataFrame): Raw data from the system. Must contain the columns
            ``timestamp``, ``node``, ``cpu_usage``, ``rpc_error_rate`` and
            ``rpc_latency_ms``. The caller's frame is not modified.

    Returns:
        pd.DataFrame: A new frame, sorted by node and timestamp, with the
        feature columns added.

    Raises:
        KeyError: If a required column is missing (logged, then re-raised).
        Exception: Any other failure is logged and re-raised, so the function
            never silently returns ``None``.
    """
    try:
        # ``assign`` returns a new frame, so the caller's "timestamp" column
        # keeps its original dtype; ``sort_values`` then yields the working copy.
        df = df.assign(timestamp=pd.to_datetime(df["timestamp"]))
        df = df.sort_values(["node", "timestamp"])

        grouped = df.groupby("node")
        # Built-in grouped diff; same result as transform(lambda x: x.diff()).
        df["cpu_trend"] = grouped["cpu_usage"].diff()
        df["cpu_rolling_mean"] = grouped["cpu_usage"].transform(
            lambda x: x.rolling(window=5, min_periods=1).mean()
        )
        df["error_rate_lag1"] = grouped["rpc_error_rate"].shift(1)
        # No min_periods here: the first 4 rows of each node are NaN and end
        # up as 0 after the fillna below.
        df["latency_rolling_std"] = grouped["rpc_latency_ms"].transform(
            lambda x: x.rolling(window=5).std()
        )

        df = df.fillna(0)

        return df

    except KeyError as e:
        logger.error(f"Missing Column in Data: {e}")
        raise
    except Exception as e:
        logger.error(f"Error engineering features: {e}")
        # Propagate instead of implicitly returning None to the caller.
        raise


def main(input_path:str = "data/raw/synthetic_rpc_metrics_realistic.csv", output_path:str = "data/processed/engineered_metrics.csv") -> None:
    """
    Main function to engineer features from raw data

    Reads the raw CSV, runs ``engineer_features`` on it and writes the result
    (without the index column) to ``output_path``. The output directory is
    created when it does not exist yet.

    Any failure is logged through ``logger`` and not re-raised, so this
    function always returns normally.

    Args:
        input_path(str): Path to raw data CSV
        output_path(str): Path to save engineered features CSV
    """
    # Function-scope import so the module-level import block stays untouched.
    from pathlib import Path

    try:
        df = pd.read_csv(input_path)
        df_engineered = engineer_features(df)
        # to_csv does not create missing directories; make sure the target
        # folder exists so a fresh checkout does not fail on the write.
        Path(output_path).parent.mkdir(parents=True, exist_ok=True)
        df_engineered.to_csv(output_path, index=False)
        logger.info(f"Engineered features saved to {output_path}")
    except Exception as e:
        # Top-level boundary: report the problem instead of crashing the script.
        logger.error(f"Error in main function: {e}")

    
# Script entry point: run the pipeline with main()'s default input/output paths.
if __name__ == "__main__":
    main()