Spaces:

Nuzz23
/

Chronos2AD_AF

Sleeping

App Files Files Community

Nuzz23 committed on Feb 15

Commit

5c8c1e3

1 Parent(s): e3cde9c

final fix

Browse files

Files changed (2) hide show

app.py +0 -3
utils.py +33 -31

app.py CHANGED Viewed

@@ -1,9 +1,6 @@
 import gradio as gr
 import os
 import torch
-import numpy as np
-import pandas as pd
-import matplotlib.pyplot as plt
 from chronos import Chronos2Pipeline

 import gradio as gr
 import os
 import torch
 from chronos import Chronos2Pipeline

utils.py CHANGED Viewed

@@ -1,4 +1,3 @@
-import torch
 import os
 import numpy as np
 import pandas as pd
@@ -6,6 +5,8 @@ import matplotlib.pyplot as plt
 from chronos import Chronos2Pipeline
 MIN_LENGTH_CONTEXT = 64
 PREDICTION_LENGTH = 32
@@ -34,7 +35,7 @@ def validateData(file, timestamp_column:str=None):
     """
     assert os.path.getsize(file) < 256 * 1024 * 1024, "File size exceeds the maximum limit of 256MB. Please upload a smaller file."
     assert file is not None, "No file uploaded. Please upload a CSV file containing your time series data."
-    assert file.endswith('.csv'), "Invalid file format. Please upload a CSV file."
     df = pd.read_csv(file, index_col=None, header=0)
     if timestamp_column is not None:
         assert timestamp_column in df.columns, f"Timestamp column '{timestamp_column}' not found in the uploaded file. Please provide a valid timestamp column name."
@@ -93,7 +94,7 @@ def  preProcessData(file, timestamp_column:str=None)->list[pd.DataFrame, pd.Seri
 ## Main prediction function
-def predictData(chronos2:Chronos2Pipeline, preProcessedData:pd.DataFrame, target_cols:list[str]) -> dict[str, pd.DataFrame]:
     """ Predict future values for the time series data using the Chronos2 pipeline. This function performs the following steps:
     1. Identifies the segments of the time series data that require predictions based on the 'item_id' column.
     2. For each segment, prepares the input data for the Chronos2 pipeline by selecting the appropriate context length of historical data.
@@ -106,9 +107,9 @@ def predictData(chronos2:Chronos2Pipeline, preProcessedData:pd.DataFrame, target
         target_cols (list[str]): A list of target column names for which predictions are to be made.
     Returns:
-        dict[str, pd.DataFrame]: A dictionary where keys are target column names and values are DataFrames containing the predictions for each column.
     """
-    prediction_item_ids = [iid for iid in sorted(preProcessedData['item_id'].unique()) if iid > 0][:-1] # Exclude last segment, since we don't have the labels for it (it would be in the future of the series)
     tasks, segment_start_indices = [], []  # Track where each segment starts in original data
     for item_id in prediction_item_ids:
@@ -152,15 +153,11 @@ def predictData(chronos2:Chronos2Pipeline, preProcessedData:pd.DataFrame, target
         # For each target column (variate)
         for d_idx, col in enumerate(target_cols):
-            seg_df = pd.DataFrame({
                 'item_id': prediction_item_ids[seg_idx],  # item_id for this segment
                 'timestep': np.arange(seg_start, seg_start + PREDICTION_LENGTH),
-            })
-            # Add quantile columns
-            for q_idx, q_level in enumerate(chronos2.quantiles):
-                seg_df[str(q_level)] = pred_np[d_idx, q_idx, :]
-            predictions_dict[col].append(seg_df)
         # Track original indices
         all_indices.extend(range(seg_start, seg_start + PREDICTION_LENGTH))
@@ -234,7 +231,7 @@ def aggregateAnomalyScores(continuousScores: dict[str, np.ndarray], percentile:
 ## Computing the discrete anomaly scores and labels
-def computeDiscreteScores(predictions_dict: dict[str, pd.DataFrame], time_series_df: pd.DataFrame, target_cols:list[str],
                         horizons: list[int] = [1, 8, 32, 64])-> pd.Series:
     """ Compute discrete anomaly scores and labels based on the predictions from the Chronos2 pipeline. This function performs the following steps:
     1. Identifies the indices in the original time series corresponding to the predictions made by the Chronos2 pipeline.
@@ -245,18 +242,16 @@ def computeDiscreteScores(predictions_dict: dict[str, pd.DataFrame], time_series
         predictions_dict (dict[str, pd.DataFrame]): A dictionary where keys are target column names and values are DataFrames containing the predictions for each column.
         time_series_df (pd.DataFrame): The original time series DataFrame containing the actual values for each target column.
         target_cols (list[str]): A list of target column names for which predictions were made and anomaly scores are to be computed.
         horizons (list[int], optional): A list of forecast horizons to consider for computing anomaly scores. Default is [1, 8, 32, 64].
     Returns:
         pd.Series: A pandas Series containing the discrete anomaly labels (1 for anomaly, 0 for normal) for each time point in the original time series.
     """
-    predictionIndexes = predictions_dict[target_cols[0]]['item_id'].index[predictions_dict[target_cols[0]]['item_id'] > 0].tolist()  # Indices of predictions in the original time series
     continuousScores = {col: computeMultiHorizonAnomalyScore(
             predictions_df=predictions_dict[col],
             actual_values=time_series_df[col].values,
-            prediction_indices=predictionIndexes,
             horizons=horizons
         ) for col in target_cols}
@@ -264,7 +259,7 @@ def computeDiscreteScores(predictions_dict: dict[str, pd.DataFrame], time_series
-def assembleResults(preProcessedData: pd.DataFrame, timestamp_old: pd.Series|None, target_cols:list[str], scores: pd.Series)->pd.DataFrame:
     """
     Assemble the final results DataFrame containing the original time series data along with the computed anomaly labels. This function performs the following steps:
     1. Creates a copy of the pre-processed DataFrame to serve as the base for the results.
@@ -277,7 +272,7 @@ def assembleResults(preProcessedData: pd.DataFrame, timestamp_old: pd.Series|Non
         preProcessedData (pd.DataFrame): The pre-processed DataFrame formatted for Chronos2 input, which serves as the base for the results.
         timestamp_old (pd.Series|None): The original timestamp values extracted from the uploaded data, or None if no timestamp column was provided.
         target_cols (list[str]): A list of target column names for which predictions were made and anomaly scores were computed.
-        scores (pd.Series): A pandas Series containing the discrete anomaly labels (1 for anomaly, 0 for normal) for each time point corresponding to the predictions.
     Returns:
         pd.DataFrame: A DataFrame containing the original time series data along with an additional column 'anomaly_label' indicating the anomaly labels, and the original timestamps if available.
@@ -285,7 +280,9 @@ def assembleResults(preProcessedData: pd.DataFrame, timestamp_old: pd.Series|Non
     result_df = preProcessedData.copy()
-    result_df['anomaly_label'] = pd.concat([pd.Series([-1] * (len(preProcessedData) - len(scores)), index=preProcessedData.index[len(scores):]), scores])
     if timestamp_old is not None:
         result_df['timestamp'] = timestamp_old
         return result_df.drop(columns=['item_id'])
@@ -310,23 +307,28 @@ def plotResults(df, target_cols:list[str]=None)->plt.Figure|None:
     if df.shape[1] - 2 < MAX_NUMBER_OF_PLOTTABLE_SERIES and df.shape[0] < MAX_NUMBER_OF_POINTS_PLOTTABLE:  # -2 to exclude timestamp and anomaly_label columns
         fig, ax = plt.subplots(1,1,figsize=(15, 5))
-        df['timestamp_2'] = df['timestamp'] if 'timestamp' in df.columns else df.index
         for col in target_cols:
-            ax.plot(df['timestamp_2'], df[col], label=col)
-        for _, row in df[df['anomaly_label'] == 1].iterrows():
-            ax.axvspan( row['timestamp_2'] - pd.Timedelta(minutes=0.5), row['timestamp_2'] + pd.Timedelta(minutes=0.5),
-                    color='red', alpha=0.15 )
         ax.legend()
         ax.set_title('Time Series with Detected Anomalies')
         ax.set_xlabel('Timestamp')
         ax.set_ylabel('Values')
-        plt.xticks(rotation=45)
         plt.tight_layout()
-        df.drop(columns=['timestamp_2'], inplace=True)
         return fig  # Return the figure object
     else:
-        return None

 import os
 import numpy as np
 import pandas as pd
 from chronos import Chronos2Pipeline
+OUT_PATH = "./savedPredictions/results.csv"
 MIN_LENGTH_CONTEXT = 64
 PREDICTION_LENGTH = 32
     """
     assert os.path.getsize(file) < 256 * 1024 * 1024, "File size exceeds the maximum limit of 256MB. Please upload a smaller file."
     assert file is not None, "No file uploaded. Please upload a CSV file containing your time series data."
+    assert file.endswith('.csv') and os.path.basename(file).count(".") == 1, "Invalid file format. Please upload a CSV file."
     df = pd.read_csv(file, index_col=None, header=0)
     if timestamp_column is not None:
         assert timestamp_column in df.columns, f"Timestamp column '{timestamp_column}' not found in the uploaded file. Please provide a valid timestamp column name."
 ## Main prediction function
+def predictData(chronos2:Chronos2Pipeline, preProcessedData:pd.DataFrame, target_cols:list[str]) -> tuple[dict[str, pd.DataFrame], np.ndarray]:
     """ Predict future values for the time series data using the Chronos2 pipeline. This function performs the following steps:
     1. Identifies the segments of the time series data that require predictions based on the 'item_id' column.
     2. For each segment, prepares the input data for the Chronos2 pipeline by selecting the appropriate context length of historical data.
         target_cols (list[str]): A list of target column names for which predictions are to be made.
     Returns:
+        tuple[dict[str, pd.DataFrame], np.ndarray]: A tuple where the first element is a dictionary where keys are target column names and values are DataFrames containing the predictions for each column, and the second element is an array of indices in the original time series that correspond to the predictions.
     """
+    prediction_item_ids = [iid for iid in sorted(preProcessedData['item_id'].unique()) if iid > 0]
     tasks, segment_start_indices = [], []  # Track where each segment starts in original data
     for item_id in prediction_item_ids:
         # For each target column (variate)
         for d_idx, col in enumerate(target_cols):
+            predictions_dict[col].append(pd.DataFrame({
                 'item_id': prediction_item_ids[seg_idx],  # item_id for this segment
                 'timestep': np.arange(seg_start, seg_start + PREDICTION_LENGTH),
+                'predictions': pred_np[d_idx, 10, :]
+            }))
         # Track original indices
         all_indices.extend(range(seg_start, seg_start + PREDICTION_LENGTH))
 ## Computing the discrete anomaly scores and labels
+def computeDiscreteScores(predictions_dict: dict[str, pd.DataFrame], time_series_df: pd.DataFrame, target_cols:list[str], indexes: np.ndarray,
                         horizons: list[int] = [1, 8, 32, 64])-> pd.Series:
     """ Compute discrete anomaly scores and labels based on the predictions from the Chronos2 pipeline. This function performs the following steps:
     1. Identifies the indices in the original time series corresponding to the predictions made by the Chronos2 pipeline.
         predictions_dict (dict[str, pd.DataFrame]): A dictionary where keys are target column names and values are DataFrames containing the predictions for each column.
         time_series_df (pd.DataFrame): The original time series DataFrame containing the actual values for each target column.
         target_cols (list[str]): A list of target column names for which predictions were made and anomaly scores are to be computed.
+        indexes (np.ndarray): The array of indices in the original time series that correspond to the predictions.
         horizons (list[int], optional): A list of forecast horizons to consider for computing anomaly scores. Default is [1, 8, 32, 64].
     Returns:
         pd.Series: A pandas Series containing the discrete anomaly labels (1 for anomaly, 0 for normal) for each time point in the original time series.
     """
     continuousScores = {col: computeMultiHorizonAnomalyScore(
             predictions_df=predictions_dict[col],
             actual_values=time_series_df[col].values,
+            prediction_indices=indexes,
             horizons=horizons
         ) for col in target_cols}
+def assembleResults(preProcessedData: pd.DataFrame, timestamp_old: pd.Series|None, target_cols:list[str], scores: np.ndarray)->pd.DataFrame:
     """
     Assemble the final results DataFrame containing the original time series data along with the computed anomaly labels. This function performs the following steps:
     1. Creates a copy of the pre-processed DataFrame to serve as the base for the results.
         preProcessedData (pd.DataFrame): The pre-processed DataFrame formatted for Chronos2 input, which serves as the base for the results.
         timestamp_old (pd.Series|None): The original timestamp values extracted from the uploaded data, or None if no timestamp column was provided.
         target_cols (list[str]): A list of target column names for which predictions were made and anomaly scores were computed.
+        scores (np.ndarray): A numpy array containing the discrete anomaly labels (1 for anomaly, 0 for normal) for each time point corresponding to the predictions.
     Returns:
         pd.DataFrame: A DataFrame containing the original time series data along with an additional column 'anomaly_label' indicating the anomaly labels, and the original timestamps if available.
     result_df = preProcessedData.copy()
+    result_df['anomaly_label'] = -1
+    result_df.loc[result_df['item_id'] > 0, 'anomaly_label'] = scores[:len(result_df)-MIN_LENGTH_CONTEXT]
     if timestamp_old is not None:
         result_df['timestamp'] = timestamp_old
         return result_df.drop(columns=['item_id'])
     if df.shape[1] - 2 < MAX_NUMBER_OF_PLOTTABLE_SERIES and df.shape[0] < MAX_NUMBER_OF_POINTS_PLOTTABLE:  # -2 to exclude timestamp and anomaly_label columns
         fig, ax = plt.subplots(1,1,figsize=(15, 5))
+        df['temp_timestamp_for_print'] = np.arange(len(df))  # posizione numerica dei punti
         for col in target_cols:
+            ax.plot(df['temp_timestamp_for_print'], df[col], label=col)
+        # Highlight anomalies with a red background
+        for _, row in df[df['anomaly_label'] == 1].iterrows():
+            ax.axvspan(
+                row['temp_timestamp_for_print'] - 0.5,
+                row['temp_timestamp_for_print'] + 0.5,
+                color='red',
+                alpha=0.15
+            )
         ax.legend()
         ax.set_title('Time Series with Detected Anomalies')
         ax.set_xlabel('Timestamp')
         ax.set_ylabel('Values')
+        ax.grid(True, which='both', linestyle='--', linewidth=0.5)
+        ax.set_xticks(df['temp_timestamp_for_print'][::max(1, len(df)//10)], labels=df['timestamp'][::max(1, len(df)//10)], rotation=45)
         plt.tight_layout()
+        df.drop(columns=['temp_timestamp_for_print'], inplace=True)
         return fig  # Return the figure object
     else:
+        return None