Nuzz23 committed on
Commit
e3cde9c
·
1 Parent(s): f4e0b44
Files changed (1) hide show
  1. utils.py +6 -4
utils.py CHANGED
@@ -46,7 +46,7 @@ def validateData(file, timestamp_column:str=None):
46
 
47
  assert df.isna().sum().sum() == 0, "Missing values detected in the uploaded data. Please ensure your CSV file does not contain any missing values (NaNs) for accurate anomaly detection."
48
  assert np.isfinite(df.select_dtypes(include=[np.number])).all().all(), "Non-finite values detected in the uploaded data. Please ensure your CSV file does not contain any non-finite values (e.g., inf, -inf) for accurate anomaly detection."
49
- assert 'object' not in df.columns.dtype, "Non-numeric value columns detected. Please ensure all value columns in your CSV file contain numeric data for accurate anomaly detection."
50
 
51
 
52
 
@@ -223,7 +223,7 @@ def aggregateAnomalyScores(continuousScores: dict[str, np.ndarray], percentile:
223
  pd.Series: A pandas Series containing the discrete anomaly labels (1 for anomaly, 0 for normal) for each time point.
224
  """
225
  # Normalize scores per column before aggregation
226
- normalized_scores = {(scores - np.median(scores)) / (np.percentile(scores, 75) - np.percentile(scores, 25) + 1e-8) for col, scores in continuousScores.items()}
227
 
228
  # Stack normalized scores and aggregate
229
  aggregated_scores = np.mean(np.column_stack(list(normalized_scores.values())), axis=1)
@@ -315,8 +315,10 @@ def plotResults(df, target_cols:list[str]=None)->plt.Figure|None:
315
  for col in target_cols:
316
  ax.plot(df['timestamp_2'], df[col], label=col)
317
 
318
- anomalies = df[df['anomaly_label'] == 1]
319
- ax.scatter(anomalies['timestamp_2'], anomalies[target_cols], color='red', label='Anomalies', marker='x')
 
 
320
  ax.legend()
321
  ax.set_title('Time Series with Detected Anomalies')
322
  ax.set_xlabel('Timestamp')
 
46
 
47
  assert df.isna().sum().sum() == 0, "Missing values detected in the uploaded data. Please ensure your CSV file does not contain any missing values (NaNs) for accurate anomaly detection."
48
  assert np.isfinite(df.select_dtypes(include=[np.number])).all().all(), "Non-finite values detected in the uploaded data. Please ensure your CSV file does not contain any non-finite values (e.g., inf, -inf) for accurate anomaly detection."
49
+ assert df.select_dtypes(exclude=[np.number]).columns.empty, "Non-numeric value columns detected. Please ensure all value columns in your CSV file contain numeric data for accurate anomaly detection."
50
 
51
 
52
 
 
223
  pd.Series: A pandas Series containing the discrete anomaly labels (1 for anomaly, 0 for normal) for each time point.
224
  """
225
  # Normalize scores per column before aggregation
226
+ normalized_scores = {col:(scores - np.median(scores)) / (np.percentile(scores, 75) - np.percentile(scores, 25) + 1e-8) for col, scores in continuousScores.items()}
227
 
228
  # Stack normalized scores and aggregate
229
  aggregated_scores = np.mean(np.column_stack(list(normalized_scores.values())), axis=1)
 
315
  for col in target_cols:
316
  ax.plot(df['timestamp_2'], df[col], label=col)
317
 
318
+ for _, row in df[df['anomaly_label'] == 1].iterrows():
319
+ ax.axvspan( row['timestamp_2'] - pd.Timedelta(minutes=0.5), row['timestamp_2'] + pd.Timedelta(minutes=0.5),
320
+ color='red', alpha=0.15 )
321
+
322
  ax.legend()
323
  ax.set_title('Time Series with Detected Anomalies')
324
  ax.set_xlabel('Timestamp')