Spaces:
Running
new fix
Browse files
utils.py
CHANGED
|
@@ -46,7 +46,7 @@ def validateData(file, timestamp_column:str=None):
|
|
| 46 |
|
| 47 |
assert df.isna().sum().sum() == 0, "Missing values detected in the uploaded data. Please ensure your CSV file does not contain any missing values (NaNs) for accurate anomaly detection."
|
| 48 |
assert np.isfinite(df.select_dtypes(include=[np.number])).all().all(), "Non-finite values detected in the uploaded data. Please ensure your CSV file does not contain any non-finite values (e.g., inf, -inf) for accurate anomaly detection."
|
| 49 |
-
assert
|
| 50 |
|
| 51 |
|
| 52 |
|
|
@@ -223,7 +223,7 @@ def aggregateAnomalyScores(continuousScores: dict[str, np.ndarray], percentile:
|
|
| 223 |
pd.Series: A pandas Series containing the discrete anomaly labels (1 for anomaly, 0 for normal) for each time point.
|
| 224 |
"""
|
| 225 |
# Normalize scores per column before aggregation
|
| 226 |
-
normalized_scores = {(scores - np.median(scores)) / (np.percentile(scores, 75) - np.percentile(scores, 25) + 1e-8) for col, scores in continuousScores.items()}
|
| 227 |
|
| 228 |
# Stack normalized scores and aggregate
|
| 229 |
aggregated_scores = np.mean(np.column_stack(list(normalized_scores.values())), axis=1)
|
|
@@ -315,8 +315,10 @@ def plotResults(df, target_cols:list[str]=None)->plt.Figure|None:
|
|
| 315 |
for col in target_cols:
|
| 316 |
ax.plot(df['timestamp_2'], df[col], label=col)
|
| 317 |
|
| 318 |
-
|
| 319 |
-
|
|
|
|
|
|
|
| 320 |
ax.legend()
|
| 321 |
ax.set_title('Time Series with Detected Anomalies')
|
| 322 |
ax.set_xlabel('Timestamp')
|
|
|
|
| 46 |
|
| 47 |
assert df.isna().sum().sum() == 0, "Missing values detected in the uploaded data. Please ensure your CSV file does not contain any missing values (NaNs) for accurate anomaly detection."
|
| 48 |
assert np.isfinite(df.select_dtypes(include=[np.number])).all().all(), "Non-finite values detected in the uploaded data. Please ensure your CSV file does not contain any non-finite values (e.g., inf, -inf) for accurate anomaly detection."
|
| 49 |
+
assert df.select_dtypes(exclude=[np.number]).columns.empty, "Non-numeric value columns detected. Please ensure all value columns in your CSV file contain numeric data for accurate anomaly detection."
|
| 50 |
|
| 51 |
|
| 52 |
|
|
|
|
| 223 |
pd.Series: A pandas Series containing the discrete anomaly labels (1 for anomaly, 0 for normal) for each time point.
|
| 224 |
"""
|
| 225 |
# Normalize scores per column before aggregation
|
| 226 |
+
normalized_scores = {col:(scores - np.median(scores)) / (np.percentile(scores, 75) - np.percentile(scores, 25) + 1e-8) for col, scores in continuousScores.items()}
|
| 227 |
|
| 228 |
# Stack normalized scores and aggregate
|
| 229 |
aggregated_scores = np.mean(np.column_stack(list(normalized_scores.values())), axis=1)
|
|
|
|
| 315 |
for col in target_cols:
|
| 316 |
ax.plot(df['timestamp_2'], df[col], label=col)
|
| 317 |
|
| 318 |
+
for _, row in df[df['anomaly_label'] == 1].iterrows():
|
| 319 |
+
ax.axvspan( row['timestamp_2'] - pd.Timedelta(minutes=0.5), row['timestamp_2'] + pd.Timedelta(minutes=0.5),
|
| 320 |
+
color='red', alpha=0.15 )
|
| 321 |
+
|
| 322 |
ax.legend()
|
| 323 |
ax.set_title('Time Series with Detected Anomalies')
|
| 324 |
ax.set_xlabel('Timestamp')
|