timeseries-anomaly-detection-autoencoders

Runtime error

App Files Files Community

Zack committed on Jul 31, 2023

Commit

1f82d2d

1 Parent(s): 05bf296

chore: Fix clobbered code

Browse files

Files changed (1) hide show

app.py +24 -21

app.py CHANGED Viewed

@@ -68,7 +68,11 @@ def clean_data(df):
     # Check if DataFrame contains the columns to be converted
     elif "Date" in df.columns and "Hour" in df.columns and "Hourly_Labor_Hours_Total" in df.columns:
         # Convert "Date" and "Hour" columns into datetime format
-        df["timestamp"] = pd.to_datetime(df["Date"] + ' ' + df["Hour"].astype(str) + ":00:00")
         # Keep only necessary columns
         df = df[["timestamp", "Hourly_Labor_Hours_Total"]]
@@ -81,31 +85,30 @@ def clean_data(df):
     else:
         raise ValueError("Dataframe does not contain necessary columns.")
-def clean_data(df):
-    # Check if the DataFrame already contains the correct columns
-    if "timestamp" in df.columns and "value" in df.columns:
-        df["timestamp"] = pd.to_datetime(df["timestamp"])
-        return df
-    # Check if DataFrame contains the columns to be converted
-    elif "Date" in df.columns and "Hour" in df.columns and "Hourly_Labor_Hours_Total" in df.columns:
-        # Convert "Date" and "Hour" columns into datetime format
-        df["timestamp"] = pd.to_datetime(df["Date"]) + pd.to_timedelta(df["Hour"].astype(int), unit='h')
-        # Handle the case where hour is 24
-        df.loc[df["timestamp"].dt.hour == 24, "timestamp"] = df["timestamp"] + pd.DateOffset(days=1)
-        df["timestamp"] = df["timestamp"].dt.floor('h')
-        # Keep only necessary columns
-        df = df[["timestamp", "Hourly_Labor_Hours_Total"]]
-        # Rename column
-        df.rename(columns={"Hourly_Labor_Hours_Total": "value"}, inplace=True)
-        return df
-    else:
-        raise ValueError("Dataframe does not contain necessary columns.")
 outputs = gr.outputs.Image()

     # Check if DataFrame contains the columns to be converted
     elif "Date" in df.columns and "Hour" in df.columns and "Hourly_Labor_Hours_Total" in df.columns:
         # Convert "Date" and "Hour" columns into datetime format
+        df["timestamp"] = pd.to_datetime(df["Date"]) + pd.to_timedelta(df["Hour"].astype(int), unit='h')
+        # Handle the case where hour is 24
+        df.loc[df["timestamp"].dt.hour == 24, "timestamp"] = df["timestamp"] + pd.DateOffset(days=1)
+        df["timestamp"] = df["timestamp"].dt.floor('h')
         # Keep only necessary columns
         df = df[["timestamp", "Hourly_Labor_Hours_Total"]]
     else:
         raise ValueError("Dataframe does not contain necessary columns.")
+def master(file):
+    # read file
+    data = pd.read_csv(file.name)
+    # clean data
+    data = clean_data(data)
+    # Convert timestamp to datetime after cleaning
+    data['timestamp'] = pd.to_datetime(data['timestamp'])
+    data.set_index("timestamp", inplace=True)
+    # Check if data has enough records to create sequences
+    if len(data) < TIME_STEPS:
+        return "Not enough data to create sequences. Need at least {} records.".format(TIME_STEPS)
+    df_test_value = normalize_data(data)
+    # plot input test data
+    plot1 = plot_test_data(df_test_value)
+    # predict
+    anomalies = get_anomalies(df_test_value)
+    #plot anomalous data points
+    plot2 = plot_anomalies(df_test_value, data, anomalies)
+    return plot2
 outputs = gr.outputs.Image()