Spaces:
Runtime error
Runtime error
Zack
committed on
Commit
·
1f82d2d
1
Parent(s):
05bf296
chore: Fix clobbered code
Browse files
app.py
CHANGED
|
@@ -68,7 +68,11 @@ def clean_data(df):
|
|
| 68 |
# Check if DataFrame contains the columns to be converted
|
| 69 |
elif "Date" in df.columns and "Hour" in df.columns and "Hourly_Labor_Hours_Total" in df.columns:
|
| 70 |
# Convert "Date" and "Hour" columns into datetime format
|
| 71 |
-
df["timestamp"] = pd.to_datetime(df["Date"] +
|
|
|
|
|
|
|
|
|
|
|
|
|
| 72 |
|
| 73 |
# Keep only necessary columns
|
| 74 |
df = df[["timestamp", "Hourly_Labor_Hours_Total"]]
|
|
@@ -81,31 +85,30 @@ def clean_data(df):
|
|
| 81 |
else:
|
| 82 |
raise ValueError("Dataframe does not contain necessary columns.")
|
| 83 |
|
| 84 |
-
def
|
| 85 |
-
#
|
| 86 |
-
|
| 87 |
-
df["timestamp"] = pd.to_datetime(df["timestamp"])
|
| 88 |
-
return df
|
| 89 |
|
| 90 |
-
#
|
| 91 |
-
|
| 92 |
-
# Convert "Date" and "Hour" columns into datetime format
|
| 93 |
-
df["timestamp"] = pd.to_datetime(df["Date"]) + pd.to_timedelta(df["Hour"].astype(int), unit='h')
|
| 94 |
-
|
| 95 |
-
# Handle the case where hour is 24
|
| 96 |
-
df.loc[df["timestamp"].dt.hour == 24, "timestamp"] = df["timestamp"] + pd.DateOffset(days=1)
|
| 97 |
-
df["timestamp"] = df["timestamp"].dt.floor('h')
|
| 98 |
|
| 99 |
-
|
| 100 |
-
|
| 101 |
|
| 102 |
-
|
| 103 |
-
df.rename(columns={"Hourly_Labor_Hours_Total": "value"}, inplace=True)
|
| 104 |
|
| 105 |
-
|
|
|
|
|
|
|
| 106 |
|
| 107 |
-
|
| 108 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 109 |
|
| 110 |
outputs = gr.outputs.Image()
|
| 111 |
|
|
|
|
| 68 |
# Check if DataFrame contains the columns to be converted
|
| 69 |
elif "Date" in df.columns and "Hour" in df.columns and "Hourly_Labor_Hours_Total" in df.columns:
|
| 70 |
# Convert "Date" and "Hour" columns into datetime format
|
| 71 |
+
df["timestamp"] = pd.to_datetime(df["Date"]) + pd.to_timedelta(df["Hour"].astype(int), unit='h')
|
| 72 |
+
|
| 73 |
+
# Handle the case where hour is 24
|
| 74 |
+
df.loc[df["timestamp"].dt.hour == 24, "timestamp"] = df["timestamp"] + pd.DateOffset(days=1)
|
| 75 |
+
df["timestamp"] = df["timestamp"].dt.floor('h')
|
| 76 |
|
| 77 |
# Keep only necessary columns
|
| 78 |
df = df[["timestamp", "Hourly_Labor_Hours_Total"]]
|
|
|
|
| 85 |
else:
|
| 86 |
raise ValueError("Dataframe does not contain necessary columns.")
|
| 87 |
|
| 88 |
+
def master(file):
    """Run the anomaly-detection pipeline on a CSV file and return its plot.

    Args:
        file: Object whose ``name`` attribute is the path of the CSV to read
            (presumably a Gradio upload handle -- confirm against the caller).

    Returns:
        Whatever ``plot_anomalies`` produces for the cleaned data, or a plain
        message string when the cleaned data has fewer than ``TIME_STEPS``
        rows.
    """
    # Load the raw CSV and pass it straight through the cleaning step.
    frame = clean_data(pd.read_csv(file.name))

    # Ensure the timestamp column is datetime-typed before indexing on it.
    frame["timestamp"] = pd.to_datetime(frame["timestamp"])
    frame = frame.set_index("timestamp")

    # Guard: with fewer than TIME_STEPS rows no sequence can be built, so
    # report that to the caller instead of running the model.
    if len(frame) < TIME_STEPS:
        return "Not enough data to create sequences. Need at least {} records.".format(TIME_STEPS)

    normalized = normalize_data(frame)

    # Plot the input test data; the result is not used further in this function.
    _input_plot = plot_test_data(normalized)

    # Detect anomalies, then render them against the original (cleaned) data.
    detected = get_anomalies(normalized)
    return plot_anomalies(normalized, frame, detected)
|
| 112 |
|
| 113 |
outputs = gr.outputs.Image()
|
| 114 |
|