Spaces:
Runtime error
Runtime error
Zack
committed on
Commit
·
747accb
1
Parent(s):
0702187
fix: Possible fix to missing values
Browse files
app.py
CHANGED
|
@@ -61,7 +61,7 @@ def plot_anomalies(df_test_value, data, anomalies):
|
|
| 61 |
|
| 62 |
def clean_data(df):
|
| 63 |
# Drop rows with any null data
|
| 64 |
-
df = df.dropna()
|
| 65 |
|
| 66 |
# Check if the DataFrame already contains the correct columns
|
| 67 |
if "timestamp" in df.columns and "value" in df.columns:
|
|
@@ -83,12 +83,12 @@ def clean_data(df):
|
|
| 83 |
# Rename column
|
| 84 |
df.rename(columns={"Hourly_Labor_Hours_Total": "value"}, inplace=True)
|
| 85 |
|
| 86 |
-
|
| 87 |
# Convert "Date_CY" and "Hour" columns into datetime format
|
| 88 |
df["timestamp"] = pd.to_datetime(df["Date_CY"]) + pd.to_timedelta(df["Hour"].astype(int), unit='h')
|
| 89 |
|
| 90 |
# Handle the case where hour is 24
|
| 91 |
-
df.loc[df["timestamp"].dt.hour == 24, "timestamp"] = df["timestamp"]
|
| 92 |
df["timestamp"] = df["timestamp"].dt.floor('h')
|
| 93 |
|
| 94 |
# Keep only necessary columns
|
|
@@ -97,6 +97,9 @@ def clean_data(df):
|
|
| 97 |
# Rename column
|
| 98 |
df.rename(columns={"Net_Sales_CY": "value"}, inplace=True)
|
| 99 |
|
|
|
|
|
|
|
|
|
|
| 100 |
return df
|
| 101 |
|
| 102 |
else:
|
|
|
|
| 61 |
|
| 62 |
def clean_data(df):
|
| 63 |
# Drop rows with any null data
|
| 64 |
+
# df = df.dropna()
|
| 65 |
|
| 66 |
# Check if the DataFrame already contains the correct columns
|
| 67 |
if "timestamp" in df.columns and "value" in df.columns:
|
|
|
|
| 83 |
# Rename column
|
| 84 |
df.rename(columns={"Hourly_Labor_Hours_Total": "value"}, inplace=True)
|
| 85 |
|
| 86 |
+
elif "Date_CY" in df.columns and "Hour" in df.columns and "Net_Sales_CY" in df.columns:
|
| 87 |
# Convert "Date_CY" and "Hour" columns into datetime format
|
| 88 |
df["timestamp"] = pd.to_datetime(df["Date_CY"]) + pd.to_timedelta(df["Hour"].astype(int), unit='h')
|
| 89 |
|
| 90 |
# Handle the case where hour is 24
|
| 91 |
+
df.loc[df["timestamp"].dt.hour == 24, "timestamp"] = df["timestamp"] - pd.DateOffset(days=1)
|
| 92 |
df["timestamp"] = df["timestamp"].dt.floor('h')
|
| 93 |
|
| 94 |
# Keep only necessary columns
|
|
|
|
| 97 |
# Rename column
|
| 98 |
df.rename(columns={"Net_Sales_CY": "value"}, inplace=True)
|
| 99 |
|
| 100 |
+
# Drop rows where 'value' is NaN
|
| 101 |
+
df = df.dropna(subset=['value'])
|
| 102 |
+
|
| 103 |
return df
|
| 104 |
|
| 105 |
else:
|