Spaces:

anujkum0x
/

apollo

Sleeping

App Files Community

anujkum0x committed on Feb 18, 2025

Commit

555bf89

verified ·

1 Parent(s): 4622a9e

Update app.py

Browse files

Files changed (1) hide show

app.py +59 -43

app.py CHANGED Viewed

@@ -157,7 +157,7 @@ def create_plot(data, forecast_data, time_col, target_col):
     )
     return fig
-def full_forecast_pipeline(file_obj, time_col, target_col, finetune_steps, freq, start_date, end_date, start_time, end_time, resample_freq, merge_data, forecast_start_date, forecast_end_date) -> Tuple[str, object, str, str]:
     """
     Full pipeline: loads the data, calls the forecast function, and then processes the data.
     """
@@ -172,63 +172,73 @@ def full_forecast_pipeline(file_obj, time_col, target_col, finetune_steps, freq,
         # Sort the DataFrame by the time column
         data = data.sort_values(by=time_col)
-        # Apply date range selection
         if start_date and end_date:
             start_datetime = pd.to_datetime(start_date)
             end_datetime = pd.to_datetime(end_date)
             data = data[(data[time_col] >= start_datetime) & (data[time_col] <= end_datetime)]
             logger.info(f"Data filtered from {start_datetime} to {end_datetime}. Shape: {data.shape}")
         # Resample the data
         data = data.resample(resample_freq).mean()
         data.reset_index(inplace=True)
-        # Convert forecast start and end dates to datetime
-        if forecast_start_date and forecast_end_date:
-            forecast_start_datetime = pd.to_datetime(forecast_start_date)
             forecast_end_datetime = pd.to_datetime(forecast_end_date)
-            # Calculate the time difference
-            time_difference = forecast_end_datetime - forecast_start_datetime
-            # Calculate forecast horizon based on frequency
-            if freq == 'D':
-                forecast_horizon = time_difference.days
-            elif freq == 'W':
-                forecast_horizon = time_difference.days / 7
-            elif freq == 'M':
-                forecast_horizon = time_difference.days / 30.44  # Average days in a month
-            elif freq == 'Y':
-                forecast_horizon = time_difference.days / 365.25  # Average days in a year
-            elif 'min' in freq:
-                minutes = int(freq.replace('min', ''))
-                forecast_horizon = time_difference.total_seconds() / (minutes * 60)
-            elif 'H' in freq:
-                hours = int(freq.replace('H', ''))
-                forecast_horizon = time_difference.total_seconds() / (hours * 3600)
             else:
-                raise ValueError("Unsupported frequency. Please select a valid frequency.")
-            forecast_horizon = int(forecast_horizon)
-        else:
-            raise ValueError("Forecast start and end dates must be provided.")
         forecast_result = forecast_nixtla(data, forecast_horizon, finetune_steps, freq, time_col, target_col)
         processed_data = process_forecast_data(forecast_result, time_col)
         processed_data = apply_zero_patterns(data.copy(), processed_data, time_col, target_col)
         if merge_data:
             merged_data = pd.merge(data.reset_index(), processed_data, on=time_col, how='inner')
         else:
             merged_data = processed_data
-        # Filter forecast data based on forecast start and end dates
-        merged_data[time_col] = pd.to_datetime(merged_data[time_col])  # Ensure time_col is datetime
-        merged_data = merged_data[(merged_data[time_col] >= forecast_start_datetime) & (merged_data[time_col] <= forecast_end_datetime)]
-        logger.info(f"Forecast data filtered from {forecast_start_datetime} to {forecast_end_datetime}. Shape: {merged_data.shape}")
-        plot = create_plot(data, merged_data, time_col, target_col)
-        csv_data = merged_data.to_csv(index=False)
         # Create a temporary file and write the CSV data to it
         with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix=".csv") as tmpfile:
@@ -282,15 +292,21 @@ def create_interface():
             target_col_input = gr.Textbox(label="Target Column", placeholder="Enter target column name")
         with gr.Row():
-            forecast_horizon_input = gr.Number(label="Forecast Horizon", value=10)
             finetune_steps_input = gr.Number(label="Finetune Steps", value=100)
             freq_dropdown = gr.Dropdown(choices=['15min', '30min', 'H', '2H', '3H', '4H', '5H', '6H', '12H', 'D', 'W', 'M', 'Y'], label="Frequency", value='D')
-            with gr.Row():
-                start_date_input = gr.Textbox(label="Start Date (YYYY-MM-DD)", placeholder="YYYY-MM-DD", value="2023-01-01")
-                start_time_input = gr.Textbox(label="Start Time (HH:MM)", placeholder="HH:MM", value="00:00")
-                end_date_input = gr.Textbox(label="End Date (YYYY-MM-DD)", placeholder="YYYY-MM-DD", value="2023-12-31")
-                end_time_input = gr.Textbox(label="End Time (HH:MM)", placeholder="HH:MM", value="23:59")
         resample_freq_dropdown = gr.Dropdown(choices=['15min', '30min', 'H', '2H', '3H', '4H', '5H', '6H', '12H', 'D', 'W', 'M', 'Y'], label="Resample Frequency", value='D')
@@ -303,10 +319,10 @@ def create_interface():
         btn = gr.Button("Generate Forecast")
         btn.click(
             fn=full_forecast_pipeline,
-            inputs=[file_input, time_col_input, target_col_input, forecast_horizon_input, finetune_steps_input, freq_dropdown, start_date_input, end_date_input, start_time_input, end_time_input, resample_freq_dropdown, gr.Checkbox(label="Merge Data", value=False), gr.Textbox(label="Forecast Start Date", placeholder="YYYY-MM-DD", value="2023-01-01"), gr.Textbox(label="Forecast End Date", placeholder="YYYY-MM-DD", value="2023-12-31")],
             outputs=[output_csv, output_plot, download_button, error_output]
         )
     return iface
 iface = create_interface()
-iface.launch()

     )
     return fig
+def full_forecast_pipeline(file_obj, time_col, target_col, forecast_horizon, finetune_steps, freq, start_date, end_date, start_time, end_time, resample_freq, merge_data, forecast_start_date, forecast_end_date) -> Tuple[str, object, str, str]:
     """
     Full pipeline: loads the data, calls the forecast function, and then processes the data.
     """
         # Sort the DataFrame by the time column
         data = data.sort_values(by=time_col)
+        # Get min and max dates from the data
+        min_date = data[time_col].min().strftime('%Y-%m-%d')
+        max_date = data[time_col].max().strftime('%Y-%m-%d')
+        # Fill missing values with 0
+        data = data.fillna(0)
+        # Apply date range selection for historical data
         if start_date and end_date:
             start_datetime = pd.to_datetime(start_date)
             end_datetime = pd.to_datetime(end_date)
             data = data[(data[time_col] >= start_datetime) & (data[time_col] <= end_datetime)]
             logger.info(f"Data filtered from {start_datetime} to {end_datetime}. Shape: {data.shape}")
+        data = data.set_index(time_col)
         # Resample the data
         data = data.resample(resample_freq).mean()
         data.reset_index(inplace=True)
+        # Calculate forecast horizon if forecast_end_date is provided
+        if forecast_end_date:
+            historical_end_date = pd.to_datetime(end_date) if end_date else data[time_col].max()
             forecast_end_datetime = pd.to_datetime(forecast_end_date)
+            day_difference = (forecast_end_datetime - historical_end_date).days
+            if day_difference <= 0:
+                raise ValueError("Forecast end date must be after the historical data end date.")
+            # Adjust forecast_horizon based on frequency
+            if freq == 'H':
+                forecast_horizon = day_difference * 24
+            elif freq == '30min':
+                forecast_horizon = day_difference * 48
+            elif freq == '15min':
+                forecast_horizon = day_difference * 96
+            elif freq == 'D':
+                forecast_horizon = day_difference
+            elif freq == 'W': # Approximation: 7 days in a week
+                forecast_horizon = day_difference / 7
+            elif freq == 'M': # Approximation: 30 days in a month
+                forecast_horizon = day_difference / 30
+            elif freq == 'Y': # Approximation: 365 days in a year
+                forecast_horizon = day_difference / 365
             else:
+                forecast_horizon = day_difference # Default to days if frequency is not recognized
+            forecast_horizon = max(1, int(round(forecast_horizon))) # Ensure forecast_horizon is at least 1 and integer
         forecast_result = forecast_nixtla(data, forecast_horizon, finetune_steps, freq, time_col, target_col)
         processed_data = process_forecast_data(forecast_result, time_col)
         processed_data = apply_zero_patterns(data.copy(), processed_data, time_col, target_col)
+        # Apply forecast date range selection
+        if forecast_start_date and forecast_end_date:
+            forecast_start_datetime = pd.to_datetime(forecast_start_date)
+            forecast_end_datetime = pd.to_datetime(forecast_end_date)
+            processed_data = processed_data[(processed_data[time_col] >= forecast_start_datetime) & (processed_data[time_col] <= forecast_end_datetime)]
+            logger.info(f"Forecast data filtered from {forecast_start_datetime} to {forecast_end_datetime}. Shape: {processed_data.shape}")
         if merge_data:
             merged_data = pd.merge(data.reset_index(), processed_data, on=time_col, how='inner')
         else:
             merged_data = processed_data
+        plot = create_plot(data, processed_data, time_col, target_col)
+        csv_data = processed_data.to_csv(index=False)
         # Create a temporary file and write the CSV data to it
         with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix=".csv") as tmpfile:
             target_col_input = gr.Textbox(label="Target Column", placeholder="Enter target column name")
         with gr.Row():
+            forecast_horizon_input = gr.Number(label="Forecast Horizon", value=10, visible=False) # Hide forecast horizon input
             finetune_steps_input = gr.Number(label="Finetune Steps", value=100)
             freq_dropdown = gr.Dropdown(choices=['15min', '30min', 'H', '2H', '3H', '4H', '5H', '6H', '12H', 'D', 'W', 'M', 'Y'], label="Frequency", value='D')
+            with gr.Column(): # Group date inputs in a column
+                with gr.Row():
+                    start_date_input = gr.Textbox(label="Historical Start Date (YYYY-MM-DD)", placeholder="YYYY-MM-DD", value="2023-01-01")
+                    start_time_input = gr.Textbox(label="Start Time (HH:MM)", placeholder="HH:MM", value="00:00", visible=False) # Hide start time input
+                with gr.Row():
+                    end_date_input = gr.Textbox(label="Historical End Date (YYYY-MM-DD)", placeholder="YYYY-MM-DD", value="2023-12-31")
+                    end_time_input = gr.Textbox(label="End Time (HH:MM)", placeholder="HH:MM", value="23:59", visible=False) # Hide end time input
+                with gr.Row():
+                    forecast_start_date_input = gr.Textbox(label="Forecast Start Date (YYYY-MM-DD)", placeholder="YYYY-MM-DD")
+                    forecast_end_date_input = gr.Textbox(label="Forecast End Date (YYYY-MM-DD)", placeholder="YYYY-MM-DD")
         resample_freq_dropdown = gr.Dropdown(choices=['15min', '30min', 'H', '2H', '3H', '4H', '5H', '6H', '12H', 'D', 'W', 'M', 'Y'], label="Resample Frequency", value='D')
         btn = gr.Button("Generate Forecast")
         btn.click(
             fn=full_forecast_pipeline,
+            inputs=[file_input, time_col_input, target_col_input, forecast_horizon_input, finetune_steps_input, freq_dropdown, start_date_input, end_date_input, start_time_input, end_time_input, resample_freq_dropdown, gr.Checkbox(label="Merge Data", value=False), forecast_start_date_input, forecast_end_date_input],
             outputs=[output_csv, output_plot, download_button, error_output]
         )
     return iface
 iface = create_interface()
+iface.launch()