Spaces:

anujkum0x
/

apollo

Sleeping

App Files Files Community

anujkum0x committed on Feb 18, 2025

Commit

4c65297

verified ·

1 Parent(s): c9a20e0

Update app.py

Browse files

Files changed (1) hide show

app.py +88 -16

app.py CHANGED Viewed

@@ -51,7 +51,7 @@ def load_data(file_obj):
         logger.error(f"Error loading data: {e}", exc_info=True)
         raise ValueError(f"Error loading data: {e}")
-def forecast_nixtla(df, forecast_horizon, finetune_steps, freq):
     """
     Function to call the Nixtla API directly.
     """
@@ -61,8 +61,8 @@ def forecast_nixtla(df, forecast_horizon, finetune_steps, freq):
             df=df,
             h=forecast_horizon,
             finetune_steps=finetune_steps,
-            time_col="start_time",
-            target_col="num_calls_queued",
             freq=freq
         )
         logger.info("Nixtla API call successful")
@@ -157,7 +157,7 @@ def create_plot(data, forecast_data, time_col, target_col):
     )
     return fig
-def full_forecast_pipeline(file_obj, forecast_horizon, finetune_steps, freq, start_date, end_date, start_time, end_time, resample_freq, merge_data) -> Tuple[str, object, str, str]:
     """
     Full pipeline: loads the data, calls the forecast function, and then processes the data.
     """
@@ -167,14 +167,14 @@ def full_forecast_pipeline(file_obj, forecast_horizon, finetune_steps, freq, sta
             return "Error loading data. Please check the file format and content.", None, None, None
         # Convert time column to datetime
-        data["start_time"] = pd.to_datetime(data["start_time"])
         # Sort the DataFrame by the time column
-        data = data.sort_values(by="start_time")
         # Get min and max dates from the data
-        min_date = data["start_time"].min().strftime('%Y-%m-%d')
-        max_date = data["start_time"].max().strftime('%Y-%m-%d')
         # Fill missing values with 0
         data = data.fillna(0)
@@ -183,25 +183,64 @@ def full_forecast_pipeline(file_obj, forecast_horizon, finetune_steps, freq, sta
         if start_date and end_date:
             start_datetime = pd.to_datetime(start_date)
             end_datetime = pd.to_datetime(end_date)
-            data = data[(data["start_time"] >= start_datetime) & (data["start_time"] <= end_datetime)]
             logger.info(f"Data filtered from {start_datetime} to {end_datetime}. Shape: {data.shape}")
-        data = data.set_index("start_time")
         # Resample the data
         data = data.resample(resample_freq).mean()
         data.reset_index(inplace=True)
-        forecast_result = forecast_nixtla(data, forecast_horizon, finetune_steps, freq)
-        processed_data = process_forecast_data(forecast_result, "start_time")
-        processed_data = apply_zero_patterns(data.copy(), processed_data, "start_time", "num_calls_queued")
         if merge_data:
-            merged_data = pd.merge(data.reset_index(), processed_data, on="start_time", how='inner')
         else:
             merged_data = processed_data
-        plot = create_plot(data, processed_data, "start_time", "num_calls_queued")
         csv_data = processed_data.to_csv(index=False)
         # Create a temporary file and write the CSV data to it
@@ -217,6 +256,31 @@ def full_forecast_pipeline(file_obj, forecast_horizon, finetune_steps, freq, sta
         logger.exception("An unexpected error occurred:")
         return f"Error: An unexpected error occurred: {e}", None, None, None
 def create_interface():
     with gr.Blocks() as iface:
         gr.Markdown("""
@@ -226,6 +290,10 @@ def create_interface():
         file_input = gr.File(label="Upload Time Series Data (CSV, Excel, JSON, YAML)")
         with gr.Row():
             forecast_horizon_input = gr.Number(label="Forecast Horizon", value=10)
             finetune_steps_input = gr.Number(label="Finetune Steps", value=100)
@@ -237,6 +305,10 @@ def create_interface():
                 end_date_input = gr.Textbox(label="End Date (YYYY-MM-DD)", placeholder="YYYY-MM-DD", value="2023-12-31")
                 end_time_input = gr.Textbox(label="End Time (HH:MM)", placeholder="HH:MM", value="23:59")
         resample_freq_dropdown = gr.Dropdown(choices=['15min', '30min', 'H', '2H', '3H', '4H', '5H', '6H', '12H', 'D', 'W', 'M', 'Y'], label="Resample Frequency", value='D')
         output_csv = gr.Textbox(label="Forecast Data (CSV)")
@@ -248,7 +320,7 @@ def create_interface():
         btn = gr.Button("Generate Forecast")
         btn.click(
             fn=full_forecast_pipeline,
-            inputs=[file_input, forecast_horizon_input, finetune_steps_input, freq_dropdown, start_date_input, end_date_input, start_time_input, end_time_input, resample_freq_dropdown],
             outputs=[output_csv, output_plot, download_button, error_output]
         )
     return iface

         logger.error(f"Error loading data: {e}", exc_info=True)
         raise ValueError(f"Error loading data: {e}")
+def forecast_nixtla(df, forecast_horizon, finetune_steps, freq, time_col, target_col):
     """
     Function to call the Nixtla API directly.
     """
             df=df,
             h=forecast_horizon,
             finetune_steps=finetune_steps,
+            time_col=time_col,
+            target_col=target_col,
             freq=freq
         )
         logger.info("Nixtla API call successful")
     )
     return fig
+def full_forecast_pipeline(file_obj, time_col, target_col, forecast_horizon, finetune_steps, freq, start_date, end_date, start_time, end_time, resample_freq, merge_data, forecast_start_date, forecast_end_date) -> Tuple[str, object, str, str]:
     """
     Full pipeline: loads the data, calls the forecast function, and then processes the data.
     """
             return "Error loading data. Please check the file format and content.", None, None, None
         # Convert time column to datetime
+        data[time_col] = pd.to_datetime(data[time_col])
         # Sort the DataFrame by the time column
+        data = data.sort_values(by=time_col)
         # Get min and max dates from the data
+        min_date = data[time_col].min().strftime('%Y-%m-%d')
+        max_date = data[time_col].max().strftime('%Y-%m-%d')
         # Fill missing values with 0
         data = data.fillna(0)
         if start_date and end_date:
             start_datetime = pd.to_datetime(start_date)
             end_datetime = pd.to_datetime(end_date)
+            data = data[(data[time_col] >= start_datetime) & (data[time_col] <= end_datetime)]
             logger.info(f"Data filtered from {start_datetime} to {end_datetime}. Shape: {data.shape}")
+        data = data.set_index(time_col)
         # Resample the data
         data = data.resample(resample_freq).mean()
         data.reset_index(inplace=True)
+        if forecast_start_date and forecast_end_date:
+            forecast_start_datetime = pd.to_datetime(forecast_start_date)
+            forecast_end_datetime = pd.to_datetime(forecast_end_date)
+            # Calculate the time difference in days
+            time_difference = (forecast_end_datetime - forecast_start_datetime).days
+            # Adjust forecast horizon based on frequency
+            if freq == 'D':
+                forecast_horizon = time_difference
+            elif freq == 'W':
+                forecast_horizon = time_difference / 7
+            elif freq == 'M':
+                forecast_horizon = time_difference / 30  # Approximation
+            elif freq == 'Y':
+                forecast_horizon = time_difference / 365 # Approximation
+            elif 'min' in freq:
+                 minutes = int(freq.replace('min', ''))
+                 forecast_horizon = time_difference * 24 * 60 / minutes
+            elif 'H' in freq:
+                 hours = int(freq.replace('H', ''))
+                 forecast_horizon = time_difference * 24 / hours
+            else:
+                raise ValueError("Unsupported frequency. Please select a valid frequency.")
+            forecast_horizon = int(forecast_horizon)  # Convert to integer
+        # Generate complete date range
+        start_datetime = data[time_col].min()
+        end_datetime = data[time_col].max()
+        complete_date_range = pd.date_range(start=start_datetime, end=end_datetime, freq=resample_freq)
+        # Reindex the data
+        data = data.set_index(time_col)
+        data = data.reindex(complete_date_range)
+        data = data.fillna(0)
+        data = data.reset_index()
+        data = data.rename(columns={'index': time_col})
+        forecast_result = forecast_nixtla(data, forecast_horizon, finetune_steps, freq, time_col, target_col)
+        processed_data = process_forecast_data(forecast_result, time_col)
+        processed_data = apply_zero_patterns(data.copy(), processed_data, time_col, target_col)
         if merge_data:
+            merged_data = pd.merge(data.reset_index(), processed_data, on=time_col, how='inner')
         else:
             merged_data = processed_data
+        plot = create_plot(data, processed_data, time_col, target_col)
         csv_data = processed_data.to_csv(index=False)
         # Create a temporary file and write the CSV data to it
         logger.exception("An unexpected error occurred:")
         return f"Error: An unexpected error occurred: {e}", None, None, None
+def get_column_names(file_obj):
+    """
+    Extracts column names from the uploaded file.
+
+    Args:
+        file_obj: The uploaded file object, passed unchanged to ``load_data``.
+            ``None`` is treated as "no file", which yields no columns.
+
+    Returns:
+        list: The column labels of the loaded data, or an empty list when no
+        file was given or the file could not be loaded.
+    """
+    # No file to inspect: this is not an error, so skip the load attempt
+    # (and the error log with traceback it would otherwise produce).
+    if file_obj is None:
+        return []
+    try:
+        columns = load_data(file_obj).columns.tolist()
+    except Exception as e:
+        # Best-effort helper: report the failure and fall back to no columns
+        # so the caller can still render an (empty) choice list.
+        logger.error(f"Error in get_column_names: {e}", exc_info=True)
+        return []
+    # Route diagnostics through the module logger instead of print().
+    logger.info(f"Column names: {columns}")
+    return columns
+def update_dropdown_choices(file_obj):
+    """
+    Updates the dropdown choices based on the uploaded file.
+
+    Args:
+        file_obj: The uploaded file object, forwarded to ``get_column_names``.
+
+    Returns:
+        tuple: Two update payloads, each setting ``choices`` to the column
+        names found in the file (an empty list when none could be read).
+    """
+    # get_column_names handles its own failures and returns [] in that case,
+    # so no additional exception handling is needed here.
+    columns = get_column_names(file_obj)
+    # gr.update is the generic update helper; the per-component
+    # gr.Dropdown.update classmethod is not available in newer Gradio releases.
+    return gr.update(choices=columns), gr.update(choices=columns)
 def create_interface():
     with gr.Blocks() as iface:
         gr.Markdown("""
         file_input = gr.File(label="Upload Time Series Data (CSV, Excel, JSON, YAML)")
+        with gr.Row():
+            time_col_input = gr.Textbox(label="Time Column", placeholder="Enter time column name")
+            target_col_input = gr.Textbox(label="Target Column", placeholder="Enter target column name")
         with gr.Row():
             forecast_horizon_input = gr.Number(label="Forecast Horizon", value=10)
             finetune_steps_input = gr.Number(label="Finetune Steps", value=100)
                 end_date_input = gr.Textbox(label="End Date (YYYY-MM-DD)", placeholder="YYYY-MM-DD", value="2023-12-31")
                 end_time_input = gr.Textbox(label="End Time (HH:MM)", placeholder="HH:MM", value="23:59")
+            with gr.Row():
+                forecast_start_date_input = gr.Textbox(label="Forecast Start Date (YYYY-MM-DD)", placeholder="YYYY-MM-DD")
+                forecast_end_date_input = gr.Textbox(label="Forecast End Date (YYYY-MM-DD)", placeholder="YYYY-MM-DD")
         resample_freq_dropdown = gr.Dropdown(choices=['15min', '30min', 'H', '2H', '3H', '4H', '5H', '6H', '12H', 'D', 'W', 'M', 'Y'], label="Resample Frequency", value='D')
         output_csv = gr.Textbox(label="Forecast Data (CSV)")
         btn = gr.Button("Generate Forecast")
         btn.click(
             fn=full_forecast_pipeline,
+            inputs=[file_input, time_col_input, target_col_input, forecast_horizon_input, finetune_steps_input, freq_dropdown, start_date_input, end_date_input, start_time_input, end_time_input, resample_freq_dropdown, gr.Checkbox(label="Merge Data", value=False), forecast_start_date_input, forecast_end_date_input],
             outputs=[output_csv, output_plot, download_button, error_output]
         )
     return iface