Spaces:

Snxt1
/

tirex-forecaster

Sleeping

App Files Community

Snxt1 committed on Oct 20

Commit

2742aad

verified ·

1 Parent(s): 3a84b34

Update app.py

Browse files

Shipping some updates.

Files changed (1) hide show

app.py +197 -92

app.py CHANGED Viewed

@@ -14,89 +14,126 @@ model = load_model("NX-AI/TiRex")
 def load_columns(file):
     if file is None:
-        return gr.Dropdown(choices=[], label="Select Column to Forecast:", interactive=True)
     try:
         # Handle file as path string (Gradio convention)
         with open(file, 'rb') as f:
             content = f.read()
         df_preview = pd.read_csv(io.BytesIO(content))
-        # Assume first column is date-like, rename if 'Day'
-        date_cols = [col for col in df_preview.columns if 'day' in col.lower() or 'date' in col.lower()]
-        if date_cols:
-            df_preview = df_preview.rename(columns={date_cols[0]: 'date'})
-        # Available numeric columns for forecast (exclude date)
         numeric_cols = df_preview.select_dtypes(include=['number']).columns.tolist()
-        if 'date' in numeric_cols:
-            numeric_cols.remove('date')
         if numeric_cols:
-            return gr.Dropdown(
-                choices=[(col, col) for col in numeric_cols],
-                value=numeric_cols[0],
-                label="Select Column to Forecast:",
-                interactive=True
-            )
         else:
-            return gr.Dropdown(
-                choices=[],
-                value=None,
-                label="No numeric columns found",
-                interactive=False
-            )
     except Exception as e:
-        return gr.Dropdown(
             choices=[],
             value=None,
             label=f"Error loading CSV: {str(e)}",
             interactive=False
-        )
-def run_forecast(file, selected_col, prediction_length, confidence):
-    if file is None or selected_col is None:
-        return None, "### Error\nPlease upload a CSV and select a column!"
     try:
         # Handle file as path string (Gradio convention)
         with open(file, 'rb') as f:
             content = f.read()
         df = pd.read_csv(io.BytesIO(content))
-        # Rename date column if needed
-        date_cols = [col for col in df.columns if 'day' in col.lower() or 'date' in col.lower()]
-        if date_cols:
-            df = df.rename(columns={date_cols[0]: 'date'})
-        else:
-            return None, "### Error\nNo date column found (looking for 'Day' or 'date'). Edit CSV."
-        # Use selected column as 'sales'
-        df = df.rename(columns={selected_col: 'sales'})
         # Validate
         required_cols = ['date', 'sales']
         if not all(col in df.columns for col in required_cols):
-            return None, f"### Error\nMissing 'date' or selected column '{selected_col}'."
         # Prep data
         df['date'] = pd.to_datetime(df['date'])
         df = df.set_index('date').sort_index()
-        if len(df) < 10:
-            return None, "### Error\nNeed at least 10 data points."
-        series = df['sales'].dropna().values
-        print(f"Loaded: {len(series)} points from {df.index.min().date()} to {df.index.max().date()} (Column: {selected_col})")  # For logs
         # Infer freq
-        freq = pd.infer_freq(df.index)
         if freq is None:
             freq = 'D'
             print(f"Frequency: '{freq}'.")
         # Prep context
-        context_len = min(len(series), 2048)
-        context = torch.tensor(series[-context_len:]).unsqueeze(0).float()
         pred_len = prediction_length
         conf_level = confidence / 100.0
@@ -122,6 +159,8 @@ def run_forecast(file, selected_col, prediction_length, confidence):
         lower_slider = np.zeros(pred_len)
         upper_slider = np.zeros(pred_len)
         for t in range(pred_len):
             q_t = q[t]
             lower50[t] = np.interp(lower_alpha_50, alphas, q_t)
@@ -129,11 +168,22 @@ def run_forecast(file, selected_col, prediction_length, confidence):
             lower_slider[t] = np.interp(lower_alpha_slider, alphas, q_t)
             upper_slider[t] = np.interp(upper_alpha_slider, alphas, q_t)
         # Mean forecast
         mean_forecast = mean[0].detach().numpy()
         # Future dates
-        last_date = df.index[-1]
         if freq == 'D':
             future_dates = pd.date_range(start=last_date + timedelta(days=1), periods=pred_len, freq='D')
         else:
@@ -144,35 +194,49 @@ def run_forecast(file, selected_col, prediction_length, confidence):
             'predicted_sales_median': median,
             'predicted_sales_lower': lower_slider,
             'predicted_sales_upper': upper_slider,
-            'predicted_sales_mean': mean_forecast
         }).set_index('date')
         # Prepare markdown output (broken into smaller strings to avoid multiline f-string parsing issues)
-        markdown_text = "### ✅ TiRex Forecast Results (Median + {}% Interval)\n\n".format(confidence)
-        markdown_text += "| Date | Median | Lower Bound | Upper Bound | Mean |\n"
-        markdown_text += "|------|--------|-------------|-------------|------|\n"
-        for idx, row in pred_df.iterrows():
-            markdown_text += "| {} | {:.2f} | {:.2f} | {:.2f} | {:.2f} |\n".format(
-                idx.strftime('%Y-%m-%d'),
-                row['predicted_sales_median'],
-                row['predicted_sales_lower'],
-                row['predicted_sales_upper'],
-                row['predicted_sales_mean']
-            )
-        markdown_text += "\n### 📊 Summary\n"
         markdown_text += "- **Prediction Length:** {} periods\n".format(pred_len)
         markdown_text += "- **Confidence Level:** {}% (alphas: {:.3f} - {:.3f})\n".format(confidence, lower_alpha_slider, upper_alpha_slider)
         markdown_text += "- **Sum of Median Predicted Values:** {:.2f}\n".format(pred_df['predicted_sales_median'].sum())
-        markdown_text += "- **Sum of Mean Predicted Values:** {:.2f}\n\n".format(pred_df['predicted_sales_mean'].sum())
-        markdown_text += "### Sample Historical Data\n"
-        markdown_text += "```\n" + df.head().to_string() + "\n```"
         # Create plot
         fig, ax = plt.subplots(figsize=(14, 7))
-        ax.plot(df.index, df['sales'], label=f'Historical {selected_col} (Full CSV Data)', color='#1f77b4', linewidth=1.5, alpha=0.8)
-        ax.plot(pred_df.index, pred_df['predicted_sales_median'], label='TiRex Forecast (Median)', color='#d62728', linestyle='-', linewidth=2)
         ax.plot(pred_df.index, pred_df['predicted_sales_mean'], label='TiRex Forecast (Mean)', color='#ff7f0e', linestyle='--', linewidth=2)
         # Fan chart: non-overlapping bands
@@ -185,81 +249,122 @@ def run_forecast(file, selected_col, prediction_length, confidence):
         ax.fill_between(pred_df.index, upper50, upper_slider,
                         color='#d62728', alpha=0.3, label=f'{confidence}% Uncertainty Wings')
-        ax.set_title(f'{selected_col} Forecast with TiRex (Full History + Horizon: {pred_len})', fontsize=16, fontweight='bold')
         ax.set_xlabel('Date', fontsize=12)
         ax.set_ylabel(selected_col, fontsize=12)
-        ax.legend(fontsize=10)
         ax.tick_params(axis='x', rotation=45)
         plt.tight_layout()
         return fig, markdown_text
     except Exception as e:
-        return None, f"### ❌ Error\n{str(e)}\n\nTips: Check NaNs/zeros; ensure data is valid."
 # Create the Gradio interface
-with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", secondary_hue="red"), title="🚀 TiRex Forecaster") as demo:
     gr.Markdown("""
-    # 🚀 TiRex Forecaster Dashboard
-    Upload a CSV file with a date column (e.g., 'Day' or 'date') and numeric columns. Select one to forecast future values using the TiRex model.
-    The dashboard will display in this new window/tab for a cool, interactive experience!
     """)
     with gr.Row(variant="panel"):
         with gr.Column(scale=1):
             csv_file = gr.File(
                 file_types=[".csv"],
-                label="📁 Upload CSV File",
                 elem_id="file_upload"
             )
             column_dropdown = gr.Dropdown(
                 choices=[],
-                label="📈 Select Column to Forecast",
                 interactive=True,
                 elem_id="column_select"
             )
             prediction_length = gr.Slider(
-                minimum=1, maximum=100, value=12, step=1,
-                label="🔮 Prediction Length (Periods)",
                 elem_id="pred_length"
             )
             confidence = gr.Slider(
                 minimum=50, maximum=95, value=80, step=5,
-                label="🎯 Confidence Level (%)",
                 elem_id="confidence"
             )
             run_button = gr.Button(
-                "⚡ Run TiRex Forecast",
                 variant="primary",
                 size="lg",
                 elem_id="run_btn"
             )
         with gr.Column(scale=2):
             forecast_plot = gr.Plot(
-                label="📊 Forecast Visualization",
                 elem_id="plot"
             )
             output_text = gr.Markdown(
                 "### Welcome!\nUpload your CSV to get started.",
                 elem_id="output"
             )
-    # Event for updating dropdown on file upload
     csv_file.change(
         load_columns,
         inputs=csv_file,
-        outputs=column_dropdown
     )
     # Event for running forecast
     run_button.click(
         run_forecast,
-        inputs=[csv_file, column_dropdown, prediction_length, confidence],
         outputs=[forecast_plot, output_text]
     )
 # Launch the app
-if __name__ == "__main__":
-    demo.launch()

 def load_columns(file):
     if file is None:
+        return (gr.Dropdown(choices=[], label="Select Time Column:", interactive=True),
+                gr.Dropdown(choices=[], label="Select Column to Forecast:", interactive=True),
+                gr.Slider(minimum=1, maximum=1, value=1, step=1, label="Historical Start Index (1-based)"),
+                gr.Slider(minimum=1, maximum=1, value=1, step=1, label="Historical End Index (1-based)"))
     try:
         # Handle file as path string (Gradio convention)
         with open(file, 'rb') as f:
             content = f.read()
         df_preview = pd.read_csv(io.BytesIO(content))
+        # All columns for time selection
+        all_cols = df_preview.columns.tolist()
+        time_choices = [(col, col) for col in all_cols]
+        time_value = all_cols[0] if all_cols else None
+        # Available numeric columns for forecast
         numeric_cols = df_preview.select_dtypes(include=['number']).columns.tolist()
         if numeric_cols:
+            value_choices = [(col, col) for col in numeric_cols]
+            value_value = numeric_cols[0]
         else:
+            value_choices = []
+            value_value = None
+        n_rows = len(df_preview)
+        time_dropdown = gr.Dropdown(
+            choices=time_choices,
+            value=time_value,
+            label="Select Time Column:",
+            interactive=True
+        )
+        value_dropdown = gr.Dropdown(
+            choices=value_choices,
+            value=value_value,
+            label="Select Column to Forecast:",
+            interactive=True
+        ) if value_choices else gr.Dropdown(
+            choices=[],
+            value=None,
+            label="No numeric columns found",
+            interactive=False
+        )
+        start_slider = gr.Slider(
+            minimum=1, maximum=n_rows, value=1, step=1,
+            label="Historical Start Index (1-based)"
+        )
+        end_slider = gr.Slider(
+            minimum=1, maximum=n_rows, value=n_rows, step=1,
+            label="Historical End Index (1-based)"
+        )
+        return time_dropdown, value_dropdown, start_slider, end_slider
     except Exception as e:
+        return (gr.Dropdown(
             choices=[],
             value=None,
             label=f"Error loading CSV: {str(e)}",
             interactive=False
+        ), gr.Dropdown(
+            choices=[],
+            value=None,
+            label=f"Error loading CSV: {str(e)}",
+            interactive=False
+        ), gr.Slider(minimum=1, maximum=1, value=1, step=1, label="Historical Start Index (1-based)"),
+                gr.Slider(minimum=1, maximum=1, value=1, step=1, label="Historical End Index (1-based)"))
+def run_forecast(file, time_col, selected_col, start_idx, end_idx, prediction_length, confidence):
+    if file is None or time_col is None or selected_col is None:
+        return None, "### Error\nPlease upload a CSV and select time and value columns!"
     try:
         # Handle file as path string (Gradio convention)
         with open(file, 'rb') as f:
             content = f.read()
         df = pd.read_csv(io.BytesIO(content))
+        # Validate columns exist
+        if time_col not in df.columns or selected_col not in df.columns:
+            return None, f"### Error\nSelected columns '{time_col}' or '{selected_col}' not found in CSV."
+        # Rename selected columns
+        df = df.rename(columns={time_col: 'date', selected_col: 'sales'})
         # Validate
         required_cols = ['date', 'sales']
         if not all(col in df.columns for col in required_cols):
+            return None, f"### Error\nMissing renamed columns."
         # Prep data
         df['date'] = pd.to_datetime(df['date'])
         df = df.set_index('date').sort_index()
+        full_len = len(df)
+        context_start = max(0, int(start_idx) - 1)
+        context_end = min(full_len, int(end_idx))
+        context_df = df.iloc[context_start:context_end]
+        held_out_df = df.iloc[context_end:] if context_end < full_len else pd.DataFrame(index=pd.DatetimeIndex([]), columns=df.columns)
+        if len(context_df) < 10:
+            return None, "### Error\nNeed at least 10 data points in the selected historical range."
+        context_series = context_df['sales'].dropna().values
+        print(f"Loaded context: {len(context_series)} points from {context_df.index.min().date()} to {context_df.index.max().date()} (Column: {selected_col})")  # For logs
         # Infer freq
+        freq = pd.infer_freq(context_df.index)
         if freq is None:
             freq = 'D'
             print(f"Frequency: '{freq}'.")
         # Prep context
+        context_len = min(len(context_series), 2048)
+        context = torch.tensor(context_series[-context_len:]).unsqueeze(0).float()
         pred_len = prediction_length
         conf_level = confidence / 100.0
         lower_slider = np.zeros(pred_len)
         upper_slider = np.zeros(pred_len)
+        skew_directions = []
         for t in range(pred_len):
             q_t = q[t]
             lower50[t] = np.interp(lower_alpha_50, alphas, q_t)
             lower_slider[t] = np.interp(lower_alpha_slider, alphas, q_t)
             upper_slider[t] = np.interp(upper_alpha_slider, alphas, q_t)
+            # Compute skew direction based on asymmetry around median
+            med = median[t]
+            upside_dist = upper_slider[t] - med
+            downside_dist = med - lower_slider[t]
+            if upside_dist > downside_dist:
+                skew_directions.append("Upside")
+            elif downside_dist > upside_dist:
+                skew_directions.append("Downside")
+            else:
+                skew_directions.append("Neutral")
         # Mean forecast
         mean_forecast = mean[0].detach().numpy()
         # Future dates
+        last_date = context_df.index[-1]
         if freq == 'D':
             future_dates = pd.date_range(start=last_date + timedelta(days=1), periods=pred_len, freq='D')
         else:
             'predicted_sales_median': median,
             'predicted_sales_lower': lower_slider,
             'predicted_sales_upper': upper_slider,
+            'predicted_sales_mean': mean_forecast,
+            'skew_direction': skew_directions
         }).set_index('date')
+        # Count skews for summary
+        upside_count = skew_directions.count("Upside")
+        downside_count = skew_directions.count("Downside")
+        neutral_count = skew_directions.count("Neutral")
         # Prepare markdown output (broken into smaller strings to avoid multiline f-string parsing issues)
+        markdown_text = "### Summary\n"
+        markdown_text += "- **Number of Historical Periods Used:** {} points\n".format(len(context_series))
+        markdown_text += "- **Held Out Periods:** {} points {}\n".format(len(held_out_df), "(Full Context Used)" if len(held_out_df) == 0 else "(For Validation)")
         markdown_text += "- **Prediction Length:** {} periods\n".format(pred_len)
         markdown_text += "- **Confidence Level:** {}% (alphas: {:.3f} - {:.3f})\n".format(confidence, lower_alpha_slider, upper_alpha_slider)
         markdown_text += "- **Sum of Median Predicted Values:** {:.2f}\n".format(pred_df['predicted_sales_median'].sum())
+        markdown_text += "- **Sum of Mean Predicted Values:** {:.2f}\n".format(pred_df['predicted_sales_mean'].sum())
+        markdown_text += "- **Skew Distribution:** {} Upside, {} Downside, {} Neutral\n\n".format(upside_count, downside_count, neutral_count)
+        forecast_table = "### TiRex Forecast Results (Median + {}% Interval)\n\n".format(confidence)
+        forecast_table += "| Date | Median | Lower Bound | Upper Bound | Mean | Skew |\n"
+        forecast_table += "|------|--------|-------------|-------------|------|------|\n"
+        for idx, row in pred_df.iterrows():
+            forecast_table += "| {} | {:.2f} | {:.2f} | {:.2f} | {:.2f} | {} |\n".format(
+                idx.strftime('%Y-%m-%d'),
+                row['predicted_sales_median'],
+                row['predicted_sales_lower'],
+                row['predicted_sales_upper'],
+                row['predicted_sales_mean'],
+                row['skew_direction']
+            )
+        sample_data = "### Sample Historical Data (Context)\n"
+        sample_data += "```\n" + context_df.head().to_string() + "\n```"
+        markdown_text += f'\n<details><summary>Click to expand Forecast Table</summary>\n\n{forecast_table}\n</details>\n\n'
+        markdown_text += f'<details><summary>Click to expand Sample Historical Data</summary>\n\n{sample_data}\n</details>'
         # Create plot
         fig, ax = plt.subplots(figsize=(14, 7))
+        ax.plot(context_df.index, context_df['sales'], label=f'Used Historical {selected_col}', color='#1f77b4', linewidth=1.5, alpha=0.8)
+        if not held_out_df.empty:
+            ax.plot(held_out_df.index, held_out_df['sales'], label='Held Out Actual (Validation)', color='#2ca02c', linestyle=':', linewidth=2)
         ax.plot(pred_df.index, pred_df['predicted_sales_mean'], label='TiRex Forecast (Mean)', color='#ff7f0e', linestyle='--', linewidth=2)
         # Fan chart: non-overlapping bands
         ax.fill_between(pred_df.index, upper50, upper_slider,
                         color='#d62728', alpha=0.3, label=f'{confidence}% Uncertainty Wings')
+        # Subtle skew visualization: colored segments on the median forecast line
+        from matplotlib.lines import Line2D
+        legend_elements = []
+        skew_colors = {'Upside': 'green', 'Downside': 'red', 'Neutral': 'gray'}
+        for i in range(len(pred_df) - 1):
+            start_date = pred_df.index[i]
+            end_date = pred_df.index[i + 1]
+            start_val = median[i]
+            end_val = median[i + 1]
+            skew = skew_directions[i]
+            color = skew_colors[skew]
+            ax.plot([start_date, end_date], [start_val, end_val], color=color, linewidth=2.5, alpha=0.7)
+        # Connect the last point if needed, but since segments cover, add a small marker at end if desired
+        ax.plot(pred_df.index[-1], median[-1], marker='o', color=skew_colors[skew_directions[-1]], markersize=4, alpha=0.7)
+        # Add to legend only if present
+        if upside_count > 0:
+            legend_elements.append(Line2D([0], [0], color='green', lw=2, label='Upside Skew'))
+        if downside_count > 0:
+            legend_elements.append(Line2D([0], [0], color='red', lw=2, label='Downside Skew'))
+        if neutral_count > 0:
+            legend_elements.append(Line2D([0], [0], color='gray', lw=2, label='Neutral Skew'))
+        ax.set_title(f'{selected_col} Forecast with TiRex (Context: {context_start+1}-{context_end}, Horizon: {pred_len})', fontsize=16, fontweight='bold')
         ax.set_xlabel('Date', fontsize=12)
         ax.set_ylabel(selected_col, fontsize=12)
+        ax.legend(handles=ax.get_legend_handles_labels()[0] + legend_elements, fontsize=10)
         ax.tick_params(axis='x', rotation=45)
         plt.tight_layout()
         return fig, markdown_text
     except Exception as e:
+        return None, f"### Error\n{str(e)}\n\nTips: Ensure the time column can be parsed as dates; check NaNs/zeros; ensure data is valid."
 # Create the Gradio interface
+with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", secondary_hue="red"), title="TiRex Forecaster") as demo:
     gr.Markdown("""
+    # TiRex Forecaster Dashboard
+    Upload a CSV file with a time column and numeric columns. Select the time column and one numeric column to forecast future values using the TiRex model.
     """)
     with gr.Row(variant="panel"):
         with gr.Column(scale=1):
             csv_file = gr.File(
                 file_types=[".csv"],
+                label="Upload CSV File",
                 elem_id="file_upload"
             )
+            gr.Markdown("The minimum effective input is around 128 time steps per series. Use a full context of 2048 steps for optimal performance.")
+            time_dropdown = gr.Dropdown(
+                choices=[],
+                label="Select Time Column",
+                interactive=True,
+                elem_id="time_select"
+            )
             column_dropdown = gr.Dropdown(
                 choices=[],
+                label="Select Column to Forecast",
                 interactive=True,
                 elem_id="column_select"
             )
+            start_slider = gr.Slider(
+                minimum=1, maximum=1, value=1, step=1,
+                label="Historical Start Index (1-based)",
+                elem_id="start_idx"
+            )
+            end_slider = gr.Slider(
+                minimum=1, maximum=1, value=1, step=1,
+                label="Historical End Index (1-based)",
+                elem_id="end_idx"
+            )
             prediction_length = gr.Slider(
+                minimum=1, maximum=720, value=12, step=1,
+                label="Prediction Length (Periods)",
                 elem_id="pred_length"
             )
             confidence = gr.Slider(
                 minimum=50, maximum=95, value=80, step=5,
+                label="Confidence Level (%)",
                 elem_id="confidence"
             )
             run_button = gr.Button(
+                "Run TiRex Forecast",
                 variant="primary",
                 size="lg",
                 elem_id="run_btn"
             )
         with gr.Column(scale=2):
             forecast_plot = gr.Plot(
+                label="Forecast Visualization",
                 elem_id="plot"
             )
             output_text = gr.Markdown(
                 "### Welcome!\nUpload your CSV to get started.",
                 elem_id="output"
             )
+    gr.Markdown("**Built by** [next one gmbh](https://www.nextone.at)")
+    # Event for updating dropdowns on file upload
     csv_file.change(
         load_columns,
         inputs=csv_file,
+        outputs=[time_dropdown, column_dropdown, start_slider, end_slider]
     )
     # Event for running forecast
     run_button.click(
         run_forecast,
+        inputs=[csv_file, time_dropdown, column_dropdown, start_slider, end_slider, prediction_length, confidence],
         outputs=[forecast_plot, output_text]
     )
 # Launch the app
+demo.launch()