Spaces:

SowmiyaNagaraj
/

pred

Runtime error

App Files Files Community

SowmiyaNagaraj committed on Apr 11, 2025

Commit

d872cb5

verified ·

1 Parent(s): 8eb433e

Create app.py

Browse files

Files changed (1) hide show

app.py +378 -0

app.py ADDED Viewed

	@@ -0,0 +1,378 @@

+import numpy as np
+import pandas as pd
+import gradio as gr
+from statsmodels.tsa.arima.model import ARIMA
+from sklearn.preprocessing import MinMaxScaler
+from sklearn.metrics import r2_score
+from tensorflow.keras.models import Sequential
+from tensorflow.keras.layers import LSTM, Dense
+from tensorflow.keras.optimizers import Adam
+import warnings
+import matplotlib.pyplot as plt
+from matplotlib.ticker import MaxNLocator
+import os
+warnings.filterwarnings("ignore")
+# Load Dataset with better error handling
+try:
+    # Print current directory to help debug file location issues
+    print(f"Current working directory: {os.getcwd()}")
+    print(f"Files in directory: {os.listdir()}")
+    df = pd.read_csv('/content/drive/MyDrive/enhanced_sales_data_for_arima_lstm.csv')
+    print("\nDataset loaded successfully!")
+    print(f"Columns in dataset: {df.columns.tolist()}")
+    # Convert Date column to datetime
+    df['Date'] = pd.to_datetime(df['Date'])
+    df = df.sort_values(['Product_Name', 'Date'])
+    # Check if required columns exist
+    required_columns = ['Product_Name', 'Date', 'Sales']
+    if not all(col in df.columns for col in required_columns):
+        missing = [col for col in required_columns if col not in df.columns]
+        print(f"\nERROR: Missing required columns: {missing}")
+        df = None
+    else:
+        print(f"\nFirst few products: {df['Product_Name'].unique()[:5]}... (total: {len(df['Product_Name'].unique())} products)")
+except FileNotFoundError:
+    df = None
+    print("\nERROR: Dataset file not found!")
+    print("Please make sure the file exists in the specified path.")
+except Exception as e:
+    df = None
+    print(f"\nERROR loading dataset: {str(e)}")
+# Get product list with fallback
+if df is not None and 'Product_Name' in df.columns:
+    product_list = sorted(df['Product_Name'].unique().tolist())
+    print(f"\nProducts loaded ({len(product_list)} total):")
+    print(product_list[:5], "...") if len(product_list) > 5 else print(product_list)
+else:
+    product_list = []
+    print("\nNo products loaded - using empty list")
+def prepare_data(product_name):
+    if df is None:
+        print("ERROR: No data available (df is None)")
+        return None
+    print(f"\nPreparing data for product: {product_name}")
+    data = df[df['Product_Name'] == product_name][['Date', 'Sales']].set_index('Date')['Sales']
+    if data.empty:
+        print(f"WARNING: No sales data found for product: {product_name}")
+        return None
+    print(f"Found {len(data)} data points for {product_name}")
+    return data
+def train_arima(data, steps=60):
+    if len(data) < 6:
+        print("ARIMA: Not enough data (need at least 6 points)")
+        return None
+    try:
+        print(f"\nTraining ARIMA model on {len(data)} data points...")
+        model = ARIMA(data, order=(5,1,0))
+        model_fit = model.fit()
+        forecast = model_fit.forecast(steps=steps)
+        print("ARIMA training completed successfully")
+        return forecast
+    except Exception as e:
+        print(f"ARIMA Error: {e}")
+        return None
+def train_lstm(data, steps=60):
+    if len(data) < 6:
+        print("LSTM: Not enough data (need at least 6 points)")
+        return None
+    try:
+        print(f"\nTraining LSTM model on {len(data)} data points...")
+        scaler = MinMaxScaler()
+        data_scaled = scaler.fit_transform(data.values.reshape(-1, 1))
+        X, y = [], []
+        for i in range(5, len(data_scaled)):
+            X.append(data_scaled[i-5:i, 0])
+            y.append(data_scaled[i, 0])
+        if len(X) < 1:
+            print("LSTM: Not enough data after windowing")
+            return None
+        X, y = np.array(X), np.array(y)
+        X = X.reshape(X.shape[0], X.shape[1], 1)
+        model = Sequential([
+            LSTM(50, activation='relu', return_sequences=True, input_shape=(X.shape[1], 1)),
+            LSTM(50, activation='relu'),
+            Dense(1)
+        ])
+        model.compile(optimizer=Adam(learning_rate=0.01), loss='mse')
+        model.fit(X, y, epochs=20, batch_size=4, verbose=0)
+        last_sequence = data_scaled[-5:].reshape(1, 5, 1)
+        predictions = []
+        for _ in range(steps):
+            next_pred = model.predict(last_sequence, verbose=0)
+            predictions.append(next_pred[0,0])
+            last_sequence = np.append(last_sequence[:,1:,:], next_pred.reshape(1,1,1), axis=1)
+        print("LSTM training completed successfully")
+        return scaler.inverse_transform(np.array(predictions).reshape(-1, 1)).flatten()
+    except Exception as e:
+        print(f"LSTM Error: {e}")
+        return None
+def hybrid_prediction(data):
+    print("\nStarting hybrid prediction...")
+    arima_pred = train_arima(data)
+    lstm_pred = train_lstm(data)
+    if arima_pred is None or lstm_pred is None:
+        error_msg = "Model training failed - "
+        error_msg += "ARIMA failed" if arima_pred is None else ""
+        error_msg += " and " if arima_pred is None and lstm_pred is None else ""
+        error_msg += "LSTM failed" if lstm_pred is None else ""
+        print(error_msg)
+        return {"error": error_msg}
+    min_length = min(len(arima_pred), len(lstm_pred))
+    if min_length < 60:
+        error_msg = f"Prediction length too short: {min_length} (need 60)"
+        print(error_msg)
+        return {"error": error_msg}
+    final_pred = 0.5 * np.array(arima_pred[:60]) + 0.5 * np.array(lstm_pred[:60])
+    print("Hybrid prediction completed successfully")
+    return final_pred.tolist()
+def create_monthly_plot(monthly_data, product_name):
+    fig, ax = plt.subplots(figsize=(12, 6))
+    months = [f"Month {i+1}" for i in range(len(monthly_data))]
+    # Bar plot
+    bars = ax.bar(months, monthly_data, color='skyblue', alpha=0.7, label='Monthly Forecast')
+    # Line plot on top
+    ax.plot(months, monthly_data, color='red', marker='o', linestyle='-', linewidth=2, markersize=5, label='Trend')
+    ax.set_title(f"5-Year Monthly Sales Forecast for {product_name}", fontsize=14)
+    ax.set_xlabel("Months", fontsize=12)
+    ax.set_ylabel("Sales", fontsize=12)
+    ax.grid(True, linestyle='--', alpha=0.7)
+    ax.legend()
+    # Rotate x-axis labels and show only every 6th month to avoid crowding
+    plt.xticks(rotation=45, ha='right')
+    for i, label in enumerate(ax.xaxis.get_ticklabels()):
+        if i % 6 != 0:
+            label.set_visible(False)
+    plt.tight_layout()
+    return fig
+def create_yearly_scatter(yearly_data, product_name):
+    fig, ax = plt.subplots(figsize=(12, 6))
+    colors = ['red', 'blue', 'green', 'purple', 'orange']
+    markers = ['o', 's', 'D', '^', 'v']  # Different markers for each year
+    for year_idx, year_data in enumerate(yearly_data):
+        months = np.arange(1, 13)  # 1-12 months
+        ax.scatter(months, year_data, color=colors[year_idx],
+                  marker=markers[year_idx], s=100, label=f'Year {year_idx+1}', alpha=0.7)
+    ax.set_title(f"Yearly Sales Comparison for {product_name}", fontsize=14)
+    ax.set_xlabel("Month of Year", fontsize=12)
+    ax.set_ylabel("Sales", fontsize=12)
+    ax.xaxis.set_major_locator(MaxNLocator(integer=True))  # Only integer months
+    ax.grid(True, linestyle='--', alpha=0.7)
+    ax.legend()
+    plt.tight_layout()
+    return fig
+def create_evaluation_plot(actual, predicted, product_name, r2_score):
+    fig, ax = plt.subplots(figsize=(12, 6))
+    months = [f"Month {i+1}" for i in range(len(actual))]
+    ax.plot(months, actual, 'b-', label='Actual Sales', marker='o')
+    ax.plot(months, predicted, 'r--', label='Predicted Sales', marker='x')
+    ax.set_title(f"Model Evaluation for {product_name}\nR² Score: {r2_score:.2f}", fontsize=14)
+    ax.set_xlabel("Months", fontsize=12)
+    ax.set_ylabel("Sales", fontsize=12)
+    ax.grid(True, linestyle='--', alpha=0.7)
+    ax.legend()
+    plt.xticks(rotation=45, ha='right')
+    plt.tight_layout()
+    return fig
+def predict(product_name):
+    print(f"\nStarting prediction for: {product_name}")
+    if df is None:
+        error_msg = "Dataset not loaded or could not be processed"
+        print(error_msg)
+        return {"error": error_msg}, None, None
+    sales_data = prepare_data(product_name)
+    if sales_data is None or len(sales_data) < 6:
+        error_msg = "Not enough historical data for prediction"
+        print(error_msg)
+        return {"error": error_msg}, None, None
+    predictions = hybrid_prediction(sales_data)
+    if isinstance(predictions, dict) and "error" in predictions:
+        return predictions, None, None
+    monthly = predictions[:60]
+    yearly = [monthly[i*12:(i+1)*12] for i in range(5)]
+    monthly_plot = create_monthly_plot(monthly, product_name)
+    yearly_plot = create_yearly_scatter(yearly, product_name)
+    print(f"Successfully generated forecast for {product_name}")
+    return None, monthly_plot, yearly_plot
+def evaluate_model(product_name, test_size=12):
+    print(f"\nStarting evaluation for: {product_name}")
+    if df is None:
+        error_msg = "Dataset not loaded or could not be processed"
+        print(error_msg)
+        return {"error": error_msg}, None
+    data = prepare_data(product_name)
+    if data is None or len(data) < test_size + 6:
+        error_msg = "Not enough data to evaluate model"
+        print(error_msg)
+        return {"error": error_msg}, None
+    train_data = data[:-test_size]
+    test_data = data[-test_size:]
+    arima_pred = train_arima(train_data, steps=test_size)
+    lstm_pred = train_lstm(train_data, steps=test_size)
+    if arima_pred is None or lstm_pred is None:
+        error_msg = "Model training failed during evaluation"
+        print(error_msg)
+        return {"error": error_msg}, None
+    hybrid_pred = 0.5 * np.array(arima_pred) + 0.5 * np.array(lstm_pred)
+    score = r2_score(test_data.values, hybrid_pred)
+    evaluation_plot = create_evaluation_plot(
+        test_data.values,
+        hybrid_pred,
+        product_name,
+        score
+    )
+    print(f"Evaluation completed for {product_name} with R² score: {score:.2f}")
+    return None, evaluation_plot
+# Create Gradio interface
+with gr.Blocks(title="Sales Forecast Dashboard", theme="soft") as demo:
+    gr.Markdown("# 🚀 Hybrid ARIMA-LSTM Sales Forecasting")
+    gr.Markdown("Predict 5 years of monthly sales and evaluate model accuracy")
+    with gr.Tabs():
+        with gr.Tab("📈 Forecast Sales"):
+            gr.Markdown("### Generate 5-Year Sales Forecast")
+            with gr.Row():
+                product_dropdown = gr.Dropdown(
+                    choices=product_list,
+                    label="Select Product",
+                    interactive=True,
+                    value=product_list[0] if product_list else None
+                )
+                forecast_btn = gr.Button("Generate Forecast", variant="primary")
+            error_box = gr.JSON(
+                label="Error Messages",
+                visible=False,
+                elem_id="error-box"
+            )
+            with gr.Row():
+                with gr.Column():
+                    gr.Markdown("### Monthly Forecast")
+                    monthly_plot = gr.Plot(
+                        label="Monthly Sales Forecast",
+                        show_label=True
+                    )
+                with gr.Column():
+                    gr.Markdown("### Yearly Comparison")
+                    yearly_plot = gr.Plot(
+                        label="Yearly Sales Pattern",
+                        show_label=True
+                    )
+            # Examples section
+            if product_list:
+                gr.Examples(
+                    examples=[[product] for product in product_list[:3]],
+                    inputs=product_dropdown,
+                    label="Try these products:"
+                )
+            forecast_btn.click(
+                fn=predict,
+                inputs=product_dropdown,
+                outputs=[error_box, monthly_plot, yearly_plot],
+                api_name="predict"
+            )
+        with gr.Tab("📊 Evaluate Accuracy"):
+            gr.Markdown("### Evaluate Model Performance")
+            with gr.Row():
+                eval_product_dropdown = gr.Dropdown(
+                    choices=product_list,
+                    label="Select Product",
+                    interactive=True,
+                    value=product_list[0] if product_list else None
+                )
+                evaluate_btn = gr.Button("Evaluate Model", variant="primary")
+            eval_error_box = gr.JSON(
+                label="Error Messages",
+                visible=False,
+                elem_id="error-box"
+            )
+            gr.Markdown("### Actual vs Predicted Sales")
+            evaluation_plot = gr.Plot(
+                label="Model Evaluation Results",
+                show_label=True
+            )
+            evaluate_btn.click(
+                fn=evaluate_model,
+                inputs=eval_product_dropdown,
+                outputs=[eval_error_box, evaluation_plot],
+                api_name="evaluate"
+            )
+    # Add some debug info if no products found
+    if not product_list:
+        gr.Markdown("## ⚠️ No Products Found")
+        gr.Markdown("""
+        The application couldn't load any products. This usually means:
+        - The dataset file wasn't found at the specified path
+        - The dataset doesn't contain the required columns (Product_Name, Date, Sales)
+        - There was an error loading the data
+        Check the console output for more details.
+        """)
+# Launch the application
+if __name__ == "__main__":
+    print("\nStarting Gradio application...")
+    demo.launch()