kawaiipeace committed on
Commit
c06b0aa
·
1 Parent(s): e4ee23f

update the darts models

Browse files
.gitignore CHANGED
@@ -3,4 +3,5 @@ models/__pycache__
3
  utils/__pycache__
4
  dataset/*
5
  figure/*
6
- result/*
 
 
3
  utils/__pycache__
4
  dataset/*
5
  figure/*
6
+ result/*
7
+ .python-version
app.py CHANGED
@@ -1,5 +1,6 @@
1
  # app.py
2
  import os
 
3
  import gradio as gr
4
  import matplotlib.pyplot as plt
5
  from dotenv import load_dotenv
@@ -9,6 +10,8 @@ from models.darts_models import run_darts_forecast # Darts-based models
9
 
10
  load_dotenv()
11
 
 
 
12
  def plot_raw_data(df):
13
  if df.shape[1] == 1:
14
  fig = plt.figure(figsize=(12, 4))
@@ -32,9 +35,15 @@ def plot_raw_data(df):
32
  fig.tight_layout()
33
  return fig
34
 
35
- def forecast_interface(file, model_type, is_multivariate, horizon, lag, neurons, epochs, batch_size, future_horizon):
36
- df, _ = load_and_process_data(file.name, is_multivariate == "Multivariate")
37
- print(df.head())
 
 
 
 
 
 
38
  raw_plot = plot_raw_data(df)
39
 
40
  if model_type in ["LSTM", "BiLSTM", "GRU"]:
@@ -48,19 +57,21 @@ def forecast_interface(file, model_type, is_multivariate, horizon, lag, neurons,
48
  epochs,
49
  batch_size,
50
  future_horizon=future_horizon,
 
51
  )
52
  return raw_plot, fig, metrics, export_path
53
-
54
  else:
55
- forecast, fig, metrics = run_darts_forecast(
56
  df,
57
  model_type,
58
  is_multivariate == "Multivariate",
59
  horizon,
60
  epochs,
61
- batch_size
 
 
62
  )
63
- return raw_plot, fig, metrics, None
64
 
65
  models = ["LSTM", "BiLSTM", "GRU", "Transformer", "N-BEATS", "TFT", "TCN", "BlockRNN"]
66
 
@@ -78,9 +89,16 @@ with gr.Blocks() as demo:
78
  future_horizon = gr.Slider(0, 10000, value=0, step=100, label="🔮 Future Horizon (Steps Ahead)")
79
 
80
  with gr.Row():
81
- neurons = gr.Slider(10, 12400, value=64, step=8, label="🧠 Neurons (Only for LSTM)")
82
  epochs = gr.Slider(10, 10000, value=100, label="🔁 Epochs")
83
  batch_size = gr.Slider(8, 2048, value=32, step=8, label="📦 Batch Size")
 
 
 
 
 
 
 
84
 
85
  run_btn = gr.Button("🚀 Run Forecast")
86
 
@@ -91,7 +109,7 @@ with gr.Blocks() as demo:
91
 
92
  run_btn.click(
93
  forecast_interface,
94
- inputs=[file, model_type, is_multivariate, horizon, lag, neurons, epochs, batch_size, future_horizon],
95
  outputs=[raw_plot, output_plot, metrics_out, export_csv]
96
  )
97
 
 
1
  # app.py
2
  import os
3
+ import torch
4
  import gradio as gr
5
  import matplotlib.pyplot as plt
6
  from dotenv import load_dotenv
 
10
 
11
  load_dotenv()
12
 
13
+ has_gpu = torch.cuda.is_available()
14
+
15
  def plot_raw_data(df):
16
  if df.shape[1] == 1:
17
  fig = plt.figure(figsize=(12, 4))
 
35
  fig.tight_layout()
36
  return fig
37
 
38
+ def forecast_interface(file, model_type, is_multivariate, horizon, lag, neurons, epochs, batch_size, future_horizon, device):
39
+ darts_models = ["Transformer", "N-BEATS", "TFT", "TCN", "BlockRNN"]
40
+ is_darts_model = model_type in darts_models
41
+
42
+ df, _ = load_and_process_data(
43
+ file.name,
44
+ is_multivariate == "Multivariate",
45
+ keep_datetime_column_for_darts=is_darts_model
46
+ )
47
  raw_plot = plot_raw_data(df)
48
 
49
  if model_type in ["LSTM", "BiLSTM", "GRU"]:
 
57
  epochs,
58
  batch_size,
59
  future_horizon=future_horizon,
60
+ device=device
61
  )
62
  return raw_plot, fig, metrics, export_path
 
63
  else:
64
+ forecast, fig, metrics, export_path = run_darts_forecast(
65
  df,
66
  model_type,
67
  is_multivariate == "Multivariate",
68
  horizon,
69
  epochs,
70
+ batch_size,
71
+ future_horizon=future_horizon,
72
+ device=device
73
  )
74
+ return raw_plot, fig, metrics, export_path
75
 
76
  models = ["LSTM", "BiLSTM", "GRU", "Transformer", "N-BEATS", "TFT", "TCN", "BlockRNN"]
77
 
 
89
  future_horizon = gr.Slider(0, 10000, value=0, step=100, label="🔮 Future Horizon (Steps Ahead)")
90
 
91
  with gr.Row():
92
+ neurons = gr.Slider(10, 2048, value=32, step=8, label="🧠 Neurons (Only for LSTM)")
93
  epochs = gr.Slider(10, 10000, value=100, label="🔁 Epochs")
94
  batch_size = gr.Slider(8, 2048, value=32, step=8, label="📦 Batch Size")
95
+
96
+ with gr.Row():
97
+ device_choice = gr.Radio(
98
+ choices=["CPU", "GPU"] if has_gpu else ["CPU"],
99
+ value="GPU" if has_gpu else "CPU",
100
+ label="⚙️ Device"
101
+ )
102
 
103
  run_btn = gr.Button("🚀 Run Forecast")
104
 
 
109
 
110
  run_btn.click(
111
  forecast_interface,
112
+ inputs=[file, model_type, is_multivariate, horizon, lag, neurons, epochs, batch_size, future_horizon, device_choice],
113
  outputs=[raw_plot, output_plot, metrics_out, export_csv]
114
  )
115
 
dockerfile.gpu CHANGED
@@ -1,15 +1,26 @@
1
- # Dockerfile.gpu
2
- FROM nvidia/cuda:12.2.0-runtime-ubuntu20.04
3
 
 
4
  WORKDIR /app
5
 
 
6
  COPY . .
7
 
 
8
  RUN apt-get update && apt-get install -y \
9
- python3-pip python3-dev git gcc g++ && \
10
- pip3 install --upgrade pip && \
11
- pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121 && \
12
- pip3 install -r requirements.txt
 
 
 
 
 
13
 
 
14
  EXPOSE 7860
15
- CMD ["python3", "app.py"]
 
 
 
1
+ # Use the Nvidia CUDA base image
2
+ FROM nvidia/cuda:12.9.1-runtime-ubuntu24.04
3
 
4
+ # Set the working directory to /app
5
  WORKDIR /app
6
 
7
+ # Copy the current directory contents to the container at /app
8
  COPY . .
9
 
10
+ # Install system dependencies
11
  RUN apt-get update && apt-get install -y \
12
+ python3-venv python3-pip python3-dev git gcc g++ && \
13
+ # Create a virtual environment in the /app directory
14
+ python3 -m venv /app/venv && \
15
+ # Upgrade pip using the virtual environment's own pip (no activation step is needed)
16
+ /app/venv/bin/pip install --upgrade pip && \
17
+ # Install PyTorch and other dependencies inside the virtual environment
18
+ /app/venv/bin/pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121 && \
19
+ # Install the remaining dependencies from requirements.txt
20
+ /app/venv/bin/pip install -r requirements.txt
21
 
22
+ # Expose the port 7860 (for your app)
23
  EXPOSE 7860
24
+
25
+ # Use the virtual environment to run the app
26
+ CMD ["/app/venv/bin/python3", "app.py"]
models/custom_model_bk.py DELETED
@@ -1,108 +0,0 @@
1
- import numpy as np
2
- import matplotlib.pyplot as plt
3
- import pandas as pd
4
- from sklearn.metrics import r2_score, root_mean_squared_error
5
- from sklearn.preprocessing import MinMaxScaler
6
- from tensorflow.keras.models import Sequential
7
- from tensorflow.keras.layers import LSTM, Bidirectional, GRU, Dense
8
- from tensorflow.keras.callbacks import EarlyStopping
9
- import tempfile
10
- import os
11
-
12
- def prepare_data(df, lag=1):
13
- values = df.values.astype('float32')
14
- X, y = [], []
15
- for i in range(len(values) - lag):
16
- X.append(values[i:i+lag])
17
- y.append(values[i+lag])
18
- return np.array(X), np.array(y), values
19
-
20
- def scale_data(X, y):
21
- scaler = MinMaxScaler(feature_range=(0, 1))
22
- y = y.reshape(-1, 1)
23
- scaled_X = scaler.fit_transform(X.reshape(-1, X.shape[-1])).reshape(X.shape)
24
- scaled_y = scaler.transform(y)
25
- return scaled_X, scaled_y, scaler
26
-
27
- def invert_prediction(scaler, pred):
28
- return scaler.inverse_transform(pred.reshape(-1, 1)).flatten()
29
-
30
- def create_model(input_shape, model_type, neurons):
31
- model = Sequential()
32
- if model_type == "LSTM":
33
- model.add(LSTM(neurons, input_shape=input_shape))
34
- elif model_type == "BiLSTM":
35
- model.add(Bidirectional(LSTM(neurons), input_shape=input_shape))
36
- elif model_type == "GRU":
37
- model.add(GRU(neurons, input_shape=input_shape))
38
- model.add(Dense(1))
39
- model.compile(loss="mse", optimizer="adam")
40
- return model
41
-
42
- def run_forecast(df, model_type, is_multivariate, horizon, neurons, epochs, batch_size, future_horizon=0):
43
- lag = 10
44
- X, y, raw_values = prepare_data(df, lag=lag)
45
-
46
- train_size = len(X) - horizon
47
- X_train, X_test = X[:train_size], X[train_size:]
48
- y_train, y_test = y[:train_size], y[train_size:]
49
-
50
- X_train, y_train, scaler = scale_data(X_train, y_train)
51
- X_test, y_test, _ = scale_data(X_test, y_test)
52
-
53
- X_train = X_train.reshape(X_train.shape[0], lag, X_train.shape[-1])
54
- X_test = X_test.reshape(X_test.shape[0], lag, X_test.shape[-1])
55
-
56
- model = create_model((lag, X_train.shape[-1]), model_type, neurons)
57
-
58
- early_stop = EarlyStopping(monitor="loss", patience=10, restore_best_weights=True)
59
- model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, verbose=1, shuffle=False, callbacks=[early_stop])
60
-
61
- y_train_pred = model.predict(X_train)
62
- y_test_pred = model.predict(X_test)
63
-
64
- y_train_pred = invert_prediction(scaler, y_train_pred)
65
- y_test_pred = invert_prediction(scaler, y_test_pred)
66
- y_train = invert_prediction(scaler, y_train)
67
- y_test = invert_prediction(scaler, y_test)
68
-
69
- # Forecast future
70
- future_pred = []
71
- if future_horizon > 0:
72
- last_input = X_test[-1]
73
- for _ in range(future_horizon):
74
- pred_scaled = model.predict(last_input.reshape(1, lag, -1))
75
- pred = invert_prediction(scaler, pred_scaled)
76
- future_pred.append(pred[0])
77
-
78
- # slide window
79
- next_input = np.roll(last_input, -1, axis=0)
80
- next_input[-1] = pred_scaled
81
- last_input = next_input
82
-
83
- # Metrics
84
- test_rmse = root_mean_squared_error(y_test, y_test_pred)
85
- test_r2 = r2_score(y_test, y_test_pred)
86
-
87
- metrics = f"Test RMSE: {test_rmse:.3f}, Test R2: {test_r2:.3f}"
88
-
89
- # Export CSV
90
- export_df = pd.DataFrame({
91
- "Test_Actual": y_test.flatten(),
92
- "Test_Predicted": y_test_pred.flatten()
93
- })
94
- export_path = os.path.join(tempfile.gettempdir(), "forecast_result.csv")
95
- export_df.to_csv(export_path, index=False)
96
-
97
- # Plot
98
- fig = plt.figure(figsize=(12, 6))
99
- plt.plot(y_test, label="Test Actual")
100
- plt.plot(y_test_pred, label="Test Predicted", linestyle="dashed")
101
- if future_horizon > 0:
102
- plt.plot(range(len(y_test), len(y_test) + len(future_pred)), future_pred, label="Future Forecast", linestyle="dashdot")
103
- plt.title("Forecast Result")
104
- plt.xlabel("Time Step")
105
- plt.ylabel("Value")
106
- plt.legend()
107
-
108
- return y_train_pred, y_test_pred, future_pred, fig, metrics, export_path
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
models/custom_models.py CHANGED
@@ -1,5 +1,6 @@
1
  import numpy as np
2
  import pandas as pd
 
3
  import os
4
  import tempfile
5
  import matplotlib.pyplot as plt
@@ -61,7 +62,13 @@ def run_forecast(
61
  epochs,
62
  batch_size,
63
  future_horizon=0,
 
64
  ):
 
 
 
 
 
65
  # lag = 10
66
  X, y, raw_values = prepare_data(df, lag=lag)
67
 
 
1
  import numpy as np
2
  import pandas as pd
3
+ import tensorflow as tf
4
  import os
5
  import tempfile
6
  import matplotlib.pyplot as plt
 
62
  epochs,
63
  batch_size,
64
  future_horizon=0,
65
+ device="CPU",
66
  ):
67
+ if device == "GPU":
68
+ physical_devices = tf.config.list_physical_devices('GPU')
69
+ if physical_devices:
70
+ tf.config.experimental.set_memory_growth(physical_devices[0], True)
71
+
72
  # lag = 10
73
  X, y, raw_values = prepare_data(df, lag=lag)
74
 
models/darts_models.py CHANGED
@@ -30,9 +30,9 @@ def run_darts_forecast(
30
  print(df.columns)
31
  if "datetime" in df.columns:
32
  df["datetime"] = pd.to_datetime(df["datetime"])
33
- series = TimeSeries.from_dataframe(df, time_col="datetime")
34
  elif isinstance(df.index, pd.DatetimeIndex):
35
- series = TimeSeries.from_dataframe(df)
36
  else:
37
  raise ValueError(
38
  f"DataFrame must have a 'datetime' column or a DatetimeIndex. Columns are: {df.columns}"
@@ -57,7 +57,7 @@ def run_darts_forecast(
57
  n_epochs=epochs,
58
  batch_size=batch_size,
59
  random_state=42,
60
- pl_trainer_kwargs={"accelerator": "auto"},
61
  )
62
 
63
  model.fit(train, verbose=False)
 
30
  print(df.columns)
31
  if "datetime" in df.columns:
32
  df["datetime"] = pd.to_datetime(df["datetime"])
33
+ series = TimeSeries.from_dataframe(df, time_col="datetime", fill_missing_dates=True, freq=None)
34
  elif isinstance(df.index, pd.DatetimeIndex):
35
+ series = TimeSeries.from_dataframe(df, fill_missing_dates=True, freq=None)
36
  else:
37
  raise ValueError(
38
  f"DataFrame must have a 'datetime' column or a DatetimeIndex. Columns are: {df.columns}"
 
57
  n_epochs=epochs,
58
  batch_size=batch_size,
59
  random_state=42,
60
+ pl_trainer_kwargs={"accelerator": device.lower()},
61
  )
62
 
63
  model.fit(train, verbose=False)
requirements.txt CHANGED
@@ -7,4 +7,5 @@ keras
7
  gradio
8
  python-dotenv
9
  openpyxl
 
10
  u8darts[torch]
 
7
  gradio
8
  python-dotenv
9
  openpyxl
10
+ torch
11
  u8darts[torch]
utils/preprocessing.py CHANGED
@@ -1,7 +1,7 @@
1
  # utils/preprocessing.py
2
  import pandas as pd
3
 
4
- def load_and_process_data(file_path, is_multivariate):
5
  df = pd.read_csv(file_path)
6
 
7
  # Auto-detect time column
@@ -12,14 +12,19 @@ def load_and_process_data(file_path, is_multivariate):
12
  break
13
 
14
  if time_col:
 
 
15
  df.set_index(time_col, inplace=True)
16
 
17
  if not is_multivariate:
18
- # Use only the first numeric column for univariate
19
- numeric_cols = df.select_dtypes(include=['float64', 'int64']).columns
20
  if len(numeric_cols) == 0:
21
  raise ValueError("No numeric column found for univariate forecast.")
22
- df = df[[numeric_cols[0]]]
23
 
24
- return df, df.shape[1]
 
 
 
 
25
 
 
 
1
  # utils/preprocessing.py
2
  import pandas as pd
3
 
4
+ def load_and_process_data(file_path, is_multivariate, keep_datetime_column_for_darts=False):
5
  df = pd.read_csv(file_path)
6
 
7
  # Auto-detect time column
 
12
  break
13
 
14
  if time_col:
15
+ df[time_col] = pd.to_datetime(df[time_col], errors="coerce") # force datetime conversion
16
+ df = df.dropna(subset=[time_col])
17
  df.set_index(time_col, inplace=True)
18
 
19
  if not is_multivariate:
20
+ numeric_cols = df.select_dtypes(include=["float64", "int64"]).columns
 
21
  if len(numeric_cols) == 0:
22
  raise ValueError("No numeric column found for univariate forecast.")
 
23
 
24
+ if keep_datetime_column_for_darts:
25
+ # For Darts: keep full DataFrame with datetime index, but only 1 target column
26
+ df = df[[numeric_cols[0]]] # still only one target column but keep datetime index
27
+ else:
28
+ df = df[[numeric_cols[0]]] # just target column, no datetime needed
29
 
30
+ return df, df.shape[1]