Spaces:

Nixtla
/

chatgpt-forecast

Runtime error

App Files Files Community

azulgarza committed on Jul 10, 2023

Commit

b22704e

1 Parent(s): e676cab

feat: add conformal and ensembles

Browse files

Files changed (1) hide show

src/utils.py +51 -15

src/utils.py CHANGED Viewed

@@ -10,7 +10,6 @@ from statsforecast.models import Naive
 openai.api_key = os.environ['OPENAI_API_KEY']
 class ChatGPTForecast:
     def __init__(self):
@@ -26,7 +25,6 @@ class ChatGPTForecast:
         - give more weight to the most recent observations
         - consider trend
         - consider seasonality
-        - values should lie between 0 and {len(self.bins) - 1}, please be sure to do this
         """
     def tokenize_time_series(self, series):
@@ -79,23 +77,59 @@ class ChatGPTForecast:
         series = [self.bins[i] + bin_width / 2 for i in indices]
         return series
-    def forward(self, series, seasonality, h):
         series_tokenized = self.tokenize_time_series(series)
         prompt = f"""
         {self.prompt}-consider {seasonality} as seasonality
         - just print {h} steps ahead
         this is the series: {series_tokenized}
         """
         response = openai.ChatCompletion.create(
             model="gpt-3.5-turbo",
-            messages=[{"role": "user", "content": prompt}]
         )
-        output_gpt = response['choices'][0]['message']['content']
-        output_gpt = self.extend_string(output_gpt, h)
-        output_gpt = ' '.join(f'{max(min(int(x), len(self.bins) - 1), 0)}' for x in output_gpt.split())
-        return self.decode_time_series(output_gpt)
     def compute_ds_future(self, ds, fh):
         ds_ = pd.to_datetime(ds)
@@ -111,7 +145,7 @@ class ChatGPTForecast:
         ds_future = list(map(str, ds_future))
         return ds_future, freq
-    def forecast(self, df, h, input_size):
         df = df.copy()
         scaler = MinMaxScaler()
         df['y'] = scaler.fit_transform(df[['y']])
@@ -120,12 +154,14 @@ class ChatGPTForecast:
         sf = StatsForecast(models=[Naive()], freq='D')
         fcst_df = sf.forecast(df=df, h=h)
         fcst_df['ds'] = ds_future
-        fcst_df['ChatGPT-3.5-Turbo'] = self.forward(df['y'].values[-input_size:], freq, h)[-h:]
-        for col in ['Naive', 'ChatGPT-3.5-Turbo']:
             fcst_df[col] = scaler.inverse_transform(fcst_df[[col]])
         df['y'] = scaler.inverse_transform(df[['y']])
-        return sf.plot(df, fcst_df, max_insample_length=3 * h)

 openai.api_key = os.environ['OPENAI_API_KEY']
 class ChatGPTForecast:
     def __init__(self):
         - give more weight to the most recent observations
         - consider trend
         - consider seasonality
         """
     def tokenize_time_series(self, series):
         series = [self.bins[i] + bin_width / 2 for i in indices]
         return series
+    def find_min_max(self, string_of_integers):
+        # Split the string into a list of strings
+        str_list = string_of_integers.split()
+        # Convert the list of strings into a list of integers
+        int_list = [int(i) for i in str_list]
+        # Find the minimum and maximum values
+        min_value = min(int_list)
+        max_value = max(int_list)
+        return min_value, max_value
+    def call_openai(self, series, seasonality, h, n_forecasts):
         series_tokenized = self.tokenize_time_series(series)
+        min_val, max_val = self.find_min_max(series_tokenized)
         prompt = f"""
         {self.prompt}-consider {seasonality} as seasonality
         - just print {h} steps ahead
+        - values should be integers between {min_val} and {max_val}, please be sure to do this
         this is the series: {series_tokenized}
         """
         response = openai.ChatCompletion.create(
             model="gpt-3.5-turbo",
+            messages=[{"role": "user", "content": prompt}],
+            n=n_forecasts
         )
+        choices = response['choices']
+        outputs = []
+        for choice in choices:
+            output_gpt = choice['message']['content']
+            if len(output_gpt.split()) < 2:
+                continue
+            output_gpt = self.extend_string(output_gpt, h)
+            output_gpt = ' '.join(f'{max(min(int(x), len(self.bins) - 1), 0)}' for x in output_gpt.split())
+            outputs.append(self.decode_time_series(output_gpt))
+        outputs = np.vstack(outputs)
+        return outputs
+    def forward(self, series, seasonality, h, n_forecasts):
+        outputs = self.call_openai(series, seasonality, h, n_forecasts)
+        outputs = np.median(outputs, axis=0)
+        return outputs
+    def conformal_intervals(self, series, seasonality, h, n_forecasts):
+        series_train, series_test = series[:-h], series[-h:]
+        outputs = self.call_openai(series_train, seasonality, h, n_forecasts)
+        errors = np.abs(outputs - series_test)
+        lower_levels = np.quantile(errors, q=0.05, axis=0)
+        upper_levels = np.quantile(errors, q=0.095, axis=0)
+        return lower_levels, upper_levels
     def compute_ds_future(self, ds, fh):
         ds_ = pd.to_datetime(ds)
         ds_future = list(map(str, ds_future))
         return ds_future, freq
+    def forecast(self, df, h, input_size, n_forecasts=10):
         df = df.copy()
         scaler = MinMaxScaler()
         df['y'] = scaler.fit_transform(df[['y']])
         sf = StatsForecast(models=[Naive()], freq='D')
         fcst_df = sf.forecast(df=df, h=h)
         fcst_df['ds'] = ds_future
+        fcst_df['ChatGPT_3.5_Turbo'] = self.forward(df['y'].values[-input_size:], freq, h, n_forecasts)[-h:]
+        # add prediction intervals
+        lower_levels, upper_levels = self.conformal_intervals(df['y'].values[-(input_size + h):], freq, h, n_forecasts)
+        fcst_df['ChatGPT_3.5_Turbo-lo-90'] = fcst_df['ChatGPT_3.5_Turbo'] - lower_levels
+        fcst_df['ChatGPT_3.5_Turbo-hi-90'] = fcst_df['ChatGPT_3.5_Turbo'] + upper_levels
+        for col in ['Naive', 'ChatGPT_3.5_Turbo', 'ChatGPT_3.5_Turbo-lo-90', 'ChatGPT_3.5_Turbo-hi-90']:
             fcst_df[col] = scaler.inverse_transform(fcst_df[[col]])
         df['y'] = scaler.inverse_transform(df[['y']])
+        return sf.plot(df, fcst_df, max_insample_length=3 * h, level=[90])