Spaces:

thesven
/

BTC-Autoformer

Runtime error

App Files Community

thesven committed on May 7, 2024

Commit

05b4326

1 Parent(s): ce9e854

added app

Browse files

Files changed (3) hide show

.gitignore +5 -0
.idea/.gitignore +8 -0
app.py +312 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,5 @@

+BTC-Autoformer.ipynb
+BTC_Dataset_to_huggingface.ipynb
+huggingface_model.ipynb
+app_backtest.py
+.idea/*

.idea/.gitignore ADDED Viewed

	@@ -0,0 +1,8 @@

+# Default ignored files
+/shelf/
+/workspace.xml
+# Editor-based HTTP Client requests
+/httpRequests/
+# Datasource local storage ignored files
+/dataSources/
+/dataSources.local.xml

app.py ADDED Viewed

	@@ -0,0 +1,312 @@

+# Standard library imports
+from typing import Optional, Iterable
+# Third-party library imports
+from transformers import PretrainedConfig, AutoformerForPrediction
+from functools import lru_cache, partial
+import gradio as gr
+import spaces
+import torch
+import pandas as pd
+# External imports
+# GluonTS imports
+from gluonts.dataset.field_names import FieldName
+from gluonts.transform import (
+    AddAgeFeature,
+    AddObservedValuesIndicator,
+    AddTimeFeatures,
+    AsNumpyArray,
+    Chain,
+    ExpectedNumInstanceSampler,
+    InstanceSplitter,
+    RemoveFields,
+    TestSplitSampler,
+    Transformation,
+    ValidationSplitSampler,
+    VstackFeatures,
+    RenameFields,
+)
+from gluonts.time_feature import time_features_from_frequency_str
+from gluonts.transform.sampler import InstanceSampler
+# Hugging Face Datasets imports
+from datasets import Dataset, Features, Value, Sequence, load_dataset
+# GluonTS Loader imports
+from gluonts.dataset.loader import as_stacked_batches
+import matplotlib.pyplot as plt
+import matplotlib.dates as mdates
+import numpy as np
+def convert_to_pandas_period(date, freq):
+    return pd.Period(date, freq)
+def transform_start_field(batch, freq):
+    batch["start"] = [convert_to_pandas_period(date, freq) for date in batch["start"]]
+    return batch
+def create_transformation(freq: str, config: PretrainedConfig, prediction_length: int) -> Transformation:
+    remove_field_names = []
+    if config.num_static_real_features == 0:
+        remove_field_names.append(FieldName.FEAT_STATIC_REAL)
+    if config.num_dynamic_real_features == 0:
+        remove_field_names.append(FieldName.FEAT_DYNAMIC_REAL)
+    if config.num_static_categorical_features == 0:
+        remove_field_names.append(FieldName.FEAT_STATIC_CAT)
+    # a bit like torchvision.transforms.Compose
+    return Chain(
+        # step 1: remove static/dynamic fields if not specified
+        [RemoveFields(field_names=remove_field_names)]
+        # step 2: convert the data to NumPy (potentially not needed)
+        + (
+            [
+                AsNumpyArray(
+                    field=FieldName.FEAT_STATIC_CAT,
+                    expected_ndim=1,
+                    dtype=int,
+                )
+            ]
+            if config.num_static_categorical_features > 0
+            else []
+        )
+        + (
+            [
+                AsNumpyArray(
+                    field=FieldName.FEAT_STATIC_REAL,
+                    expected_ndim=1,
+                )
+            ]
+            if config.num_static_real_features > 0
+            else []
+        )
+        + [
+            AsNumpyArray(
+                field=FieldName.TARGET,
+                # we expect an extra dim for the multivariate case:
+                expected_ndim=1 if config.input_size == 1 else 2,
+            ),
+            # step 3: handle the NaN's by filling in the target with zero
+            # and return the mask (which is in the observed values)
+            # true for observed values, false for nan's
+            # the decoder uses this mask (no loss is incurred for unobserved values)
+            # see loss_weights inside the xxxForPrediction model
+            AddObservedValuesIndicator(
+                target_field=FieldName.TARGET,
+                output_field=FieldName.OBSERVED_VALUES,
+            ),
+            # step 4: add temporal features based on freq of the dataset
+            # and the desired prediction length
+            AddTimeFeatures(
+                start_field=FieldName.START,
+                target_field=FieldName.TARGET,
+                output_field=FieldName.FEAT_TIME,
+                time_features=time_features_from_frequency_str(freq),
+                pred_length=prediction_length,
+            ),
+            # step 5: add another temporal feature (just a single number)
+            # tells the model where in its life the value of the time series is,
+            # sort of a running counter
+            AddAgeFeature(
+                target_field=FieldName.TARGET,
+                output_field=FieldName.FEAT_AGE,
+                pred_length=prediction_length,
+                log_scale=True,
+            ),
+            # step 6: vertically stack all the temporal features into the key FEAT_TIME
+            VstackFeatures(
+                output_field=FieldName.FEAT_TIME,
+                input_fields=[FieldName.FEAT_TIME, FieldName.FEAT_AGE]
+                + (
+                    [FieldName.FEAT_DYNAMIC_REAL]
+                    if config.num_dynamic_real_features > 0
+                    else []
+                ),
+            ),
+            # step 7: rename to match HuggingFace names
+            RenameFields(
+                mapping={
+                    FieldName.FEAT_STATIC_CAT: "static_categorical_features",
+                    FieldName.FEAT_STATIC_REAL: "static_real_features",
+                    FieldName.FEAT_TIME: "time_features",
+                    FieldName.TARGET: "values",
+                    FieldName.OBSERVED_VALUES: "observed_mask",
+                }
+            ),
+        ]
+    )
+def create_instance_splitter(
+    config: PretrainedConfig,
+    mode: str,
+    prediction_length: int,
+    train_sampler: Optional[InstanceSampler] = None,
+    validation_sampler: Optional[InstanceSampler] = None,
+) -> Transformation:
+    assert mode in ["train", "validation", "test"]
+    instance_sampler = {
+        "train": train_sampler
+        or ExpectedNumInstanceSampler(
+            num_instances=1.0, min_future=prediction_length
+        ),
+        "validation": validation_sampler
+        or ValidationSplitSampler(min_future=prediction_length),
+        "test": TestSplitSampler(),
+    }[mode]
+    return InstanceSplitter(
+        target_field="values",
+        is_pad_field=FieldName.IS_PAD,
+        start_field=FieldName.START,
+        forecast_start_field=FieldName.FORECAST_START,
+        instance_sampler=instance_sampler,
+        past_length=config.context_length + max(config.lags_sequence),
+        future_length=prediction_length,
+        time_series_fields=["time_features", "observed_mask"],
+    )
+def create_test_dataloader(
+    config: PretrainedConfig,
+    freq: str,
+    data: Dataset,
+    batch_size: int,
+    prediction_length: int,
+    **kwargs,
+):
+    PREDICTION_INPUT_NAMES = [
+        "past_time_features",
+        "past_values",
+        "past_observed_mask",
+        "future_time_features",
+    ]
+    if config.num_static_categorical_features > 0:
+        PREDICTION_INPUT_NAMES.append("static_categorical_features")
+    if config.num_static_real_features > 0:
+        PREDICTION_INPUT_NAMES.append("static_real_features")
+    transformation = create_transformation(freq, config, prediction_length)
+    transformed_data = transformation.apply(data, is_train=False)
+    # we create a Test Instance splitter which will sample the very last
+    # context window seen during training only for the encoder.
+    instance_sampler = create_instance_splitter(
+        config, "test", prediction_length=prediction_length
+    )
+    # we apply the transformations in test mode
+    testing_instances = instance_sampler.apply(transformed_data, is_train=False)
+    return as_stacked_batches(
+        testing_instances,
+        batch_size=batch_size,
+        output_type=torch.tensor,
+        field_names=PREDICTION_INPUT_NAMES,
+    )
+def plot(ts_index, test_dataset, forecasts, prediction_length):
+    fig, ax = plt.subplots(figsize=(12, 8), facecolor='white')
+    # Length of the target data
+    target_length = len(test_dataset[ts_index]['target'])
+    # Creating a period range for the entire dataset plus forecast period
+    index = pd.period_range(
+        start=test_dataset[ts_index]['start'],
+        periods=target_length + prediction_length,
+        freq='1D'
+    ).to_timestamp()
+    # Plotting actual data
+    ax.plot(
+        index[:target_length],
+        test_dataset[ts_index]['target'],
+        label="Actual"
+    )
+    # Plotting the forecast data
+    # Forecast starts right after the last actual data point
+    forecast_start_index = target_length
+    ax.plot(
+        index[forecast_start_index:],
+        forecasts[ts_index][0][:prediction_length],  # Use forecasts[ts_index][0][:prediction_length] to slice the forecast values
+        label="Prediction"
+    )
+    ax.set_ylim(0, 140000)
+    ax.xaxis.set_major_locator(mdates.MonthLocator(bymonth=(1, 7)))
+    ax.xaxis.set_minor_locator(mdates.MonthLocator())
+    plt.legend()
+    return fig
+zero = torch.Tensor([0]).cuda()
+print(zero.device) # <-- 'cpu' 🤔
+@spaces.GPU
+def do_prediction(days_to_predict: int):
+    device = zero.device
+    # Define the desired prediction length
+    prediction_length = 7  # Number of time steps to predict into the future
+    freq = "1D"  # Daily frequency
+    dataset = load_dataset("thesven/BTC-Daily-Avg-Market-Value")
+    dataset['test'].set_transform(partial(transform_start_field, freq=freq))
+    model = AutoformerForPrediction.from_pretrained("thesven/BTC-Autoformer-v1")
+    config = model.config
+    print(f"Config: {config}")
+    test_dataloader = create_test_dataloader(
+        config=config,
+        freq=freq,
+        data=dataset['test'],
+        batch_size=64,
+        prediction_length=prediction_length,
+    )
+    model.to(device)
+    model.eval()
+    forecasts = []
+    for batch in test_dataloader:
+        outputs = model.generate(
+            static_categorical_features=batch["static_categorical_features"].to(device)
+            if config.num_static_categorical_features > 0
+            else None,
+            static_real_features=batch["static_real_features"].to(device)
+            if config.num_static_real_features > 0
+            else None,
+            past_time_features=batch["past_time_features"].to(device),
+            past_values=batch["past_values"].to(device),
+            future_time_features=batch["future_time_features"].to(device),
+            past_observed_mask=batch["past_observed_mask"].to(device),
+        )
+        forecasts.append(outputs.sequences.cpu().numpy())
+    forecasts = np.vstack(forecasts)
+    print(forecasts.shape)
+    return plot(0, dataset['test'], forecasts, prediction_length)
+interface = gr.Interface(
+    fn=do_prediction,
+    inputs=gr.Slider(minimum=1, maximum=30, step=1, label="Days to Predict"),
+    outputs="plot",
+    title="Prediction Plot",
+    description="Adjust the slider to set the number of days to predict.",
+    allow_flagging=False,  # Disable flagging for simplicity
+)
+interface.launch()