File size: 2,044 Bytes
9ca85db
3c8cd88
 
 
 
aec4533
 
 
 
 
 
 
a41b426
 
 
 
 
 
 
 
 
aec4533
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ca88a8c
aec4533
4a34e93
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
import pandas as pd
import numpy as np
import torch
from sklearn.preprocessing import StandardScaler

try:
    import yfinance as yf
except ImportError:
    raise ImportError("yfinance must be installed to fetch financial data.")

def _load_yahoo(ticker, start, end):
    """Download closing prices for ``ticker`` from Yahoo Finance via yfinance."""
    # Only the network calls live inside ``try`` blocks so that the validation
    # errors raised below are not caught and re-wrapped a second time.
    try:
        info = yf.Ticker(ticker).info
    except Exception as e:
        raise ValueError(f"Error fetching data for ticker '{ticker}': {e}") from e
    if not info:
        raise ValueError(f"Ticker '{ticker}' not found.")

    try:
        df = yf.download(ticker, start=start, end=end, progress=False)
    except Exception as e:
        raise ValueError(f"Error fetching data for ticker '{ticker}': {e}") from e
    if df.empty:
        raise ValueError(f"No data found for ticker '{ticker}' in the specified date range. Please check the symbol and dates.")

    # Some yfinance versions return (field, ticker) MultiIndex columns even for
    # a single symbol; keep only the field level so 'Close' is a flat column.
    if isinstance(df.columns, pd.MultiIndex):
        df.columns = df.columns.get_level_values(0)

    df = df[['Close']].dropna().rename(columns={'Close': 'value'})
    # Move the download's index into a regular column.
    df.reset_index(inplace=True)
    return df


def _load_csv(file_upload):
    """Read a CSV and return its 'value' (or 'Close') column as 'value'."""
    if file_upload is None:
        raise ValueError("CSV file upload required but not provided.")
    try:
        df = pd.read_csv(file_upload)
    except Exception as e:
        raise ValueError(f"Failed to read uploaded CSV file: {e}") from e

    # An explicit 'value' column wins; 'Close' is only a fallback.
    if 'value' not in df.columns:
        if 'Close' not in df.columns:
            raise ValueError("CSV must contain a 'value' or 'Close' column.")
        df = df.rename(columns={'Close': 'value'})
    return df[['value']].dropna().reset_index(drop=True)


def load_data(data_src="yahoo", ticker="AAPL", file_upload=None, start="2020-01-01", end="2023-01-01"):
    """Load a univariate series as a DataFrame with a ``value`` column.

    Args:
        data_src: ``"yahoo"`` to download with yfinance, or ``"csv"`` to read
            ``file_upload``.
        ticker: Symbol to download; only used when ``data_src == "yahoo"``.
        file_upload: Path or file-like object accepted by ``pandas.read_csv``;
            required when ``data_src == "csv"``.
        start: Start date passed to ``yfinance.download``.
        end: End date passed to ``yfinance.download``.

    Returns:
        For ``"yahoo"``: the non-null closing prices renamed to ``value``,
        with the download's index reset into a regular column.
        For ``"csv"``: a single-column frame holding the non-null ``value``
        entries (taken from ``Close`` when ``value`` is absent), re-indexed
        from 0.

    Raises:
        ValueError: If the data source is unknown, the CSV is missing or
            unreadable or lacks a usable column, the ticker lookup returns
            nothing, the download is empty, or yfinance itself fails (the
            underlying exception is attached as ``__cause__``).
    """
    if data_src == "yahoo":
        return _load_yahoo(ticker, start, end)
    if data_src == "csv":
        return _load_csv(file_upload)
    raise ValueError("Invalid data source. 'csv' or 'yahoo' expected.")


def preprocess_data(df, column, window_size=30):
    """Standardize one column and cut it into sliding-window samples.

    The selected column is cast to float and scaled with a newly created
    ``StandardScaler`` that is fitted on the whole series. Window ``i`` then
    holds scaled rows ``i`` .. ``i + window_size - 1`` and its target is
    scaled row ``i + window_size``.

    Args:
        df: DataFrame containing ``column``.
        column: Name of the column to turn into a series.
        window_size: Number of consecutive observations per input window;
            must be a positive integer.

    Returns:
        A tuple ``(X, y, scaler)``. ``X`` has shape
        ``(n_windows, window_size, 1)`` and ``y`` has shape ``(n_windows, 1)``
        with ``n_windows = max(len(df) - window_size, 0)``, so the arrays keep
        their rank even when the series is too short to form a window.
        ``scaler`` is the fitted ``StandardScaler``.

    Raises:
        TypeError: If ``window_size`` is not an integer.
        ValueError: If ``window_size`` is smaller than 1.
    """
    if not isinstance(window_size, (int, np.integer)):
        raise TypeError(f"window_size must be an integer, got {type(window_size).__name__}.")
    if window_size < 1:
        raise ValueError(f"window_size must be a positive integer, got {window_size}.")

    scaler = StandardScaler()
    data = df[[column]].values.astype(float)
    scaled = scaler.fit_transform(data)

    # Clamp at zero so a too-short series yields empty but correctly shaped arrays.
    n_windows = max(len(scaled) - window_size, 0)

    # Row i of the index grid is [i, i + 1, ..., i + window_size - 1]; indexing
    # with it gathers every window in one step instead of a Python loop.
    window_idx = np.arange(n_windows)[:, None] + np.arange(window_size)[None, :]
    X = scaled[window_idx]
    # The target of window i is the observation immediately after it.
    y = scaled[window_size:window_size + n_windows]

    return X, y, scaler