Shaikat01 committed on
Commit
6504afd
·
verified ·
1 Parent(s): 2fe0dc5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +71 -54
app.py CHANGED
@@ -5,13 +5,13 @@ import plotly.graph_objects as go
5
  from datetime import datetime, timedelta
6
  import pickle
7
  import yfinance as yf
 
 
8
  from statsmodels.tsa.arima.model import ARIMA
9
  from prophet import Prophet
10
  from tensorflow import keras
11
  from sklearn.preprocessing import MinMaxScaler
12
  import warnings
13
- import os
14
- from datetime import datetime, timedelta
15
  warnings.filterwarnings('ignore')
16
 
17
  # Load your saved models (update paths as needed)
@@ -41,70 +41,87 @@ def load_models():
41
  arima_model, prophet_model, lstm_model, scaler = load_models()
42
  SEQ_LENGTH = 60 # Should match your training
43
 
44
-
45
def _normalize_columns(df):
    """Return a copy of *df* with flat, stripped, lower-cased column names."""
    df = df.copy()
    if isinstance(df.columns, pd.MultiIndex):
        # Newer yfinance releases label columns with (field, ticker) tuples;
        # only the field name is needed for a single ticker.
        df.columns = df.columns.get_level_values(0)
    df.columns = [str(c).strip().lower() for c in df.columns]
    return df


def fetch_stock_data(ticker, days=365):
    """Load price history for *ticker* from a local CSV or Yahoo Finance.

    A file named ``<TICKER>.csv`` in the current working directory is used
    when present; otherwise the data is downloaded with ``yf.download`` and
    saved under that name for later calls.

    Args:
        ticker: Ticker symbol; upper-cased to build the CSV file name.
        days: Size, in calendar days, of the trailing window to return,
            measured back from the most recent date in the data.

    Returns:
        ``(df, None)`` on success, where ``df`` is indexed by date, sorted
        ascending, with a single numeric ``'Price'`` column; or
        ``(None, message)`` when the data cannot be loaded.
    """
    try:
        filename = f"{ticker.upper()}.csv"
        file_path = os.path.join(os.getcwd(), filename)

        if os.path.exists(file_path):
            print(f"📂 Loading local dataset: {filename}")
            # Read every row as text instead of skipping a fixed number of
            # header rows: extra header/noise rows are discarded below when
            # their date or price fails to parse, and the real header row
            # (which names the close/price column) is kept.
            df = _normalize_columns(pd.read_csv(file_path, dtype=str))
            if 'date' not in df.columns:
                # The first column holds the dates even when it is labelled
                # differently (or left unnamed) in the file.
                df = df.rename(columns={df.columns[0]: 'date'})
        else:
            print(f"🌐 Downloading {ticker} data from Yahoo Finance...")
            end_date = datetime.now()
            # Request twice the window so enough rows remain after trimming.
            start_date = end_date - timedelta(days=days * 2)
            df = yf.download(ticker, start=start_date, end=end_date, progress=False)

            if df.empty:
                return None, f"No data found for ticker: {ticker}"

            df.to_csv(file_path)
            print(f"💾 Saved downloaded dataset to: {file_path}")
            df = _normalize_columns(df.reset_index())

        # Identify the date column and the price column.
        date_col = next((c for c in df.columns if 'date' in c or 'time' in c), None)
        if 'close' in df.columns:
            close_col = 'close'
        else:
            close_col = next((c for c in df.columns if 'close' in c), None)
        price_col = 'price' if 'price' in df.columns else close_col

        if not date_col or not price_col:
            return None, f"Could not detect date or price/close column in {filename}"

        # Coerce both columns; rows that are not real observations become
        # NaT/NaN and are dropped.
        df[date_col] = pd.to_datetime(df[date_col], errors='coerce')
        df[price_col] = pd.to_numeric(df[price_col], errors='coerce')
        df = df.dropna(subset=[date_col, price_col]).set_index(date_col).sort_index()

        if df.empty:
            return None, f"No usable rows found in {filename}"

        # Keep the last `days` of data.
        cutoff = df.index.max() - timedelta(days=days)
        df = df.loc[df.index >= cutoff]

        # Expose a single standardized column.
        df = df[[price_col]].copy()
        df.columns = ['Price']

        return df, None

    except Exception as e:
        return None, str(e)
108
 
109
  def make_arima_forecast(data, days):
110
  """Make ARIMA forecast"""
 
5
  from datetime import datetime, timedelta
6
  import pickle
7
  import yfinance as yf
8
+ import os
9
+ import re
10
  from statsmodels.tsa.arima.model import ARIMA
11
  from prophet import Prophet
12
  from tensorflow import keras
13
  from sklearn.preprocessing import MinMaxScaler
14
  import warnings
 
 
15
  warnings.filterwarnings('ignore')
16
 
17
  # Load your saved models (update paths as needed)
 
41
  arima_model, prophet_model, lstm_model, scaler = load_models()
42
  SEQ_LENGTH = 60 # Should match your training
43
 
 
44
def _index_by_date(frame):
    """Return a copy of *frame* indexed by parsed dates, or None if it has none."""
    iso_date = r"^\s*\d{4}-\d{2}-\d{2}"

    # A column counts as the date column when at least half of its first 20
    # values start with an ISO date (YYYY-MM-DD).
    date_col = None
    for col in frame.columns:
        sample = frame[col].astype(str).head(20)
        if sample.str.match(iso_date).sum() >= max(1, len(sample) // 2):
            date_col = col
            break
    if date_col is None and 'Date' in frame.columns:
        date_col = 'Date'

    if date_col is not None:
        dated = frame.copy()
        dated[date_col] = pd.to_datetime(dated[date_col], errors='coerce')
        # Header/noise rows have no parseable date and are dropped here.
        return dated.dropna(subset=[date_col]).set_index(date_col)

    # No date column: the dates may be in the index when the file had an
    # implicit index column. A numeric (default) index must not be parsed,
    # because integers would silently become 1970 timestamps.
    if pd.api.types.is_numeric_dtype(frame.index):
        return None
    parsed = pd.to_datetime(frame.index, errors='coerce')
    keep = parsed.notna()
    if not keep.any():
        return None
    dated = frame.loc[keep].copy()
    dated.index = parsed[keep]
    return dated


def _pick_price_column(columns):
    """Return the name of the closing-price column in *columns*, or None."""
    for name in ('Close', 'Adj Close', 'Close*'):
        if name in columns:
            return name
    return next(
        (c for c in columns if 'close' in str(c).lower() or 'price' in str(c).lower()),
        None,
    )


def fetch_stock_data(ticker, days=365):
    """Load price history for *ticker* from a local ``<TICKER>.csv`` file.

    The file is looked up in the directory that contains this module. The
    CSV may contain extra header/noise rows; rows whose date or price cannot
    be parsed are discarded.

    Args:
        ticker: Ticker symbol; upper-cased to build the CSV file name.
        days: Size, in calendar days, of the trailing window to return,
            counted back from (and including) the most recent date in the
            file. ``None`` or a non-positive value returns all rows.

    Returns:
        ``(df, None)`` on success, where ``df`` has a date index sorted
        ascending and a single numeric ``'Price'`` column; or
        ``(None, message)`` when the data cannot be loaded.
    """
    try:
        if not ticker or not str(ticker).strip():
            return None, "No ticker symbol provided"

        csv_name = f"{ticker.upper()}.csv"
        # The ticker becomes part of a filesystem path, so refuse anything
        # that would point outside the module directory.
        if os.path.basename(csv_name) != csv_name:
            return None, f"Invalid ticker symbol: {ticker}"

        workspace_dir = os.path.dirname(os.path.abspath(__file__))
        csv_path = os.path.join(workspace_dir, csv_name)
        if not os.path.isfile(csv_path):
            return None, f"Local CSV not found for ticker: {csv_name}"

        # Read everything as text; filtering non-date rows afterwards is more
        # robust than skipping a fixed number of header rows.
        df_raw = pd.read_csv(csv_path, header=0, dtype=str)

        df = _index_by_date(df_raw)
        if df is None:
            return None, f"Local CSV found but no date column in {csv_name}"

        price_col = _pick_price_column(df.columns)
        if price_col is None:
            return None, f"Local CSV found but no 'Close' column in {csv_name}"

        # Keep a single numeric 'Price' column, dropping unparseable rows.
        df = df[[price_col]].copy()
        df.columns = ['Price']
        df['Price'] = pd.to_numeric(df['Price'], errors='coerce')
        df = df.dropna(subset=['Price']).sort_index()
        # Clear the index name so it is not shown as an extra label.
        df.index.name = None

        # Slice to the requested window (last `days` days).
        if days is not None and days > 0 and not df.empty:
            start_dt = df.index.max() - timedelta(days=days - 1)
            df = df.loc[df.index >= start_dt]

        if df.empty:
            return None, f"No data in local CSV for the requested period: {csv_name}"

        return df, None
    except Exception as e:
        return None, f"Error fetching stock data: {e}"
125
 
126
  def make_arima_forecast(data, days):
127
  """Make ARIMA forecast"""