Spaces:

Shaikat01
/

DataSynthis_ML_JobTask

Sleeping

App Files Files Community

Shaikat01 committed on Oct 4, 2025

Commit

6504afd

verified ·

1 Parent(s): 2fe0dc5

Update app.py

Browse files

Files changed (1) hide show

app.py +71 -54

app.py CHANGED Viewed

@@ -5,13 +5,13 @@ import plotly.graph_objects as go
 from datetime import datetime, timedelta
 import pickle
 import yfinance as yf
 from statsmodels.tsa.arima.model import ARIMA
 from prophet import Prophet
 from tensorflow import keras
 from sklearn.preprocessing import MinMaxScaler
 import warnings
-import os
-from datetime import datetime, timedelta
 warnings.filterwarnings('ignore')
 # Load your saved models (update paths as needed)
@@ -41,70 +41,87 @@ def load_models():
 arima_model, prophet_model, lstm_model, scaler = load_models()
 SEQ_LENGTH = 60  # Should match your training
 def fetch_stock_data(ticker, days=365):
-    """
-    Fetch stock data from a local CSV (handles multi-row headers)
-    or downloads from Yahoo Finance if not found.
-    Returns last `days` of data with standardized 'Price' column.
-    """
     try:
-        filename = f"{ticker.upper()}.csv"
-        file_path = os.path.join(os.getcwd(), filename)
-        # ✅ Load local dataset if it exists
-        if os.path.exists(file_path):
-            print(f"📂 Loading local dataset: {filename}")
-            # Try reading while skipping bad header rows
-            df = pd.read_csv(file_path, skiprows=2)  # skip first two rows ("Ticker", "Date" lines)
-            df.rename(columns=lambda c: c.strip().lower(), inplace=True)
-            if 'date' not in df.columns:
-                # Maybe pandas treated first column as unnamed
-                df.rename(columns={df.columns[0]: 'date'}, inplace=True)
-        else:
-            print(f"🌐 Downloading {ticker} data from Yahoo Finance...")
-            end_date = datetime.now()
-            start_date = end_date - timedelta(days=days * 2)
-            df = yf.download(ticker, start=start_date, end=end_date, progress=False)
-            if df.empty:
-                return None, f"No data found for ticker: {ticker}"
-            df.to_csv(file_path)
-            print(f"💾 Saved downloaded dataset to: {file_path}")
-            df.reset_index(inplace=True)
-            df.rename(columns=lambda c: c.strip().lower(), inplace=True)
-        # ✅ Identify columns
-        date_col = next((c for c in df.columns if 'date' in c or 'time' in c), None)
-        close_col = next((c for c in df.columns if 'close' in c), None)
-        price_col = 'price' if 'price' in df.columns else close_col
-        if not date_col or not price_col:
-            return None, f"Could not detect date or price/close column in {filename}"
-        # ✅ Clean up and format
-        df[date_col] = pd.to_datetime(df[date_col], errors='coerce')
-        df.dropna(subset=[date_col, price_col], inplace=True)
-        df.set_index(date_col, inplace=True)
-        df.sort_index(inplace=True)
-        # ✅ Keep last `days` of data
-        end_date = df.index.max()
-        start_date = end_date - timedelta(days=days)
-        df = df.loc[df.index >= start_date]
-        # ✅ Final column cleanup
-        df = df[[price_col]].copy()
-        df.columns = ['Price']
-        return df, None
     except Exception as e:
-        return None, str(e)
 def make_arima_forecast(data, days):
     """Make ARIMA forecast"""

 from datetime import datetime, timedelta
 import pickle
 import yfinance as yf
+import os
+import re
 from statsmodels.tsa.arima.model import ARIMA
 from prophet import Prophet
 from tensorflow import keras
 from sklearn.preprocessing import MinMaxScaler
 import warnings
 warnings.filterwarnings('ignore')
 # Load your saved models (update paths as needed)
 arima_model, prophet_model, lstm_model, scaler = load_models()
 SEQ_LENGTH = 60  # Should match your training
 def fetch_stock_data(ticker, days=365):
     """Load closing prices for `ticker` from a local ``<TICKER>.csv`` file.
 
     The CSV is looked up next to this source file. The date column is
     auto-detected (the CSVs may contain extra header/noise rows), the closing
     price column is selected, and the result is trimmed to the last `days`
     calendar days ending at the most recent date present in the file.
 
     Args:
         ticker: Ticker symbol; upper-cased to build the CSV file name.
         days: Size of the trailing window in calendar days. ``None`` or a
             non-positive value keeps every row.
 
     Returns:
         A ``(df, error)`` tuple. On success ``df`` is a DataFrame indexed by
         date (ascending) with a single numeric ``'Price'`` column and
         ``error`` is ``None``. On any failure ``df`` is ``None`` and ``error``
         is a human-readable message. The function never raises.
     """
     try:
         symbol = "" if ticker is None else str(ticker).strip().upper()
         if not symbol:
             return None, "No ticker symbol provided"
         csv_name = f"{symbol}.csv"
         # The ticker usually originates from user input: refuse anything that
         # would resolve to a path outside the project directory.
         if os.path.basename(csv_name) != csv_name or "\\" in csv_name:
             return None, f"Invalid ticker symbol: {ticker}"
         workspace_dir = os.path.dirname(os.path.abspath(__file__))
         csv_path = os.path.join(workspace_dir, csv_name)
         if not os.path.isfile(csv_path):
             # Always honour the (df, error) contract; falling through here
             # would hand callers a bare None that cannot be unpacked.
             return None, f"No local data file found for ticker: {symbol} (expected {csv_name})"
         # Read everything as text and filter afterwards: this tolerates extra
         # header/noise rows better than skipping a fixed number of rows.
         df_raw = pd.read_csv(csv_path, header=0, dtype=str)
         # A column is treated as the date column when at least half of its
         # first 20 values start with a YYYY-MM-DD pattern.
         date_col = None
         for col in df_raw.columns:
             sample = df_raw[col].astype(str).head(20)
             matches = sample.str.match(r"^\s*\d{4}-\d{2}-\d{2}")
             if matches.sum() >= max(1, int(len(sample) * 0.5)):
                 date_col = col
                 break
         if date_col is None and 'Date' in df_raw.columns:
             date_col = 'Date'
         if date_col is not None:
             df_raw[date_col] = pd.to_datetime(df_raw[date_col], errors='coerce')
             df = df_raw.dropna(subset=[date_col]).copy()
             df.set_index(date_col, inplace=True)
         elif isinstance(df_raw.index, pd.RangeIndex):
             # A default integer index would be "parsed" as nanoseconds since
             # 1970 and silently yield bogus dates, so report the problem.
             return None, f"Local CSV found but no date column in {csv_name}"
         else:
             # The CSV supplied an implicit index; keep only parseable dates.
             df = df_raw.copy()
             df.index = pd.to_datetime(df.index, errors='coerce')
             df = df[df.index.notna()]
         # Prefer 'Close', then common alternatives, then any close/price-like name.
         price_col = next((c for c in ('Close', 'Adj Close', 'Close*') if c in df.columns), None)
         if price_col is None:
             price_col = next(
                 (c for c in df.columns if 'close' in c.lower() or 'price' in c.lower()),
                 None,
             )
         if price_col is None:
             return None, f"Local CSV found but no 'Close' column in {csv_name}"
         df = df[[price_col]].copy()
         df.columns = ['Price']
         # Coerce to numeric and drop rows that cannot be converted.
         df['Price'] = pd.to_numeric(df['Price'], errors='coerce')
         df.dropna(subset=['Price'], inplace=True)
         df.sort_index(inplace=True)
         # Remove the index name to avoid printing a duplicate label.
         df.index.name = None
         if df.empty:
             return None, f"No usable price rows in local CSV: {csv_name}"
         # Slice to the requested window (last `days` days, inclusive of the latest date).
         if days is not None and days > 0:
             start_dt = df.index.max() - timedelta(days=days - 1)
             df = df.loc[df.index >= start_dt]
         if df.empty:
             return None, f"No data in local CSV for the requested period: {csv_name}"
         return df, None
     except Exception as e:
         # Top-level boundary for the UI: surface the failure as a message.
         return None, f"Error fetching stock data: {e}"
 def make_arima_forecast(data, days):
     """Make ARIMA forecast"""