Spaces:
Running
Running
Upload 2 files
Browse files- data/market_data_backup.csv +61 -0
- data_fetcher.py +142 -0
data/market_data_backup.csv
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Date,Close,High,Low,Open,Volume,VIX
|
| 2 |
+
2025-11-24,6705.1201171875,6715.75,6630.7001953125,6636.5400390625,6039740000,
|
| 3 |
+
2025-11-25,6765.8798828125,6776.39990234375,6659.97998046875,6697.02978515625,5003330000,
|
| 4 |
+
2025-11-26,6812.60986328125,6831.43994140625,6783.8701171875,6793.5498046875,4485000000,
|
| 5 |
+
2025-11-28,6849.08984375,6850.85986328125,6819.75,6822.52001953125,2558540000,
|
| 6 |
+
2025-12-01,6812.6298828125,6843.64990234375,6799.93994140625,6812.2998046875,4549370000,17.239999771118164
|
| 7 |
+
2025-12-02,6829.3701171875,6851.5498046875,6806.7099609375,6830.9599609375,4582290000,16.59000015258789
|
| 8 |
+
2025-12-03,6849.72021484375,6862.419921875,6810.43017578125,6815.2900390625,4736780000,16.079999923706055
|
| 9 |
+
2025-12-04,6857.1201171875,6866.47021484375,6827.1201171875,6866.47021484375,4872440000,15.779999732971191
|
| 10 |
+
2025-12-05,6870.39990234375,6895.77978515625,6858.2900390625,6866.31982421875,4944560000,15.40999984741211
|
| 11 |
+
2025-12-08,6846.509765625,6878.27001953125,6827.18994140625,6875.2001953125,4757130000,16.65999984741211
|
| 12 |
+
2025-12-09,6840.509765625,6864.919921875,6837.43017578125,6840.60986328125,4508050000,16.93000030517578
|
| 13 |
+
2025-12-10,6886.68017578125,6900.669921875,6824.68994140625,6833.490234375,5526570000,15.770000457763672
|
| 14 |
+
2025-12-11,6901.0,6903.4599609375,6833.4501953125,6861.2998046875,5021060000,14.850000381469727
|
| 15 |
+
2025-12-12,6827.41015625,6899.85009765625,6801.7900390625,6886.85009765625,4910160000,15.739999771118164
|
| 16 |
+
2025-12-15,6816.509765625,6861.58984375,6801.490234375,6860.18994140625,4975600000,16.5
|
| 17 |
+
2025-12-16,6800.259765625,6819.27001953125,6759.740234375,6800.1201171875,4983180000,16.479999542236328
|
| 18 |
+
2025-12-17,6721.43017578125,6812.259765625,6720.43017578125,6802.8798828125,5122120000,17.6200008392334
|
| 19 |
+
2025-12-18,6774.759765625,6816.1298828125,6758.5,6778.06005859375,5101190000,16.8700008392334
|
| 20 |
+
2025-12-19,6834.5,6840.02001953125,6792.6201171875,6792.6201171875,8554470000,14.90999984741211
|
| 21 |
+
2025-12-22,6878.490234375,6882.02978515625,6855.740234375,6865.2099609375,4465030000,14.079999923706055
|
| 22 |
+
2025-12-23,6909.7900390625,6910.8798828125,6868.81005859375,6872.41015625,3820560000,14.0
|
| 23 |
+
2025-12-24,6932.0498046875,6937.31982421875,6904.91015625,6904.91015625,1798270000,13.470000267028809
|
| 24 |
+
2025-12-26,6929.93994140625,6945.77001953125,6921.60009765625,6936.02001953125,2586550000,13.600000381469727
|
| 25 |
+
2025-12-29,6905.740234375,6920.2099609375,6888.759765625,6903.60009765625,3541750000,14.199999809265137
|
| 26 |
+
2025-12-30,6896.240234375,6913.25,6893.47021484375,6900.43994140625,3309930000,14.329999923706055
|
| 27 |
+
2025-12-31,6845.5,6901.419921875,6844.5498046875,6898.81982421875,3261830000,14.949999809265137
|
| 28 |
+
2026-01-02,6858.47021484375,6894.8701171875,6824.31005859375,6878.10986328125,4184120000,14.510000228881836
|
| 29 |
+
2026-01-05,6902.0498046875,6920.3798828125,6891.56005859375,6892.18994140625,5771930000,14.899999618530273
|
| 30 |
+
2026-01-06,6944.81982421875,6948.68994140625,6904.02001953125,6908.02978515625,5509680000,14.75
|
| 31 |
+
2026-01-07,6920.93017578125,6965.68994140625,6919.18994140625,6945.06982421875,5214480000,15.380000114440918
|
| 32 |
+
2026-01-08,6921.4599609375,6931.27978515625,6899.330078125,6914.10986328125,5333200000,15.449999809265137
|
| 33 |
+
2026-01-09,6966.27978515625,6978.35986328125,6917.64013671875,6927.830078125,5163900000,14.489999771118164
|
| 34 |
+
2026-01-12,6977.27001953125,6986.330078125,6934.06982421875,6944.1201171875,5019040000,15.119999885559082
|
| 35 |
+
2026-01-13,6963.740234375,6985.830078125,6938.77001953125,6977.41015625,5091730000,15.979999542236328
|
| 36 |
+
2026-01-14,6926.60009765625,6941.2998046875,6885.740234375,6937.41015625,5530830000,16.75
|
| 37 |
+
2026-01-15,6944.47021484375,6979.33984375,6937.93017578125,6969.4599609375,5114050000,15.84000015258789
|
| 38 |
+
2026-01-16,6940.009765625,6967.2998046875,6925.08984375,6960.5400390625,5356550000,15.859999656677246
|
| 39 |
+
2026-01-20,6796.85986328125,6871.169921875,6789.0498046875,6865.240234375,5769500000,20.09000015258789
|
| 40 |
+
2026-01-21,6875.6201171875,6910.39013671875,6804.9599609375,6810.7099609375,5835520000,16.899999618530273
|
| 41 |
+
2026-01-22,6913.35009765625,6934.75,6893.6201171875,6914.43994140625,5307580000,15.640000343322754
|
| 42 |
+
2026-01-23,6915.60986328125,6932.9599609375,6895.5,6907.85009765625,4871930000,16.09000015258789
|
| 43 |
+
2026-01-26,6950.22998046875,6964.66015625,6921.60009765625,6923.22998046875,4968050000,16.149999618530273
|
| 44 |
+
2026-01-27,6978.60009765625,6988.81982421875,6958.830078125,6965.9599609375,5331720000,16.350000381469727
|
| 45 |
+
2026-01-28,6978.02978515625,7002.27978515625,6963.4599609375,7002.0,5507670000,16.350000381469727
|
| 46 |
+
2026-01-29,6969.009765625,6992.83984375,6870.7998046875,6977.740234375,6877780000,16.8799991607666
|
| 47 |
+
2026-01-30,6939.02978515625,6964.08984375,6893.47998046875,6947.27001953125,6697340000,17.440000534057617
|
| 48 |
+
2026-02-02,6976.43994140625,6991.919921875,6914.33984375,6916.64013671875,5772050000,16.34000015258789
|
| 49 |
+
2026-02-03,6917.81005859375,6993.080078125,6862.0498046875,6985.4501953125,7017660000,18.0
|
| 50 |
+
2026-02-04,6882.72021484375,6936.08984375,6838.7998046875,6924.5,7475750000,18.639999389648438
|
| 51 |
+
2026-02-05,6798.39990234375,6857.85009765625,6780.1298828125,6837.39013671875,6989120000,21.770000457763672
|
| 52 |
+
2026-02-06,6932.2998046875,6944.89013671875,6816.740234375,6816.740234375,6283680000,20.3700008392334
|
| 53 |
+
2026-02-09,6964.81982421875,6980.10009765625,6905.8701171875,6917.259765625,5650260000,17.360000610351562
|
| 54 |
+
2026-02-10,6941.81005859375,6986.830078125,6937.52978515625,6974.490234375,5595950000,17.790000915527344
|
| 55 |
+
2026-02-11,6941.47021484375,6993.47998046875,6911.97021484375,6976.47998046875,6175510000,17.649999618530273
|
| 56 |
+
2026-02-12,6832.759765625,6973.22021484375,6824.0400390625,6957.5400390625,7118700000,20.81999969482422
|
| 57 |
+
2026-02-13,6836.169921875,6881.9599609375,6794.5498046875,6834.27001953125,5718360000,20.600000381469727
|
| 58 |
+
2026-02-17,6843.22021484375,6866.990234375,6775.5,6819.85986328125,5418480000,20.290000915527344
|
| 59 |
+
2026-02-18,6881.31005859375,6909.1201171875,6849.66015625,6855.47998046875,5098160000,19.6200008392334
|
| 60 |
+
2026-02-19,6861.89013671875,6879.1201171875,6833.06005859375,6861.33984375,5151690000,20.229999542236328
|
| 61 |
+
2026-02-20,6909.509765625,6915.85986328125,6836.330078125,6843.259765625,3336389000,19.190000534057617
|
data_fetcher.py
ADDED
|
@@ -0,0 +1,142 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import time
|
| 3 |
+
import yfinance as yf
|
| 4 |
+
import pandas as pd
|
| 5 |
+
import finnhub
|
| 6 |
+
import streamlit as st
|
| 7 |
+
from dotenv import load_dotenv
|
| 8 |
+
from datetime import datetime, timedelta
|
| 9 |
+
|
| 10 |
+
# Load environment variables
|
| 11 |
+
load_dotenv()
|
| 12 |
+
|
| 13 |
+
class DataFetcher:
    """Fetch index/VIX price history and market headlines.

    Price data is downloaded through ``yfinance``; if that fails for any
    reason the bundled CSV snapshot is used instead. Headlines are pulled
    from Finnhub's company-news endpoint, one calendar day per request.
    """

    # CSV snapshot used when the live download fails. The path is relative to
    # the current working directory, exactly as in the original code.
    BACKUP_PATH = "data/market_data_backup.csv"

    def __init__(self, ticker="^GSPC", vix_ticker="^VIX"):
        """Store the ticker symbols and build the Finnhub client.

        Args:
            ticker: Symbol of the index to download price data for.
            vix_ticker: Symbol of the volatility index merged in as ``VIX``.

        Raises:
            ValueError: If the ``FINNHUB_API_KEY`` environment variable is
                missing or empty.
        """
        self.ticker = ticker
        self.vix_ticker = vix_ticker

        # Initialize Finnhub Client
        api_key = os.getenv("FINNHUB_API_KEY")
        if not api_key:
            raise ValueError("❌ FINNHUB_API_KEY not found in .env file!")

        self.finnhub_client = finnhub.Client(api_key=api_key)

    def fetch_market_data(self, days=50):
        """Return daily price data for ``self.ticker`` plus a ``VIX`` column.

        The live download is attempted first. Any failure (network error,
        empty response, unexpected column layout) triggers a fallback to the
        CSV at ``BACKUP_PATH``, from which the last ``days`` rows are
        returned.

        Args:
            days: Number of most recent daily rows to request.

        Returns:
            A ``pandas.DataFrame`` indexed by date.

        Raises:
            Whatever ``pandas.read_csv`` raises (e.g. ``FileNotFoundError``)
            if the live fetch fails AND the backup CSV cannot be read.
        """
        print(f"📡 Attempting to fetch last {days} days of {self.ticker} and {self.vix_ticker}...")

        try:
            # 1. TRY TO FETCH LIVE DATA
            # These calls were previously commented out, which left `df` and
            # `df_vix` unbound: the resulting NameError was swallowed below
            # and the backup CSV was returned on every call.
            df = yf.download(self.ticker, period=f"{days}d", interval="1d", progress=False)
            df_vix = yf.download(self.vix_ticker, period=f"{days}d", interval="1d", progress=False)

            # Validate before touching the frames: an empty (or missing)
            # response means the download did not yield usable data, so force
            # the fallback path.
            if df is None or df.empty or df_vix is None or df_vix.empty:
                raise ValueError("Yahoo Finance returned empty data.")

            # Handle yfinance MultiIndex columns if they exist
            if isinstance(df.columns, pd.MultiIndex):
                df.columns = df.columns.get_level_values(0)
            if isinstance(df_vix.columns, pd.MultiIndex):
                df_vix.columns = df_vix.columns.get_level_values(0)

            # Index-aligned assignment; dates missing from the VIX frame become
            # NaN and are then forward-filled.
            df['VIX'] = df_vix['Close']
            return df.ffill()

        except Exception as e:
            # 2. FALLBACK TO LOCAL CSV
            # Deliberately broad: any live-fetch problem should degrade to the
            # snapshot instead of crashing the app.
            print(f"⚠️ Live fetch failed ({e}). Loading backup data from data/ folder...")

            df_backup = pd.read_csv(self.BACKUP_PATH, index_col=0, parse_dates=True)

            # Return only the requested number of days
            return df_backup.tail(days)

    # 🛡️ STREAMLIT CACHE: the leading underscore in '_self' tells Streamlit not
    # to hash that argument, so it doesn't try to hash the Finnhub client.
    # ttl=3600 caches the news for 1 hour so repeated button clicks load instantly.
    @st.cache_data(ttl=3600, show_spinner=False)
    def fetch_market_news(_self, days=45):
        """Collect headlines for the last ``days`` calendar days.

        Issues one Finnhub ``company_news`` request per day for the symbol
        ``'SPY'`` (used as a market proxy), walking backwards from today.
        Days whose request fails are logged and skipped.

        Args:
            days: Number of calendar days, counting back from now, to query.

        Returns:
            A ``pandas.DataFrame`` with columns ``Title`` and ``Date``,
            de-duplicated on both. Empty (with those two columns) when no
            news was retrieved.
        """
        print(f"📰 Fetching last {days} days of market headlines...")

        all_news = []
        end_date = datetime.now()

        # Try to render a Streamlit progress bar; fall back to no bar if
        # Streamlit cannot create one. `except Exception` (not a bare
        # `except`) so KeyboardInterrupt/SystemExit still propagate.
        try:
            progress_bar = st.progress(0, text="Fetching historical news data (avoiding rate limits)...")
        except Exception:
            progress_bar = None

        # Loop backwards through time, day by day
        for i in range(days):
            target_date = end_date - timedelta(days=i)
            date_str = target_date.strftime('%Y-%m-%d')

            try:
                # FINNHUB TRICK: Use 'SPY' company news to get historical market coverage
                daily_news = _self.finnhub_client.company_news('SPY', _from=date_str, to=date_str)

                if daily_news:
                    all_news.extend(daily_news)

                # 🛑 RATE LIMIT SHIELD: Finnhub free tier allows 60 requests/minute.
                # Sleeping 1.1 seconds between calls keeps us under that limit.
                time.sleep(1.1)

            except Exception as e:
                # Best effort: log the failed day and keep going.
                print(f"⚠️ API Error on {date_str}: {e}")
                time.sleep(5)  # Take a longer pause if the API gets angry

            # Update UI progress
            if progress_bar:
                progress_bar.progress((i + 1) / days, text=f"Fetched news for {date_str}...")

        # Clear the progress bar when finished
        if progress_bar:
            progress_bar.empty()

        # Convert the master list into a DataFrame
        df_news = pd.DataFrame(all_news)

        if df_news.empty:
            print("⚠️ No news found in the specified window.")
            return pd.DataFrame(columns=['Title', 'Date'])

        # Convert Unix timestamp (seconds) to a date object
        df_news['Date'] = pd.to_datetime(df_news['datetime'], unit='s').dt.date

        # Keep only headline + date, renamed to the 'Title'/'Date' schema
        df_news = df_news[['headline', 'Date']].rename(columns={'headline': 'Title'})

        # Drop duplicates in case of overlapping API returns
        df_news = df_news.drop_duplicates(subset=['Title', 'Date'])

        print(f"✅ Successfully fetched {len(df_news)} historical headlines.")
        return df_news
|
| 128 |
+
|
| 129 |
+
if __name__ == "__main__":
    # Manual smoke test: exercise both fetch paths and print small samples.
    data_fetcher = DataFetcher()

    # Market data: show the most recent rows.
    prices = data_fetcher.fetch_market_data(days=50)
    print("\n--- Market Data Sample ---")
    print(prices.tail())

    # News: show the first and last rows, then the total count.
    headlines = data_fetcher.fetch_market_news(days=45)
    print("\n--- Market News Sample ---")
    for preview in (headlines.head(), headlines.tail()):
        print(preview)
    print(f"\nTotal Headlines Fetched: {len(headlines)}")
|