DevKX committed on
Commit
438a831
·
verified ·
1 Parent(s): d16e5eb

Upload 2 files

Browse files
Files changed (2) hide show
  1. data/market_data_backup.csv +61 -0
  2. data_fetcher.py +142 -0
data/market_data_backup.csv ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Date,Close,High,Low,Open,Volume,VIX
2
+ 2025-11-24,6705.1201171875,6715.75,6630.7001953125,6636.5400390625,6039740000,
3
+ 2025-11-25,6765.8798828125,6776.39990234375,6659.97998046875,6697.02978515625,5003330000,
4
+ 2025-11-26,6812.60986328125,6831.43994140625,6783.8701171875,6793.5498046875,4485000000,
5
+ 2025-11-28,6849.08984375,6850.85986328125,6819.75,6822.52001953125,2558540000,
6
+ 2025-12-01,6812.6298828125,6843.64990234375,6799.93994140625,6812.2998046875,4549370000,17.239999771118164
7
+ 2025-12-02,6829.3701171875,6851.5498046875,6806.7099609375,6830.9599609375,4582290000,16.59000015258789
8
+ 2025-12-03,6849.72021484375,6862.419921875,6810.43017578125,6815.2900390625,4736780000,16.079999923706055
9
+ 2025-12-04,6857.1201171875,6866.47021484375,6827.1201171875,6866.47021484375,4872440000,15.779999732971191
10
+ 2025-12-05,6870.39990234375,6895.77978515625,6858.2900390625,6866.31982421875,4944560000,15.40999984741211
11
+ 2025-12-08,6846.509765625,6878.27001953125,6827.18994140625,6875.2001953125,4757130000,16.65999984741211
12
+ 2025-12-09,6840.509765625,6864.919921875,6837.43017578125,6840.60986328125,4508050000,16.93000030517578
13
+ 2025-12-10,6886.68017578125,6900.669921875,6824.68994140625,6833.490234375,5526570000,15.770000457763672
14
+ 2025-12-11,6901.0,6903.4599609375,6833.4501953125,6861.2998046875,5021060000,14.850000381469727
15
+ 2025-12-12,6827.41015625,6899.85009765625,6801.7900390625,6886.85009765625,4910160000,15.739999771118164
16
+ 2025-12-15,6816.509765625,6861.58984375,6801.490234375,6860.18994140625,4975600000,16.5
17
+ 2025-12-16,6800.259765625,6819.27001953125,6759.740234375,6800.1201171875,4983180000,16.479999542236328
18
+ 2025-12-17,6721.43017578125,6812.259765625,6720.43017578125,6802.8798828125,5122120000,17.6200008392334
19
+ 2025-12-18,6774.759765625,6816.1298828125,6758.5,6778.06005859375,5101190000,16.8700008392334
20
+ 2025-12-19,6834.5,6840.02001953125,6792.6201171875,6792.6201171875,8554470000,14.90999984741211
21
+ 2025-12-22,6878.490234375,6882.02978515625,6855.740234375,6865.2099609375,4465030000,14.079999923706055
22
+ 2025-12-23,6909.7900390625,6910.8798828125,6868.81005859375,6872.41015625,3820560000,14.0
23
+ 2025-12-24,6932.0498046875,6937.31982421875,6904.91015625,6904.91015625,1798270000,13.470000267028809
24
+ 2025-12-26,6929.93994140625,6945.77001953125,6921.60009765625,6936.02001953125,2586550000,13.600000381469727
25
+ 2025-12-29,6905.740234375,6920.2099609375,6888.759765625,6903.60009765625,3541750000,14.199999809265137
26
+ 2025-12-30,6896.240234375,6913.25,6893.47021484375,6900.43994140625,3309930000,14.329999923706055
27
+ 2025-12-31,6845.5,6901.419921875,6844.5498046875,6898.81982421875,3261830000,14.949999809265137
28
+ 2026-01-02,6858.47021484375,6894.8701171875,6824.31005859375,6878.10986328125,4184120000,14.510000228881836
29
+ 2026-01-05,6902.0498046875,6920.3798828125,6891.56005859375,6892.18994140625,5771930000,14.899999618530273
30
+ 2026-01-06,6944.81982421875,6948.68994140625,6904.02001953125,6908.02978515625,5509680000,14.75
31
+ 2026-01-07,6920.93017578125,6965.68994140625,6919.18994140625,6945.06982421875,5214480000,15.380000114440918
32
+ 2026-01-08,6921.4599609375,6931.27978515625,6899.330078125,6914.10986328125,5333200000,15.449999809265137
33
+ 2026-01-09,6966.27978515625,6978.35986328125,6917.64013671875,6927.830078125,5163900000,14.489999771118164
34
+ 2026-01-12,6977.27001953125,6986.330078125,6934.06982421875,6944.1201171875,5019040000,15.119999885559082
35
+ 2026-01-13,6963.740234375,6985.830078125,6938.77001953125,6977.41015625,5091730000,15.979999542236328
36
+ 2026-01-14,6926.60009765625,6941.2998046875,6885.740234375,6937.41015625,5530830000,16.75
37
+ 2026-01-15,6944.47021484375,6979.33984375,6937.93017578125,6969.4599609375,5114050000,15.84000015258789
38
+ 2026-01-16,6940.009765625,6967.2998046875,6925.08984375,6960.5400390625,5356550000,15.859999656677246
39
+ 2026-01-20,6796.85986328125,6871.169921875,6789.0498046875,6865.240234375,5769500000,20.09000015258789
40
+ 2026-01-21,6875.6201171875,6910.39013671875,6804.9599609375,6810.7099609375,5835520000,16.899999618530273
41
+ 2026-01-22,6913.35009765625,6934.75,6893.6201171875,6914.43994140625,5307580000,15.640000343322754
42
+ 2026-01-23,6915.60986328125,6932.9599609375,6895.5,6907.85009765625,4871930000,16.09000015258789
43
+ 2026-01-26,6950.22998046875,6964.66015625,6921.60009765625,6923.22998046875,4968050000,16.149999618530273
44
+ 2026-01-27,6978.60009765625,6988.81982421875,6958.830078125,6965.9599609375,5331720000,16.350000381469727
45
+ 2026-01-28,6978.02978515625,7002.27978515625,6963.4599609375,7002.0,5507670000,16.350000381469727
46
+ 2026-01-29,6969.009765625,6992.83984375,6870.7998046875,6977.740234375,6877780000,16.8799991607666
47
+ 2026-01-30,6939.02978515625,6964.08984375,6893.47998046875,6947.27001953125,6697340000,17.440000534057617
48
+ 2026-02-02,6976.43994140625,6991.919921875,6914.33984375,6916.64013671875,5772050000,16.34000015258789
49
+ 2026-02-03,6917.81005859375,6993.080078125,6862.0498046875,6985.4501953125,7017660000,18.0
50
+ 2026-02-04,6882.72021484375,6936.08984375,6838.7998046875,6924.5,7475750000,18.639999389648438
51
+ 2026-02-05,6798.39990234375,6857.85009765625,6780.1298828125,6837.39013671875,6989120000,21.770000457763672
52
+ 2026-02-06,6932.2998046875,6944.89013671875,6816.740234375,6816.740234375,6283680000,20.3700008392334
53
+ 2026-02-09,6964.81982421875,6980.10009765625,6905.8701171875,6917.259765625,5650260000,17.360000610351562
54
+ 2026-02-10,6941.81005859375,6986.830078125,6937.52978515625,6974.490234375,5595950000,17.790000915527344
55
+ 2026-02-11,6941.47021484375,6993.47998046875,6911.97021484375,6976.47998046875,6175510000,17.649999618530273
56
+ 2026-02-12,6832.759765625,6973.22021484375,6824.0400390625,6957.5400390625,7118700000,20.81999969482422
57
+ 2026-02-13,6836.169921875,6881.9599609375,6794.5498046875,6834.27001953125,5718360000,20.600000381469727
58
+ 2026-02-17,6843.22021484375,6866.990234375,6775.5,6819.85986328125,5418480000,20.290000915527344
59
+ 2026-02-18,6881.31005859375,6909.1201171875,6849.66015625,6855.47998046875,5098160000,19.6200008392334
60
+ 2026-02-19,6861.89013671875,6879.1201171875,6833.06005859375,6861.33984375,5151690000,20.229999542236328
61
+ 2026-02-20,6909.509765625,6915.85986328125,6836.330078125,6843.259765625,3336389000,19.190000534057617
data_fetcher.py ADDED
@@ -0,0 +1,142 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import time
3
+ import yfinance as yf
4
+ import pandas as pd
5
+ import finnhub
6
+ import streamlit as st
7
+ from dotenv import load_dotenv
8
+ from datetime import datetime, timedelta
9
+
10
# Load environment variables (via python-dotenv) at import time, before
# DataFetcher.__init__ looks up FINNHUB_API_KEY with os.getenv().
load_dotenv()
12
+
13
class DataFetcher:
    """Fetch index/VIX price history and historical market headlines.

    Price data is requested from Yahoo Finance through ``yfinance`` and falls
    back to a CSV snapshot on disk when the live request fails. Headlines are
    requested from Finnhub, one calendar day at a time.
    """

    def __init__(self, ticker="^GSPC", vix_ticker="^VIX"):
        """Store the ticker symbols and build the Finnhub client.

        Args:
            ticker: Symbol whose OHLCV history is fetched.
            vix_ticker: Symbol whose close is attached as the ``VIX`` column.

        Raises:
            ValueError: If the ``FINNHUB_API_KEY`` environment variable is
                unset or empty.
        """
        self.ticker = ticker
        self.vix_ticker = vix_ticker

        # Initialize Finnhub Client
        api_key = os.getenv("FINNHUB_API_KEY")
        if not api_key:
            raise ValueError("❌ FINNHUB_API_KEY not found in .env file!")

        self.finnhub_client = finnhub.Client(api_key=api_key)

    def fetch_market_data(self, days=50):
        """Return the last ``days`` daily rows of OHLCV data plus a VIX column.

        The live Yahoo Finance download is attempted first. Any failure on
        that path (exception, ``None`` or empty result) triggers a fallback
        to ``data/market_data_backup.csv``.

        Args:
            days: Number of most recent daily rows requested.

        Returns:
            A DataFrame indexed by date. On the live path it is the
            downloaded frame with an added ``VIX`` column, forward-filled.
            On the fallback path it is the last ``days`` rows of the CSV.

        Raises:
            Whatever ``pandas.read_csv`` raises if the live fetch fails and
            the backup CSV cannot be read.
        """
        print(f"📡 Attempting to fetch last {days} days of {self.ticker} and {self.vix_ticker}...")

        try:
            # 1. TRY TO FETCH LIVE DATA
            # These two calls must stay active: without them `df`/`df_vix`
            # are undefined and the NameError silently forces the fallback.
            df = yf.download(self.ticker, period=f"{days}d", interval="1d", progress=False)
            df_vix = yf.download(self.vix_ticker, period=f"{days}d", interval="1d", progress=False)

            # If the dataframe is empty (Yahoo stealth-blocked us), force an
            # error before touching its columns.
            if df is None or df.empty:
                raise ValueError("Yahoo Finance returned empty data.")

            # Handle yfinance MultiIndex columns if they exist
            if isinstance(df.columns, pd.MultiIndex):
                df.columns = df.columns.get_level_values(0)
            if isinstance(df_vix.columns, pd.MultiIndex):
                df_vix.columns = df_vix.columns.get_level_values(0)

            # Index-aligned assignment; dates missing from the VIX frame
            # become NaN and are then forward-filled.
            df['VIX'] = df_vix['Close']
            df = df.ffill()

            return df

        except Exception as e:
            # 2. FALLBACK TO LOCAL CSV IF BLOCKED
            # Deliberately broad: any live-path failure should degrade to
            # the backup snapshot rather than crash the caller.
            print(f"⚠️ Live fetch failed ({e}). Loading backup data from data/ folder...")

            # Relative path, so it is resolved against the current working
            # directory of the process.
            backup_path = "data/market_data_backup.csv"
            df_backup = pd.read_csv(backup_path, index_col=0, parse_dates=True)

            # Return only the requested number of days
            return df_backup.tail(days)

    # 🛡️ STREAMLIT CACHE: Ignores '_self' so it doesn't try to hash the Finnhub client.
    # ttl=3600 caches the news for 1 hour so repeated button clicks load instantly.
    @st.cache_data(ttl=3600, show_spinner=False)
    def fetch_market_news(_self, days=45):
        """Return headlines for the last ``days`` calendar days.

        Issues one Finnhub ``company_news`` request per day for ``SPY``,
        walking backwards from today. A failed request for a single day is
        logged and skipped; it does not abort the whole fetch.

        Args:
            days: Number of calendar days to cover, counting back from now.

        Returns:
            A DataFrame with columns ``Title`` and ``Date``, de-duplicated on
            both. It is empty, with the same two columns, when nothing was
            returned for the whole window.
        """
        print(f"📰 Fetching last {days} days of market headlines...")

        all_news = []
        end_date = datetime.now()

        # Try to render a Streamlit progress bar if running inside app.py.
        # The progress bar is optional UI, so a failure here is ignored, but
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        try:
            progress_bar = st.progress(0, text="Fetching historical news data (avoiding rate limits)...")
        except Exception:
            progress_bar = None

        # Loop backwards through time, day by day
        for i in range(days):
            target_date = end_date - timedelta(days=i)
            date_str = target_date.strftime('%Y-%m-%d')

            try:
                # FINNHUB TRICK: Use 'SPY' company news to get historical market coverage
                daily_news = _self.finnhub_client.company_news('SPY', _from=date_str, to=date_str)

                if daily_news:
                    all_news.extend(daily_news)

                # 🛑 RATE LIMIT SHIELD: Finnhub free tier allows 60 requests/minute.
                # Sleeping for 1.1 seconds keeps us under that limit.
                time.sleep(1.1)

            except Exception as e:
                print(f"⚠️ API Error on {date_str}: {e}")
                time.sleep(5)  # Take a longer pause if the API gets angry

            # Update UI progress
            if progress_bar is not None:
                progress_bar.progress((i + 1) / days, text=f"Fetched news for {date_str}...")

        # Clear the progress bar when finished
        if progress_bar is not None:
            progress_bar.empty()

        # Convert the master list into a DataFrame
        df_news = pd.DataFrame(all_news)

        if df_news.empty:
            print("⚠️ No news found in the specified window.")
            return pd.DataFrame(columns=['Title', 'Date'])

        # Convert Unix timestamp to YYYY-MM-DD Date object
        df_news['Date'] = pd.to_datetime(df_news['datetime'], unit='s').dt.date

        # Rename columns to match what Processor expects
        df_news = df_news[['headline', 'Date']].rename(columns={'headline': 'Title'})

        # Drop duplicates in case of overlapping API returns
        df_news = df_news.drop_duplicates(subset=['Title', 'Date'])

        print(f"✅ Successfully fetched {len(df_news)} historical headlines.")
        return df_news
128
+
129
if __name__ == "__main__":
    # Manual smoke test: run both fetchers once and print a sample of each.
    fetcher = DataFetcher()

    # Market data: show the most recent rows.
    prices = fetcher.fetch_market_data(days=50)
    print("\n--- Market Data Sample ---", prices.tail(), sep="\n")

    # News: show both ends of the frame, then the total count.
    headlines = fetcher.fetch_market_news(days=45)
    print("\n--- Market News Sample ---", headlines.head(), headlines.tail(), sep="\n")
    print(f"\nTotal Headlines Fetched: {len(headlines)}")