Khiry McCurn committed on
Commit
8b2da55
·
unverified ·
1 Parent(s): 0bbf00f

Update update_data.py

Browse files
Files changed (1) hide show
  1. update_data.py +52 -59
update_data.py CHANGED
@@ -5,6 +5,7 @@ This script is designed to run as a scheduled job at end of trading day
5
 
6
  Features:
7
  - Batched downloads to avoid rate limiting
 
8
  - Delays between batches
9
  - Retry logic for failed downloads
10
  - Supports ~600 tickers (S&P 500 + Nasdaq 100 + recent IPOs + ETFs)
@@ -27,29 +28,29 @@ TICKERS = [
27
  # S&P 500 Components
28
  'A', 'AAL', 'AAPL', 'ABBV', 'ABNB', 'ABT', 'ACGL', 'ACN', 'ADBE', 'ADI', 'ADM', 'ADP', 'ADSK', 'AEE', 'AEP',
29
  'AES', 'AFL', 'AIG', 'AIZ', 'AJG', 'AKAM', 'ALB', 'ALGN', 'ALL', 'ALLE', 'AMAT', 'AMCR', 'AMD', 'AME', 'AMGN',
30
- 'AMP', 'AMT', 'AMZN', 'ANET', 'ANSS', 'AON', 'AOS', 'APA', 'APD', 'APH', 'APTV', 'ARE', 'ATO', 'AVB', 'AVGO',
31
  'AVY', 'AWK', 'AXON', 'AXP', 'AZO', 'BA', 'BAC', 'BALL', 'BAX', 'BBWI', 'BBY', 'BDX', 'BEN', 'BF-B', 'BG',
32
  'BIIB', 'BIO', 'BK', 'BKNG', 'BKR', 'BLDR', 'BLK', 'BMY', 'BR', 'BRK-B', 'BRO', 'BSX', 'BWA', 'BX', 'BXP',
33
  'C', 'CAG', 'CAH', 'CARR', 'CAT', 'CB', 'CBOE', 'CBRE', 'CCI', 'CCL', 'CDNS', 'CDW', 'CE', 'CEG', 'CF',
34
  'CFG', 'CHD', 'CHRW', 'CHTR', 'CI', 'CINF', 'CL', 'CLX', 'CMCSA', 'CME', 'CMG', 'CMI', 'CMS', 'CNC', 'CNP',
35
  'COF', 'COO', 'COP', 'COR', 'COST', 'CPAY', 'CPB', 'CPRT', 'CPT', 'CRL', 'CRM', 'CRWD', 'CSCO', 'CSGP', 'CSX',
36
- 'CTAS', 'CTLT', 'CTRA', 'CTSH', 'CTVA', 'CVS', 'CVX', 'CZR', 'D', 'DAL', 'DAY', 'DD', 'DE', 'DECK', 'DFS',
37
  'DG', 'DGX', 'DHI', 'DHR', 'DIS', 'DLR', 'DLTR', 'DOC', 'DOV', 'DOW', 'DPZ', 'DRI', 'DTE', 'DUK', 'DVA',
38
  'DVN', 'DXCM', 'EA', 'EBAY', 'ECL', 'ED', 'EFX', 'EG', 'EIX', 'EL', 'ELV', 'EMN', 'EMR', 'ENPH', 'EOG',
39
  'EPAM', 'EQIX', 'EQR', 'EQT', 'ERIE', 'ES', 'ESS', 'ETN', 'ETR', 'EVRG', 'EW', 'EXC', 'EXPD', 'EXPE', 'EXR',
40
  'F', 'FANG', 'FAST', 'FCX', 'FDS', 'FDX', 'FE', 'FFIV', 'FI', 'FICO', 'FIS', 'FITB', 'FMC', 'FOX', 'FOXA',
41
  'FRT', 'FSLR', 'FTNT', 'FTV', 'GD', 'GDDY', 'GE', 'GEHC', 'GEN', 'GEV', 'GILD', 'GIS', 'GL', 'GLW', 'GM',
42
- 'GNRC', 'GOOG', 'GOOGL', 'GPC', 'GPN', 'GRMN', 'GS', 'GWW', 'HAL', 'HAS', 'HBAN', 'HCA', 'HD', 'HES', 'HIG',
43
  'HII', 'HLT', 'HOLX', 'HON', 'HPE', 'HPQ', 'HRL', 'HSIC', 'HST', 'HSY', 'HUBB', 'HUM', 'HWM', 'IBM', 'ICE',
44
  'IDXX', 'IEX', 'IFF', 'INCY', 'INTC', 'INTU', 'INVH', 'IP', 'IPG', 'IQV', 'IR', 'IRM', 'ISRG', 'IT', 'ITW',
45
- 'IVZ', 'J', 'JBHT', 'JBL', 'JCI', 'JKHY', 'JNJ', 'JNPR', 'JPM', 'K', 'KDP', 'KEY', 'KEYS', 'KHC', 'KIM',
46
  'KKR', 'KLAC', 'KMB', 'KMI', 'KMX', 'KO', 'KR', 'KVUE', 'L', 'LDOS', 'LEN', 'LH', 'LHX', 'LIN', 'LKQ',
47
  'LLY', 'LMT', 'LNT', 'LOW', 'LRCX', 'LULU', 'LUV', 'LVS', 'LW', 'LYB', 'LYV', 'MA', 'MAA', 'MAR', 'MAS',
48
  'MCD', 'MCHP', 'MCK', 'MCO', 'MDLZ', 'MDT', 'MET', 'META', 'MGM', 'MHK', 'MKC', 'MKTX', 'MLM', 'MMC', 'MMM',
49
- 'MNST', 'MO', 'MOH', 'MOS', 'MPC', 'MPWR', 'MRK', 'MRNA', 'MRO', 'MS', 'MSCI', 'MSFT', 'MSI', 'MTB', 'MTCH',
50
  'MTD', 'MU', 'NCLH', 'NDAQ', 'NDSN', 'NEE', 'NEM', 'NFLX', 'NI', 'NKE', 'NOC', 'NOW', 'NRG', 'NSC', 'NTAP',
51
  'NTRS', 'NUE', 'NVDA', 'NVR', 'NWS', 'NWSA', 'NXPI', 'O', 'ODFL', 'OKE', 'OMC', 'ON', 'ORCL', 'ORLY', 'OTIS',
52
- 'OXY', 'PANW', 'PARA', 'PAYC', 'PAYX', 'PCAR', 'PCG', 'PEG', 'PEP', 'PFE', 'PFG', 'PG', 'PGR', 'PH', 'PHM',
53
  'PKG', 'PLD', 'PLTR', 'PM', 'PNC', 'PNR', 'PNW', 'PODD', 'POOL', 'PPG', 'PPL', 'PRU', 'PSA', 'PSX', 'PTC',
54
  'PWR', 'PYPL', 'QCOM', 'QRVO', 'RCL', 'REG', 'REGN', 'RF', 'RJF', 'RL', 'RMD', 'ROK', 'ROL', 'ROP', 'ROST',
55
  'RSG', 'RTX', 'RVTY', 'SBAC', 'SBUX', 'SCHW', 'SHW', 'SJM', 'SLB', 'SMCI', 'SNA', 'SNPS', 'SO', 'SOLV', 'SPG',
@@ -57,11 +58,11 @@ TICKERS = [
57
  'TDY', 'TECH', 'TEL', 'TER', 'TFC', 'TFX', 'TGT', 'TJX', 'TMO', 'TMUS', 'TPR', 'TRGP', 'TRMB', 'TROW', 'TRV',
58
  'TSCO', 'TSLA', 'TSN', 'TT', 'TTWO', 'TXN', 'TXT', 'TYL', 'UAL', 'UBER', 'UDR', 'UHS', 'ULTA', 'UNH', 'UNP',
59
  'UPS', 'URI', 'USB', 'V', 'VICI', 'VLO', 'VLTO', 'VMC', 'VRSK', 'VRSN', 'VRTX', 'VST', 'VTR', 'VTRS', 'VZ',
60
- 'WAB', 'WAT', 'WBA', 'WBD', 'WDC', 'WEC', 'WELL', 'WFC', 'WM', 'WMB', 'WMT', 'WRB', 'WST', 'WTW', 'WY',
61
  'WYNN', 'XEL', 'XOM', 'XYL', 'YUM', 'ZBH', 'ZBRA', 'ZTS',
62
 
63
  # Nasdaq 100 additions (not in S&P 500)
64
- 'AZN', 'APP', 'ARM', 'CCEP', 'DASH', 'DDOG', 'GFS', 'MELI', 'MRVL', 'PDD', 'TEAM', 'WDAY',
65
 
66
  # ETFs and Benchmarks
67
  'SPY', 'QQQ', 'VTI', 'VOO', 'VXUS', 'DIA', 'IWM', 'TQQQ', 'SQQQ', 'BND', 'TLT', 'IEF', 'GLD', 'DBC', 'VNQ',
@@ -70,7 +71,7 @@ TICKERS = [
70
 
71
  # Notable 2020-2021 IPOs
72
  'RIVN', 'LCID', 'RBLX', 'COIN', 'HOOD', 'SNOW', 'U', 'CPNG', 'COUR', 'OSCR', 'SOFI', 'UPST', 'AFRM', 'PATH',
73
- 'ZI', 'BROS', 'DUOL', 'ASAN', 'FVRR', 'DOCS', 'DNUT', 'YOU', 'AI', 'DLO', 'JAMF', 'NCNO', 'BIGC', 'JMIA',
74
  'DKNG', 'NKLA', 'BLNK', 'QS', 'GOEV', 'LAZR', 'LMND', 'OPEN',
75
 
76
  # 2022 IPOs (notable)
@@ -98,20 +99,12 @@ DATA_DIR = "data"
98
  BATCH_SIZE = 50 # Download 50 tickers at a time
99
  BATCH_DELAY = 5 # Wait 5 seconds between batches
100
  MAX_RETRIES = 2 # Retry failed downloads up to 2 times
 
101
 
102
 
103
  def download_ticker_data(ticker, start_date, end_date, retry_count=0):
104
  """
105
  Download historical data for a single ticker from Yahoo Finance
106
-
107
- Args:
108
- ticker: Stock ticker symbol
109
- start_date: Start date for historical data
110
- end_date: End date for historical data
111
- retry_count: Current retry attempt number
112
-
113
- Returns:
114
- DataFrame with OHLCV data or None if download fails
115
  """
116
  try:
117
  data = yf.download(
@@ -130,7 +123,7 @@ def download_ticker_data(ticker, start_date, end_date, retry_count=0):
130
 
131
  except Exception as e:
132
  if retry_count < MAX_RETRIES:
133
- time.sleep(2) # Wait before retry
134
  return download_ticker_data(ticker, start_date, end_date, retry_count + 1)
135
  print(f"❌ Error downloading {ticker} after {MAX_RETRIES} retries: {e}")
136
  return None
@@ -139,16 +132,6 @@ def download_ticker_data(ticker, start_date, end_date, retry_count=0):
139
  def download_batch(tickers_batch, start_date, end_date, batch_num, total_batches):
140
  """
141
  Download a batch of tickers
142
-
143
- Args:
144
- tickers_batch: List of tickers to download
145
- start_date: Start date
146
- end_date: End date
147
- batch_num: Current batch number
148
- total_batches: Total number of batches
149
-
150
- Returns:
151
- Dict of {ticker: DataFrame} for successful downloads
152
  """
153
  print(f"\n📦 Batch {batch_num}/{total_batches} ({len(tickers_batch)} tickers)")
154
  print(f" Tickers: {', '.join(tickers_batch[:10])}{'...' if len(tickers_batch) > 10 else ''}")
@@ -169,11 +152,7 @@ def download_batch(tickers_batch, start_date, end_date, batch_num, total_batches
169
  def update_all_tickers():
170
  """
171
  Update CSV files for all tickers with batching and rate limiting
172
-
173
- Returns:
174
- Tuple of (success_count, error_count)
175
  """
176
- # Calculate date range (5 years of history)
177
  end_date = datetime.now()
178
  start_date = end_date - timedelta(days=5*365)
179
 
@@ -186,21 +165,17 @@ def update_all_tickers():
186
  print(f"⏱️ Delay between batches: {BATCH_DELAY}s")
187
  print(f"{'='*70}")
188
 
189
- # Create data directory
190
  os.makedirs(DATA_DIR, exist_ok=True)
191
 
192
- # Split tickers into batches
193
  batches = [TICKERS[i:i + BATCH_SIZE] for i in range(0, len(TICKERS), BATCH_SIZE)]
194
  total_batches = len(batches)
195
 
196
  success_count = 0
197
  error_count = 0
198
 
199
- # Process each batch
200
  for batch_num, batch in enumerate(batches, 1):
201
  results = download_batch(batch, start_date, end_date, batch_num, total_batches)
202
 
203
- # Save successful downloads
204
  for ticker, data in results.items():
205
  filepath = os.path.join(DATA_DIR, f"{ticker}.csv")
206
  data.to_csv(filepath)
@@ -208,7 +183,6 @@ def update_all_tickers():
208
 
209
  error_count += len(batch) - len(results)
210
 
211
- # Delay between batches (except for last batch)
212
  if batch_num < total_batches:
213
  print(f" ⏳ Waiting {BATCH_DELAY}s before next batch...")
214
  time.sleep(BATCH_DELAY)
@@ -234,31 +208,54 @@ def update_all_tickers():
234
 
235
  def push_to_huggingface(token):
236
  """
237
- Push updated CSV files to Hugging Face repository
238
-
239
- Args:
240
- token: Hugging Face API token with write access
241
  """
242
  try:
243
  print(f"\n{'='*70}")
244
- print("🚀 PUSHING TO HUGGING FACE")
245
  print(f"{'='*70}\n")
246
 
247
- # Login to Hugging Face
248
  login(token=token)
249
  api = HfApi()
250
 
251
- # Upload the entire data directory
252
- print(f"📤 Uploading data directory to {REPO_ID}...")
253
- api.upload_folder(
254
- folder_path=DATA_DIR,
255
- path_in_repo=DATA_DIR,
256
- repo_id=REPO_ID,
257
- repo_type=REPO_TYPE,
258
- commit_message=f"🤖 Bulk data update ({len(TICKERS)} tickers) - {datetime.now().strftime('%Y-%m-%d %H:%M UTC')}"
259
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
260
 
261
- print(f"✅ Successfully pushed to Hugging Face!")
262
  print(f"{'='*70}\n")
263
  return True
264
 
@@ -269,22 +266,18 @@ def push_to_huggingface(token):
269
 
270
  def main():
271
  """Main execution function"""
272
- # Get HF token from environment variable
273
  hf_token = os.environ.get("HF_TOKEN")
274
 
275
  if not hf_token:
276
  print("❌ ERROR: HF_TOKEN environment variable not set!")
277
- print("Please set your Hugging Face write token as HF_TOKEN")
278
  sys.exit(1)
279
 
280
  print(f"\n🎯 Horizon Backtester - Bulk Data Update")
281
  print(f" Universe: {len(TICKERS)} tickers")
282
- print(f" Estimated time: ~{(len(TICKERS) // BATCH_SIZE) * BATCH_DELAY // 60 + 5} minutes\n")
283
 
284
- # Update all ticker data
285
  success_count, error_count = update_all_tickers()
286
 
287
- # Push to Hugging Face if we had any successful downloads
288
  if success_count > 0:
289
  push_success = push_to_huggingface(hf_token)
290
  if push_success:
 
5
 
6
  Features:
7
  - Batched downloads to avoid rate limiting
8
+ - Batched uploads to avoid timeout
9
  - Delays between batches
10
  - Retry logic for failed downloads
11
  - Supports ~600 tickers (S&P 500 + Nasdaq 100 + recent IPOs + ETFs)
 
28
  # S&P 500 Components
29
  'A', 'AAL', 'AAPL', 'ABBV', 'ABNB', 'ABT', 'ACGL', 'ACN', 'ADBE', 'ADI', 'ADM', 'ADP', 'ADSK', 'AEE', 'AEP',
30
  'AES', 'AFL', 'AIG', 'AIZ', 'AJG', 'AKAM', 'ALB', 'ALGN', 'ALL', 'ALLE', 'AMAT', 'AMCR', 'AMD', 'AME', 'AMGN',
31
+ 'AMP', 'AMT', 'AMZN', 'ANET', 'AON', 'AOS', 'APA', 'APD', 'APH', 'APTV', 'ARE', 'ATO', 'AVB', 'AVGO',
32
  'AVY', 'AWK', 'AXON', 'AXP', 'AZO', 'BA', 'BAC', 'BALL', 'BAX', 'BBWI', 'BBY', 'BDX', 'BEN', 'BF-B', 'BG',
33
  'BIIB', 'BIO', 'BK', 'BKNG', 'BKR', 'BLDR', 'BLK', 'BMY', 'BR', 'BRK-B', 'BRO', 'BSX', 'BWA', 'BX', 'BXP',
34
  'C', 'CAG', 'CAH', 'CARR', 'CAT', 'CB', 'CBOE', 'CBRE', 'CCI', 'CCL', 'CDNS', 'CDW', 'CE', 'CEG', 'CF',
35
  'CFG', 'CHD', 'CHRW', 'CHTR', 'CI', 'CINF', 'CL', 'CLX', 'CMCSA', 'CME', 'CMG', 'CMI', 'CMS', 'CNC', 'CNP',
36
  'COF', 'COO', 'COP', 'COR', 'COST', 'CPAY', 'CPB', 'CPRT', 'CPT', 'CRL', 'CRM', 'CRWD', 'CSCO', 'CSGP', 'CSX',
37
+ 'CTAS', 'CTRA', 'CTSH', 'CTVA', 'CVS', 'CVX', 'CZR', 'D', 'DAL', 'DAY', 'DD', 'DE', 'DECK',
38
  'DG', 'DGX', 'DHI', 'DHR', 'DIS', 'DLR', 'DLTR', 'DOC', 'DOV', 'DOW', 'DPZ', 'DRI', 'DTE', 'DUK', 'DVA',
39
  'DVN', 'DXCM', 'EA', 'EBAY', 'ECL', 'ED', 'EFX', 'EG', 'EIX', 'EL', 'ELV', 'EMN', 'EMR', 'ENPH', 'EOG',
40
  'EPAM', 'EQIX', 'EQR', 'EQT', 'ERIE', 'ES', 'ESS', 'ETN', 'ETR', 'EVRG', 'EW', 'EXC', 'EXPD', 'EXPE', 'EXR',
41
  'F', 'FANG', 'FAST', 'FCX', 'FDS', 'FDX', 'FE', 'FFIV', 'FI', 'FICO', 'FIS', 'FITB', 'FMC', 'FOX', 'FOXA',
42
  'FRT', 'FSLR', 'FTNT', 'FTV', 'GD', 'GDDY', 'GE', 'GEHC', 'GEN', 'GEV', 'GILD', 'GIS', 'GL', 'GLW', 'GM',
43
+ 'GNRC', 'GOOG', 'GOOGL', 'GPC', 'GPN', 'GRMN', 'GS', 'GWW', 'HAL', 'HAS', 'HBAN', 'HCA', 'HD', 'HIG',
44
  'HII', 'HLT', 'HOLX', 'HON', 'HPE', 'HPQ', 'HRL', 'HSIC', 'HST', 'HSY', 'HUBB', 'HUM', 'HWM', 'IBM', 'ICE',
45
  'IDXX', 'IEX', 'IFF', 'INCY', 'INTC', 'INTU', 'INVH', 'IP', 'IPG', 'IQV', 'IR', 'IRM', 'ISRG', 'IT', 'ITW',
46
+ 'IVZ', 'J', 'JBHT', 'JBL', 'JCI', 'JKHY', 'JNJ', 'JPM', 'K', 'KDP', 'KEY', 'KEYS', 'KHC', 'KIM',
47
  'KKR', 'KLAC', 'KMB', 'KMI', 'KMX', 'KO', 'KR', 'KVUE', 'L', 'LDOS', 'LEN', 'LH', 'LHX', 'LIN', 'LKQ',
48
  'LLY', 'LMT', 'LNT', 'LOW', 'LRCX', 'LULU', 'LUV', 'LVS', 'LW', 'LYB', 'LYV', 'MA', 'MAA', 'MAR', 'MAS',
49
  'MCD', 'MCHP', 'MCK', 'MCO', 'MDLZ', 'MDT', 'MET', 'META', 'MGM', 'MHK', 'MKC', 'MKTX', 'MLM', 'MMC', 'MMM',
50
+ 'MNST', 'MO', 'MOH', 'MOS', 'MPC', 'MPWR', 'MRK', 'MRNA', 'MRVL', 'MS', 'MSCI', 'MSFT', 'MSI', 'MTB', 'MTCH',
51
  'MTD', 'MU', 'NCLH', 'NDAQ', 'NDSN', 'NEE', 'NEM', 'NFLX', 'NI', 'NKE', 'NOC', 'NOW', 'NRG', 'NSC', 'NTAP',
52
  'NTRS', 'NUE', 'NVDA', 'NVR', 'NWS', 'NWSA', 'NXPI', 'O', 'ODFL', 'OKE', 'OMC', 'ON', 'ORCL', 'ORLY', 'OTIS',
53
+ 'OXY', 'PANW', 'PAYC', 'PAYX', 'PCAR', 'PCG', 'PEG', 'PEP', 'PFE', 'PFG', 'PG', 'PGR', 'PH', 'PHM',
54
  'PKG', 'PLD', 'PLTR', 'PM', 'PNC', 'PNR', 'PNW', 'PODD', 'POOL', 'PPG', 'PPL', 'PRU', 'PSA', 'PSX', 'PTC',
55
  'PWR', 'PYPL', 'QCOM', 'QRVO', 'RCL', 'REG', 'REGN', 'RF', 'RJF', 'RL', 'RMD', 'ROK', 'ROL', 'ROP', 'ROST',
56
  'RSG', 'RTX', 'RVTY', 'SBAC', 'SBUX', 'SCHW', 'SHW', 'SJM', 'SLB', 'SMCI', 'SNA', 'SNPS', 'SO', 'SOLV', 'SPG',
 
58
  'TDY', 'TECH', 'TEL', 'TER', 'TFC', 'TFX', 'TGT', 'TJX', 'TMO', 'TMUS', 'TPR', 'TRGP', 'TRMB', 'TROW', 'TRV',
59
  'TSCO', 'TSLA', 'TSN', 'TT', 'TTWO', 'TXN', 'TXT', 'TYL', 'UAL', 'UBER', 'UDR', 'UHS', 'ULTA', 'UNH', 'UNP',
60
  'UPS', 'URI', 'USB', 'V', 'VICI', 'VLO', 'VLTO', 'VMC', 'VRSK', 'VRSN', 'VRTX', 'VST', 'VTR', 'VTRS', 'VZ',
61
+ 'WAB', 'WAT', 'WBD', 'WDC', 'WEC', 'WELL', 'WFC', 'WM', 'WMB', 'WMT', 'WRB', 'WST', 'WTW', 'WY',
62
  'WYNN', 'XEL', 'XOM', 'XYL', 'YUM', 'ZBH', 'ZBRA', 'ZTS',
63
 
64
  # Nasdaq 100 additions (not in S&P 500)
65
+ 'AZN', 'APP', 'ARM', 'CCEP', 'DASH', 'DDOG', 'GFS', 'MELI', 'PDD', 'TEAM', 'WDAY',
66
 
67
  # ETFs and Benchmarks
68
  'SPY', 'QQQ', 'VTI', 'VOO', 'VXUS', 'DIA', 'IWM', 'TQQQ', 'SQQQ', 'BND', 'TLT', 'IEF', 'GLD', 'DBC', 'VNQ',
 
71
 
72
  # Notable 2020-2021 IPOs
73
  'RIVN', 'LCID', 'RBLX', 'COIN', 'HOOD', 'SNOW', 'U', 'CPNG', 'COUR', 'OSCR', 'SOFI', 'UPST', 'AFRM', 'PATH',
74
+ 'BROS', 'DUOL', 'ASAN', 'FVRR', 'DOCS', 'DNUT', 'YOU', 'AI', 'DLO', 'JAMF', 'NCNO', 'JMIA',
75
  'DKNG', 'NKLA', 'BLNK', 'QS', 'GOEV', 'LAZR', 'LMND', 'OPEN',
76
 
77
  # 2022 IPOs (notable)
 
99
  BATCH_SIZE = 50 # Download 50 tickers at a time
100
  BATCH_DELAY = 5 # Wait 5 seconds between batches
101
  MAX_RETRIES = 2 # Retry failed downloads up to 2 times
102
+ UPLOAD_BATCH_SIZE = 100 # Upload 100 files at a time
103
 
104
 
105
  def download_ticker_data(ticker, start_date, end_date, retry_count=0):
106
  """
107
  Download historical data for a single ticker from Yahoo Finance
 
 
 
 
 
 
 
 
 
108
  """
109
  try:
110
  data = yf.download(
 
123
 
124
  except Exception as e:
125
  if retry_count < MAX_RETRIES:
126
+ time.sleep(2)
127
  return download_ticker_data(ticker, start_date, end_date, retry_count + 1)
128
  print(f"❌ Error downloading {ticker} after {MAX_RETRIES} retries: {e}")
129
  return None
 
132
  def download_batch(tickers_batch, start_date, end_date, batch_num, total_batches):
133
  """
134
  Download a batch of tickers
 
 
 
 
 
 
 
 
 
 
135
  """
136
  print(f"\n📦 Batch {batch_num}/{total_batches} ({len(tickers_batch)} tickers)")
137
  print(f" Tickers: {', '.join(tickers_batch[:10])}{'...' if len(tickers_batch) > 10 else ''}")
 
152
  def update_all_tickers():
153
  """
154
  Update CSV files for all tickers with batching and rate limiting
 
 
 
155
  """
 
156
  end_date = datetime.now()
157
  start_date = end_date - timedelta(days=5*365)
158
 
 
165
  print(f"⏱️ Delay between batches: {BATCH_DELAY}s")
166
  print(f"{'='*70}")
167
 
 
168
  os.makedirs(DATA_DIR, exist_ok=True)
169
 
 
170
  batches = [TICKERS[i:i + BATCH_SIZE] for i in range(0, len(TICKERS), BATCH_SIZE)]
171
  total_batches = len(batches)
172
 
173
  success_count = 0
174
  error_count = 0
175
 
 
176
  for batch_num, batch in enumerate(batches, 1):
177
  results = download_batch(batch, start_date, end_date, batch_num, total_batches)
178
 
 
179
  for ticker, data in results.items():
180
  filepath = os.path.join(DATA_DIR, f"{ticker}.csv")
181
  data.to_csv(filepath)
 
183
 
184
  error_count += len(batch) - len(results)
185
 
 
186
  if batch_num < total_batches:
187
  print(f" ⏳ Waiting {BATCH_DELAY}s before next batch...")
188
  time.sleep(BATCH_DELAY)
 
208
 
209
  def push_to_huggingface(token):
210
  """
211
+ Push updated CSV files to Hugging Face repository in batches
 
 
 
212
  """
213
  try:
214
  print(f"\n{'='*70}")
215
+ print("🚀 PUSHING TO HUGGING FACE (batched uploads)")
216
  print(f"{'='*70}\n")
217
 
 
218
  login(token=token)
219
  api = HfApi()
220
 
221
+ # Get list of all files to upload
222
+ all_files = []
223
+ for filename in os.listdir(DATA_DIR):
224
+ filepath = os.path.join(DATA_DIR, filename)
225
+ if os.path.isfile(filepath):
226
+ all_files.append((filepath, f"data/{filename}"))
227
+
228
+ print(f"📁 Total files to upload: {len(all_files)}")
229
+
230
+ # Upload in batches
231
+ total_batches = (len(all_files) + UPLOAD_BATCH_SIZE - 1) // UPLOAD_BATCH_SIZE
232
+
233
+ for batch_num in range(total_batches):
234
+ start_idx = batch_num * UPLOAD_BATCH_SIZE
235
+ end_idx = min(start_idx + UPLOAD_BATCH_SIZE, len(all_files))
236
+ batch = all_files[start_idx:end_idx]
237
+
238
+ print(f"\n📤 Upload batch {batch_num + 1}/{total_batches} ({len(batch)} files)")
239
+
240
+ for filepath, repo_path in batch:
241
+ try:
242
+ api.upload_file(
243
+ path_or_fileobj=filepath,
244
+ path_in_repo=repo_path,
245
+ repo_id=REPO_ID,
246
+ repo_type=REPO_TYPE,
247
+ commit_message=f"Update {os.path.basename(filepath)}"
248
+ )
249
+ except Exception as e:
250
+ print(f" ⚠️ Failed to upload {filepath}: {e}")
251
+
252
+ print(f" ✅ Batch {batch_num + 1} complete")
253
+
254
+ # Small delay between upload batches
255
+ if batch_num < total_batches - 1:
256
+ time.sleep(2)
257
 
258
+ print(f"\n✅ Successfully pushed to Hugging Face!")
259
  print(f"{'='*70}\n")
260
  return True
261
 
 
266
 
267
  def main():
268
  """Main execution function"""
 
269
  hf_token = os.environ.get("HF_TOKEN")
270
 
271
  if not hf_token:
272
  print("❌ ERROR: HF_TOKEN environment variable not set!")
 
273
  sys.exit(1)
274
 
275
  print(f"\n🎯 Horizon Backtester - Bulk Data Update")
276
  print(f" Universe: {len(TICKERS)} tickers")
277
+ print(f" Estimated time: ~{(len(TICKERS) // BATCH_SIZE) * BATCH_DELAY // 60 + 10} minutes\n")
278
 
 
279
  success_count, error_count = update_all_tickers()
280
 
 
281
  if success_count > 0:
282
  push_success = push_to_huggingface(hf_token)
283
  if push_success: