AJAY KASU commited on
Commit
5d6b190
·
1 Parent(s): e3682e7

Fix: Chunked download strategy to ensure LIVE data retrieval

Browse files
Files changed (1) hide show
  1. data/data_manager.py +41 -14
data/data_manager.py CHANGED
@@ -109,23 +109,50 @@ class MarketDataEngine:
109
  logger.info(f"Downloading prices for {len(valid_tickers)} tickers (Real Data Mode)...")
110
 
111
  data = pd.DataFrame()
112
- max_retries = 3
 
 
113
 
114
- for attempt in range(max_retries):
115
- try:
116
- # threads=False is key to avoiding 429 Rate Limit
117
- # Added timeout to prevent hanging
118
- data = yf.download(valid_tickers, start=start_date, group_by='ticker', threads=False, progress=False, timeout=10)
119
-
120
- if not data.empty:
121
- break # Success
 
 
 
 
 
 
122
 
123
- logger.warning(f"Attempt {attempt+1} returned empty. Retrying...")
124
- time.sleep(2 ** attempt)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
125
 
126
- except Exception as e:
127
- logger.error(f"Download attempt {attempt+1} failed: {e}")
128
- time.sleep(2 ** attempt)
 
 
 
 
 
129
 
130
  if data.empty:
131
  logger.error("All download attempts failed. Switching to SYNTHETIC data.")
 
109
  logger.info(f"Downloading prices for {len(valid_tickers)} tickers (Real Data Mode)...")
110
 
111
  data = pd.DataFrame()
112
+ # Chunked Download Strategy to avoid timeouts/rate-limits
113
+ chunk_size = 20
114
+ all_data = []
115
 
116
+ for i in range(0, len(valid_tickers), chunk_size):
117
+ chunk = valid_tickers[i:i+chunk_size]
118
+ logger.info(f"Downloading chunk {i//chunk_size + 1}: {chunk[:3]}...")
119
+
120
+ chunk_data = pd.DataFrame()
121
+ # Retry logic per chunk
122
+ for attempt in range(3):
123
+ try:
124
+ # Ticker-by-Ticker usually more reliable for small batches than bulk download if bulk is failing
125
+ # But let's stick to download() for speed, just smaller batches.
126
+
127
+ # Note: threads=True might actually be better for speed if we are chunking,
128
+ # but threads=False is safer for rate limits. Let's try threads=False but small chunks.
129
+ temp = yf.download(chunk, start=start_date, group_by='ticker', threads=False, progress=False, timeout=20)
130
 
131
+ if not temp.empty:
132
+ chunk_data = temp
133
+ break
134
+ time.sleep(1)
135
+ except Exception as e:
136
+ logger.warning(f"Chunk failed: {e}")
137
+ time.sleep(1)
138
+
139
+ if not chunk_data.empty:
140
+ all_data.append(chunk_data)
141
+
142
+ if not all_data:
143
+ logger.error("All chunks failed.")
144
+ # If user insists on live data, we might return empty here?
145
+ # But let's keep the fallback but make it less likely to be needed.
146
+ pass # Will fall through to empty check
147
 
148
+ # Concatenate
149
+ try:
150
+ if all_data:
151
+ data = pd.concat(all_data, axis=1)
152
+ else:
153
+ data = pd.DataFrame()
154
+ except:
155
+ data = pd.DataFrame()
156
 
157
  if data.empty:
158
  logger.error("All download attempts failed. Switching to SYNTHETIC data.")