AJAY KASU committed on
Commit
28bc1a7
·
1 Parent(s): aa68b93

Fix: Configure Robust Real Data Fetching (Serial+Retry)

Browse files
Files changed (1) hide show
  1. data/data_manager.py +56 -21
data/data_manager.py CHANGED
@@ -96,34 +96,69 @@ class MarketDataEngine:
96
 
97
  def fetch_market_data(self, tickers: List[str], start_date: str = "2023-01-01") -> pd.DataFrame:
98
  """
99
- Fetches adjusted close prices for a list of tickers.
 
100
  """
101
- if not tickers:
102
- logger.warning("No tickers provided to fetch.")
 
 
 
103
  return pd.DataFrame()
104
-
105
- logger.info(f"Downloading data for {len(tickers)} tickers from {start_date}...")
106
- # Use yfinance download with threads
107
- # 'Close' is usually adjusted in newer versions or defaults
108
- data = yf.download(tickers, start=start_date, progress=False)
109
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
110
  if data.empty:
111
- logger.error("No data fetched from yfinance.")
112
  return pd.DataFrame()
 
 
 
 
113
 
114
- # Handle MultiIndex (Price, Ticker)
115
- if hasattr(data.columns, 'levels') and 'Close' in data.columns.levels[0]:
116
- data = data['Close']
117
- elif 'Close' in data.columns:
118
- data = data['Close']
119
- elif 'Adj Close' in data.columns:
120
- data = data['Adj Close']
121
- else:
122
- # Fallback
123
- logger.warning("Could not find Close/Adj Close. Using first level.")
124
- data = data.iloc[:, :len(tickers)] # Risky but fallback
 
 
 
 
 
 
125
 
126
- return self._clean_data(data)
 
 
 
 
 
 
127
 
128
  def _clean_data(self, df: pd.DataFrame) -> pd.DataFrame:
129
  """
 
96
 
97
  def fetch_market_data(self, tickers: List[str], start_date: str = "2023-01-01") -> pd.DataFrame:
98
  """
99
+ Fetches adjusted close prices for a list of tickers using REAL data logic.
100
+ Uses sequential fetching (threads=False) and retries to handle rate limits.
101
  """
102
+ import time
103
+
104
+ # Clean tickers
105
+ valid_tickers = [t.strip().upper() for t in tickers if t]
106
+ if not valid_tickers:
107
  return pd.DataFrame()
108
+
109
+ logger.info(f"Downloading prices for {len(valid_tickers)} tickers (Real Data Mode)...")
110
+
111
+ data = pd.DataFrame()
112
+ max_retries = 3
113
 
114
+ for attempt in range(max_retries):
115
+ try:
116
+ # threads=False is key to avoiding 429 Rate Limit
117
+ data = yf.download(valid_tickers, start=start_date, group_by='ticker', threads=False, progress=False)
118
+
119
+ if not data.empty:
120
+ break # Success
121
+
122
+ logger.warning(f"Attempt {attempt+1} returned empty. Retrying...")
123
+ time.sleep(2 ** attempt) # Backoff: 1s, 2s, 4s
124
+
125
+ except Exception as e:
126
+ logger.error(f"Download attempt {attempt+1} failed: {e}")
127
+ time.sleep(2 ** attempt)
128
+
129
  if data.empty:
130
+ logger.error("All download attempts failed. Returning empty DataFrame.")
131
  return pd.DataFrame()
132
+
133
+ try:
134
+ # Handle MultiIndex
135
+ df_close = pd.DataFrame()
136
 
137
+ if len(valid_tickers) == 1:
138
+ t = valid_tickers[0]
139
+ if 'Adj Close' in data.columns:
140
+ df_close[t] = data['Adj Close']
141
+ elif 'Close' in data.columns:
142
+ df_close[t] = data['Close']
143
+ else:
144
+ try:
145
+ df_close = data['Adj Close']
146
+ except KeyError:
147
+ try:
148
+ df_close = data.xs('Adj Close', level=0, axis=1)
149
+ except:
150
+ try:
151
+ df_close = data['Close']
152
+ except:
153
+ return pd.DataFrame()
154
 
155
+ # Drop columns with all NaNs
156
+ df_close.dropna(axis=1, how='all', inplace=True)
157
+ return df_close
158
+
159
+ except Exception as e:
160
+ logger.error(f"Error processing market data: {e}")
161
+ return pd.DataFrame()
162
 
163
  def _clean_data(self, df: pd.DataFrame) -> pd.DataFrame:
164
  """