AJAY KASU committed on
Commit
2b62855
·
1 Parent(s): 28bc1a7

Fix: Add fallback for data ingestion and config path

Browse files
Files changed (2) hide show
  1. config.py +1 -0
  2. data/data_manager.py +50 -7
config.py CHANGED
@@ -12,6 +12,7 @@ class Settings(BaseModel):
12
  HF_TOKEN: Optional[SecretStr] = Field(default_factory=lambda: SecretStr(os.getenv("HF_TOKEN", "")) if os.getenv("HF_TOKEN") else None, description="Hugging Face API Token")
13
 
14
  # Data Configuration
 
15
  DATA_CACHE_DIR: str = Field(default="./data_cache", description="Directory to store cached market data")
16
  SECTOR_MAP_FILE: str = Field(default="./data/sector_map.json", description="Path to sector mapping cache")
17
 
 
12
  HF_TOKEN: Optional[SecretStr] = Field(default_factory=lambda: SecretStr(os.getenv("HF_TOKEN", "")) if os.getenv("HF_TOKEN") else None, description="Hugging Face API Token")
13
 
14
  # Data Configuration
15
+ DATA_DIR: str = Field(default="./data", description="Directory to store static data files")
16
  DATA_CACHE_DIR: str = Field(default="./data_cache", description="Directory to store cached market data")
17
  SECTOR_MAP_FILE: str = Field(default="./data/sector_map.json", description="Path to sector mapping cache")
18
 
data/data_manager.py CHANGED
@@ -114,21 +114,22 @@ class MarketDataEngine:
114
  for attempt in range(max_retries):
115
  try:
116
  # threads=False is key to avoiding 429 Rate Limit
117
- data = yf.download(valid_tickers, start=start_date, group_by='ticker', threads=False, progress=False)
 
118
 
119
  if not data.empty:
120
  break # Success
121
 
122
  logger.warning(f"Attempt {attempt+1} returned empty. Retrying...")
123
- time.sleep(2 ** attempt) # Backoff: 1s, 2s, 4s
124
-
125
  except Exception as e:
126
  logger.error(f"Download attempt {attempt+1} failed: {e}")
127
  time.sleep(2 ** attempt)
128
 
129
  if data.empty:
130
- logger.error("All download attempts failed. Returning empty DataFrame.")
131
- return pd.DataFrame()
132
 
133
  try:
134
  # Handle MultiIndex
@@ -148,9 +149,16 @@ class MarketDataEngine:
148
  df_close = data.xs('Adj Close', level=0, axis=1)
149
  except:
150
  try:
151
- df_close = data['Close']
 
152
  except:
153
- return pd.DataFrame()
 
 
 
 
 
 
154
 
155
  # Drop columns with all NaNs
156
  df_close.dropna(axis=1, how='all', inplace=True)
@@ -206,3 +214,38 @@ class MarketDataEngine:
206
 
207
  # Return requested
208
  return {t: caps.get(t, 0) for t in tickers}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
114
  for attempt in range(max_retries):
115
  try:
116
  # threads=False is key to avoiding 429 Rate Limit
117
+ # Added timeout to prevent hanging
118
+ data = yf.download(valid_tickers, start=start_date, group_by='ticker', threads=False, progress=False, timeout=10)
119
 
120
  if not data.empty:
121
  break # Success
122
 
123
  logger.warning(f"Attempt {attempt+1} returned empty. Retrying...")
124
+ time.sleep(2 ** attempt)
125
+
126
  except Exception as e:
127
  logger.error(f"Download attempt {attempt+1} failed: {e}")
128
  time.sleep(2 ** attempt)
129
 
130
  if data.empty:
131
+ logger.error("All download attempts failed. Switching to SYNTHETIC data.")
132
+ return self._generate_synthetic_data(valid_tickers, start_date)
133
 
134
  try:
135
  # Handle MultiIndex
 
149
  df_close = data.xs('Adj Close', level=0, axis=1)
150
  except:
151
  try:
152
+ # Fix for group_by='ticker' (Adj Close is at Level 1)
153
+ df_close = data.xs('Adj Close', level=1, axis=1)
154
  except:
155
+ try:
156
+ df_close = data['Close']
157
+ except:
158
+ try:
159
+ df_close = data.xs('Close', level=1, axis=1)
160
+ except:
161
+ return pd.DataFrame()
162
 
163
  # Drop columns with all NaNs
164
  df_close.dropna(axis=1, how='all', inplace=True)
 
214
 
215
  # Return requested
216
  return {t: caps.get(t, 0) for t in tickers}
217
+
218
def _generate_synthetic_data(self, tickers: List[str], start_date: str) -> pd.DataFrame:
    """Build synthetic random-walk prices so the app still runs without live data.

    Every ticker gets its own price path: a start price drawn uniformly
    from [50, 200), compounded by normally distributed daily returns
    (about 10% annual drift and 20% annual volatility, scaled to 252
    trading days).

    Args:
        tickers: Symbols to fabricate; each becomes one column.
        start_date: First date of the range. Rows cover every business
            day from this date through the current timestamp.

    Returns:
        A DataFrame indexed by business day with one price column per
        ticker, or an empty DataFrame if generation fails.
    """
    logger.warning(f"Generating SYNTHETIC market data for {len(tickers)} tickers (Demo Mode).")
    try:
        dates = pd.date_range(start=start_date, end=pd.Timestamp.now(), freq='B')

        # Fixed seed keeps the "demo" stable between refreshes. A private
        # RandomState produces the same stream as np.random.seed(42) would,
        # but does not reseed NumPy's global generator for the rest of the
        # process.
        rng = np.random.RandomState(42)

        # Daily drift ~ 10%/252, daily volatility ~ 20%/sqrt(252).
        # Both are loop-invariant, so compute them once.
        mu = 0.10 / 252
        sigma = 0.20 / np.sqrt(252)

        # Collect the paths first and build the frame in one step;
        # inserting columns one by one fragments the DataFrame.
        paths = {}
        for ticker in tickers:
            # Draw order (start price, then returns) is kept per ticker so
            # the generated values match the previous implementation.
            start_price = rng.uniform(50, 200)
            returns = rng.normal(mu, sigma, len(dates))
            paths[ticker] = start_price * (1 + returns).cumprod()

        return pd.DataFrame(paths, index=dates)

    except Exception as e:
        # Best-effort fallback: never let demo-data generation crash the caller.
        logger.error(f"Error generating synthetic data: {e}")
        return pd.DataFrame()