yasirapunsith committed on
Commit
cedeba5
·
1 Parent(s): edd9837

debug code

Browse files
Files changed (1) hide show
  1. handler.py +53 -49
handler.py CHANGED
@@ -261,61 +261,61 @@ class EndpointHandler:
261
  # print(f"HANDLER: Columns received in DataFrame: {ohlc_data.columns.tolist()}")
262
 
263
  # # --- Step 1: Ensure 'Date' column is present and correctly typed ---
264
- # if 'Date' not in ohlc_data.columns:
265
- # # Try common casing if 'Date' not found
266
- # found_date_col = None
267
- # for col in ohlc_data.columns:
268
- # if str(col).lower() == 'date':
269
- # found_date_col = col
270
- # break
271
- # if found_date_col and found_date_col != 'Date':
272
- # ohlc_data.rename(columns={found_date_col: 'Date'}, inplace=True)
273
- # print(f"HANDLER: Renamed '{found_date_col}' to 'Date'. New columns: {ohlc_data.columns.tolist()}")
274
- # elif not found_date_col:
275
- # raise ValueError("Input data must contain a 'Date' column (e.g., 'Date', 'date').")
276
-
277
- # # Convert 'Date' to datetime. 'errors='raise'' will be explicit.
278
- # # The backend explicitly formats as YYYY-MM-DD, so this should match perfectly.
279
- # ohlc_data['Date'] = pd.to_datetime(ohlc_data['Date'], format='%Y-%m-%d', errors='raise')
280
 
281
- # # --- Step 2: Ensure all required OHLCV columns are present and numeric ---
282
- # required_numeric_cols = ['Open', 'High', 'Low', 'Close', 'Volume']
283
- # final_ohlcv_cols = []
284
-
285
- # for col in required_numeric_cols:
286
- # if col in ohlc_data.columns:
287
- # final_ohlcv_cols.append(col)
288
- # # Convert to numeric, coercing errors. We need all rows, but NaNs can break models.
289
- # # Consider a strategy for NaNs (e.g., forward fill, mean fill, or raise a more specific error).
290
- # ohlc_data[col] = pd.to_numeric(ohlc_data[col], errors='coerce')
291
- # else:
292
- # # If a required column is missing, try common casings
293
- # found_alt_col = None
294
- # for df_col in ohlc_data.columns:
295
- # if str(df_col).lower() == col.lower():
296
- # found_alt_col = df_col
297
- # break
298
- # if found_alt_col:
299
- # ohlc_data.rename(columns={found_alt_col: col}, inplace=True)
300
- # final_ohlcv_cols.append(col)
301
- # ohlc_data[col] = pd.to_numeric(ohlc_data[col], errors='coerce')
302
- # print(f"HANDLER: Renamed '{found_alt_col}' to '{col}'. New columns: {ohlc_data.columns.tolist()}")
303
- # else:
304
- # raise ValueError(f"Missing required numeric column: '{col}'. Available: {ohlc_data.columns.tolist()}")
305
 
306
  # # After ensuring column names and types, check for NaNs in critical columns.
307
  # # If your model cannot handle NaNs, these rows are effectively "invalid" input.
308
  # # You stated no rows can be dropped, so if NaNs appear here, it implies a data quality issue
309
  # # from yfinance for the requested period.
310
- # if ohlc_data[final_ohlcv_cols].isnull().any().any():
311
- # # Log which columns/rows have NaNs, but don't drop if not allowed.
312
- # # You might need to fill NaNs, but be aware it alters data.
313
- # print("HANDLER: Warning! NaN values detected in critical OHLCV columns after conversion. Your model might require clean data.")
314
- # print(ohlc_data[ohlc_data[final_ohlcv_cols].isnull().any(axis=1)].to_string())
315
- # # If your model can't handle NaNs, this is a failure point.
316
- # # Consider raising a more specific error here, or decide on a NaN filling strategy.
317
- # # For now, if the model *needs* clean data, this implicitly is a "bad input" if NaNs appear.
318
- # # If your model handles NaNs gracefully, then this is just a warning.
319
 
320
  # Print head after all processing to see the final DataFrame state
321
  print("\n--- HANDLER: OHLC Data after all input processing ---")
@@ -327,8 +327,12 @@ class EndpointHandler:
327
  raise ValueError(f"Invalid input data format: {e}")
328
 
329
 
 
 
 
330
  ohlc_data_segment = ohlc_data.copy()
331
  seg_len = len(ohlc_data_segment)
 
332
 
333
  if ohlc_data_segment.empty:
334
  raise ValueError("OHLC Data segment is empty or invalid after processing.")
 
261
  # print(f"HANDLER: Columns received in DataFrame: {ohlc_data.columns.tolist()}")
262
 
263
  # # --- Step 1: Ensure 'Date' column is present and correctly typed ---
264
+ if 'Date' not in ohlc_data.columns:
265
+ # Try common casing if 'Date' not found
266
+ found_date_col = None
267
+ for col in ohlc_data.columns:
268
+ if str(col).lower() == 'date':
269
+ found_date_col = col
270
+ break
271
+ if found_date_col and found_date_col != 'Date':
272
+ ohlc_data.rename(columns={found_date_col: 'Date'}, inplace=True)
273
+ print(f"HANDLER: Renamed '{found_date_col}' to 'Date'. New columns: {ohlc_data.columns.tolist()}")
274
+ elif not found_date_col:
275
+ raise ValueError("Input data must contain a 'Date' column (e.g., 'Date', 'date').")
276
+
277
+ # Convert 'Date' to datetime. 'errors='raise'' will be explicit.
278
+ # The backend explicitly formats as YYYY-MM-DD, so this should match perfectly.
279
+ ohlc_data['Date'] = pd.to_datetime(ohlc_data['Date'], format='%Y-%m-%d', errors='raise')
280
 
281
+ # --- Step 2: Ensure all required OHLCV columns are present and numeric ---
282
+ required_numeric_cols = ['Open', 'High', 'Low', 'Close', 'Volume']
283
+ final_ohlcv_cols = []
284
+
285
+ for col in required_numeric_cols:
286
+ if col in ohlc_data.columns:
287
+ final_ohlcv_cols.append(col)
288
+ # Convert to numeric, coercing errors. We need all rows, but NaNs can break models.
289
+ # Consider a strategy for NaNs (e.g., forward fill, mean fill, or raise a more specific error).
290
+ ohlc_data[col] = pd.to_numeric(ohlc_data[col], errors='coerce')
291
+ else:
292
+ # If a required column is missing, try common casings
293
+ found_alt_col = None
294
+ for df_col in ohlc_data.columns:
295
+ if str(df_col).lower() == col.lower():
296
+ found_alt_col = df_col
297
+ break
298
+ if found_alt_col:
299
+ ohlc_data.rename(columns={found_alt_col: col}, inplace=True)
300
+ final_ohlcv_cols.append(col)
301
+ ohlc_data[col] = pd.to_numeric(ohlc_data[col], errors='coerce')
302
+ print(f"HANDLER: Renamed '{found_alt_col}' to '{col}'. New columns: {ohlc_data.columns.tolist()}")
303
+ else:
304
+ raise ValueError(f"Missing required numeric column: '{col}'. Available: {ohlc_data.columns.tolist()}")
305
 
306
  # # After ensuring column names and types, check for NaNs in critical columns.
307
  # # If your model cannot handle NaNs, these rows are effectively "invalid" input.
308
  # # You stated no rows can be dropped, so if NaNs appear here, it implies a data quality issue
309
  # # from yfinance for the requested period.
310
+ if ohlc_data[final_ohlcv_cols].isnull().any().any():
311
+ # Log which columns/rows have NaNs, but don't drop if not allowed.
312
+ # You might need to fill NaNs, but be aware it alters data.
313
+ print("HANDLER: Warning! NaN values detected in critical OHLCV columns after conversion. Your model might require clean data.")
314
+ print(ohlc_data[ohlc_data[final_ohlcv_cols].isnull().any(axis=1)].to_string())
315
+ # If your model can't handle NaNs, this is a failure point.
316
+ # Consider raising a more specific error here, or decide on a NaN filling strategy.
317
+ # For now, if the model *needs* clean data, this implicitly is a "bad input" if NaNs appear.
318
+ # If your model handles NaNs gracefully, then this is just a warning.
319
 
320
  # Print head after all processing to see the final DataFrame state
321
  print("\n--- HANDLER: OHLC Data after all input processing ---")
 
327
  raise ValueError(f"Invalid input data format: {e}")
328
 
329
 
330
+ print("--- AFTER CONVERSION ---")
331
+ print(ohlc_data.to_string())
332
+
333
  ohlc_data_segment = ohlc_data.copy()
334
  seg_len = len(ohlc_data_segment)
335
+ print(seg_len)
336
 
337
  if ohlc_data_segment.empty:
338
  raise ValueError("OHLC Data segment is empty or invalid after processing.")