Commit
·
cedeba5
1
Parent(s):
edd9837
debug code
Browse files- handler.py +53 -49
handler.py
CHANGED
|
@@ -261,61 +261,61 @@ class EndpointHandler:
|
|
| 261 |
# print(f"HANDLER: Columns received in DataFrame: {ohlc_data.columns.tolist()}")
|
| 262 |
|
| 263 |
# # --- Step 1: Ensure 'Date' column is present and correctly typed ---
|
| 264 |
-
|
| 265 |
-
|
| 266 |
-
|
| 267 |
-
|
| 268 |
-
|
| 269 |
-
|
| 270 |
-
|
| 271 |
-
|
| 272 |
-
|
| 273 |
-
|
| 274 |
-
|
| 275 |
-
|
| 276 |
-
|
| 277 |
-
#
|
| 278 |
-
#
|
| 279 |
-
|
| 280 |
|
| 281 |
-
#
|
| 282 |
-
|
| 283 |
-
|
| 284 |
-
|
| 285 |
-
|
| 286 |
-
|
| 287 |
-
|
| 288 |
-
|
| 289 |
-
|
| 290 |
-
|
| 291 |
-
|
| 292 |
-
|
| 293 |
-
|
| 294 |
-
|
| 295 |
-
|
| 296 |
-
|
| 297 |
-
|
| 298 |
-
|
| 299 |
-
|
| 300 |
-
|
| 301 |
-
|
| 302 |
-
|
| 303 |
-
|
| 304 |
-
|
| 305 |
|
| 306 |
# # After ensuring column names and types, check for NaNs in critical columns.
|
| 307 |
# # If your model cannot handle NaNs, these rows are effectively "invalid" input.
|
| 308 |
# # You stated no rows can be dropped, so if NaNs appear here, it implies a data quality issue
|
| 309 |
# # from yfinance for the requested period.
|
| 310 |
-
|
| 311 |
-
|
| 312 |
-
|
| 313 |
-
|
| 314 |
-
|
| 315 |
-
|
| 316 |
-
|
| 317 |
-
|
| 318 |
-
|
| 319 |
|
| 320 |
# Print head after all processing to see the final DataFrame state
|
| 321 |
print("\n--- HANDLER: OHLC Data after all input processing ---")
|
|
@@ -327,8 +327,12 @@ class EndpointHandler:
|
|
| 327 |
raise ValueError(f"Invalid input data format: {e}")
|
| 328 |
|
| 329 |
|
|
|
|
|
|
|
|
|
|
| 330 |
ohlc_data_segment = ohlc_data.copy()
|
| 331 |
seg_len = len(ohlc_data_segment)
|
|
|
|
| 332 |
|
| 333 |
if ohlc_data_segment.empty:
|
| 334 |
raise ValueError("OHLC Data segment is empty or invalid after processing.")
|
|
|
|
| 261 |
# print(f"HANDLER: Columns received in DataFrame: {ohlc_data.columns.tolist()}")
|
| 262 |
|
| 263 |
# # --- Step 1: Ensure 'Date' column is present and correctly typed ---
|
| 264 |
+
if 'Date' not in ohlc_data.columns:
|
| 265 |
+
# Fall back to a case-insensitive match (e.g. 'date', 'DATE') if 'Date' is not found
|
| 266 |
+
found_date_col = None
|
| 267 |
+
for col in ohlc_data.columns:
|
| 268 |
+
if str(col).lower() == 'date':
|
| 269 |
+
found_date_col = col
|
| 270 |
+
break
|
| 271 |
+
if found_date_col and found_date_col != 'Date':
|
| 272 |
+
ohlc_data.rename(columns={found_date_col: 'Date'}, inplace=True)
|
| 273 |
+
print(f"HANDLER: Renamed '{found_date_col}' to 'Date'. New columns: {ohlc_data.columns.tolist()}")
|
| 274 |
+
elif not found_date_col:
|
| 275 |
+
raise ValueError("Input data must contain a 'Date' column (e.g., 'Date', 'date').")
|
| 276 |
+
|
| 277 |
+
# Convert 'Date' to datetime; errors='raise' makes any unparseable value fail loudly.
|
| 278 |
+
# The backend explicitly formats as YYYY-MM-DD, so this should match perfectly.
|
| 279 |
+
ohlc_data['Date'] = pd.to_datetime(ohlc_data['Date'], format='%Y-%m-%d', errors='raise')
|
| 280 |
|
| 281 |
+
# --- Step 2: Ensure all required OHLCV columns are present and numeric ---
|
| 282 |
+
required_numeric_cols = ['Open', 'High', 'Low', 'Close', 'Volume']
|
| 283 |
+
final_ohlcv_cols = []
|
| 284 |
+
|
| 285 |
+
for col in required_numeric_cols:
|
| 286 |
+
if col in ohlc_data.columns:
|
| 287 |
+
final_ohlcv_cols.append(col)
|
| 288 |
+
# Convert to numeric, coercing errors. We need all rows, but NaNs can break models.
|
| 289 |
+
# Consider a strategy for NaNs (e.g., forward fill, mean fill, or raise a more specific error).
|
| 290 |
+
ohlc_data[col] = pd.to_numeric(ohlc_data[col], errors='coerce')
|
| 291 |
+
else:
|
| 292 |
+
# If a required column is missing, try common casings
|
| 293 |
+
found_alt_col = None
|
| 294 |
+
for df_col in ohlc_data.columns:
|
| 295 |
+
if str(df_col).lower() == col.lower():
|
| 296 |
+
found_alt_col = df_col
|
| 297 |
+
break
|
| 298 |
+
if found_alt_col:
|
| 299 |
+
ohlc_data.rename(columns={found_alt_col: col}, inplace=True)
|
| 300 |
+
final_ohlcv_cols.append(col)
|
| 301 |
+
ohlc_data[col] = pd.to_numeric(ohlc_data[col], errors='coerce')
|
| 302 |
+
print(f"HANDLER: Renamed '{found_alt_col}' to '{col}'. New columns: {ohlc_data.columns.tolist()}")
|
| 303 |
+
else:
|
| 304 |
+
raise ValueError(f"Missing required numeric column: '{col}'. Available: {ohlc_data.columns.tolist()}")
|
| 305 |
|
| 306 |
# # After ensuring column names and types, check for NaNs in critical columns.
|
| 307 |
# # If your model cannot handle NaNs, these rows are effectively "invalid" input.
|
| 308 |
# # You stated no rows can be dropped, so if NaNs appear here, it implies a data quality issue
|
| 309 |
# # from yfinance for the requested period.
|
| 310 |
+
if ohlc_data[final_ohlcv_cols].isnull().any().any():
|
| 311 |
+
# Log which columns/rows have NaNs, but don't drop if not allowed.
|
| 312 |
+
# You might need to fill NaNs, but be aware it alters data.
|
| 313 |
+
print("HANDLER: Warning! NaN values detected in critical OHLCV columns after conversion. Your model might require clean data.")
|
| 314 |
+
print(ohlc_data[ohlc_data[final_ohlcv_cols].isnull().any(axis=1)].to_string())
|
| 315 |
+
# If your model can't handle NaNs, this is a failure point.
|
| 316 |
+
# Consider raising a more specific error here, or decide on a NaN filling strategy.
|
| 317 |
+
# For now, if the model *needs* clean data, any NaN appearing here implicitly means "bad input".
|
| 318 |
+
# If your model handles NaNs gracefully, then this is just a warning.
|
| 319 |
|
| 320 |
# Print head after all processing to see the final DataFrame state
|
| 321 |
print("\n--- HANDLER: OHLC Data after all input processing ---")
|
|
|
|
| 327 |
raise ValueError(f"Invalid input data format: {e}")
|
| 328 |
|
| 329 |
|
| 330 |
+
print("--- AFTER CONVERSION ---")
|
| 331 |
+
print(ohlc_data.to_string())
|
| 332 |
+
|
| 333 |
ohlc_data_segment = ohlc_data.copy()
|
| 334 |
seg_len = len(ohlc_data_segment)
|
| 335 |
+
print(seg_len)
|
| 336 |
|
| 337 |
if ohlc_data_segment.empty:
|
| 338 |
raise ValueError("OHLC Data segment is empty or invalid after processing.")
|