yasirapunsith
/

chart-pattern-locator

Joblib

Model card Files Files and versions

xet

Community

yasirapunsith committed on May 28, 2025

Commit

cedeba5

1 Parent(s): edd9837

debug code

Browse files

Files changed (1) hide show

handler.py +53 -49

handler.py CHANGED Viewed

@@ -261,61 +261,61 @@ class EndpointHandler:
             # print(f"HANDLER: Columns received in DataFrame: {ohlc_data.columns.tolist()}")
             # # --- Step 1: Ensure 'Date' column is present and correctly typed ---
-            # if 'Date' not in ohlc_data.columns:
-            #     # Try common casing if 'Date' not found
-            #     found_date_col = None
-            #     for col in ohlc_data.columns:
-            #         if str(col).lower() == 'date':
-            #             found_date_col = col
-            #             break
-            #     if found_date_col and found_date_col != 'Date':
-            #         ohlc_data.rename(columns={found_date_col: 'Date'}, inplace=True)
-            #         print(f"HANDLER: Renamed '{found_date_col}' to 'Date'. New columns: {ohlc_data.columns.tolist()}")
-            #     elif not found_date_col:
-            #         raise ValueError("Input data must contain a 'Date' column (e.g., 'Date', 'date').")
-            # # Convert 'Date' to datetime. 'errors='raise'' will be explicit.
-            # # The backend explicitly formats as YYYY-MM-DD, so this should match perfectly.
-            # ohlc_data['Date'] = pd.to_datetime(ohlc_data['Date'], format='%Y-%m-%d', errors='raise')
-            # # --- Step 2: Ensure all required OHLCV columns are present and numeric ---
-            # required_numeric_cols = ['Open', 'High', 'Low', 'Close', 'Volume']
-            # final_ohlcv_cols = []
-            # for col in required_numeric_cols:
-            #     if col in ohlc_data.columns:
-            #         final_ohlcv_cols.append(col)
-            #         # Convert to numeric, coercing errors. We need all rows, but NaNs can break models.
-            #         # Consider a strategy for NaNs (e.g., forward fill, mean fill, or raise a more specific error).
-            #         ohlc_data[col] = pd.to_numeric(ohlc_data[col], errors='coerce')
-            #     else:
-            #         # If a required column is missing, try common casings
-            #         found_alt_col = None
-            #         for df_col in ohlc_data.columns:
-            #             if str(df_col).lower() == col.lower():
-            #                 found_alt_col = df_col
-            #                 break
-            #         if found_alt_col:
-            #             ohlc_data.rename(columns={found_alt_col: col}, inplace=True)
-            #             final_ohlcv_cols.append(col)
-            #             ohlc_data[col] = pd.to_numeric(ohlc_data[col], errors='coerce')
-            #             print(f"HANDLER: Renamed '{found_alt_col}' to '{col}'. New columns: {ohlc_data.columns.tolist()}")
-            #         else:
-            #             raise ValueError(f"Missing required numeric column: '{col}'. Available: {ohlc_data.columns.tolist()}")
             # # After ensuring column names and types, check for NaNs in critical columns.
             # # If your model cannot handle NaNs, these rows are effectively "invalid" input.
             # # You stated no rows can be dropped, so if NaNs appear here, it implies a data quality issue
             # # from yfinance for the requested period.
-            # if ohlc_data[final_ohlcv_cols].isnull().any().any():
-            #     # Log which columns/rows have NaNs, but don't drop if not allowed.
-            #     # You might need to fill NaNs, but be aware it alters data.
-            #     print("HANDLER: Warning! NaN values detected in critical OHLCV columns after conversion. Your model might require clean data.")
-            #     print(ohlc_data[ohlc_data[final_ohlcv_cols].isnull().any(axis=1)].to_string())
-            #     # If your model can't handle NaNs, this is a failure point.
-            #     # Consider raising a more specific error here, or decide on a NaN filling strategy.
-            #     # For now, if the model *needs* clean data, this implicitly is a "bad input" if NaNs appear.
-            #     # If your model handles NaNs gracefully, then this is just a warning.
             # Print head after all processing to see the final DataFrame state
             print("\n--- HANDLER: OHLC Data after all input processing ---")
@@ -327,8 +327,12 @@ class EndpointHandler:
             raise ValueError(f"Invalid input data format: {e}")
         ohlc_data_segment = ohlc_data.copy()
         seg_len = len(ohlc_data_segment)
         if ohlc_data_segment.empty:
             raise ValueError("OHLC Data segment is empty or invalid after processing.")

             # print(f"HANDLER: Columns received in DataFrame: {ohlc_data.columns.tolist()}")
             # # --- Step 1: Ensure 'Date' column is present and correctly typed ---
+            if 'Date' not in ohlc_data.columns:
+                # Try common casing if 'Date' not found
+                found_date_col = None
+                for col in ohlc_data.columns:
+                    if str(col).lower() == 'date':
+                        found_date_col = col
+                        break
+                if found_date_col and found_date_col != 'Date':
+                    ohlc_data.rename(columns={found_date_col: 'Date'}, inplace=True)
+                    print(f"HANDLER: Renamed '{found_date_col}' to 'Date'. New columns: {ohlc_data.columns.tolist()}")
+                elif not found_date_col:
+                    raise ValueError("Input data must contain a 'Date' column (e.g., 'Date', 'date').")
+            # Convert 'Date' to datetime. 'errors='raise'' will be explicit.
+            # The backend explicitly formats as YYYY-MM-DD, so this should match perfectly.
+            ohlc_data['Date'] = pd.to_datetime(ohlc_data['Date'], format='%Y-%m-%d', errors='raise')
+            # --- Step 2: Ensure all required OHLCV columns are present and numeric ---
+            required_numeric_cols = ['Open', 'High', 'Low', 'Close', 'Volume']
+            final_ohlcv_cols = []
+            for col in required_numeric_cols:
+                if col in ohlc_data.columns:
+                    final_ohlcv_cols.append(col)
+                    # Convert to numeric, coercing errors. We need all rows, but NaNs can break models.
+                    # Consider a strategy for NaNs (e.g., forward fill, mean fill, or raise a more specific error).
+                    ohlc_data[col] = pd.to_numeric(ohlc_data[col], errors='coerce')
+                else:
+                    # If a required column is missing, try common casings
+                    found_alt_col = None
+                    for df_col in ohlc_data.columns:
+                        if str(df_col).lower() == col.lower():
+                            found_alt_col = df_col
+                            break
+                    if found_alt_col:
+                        ohlc_data.rename(columns={found_alt_col: col}, inplace=True)
+                        final_ohlcv_cols.append(col)
+                        ohlc_data[col] = pd.to_numeric(ohlc_data[col], errors='coerce')
+                        print(f"HANDLER: Renamed '{found_alt_col}' to '{col}'. New columns: {ohlc_data.columns.tolist()}")
+                    else:
+                        raise ValueError(f"Missing required numeric column: '{col}'. Available: {ohlc_data.columns.tolist()}")
             # # After ensuring column names and types, check for NaNs in critical columns.
             # # If your model cannot handle NaNs, these rows are effectively "invalid" input.
             # # You stated no rows can be dropped, so if NaNs appear here, it implies a data quality issue
             # # from yfinance for the requested period.
+            if ohlc_data[final_ohlcv_cols].isnull().any().any():
+                # Log which columns/rows have NaNs, but don't drop if not allowed.
+                # You might need to fill NaNs, but be aware it alters data.
+                print("HANDLER: Warning! NaN values detected in critical OHLCV columns after conversion. Your model might require clean data.")
+                print(ohlc_data[ohlc_data[final_ohlcv_cols].isnull().any(axis=1)].to_string())
+                # If your model can't handle NaNs, this is a failure point.
+                # Consider raising a more specific error here, or decide on a NaN filling strategy.
+                # For now, if the model *needs* clean data, this implicitly is a "bad input" if NaNs appear.
+                # If your model handles NaNs gracefully, then this is just a warning.
             # Print head after all processing to see the final DataFrame state
             print("\n--- HANDLER: OHLC Data after all input processing ---")
             raise ValueError(f"Invalid input data format: {e}")
+        print("--- AFTER CONVERSION ---")
+        print(ohlc_data.to_string())
         ohlc_data_segment = ohlc_data.copy()
         seg_len = len(ohlc_data_segment)
+        print(seg_len)
         if ohlc_data_segment.empty:
             raise ValueError("OHLC Data segment is empty or invalid after processing.")