Spaces:
Sleeping
Sleeping
| import pandas as pd | |
| import os | |
| from logger import get_logger | |
| from datetime import datetime | |
| logger = get_logger() | |
| class DataPreprocessor: | |
| def process_klines(data): | |
| """Convert Binance API data to a DataFrame and clean it.""" | |
| columns = ["Open_Time", "Open", "High", "Low", "Close", "Volume", | |
| "Close_Time", "Quote_Asset_Volume", "Number_of_Trades", | |
| "Taker_Buy_Base_Volume", "Taker_Buy_Quote_Volume", "Ignore"] | |
| df = pd.DataFrame(data, columns=columns) | |
| # Convert timestamps to datetime | |
| df["Open_Time"] = pd.to_datetime(df["Open_Time"], unit="ms") | |
| df["Close_Time"] = pd.to_datetime(df["Close_Time"], unit="ms") | |
| # Convert numerical values to float | |
| num_cols = ["Open", "High", "Low", "Close", "Volume", | |
| "Quote_Asset_Volume", "Taker_Buy_Base_Volume", "Taker_Buy_Quote_Volume"] | |
| df[num_cols] = df[num_cols].astype(float) | |
| logger.info("Data successfully processed and cleaned.") | |
| return df | |
| def save_to_csv(df, file_path=f"data/raw_datasetes/crypto_data_{datetime.now()}.csv"): | |
| """Save the DataFrame to a CSV file.""" | |
| os.makedirs(os.path.dirname(file_path), exist_ok=True) # Ensure directory exists | |
| df.to_csv(file_path, index=False) | |
| logger.info(f"Data successfully saved to {file_path}") | |