File size: 1,456 Bytes
4bc6f31
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import pandas as pd
import os
from logger import get_logger
from datetime import datetime 
logger = get_logger()

class DataPreprocessor:
    @staticmethod
    def process_klines(data):
        """Convert Binance API data to a DataFrame and clean it."""
        columns = ["Open_Time", "Open", "High", "Low", "Close", "Volume", 
                   "Close_Time", "Quote_Asset_Volume", "Number_of_Trades", 
                   "Taker_Buy_Base_Volume", "Taker_Buy_Quote_Volume", "Ignore"]
        
        df = pd.DataFrame(data, columns=columns)

        # Convert timestamps to datetime
        df["Open_Time"] = pd.to_datetime(df["Open_Time"], unit="ms")
        df["Close_Time"] = pd.to_datetime(df["Close_Time"], unit="ms")

        # Convert numerical values to float
        num_cols = ["Open", "High", "Low", "Close", "Volume", 
                    "Quote_Asset_Volume", "Taker_Buy_Base_Volume", "Taker_Buy_Quote_Volume"]
        
        df[num_cols] = df[num_cols].astype(float)

        logger.info("Data successfully processed and cleaned.")
        return df

    @staticmethod
    def save_to_csv(df, file_path=f"data/raw_datasetes/crypto_data_{datetime.now()}.csv"):
        """Save the DataFrame to a CSV file."""
        os.makedirs(os.path.dirname(file_path), exist_ok=True)  # Ensure directory exists
        df.to_csv(file_path, index=False)
        logger.info(f"Data successfully saved to {file_path}")