File size: 5,241 Bytes

c5ef85d

import pandas as pd
import numpy as np

def extract_tables_from_csv(file_path):
    """

    Extract two separate tables from a CSV file and convert to clean DataFrames

    

    Args:

        file_path (str): Path to the CSV file

    

    Returns:

        tuple: (orders_df, deals_df) - Two pandas DataFrames

    """
    # Read the entire CSV file
    with open(file_path, 'r', encoding='utf-8') as f:
        lines = f.readlines()
    
    # Find the row indices where each table starts
    orders_start = None
    deals_start = None
    
    for i, line in enumerate(lines):
        # Check for first table header
        if 'Open Time' in line and 'Order' in line and 'Symbol' in line:
            orders_start = i
        # Check for second table header
        if 'Time' in line and 'Deal' in line and 'Direction' in line:
            deals_start = i
    
    print(f"Orders table starts at line: {orders_start}")
    print(f"Deals table starts at line: {deals_start}\n")
    
    # Extract Orders table
    if orders_start is not None:
        if deals_start is not None:
            # Read from orders_start to deals_start
            orders_df = pd.read_csv(
                file_path,
                skiprows=orders_start,
                nrows=deals_start - orders_start - 1
            )
        else:
            # Read from orders_start to end
            orders_df = pd.read_csv(file_path, skiprows=orders_start)
        
        # Clean the orders dataframe
        orders_df = clean_dataframe(orders_df)
        
        # Define expected columns for orders
        orders_columns = ['Open Time', 'Order', 'Symbol', 'Type', 'Volume', 
                         'Price', 'S / L', 'T / P', 'Time', 'State', 'Comment']
        orders_df = orders_df.reindex(columns=orders_columns, fill_value=np.nan)
        
        print("Orders Table:")
        print(orders_df.head())
        print(f"\nShape: {orders_df.shape}")
        print(f"Columns: {list(orders_df.columns)}\n")
    else:
        orders_df = None
        print("Orders table not found!\n")
    
    # Extract Deals table
    if deals_start is not None:
        # Read from deals_start to end
        deals_df = pd.read_csv(file_path, skiprows=deals_start)
        
        # Clean the deals dataframe
        deals_df = clean_dataframe(deals_df)
        
        # Define expected columns for deals
        deals_columns = ['Time', 'Deal', 'Symbol', 'Type', 'Direction', 
                        'Volume', 'Price', 'Order', 'Commission', 'Swap', 
                        'Profit', 'Balance', 'Comment']
        deals_df = deals_df.reindex(columns=deals_columns, fill_value=np.nan)
        
        print("Deals Table:")
        print(deals_df.head())
        print(f"\nShape: {deals_df.shape}")
        print(f"Columns: {list(deals_df.columns)}\n")
    else:
        deals_df = None
        print("Deals table not found!\n")
    
    return orders_df, deals_df


def clean_dataframe(df):
    """

    Clean the dataframe by removing empty rows and columns

    

    Args:

        df (DataFrame): Input dataframe

    

    Returns:

        DataFrame: Cleaned dataframe

    """
    # Remove completely empty rows
    df = df.dropna(how='all')
    
    # Remove completely empty columns
    df = df.dropna(axis=1, how='all')
    
    # Remove columns with no name (Unnamed columns that are empty)
    df = df.loc[:, ~df.columns.str.contains('^Unnamed')]
    
    # Reset index
    df = df.reset_index(drop=True)
    
    # Strip whitespace from string columns
    for col in df.columns:
        if df[col].dtype == 'object':
            df[col] = df[col].str.strip() if df[col].notna().any() else df[col]
    
    return df


def save_tables(orders_df, deals_df, output_prefix='table'):
    """

    Save the extracted tables to separate CSV files

    

    Args:

        orders_df (DataFrame): Orders table

        deals_df (DataFrame): Deals table

        output_prefix (str): Prefix for output files

    """
    if orders_df is not None:
        orders_file = f"{output_prefix}_orders.csv"
        orders_df.to_csv(orders_file, index=False)
        print(f"Orders table saved to: {orders_file}")
    
    if deals_df is not None:
        deals_file = f"{output_prefix}_deals.csv"
        deals_df.to_csv(deals_file, index=False)
        print(f"Deals table saved to: {deals_file}")


if __name__ == "__main__":
    # File path
    file_path = "Sheet1.csv"
    
    # Extract tables
    orders_df, deals_df = extract_tables_from_csv(file_path)
    
    # Save to separate files
    save_tables(orders_df, deals_df, output_prefix='extracted')
    
    # Example: Access the dataframes
    if orders_df is not None:
        print("\n" + "="*60)
        print("Orders Table Info:")
        print("="*60)
        print(orders_df.info())
        print("\nSample data:")
        print(orders_df.head(10))
    
    if deals_df is not None:
        print("\n" + "="*60)
        print("Deals Table Info:")
        print("="*60)
        print(deals_df.info())
        print("\nSample data:")
        print(deals_df.head(10))