File size: 5,241 Bytes
c5ef85d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
import pandas as pd
import numpy as np

def extract_tables_from_csv(file_path):
    """

    Extract two separate tables from a CSV file and convert to clean DataFrames

    

    Args:

        file_path (str): Path to the CSV file

    

    Returns:

        tuple: (orders_df, deals_df) - Two pandas DataFrames

    """
    # Read the entire CSV file
    with open(file_path, 'r', encoding='utf-8') as f:
        lines = f.readlines()
    
    # Find the row indices where each table starts
    orders_start = None
    deals_start = None
    
    for i, line in enumerate(lines):
        # Check for first table header
        if 'Open Time' in line and 'Order' in line and 'Symbol' in line:
            orders_start = i
        # Check for second table header
        if 'Time' in line and 'Deal' in line and 'Direction' in line:
            deals_start = i
    
    print(f"Orders table starts at line: {orders_start}")
    print(f"Deals table starts at line: {deals_start}\n")
    
    # Extract Orders table
    if orders_start is not None:
        if deals_start is not None:
            # Read from orders_start to deals_start
            orders_df = pd.read_csv(
                file_path,
                skiprows=orders_start,
                nrows=deals_start - orders_start - 1
            )
        else:
            # Read from orders_start to end
            orders_df = pd.read_csv(file_path, skiprows=orders_start)
        
        # Clean the orders dataframe
        orders_df = clean_dataframe(orders_df)
        
        # Define expected columns for orders
        orders_columns = ['Open Time', 'Order', 'Symbol', 'Type', 'Volume', 
                         'Price', 'S / L', 'T / P', 'Time', 'State', 'Comment']
        orders_df = orders_df.reindex(columns=orders_columns, fill_value=np.nan)
        
        print("Orders Table:")
        print(orders_df.head())
        print(f"\nShape: {orders_df.shape}")
        print(f"Columns: {list(orders_df.columns)}\n")
    else:
        orders_df = None
        print("Orders table not found!\n")
    
    # Extract Deals table
    if deals_start is not None:
        # Read from deals_start to end
        deals_df = pd.read_csv(file_path, skiprows=deals_start)
        
        # Clean the deals dataframe
        deals_df = clean_dataframe(deals_df)
        
        # Define expected columns for deals
        deals_columns = ['Time', 'Deal', 'Symbol', 'Type', 'Direction', 
                        'Volume', 'Price', 'Order', 'Commission', 'Swap', 
                        'Profit', 'Balance', 'Comment']
        deals_df = deals_df.reindex(columns=deals_columns, fill_value=np.nan)
        
        print("Deals Table:")
        print(deals_df.head())
        print(f"\nShape: {deals_df.shape}")
        print(f"Columns: {list(deals_df.columns)}\n")
    else:
        deals_df = None
        print("Deals table not found!\n")
    
    return orders_df, deals_df


def clean_dataframe(df):
    """

    Clean the dataframe by removing empty rows and columns

    

    Args:

        df (DataFrame): Input dataframe

    

    Returns:

        DataFrame: Cleaned dataframe

    """
    # Remove completely empty rows
    df = df.dropna(how='all')
    
    # Remove completely empty columns
    df = df.dropna(axis=1, how='all')
    
    # Remove columns with no name (Unnamed columns that are empty)
    df = df.loc[:, ~df.columns.str.contains('^Unnamed')]
    
    # Reset index
    df = df.reset_index(drop=True)
    
    # Strip whitespace from string columns
    for col in df.columns:
        if df[col].dtype == 'object':
            df[col] = df[col].str.strip() if df[col].notna().any() else df[col]
    
    return df


def save_tables(orders_df, deals_df, output_prefix='table'):
    """

    Save the extracted tables to separate CSV files

    

    Args:

        orders_df (DataFrame): Orders table

        deals_df (DataFrame): Deals table

        output_prefix (str): Prefix for output files

    """
    if orders_df is not None:
        orders_file = f"{output_prefix}_orders.csv"
        orders_df.to_csv(orders_file, index=False)
        print(f"Orders table saved to: {orders_file}")
    
    if deals_df is not None:
        deals_file = f"{output_prefix}_deals.csv"
        deals_df.to_csv(deals_file, index=False)
        print(f"Deals table saved to: {deals_file}")


if __name__ == "__main__":
    # File path
    file_path = "Sheet1.csv"
    
    # Extract tables
    orders_df, deals_df = extract_tables_from_csv(file_path)
    
    # Save to separate files
    save_tables(orders_df, deals_df, output_prefix='extracted')
    
    # Example: Access the dataframes
    if orders_df is not None:
        print("\n" + "="*60)
        print("Orders Table Info:")
        print("="*60)
        print(orders_df.info())
        print("\nSample data:")
        print(orders_df.head(10))
    
    if deals_df is not None:
        print("\n" + "="*60)
        print("Deals Table Info:")
        print("="*60)
        print(deals_df.info())
        print("\nSample data:")
        print(deals_df.head(10))