| | import pandas as pd
|
| | import numpy as np
|
| |
|
| | def extract_tables_from_csv(file_path):
|
| | """
|
| | Extract two separate tables from a CSV file and convert to clean DataFrames
|
| |
|
| | Args:
|
| | file_path (str): Path to the CSV file
|
| |
|
| | Returns:
|
| | tuple: (orders_df, deals_df) - Two pandas DataFrames
|
| | """
|
| |
|
| | with open(file_path, 'r', encoding='utf-8') as f:
|
| | lines = f.readlines()
|
| |
|
| |
|
| | orders_start = None
|
| | deals_start = None
|
| |
|
| | for i, line in enumerate(lines):
|
| |
|
| | if 'Open Time' in line and 'Order' in line and 'Symbol' in line:
|
| | orders_start = i
|
| |
|
| | if 'Time' in line and 'Deal' in line and 'Direction' in line:
|
| | deals_start = i
|
| |
|
| | print(f"Orders table starts at line: {orders_start}")
|
| | print(f"Deals table starts at line: {deals_start}\n")
|
| |
|
| |
|
| | if orders_start is not None:
|
| | if deals_start is not None:
|
| |
|
| | orders_df = pd.read_csv(
|
| | file_path,
|
| | skiprows=orders_start,
|
| | nrows=deals_start - orders_start - 1
|
| | )
|
| | else:
|
| |
|
| | orders_df = pd.read_csv(file_path, skiprows=orders_start)
|
| |
|
| |
|
| | orders_df = clean_dataframe(orders_df)
|
| |
|
| |
|
| | orders_columns = ['Open Time', 'Order', 'Symbol', 'Type', 'Volume',
|
| | 'Price', 'S / L', 'T / P', 'Time', 'State', 'Comment']
|
| | orders_df = orders_df.reindex(columns=orders_columns, fill_value=np.nan)
|
| |
|
| | print("Orders Table:")
|
| | print(orders_df.head())
|
| | print(f"\nShape: {orders_df.shape}")
|
| | print(f"Columns: {list(orders_df.columns)}\n")
|
| | else:
|
| | orders_df = None
|
| | print("Orders table not found!\n")
|
| |
|
| |
|
| | if deals_start is not None:
|
| |
|
| | deals_df = pd.read_csv(file_path, skiprows=deals_start)
|
| |
|
| |
|
| | deals_df = clean_dataframe(deals_df)
|
| |
|
| |
|
| | deals_columns = ['Time', 'Deal', 'Symbol', 'Type', 'Direction',
|
| | 'Volume', 'Price', 'Order', 'Commission', 'Swap',
|
| | 'Profit', 'Balance', 'Comment']
|
| | deals_df = deals_df.reindex(columns=deals_columns, fill_value=np.nan)
|
| |
|
| | print("Deals Table:")
|
| | print(deals_df.head())
|
| | print(f"\nShape: {deals_df.shape}")
|
| | print(f"Columns: {list(deals_df.columns)}\n")
|
| | else:
|
| | deals_df = None
|
| | print("Deals table not found!\n")
|
| |
|
| | return orders_df, deals_df
|
| |
|
| |
|
| | def clean_dataframe(df):
|
| | """
|
| | Clean the dataframe by removing empty rows and columns
|
| |
|
| | Args:
|
| | df (DataFrame): Input dataframe
|
| |
|
| | Returns:
|
| | DataFrame: Cleaned dataframe
|
| | """
|
| |
|
| | df = df.dropna(how='all')
|
| |
|
| |
|
| | df = df.dropna(axis=1, how='all')
|
| |
|
| |
|
| | df = df.loc[:, ~df.columns.str.contains('^Unnamed')]
|
| |
|
| |
|
| | df = df.reset_index(drop=True)
|
| |
|
| |
|
| | for col in df.columns:
|
| | if df[col].dtype == 'object':
|
| | df[col] = df[col].str.strip() if df[col].notna().any() else df[col]
|
| |
|
| | return df
|
| |
|
| |
|
| | def save_tables(orders_df, deals_df, output_prefix='table'):
|
| | """
|
| | Save the extracted tables to separate CSV files
|
| |
|
| | Args:
|
| | orders_df (DataFrame): Orders table
|
| | deals_df (DataFrame): Deals table
|
| | output_prefix (str): Prefix for output files
|
| | """
|
| | if orders_df is not None:
|
| | orders_file = f"{output_prefix}_orders.csv"
|
| | orders_df.to_csv(orders_file, index=False)
|
| | print(f"Orders table saved to: {orders_file}")
|
| |
|
| | if deals_df is not None:
|
| | deals_file = f"{output_prefix}_deals.csv"
|
| | deals_df.to_csv(deals_file, index=False)
|
| | print(f"Deals table saved to: {deals_file}")
|
| |
|
| |
|
| | if __name__ == "__main__":
|
| |
|
| | file_path = "Sheet1.csv"
|
| |
|
| |
|
| | orders_df, deals_df = extract_tables_from_csv(file_path)
|
| |
|
| |
|
| | save_tables(orders_df, deals_df, output_prefix='extracted')
|
| |
|
| |
|
| | if orders_df is not None:
|
| | print("\n" + "="*60)
|
| | print("Orders Table Info:")
|
| | print("="*60)
|
| | print(orders_df.info())
|
| | print("\nSample data:")
|
| | print(orders_df.head(10))
|
| |
|
| | if deals_df is not None:
|
| | print("\n" + "="*60)
|
| | print("Deals Table Info:")
|
| | print("="*60)
|
| | print(deals_df.info())
|
| | print("\nSample data:")
|
| | print(deals_df.head(10)) |