algorembrant's picture
Upload 63 files
c5ef85d verified
import pandas as pd
import numpy as np
def extract_tables_from_csv(file_path):
"""
Extract two separate tables from a CSV file and convert to clean DataFrames
Args:
file_path (str): Path to the CSV file
Returns:
tuple: (orders_df, deals_df) - Two pandas DataFrames
"""
# Read the entire CSV file
with open(file_path, 'r', encoding='utf-8') as f:
lines = f.readlines()
# Find the row indices where each table starts
orders_start = None
deals_start = None
for i, line in enumerate(lines):
# Check for first table header
if 'Open Time' in line and 'Order' in line and 'Symbol' in line:
orders_start = i
# Check for second table header
if 'Time' in line and 'Deal' in line and 'Direction' in line:
deals_start = i
print(f"Orders table starts at line: {orders_start}")
print(f"Deals table starts at line: {deals_start}\n")
# Extract Orders table
if orders_start is not None:
if deals_start is not None:
# Read from orders_start to deals_start
orders_df = pd.read_csv(
file_path,
skiprows=orders_start,
nrows=deals_start - orders_start - 1
)
else:
# Read from orders_start to end
orders_df = pd.read_csv(file_path, skiprows=orders_start)
# Clean the orders dataframe
orders_df = clean_dataframe(orders_df)
# Define expected columns for orders
orders_columns = ['Open Time', 'Order', 'Symbol', 'Type', 'Volume',
'Price', 'S / L', 'T / P', 'Time', 'State', 'Comment']
orders_df = orders_df.reindex(columns=orders_columns, fill_value=np.nan)
print("Orders Table:")
print(orders_df.head())
print(f"\nShape: {orders_df.shape}")
print(f"Columns: {list(orders_df.columns)}\n")
else:
orders_df = None
print("Orders table not found!\n")
# Extract Deals table
if deals_start is not None:
# Read from deals_start to end
deals_df = pd.read_csv(file_path, skiprows=deals_start)
# Clean the deals dataframe
deals_df = clean_dataframe(deals_df)
# Define expected columns for deals
deals_columns = ['Time', 'Deal', 'Symbol', 'Type', 'Direction',
'Volume', 'Price', 'Order', 'Commission', 'Swap',
'Profit', 'Balance', 'Comment']
deals_df = deals_df.reindex(columns=deals_columns, fill_value=np.nan)
print("Deals Table:")
print(deals_df.head())
print(f"\nShape: {deals_df.shape}")
print(f"Columns: {list(deals_df.columns)}\n")
else:
deals_df = None
print("Deals table not found!\n")
return orders_df, deals_df
def clean_dataframe(df):
"""
Clean the dataframe by removing empty rows and columns
Args:
df (DataFrame): Input dataframe
Returns:
DataFrame: Cleaned dataframe
"""
# Remove completely empty rows
df = df.dropna(how='all')
# Remove completely empty columns
df = df.dropna(axis=1, how='all')
# Remove columns with no name (Unnamed columns that are empty)
df = df.loc[:, ~df.columns.str.contains('^Unnamed')]
# Reset index
df = df.reset_index(drop=True)
# Strip whitespace from string columns
for col in df.columns:
if df[col].dtype == 'object':
df[col] = df[col].str.strip() if df[col].notna().any() else df[col]
return df
def save_tables(orders_df, deals_df, output_prefix='table'):
"""
Save the extracted tables to separate CSV files
Args:
orders_df (DataFrame): Orders table
deals_df (DataFrame): Deals table
output_prefix (str): Prefix for output files
"""
if orders_df is not None:
orders_file = f"{output_prefix}_orders.csv"
orders_df.to_csv(orders_file, index=False)
print(f"Orders table saved to: {orders_file}")
if deals_df is not None:
deals_file = f"{output_prefix}_deals.csv"
deals_df.to_csv(deals_file, index=False)
print(f"Deals table saved to: {deals_file}")
if __name__ == "__main__":
# File path
file_path = "Sheet1.csv"
# Extract tables
orders_df, deals_df = extract_tables_from_csv(file_path)
# Save to separate files
save_tables(orders_df, deals_df, output_prefix='extracted')
# Example: Access the dataframes
if orders_df is not None:
print("\n" + "="*60)
print("Orders Table Info:")
print("="*60)
print(orders_df.info())
print("\nSample data:")
print(orders_df.head(10))
if deals_df is not None:
print("\n" + "="*60)
print("Deals Table Info:")
print("="*60)
print(deals_df.info())
print("\nSample data:")
print(deals_df.head(10))