# File size: 8,145 Bytes
# 683d9cb
import pandas as pd
from bs4 import BeautifulSoup
import re
from datetime import datetime
class MT5ReportParser:
    """Parse an MT5 HTML trade report into a deals table and a statistics dict.

    Typical use is ``parser = MT5ReportParser(path); parser.parse()``, after
    which ``deals_df`` holds the deals table (or ``None`` when no table was
    recognised) and ``statistics`` maps every label found in the report to
    its value.
    """

    # Deal columns that ``_clean_deals_data`` converts from text to numbers.
    _NUMERIC_COLUMNS = (
        'Volume', 'Price', 'Commission', 'Swap', 'Profit', 'Balance',
    )

    # Statistic names reported by ``get_common_statistics``, in output order.
    _COMMON_STAT_NAMES = (
        'Initial Deposit',
        'Total Net Profit',
        'Gross Profit',
        'Gross Loss',
        'Profit Factor',
        'Expected Payoff',
        'Absolute Drawdown',
        'Maximal Drawdown',
        'Relative Drawdown',
        'Total Trades',
        'Short Positions',
        'Long Positions',
        'Profit Trades',
        'Loss Trades',
        'Largest Profit',
        'Largest Loss',
        'Average Profit',
        'Average Loss',
        'Maximum Consecutive Wins',
        'Maximum Consecutive Losses',
        'Maximal Consecutive Profit',
        'Maximal Consecutive Loss',
        'Average Consecutive Wins',
        'Average Consecutive Losses',
    )

    def __init__(self, html_file_path):
        """Initialize parser with HTML file path; nothing is read yet."""
        self.html_file_path = html_file_path
        self.soup = None        # BeautifulSoup tree, set by load_html()
        self.deals_df = None    # pandas DataFrame, set by extract_deals_table()
        self.statistics = {}    # label -> float or str, filled by extract_statistics()

    @staticmethod
    def _read_report_text(path):
        """Return the text of ``path``, choosing the codec from its BOM.

        Files starting with a UTF-16 BOM are decoded as UTF-16 and files
        starting with a UTF-8 BOM as UTF-8 (BOM removed); anything else is
        read as plain UTF-8, which is what the parser always assumed before.
        """
        # NOTE(review): MT5 terminals commonly save reports as UTF-16 with a
        # BOM, which a hard-coded UTF-8 read rejects - confirm with real files.
        with open(path, 'rb') as f:
            head = f.read(3)
        if head.startswith((b'\xff\xfe', b'\xfe\xff')):
            encoding = 'utf-16'
        elif head.startswith(b'\xef\xbb\xbf'):
            encoding = 'utf-8-sig'
        else:
            encoding = 'utf-8'
        # Re-open in text mode so newline handling matches a plain text read.
        with open(path, 'r', encoding=encoding) as f:
            return f.read()

    @staticmethod
    def _to_number(text):
        """Return ``text`` as a float, or unchanged when it is not numeric.

        Spaces and commas are removed before conversion so grouped figures
        such as ``"1 234.50"`` or ``"1,234.50"`` convert.
        """
        try:
            return float(text.replace(' ', '').replace(',', ''))
        except ValueError:
            return text

    @staticmethod
    def _build_deals_frame(columns, rows):
        """Build a DataFrame from ``rows``, tolerating ragged rows.

        Rows shorter than ``columns`` are padded with ``None``; rows with
        more cells than there are columns cannot be labelled and are
        skipped. Returns ``None`` when no usable row remains.
        """
        width = len(columns)
        fitted = [
            row + [None] * (width - len(row))
            for row in rows
            if len(row) <= width
        ]
        if not fitted:
            return None
        return pd.DataFrame(fitted, columns=columns)

    def _ensure_loaded(self):
        """Load the HTML on first use so extract_* work without load_html()."""
        if self.soup is None:
            self.load_html()

    def load_html(self):
        """Load and parse HTML file"""
        content = self._read_report_text(self.html_file_path)
        self.soup = BeautifulSoup(content, 'html.parser')

    def extract_deals_table(self):
        """Extract the Deals table from HTML.

        The first table whose ``<th>`` texts include ``'Deal'`` or ``'Time'``
        and that yields at least one data row becomes ``self.deals_df``.
        """
        self._ensure_loaded()
        for table in self.soup.find_all('table'):
            headers = [th.get_text(strip=True) for th in table.find_all('th')]
            if 'Deal' not in headers and 'Time' not in headers:
                continue

            rows = []
            for tr in table.find_all('tr')[1:]:  # Skip header row
                cells = tr.find_all('td')
                if cells:
                    rows.append([td.get_text(strip=True) for td in cells])

            frame = self._build_deals_frame(headers, rows)
            if frame is not None:
                self.deals_df = frame
                self._clean_deals_data()
                break

    def _clean_deals_data(self):
        """Clean and convert data types in deals DataFrame"""
        if self.deals_df is None:
            return

        for col in self._NUMERIC_COLUMNS:
            if col not in self.deals_df.columns:
                continue
            column = self.deals_df[col]
            # Already numeric (e.g. cleaned before): the .str accessor
            # would raise on it, so leave the column alone.
            if pd.api.types.is_numeric_dtype(column):
                continue
            # Keep digits, dots and minus signs only; anything that still
            # fails to parse becomes NaN.
            self.deals_df[col] = pd.to_numeric(
                column.str.replace(r'[^\d.-]', '', regex=True),
                errors='coerce',
            )

        # Convert Time to datetime if possible
        if 'Time' in self.deals_df.columns:
            try:
                self.deals_df['Time'] = pd.to_datetime(self.deals_df['Time'])
            except (ValueError, TypeError, OverflowError):
                # Best effort: unparseable timestamps stay as text.
                pass

    def extract_statistics(self):
        """Extract all statistics from the report.

        Every table row with exactly two ``<td>`` cells is read as
        ``label, value``. Every ``<div>`` whose text contains exactly one
        ``':'`` is read as ``label: value`` and only fills labels the tables
        did not provide. Values are stored as floats when they convert.
        """
        self._ensure_loaded()

        for table in self.soup.find_all('table'):
            for row in table.find_all('tr'):
                cells = row.find_all('td')
                if len(cells) != 2:
                    continue
                key = cells[0].get_text(strip=True).replace(':', '').strip()
                value = cells[1].get_text(strip=True)
                if key and value:
                    self.statistics[key] = self._to_number(value)

        # Also look for div-based statistics
        for div in self.soup.find_all('div'):
            parts = div.get_text(strip=True).split(':')
            if len(parts) != 2:
                continue
            key = parts[0].strip()
            value = parts[1].strip()
            if key and value and key not in self.statistics:
                self.statistics[key] = self._to_number(value)

    def get_common_statistics(self):
        """Extract commonly used MT5 statistics.

        Returns a dict with one entry per name in ``_COMMON_STAT_NAMES``;
        names with no match map to ``None``. Matching is case-insensitive.
        A label equal to the name wins; otherwise the last label that
        contains the name is used.
        """
        common_stats = dict.fromkeys(self._COMMON_STAT_NAMES)
        exact_hits = set()

        for key, value in self.statistics.items():
            lowered = key.lower()
            for common_key in common_stats:
                needle = common_key.lower()
                if lowered == needle:
                    common_stats[common_key] = value
                    exact_hits.add(common_key)
                elif needle in lowered and common_key not in exact_hits:
                    # Substring fallback must not clobber an exact match.
                    common_stats[common_key] = value

        return common_stats

    def parse(self):
        """Main method to parse the entire report"""
        self.load_html()
        self.extract_deals_table()
        self.extract_statistics()

    def save_deals_to_csv(self, output_path='deals.csv'):
        """Save deals table to CSV, or report that none was found."""
        if self.deals_df is not None:
            self.deals_df.to_csv(output_path, index=False)
            print(f"Deals saved to {output_path}")
        else:
            print("No deals table found")

    def save_statistics_to_csv(self, output_path='statistics.csv'):
        """Save statistics to CSV, or report that none were found."""
        if self.statistics:
            stats_df = pd.DataFrame(
                list(self.statistics.items()),
                columns=['Statistic', 'Value'],
            )
            stats_df.to_csv(output_path, index=False)
            print(f"Statistics saved to {output_path}")
        else:
            print("No statistics found")

    def print_summary(self):
        """Print a summary of the parsed data"""
        print("=" * 60)
        print("MT5 TRADE REPORT SUMMARY")
        print("=" * 60)

        if self.deals_df is not None:
            print(f"\nDeals Table: {len(self.deals_df)} rows")
            print(f"Columns: {', '.join(self.deals_df.columns.tolist())}")
            print("\nFirst 5 deals:")
            print(self.deals_df.head())
        else:
            print("\nNo deals table found")

        print(f"\n\nStatistics Found: {len(self.statistics)}")

        common_stats = self.get_common_statistics()
        print("\nCommon Statistics:")
        for key, value in common_stats.items():
            if value is not None:
                print(f" {key}: {value}")

        print("\n" + "=" * 60)
# Example usage
def main(html_file="mt5_report.html"):
    """Parse ``html_file``, print a summary and export the results to CSV.

    Writes ``deals.csv`` and ``statistics.csv`` to the current directory
    and returns the parser, so the parsed data stays available through
    ``parser.deals_df``, ``parser.statistics`` and
    ``parser.get_common_statistics()``.
    """
    # Create parser instance
    parser = MT5ReportParser(html_file)

    # Parse the report
    parser.parse()

    # Print summary
    parser.print_summary()

    # Save to CSV files
    parser.save_deals_to_csv("deals.csv")
    parser.save_statistics_to_csv("statistics.csv")

    # Example: Calculate total profit
    deals_df = parser.deals_df
    if deals_df is not None and 'Profit' in deals_df.columns:
        total_profit = deals_df['Profit'].sum()
        print(f"\nTotal Profit from Deals: {total_profit}")

    return parser


if __name__ == "__main__":
    import sys

    # Optional first argument overrides the default report path.
    main(sys.argv[1] if len(sys.argv) > 1 else "mt5_report.html")