"""Parse an MT5 HTML trade report into a deals DataFrame and a statistics dict.

Typical use::

    parser = MT5ReportParser("mt5_report.html")
    parser.parse()
    parser.print_summary()
"""

import codecs
import re
import sys
from datetime import datetime

import pandas as pd

try:
    from bs4 import BeautifulSoup
except ImportError:  # pragma: no cover - only hit when beautifulsoup4 is absent
    # Deferred failure: the pandas-only helpers stay importable, and
    # ``load_html`` reports the missing dependency with a clear message.
    BeautifulSoup = None


class MT5ReportParser:
    """Extract the deals table and key/value statistics from an HTML report.

    Attributes set by the parser:
        html_file_path: path given to the constructor.
        soup: parsed ``BeautifulSoup`` document, or ``None`` before loading.
        deals_df: ``pandas.DataFrame`` of deals, or ``None`` if none was found.
        statistics: dict mapping statistic label to a float (when the text
            parses as a number) or to the raw string otherwise.
    """

    # Columns of the deals table that are converted to numbers when present.
    NUMERIC_COLUMNS = ('Volume', 'Price', 'Commission', 'Swap', 'Profit', 'Balance')

    # Labels looked up by ``get_common_statistics``; order is preserved.
    COMMON_STATISTICS = (
        'Initial Deposit',
        'Total Net Profit',
        'Gross Profit',
        'Gross Loss',
        'Profit Factor',
        'Expected Payoff',
        'Absolute Drawdown',
        'Maximal Drawdown',
        'Relative Drawdown',
        'Total Trades',
        'Short Positions',
        'Long Positions',
        'Profit Trades',
        'Loss Trades',
        'Largest Profit',
        'Largest Loss',
        'Average Profit',
        'Average Loss',
        'Maximum Consecutive Wins',
        'Maximum Consecutive Losses',
        'Maximal Consecutive Profit',
        'Maximal Consecutive Loss',
        'Average Consecutive Wins',
        'Average Consecutive Losses',
    )

    # Characters removed before float conversion: space, no-break space,
    # narrow no-break space and comma (all treated as thousands separators).
    _SEPARATORS = str.maketrans('', '', ' \xa0\u202f,')

    def __init__(self, html_file_path):
        """Initialize parser with HTML file path (nothing is read yet)."""
        self.html_file_path = html_file_path
        self.soup = None
        self.deals_df = None
        self.statistics = {}

    @staticmethod
    def _detect_encoding(head):
        """Return the codec name implied by a byte-order mark in *head*.

        *head* is the first few bytes of the file. Without a BOM the result
        is ``'utf-8'``, which is the encoding this parser has always assumed.
        """
        # UTF-32 is tested first: its little-endian BOM begins with the
        # UTF-16 little-endian BOM and would otherwise be misdetected.
        if head.startswith((codecs.BOM_UTF32_LE, codecs.BOM_UTF32_BE)):
            return 'utf-32'
        if head.startswith((codecs.BOM_UTF16_LE, codecs.BOM_UTF16_BE)):
            return 'utf-16'
        if head.startswith(codecs.BOM_UTF8):
            return 'utf-8-sig'
        return 'utf-8'

    @classmethod
    def _to_number(cls, text):
        """Return *text* as a float if it parses as one, else *text* unchanged.

        Spaces, no-break spaces and commas are dropped first, so ``"10 000.50"``
        and ``"1,234"`` become ``10000.5`` and ``1234.0``.
        """
        try:
            return float(text.translate(cls._SEPARATORS))
        except ValueError:
            return text

    def _ensure_soup(self):
        """Return the parsed document, loading the file on first use."""
        if self.soup is None:
            self.load_html()
        return self.soup

    def load_html(self):
        """Load and parse the HTML file into ``self.soup``.

        The encoding is chosen from the file's byte-order mark, falling back
        to UTF-8 when there is none.

        Raises:
            ImportError: if ``beautifulsoup4`` is not installed.
            OSError: if the file cannot be opened.
            UnicodeDecodeError: if the content does not match the encoding.
        """
        if BeautifulSoup is None:
            raise ImportError(
                "beautifulsoup4 is required to parse HTML reports "
                "(pip install beautifulsoup4)"
            )

        with open(self.html_file_path, 'rb') as f:
            encoding = self._detect_encoding(f.read(4))

        with open(self.html_file_path, 'r', encoding=encoding) as f:
            content = f.read()

        self.soup = BeautifulSoup(content, 'html.parser')

    def extract_deals_table(self):
        """Extract the Deals table from HTML into ``self.deals_df``.

        The first table whose ``<th>`` texts include ``'Deal'`` or ``'Time'``
        and that yields at least one data row is used. ``self.deals_df`` is
        left untouched when no such table exists.
        """
        soup = self._ensure_soup()

        for table in soup.find_all('table'):
            headers = [th.get_text(strip=True) for th in table.find_all('th')]
            if 'Deal' not in headers and 'Time' not in headers:
                continue

            rows = []
            for tr in table.find_all('tr')[1:]:  # Skip header row
                cells = tr.find_all('td')
                # Rows whose width differs from the header (empty rows,
                # colspan summary lines, ...) cannot be aligned with the
                # columns and would make the DataFrame constructor raise.
                if len(cells) != len(headers):
                    continue
                rows.append([td.get_text(strip=True) for td in cells])

            if rows:
                self.deals_df = pd.DataFrame(rows, columns=headers)
                self._clean_deals_data()
                return

    def _clean_deals_data(self):
        """Clean and convert data types in deals DataFrame.

        Known numeric columns are stripped of everything except digits, ``.``
        and ``-`` and converted with ``errors='coerce'`` (unparseable cells
        become NaN). ``Time`` is converted to datetime when pandas can parse
        it and is otherwise left as text. Safe to call more than once.
        """
        if self.deals_df is None:
            return

        for col in self.NUMERIC_COLUMNS:
            if col not in self.deals_df.columns:
                continue
            column = self.deals_df[col]
            # Already converted (e.g. on a second call): the .str accessor
            # would raise on a numeric column, so leave it alone.
            if pd.api.types.is_numeric_dtype(column):
                continue
            self.deals_df[col] = pd.to_numeric(
                column.str.replace(r'[^\d.-]', '', regex=True),
                errors='coerce',
            )

        if 'Time' in self.deals_df.columns:
            try:
                self.deals_df['Time'] = pd.to_datetime(self.deals_df['Time'])
            except (ValueError, TypeError, OverflowError):
                # Best effort: keep the original text when it is not a date.
                pass

    def extract_statistics(self):
        """Extract all statistics from the report into ``self.statistics``.

        Two sources are scanned: table rows with exactly two ``<td>`` cells
        (label, value), and ``<div>`` elements whose text contains exactly one
        ``':'``. Table values win; a div never overwrites an existing key.
        """
        soup = self._ensure_soup()

        for table in soup.find_all('table'):
            for row in table.find_all('tr'):
                cells = row.find_all('td')
                if len(cells) != 2:
                    continue
                key = cells[0].get_text(strip=True).replace(':', '').strip()
                value = cells[1].get_text(strip=True)
                if key and value:
                    self.statistics[key] = self._to_number(value)

        for div in soup.find_all('div'):
            parts = div.get_text(strip=True).split(':')
            if len(parts) != 2:
                continue
            key = parts[0].strip()
            value = parts[1].strip()
            if key and value and key not in self.statistics:
                self.statistics[key] = self._to_number(value)

    def get_common_statistics(self):
        """Extract commonly used MT5 statistics.

        Returns:
            dict with one entry per name in ``COMMON_STATISTICS``. A value is
            taken from ``self.statistics`` when the common name occurs
            (case-insensitively) inside a parsed label; labels that equal the
            common name take priority over labels that merely contain it.
            Names with no match map to ``None``.
        """
        common_stats = dict.fromkeys(self.COMMON_STATISTICS)
        lowered = [(name, name.lower()) for name in self.COMMON_STATISTICS]
        exact_matches = set()

        for key, value in self.statistics.items():
            key_lower = key.lower()
            for name, name_lower in lowered:
                if name_lower == key_lower:
                    common_stats[name] = value
                    exact_matches.add(name)
                elif name_lower in key_lower and name not in exact_matches:
                    # Substring match: last one wins, but never over an
                    # exact match.
                    common_stats[name] = value

        return common_stats

    def parse(self):
        """Main method to parse the entire report."""
        self.load_html()
        self.extract_deals_table()
        self.extract_statistics()

    def save_deals_to_csv(self, output_path='deals.csv'):
        """Save deals table to CSV, or report that there is none."""
        if self.deals_df is not None:
            self.deals_df.to_csv(output_path, index=False)
            print(f"Deals saved to {output_path}")
        else:
            print("No deals table found")

    def save_statistics_to_csv(self, output_path='statistics.csv'):
        """Save statistics to CSV, or report that there are none."""
        if self.statistics:
            stats_df = pd.DataFrame(
                list(self.statistics.items()),
                columns=['Statistic', 'Value'],
            )
            stats_df.to_csv(output_path, index=False)
            print(f"Statistics saved to {output_path}")
        else:
            print("No statistics found")

    def print_summary(self):
        """Print a summary of the parsed data."""
        print("=" * 60)
        print("MT5 TRADE REPORT SUMMARY")
        print("=" * 60)

        if self.deals_df is not None:
            print(f"\nDeals Table: {len(self.deals_df)} rows")
            print(f"Columns: {', '.join(self.deals_df.columns.tolist())}")
            print("\nFirst 5 deals:")
            print(self.deals_df.head())
        else:
            print("\nNo deals table found")

        print(f"\n\nStatistics Found: {len(self.statistics)}")

        common_stats = self.get_common_statistics()
        print("\nCommon Statistics:")
        for key, value in common_stats.items():
            if value is not None:
                print(f"  {key}: {value}")

        print("\n" + "=" * 60)


def main(argv=None):
    """Example usage: parse a report, print a summary and write CSV files.

    Args:
        argv: optional argument list; the first item is the report path.
            Defaults to ``sys.argv[1:]``, and to ``"mt5_report.html"`` when
            no path is given.
    """
    args = sys.argv[1:] if argv is None else argv
    html_file = args[0] if args else "mt5_report.html"

    parser = MT5ReportParser(html_file)
    try:
        parser.parse()
    except FileNotFoundError:
        sys.exit(f"Report file not found: {html_file}")

    parser.print_summary()

    parser.save_deals_to_csv("deals.csv")
    parser.save_statistics_to_csv("statistics.csv")

    # Example: Calculate total profit
    deals_df = parser.deals_df
    if deals_df is not None and 'Profit' in deals_df.columns:
        total_profit = deals_df['Profit'].sum()
        print(f"\nTotal Profit from Deals: {total_profit}")


if __name__ == "__main__":
    main()