# File size: 8,145 Bytes
#
# 683d9cb

import pandas as pd
from bs4 import BeautifulSoup
import re
from datetime import datetime

class MT5ReportParser:
    """Parse an MT5 HTML trade report into a deals table and statistics.

    After ``parse()`` has run:

    * ``deals_df`` is a pandas DataFrame built from the first matching
      table, or ``None`` when no table matched.
    * ``statistics`` maps each label found in the report to a float when
      the value parses as a number, otherwise to the raw text.
    """

    # Deal columns that are converted to numbers when they are present.
    _NUMERIC_COLUMNS = ('Volume', 'Price', 'Commission', 'Swap', 'Profit', 'Balance')

    # Labels reported by get_common_statistics(), in output order.
    _COMMON_STAT_NAMES = (
        'Initial Deposit',
        'Total Net Profit',
        'Gross Profit',
        'Gross Loss',
        'Profit Factor',
        'Expected Payoff',
        'Absolute Drawdown',
        'Maximal Drawdown',
        'Relative Drawdown',
        'Total Trades',
        'Short Positions',
        'Long Positions',
        'Profit Trades',
        'Loss Trades',
        'Largest Profit',
        'Largest Loss',
        'Average Profit',
        'Average Loss',
        'Maximum Consecutive Wins',
        'Maximum Consecutive Losses',
        'Maximal Consecutive Profit',
        'Maximal Consecutive Loss',
        'Average Consecutive Wins',
        'Average Consecutive Losses',
    )

    def __init__(self, html_file_path):
        """Initialize parser with HTML file path (no file access happens here)."""
        self.html_file_path = html_file_path
        self.soup = None        # BeautifulSoup tree, set by load_html()
        self.deals_df = None    # DataFrame, set by extract_deals_table()
        self.statistics = {}    # label -> float or str, filled by extract_statistics()

    @staticmethod
    def _detect_encoding(head: bytes) -> str:
        """Return the codec name to use for a file starting with *head*.

        A UTF-16 byte-order mark selects ``'utf-16'``. Everything else is
        read as ``'utf-8-sig'``, which decodes plain UTF-8 unchanged and
        drops a UTF-8 BOM if there is one.
        """
        if head.startswith((b'\xff\xfe', b'\xfe\xff')):
            return 'utf-16'
        return 'utf-8-sig'

    @staticmethod
    def _fit_row(cells, width):
        """Return *cells* padded with None or truncated to exactly *width* items."""
        if len(cells) >= width:
            return cells[:width]
        return cells + [None] * (width - len(cells))

    @staticmethod
    def _to_number(text):
        """Return *text* as a float when it parses as one, else *text* unchanged.

        All whitespace (including non-breaking spaces) and commas are
        removed first, so grouped values such as ``'1 234.50'`` convert.
        """
        compact = ''.join(text.split()).replace(',', '')
        try:
            return float(compact)
        except ValueError:
            return text

    def load_html(self):
        """Load and parse the HTML file into ``self.soup``.

        Raises:
            OSError: if the file cannot be opened.
            UnicodeDecodeError: if the content does not match the detected
                encoding.
        """
        # NOTE(review): exported reports are reportedly often UTF-16 with a
        # BOM; sniff the first bytes instead of assuming UTF-8. Files
        # without a BOM are still decoded as UTF-8, as before.
        with open(self.html_file_path, 'rb') as f:
            encoding = self._detect_encoding(f.read(4))

        with open(self.html_file_path, 'r', encoding=encoding) as f:
            content = f.read()
        self.soup = BeautifulSoup(content, 'html.parser')

    def extract_deals_table(self):
        """Extract the Deals table from HTML into ``self.deals_df``.

        The first table whose ``<th>`` texts include 'Deal' or 'Time' and
        that has at least one data row is used. ``load_html()`` must have
        been called first.
        """
        for table in self.soup.find_all('table'):
            headers = [th.get_text(strip=True) for th in table.find_all('th')]

            # Not a deals-like table: keep looking.
            if 'Deal' not in headers and 'Time' not in headers:
                continue

            width = len(headers)
            rows = []
            for tr in table.find_all('tr')[1:]:  # Skip header row
                cells = tr.find_all('td')
                if cells:
                    texts = [td.get_text(strip=True) for td in cells]
                    # Rows with a different cell count than the header
                    # would make the DataFrame constructor fail, so every
                    # row is fitted to the header width.
                    rows.append(self._fit_row(texts, width))

            if rows:
                self.deals_df = pd.DataFrame(rows, columns=headers)
                self._clean_deals_data()
                break

    def _clean_deals_data(self):
        """Clean and convert data types in deals DataFrame.

        Safe to call more than once: columns that are already numeric are
        left untouched.
        """
        if self.deals_df is None:
            return

        df = self.deals_df

        for col in self._NUMERIC_COLUMNS:
            if col not in df.columns:
                continue
            # Already converted (e.g. on a second call); the .str accessor
            # below only works on text columns.
            if pd.api.types.is_numeric_dtype(df[col]):
                continue
            # Drop everything except digits, dot and minus, then convert;
            # unparseable cells become NaN.
            df[col] = pd.to_numeric(
                df[col].str.replace(r'[^\d.-]', '', regex=True),
                errors='coerce'
            )

        # Convert Time to datetime if possible
        if 'Time' in df.columns:
            try:
                df['Time'] = pd.to_datetime(df['Time'])
            except (ValueError, TypeError, OverflowError):
                # Best effort: leave the column as text when it cannot be
                # parsed as dates.
                pass

    def extract_statistics(self):
        """Extract all statistics from the report into ``self.statistics``.

        Two sources are scanned: table rows with exactly two ``<td>``
        cells (label, value), and ``<div>`` elements whose text contains
        exactly one colon. A div never overwrites a label already found.
        ``load_html()`` must have been called first.
        """
        for table in self.soup.find_all('table'):
            for row in table.find_all('tr'):
                cells = row.find_all('td')
                if len(cells) != 2:
                    continue

                # Labels are stored without their colon.
                key = cells[0].get_text(strip=True).replace(':', '').strip()
                value = cells[1].get_text(strip=True)

                if key and value:
                    self.statistics[key] = self._to_number(value)

        # Also look for div-based statistics
        for div in self.soup.find_all('div'):
            parts = div.get_text(strip=True).split(':')
            if len(parts) != 2:
                continue

            key = parts[0].strip()
            value = parts[1].strip()
            if key and value and key not in self.statistics:
                self.statistics[key] = self._to_number(value)

    def get_common_statistics(self):
        """Extract commonly used MT5 statistics.

        Returns:
            dict: one entry per common label; the value is ``None`` when
            nothing in ``self.statistics`` matched. A label matches a
            statistic whose name contains it (case-insensitive). An exact
            name match takes priority and is never overwritten by a
            looser substring match.
        """
        common_stats = dict.fromkeys(self._COMMON_STAT_NAMES)
        exact_hits = set()

        for key, value in self.statistics.items():
            lowered = key.lower()
            for name in common_stats:
                if name in exact_hits:
                    continue
                target = name.lower()
                if lowered == target:
                    common_stats[name] = value
                    exact_hits.add(name)
                elif target in lowered:
                    common_stats[name] = value

        return common_stats

    def parse(self):
        """Main method to parse the entire report (load, deals, statistics)."""
        self.load_html()
        self.extract_deals_table()
        self.extract_statistics()

    def save_deals_to_csv(self, output_path='deals.csv'):
        """Save deals table to CSV, or print a notice when there is none."""
        if self.deals_df is not None:
            self.deals_df.to_csv(output_path, index=False)
            print(f"Deals saved to {output_path}")
        else:
            print("No deals table found")

    def save_statistics_to_csv(self, output_path='statistics.csv'):
        """Save statistics to a two-column CSV, or print a notice when empty."""
        if self.statistics:
            stats_df = pd.DataFrame(list(self.statistics.items()),
                                    columns=['Statistic', 'Value'])
            stats_df.to_csv(output_path, index=False)
            print(f"Statistics saved to {output_path}")
        else:
            print("No statistics found")

    def print_summary(self):
        """Print a summary of the parsed data to standard output."""
        print("=" * 60)
        print("MT5 TRADE REPORT SUMMARY")
        print("=" * 60)

        if self.deals_df is not None:
            print(f"\nDeals Table: {len(self.deals_df)} rows")
            print(f"Columns: {', '.join(self.deals_df.columns.tolist())}")
            print("\nFirst 5 deals:")
            print(self.deals_df.head())
        else:
            print("\nNo deals table found")

        print(f"\n\nStatistics Found: {len(self.statistics)}")
        common_stats = self.get_common_statistics()
        print("\nCommon Statistics:")
        # Only labels that matched something are listed.
        for key, value in common_stats.items():
            if value is not None:
                print(f"  {key}: {value}")

        print("\n" + "=" * 60)


# Example usage: python <this script> [path/to/report.html]
if __name__ == "__main__":
    import sys

    # Report path: first command-line argument when given, otherwise the
    # default file name in the current directory.
    html_file = sys.argv[1] if len(sys.argv) > 1 else "mt5_report.html"

    # Create parser instance
    parser = MT5ReportParser(html_file)

    # Parse the report (reads the file, fills deals_df and statistics)
    parser.parse()

    # Print summary
    parser.print_summary()

    # Save to CSV files
    parser.save_deals_to_csv("deals.csv")
    parser.save_statistics_to_csv("statistics.csv")

    # Access the data programmatically
    deals_df = parser.deals_df
    statistics = parser.statistics
    common_stats = parser.get_common_statistics()

    # Example: Calculate total profit (NaN cells are skipped by sum())
    if deals_df is not None and 'Profit' in deals_df.columns:
        total_profit = deals_df['Profit'].sum()
        print(f"\nTotal Profit from Deals: {total_profit}")