from bs4 import BeautifulSoup
import re
from models import FinancialResult,FinancialEntry,Financials,RatioEntry,CompanyData, CircuitBreakerRow
from typing import List, Optional, Dict, Any

class PsxScraper(object):
    def __init__(self, html_content:str):
        self.soup = BeautifulSoup(html_content, 'html.parser')
        
    def _clean_number(self, text: str) -> float:
        """Clean and convert number strings to float"""
        if not text:
            return 0.0
        # Remove commas, spaces, and non-numeric characters except decimal points and minus signs
        text = str(text).replace(',', '').replace(' ', '').replace('Rs.', '')
        # Extract numbers with optional decimal points
        match = re.search(r'[-+]?\d*\.?\d+', text)
        return float(match.group()) if match else 0.0
    
    def _extract_range(self, range_text: str) -> Dict[str, float]:
        """Extract low, high, and current values from range strings"""
        # Example: "296.08 — 361.88"
        parts = range_text.split('—')
        if len(parts) == 2:
            return {
                'low': self._clean_number(parts[0]),
                'high': self._clean_number(parts[1]),
                'current': 0.0  # Will be set from data attributes
            }
        return {'low': 0.0, 'high': 0.0, 'current': 0.0}
    
    
    def extract_announcements(self) -> List[FinancialResult]:
        """Extract financial results announcements"""
        announcements = []
        
        # Look for financial results tab
        financial_results_tab = self.soup.find('div', class_='tabs__panel', attrs={'data-name': 'Financial Results'})
        if not financial_results_tab:
            return announcements
        
        table = financial_results_tab.find('table')
        if not table:
            return announcements
        
        rows = table.find_all('tr')[1:]  # Skip header row
        for row in rows:
            cols = row.find_all('td')
            if len(cols) >= 3:
                date = cols[0].text.strip()
                title = cols[1].text.strip()
                
                # Extract links
                document_link = None
                pdf_link = None
                
                links = cols[2].find_all('a')
                for link in links:
                    href = link.get('href', '')
                    if 'javascript:' in href:
                        document_link = href
                    elif '.pdf' in href:
                        pdf_link = href
                
                announcements.append(FinancialResult(
                    date=date,
                    title=title,
                    documentLink=document_link,
                    pdfLink=pdf_link
                ))
        
        return announcements
    
    
    def extract_financials(self) -> Financials:
        """Extract financial data (annual and quarterly)"""
        annual_data = []
        quarterly_data = []
        
        # Find the financials section
        financials_section = self.soup.find('div', id='financials')
        if not financials_section:
            return Financials(annual=[], quarterly=[])
        
        # Extract annual financials
        annual_tab = financials_section.find('div', class_='tabs__panel', attrs={'data-name': 'Annual'})
        if annual_tab:
            table = annual_tab.find('table')
            if table:
                headers = []
                rows_data = []
                
                # Extract headers
                header_row = table.find('thead').find('tr')
                for th in header_row.find_all('th'):
                    headers.append(th.text.strip())
                
                # Extract data rows
                body_rows = table.find('tbody').find_all('tr')
                for row in body_rows:
                    row_data = {}
                    cells = row.find_all('td')
                    if len(cells) == len(headers):
                        for i, cell in enumerate(cells):
                            row_data[headers[i]] = cell.text.strip()
                        rows_data.append(row_data)
                
                # Process annual data
                if headers and rows_data:
                    for i in range(1, len(headers)):  # Skip first header (metric names)
                        period = headers[i]
                        entry = FinancialEntry(period=period)
                        
                        for row in rows_data:
                            metric = row[headers[0]]
                            value = row[period]
                            
                            if 'Sales' in metric:
                                entry.sales = self._clean_number(value)
                            elif 'Profit after Taxation' in metric:
                                entry.profit_after_tax = self._clean_number(value)
                            elif 'EPS' in metric:
                                entry.eps = self._clean_number(value)
                        
                        annual_data.append(entry)
        
        # Extract quarterly financials
        quarterly_tab = financials_section.find('div', class_='tabs__panel', attrs={'data-name': 'Quarterly'})
        if quarterly_tab:
            table = quarterly_tab.find('table')
            if table:
                headers = []
                rows_data = []
                
                # Extract headers
                header_row = table.find('thead').find('tr')
                for th in header_row.find_all('th'):
                    headers.append(th.text.strip())
                
                # Extract data rows
                body_rows = table.find('tbody').find_all('tr')
                for row in body_rows:
                    row_data = {}
                    cells = row.find_all('td')
                    if len(cells) == len(headers):
                        for i, cell in enumerate(cells):
                            row_data[headers[i]] = cell.text.strip()
                        rows_data.append(row_data)
                
                # Process quarterly data
                if headers and rows_data:
                    for i in range(1, len(headers)):  # Skip first header (metric names)
                        period = headers[i]
                        entry = FinancialEntry(period=period)
                        
                        for row in rows_data:
                            metric = row[headers[0]]
                            value = row[period]
                            
                            if 'Sales' in metric:
                                entry.sales = self._clean_number(value)
                            elif 'Profit after Taxation' in metric:
                                entry.profit_after_tax = self._clean_number(value)
                            elif 'EPS' in metric:
                                entry.eps = self._clean_number(value)
                        
                        quarterly_data.append(entry)
        
        return Financials(annual=annual_data, quarterly=quarterly_data)
    
    
    def extract_ratios(self) -> List[RatioEntry]:
        """Extract financial ratios"""
        ratios = []
        
        ratios_section = self.soup.find('div', id='ratios')
        if not ratios_section:
            return ratios
        
        table = ratios_section.find('table')
        if not table:
            return ratios
        
        headers = []
        rows_data = []
        
        # Extract headers
        header_row = table.find('thead').find('tr')
        for th in header_row.find_all('th'):
            headers.append(th.text.strip())
        
        # Extract data rows
        body_rows = table.find('tbody').find_all('tr')
        for row in body_rows:
            row_data = {}
            cells = row.find_all('td')
            if len(cells) == len(headers):
                for i, cell in enumerate(cells):
                    row_data[headers[i]] = cell.text.strip()
                rows_data.append(row_data)
        
        # Process ratio data
        if headers and rows_data:
            for i in range(1, len(headers)):  # Skip first header (ratio names)
                period = headers[i]
                entry = RatioEntry(period=period)
                
                for row in rows_data:
                    ratio_name = row[headers[0]]
                    value = row[period]
                    
                    # Clean value (remove parentheses for negative numbers)
                    clean_value = value.replace('(', '').replace(')', '')
                    
                    if 'Gross Profit Margin' in ratio_name:
                        entry.gross_profit_margin = self._clean_number(clean_value)
                    elif 'Net Profit Margin' in ratio_name:
                        entry.net_profit_margin = self._clean_number(clean_value)
                    elif 'EPS Growth' in ratio_name:
                        entry.eps_growth = self._clean_number(clean_value)
                    elif 'PEG' in ratio_name:
                        entry.peg = self._clean_number(clean_value)
                
                ratios.append(entry)
        
        return ratios
    
    def scrape_all_data(self) -> CompanyData:
        """Scrape all data and return as CompanyData object"""
        return CompanyData(
            announcements=self.extract_announcements(),
            financials=self.extract_financials(),
            ratios=self.extract_ratios()
        )
    

    def fetch_circuit_breaker_table(self,table_id:str) ->  list[CircuitBreakerRow]:
        table = self.soup.find("table", id=table_id)
        if not table or not table.tbody:
            return []
        records = []
        for row in table.tbody.find_all("tr"):
            cols = [td.get_text(strip=True) for td in row.find_all("td")]

            records.append(
                CircuitBreakerRow(
                    symbol=cols[0],
                    ldcp=cols[1],
                    open=cols[2],
                    high=cols[3],
                    low=cols[4],
                    current=cols[5],
                    change=cols[6],
                    change_percent=cols[7],
                    volume=cols[8],
                )
            )

        return records