import os
from pathlib import Path
from typing import Callable, Dict, Any
import traceback

# Optional imports with error handling
try:
    from docx import Document

    DOCX_AVAILABLE = True
except ImportError:
    DOCX_AVAILABLE = False
    print("⚠ python-docx not installed. DOCX conversion will not work.")

try:
    import fitz  # PyMuPDF

    FITZ_AVAILABLE = True
except ImportError:
    FITZ_AVAILABLE = False
    print("⚠ PyMuPDF not installed. PDF conversion will not work.")

try:
    import markdown

    MARKDOWN_AVAILABLE = True
except ImportError:
    MARKDOWN_AVAILABLE = False
    print("⚠ markdown not installed. MD conversion will not work.")

try:
    from bs4 import BeautifulSoup

    BS4_AVAILABLE = True
except ImportError:
    BS4_AVAILABLE = False
    print("⚠ beautifulsoup4 not installed. HTML conversion will not work.")


class DocumentConverter:
    def __init__(self):
        """Create a converter; no per-instance state is initialised."""

    def convert(self, input_path: str, output_path: str,
                options: Dict[str, Any], progress_callback: Callable = None) -> bool:
        """Dispatch a conversion based on the input file's extension.

        The lower-cased suffix of ``input_path`` selects one of the
        ``convert_*`` methods; any suffix without a dedicated handler goes to
        ``convert_generic``. The output's parent directory is created when
        missing.

        Args:
            input_path: Path of the file to convert.
            output_path: Destination path for the converted file.
            options: Passed through unchanged to the selected handler.
            progress_callback: Optional callable that receives progress values.

        Returns:
            The selected handler's result, or ``False`` when the input file
            does not exist, a required library is unavailable, or an
            exception was raised.
        """
        suffix = Path(input_path).suffix.lower()

        try:
            self._update_progress(progress_callback, 10)

            # Nothing to do without an input file.
            if not os.path.exists(input_path):
                print(f"Input file not found: {input_path}")
                return False

            # Make sure the destination directory is there before any handler writes.
            Path(output_path).parent.mkdir(parents=True, exist_ok=True)

            # Choose the handler first; `unavailable` carries the message to
            # print when the library that handler depends on is missing.
            unavailable = None
            if suffix == '.pdf':
                handler = self.convert_pdf
                unavailable = None if FITZ_AVAILABLE else "PyMuPDF not available for PDF conversion"
            elif suffix in ('.docx', '.doc'):
                handler = self.convert_docx
                unavailable = None if DOCX_AVAILABLE else "python-docx not available for DOCX conversion"
            elif suffix == '.txt':
                handler = self.convert_txt
            elif suffix == '.md':
                handler = self.convert_markdown
                unavailable = None if MARKDOWN_AVAILABLE else "markdown library not available"
            elif suffix == '.html':
                handler = self.convert_html
            else:
                handler = self.convert_generic

            if unavailable is not None:
                print(unavailable)
                return False

            outcome = handler(input_path, output_path, options, progress_callback)

            if outcome:
                target_ext = Path(output_path).suffix.lower()
                print(f"✓ Successfully converted: {os.path.basename(input_path)} → {target_ext}")

            return outcome

        except Exception as e:
            print(f"Document conversion error for {input_path}: {str(e)}")
            traceback.print_exc()
            return False

    def _update_progress(self, callback, value):
        """Report ``value`` to ``callback`` on a best-effort basis.

        Does nothing when ``callback`` is ``None``. Any ``Exception`` raised
        by the callback is suppressed so that progress reporting cannot abort
        a conversion.
        """
        if callback is None:
            return

        try:
            callback(value)
        except Exception:
            # Deliberately ignored: a failing progress hook must not fail the caller.
            pass

    def convert_pdf(self, input_path: str, output_path: str,
                    options: Dict[str, Any], progress_callback: Callable = None) -> bool:
        """Extract the text of a PDF and write it as TXT, DOCX or HTML.

        The output format is chosen from the (case-insensitive) suffix of
        ``output_path``. Text is read page by page with PyMuPDF; for HTML
        output the page text is HTML-escaped before being embedded.

        Args:
            input_path: Path of the PDF to read.
            output_path: Destination path; suffix must be ``.txt``, ``.docx``
                or ``.html``.
            options: Currently unused by this method.
            progress_callback: Optional callable that receives progress values
                (20 after opening, up to 80 while reading pages, 100 at the end).

        Returns:
            ``True`` when the output file was written; ``False`` when the
            output format is unsupported, python-docx is missing for DOCX
            output, or any exception occurred.
        """
        from html import escape  # function-scope import: no file-level dependency change

        out_ext = Path(output_path).suffix.lower()
        if out_ext not in ('.txt', '.docx', '.html'):
            # Previously this fell through and reported success without writing anything.
            print(f"PDF conversion error: unsupported output format '{out_ext}'")
            return False
        if out_ext == '.docx' and not DOCX_AVAILABLE:
            print("PDF conversion error: python-docx not available for DOCX output")
            return False

        try:
            doc = fitz.open(input_path)
            try:
                total_pages = len(doc)
                self._update_progress(progress_callback, 20)

                # Read every page once; all output formats share this text.
                page_texts = []
                for index in range(total_pages):
                    page_texts.append(doc[index].get_text())
                    progress_pct = 20 + (index + 1) * 60 // total_pages
                    self._update_progress(progress_callback, progress_pct)
            finally:
                # Release the PDF handle even if reading a page fails.
                doc.close()

            if out_ext == '.txt':
                with open(output_path, 'w', encoding='utf-8') as f:
                    f.write("".join(page_texts))

            elif out_ext == '.docx':
                docx_doc = Document()
                for text in page_texts:
                    docx_doc.add_paragraph(text)
                docx_doc.save(output_path)

            else:  # '.html'
                parts = ["""<!DOCTYPE html>
<html>
<head>
    <meta charset="UTF-8">
    <title>PDF Content</title>
    <style>
        body { font-family: Arial, sans-serif; margin: 40px; }
        .page { margin-bottom: 30px; page-break-after: always; }
        .page-number { color: #666; font-size: 12px; margin-bottom: 10px; }
        pre { white-space: pre-wrap; word-wrap: break-word; }
    </style>
</head>
<body>
"""]
                for number, text in enumerate(page_texts, start=1):
                    # Escape so that '<', '>' and '&' in the PDF text cannot break the markup.
                    parts.append(f"""
<div class="page">
    <div class="page-number">Page {number}</div>
    <pre>{escape(text, quote=False)}</pre>
</div>
""")
                parts.append("</body></html>")

                with open(output_path, 'w', encoding='utf-8') as f:
                    f.write("".join(parts))

            self._update_progress(progress_callback, 100)
            return True

        except Exception as e:
            print(f"PDF conversion error: {e}")
            return False

    def convert_docx(self, input_path: str, output_path: str,
                     options: Dict[str, Any], progress_callback: Callable = None) -> bool:
        """Convert the paragraph text of a DOCX file to TXT, HTML or Markdown.

        The output format is chosen from the (case-insensitive) suffix of
        ``output_path``. Only ``doc.paragraphs`` text is exported. A ``.pdf``
        target is not produced; the text is saved next to it with a ``.txt``
        suffix instead and a note is printed.

        Args:
            input_path: Path of the document to open with python-docx.
            output_path: Destination path; suffix must be ``.txt``, ``.html``,
                ``.md`` or ``.pdf``.
            options: Currently unused by this method.
            progress_callback: Optional callable that receives progress values
                (30 after loading, 100 at the end).

        Returns:
            ``True`` when an output file was written (including the TXT
            fallback for ``.pdf``); ``False`` when the output format is
            unsupported or any exception occurred.
        """
        from html import escape  # function-scope import: no file-level dependency change

        out_ext = Path(output_path).suffix.lower()
        if out_ext not in ('.txt', '.html', '.md', '.pdf'):
            # Previously this fell through and reported success without writing anything.
            print(f"DOCX conversion error: unsupported output format '{out_ext}'")
            return False

        try:
            doc = Document(input_path)
            self._update_progress(progress_callback, 30)

            paragraphs = [para.text for para in doc.paragraphs]
            non_empty = [text for text in paragraphs if text.strip()]

            if out_ext == '.txt':
                with open(output_path, 'w', encoding='utf-8') as f:
                    f.write("\n".join(paragraphs))

            elif out_ext == '.html':
                parts = ["""<!DOCTYPE html>
<html>
<head><meta charset="UTF-8"><title>Document Content</title></head>
<body>
"""]
                # Escape so that '<', '>' and '&' in the document text stay literal.
                parts.extend(f"<p>{escape(text, quote=False)}</p>" for text in non_empty)
                parts.append("</body></html>")

                with open(output_path, 'w', encoding='utf-8') as f:
                    f.write("".join(parts))

            elif out_ext == '.md':
                with open(output_path, 'w', encoding='utf-8') as f:
                    f.write("\n\n".join(non_empty))

            else:  # '.pdf'
                # with_suffix swaps only the final extension; str.replace would also
                # rewrite a '.pdf' that happens to appear earlier in the path.
                fallback_path = Path(output_path).with_suffix('.txt')
                with open(fallback_path, 'w', encoding='utf-8') as f:
                    f.write("\n".join(paragraphs))
                print("Note: DOCX to PDF requires additional libraries. Saved as TXT instead.")

            self._update_progress(progress_callback, 100)
            return True

        except Exception as e:
            print(f"DOCX conversion error: {e}")
            return False

    def convert_txt(self, input_path: str, output_path: str,
                    options: Dict[str, Any], progress_callback: Callable = None) -> bool:
        """Convert a UTF-8 text file to TXT, Markdown, HTML or DOCX.

        The output format is chosen from the (case-insensitive) suffix of
        ``output_path``. ``.txt`` and ``.md`` receive the content unchanged;
        ``.html`` wraps the HTML-escaped content in a ``<pre>`` element;
        ``.docx`` stores the content as a single paragraph, or falls back to
        a sibling ``.txt`` file when python-docx is not installed.

        Args:
            input_path: Path of the text file, read as UTF-8.
            output_path: Destination path; suffix must be ``.txt``, ``.md``,
                ``.html`` or ``.docx``.
            options: Currently unused by this method.
            progress_callback: Optional callable that receives progress values
                (40 after reading, 100 at the end).

        Returns:
            ``True`` when an output file was written; ``False`` when the
            output format is unsupported or any exception occurred.
        """
        from html import escape  # function-scope import: no file-level dependency change

        out_ext = Path(output_path).suffix.lower()
        if out_ext not in ('.txt', '.md', '.html', '.docx'):
            # Previously this fell through and reported success without writing anything.
            print(f"TXT conversion error: unsupported output format '{out_ext}'")
            return False

        try:
            with open(input_path, 'r', encoding='utf-8') as f:
                content = f.read()

            self._update_progress(progress_callback, 40)

            if out_ext in ('.txt', '.md'):
                with open(output_path, 'w', encoding='utf-8') as f:
                    f.write(content)

            elif out_ext == '.html':
                # Escape so that '<', '>' and '&' in the text are shown literally.
                html_content = f"""<!DOCTYPE html>
<html>
<head><meta charset="UTF-8"><title>Text Document</title></head>
<body>
<pre>{escape(content, quote=False)}</pre>
</body></html>"""
                with open(output_path, 'w', encoding='utf-8') as f:
                    f.write(html_content)

            else:  # '.docx'
                if DOCX_AVAILABLE:
                    doc = Document()
                    doc.add_paragraph(content)
                    doc.save(output_path)
                else:
                    # with_suffix swaps only the final extension; str.replace would also
                    # rewrite a '.docx' that happens to appear earlier in the path.
                    fallback_path = Path(output_path).with_suffix('.txt')
                    with open(fallback_path, 'w', encoding='utf-8') as f:
                        f.write(content)
                    print("Note: python-docx not installed. Saved as TXT instead.")

            self._update_progress(progress_callback, 100)
            return True

        except Exception as e:
            print(f"TXT conversion error: {e}")
            return False

    def convert_markdown(self, input_path: str, output_path: str,
                         options: Dict[str, Any], progress_callback: Callable = None) -> bool:
        """Convert a UTF-8 Markdown file to HTML or DOCX.

        The output format is chosen from the (case-insensitive) suffix of
        ``output_path``. HTML output is the rendered Markdown wrapped in a
        minimal page. DOCX output renders the Markdown to HTML and copies the
        text of headings, paragraphs, list items and ``<pre>`` blocks into a
        new document; when python-docx or BeautifulSoup is missing, the raw
        Markdown is saved to a sibling ``.txt`` file instead.

        Args:
            input_path: Path of the Markdown file, read as UTF-8.
            output_path: Destination path; suffix must be ``.html`` or ``.docx``.
            options: Currently unused by this method.
            progress_callback: Optional callable that receives progress values
                (40 after reading, 100 at the end).

        Returns:
            ``True`` when an output file was written; ``False`` when the
            output format is unsupported or any exception occurred.
        """
        out_ext = Path(output_path).suffix.lower()
        if out_ext not in ('.html', '.docx'):
            # Previously this fell through and reported success without writing anything.
            print(f"Markdown conversion error: unsupported output format '{out_ext}'")
            return False

        try:
            with open(input_path, 'r', encoding='utf-8') as f:
                content = f.read()

            self._update_progress(progress_callback, 40)

            if out_ext == '.html':
                html_content = markdown.markdown(content)
                full_html = f"""<!DOCTYPE html>
<html>
<head><meta charset="UTF-8"><title>Markdown Document</title></head>
<body>
{html_content}
</body></html>"""
                with open(output_path, 'w', encoding='utf-8') as f:
                    f.write(full_html)

            else:  # '.docx'
                if DOCX_AVAILABLE and BS4_AVAILABLE:
                    heading_tags = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']
                    block_tags = heading_tags + ['p', 'li', 'pre']

                    soup = BeautifulSoup(markdown.markdown(content), 'html.parser')
                    doc = Document()
                    for element in soup.find_all(block_tags):
                        # A block nested in another matched block (e.g. <li><p>…</p></li>)
                        # is already included in its ancestor's text; skip it to avoid
                        # writing the same text twice.
                        if element.find_parent(block_tags) is not None:
                            continue
                        text = element.get_text()
                        if not text.strip():
                            continue

                        if element.name in heading_tags:
                            doc.add_heading(text.strip(), level=int(element.name[1]))
                        elif element.name == 'li':
                            parent_name = getattr(element.parent, 'name', None)
                            list_style = 'List Number' if parent_name == 'ol' else 'List Bullet'
                            doc.add_paragraph(text.strip(), style=list_style)
                        else:
                            doc.add_paragraph(text)
                    doc.save(output_path)
                else:
                    # with_suffix swaps only the final extension; str.replace would also
                    # rewrite a '.docx' that happens to appear earlier in the path.
                    fallback_path = Path(output_path).with_suffix('.txt')
                    with open(fallback_path, 'w', encoding='utf-8') as f:
                        f.write(content)
                    print("Note: Required libraries not installed. Saved as TXT instead.")

            self._update_progress(progress_callback, 100)
            return True

        except Exception as e:
            print(f"Markdown conversion error: {e}")
            return False

    def convert_html(self, input_path: str, output_path: str,
                     options: Dict[str, Any], progress_callback: Callable = None) -> bool:
        """Extract the text of a UTF-8 HTML file and write it as TXT, Markdown or DOCX.

        The output format is chosen from the (case-insensitive) suffix of
        ``output_path``. Text is extracted with BeautifulSoup when available;
        otherwise tags are stripped with a regular expression, entities are
        decoded and whitespace is collapsed. Markdown output is the text under
        a fixed ``# Converted from HTML`` heading. DOCX output stores the text
        as a single paragraph, or falls back to a sibling ``.txt`` file when
        python-docx is not installed.

        Args:
            input_path: Path of the HTML file, read as UTF-8.
            output_path: Destination path; suffix must be ``.txt``, ``.md``
                or ``.docx``.
            options: Currently unused by this method.
            progress_callback: Optional callable that receives progress values
                (40 after reading, 100 at the end).

        Returns:
            ``True`` when an output file was written; ``False`` when the
            output format is unsupported or any exception occurred.
        """
        out_ext = Path(output_path).suffix.lower()
        if out_ext not in ('.txt', '.md', '.docx'):
            # Previously this fell through and reported success without writing anything.
            print(f"HTML conversion error: unsupported output format '{out_ext}'")
            return False

        try:
            with open(input_path, 'r', encoding='utf-8') as f:
                content = f.read()

            self._update_progress(progress_callback, 40)

            if BS4_AVAILABLE:
                soup = BeautifulSoup(content, 'html.parser')
                text = soup.get_text()
            else:
                # Simple text extraction without a parser.
                import re
                from html import unescape

                text = re.sub(r'<[^>]+>', ' ', content)
                # Decode entities such as '&amp;' so the plain text matches what
                # a browser would display.
                text = unescape(text)
                text = re.sub(r'\s+', ' ', text).strip()

            if out_ext == '.txt':
                with open(output_path, 'w', encoding='utf-8') as f:
                    f.write(text)

            elif out_ext == '.md':
                with open(output_path, 'w', encoding='utf-8') as f:
                    f.write(f"# Converted from HTML\n\n{text}")

            else:  # '.docx'
                if DOCX_AVAILABLE:
                    doc = Document()
                    doc.add_paragraph(text)
                    doc.save(output_path)
                else:
                    # with_suffix swaps only the final extension; str.replace would also
                    # rewrite a '.docx' that happens to appear earlier in the path.
                    fallback_path = Path(output_path).with_suffix('.txt')
                    with open(fallback_path, 'w', encoding='utf-8') as f:
                        f.write(text)
                    print("Note: python-docx not installed. Saved as TXT instead.")

            self._update_progress(progress_callback, 100)
            return True

        except Exception as e:
            print(f"HTML conversion error: {e}")
            return False

    def convert_generic(self, input_path: str, output_path: str,
                        options: Dict[str, Any], progress_callback: Callable = None) -> bool:
        """Copy a file of unknown type, re-encoding text to UTF-8.

        The first 8 KiB are checked for a NUL byte; a file containing one is
        treated as binary and copied byte for byte. Otherwise the file is
        decoded with the first of UTF-8, cp1252 and latin-1 that succeeds and
        written back as UTF-8.

        Args:
            input_path: Path of the file to read.
            output_path: Destination path.
            options: Currently unused by this method.
            progress_callback: Optional callable that receives 100 on success.

        Returns:
            ``True`` when the output file was written; ``False`` when any
            exception occurred (including source and destination being the
            same file on the binary-copy path).
        """
        import shutil  # function-scope import: no file-level dependency change

        try:
            # latin-1 can decode any byte sequence, so decoding alone can never
            # detect a binary file; sniff for NUL bytes before trying text.
            with open(input_path, 'rb') as src:
                looks_binary = b'\x00' in src.read(8192)

            content = None
            if not looks_binary:
                # latin-1 goes last: it never fails, so anything listed after
                # it would be unreachable.
                for encoding in ('utf-8', 'cp1252', 'latin-1'):
                    try:
                        with open(input_path, 'r', encoding=encoding) as f:
                            content = f.read()
                        break
                    except UnicodeDecodeError:
                        continue

            if content is None:
                # Binary data: copy unchanged instead of re-encoding it.
                shutil.copyfile(input_path, output_path)
            else:
                with open(output_path, 'w', encoding='utf-8') as f:
                    f.write(content)

            self._update_progress(progress_callback, 100)
            return True

        except Exception as e:
            print(f"Generic conversion error: {e}")
            return False