import base64
import html
import io
import logging
import os
import shutil
import tempfile
from datetime import datetime

import gradio as gr
import openpyxl
import PyPDF2
from docx import Document
from pptx import Presentation

# Configure logging before anything logs. The optional-import fallback below
# may emit a warning at import time; a module-level logging call made while the
# root logger has no handlers configures logging implicitly, after which this
# basicConfig() call (level and format) would be silently ignored.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)

# Optional dependency used for legacy Excel workbooks (.xls)
try:
    import xlrd
    XLRD_AVAILABLE = True
except ImportError:
    XLRD_AVAILABLE = False
    logging.warning("xlrd not available, .xls files may not be supported")

class MultiConverter:
    """Convert Excel, PowerPoint, PDF and Word files to Markdown-style text.

    Every public ``convert_*`` method takes a file path and returns the
    extracted text as a single string. The Excel converters report most
    failures inside the returned text instead of raising; the PowerPoint, PDF
    and Word converters let library exceptions propagate to the caller.
    """

    @staticmethod
    def _cell_text(value):
        """Return the display text for a raw cell value.

        Only ``None`` (an empty cell) becomes ``""``; falsy values such as
        ``0`` or ``False`` keep their text instead of vanishing.
        """
        return "" if value is None else str(value)

    @staticmethod
    def _trim_to_data(grid):
        """Crop *grid* to the bounding box of its non-``None`` cells.

        Args:
            grid: List of rows, each a sequence of raw cell values.

        Returns:
            The cropped rows with every value converted by ``_cell_text``,
            or an empty list when no cell holds a value.
        """
        filled_rows = [
            r for r, row in enumerate(grid)
            if any(value is not None for value in row)
        ]
        filled_cols = [
            c for row in grid for c, value in enumerate(row)
            if value is not None
        ]
        if not filled_rows or not filled_cols:
            return []

        # filled_rows is built in ascending order, so its ends are min and max.
        top, bottom = filled_rows[0], filled_rows[-1]
        left, right = min(filled_cols), max(filled_cols)
        return [
            [MultiConverter._cell_text(value) for value in row[left:right + 1]]
            for row in grid[top:bottom + 1]
        ]

    @staticmethod
    def _render_table(rows):
        """Render rows of strings as a column-aligned Markdown table.

        The first row is treated as the header and is followed by a dashed
        separator line. Each row, the header included, is emitted exactly once.

        Args:
            rows: List of rows, each a list of strings.

        Returns:
            The table text with one trailing newline per line, or ``""`` when
            *rows* is empty.
        """
        if not rows:
            return ""

        widths = [0] * max(len(row) for row in rows)
        for row in rows:
            for i, cell in enumerate(row):
                widths[i] = max(widths[i], len(cell))

        def format_row(row):
            padded = (cell.ljust(widths[i]) for i, cell in enumerate(row))
            return "| " + " | ".join(padded) + " |"

        lines = [
            format_row(rows[0]),
            "|" + "|".join("-" * (width + 2) for width in widths) + "|",
        ]
        lines.extend(format_row(row) for row in rows[1:])
        return "\n".join(lines) + "\n"

    @staticmethod
    def _read_sheet_grid(sheet):
        """Read every cell value of an openpyxl worksheet in a single pass.

        A cell that cannot be read is logged and treated as empty so one bad
        cell does not abort the whole sheet.
        """
        grid = []
        for row_idx in range(1, sheet.max_row + 1):
            row_values = []
            for col_idx in range(1, sheet.max_column + 1):
                try:
                    row_values.append(sheet.cell(row=row_idx, column=col_idx).value)
                except Exception as e:
                    logging.warning(f"Error reading cell at row {row_idx}, col {col_idx}: {str(e)}")
                    row_values.append(None)
            grid.append(row_values)
        return grid

    @staticmethod
    def _xls_cell_text(cell, datemode):
        """Return the display text for an xlrd cell.

        Dates are formatted as ``YYYY-MM-DD HH:MM:SS``. Time-only values, for
        which xlrd reports year, month and day as zero, are formatted as
        ``HH:MM:SS`` because they cannot be turned into a ``datetime``.
        Whole numbers are printed without the trailing ``.0`` that comes from
        xlrd returning numeric cells as floats.
        """
        if cell.ctype == xlrd.XL_CELL_DATE:
            year, month, day, hour, minute, second = xlrd.xldate_as_tuple(cell.value, datemode)
            if (year, month, day) == (0, 0, 0):
                return f"{hour:02d}:{minute:02d}:{second:02d}"
            return datetime(year, month, day, hour, minute, second).strftime("%Y-%m-%d %H:%M:%S")
        if cell.ctype == xlrd.XL_CELL_NUMBER and float(cell.value).is_integer():
            return str(int(cell.value))
        return str(cell.value).strip()

    def convert_excel_to_formatted_text(self, excel_file):
        """Convert an Excel workbook to Markdown-style tables, one per sheet.

        Args:
            excel_file: Path to the workbook. The extension selects the
                backend: ``.xls`` is read with xlrd, anything else with
                openpyxl.

        Returns:
            The formatted text. Failures to open or process the workbook are
            described in the returned text rather than raised.
        """
        output = io.StringIO()
        file_ext = os.path.splitext(excel_file)[1].lower()

        if file_ext == '.xls':
            # Always route legacy workbooks to the xlrd converter; it prints a
            # clear installation hint when xlrd is missing.
            logging.info("Processing old Excel format (.xls) with xlrd")
            return self._convert_xls_with_xlrd(excel_file, output)

        logging.info("Processing Excel format with openpyxl")
        try:
            workbook = openpyxl.load_workbook(excel_file, data_only=True)
        except Exception as e:
            logging.error(f"Error opening Excel file with openpyxl: {str(e)}")
            output.write("# Error opening Excel file\n\n")
            output.write(f"Details: {str(e)}\n\n")
            output.write("Possible reasons:\n")
            output.write("- The file may be in an older Excel format (.xls). Try saving it as .xlsx\n")
            output.write("- The file may be corrupted or password-protected\n")
            output.write("- The file may contain unsupported features\n\n")
            return output.getvalue()

        try:
            for idx, sheet_name in enumerate(workbook.sheetnames):
                if idx > 0:
                    output.write("\n" + "-" * 70 + "\n\n")
                output.write(f"### {sheet_name}:\n")

                # Emptiness is decided by the actual cell contents, so a sheet
                # holding a single value in A1 is still converted.
                rows = self._trim_to_data(self._read_sheet_grid(workbook[sheet_name]))
                if not rows:
                    output.write("# No data in sheet\n\n")
                    continue

                output.write(self._render_table(rows))
                output.write("\n")
        except Exception as e:
            logging.exception(f"Error processing Excel file: {str(e)}")
            output.write("# Error processing Excel file\n\n")
            output.write(f"Details: {str(e)}\n\n")

        return output.getvalue()

    def _convert_xls_with_xlrd(self, excel_file, output):
        """Convert a legacy Excel (.xls) workbook using xlrd.

        Args:
            excel_file: Path to the workbook.
            output: ``io.StringIO`` buffer that receives the text.

        Returns:
            The accumulated contents of *output*. A missing xlrd installation
            or a processing error is described in the text rather than raised.
        """
        if not XLRD_AVAILABLE:
            output.write("# Error: xlrd library not available to process .xls files\n\n")
            output.write("Please install xlrd with 'pip install xlrd' to process .xls files\n")
            return output.getvalue()

        try:
            workbook = xlrd.open_workbook(excel_file)

            for idx, sheet in enumerate(workbook.sheets()):
                if idx > 0:
                    output.write("\n" + "-" * 70 + "\n\n")
                output.write(f"### {sheet.name}:\n")

                if sheet.nrows <= 0 or sheet.ncols <= 0:
                    output.write("# No data in sheet\n\n")
                    continue

                rows = []
                for row_idx in range(sheet.nrows):
                    row_data = []
                    for col_idx in range(sheet.ncols):
                        try:
                            value = self._xls_cell_text(sheet.cell(row_idx, col_idx), workbook.datemode)
                        except Exception as e:
                            # Best effort: keep the table intact, note the problem.
                            logging.warning(f"Error reading cell at row {row_idx}, col {col_idx}: {str(e)}")
                            value = ""
                        row_data.append(value)
                    rows.append(row_data)

                output.write(self._render_table(rows))
                output.write("\n")

        except Exception as e:
            logging.exception(f"Error processing .xls file with xlrd: {str(e)}")
            output.write("# Error processing .xls file\n\n")
            output.write(f"Details: {str(e)}\n\n")

        return output.getvalue()

    def convert_pptx_to_text(self, pptx_file, filename):
        """Convert a PowerPoint presentation to plain text.

        Args:
            pptx_file: Path (or file-like object) passed to ``Presentation``.
            filename: Name shown in the heading of the output.

        Returns:
            A heading, then one ``## Slide N`` section per slide holding the
            text of every shape that exposes a ``text`` attribute.
        """
        output = io.StringIO()
        prs = Presentation(pptx_file)
        output.write(f"# PowerPoint Presentation: {filename}\n\n")
        for slide_num, slide in enumerate(prs.slides, 1):
            output.write(f"## Slide {slide_num}\n")
            for shape in slide.shapes:
                if hasattr(shape, "text"):
                    output.write(f"{shape.text}\n\n")
        return output.getvalue()

    def convert_pdf_to_text(self, pdf_file, filename):
        """Convert a PDF document to plain text.

        Args:
            pdf_file: Path (or file-like object) passed to ``PyPDF2.PdfReader``.
            filename: Name shown in the heading of the output.

        Returns:
            A heading, then one ``## Page N`` section per page.
        """
        output = io.StringIO()
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        output.write(f"# PDF Document: {filename}\n\n")
        for page_num, page in enumerate(pdf_reader.pages, 1):
            output.write(f"## Page {page_num}\n")
            # Guard against extractors that yield None for pages without a
            # text layer, which would otherwise break the concatenation.
            output.write((page.extract_text() or "") + "\n\n")
        return output.getvalue()

    def convert_docx_to_text(self, docx_file, filename):
        """Convert a Word document to plain text.

        Args:
            docx_file: Path (or file-like object) passed to ``Document``.
            filename: Name shown in the heading of the output.

        Returns:
            A heading followed by the text of every paragraph, separated by
            blank lines.
        """
        output = io.StringIO()
        doc = Document(docx_file)
        output.write(f"# Word Document: {filename}\n\n")
        for para in doc.paragraphs:
            output.write(para.text + "\n\n")
        return output.getvalue()


# Extensions the converter dispatches on.
_SUPPORTED_EXTENSIONS = frozenset(
    {".xlsx", ".xls", ".pptx", ".ppt", ".pdf", ".docx", ".doc"}
)

# Extra guidance appended when a legacy binary Office file fails to convert.
_LEGACY_FORMAT_HINTS = {
    ".ppt": "Legacy .ppt files are usually not readable by this converter. "
            "Save the presentation as .pptx and upload it again.",
    ".doc": "Legacy .doc files are usually not readable by this converter. "
            "Save the document as .docx and upload it again.",
}


def _upload_name(file):
    """Return the bare file name of an upload, without any directory part."""
    if isinstance(file, tuple) and len(file) > 1:
        raw_name = file[0]
    elif hasattr(file, 'name'):
        raw_name = file.name
    elif isinstance(file, str):
        raw_name = file
    else:
        return "unknown_file"
    return os.path.basename(str(raw_name)) or "unknown_file"


def _save_upload(file, destination):
    """Copy the uploaded content to *destination*.

    Accepts an object with ``read()``, a path string, or a ``(name, path)``
    tuple; anything else is tried as a path as a last resort.

    Returns:
        ``None`` on success, otherwise a user-facing error message.
    """
    try:
        if hasattr(file, 'read'):
            with open(destination, 'wb') as dst:
                dst.write(file.read())
        elif isinstance(file, str) and os.path.exists(file):
            shutil.copyfile(file, destination)
        elif isinstance(file, tuple) and len(file) > 1 and os.path.exists(file[1]):
            shutil.copyfile(file[1], destination)
        else:
            # Last resort: treat whatever we were given as a path.
            try:
                shutil.copyfile(str(file), destination)
            except (OSError, ValueError):
                return f"Could not read file. Type: {type(file)}"
    except Exception as e:
        return f"Error reading file: {str(e)}"
    return None


def _convert_by_extension(path, file_name, file_ext):
    """Run the converter matching *file_ext* and return the extracted text."""
    if file_ext not in _SUPPORTED_EXTENSIONS:
        return f"Unsupported file format: {file_ext}"

    converter = MultiConverter()
    if file_ext in (".xlsx", ".xls"):
        try:
            return converter.convert_excel_to_formatted_text(path)
        except Exception as e:
            logging.exception("Error during Excel conversion: %s", e)
            return (
                f"Error converting Excel file: {str(e)}\n\n"
                "This may be due to:\n"
                "- Unsupported Excel format (some .xls files require xlrd library)\n"
                "- Corrupted or password-protected file\n"
                "- Excel file with complex formatting or macros\n\n"
                "Try saving your Excel file as a simple .xlsx file before uploading."
            )
    if file_ext in (".pptx", ".ppt"):
        return converter.convert_pptx_to_text(path, file_name)
    if file_ext == ".pdf":
        return converter.convert_pdf_to_text(path, file_name)
    return converter.convert_docx_to_text(path, file_name)


def _build_download_link(text, download_name):
    """Return an HTML anchor that downloads *text* as *download_name*.

    The name comes from the uploaded file, so it is HTML-escaped before being
    placed in the attribute and the link label.
    """
    payload = base64.b64encode(text.encode('utf-8')).decode()
    safe_name = html.escape(download_name, quote=True)
    return (
        f'<a href="data:text/plain;base64,{payload}" download="{safe_name}" '
        'style="display: inline-block; padding: 0.6em 1.2em; margin: 0.5em 0; '
        'background-color: #4CAF50; color: white; border: none; border-radius: 4px; '
        'cursor: pointer; text-decoration: none; font-weight: bold;">'
        f'⬇️ Download {safe_name}</a>'
    )


def convert_file(file):
    """Convert an uploaded file to text and build a download link for it.

    Args:
        file: The upload as handed over by the UI: an object with ``read()``
            and usually ``name``, a path string, or a ``(name, path)`` tuple.
            ``None`` means nothing was uploaded.

    Returns:
        A ``(text, html)`` tuple. ``text`` is the converted content or an
        error message; ``html`` is a download anchor, or ``""`` when reading
        or converting the file failed.
    """
    if file is None:
        return "No file uploaded. Please select a file first.", ""

    temp_dir = None
    try:
        # Use only the base name: the original path may be a server-side temp
        # path that should not leak into headings or the download name.
        file_name = _upload_name(file)
        file_ext = os.path.splitext(file_name)[1].lower()
        logging.info("Starting conversion for file: %s", file_name)

        # Keep the extension on the temporary copy; the Excel converter picks
        # its backend from the path extension.
        temp_dir = tempfile.mkdtemp()
        temp_file_path = os.path.join(temp_dir, "uploaded_file" + file_ext)

        read_error = _save_upload(file, temp_file_path)
        if read_error is not None:
            return read_error, ""

        try:
            result = _convert_by_extension(temp_file_path, file_name, file_ext)
        except Exception as e:
            logging.exception("Error converting file: %s", e)
            message = f"Error converting file: {str(e)}"
            hint = _LEGACY_FORMAT_HINTS.get(file_ext)
            if hint:
                message += "\n\n" + hint
            return message, ""

        output_filename = os.path.splitext(file_name)[0] + ".txt"
        return result, _build_download_link(result, output_filename)

    except Exception as e:
        logging.exception("Unexpected error: %s", e)
        return f"Unexpected error: {str(e)}", ""
    finally:
        # Runs on every exit path, including the early error returns, so the
        # temporary directory is never left behind.
        if temp_dir is not None:
            try:
                shutil.rmtree(temp_dir)
            except OSError as e:
                logging.warning("Could not clean up temporary files: %s", e)


# Build the Gradio interface: upload -> convert button -> text preview -> download link
with gr.Blocks(title="Multi-Format to TXT Converter") as app:
    gr.Markdown("# Multi-Format to TXT Converter by Heuristica.pl")
    gr.Markdown("Convert Excel, PowerPoint, PDF, and Word files to text format.")
    
    with gr.Row():
        file_input = gr.File(label="Upload a file (Excel, PowerPoint, PDF, or Word)")
    
    with gr.Row():
        convert_button = gr.Button("Convert to TXT", variant="primary")
    
    with gr.Row():
        text_output = gr.Textbox(label="Converted Text", lines=15)
    
    with gr.Row():
        download_html = gr.HTML(label="Download")
    
    # Info about supported formats. The PowerPoint and Word converters are
    # backed by python-pptx and python-docx, which read only the XML-based
    # .pptx/.docx formats, so the legacy binary formats are not advertised
    # as directly supported.
    gr.Markdown("""
    ## Supported file formats:
    - **Excel**: .xlsx, .xls
    - **PowerPoint**: .pptx (legacy .ppt files must first be saved as .pptx)
    - **PDF**: .pdf
    - **Word**: .docx (legacy .doc files must first be saved as .docx)
    
    ## How to use:
    1. Upload a file using the file upload button
    2. Click "Convert to TXT"
    3. View the converted text
    4. Click the download button to save the converted text file
    """)
    
    # Wire the button: convert_file returns (text, download link HTML)
    convert_button.click(
        fn=convert_file,
        inputs=[file_input],
        outputs=[text_output, download_html]
    )

# Start the application when this file is executed as a script
if __name__ == "__main__":
    try:
        logging.info("Starting the application")
        app.launch(debug=True)
    except Exception as e:
        # Any start-up or runtime failure is logged together with its traceback.
        logging.exception(f"Error launching application: {str(e)}")
    else:
        # Reached only when launch() returned without raising.
        logging.info("Application stopped")