import gradio as gr
from openpyxl import load_workbook
import zipfile
import os
import tempfile
import xml.etree.ElementTree as ET


import tempfile
import zipfile
import os
import xml.etree.ElementTree as ET


def extract_images_from_excel(file, sheet_names=None):
    """
    Extracts images from an Excel file, referenced in drawing.xml files.
    """

    output_path = "output"

    with tempfile.TemporaryDirectory() as temp_dir:
        with zipfile.ZipFile(file, "r") as zip_ref:
            zip_ref.extractall(temp_dir)

        xl_folder = os.path.join(temp_dir, "xl")
        worksheets_folder = os.path.join(xl_folder, "worksheets")
        rels_folder = os.path.join(worksheets_folder, "_rels")
        drawings_folder = os.path.join(xl_folder, "drawings")
        media_folder = os.path.join(xl_folder, "media")

        if not os.path.exists(rels_folder) or not os.path.exists(media_folder):
            print("No images or relationships found in the Excel file.")
            return

        sheet_name_to_rel = {}
        if sheet_names:
            workbook_file = os.path.join(xl_folder, "workbook.xml")

            root = ET.fromstring(open(workbook_file).read())
            namespace = {"ns": root.tag.split("}")[0].strip("{")}

            sheets = root.findall(".//ns:sheet", namespace)

            for sheet in sheets:
                sheet_id = sheet.attrib["sheetId"]
                name = sheet.attrib["name"]
                if name in sheet_names:
                    sheet_name_to_rel[name] = f"sheet{sheet_id}.xml.rels"

        extracted_images = []

        for rel_file in os.listdir(rels_folder):
            if sheet_names:
                if not any(
                    sheet_name_to_rel[sheet] in rel_file for sheet in sheet_names
                ):
                    continue
            namespace = {
                "rel": "http://schemas.openxmlformats.org/package/2006/relationships"
            }
            rel_file_path = os.path.join(rels_folder, rel_file)
            rel_tree = ET.parse(rel_file_path)

            for relationship in rel_tree.findall("rel:Relationship", namespace):
                if relationship.attrib["Type"].endswith("/drawing"):
                    drawing_file = os.path.join(
                        drawings_folder, os.path.basename(relationship.attrib["Target"])
                    )

                    if os.path.exists(drawing_file):
                        drawing_tree = ET.parse(drawing_file)

                        namespace = {
                            "xdr": "http://schemas.openxmlformats.org/drawingml/2006/spreadsheetDrawing",
                            "a": "http://schemas.openxmlformats.org/drawingml/2006/main",
                            "r": "http://schemas.openxmlformats.org/officeDocument/2006/relationships",
                        }

                        images_path = drawing_tree.findall(".//xdr:cNvPr", namespace)

                        print("Found images elems in drawing.xml file", images_path)
                        for i in images_path:
                            image_name = i.attrib.get("name", None)

                            image_path = os.path.normpath(
                                os.path.join(media_folder, os.path.basename(image_name))
                            )

                            if os.path.exists(image_path):
                                output_image_path = os.path.join(
                                    output_path, os.path.basename(image_name)
                                )
                                os.makedirs(output_path, exist_ok=True)

                                with open(image_path, "rb") as img_file:
                                    with open(output_image_path, "wb") as out_file:
                                        out_file.write(img_file.read())
                                extracted_images.append(output_image_path)

        print(f"Extracted {len(extracted_images)} images to {temp_dir}")
        return extracted_images


def get_markdown_format(cell):
    """
    Convert Excel cell formatting (bold, italic, strikethrough) to Markdown format.
    """
    if cell is None or cell.value is None:
        return ""

    value = str(cell.value)

    if cell.font.bold:
        value = f"**{value}**"
    if cell.font.italic:
        value = f"*{value}*"
    if cell.font.strike:
        value = f"~~{value}~~"

    value = value.replace("\n", " ").replace("|", "\\|")

    return value


def extract_tables_from_sheet(sheet):
    """
    Extract tables from an Excel sheet.
    Detects tables based on non-empty rows and retains formatting.
    """
    tables = []
    current_table = []

    for row in sheet.iter_rows():
        if all(cell.value is None for cell in row):
            if current_table:
                tables.append(current_table)
                current_table = []
        else:
            current_table.append(row)
    if current_table:
        tables.append(current_table)

    return tables


def convert_table_to_markdown(table, images=[]):
    """
    Convert a table to Markdown format, preserving Excel formatting.
    """

    try:
        non_empty_columns = [
            col_idx
            for col_idx in range(len(table[0]))
            if any(row[col_idx].value is not None for row in table)
        ]
    except IndexError:
        non_empty_columns = [
            col_idx
            for col_idx in range(len(table[0]))
            if any(row[col_idx] is not None for row in table)
        ]

    filtered_table = [
        [row[col_idx] if col_idx < len(row) else None for col_idx in non_empty_columns]
        for row in table
    ]

    md_str = ""
    header = [get_markdown_format(cell) for cell in filtered_table[0]]
    md_str += f"| {' | '.join(header)} |\n"
    md_str += f"|{'|'.join(['---' for _ in header])}|\n"

    for data_row in filtered_table[1:]:
        row_str = []
        for cell in data_row:
            row_str.append(get_markdown_format(cell))
        md_str += f"| {' | '.join(row_str)} |\n"

    return md_str


def parse_excel(file, selected_sheets):
    """
    Parse the selected sheets from the uploaded Excel file and return Markdown content.
    """

    if not selected_sheets or len(selected_sheets) == 0:
        return "No sheets selected.", []

    workbook = load_workbook(file.name, read_only=True)
    sheets = workbook.sheetnames

    markdown_output = ""

    images = extract_images_from_excel(file.name, selected_sheets)

    for sheet_name in selected_sheets:
        if sheet_name not in sheets:
            markdown_output += f"Sheet '{sheet_name}' not found.\n\n"
            continue

        sheet = workbook[sheet_name]
        tables = extract_tables_from_sheet(sheet)
        markdown_output += f"# {sheet_name}\n\n"

        for table in tables:
            markdown_output += convert_table_to_markdown(table)
            markdown_output += "\n\n"
    if markdown_output == "":
        markdown_output = "No tables found in selected sheets."
    return markdown_output, images


def get_sheets(file):
    """
    Return the list of sheet names from the uploaded Excel file.
    """
    if not file:
        return gr.update(choices=[])

    workbook = load_workbook(file.name, read_only=True)

    return gr.update(choices=workbook.sheetnames)


# Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("## Excel Tables and Images from XLSX")

    with gr.Row():
        with gr.Column(scale=4):
            with gr.Tab("Text/Tables"):
                markdown_output = gr.Markdown(show_copy_button=True)

            with gr.Tab("Images"):
                images_output = gr.Gallery(
                    label="Images in Sheets",
                    show_label=False,
                    elem_id="gallery",
                    object_fit="contain",
                    height="auto",
                )
        with gr.Column(scale=1):
            sheet_selector = gr.CheckboxGroup(
                label="Select Sheets to Parse", interactive=True
            )

    with gr.Row():
        file_input = gr.File(label="Upload Excel File", file_types=[".xlsx"])

    extrract_btn = gr.Button(
        "Extract Sheets",
    )

    extrract_btn.click(fn=get_sheets, inputs=[file_input], outputs=sheet_selector)

    sheet_selector.change(
        fn=parse_excel,
        inputs=[file_input, sheet_selector],
        outputs=[markdown_output, images_output],
    )


if __name__ == "__main__":
    demo.launch()