import os
import streamlit as st
from pdf2image import convert_from_path
from PIL import Image
import pytesseract
from docx import Document
import tempfile

# Function to convert PDF to image
def pdf_to_image(pdf_path, dpi=500):
    """Render the PDF at *pdf_path* to images, one conversion call per file.

    Args:
        pdf_path: Filesystem path of the PDF to convert.
        dpi: Resolution passed to ``convert_from_path``. Defaults to 500,
            the value this function previously hard-coded; lower values
            reduce memory use and conversion time.

    Returns:
        Whatever ``convert_from_path`` returns for the file, or ``None`` if
        the conversion raised (the error is reported in the Streamlit UI).
    """
    try:
        return convert_from_path(pdf_path, dpi=dpi)
    except Exception as e:
        # UI boundary: surface the failure to the user instead of crashing
        # the whole Streamlit run.
        st.error(f"Error during PDF to image conversion: {str(e)}")
        return None

# Function to extract text from an image using pytesseract
def image_to_text(image):
    """Run OCR on *image* with pytesseract and return the recognised text.

    If the OCR call raises, the error is shown in the Streamlit UI and
    ``None`` is returned instead.
    """
    try:
        return pytesseract.image_to_string(image)
    except Exception as ocr_error:
        st.error(f"Error during image to text conversion: {str(ocr_error)}")
        return None

# Function to save text to a Word document
def save_to_word(text, file_name):
    """Write *text* into a new temporary ``.docx`` file and return its path.

    Args:
        text: Text to place in the document as a single paragraph.
        file_name: Name used as the prefix of the temporary file. Only its
            final path component is used.

    Returns:
        Path of the saved ``.docx`` file. The file is not deleted
        automatically; the caller is responsible for removing it.
    """
    # OCR output commonly contains form feeds (page separators) and other
    # control characters that are not legal in XML and make the document
    # writer raise ValueError. Drop everything below 0x20 except tab, LF, CR.
    illegal_controls = dict.fromkeys(
        code for code in range(0x20) if code not in (0x09, 0x0A, 0x0D)
    )
    doc = Document()
    doc.add_paragraph(text.translate(illegal_controls))

    # A prefix containing directory separators would point the temp file at
    # a non-existent sub-directory, so keep just the base name.
    safe_prefix = os.path.basename(file_name)

    # Reserve a unique path, then close the handle before saving so no open
    # file descriptor is left behind.
    with tempfile.NamedTemporaryFile(
        delete=False, suffix='.docx', prefix=safe_prefix
    ) as temp_file:
        temp_path = temp_file.name
    doc.save(temp_path)
    return temp_path

# Streamlit UI
st.title("PDF to Word Converter")
st.write("Upload a PDF to convert it to a Word document")

# MIME type for .docx so the browser treats the download as a Word document.
DOCX_MIME = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"


def _remove_quietly(path):
    """Delete the file at *path*, ignoring failures (best-effort cleanup)."""
    try:
        os.remove(path)
    except OSError:
        pass


# File upload feature
uploaded_files = st.file_uploader("Choose PDF files", type="pdf", accept_multiple_files=True)

if uploaded_files:
    for index, uploaded_file in enumerate(uploaded_files):
        # Save the uploaded PDF to a temporary file. NamedTemporaryFile
        # creates the file atomically, unlike the deprecated, race-prone
        # tempfile.mktemp().
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_pdf:
            temp_pdf.write(uploaded_file.getbuffer())
            temp_pdf_path = temp_pdf.name

        # Convert PDF to images; the temporary PDF is no longer needed once
        # the conversion call has returned.
        try:
            images = pdf_to_image(temp_pdf_path)
        finally:
            _remove_quietly(temp_pdf_path)

        if not images:
            # pdf_to_image already reported any conversion error.
            continue

        # Extract text from images, skipping pages where OCR failed or
        # produced nothing.
        extracted_text = "".join(
            f"{page_text}\n"
            for page_text in map(image_to_text, images)
            if page_text
        )

        if not extracted_text:
            st.warning(f"No text could be extracted from {uploaded_file.name}.")
            continue

        # Save the extracted text to Word, then load the document's bytes:
        # download_button sends its `data` argument verbatim, so passing the
        # path string would make the user download the path, not the file.
        word_file = save_to_word(extracted_text, uploaded_file.name)
        try:
            with open(word_file, "rb") as docx_file:
                docx_bytes = docx_file.read()
        finally:
            _remove_quietly(word_file)

        # Offer "report.docx" rather than "report.pdf.docx".
        base_name = os.path.splitext(uploaded_file.name)[0]

        st.success(f"Conversion of {uploaded_file.name} complete! Download the Word file below.")
        st.download_button(
            f"Download {uploaded_file.name} as Word",
            docx_bytes,
            file_name=f"{base_name}.docx",
            mime=DOCX_MIME,
            # Explicit key: two uploads with the same name would otherwise
            # produce identical buttons and a duplicate-widget error.
            key=f"download_{index}",
        )
else:
    st.write("Please upload PDF files to convert.")