File size: 9,709 Bytes
91bb80d
 
 
375e0d5
91bb80d
 
 
 
 
 
 
 
 
 
 
1a15cab
91bb80d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38ac856
 
 
 
 
 
 
 
 
91bb80d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1a15cab
91bb80d
 
 
 
 
 
 
 
 
 
 
 
1a15cab
91bb80d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1a15cab
 
91bb80d
 
38ac856
91bb80d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38ac856
91bb80d
 
 
 
 
 
 
 
1a15cab
91bb80d
 
 
 
 
 
38ac856
91bb80d
 
 
 
 
 
 
1a15cab
 
91bb80d
1a15cab
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
import os
#os.environ["PATH"] += os.pathsep + "/usr/bin" + os.pathsep + "/usr/local/bin" #Removed redundant path setting
print(f"Current PATH: {os.environ['PATH']}")
import streamlit as st
from PyPDF2 import PdfMerger
from google.cloud import vision
from google.oauth2 import service_account
import fitz
from PIL import Image
from io import BytesIO
import tempfile
import json
from streamlit_sortables import sort_items
import subprocess
from tqdm import tqdm
import base64

# Startup diagnostic: report whether /usr/bin/pdfinfo is present and, when it
# is, print its mode bits in octal so permission problems show up in the logs.
if not os.path.exists("/usr/bin/pdfinfo"):
    print("pdfinfo does not exist at /usr/bin/pdfinfo")
else:
    print("pdfinfo exists at /usr/bin/pdfinfo")
    permissions = os.stat("/usr/bin/pdfinfo").st_mode
    print(f"File permissions: {oct(permissions)}")


# Load Google Cloud Vision credentials from the GOOGLE_CREDENTIALS_JSON secret.
# `client` stays None when the secret is missing or cannot be used, so the
# tools that do not need OCR keep working and the Transcriber can report the
# problem instead of the whole app failing at import time.
client = None
credentials_json = os.getenv("GOOGLE_CREDENTIALS_JSON")
if credentials_json:
    try:
        credentials_dict = json.loads(credentials_json)
        credentials = service_account.Credentials.from_service_account_info(credentials_dict)
        client = vision.ImageAnnotatorClient(credentials=credentials)
    except ValueError as exc:
        # json.JSONDecodeError is a ValueError subclass.
        # NOTE(review): google-auth is expected to raise ValueError for
        # service-account info in the wrong format as well - confirm.
        print(f"Could not load Google Cloud credentials: {exc}")

# Function to extract text using Google Cloud Vision
def extract_text_with_google_vision(image_bytes):
    """Run document text detection on one image and return the detected text.

    Args:
        image_bytes: Image content, passed as ``content`` to ``vision.Image``.

    Returns:
        The text of the response's full-text annotation, or "" when the
        response carries no such annotation.

    Raises:
        RuntimeError: If the module-level ``client`` is None (credentials were
            not configured) or the API response carries an error message.
    """
    # Fail with an actionable message instead of an AttributeError on None.
    if client is None:
        raise RuntimeError("Google Cloud credentials are not set. Please configure the secret.")

    image = vision.Image(content=image_bytes)
    response = client.document_text_detection(image=image)
    if response.error.message:
        raise RuntimeError(f"Google Cloud Vision API Error: {response.error.message}")
    return response.full_text_annotation.text if response.full_text_annotation else ""

# Function to process PDF for transcription
def process_pdf(file):
    """Render each page of a PDF to PNG and transcribe it with Google Cloud Vision.

    Args:
        file: Binary file-like object holding the PDF; it must support
            ``seek`` and ``read``.

    Returns:
        The concatenated text of every page that was transcribed. Each page
        is introduced by a ``--- Page N ---`` header. Pages whose
        transcription raises are reported through ``st.error`` and left out.
    """
    # Rewind first, as the other helpers in this file do: the upload may
    # already have been read, in which case read() would return nothing.
    file.seek(0)
    with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as temp_pdf:
        temp_pdf.write(file.read())
        temp_pdf_path = temp_pdf.name

    page_chunks = []
    try:
        # The context manager closes the document before the temp file is
        # removed in the finally clause below.
        with fitz.open(temp_pdf_path) as doc:
            for i in tqdm(range(len(doc)), desc="Processing pages"):
                pix = doc.load_page(i).get_pixmap()
                image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
                png_buffer = BytesIO()
                image.save(png_buffer, format="PNG")
                try:
                    page_text = extract_text_with_google_vision(png_buffer.getvalue())
                except Exception as e:
                    # Best effort: one failing page must not abort the rest.
                    st.error(f"Error on page {i + 1}: {e}")
                else:
                    page_chunks.append(f"--- Page {i + 1} ---\n{page_text}\n\n")
    finally:
        os.remove(temp_pdf_path)

    return "".join(page_chunks)


# Function to generate thumbnail from PDF
def get_pdf_thumbnail(uploaded_file):
    """Return a thumbnail image of the first page of a PDF.

    Args:
        uploaded_file: Binary file-like object holding the PDF; it is rewound
            before being read.

    Returns:
        A PIL image of page 1, shrunk in place by ``Image.thumbnail`` to fit
        within 100 x 140 pixels.
    """
    uploaded_file.seek(0)
    # Close the document as soon as the page is rendered; this function runs
    # for every uploaded file on every rerun of the script.
    with fitz.open(stream=uploaded_file.read(), filetype="pdf") as doc:
        pix = doc.load_page(0).get_pixmap()
        img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
    img.thumbnail((100, 140))
    return img

# Function to merge PDFs
def merge_pdfs(reordered_files, output_filename="combined_document.pdf"):
    """Merge several PDFs, in the given order, into a single file on disk.

    Args:
        reordered_files: Iterable of seekable binary file-like objects, one
            per PDF, already in the desired output order.
        output_filename: Path the merged PDF is written to. Defaults to the
            previously hard-coded "combined_document.pdf"; pass a unique path
            when several merges may run at the same time.

    Returns:
        ``output_filename``, the path of the merged PDF.
    """
    merger = PdfMerger()
    try:
        for file in reordered_files:
            # Rewind: the upload may already have been read for its thumbnail.
            file.seek(0)
            merger.append(file)
        with open(output_filename, "wb") as output_file:
            merger.write(output_file)
    finally:
        # Release the merger's resources even when appending or writing fails.
        merger.close()
    return output_filename

def download_file(output_file, file_name, mime_type):
    """Show a "Download File" button that serves a file from disk.

    Args:
        output_file: Path of the file to offer; it is opened in binary mode.
        file_name: Name passed to the button as ``file_name``.
        mime_type: Value passed to the button as ``mime``.
    """
    button_options = {
        "label": "Download File",
        "file_name": file_name,
        "mime": mime_type,
    }
    with open(output_file, "rb") as handle:
        st.download_button(data=handle, **button_options)

def add_signature_to_pdf(pdf_file, signature_image, x, y):
    """Stamp a signature image onto the first page of a PDF.

    Args:
        pdf_file: Seekable binary file-like object holding the PDF.
        signature_image: Base64-encoded image data for the signature.
        x: First-corner x coordinate passed to ``fitz.Rect``.
        y: First-corner y coordinate passed to ``fitz.Rect``.

    Returns:
        "signed_document.pdf" (written to the working directory) on success,
        or None when the temp copy or the signing step fails; the error is
        printed in that case.
    """
    temp_pdf_path = None
    try:
        try:
            # Rewind first: a caller may already have read the upload (for
            # example to render a preview), leaving nothing for read().
            pdf_file.seek(0)
            with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as temp_pdf:
                temp_pdf_path = temp_pdf.name
                temp_pdf.write(pdf_file.read())
            print(f"temp file name: {temp_pdf_path}")
        except Exception as e:
            print(f"Error creating temp file: {e}")
            return None

        try:
            # Normalize whatever image format was uploaded to PNG bytes.
            img = Image.open(BytesIO(base64.b64decode(signature_image)))
            img_bytes = BytesIO()
            img.save(img_bytes, format="PNG")

            # The context manager closes the document before the temp file
            # is removed below.
            with fitz.open(temp_pdf_path) as doc:
                page = doc[0]  # Operate on the first page for simplicity
                rect = fitz.Rect(x, y, x + 200, y + 100)  # Size for the signature
                page.insert_image(rect, stream=img_bytes.getvalue(), keep_proportion=True)
                output_filename = "signed_document.pdf"
                doc.save(output_filename)
            return output_filename
        except Exception as e:
            print(f"Error with pdf: {e}")
            return None
    finally:
        # Remove the temp copy on every path, not only on success.
        if temp_pdf_path is not None:
            os.remove(temp_pdf_path)

def pdf_signer_ui(pdf_file, signature_image_data):
    """Render the signing workflow: preview page 1, choose a position, sign.

    Args:
        pdf_file: Uploaded PDF as a seekable binary file-like object.
        signature_image_data: Base64-encoded signature image, forwarded to
            ``add_signature_to_pdf``.
    """
    # A button is only True during the rerun caused by its own click. Keep
    # the preview state in session_state so that pressing "Add Signature"
    # (which reruns the script with "Preview PDF" False) still reaches the
    # signing branch.
    if st.button("Preview PDF"):
        st.session_state["pdf_signer_preview"] = True
    if not st.session_state.get("pdf_signer_preview"):
        return

    try:
        # Rewind and render straight from memory; no temp file is needed.
        pdf_file.seek(0)
        with fitz.open(stream=pdf_file.read(), filetype="pdf") as doc:
            pix = doc[0].get_pixmap()
            img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
            preview_width, preview_height = pix.width, pix.height
    except Exception as e:
        print(f"Error with pdf: {e}")
        st.error(f"Could not open PDF file {e}")
        return

    st.image(img, use_container_width=True)

    # st.image only displays the picture and does not report click
    # positions, so the position is entered explicitly.
    # NOTE(review): the bounds assume the default get_pixmap() scale, where
    # one preview pixel corresponds to one page unit - confirm.
    st.caption("Enter the position of the signature's top-left corner on the page.")
    x = st.number_input(
        "Signature X (from the left edge)",
        min_value=0.0,
        max_value=float(preview_width),
        value=0.0,
        step=1.0,
    )
    y = st.number_input(
        "Signature Y (from the top edge)",
        min_value=0.0,
        max_value=float(preview_height),
        value=0.0,
        step=1.0,
    )

    if st.button("Add Signature"):
        with st.spinner("Adding Signature..."):
            if pdf_file and signature_image_data:  # Check for file existence
                # The preview above consumed the stream; rewind before signing.
                pdf_file.seek(0)
                output_file = add_signature_to_pdf(pdf_file, signature_image_data, x, y)
                if output_file:
                    st.success("Signature added successfully")
                    download_file(output_file, "signed_document.pdf", "application/pdf")
                    os.remove(output_file)
                else:
                    st.error("An Error occurred when creating the signed PDF")
            else:
                st.error("Please upload both a PDF file and a signature image")

# Sidebar Navigation
st.sidebar.title("Tool Selector")
selection = st.sidebar.radio("Choose a tool:", ["PDF Combiner", "PDF Transcriber", "PDF Signer"])

# PDF Combiner with Preview and Reordering
if selection == "PDF Combiner":
    st.title("PDF Combiner with Preview & Reordering")
    st.write("Upload individual PDF pages, visualize them, reorder, and merge into a single PDF.")

    uploaded_files = st.file_uploader("Upload PDF pages", type="pdf", accept_multiple_files=True)

    if uploaded_files:
        # Prefix every filename with its upload position so the sortable
        # labels are unique; looking names up with list.index() mapped two
        # uploads sharing a filename to the same (first) file.
        labels = [f"{position}. {file.name}" for position, file in enumerate(uploaded_files, start=1)]
        index_by_label = {label: index for index, label in enumerate(labels)}

        # Render each thumbnail once and reuse it for the preview below.
        thumbnails = [get_pdf_thumbnail(file) for file in uploaded_files]

        # Display the labels for reordering
        st.write("**Drag and drop to reorder the PDFs:**")
        reordered_labels = sort_items(labels)

        # Map the labels back to the corresponding files
        order = [index_by_label[label] for label in reordered_labels]
        reordered_files = [uploaded_files[index] for index in order]

        # Display the thumbnails in the new order
        st.write("**Preview of selected order:**")
        cols = st.columns(len(reordered_files))
        for col, index in zip(cols, order):
            with col:
                st.image(thumbnails[index], caption=uploaded_files[index].name, use_container_width=True)

        # Merge PDFs in the specified order
        if st.button("Merge PDFs"):
            output_file = merge_pdfs(reordered_files)
            st.success("PDF pages combined successfully!")
            download_file(output_file, "combined_document.pdf", "application/pdf")
            os.remove(output_file)

# PDF Transcriber Tool
elif selection == "PDF Transcriber":
    st.title("PDF Transcriber Tool")
    st.write("Upload a scanned PDF to transcribe the text.")
    if not client:
        # Without a client every page would fail, so do not offer the upload.
        st.error("Google Cloud credentials are not set. Please configure the secret.")
    else:
        uploaded_file = st.file_uploader("Upload a PDF", type=["pdf"])
        if uploaded_file and st.button("Transcribe PDF"):
            with st.spinner("Processing..."):
                pdf_text = process_pdf(uploaded_file)
            st.success("Text extraction complete!")
            st.text_area("Extracted Text", pdf_text, height=400)
            output_file_name = f"(T){os.path.splitext(uploaded_file.name)[0]}.txt"
            # The transcript exists only in memory; nothing was written to
            # disk under output_file_name, so serve the text directly rather
            # than opening a file that does not exist.
            st.download_button(
                label="Download File",
                data=pdf_text,
                file_name=output_file_name,
                mime="text/plain",
            )


# PDF Signer Tool
elif selection == "PDF Signer":
    st.title("PDF Signer")
    st.write("Upload a PDF and a signature image, then preview the PDF to place the signature.")

    pdf_file = st.file_uploader("Upload PDF", type=["pdf"])
    signature_image = st.file_uploader("Upload Signature Image", type=["png", "jpg", "jpeg"])
    if pdf_file and signature_image:
        signature_image_data = base64.b64encode(signature_image.read()).decode("utf-8")
        pdf_signer_ui(pdf_file, signature_image_data)