InitialMarkups

Runtime error

App Files Files Community

Marthee committed on Feb 10, 2025

Commit

6daf3b4

verified ·

1 Parent(s): 1d56743

Update Find_Hyperlinking_text.py

Browse files

Files changed (1) hide show

Find_Hyperlinking_text.py +93 -6

Find_Hyperlinking_text.py CHANGED Viewed

@@ -7,7 +7,95 @@ from collections import Counter
 import fitz  # PyMuPDF
 import re
 import urllib.parse
 baselink='https://marthee-nbslink.hf.space/view-pdf?'
 def normalize_text(text):
     """Lowercase, remove extra spaces, and strip special characters."""
     text = text.lower().strip()
@@ -298,9 +386,8 @@ def annotate_text_from_pdf(pdfshareablelinks, LISTheading_to_search):
                     all_text += current_line.strip() + '\n'  # Append the current line
     print(df)
     print(dictionaryNBS)
-    # Save the annotated PDF to bytes
-    pdf_bytes = BytesIO()
-    pdf_document.save(pdf_bytes)
-    pdf_document.close()
-    return pdf_bytes.getvalue(), pageNumberFound, zoom_str

 import fitz  # PyMuPDF
 import re
 import urllib.parse
+import pandas as pd
+import tempfile
+from fpdf import FPDF
 baselink='https://marthee-nbslink.hf.space/view-pdf?'
+class PDF(FPDF):
+    """FPDF document whose page header is a centred "NBS Document Links" title."""
+    def header(self):
+        """Draw the bold page title, then leave a small gap below it."""
+        self.set_font("Arial", style="B", size=12)
+        self.cell(0, 10, "NBS Document Links", ln=True, align="C")
+        self.ln(5)  # vertical gap between the title and the page body
+def _cell_text(value):
+    """Return *value* as text FPDF can render; None and NaN become ''."""
+    if isinstance(value, str):
+        return value
+    # NaN is the only float that does not compare equal to itself.
+    if value is None or (isinstance(value, float) and value != value):
+        return ""
+    return str(value)
+def save_df_to_pdf(df):
+    """Render the NBS link table in *df* as a PDF and return it as a PyMuPDF document.
+    Args:
+        df: DataFrame with the columns "NBSLink", "NBS", "head above 1" and
+            "head above 2"; one table row is drawn per DataFrame row.
+    Returns:
+        A fitz (PyMuPDF) Document opened from the in-memory PDF bytes. It is
+        already open, so it is used directly rather than being passed to
+        fitz.open() a second time.
+    """
+    link_col = "NBSLink"
+    text_cols = ["NBS", "head above 1", "head above 2"]
+    headers = ["NBS Link", "NBS", "Head Above 1", "Head Above 2"]
+    col_width = 50
+    line_height = 5
+    margin = 15
+    pdf = PDF()
+    pdf.set_auto_page_break(auto=True, margin=margin)
+    # Equal left/right margins.
+    pdf.set_left_margin(margin)
+    pdf.set_right_margin(margin)
+    pdf.add_page()
+    # Centre the table horizontally on the page.
+    table_width = col_width * len(headers)
+    start_x = (pdf.w - table_width) / 2
+    pdf.set_x(start_x)
+    # Header row: bold text on a light gray background.
+    pdf.set_fill_color(200, 200, 200)
+    pdf.set_font("Arial", "B", 10)
+    for header in headers:
+        pdf.cell(col_width, 8, header, border=1, fill=True, align="C")
+    pdf.ln()
+    pdf.set_font("Arial", size=9)
+    for _, row in df.iterrows():
+        y_start = pdf.get_y()
+        texts = [_cell_text(row[col]) for col in text_cols]
+        # split_only=True only computes the wrapped lines; nothing is drawn.
+        line_counts = [
+            len(pdf.multi_cell(col_width, line_height, text, border=0, align="L", split_only=True))
+            for text in texts
+        ]
+        # Every cell in the row shares the height of its tallest text cell
+        # (at least one line, so an all-empty row still gets a visible box).
+        row_height = max(1, max(line_counts)) * line_height
+        # Clickable link cell; an empty link string leaves the cell unlinked.
+        pdf.set_xy(start_x, y_start)
+        pdf.cell(col_width, row_height, "Click Here", border=1, link=_cell_text(row[link_col]), align="C")
+        # Text cells: draw the wrapped text borderless, then a fixed-height
+        # rectangle so all borders in the row line up.
+        for i, text in enumerate(texts, start=1):
+            x_col = start_x + col_width * i
+            pdf.set_xy(x_col, y_start)
+            pdf.multi_cell(col_width, line_height, text, border=0, align="L")
+            pdf.rect(x_col, y_start, col_width, row_height)
+        # Continue directly below this row regardless of where the last
+        # multi_cell left the cursor.
+        pdf.set_y(y_start + row_height)
+    # Write the PDF into memory instead of a file on disk.
+    pdf_output = BytesIO()
+    pdf.output(pdf_output)
+    # fitz.open() treats its first positional argument as a file name, so the
+    # in-memory bytes must be passed as `stream` with an explicit file type.
+    return fitz.open(stream=pdf_output.getvalue(), filetype="pdf")
 def normalize_text(text):
     """Lowercase, remove extra spaces, and strip special characters."""
     text = text.lower().strip()
                     all_text += current_line.strip() + '\n'  # Append the current line
     print(df)
     print(dictionaryNBS)
+    outputpdf=save_df_to_pdf(df)
+    outputpdfFitz =fitz.open('pdf',outputpdf)
+    # return pdf_bytes.getvalue(), pageNumberFound, zoom_str
+    return  pdf_document , outputpdfFitz