Marthee committed on
Commit
6daf3b4
·
verified ·
1 Parent(s): 1d56743

Update Find_Hyperlinking_text.py

Browse files
Files changed (1) hide show
  1. Find_Hyperlinking_text.py +93 -6
Find_Hyperlinking_text.py CHANGED
@@ -7,7 +7,95 @@ from collections import Counter
7
  import fitz # PyMuPDF
8
  import re
9
  import urllib.parse
 
 
 
 
 
10
  baselink='https://marthee-nbslink.hf.space/view-pdf?'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  def normalize_text(text):
12
  """Lowercase, remove extra spaces, and strip special characters."""
13
  text = text.lower().strip()
@@ -298,9 +386,8 @@ def annotate_text_from_pdf(pdfshareablelinks, LISTheading_to_search):
298
  all_text += current_line.strip() + '\n' # Append the current line
299
  print(df)
300
  print(dictionaryNBS)
301
- # Save the annotated PDF to bytes
302
- pdf_bytes = BytesIO()
303
- pdf_document.save(pdf_bytes)
304
- pdf_document.close()
305
-
306
- return pdf_bytes.getvalue(), pageNumberFound, zoom_str
 
7
  import fitz # PyMuPDF
8
  import re
9
  import urllib.parse
10
+ import pandas as pd
11
+ import tempfile
12
+ from fpdf import FPDF
13
+
14
+
15
  baselink='https://marthee-nbslink.hf.space/view-pdf?'
16
class PDF(FPDF):
    """FPDF document that prints a fixed title via the ``header`` hook."""

    def header(self):
        """Draw the bold, centred "NBS Document Links" title and a small gap.

        FPDF calls this hook itself when a page is added, so it is not
        meant to be invoked directly.
        """
        title = "NBS Document Links"
        self.set_font("Arial", "B", 12)
        # Width 0 stretches the cell to the right margin; ln=True moves the
        # cursor to the start of the next line afterwards.
        self.cell(0, 10, title, ln=True, align="C")
        # Vertical gap between the title and the page content.
        self.ln(5)
21
+
22
def save_df_to_pdf(df):
    """Render the NBS link table held in *df* as a PDF and open it with PyMuPDF.

    The table has four equally wide columns: a clickable "Click Here" cell
    whose target is ``row["NBSLink"]``, followed by the wrapped text of the
    ``"NBS"``, ``"head above 1"`` and ``"head above 2"`` columns. Every cell
    of a row is drawn with the height of that row's tallest cell.

    Args:
        df: DataFrame providing the columns ``"NBSLink"``, ``"NBS"``,
            ``"head above 1"`` and ``"head above 2"``.

    Returns:
        A ``fitz`` document opened from the in-memory PDF bytes.
    """
    col_width = 50
    line_height = 5
    margin = 15
    headers = ["NBS Link", "NBS", "Head Above 1", "Head Above 2"]
    text_columns = ["NBS", "head above 1", "head above 2"]

    pdf = PDF()
    pdf.set_auto_page_break(auto=True, margin=margin)
    pdf.set_left_margin(margin)
    pdf.set_right_margin(margin)
    pdf.add_page()

    # Centre the table horizontally on the page, independent of the margins.
    table_width = col_width * len(headers)
    start_x = (pdf.w - table_width) / 2

    # Header row: bold text on a light gray background.
    pdf.set_x(start_x)
    pdf.set_fill_color(200, 200, 200)
    pdf.set_font("Arial", "B", 10)
    for title in headers:
        pdf.cell(col_width, 8, title, border=1, fill=True, align="C")
    pdf.ln()

    pdf.set_font("Arial", size=9)

    for _, row in df.iterrows():
        # Coerce once so NaN/None/numeric values cannot crash the text layout.
        texts = [_cell_text(row[name]) for name in text_columns]

        # Dry-run the wrapping to learn how many lines each cell needs.
        line_counts = [
            len(
                pdf.multi_cell(
                    col_width, line_height, text,
                    border=0, align="L", split_only=True,
                )
            )
            for text in texts
        ]
        # Keep at least one line so an all-empty row still has a visible height.
        row_height = max(max(line_counts), 1) * line_height

        # Break the page *before* reading the row origin; otherwise the link
        # cell may trigger an automatic break and leave y_start pointing at
        # the previous page's position.
        if pdf.will_page_break(row_height):
            pdf.add_page()
        y_start = pdf.get_y()

        # Clickable link cell, as tall as the whole row.
        pdf.set_xy(start_x, y_start)
        pdf.cell(
            col_width, row_height, "Click Here",
            border=1, link=_cell_text(row["NBSLink"]), align="C",
        )

        # Text cells: draw the wrapped text, then a border of the full row
        # height so all cells line up regardless of their own line count.
        for index, text in enumerate(texts, start=1):
            x_col = start_x + col_width * index
            pdf.set_xy(x_col, y_start)
            pdf.multi_cell(col_width, line_height, text, border=0, align="L")
            pdf.rect(x_col, y_start, col_width, row_height)

        # Place the cursor at the start of the next row.
        pdf.set_xy(start_x, y_start + row_height)

    # Serialize to memory instead of a file on disk.
    pdf_output = BytesIO()
    pdf.output(pdf_output)

    # PyMuPDF takes in-memory data through ``stream`` plus an explicit
    # ``filetype``; the first positional argument is a file path.
    return fitz.open(stream=pdf_output.getvalue(), filetype="pdf")


def _cell_text(value):
    """Return *value* as printable text, mapping None and float NaN to ""."""
    if value is None:
        return ""
    # NaN is the only float that is not equal to itself.
    if isinstance(value, float) and value != value:
        return ""
    return str(value)
96
+
97
+
98
+
99
  def normalize_text(text):
100
  """Lowercase, remove extra spaces, and strip special characters."""
101
  text = text.lower().strip()
 
386
  all_text += current_line.strip() + '\n' # Append the current line
387
  print(df)
388
  print(dictionaryNBS)
389
+
390
+ outputpdf=save_df_to_pdf(df)
391
+ outputpdfFitz =fitz.open('pdf',outputpdf)
392
+ # return pdf_bytes.getvalue(), pageNumberFound, zoom_str
393
+ return pdf_document , outputpdfFitz