Marthee committed on
Commit
1abd7ac
·
verified ·
1 Parent(s): d0b9747

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +46 -45
app.py CHANGED
@@ -1,71 +1,72 @@
1
  from flask import Flask, send_file, render_template, request
2
  import requests
3
  from io import BytesIO
 
4
 
5
  app = Flask(__name__)
6
 
 
7
  @app.route("/", methods=["GET", "POST"])
8
  def getInfotoMeasure(pagenum):
9
- # Set the page number internally in Flask (for example, page 2)
10
-
11
- # page = 2
12
-
13
  return render_template("gui.html", page=pagenum)
14
 
15
- def highlight_text_from_pdf(pdfshareablelinks , keyword):
16
- print('intexts',pdfshareablelinks)
17
- # pdfshareablelinks=split_links(pdfshareablelinks) #if array like in pdftotext
18
  for link in pdfshareablelinks:
19
- pdf_content = None
20
-
21
- if link and ('http' in link or 'dropbox' in link):
22
- # Modify Dropbox link for direct download
23
- if 'dl=0' in link:
24
- link = link.replace('dl=0', 'dl=1')
25
-
26
- # Download the PDF content from the shareable link
27
- response = requests.get(link)
28
- pdf_content = BytesIO(response.content) # Store the content in memory
29
- print('Downloaded from shareable link.')
30
-
31
- if pdf_content is None:
32
- raise ValueError("No valid PDF content found.")
33
- pageNumTextFound=1
34
- # Open the PDF using fitz (PyMuPDF) directly from memory
35
- pdf_document = fitz.open(stream=pdf_content, filetype="pdf")
36
- for page_num in range(pdf_document.page_count):
 
 
37
  page = pdf_document.load_page(page_num)
38
- matched=page.search_for(keyword)
 
39
  if matched:
40
- page.add_highlight_annot(word)
41
- pageNumTextFound=page_num +1 # 1 indexed pdfs
42
- return pdf_document, pageNumTextFound
 
 
 
43
  # Route to serve PDF with a specified page
44
  @app.route('/view-pdf', methods=['GET'])
45
  def download_pdf():
46
- # page = 3 # Get the page number (default is page 1)
47
- # dropbox_link = 'https://www.dropbox.com/scl/fi/fjykwhhn9gu9t3kqrflxd/LA002-NOR-ZZ-ZZ-T-A-2403_Architectural-Specification-F10-Brick-and-Block-Walling_A4-_C01.pdf?rlkey=ek9i66i79m0hwp8z5yjs6rp5p&st=jh05a6qs&dl=0'
 
 
48
 
49
- # # Modify Dropbox link to enable direct download
50
- # if 'dl=0' in dropbox_link:
51
- # dropbox_link = dropbox_link.replace('dl=0', 'dl=1')
52
 
53
- # response = requests.get(dropbox_link)
54
-
55
- # if response.status_code != 200:
56
- # return "Failed to download the PDF.", 500
57
- pdflink='https://www.dropbox.com/scl/fi/fjykwhhn9gu9t3kqrflxd/LA002-NOR-ZZ-ZZ-T-A-2403_Architectural-Specification-F10-Brick-and-Block-Walling_A4-_C01.pdf?rlkey=ek9i66i79m0hwp8z5yjs6rp5p&st=jh05a6qs&dl=0'
58
- keyword='To be read with preliminaries/ general conditions'
59
-
60
- pdf_content, page_num = highlight_text_from_pdf(pdf_link, keyword)
61
  getInfotoMeasure(pageNumTextFound)
62
- pdf_content = BytesIO(response.content)
 
 
 
63
 
64
  return send_file(
65
- pdf_content,
66
  mimetype='application/pdf',
67
  as_attachment=False,
68
- download_name=f"document_page_{page}.pdf"
69
  )
70
 
71
  if __name__ == '__main__':
 
1
  from flask import Flask, send_file, render_template, request
2
  import requests
3
  from io import BytesIO
4
+ import fitz # PyMuPDF
5
 
6
  app = Flask(__name__)
7
 
8
+ # Route to render the main page
9
  @app.route("/", methods=["GET", "POST"])
10
  def getInfotoMeasure(pagenum):
 
 
 
 
11
  return render_template("gui.html", page=pagenum)
12
 
13
+ # Function to highlight text in PDF
14
+ def highlight_text_from_pdf(pdfshareablelinks, keyword):
15
+ print('PDF Links:', pdfshareablelinks)
16
  for link in pdfshareablelinks:
17
+ pdf_content = None
18
+
19
+ if link and ('http' in link or 'dropbox' in link):
20
+ # Modify Dropbox link for direct download
21
+ if 'dl=0' in link:
22
+ link = link.replace('dl=0', 'dl=1')
23
+
24
+ # Download the PDF content from the shareable link
25
+ response = requests.get(link)
26
+ if response.status_code == 200:
27
+ pdf_content = BytesIO(response.content)
28
+ print('Downloaded from shareable link.')
29
+
30
+ if pdf_content is None:
31
+ raise ValueError("No valid PDF content found.")
32
+
33
+ pageNumTextFound = 1
34
+ pdf_document = fitz.open(stream=pdf_content, filetype="pdf")
35
+
36
+ for page_num in range(pdf_document.page_count):
37
  page = pdf_document.load_page(page_num)
38
+ matched = page.search_for(keyword)
39
+
40
  if matched:
41
+ for word in matched:
42
+ page.add_highlight_annot(word)
43
+ pageNumTextFound = page_num + 1 # 1-indexed pages
44
+
45
+ return pdf_document, pageNumTextFound
46
+
47
  # Route to serve PDF with a specified page
48
  @app.route('/view-pdf', methods=['GET'])
49
  def download_pdf():
50
+ pdf_link = [
51
+ 'https://www.dropbox.com/scl/fi/fjykwhhn9gu9t3kqrflxd/LA002-NOR-ZZ-ZZ-T-A-2403_Architectural-Specification-F10-Brick-and-Block-Walling_A4-_C01.pdf?rlkey=ek9i66i79m0hwp8z5yjs6rp5p&st=jh05a6qs&dl=0'
52
+ ]
53
+ keyword = "To be read with preliminaries/ general conditions"
54
 
55
+ # Call function to highlight text and get PDF page
56
+ pdf_content, pageNumTextFound = highlight_text_from_pdf(pdf_link, keyword)
 
57
 
58
+ # Render the main GUI with the detected page number
 
 
 
 
 
 
 
59
  getInfotoMeasure(pageNumTextFound)
60
+
61
+ pdf_bytes = BytesIO()
62
+ pdf_content.save(pdf_bytes)
63
+ pdf_bytes.seek(0)
64
 
65
  return send_file(
66
+ pdf_bytes,
67
  mimetype='application/pdf',
68
  as_attachment=False,
69
+ download_name=f"highlighted_page_{pageNumTextFound}.pdf"
70
  )
71
 
72
  if __name__ == '__main__':