Marthee committed on
Commit
c9e58d1
·
verified ·
1 Parent(s): 5356c27

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +46 -13
app.py CHANGED
@@ -5,27 +5,60 @@ from io import BytesIO
5
  app = Flask(__name__)
6
 
7
  @app.route("/", methods=["GET", "POST"])
8
- def getInfotoMeasure():
9
  # Set the page number internally in Flask (for example, page 2)
10
- page = 2
11
- download_pdf()
12
- return render_template("gui.html", page=page)
13
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  # Route to serve PDF with a specified page
15
  @app.route('/view-pdf', methods=['GET'])
16
  def download_pdf():
17
- page = 3 # Get the page number (default is page 1)
18
- dropbox_link = 'https://www.dropbox.com/scl/fi/fjykwhhn9gu9t3kqrflxd/LA002-NOR-ZZ-ZZ-T-A-2403_Architectural-Specification-F10-Brick-and-Block-Walling_A4-_C01.pdf?rlkey=ek9i66i79m0hwp8z5yjs6rp5p&st=jh05a6qs&dl=0'
19
-
20
- # Modify Dropbox link to enable direct download
21
- if 'dl=0' in dropbox_link:
22
- dropbox_link = dropbox_link.replace('dl=0', 'dl=1')
23
 
24
- response = requests.get(dropbox_link)
 
 
25
 
26
- if response.status_code != 200:
27
- return "Failed to download the PDF.", 500
28
 
 
 
 
 
 
 
 
29
  pdf_content = BytesIO(response.content)
30
 
31
  return send_file(
 
5
  app = Flask(__name__)
6
 
7
@app.route("/", methods=["GET", "POST"])
def getInfotoMeasure(pagenum=1):
    """Render ``gui.html`` with the PDF page number the GUI should show.

    The ``"/"`` rule carries no URL variable, so Flask invokes this view
    without arguments. ``pagenum`` therefore needs a default; without one
    every request to ``"/"`` fails with ``TypeError``. Direct callers can
    still pass an explicit page number.

    Args:
        pagenum: Page number handed to the template as ``page``. Defaults
            to 1, the first page.

    Returns:
        The rendered ``gui.html`` template.
    """
    return render_template("gui.html", page=pagenum)
14
+
15
def highlight_text_from_pdf(pdfshareablelinks, keyword):
    """Download the shared PDF(s) and highlight every match of ``keyword``.

    Args:
        pdfshareablelinks: A single shareable URL or an iterable of URLs.
            A bare string is treated as one link instead of being iterated
            character by character.
        keyword: Text searched for on every page.

    Returns:
        A ``(pdf_document, pageNumTextFound)`` tuple for the last link
        processed: the opened PyMuPDF document with highlight annotations
        added, and the 1-based number of the last page that contains
        ``keyword`` (1 when nothing matched).

    Raises:
        ValueError: If no link is given, or a link is empty or does not
            look like a URL.
        requests.HTTPError: If a download answers with an error status.
    """
    # NOTE(review): block nesting was reconstructed from a flattened diff;
    # confirm the loop/return placement against the real app.py.
    print('intexts', pdfshareablelinks)

    if pdfshareablelinks is None:
        links = []
    elif isinstance(pdfshareablelinks, str):
        links = [pdfshareablelinks]
    else:
        links = list(pdfshareablelinks)
    if not links:
        # Avoid reaching the return with no document ever opened.
        raise ValueError("No valid PDF content found.")

    pdf_document = None
    pageNumTextFound = 1
    for link in links:
        if not link or not ('http' in link or 'dropbox' in link):
            raise ValueError("No valid PDF content found.")

        # Dropbox serves the raw file only when dl=1; the replace is a no-op
        # for links that do not carry dl=0.
        link = link.replace('dl=0', 'dl=1')

        # Download the PDF into memory; fail loudly on HTTP errors rather
        # than handing an error page to the PDF parser.
        response = requests.get(link)
        response.raise_for_status()
        pdf_content = BytesIO(response.content)
        print('Downloaded from shareable link.')

        pageNumTextFound = 1
        # Open the PDF using fitz (PyMuPDF) directly from memory.
        pdf_document = fitz.open(stream=pdf_content, filetype="pdf")
        for page_num in range(pdf_document.page_count):
            page = pdf_document.load_page(page_num)
            matched = page.search_for(keyword)
            if matched:
                # Highlight the rectangles returned by the search.
                page.add_highlight_annot(matched)
                pageNumTextFound = page_num + 1  # PDF pages are 1-indexed
    return pdf_document, pageNumTextFound
43
  # Route to serve PDF with a specified page
44
  @app.route('/view-pdf', methods=['GET'])
45
  def download_pdf():
46
+ # page = 3 # Get the page number (default is page 1)
47
+ # dropbox_link = 'https://www.dropbox.com/scl/fi/fjykwhhn9gu9t3kqrflxd/LA002-NOR-ZZ-ZZ-T-A-2403_Architectural-Specification-F10-Brick-and-Block-Walling_A4-_C01.pdf?rlkey=ek9i66i79m0hwp8z5yjs6rp5p&st=jh05a6qs&dl=0'
 
 
 
 
48
 
49
+ # # Modify Dropbox link to enable direct download
50
+ # if 'dl=0' in dropbox_link:
51
+ # dropbox_link = dropbox_link.replace('dl=0', 'dl=1')
52
 
53
+ # response = requests.get(dropbox_link)
 
54
 
55
+ # if response.status_code != 200:
56
+ # return "Failed to download the PDF.", 500
57
+ pdflink='https://www.dropbox.com/scl/fi/fjykwhhn9gu9t3kqrflxd/LA002-NOR-ZZ-ZZ-T-A-2403_Architectural-Specification-F10-Brick-and-Block-Walling_A4-_C01.pdf?rlkey=ek9i66i79m0hwp8z5yjs6rp5p&st=jh05a6qs&dl=0'
58
+ keyword='To be read with preliminaries/ general conditions'
59
+ pdf_document, pageNumTextFound =highlight_text_from_pdf(pdflink , keyword)
60
+ getInfotoMeasure(pagenum)
61
+ pdf_content, page_num = highlight_text_from_pdf(pdf_link, keyword)
62
  pdf_content = BytesIO(response.content)
63
 
64
  return send_file(