Marthee committed on
Commit
67c3f41
·
verified ·
1 Parent(s): bd99369

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -15
app.py CHANGED
@@ -1,39 +1,45 @@
1
- from flask import Flask, send_file, render_template, request, session
2
  import requests
3
  from io import BytesIO
4
  import fitz # PyMuPDF
5
 
 
 
 
 
6
  app = Flask(__name__)
7
 
8
- # Store the result in session to reuse it across routes
9
  @app.route("/", methods=["GET", "POST"])
10
  def getInfotoMeasure():
11
- if 'pageNumTextFound' not in session:
12
- pdf_link = ['https://www.dropbox.com/scl/fi/fjykwhhn9gu9t3kqrflxd/LA002-NOR-ZZ-ZZ-T-A-2403_Architectural-Specification-F10-Brick-and-Block-Walling_A4-_C01.pdf?rlkey=ek9i66i79m0hwp8z5yjs6rp5p&st=jh05a6qs&dl=0']
13
- keyword = "To be read with preliminaries/ general conditions"
 
14
 
15
- # Call the function once and store the result in session
16
- pdf_content, pageNumTextFound = highlight_text_from_pdf(pdf_link, keyword)
17
- session['pageNumTextFound'] = pageNumTextFound
18
- session['pdf_content'] = pdf_content.getvalue()
19
 
20
- return render_template("gui.html", page=session['pageNumTextFound'])
 
21
 
22
  @app.route('/view-pdf', methods=['GET'])
23
  def download_pdf():
24
- if 'pdf_content' not in session:
25
- return "PDF content not found in session.", 404
 
 
26
 
27
- pdf_bytes = BytesIO(session['pdf_content'])
28
  return send_file(
29
  pdf_bytes,
30
  mimetype='application/pdf',
31
  as_attachment=False,
32
- download_name=f"highlighted_page_{session['pageNumTextFound']}.pdf"
33
  )
34
 
35
  def highlight_text_from_pdf(pdfshareablelinks, keyword):
36
  print('PDF Links:', pdfshareablelinks)
 
37
  for link in pdfshareablelinks:
38
  pdf_content = None
39
 
@@ -42,6 +48,7 @@ def highlight_text_from_pdf(pdfshareablelinks, keyword):
42
  link = link.replace('dl=0', 'dl=1')
43
 
44
  response = requests.get(link)
 
45
  if response.status_code == 200:
46
  pdf_content = BytesIO(response.content)
47
 
@@ -58,9 +65,15 @@ def highlight_text_from_pdf(pdfshareablelinks, keyword):
58
  if matched:
59
  for word in matched:
60
  page.add_highlight_annot(word)
 
61
  pageNumTextFound = page_num + 1
62
 
63
- return pdf_document, pageNumTextFound
 
 
 
 
 
64
 
65
  if __name__ == '__main__':
66
  app.run(host='0.0.0.0', port=7860)
 
1
+ from flask import Flask, send_file, render_template, request
2
  import requests
3
  from io import BytesIO
4
  import fitz # PyMuPDF
5
 
6
+ # Define module-level (global) variables to retain the PDF content across function calls
7
+ pdf_content = None
8
+ pageNumTextFound = 0
9
+
10
  app = Flask(__name__)
11
 
 
12
  @app.route("/", methods=["GET", "POST"])
13
  def getInfotoMeasure():
14
+ global pdf_content, pageNumTextFound
15
+
16
+ pdf_link = ['https://www.dropbox.com/scl/fi/fjykwhhn9gu9t3kqrflxd/LA002-NOR-ZZ-ZZ-T-A-2403_Architectural-Specification-F10-Brick-and-Block-Walling_A4-_C01.pdf?rlkey=ek9i66i79m0hwp8z5yjs6rp5p&st=jh05a6qs&dl=0']
17
+ keyword = "To be read with preliminaries/ general conditions"
18
 
19
+ # Call the function to process the PDF
20
+ pdf_content, pageNumTextFound = highlight_text_from_pdf(pdf_link, keyword)
 
 
21
 
22
+ # Render the GUI with the current page number
23
+ return render_template("gui.html", page=pageNumTextFound)
24
 
25
  @app.route('/view-pdf', methods=['GET'])
26
  def download_pdf():
27
+ global pdf_content, pageNumTextFound
28
+
29
+ if pdf_content is None:
30
+ return "PDF content not found.", 404
31
 
32
+ pdf_bytes = BytesIO(pdf_content)
33
  return send_file(
34
  pdf_bytes,
35
  mimetype='application/pdf',
36
  as_attachment=False,
37
+ download_name=f"highlighted_page_{pageNumTextFound}.pdf"
38
  )
39
 
40
  def highlight_text_from_pdf(pdfshareablelinks, keyword):
41
  print('PDF Links:', pdfshareablelinks)
42
+
43
  for link in pdfshareablelinks:
44
  pdf_content = None
45
 
 
48
  link = link.replace('dl=0', 'dl=1')
49
 
50
  response = requests.get(link)
51
+
52
  if response.status_code == 200:
53
  pdf_content = BytesIO(response.content)
54
 
 
65
  if matched:
66
  for word in matched:
67
  page.add_highlight_annot(word)
68
+
69
  pageNumTextFound = page_num + 1
70
 
71
+ # Save PDF content to memory and return it along with the page number
72
+ pdf_bytes = BytesIO()
73
+ pdf_document.save(pdf_bytes)
74
+ pdf_document.close()
75
+
76
+ return pdf_bytes.getvalue(), pageNumTextFound
77
 
78
  if __name__ == '__main__':
79
  app.run(host='0.0.0.0', port=7860)