Marthee committed on
Commit
bd99369
·
verified ·
1 Parent(s): 46886f1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -38
app.py CHANGED
@@ -1,37 +1,49 @@
1
- from flask import Flask, send_file, render_template, request
2
  import requests
3
  from io import BytesIO
4
  import fitz # PyMuPDF
5
 
6
  app = Flask(__name__)
7
 
8
- # Route to render the main page with page number
9
  @app.route("/", methods=["GET", "POST"])
10
  def getInfotoMeasure():
11
- # Call the function to highlight text and get the page number
12
- pdf_link = ['https://www.dropbox.com/scl/fi/fjykwhhn9gu9t3kqrflxd/LA002-NOR-ZZ-ZZ-T-A-2403_Architectural-Specification-F10-Brick-and-Block-Walling_A4-_C01.pdf?rlkey=ek9i66i79m0hwp8z5yjs6rp5p&st=jh05a6qs&dl=0']
13
- keyword = "To be read with preliminaries/ general conditions"
14
- pdf_content, pageNumTextFound = highlight_text_from_pdf(pdf_link, keyword)
15
-
16
- # Pass pageNumTextFound to the template for rendering
17
- return render_template("gui.html", page=pageNumTextFound)
18
 
19
- # Function to highlight text in PDF and return the page number
20
- def highlight_text_from_pdf(pdfshareablelinks, keyword):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  print('PDF Links:', pdfshareablelinks)
22
  for link in pdfshareablelinks:
23
  pdf_content = None
24
 
25
  if link and ('http' in link or 'dropbox' in link):
26
- # Modify Dropbox link for direct download
27
  if 'dl=0' in link:
28
  link = link.replace('dl=0', 'dl=1')
29
 
30
- # Download PDF content from the link
31
  response = requests.get(link)
32
  if response.status_code == 200:
33
  pdf_content = BytesIO(response.content)
34
- print('Downloaded PDF content.')
35
 
36
  if pdf_content is None:
37
  raise ValueError("No valid PDF content found.")
@@ -46,32 +58,9 @@ def highlight_text_from_pdf(pdfshareablelinks, keyword):
46
  if matched:
47
  for word in matched:
48
  page.add_highlight_annot(word)
49
- pageNumTextFound = page_num + 1 # 1-indexed
50
 
51
  return pdf_document, pageNumTextFound
52
 
53
- # Flask route to serve the highlighted PDF
54
- @app.route('/view-pdf', methods=['GET'])
55
- def download_pdf():
56
- pdf_link = [
57
- 'https://www.dropbox.com/scl/fi/fjykwhhn9gu9t3kqrflxd/LA002-NOR-ZZ-ZZ-T-A-2403_Architectural-Specification-F10-Brick-and-Block-Walling_A4-_C01.pdf?rlkey=ek9i66i79m0hwp8z5yjs6rp5p&st=jh05a6qs&dl=0'
58
- ]
59
- keyword = "To be read with preliminaries/ general conditions"
60
-
61
- # Call function to highlight text and get PDF page
62
- pdf_content, pageNumTextFound = highlight_text_from_pdf(pdf_link, keyword)
63
-
64
- # Create a BytesIO object from the modified PDF content
65
- pdf_bytes = BytesIO()
66
- pdf_content.save(pdf_bytes)
67
- pdf_bytes.seek(0)
68
-
69
- return send_file(
70
- pdf_bytes,
71
- mimetype='application/pdf',
72
- as_attachment=False,
73
- download_name=f"highlighted_page_{pageNumTextFound}.pdf"
74
- )
75
-
76
  if __name__ == '__main__':
77
  app.run(host='0.0.0.0', port=7860)
 
1
+ from flask import Flask, send_file, render_template, request, session
2
  import requests
3
  from io import BytesIO
4
  import fitz # PyMuPDF
5
 
6
  app = Flask(__name__)
7
 
8
# Cache the highlighting result in the session so both routes can reuse it.
@app.route("/", methods=["GET", "POST"])
def getInfotoMeasure():
    """Render ``gui.html`` with the page number where the keyword was found.

    On the first request of a session the PDF is fetched and highlighted via
    ``highlight_text_from_pdf``; the resulting page number and the serialized
    PDF bytes are stored in the session under ``'pageNumTextFound'`` and
    ``'pdf_content'``. Later requests reuse the stored page number.

    Returns:
        The rendered ``gui.html`` template, with ``page`` set to the stored
        page number.
    """
    # NOTE(review): Flask sessions require ``app.secret_key``; no assignment
    # is visible in this file -- confirm it is configured elsewhere.
    if 'pageNumTextFound' not in session:
        pdf_link = ['https://www.dropbox.com/scl/fi/fjykwhhn9gu9t3kqrflxd/LA002-NOR-ZZ-ZZ-T-A-2403_Architectural-Specification-F10-Brick-and-Block-Walling_A4-_C01.pdf?rlkey=ek9i66i79m0hwp8z5yjs6rp5p&st=jh05a6qs&dl=0']
        keyword = "To be read with preliminaries/ general conditions"

        # Run the highlighting once per session.
        pdf_document, pageNumTextFound = highlight_text_from_pdf(pdf_link, keyword)

        # The helper returns a document object (presumably a PyMuPDF
        # document), not a BytesIO, so it has no ``getvalue()``. Serialize it
        # through an in-memory buffer to obtain the raw PDF bytes.
        buffer = BytesIO()
        pdf_document.save(buffer)

        session['pageNumTextFound'] = pageNumTextFound
        # NOTE(review): with Flask's default cookie-backed session a whole
        # PDF will likely exceed the browser cookie size limit -- consider a
        # server-side store; verify against the deployed session backend.
        session['pdf_content'] = buffer.getvalue()

    return render_template("gui.html", page=session['pageNumTextFound'])
21
+
22
@app.route('/view-pdf', methods=['GET'])
def download_pdf():
    """Serve the highlighted PDF stored in the session for inline viewing.

    Returns:
        A ``send_file`` response built from the bytes stored under
        ``session['pdf_content']``, or a plain-text message with status 404
        when that key is absent from the session.
    """
    if 'pdf_content' in session:
        # Wrap the stored bytes in a file-like object for send_file.
        stream = BytesIO(session['pdf_content'])
        filename = f"highlighted_page_{session['pageNumTextFound']}.pdf"
        return send_file(
            stream,
            mimetype='application/pdf',
            as_attachment=False,
            download_name=filename,
        )

    return "PDF content not found in session.", 404
34
+
35
+ def highlight_text_from_pdf(pdfshareablelinks, keyword):
36
  print('PDF Links:', pdfshareablelinks)
37
  for link in pdfshareablelinks:
38
  pdf_content = None
39
 
40
  if link and ('http' in link or 'dropbox' in link):
 
41
  if 'dl=0' in link:
42
  link = link.replace('dl=0', 'dl=1')
43
 
 
44
  response = requests.get(link)
45
  if response.status_code == 200:
46
  pdf_content = BytesIO(response.content)
 
47
 
48
  if pdf_content is None:
49
  raise ValueError("No valid PDF content found.")
 
58
  if matched:
59
  for word in matched:
60
  page.add_highlight_annot(word)
61
+ pageNumTextFound = page_num + 1
62
 
63
  return pdf_document, pageNumTextFound
64
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65
  if __name__ == '__main__':
66
  app.run(host='0.0.0.0', port=7860)