Marthee committed on
Commit
8db659f
·
verified ·
1 Parent(s): 4dbe79f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +116 -12
app.py CHANGED
@@ -1,23 +1,20 @@
1
- from flask import Flask, send_file, render_template, request
2
  import requests
3
  from io import BytesIO
4
  import fitz # PyMuPDF
5
 
6
- # Define local variables to retain the PDF content across function calls
7
  pdf_content = None
8
  pageNumTextFound = 0
9
-
10
  app = Flask(__name__)
11
 
12
  @app.route("/", methods=["GET", "POST"])
13
  def getInfotoMeasure():
14
  global pdf_content, pageNumTextFound
15
 
16
- pdf_link = ['https://www.dropbox.com/scl/fi/fjykwhhn9gu9t3kqrflxd/LA002-NOR-ZZ-ZZ-T-A-2403_Architectural-Specification-F10-Brick-and-Block-Walling_A4-_C01.pdf?rlkey=ek9i66i79m0hwp8z5yjs6rp5p&st=jh05a6qs&dl=0']
17
- keyword = "To be read with preliminaries/ general conditions"
18
-
19
- # Call the function to process the PDF
20
- pdf_content, pageNumTextFound = highlight_text_from_pdf(pdf_link, keyword)
21
 
22
  # Render the GUI with the current page number
23
  return render_template("gui.html", page=pageNumTextFound)
@@ -37,8 +34,35 @@ def download_pdf():
37
  download_name=f"highlighted_page_{pageNumTextFound}.pdf"
38
  )
39
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  def highlight_text_from_pdf(pdfshareablelinks, keyword):
41
- print('PDF Links:', pdfshareablelinks)
42
 
43
  for link in pdfshareablelinks:
44
  pdf_content = None
@@ -53,7 +77,7 @@ def highlight_text_from_pdf(pdfshareablelinks, keyword):
53
  pdf_content = BytesIO(response.content)
54
 
55
  if pdf_content is None:
56
- raise ValueError("No valid PDF content found.")
57
 
58
  pageNumTextFound = 1
59
  pdf_document = fitz.open(stream=pdf_content, filetype="pdf")
@@ -65,10 +89,9 @@ def highlight_text_from_pdf(pdfshareablelinks, keyword):
65
  if matched:
66
  for word in matched:
67
  page.add_highlight_annot(word)
68
-
69
  pageNumTextFound = page_num + 1
70
 
71
- # Save PDF content to memory and return it along with the page number
72
  pdf_bytes = BytesIO()
73
  pdf_document.save(pdf_bytes)
74
  pdf_document.close()
@@ -77,3 +100,84 @@ def highlight_text_from_pdf(pdfshareablelinks, keyword):
77
 
78
  if __name__ == '__main__':
79
  app.run(host='0.0.0.0', port=7860)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import Flask, send_file, render_template, request, jsonify
2
  import requests
3
  from io import BytesIO
4
  import fitz # PyMuPDF
5
 
6
+ # Define global variables to retain PDF content across function calls
7
  pdf_content = None
8
  pageNumTextFound = 0
9
+ BASE_URL="https://marthee-navigatetopage.hf.space"
10
  app = Flask(__name__)
11
 
12
  @app.route("/", methods=["GET", "POST"])
13
  def getInfotoMeasure():
14
  global pdf_content, pageNumTextFound
15
 
16
+ if pdf_content is None:
17
+ return "No PDF content available.", 404
 
 
 
18
 
19
  # Render the GUI with the current page number
20
  return render_template("gui.html", page=pageNumTextFound)
 
34
  download_name=f"highlighted_page_{pageNumTextFound}.pdf"
35
  )
36
 
37
# Route to handle external webhook
@app.route('/api/process-data', methods=['POST'])
def receive_pdf_data():
    """Process the PDF named in the request and reply with a viewer link.

    ``pdf_link`` and ``keyword`` are read from the form fields of the
    request. When either one is absent from the form, a JSON object body
    is consulted as a fallback so JSON webhook callers are accepted too.
    The pair is handed to ``highlight_text_from_pdf`` and the result is
    stored in the module-level ``pdf_content`` / ``pageNumTextFound``.

    Returns:
        A JSON response:
        * 200 with ``message`` and ``download_link`` on success;
        * 400 when ``pdf_link`` or ``keyword`` is missing, empty, or not
          a string;
        * 404 when ``highlight_text_from_pdf`` yields no PDF content;
        * 500 with the exception text when processing raises.
    """
    global pdf_content, pageNumTextFound

    # Extract PDF link and keyword from the request payload
    pdf_link = request.form.get('pdf_link')
    keyword = request.form.get('keyword')
    if not pdf_link or not keyword:
        # Form fields win; only look at the JSON body when the form is
        # incomplete. silent=True makes a non-JSON body yield None
        # instead of raising.
        json_payload = request.get_json(silent=True)
        if isinstance(json_payload, dict):
            pdf_link = pdf_link or json_payload.get('pdf_link')
            keyword = keyword or json_payload.get('keyword')
    print('receiveddd',pdf_link,keyword)

    # JSON values may be of any type; only non-empty strings are usable.
    if not all(isinstance(value, str) and value for value in (pdf_link, keyword)):
        return jsonify({"error": "Both 'pdf_link' and 'keyword' must be provided."}), 400

    try:
        # Call the function to process the PDF
        pdf_content, pageNumTextFound = highlight_text_from_pdf([pdf_link], keyword)
    except Exception as e:
        # Top-level request boundary: record the traceback, then report
        # the failure to the caller instead of crashing the worker.
        app.logger.exception("Failed to process PDF from webhook request")
        return jsonify({"error": str(e)}), 500

    if pdf_content is None:
        return jsonify({"error": "No valid PDF content found."}), 404

    return jsonify({
        "message": "PDF processed successfully.",
        "download_link": f"{BASE_URL}/view-pdf#page={pageNumTextFound}"
    })
63
+
64
  def highlight_text_from_pdf(pdfshareablelinks, keyword):
65
+ global pdf_content, pageNumTextFound
66
 
67
  for link in pdfshareablelinks:
68
  pdf_content = None
 
77
  pdf_content = BytesIO(response.content)
78
 
79
  if pdf_content is None:
80
+ return None, 0
81
 
82
  pageNumTextFound = 1
83
  pdf_document = fitz.open(stream=pdf_content, filetype="pdf")
 
89
  if matched:
90
  for word in matched:
91
  page.add_highlight_annot(word)
92
+
93
  pageNumTextFound = page_num + 1
94
 
 
95
  pdf_bytes = BytesIO()
96
  pdf_document.save(pdf_bytes)
97
  pdf_document.close()
 
100
 
101
  if __name__ == '__main__':
102
  app.run(host='0.0.0.0', port=7860)
103
+
104
+
105
+ # from flask import Flask, send_file, render_template, request
106
+ # import requests
107
+ # from io import BytesIO
108
+ # import fitz # PyMuPDF
109
+
110
+ # # Define local variables to retain the PDF content across function calls
111
+ # pdf_content = None
112
+ # pageNumTextFound = 0
113
+
114
+ # app = Flask(__name__)
115
+
116
+ # @app.route("/", methods=["GET", "POST"])
117
+ # def getInfotoMeasure():
118
+ # global pdf_content, pageNumTextFound
119
+
120
+ # pdf_link = ['https://www.dropbox.com/scl/fi/fjykwhhn9gu9t3kqrflxd/LA002-NOR-ZZ-ZZ-T-A-2403_Architectural-Specification-F10-Brick-and-Block-Walling_A4-_C01.pdf?rlkey=ek9i66i79m0hwp8z5yjs6rp5p&st=jh05a6qs&dl=0']
121
+ # keyword = "To be read with preliminaries/ general conditions"
122
+
123
+ # # Call the function to process the PDF
124
+ # pdf_content, pageNumTextFound = highlight_text_from_pdf(pdf_link, keyword)
125
+
126
+ # # Render the GUI with the current page number
127
+ # return render_template("gui.html", page=pageNumTextFound)
128
+
129
+ # @app.route('/view-pdf', methods=['GET'])
130
+ # def download_pdf():
131
+ # global pdf_content, pageNumTextFound
132
+
133
+ # if pdf_content is None:
134
+ # return "PDF content not found.", 404
135
+
136
+ # pdf_bytes = BytesIO(pdf_content)
137
+ # return send_file(
138
+ # pdf_bytes,
139
+ # mimetype='application/pdf',
140
+ # as_attachment=False,
141
+ # download_name=f"highlighted_page_{pageNumTextFound}.pdf"
142
+ # )
143
+
144
+ # def highlight_text_from_pdf(pdfshareablelinks, keyword):
145
+ # print('PDF Links:', pdfshareablelinks)
146
+
147
+ # for link in pdfshareablelinks:
148
+ # pdf_content = None
149
+
150
+ # if link and ('http' in link or 'dropbox' in link):
151
+ # if 'dl=0' in link:
152
+ # link = link.replace('dl=0', 'dl=1')
153
+
154
+ # response = requests.get(link)
155
+
156
+ # if response.status_code == 200:
157
+ # pdf_content = BytesIO(response.content)
158
+
159
+ # if pdf_content is None:
160
+ # raise ValueError("No valid PDF content found.")
161
+
162
+ # pageNumTextFound = 1
163
+ # pdf_document = fitz.open(stream=pdf_content, filetype="pdf")
164
+
165
+ # for page_num in range(pdf_document.page_count):
166
+ # page = pdf_document.load_page(page_num)
167
+ # matched = page.search_for(keyword)
168
+
169
+ # if matched:
170
+ # for word in matched:
171
+ # page.add_highlight_annot(word)
172
+
173
+ # pageNumTextFound = page_num + 1
174
+
175
+ # # Save PDF content to memory and return it along with the page number
176
+ # pdf_bytes = BytesIO()
177
+ # pdf_document.save(pdf_bytes)
178
+ # pdf_document.close()
179
+
180
+ # return pdf_bytes.getvalue(), pageNumTextFound
181
+
182
+ # if __name__ == '__main__':
183
+ # app.run(host='0.0.0.0', port=7860)