Marthee committed on
Commit
ef850dc
·
verified ·
1 Parent(s): 7e99d85

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +158 -100
app.py CHANGED
@@ -10,51 +10,155 @@ import datetime
10
  import time
11
  from threading import Thread
12
  import urllib
13
-
14
  app = Flask(__name__)
15
 
16
- pdf_content = None
17
  pageNumTextFound = 0
18
- BASE_URL = "https://findconsole-initialmarkups.hf.space" # Hugging Face Spaces base URL
19
- global jsonoutput
20
- @app.route("/", methods=["GET", "POST"])
21
- def thismain():
22
- print('ayhaga')
23
- return render_template("gui.html")
24
-
 
25
  @app.route("/keepaliveapii", methods=["GET", "POST"])
26
  def keepaliveapi():
27
  try:
28
- print('alivee')
29
- return 'alivee'
30
  except Exception as error:
31
- print('error in keepalive:', error)
32
  return jsonify(status="error", message=str(error)), 500
33
-
34
 
35
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  @app.route('/view-pdf', methods=['GET'])
37
  def download_pdf():
 
 
 
 
 
 
 
38
 
39
- # Manually parse the query parameters
40
- full_query_string = request.query_string.decode() # Get raw query string
41
- parsed_params = urllib.parse.parse_qs(full_query_string) # Parse it
42
 
43
- # Extract pdfLink and keyword manually
44
- pdf_link = parsed_params.get('pdfLink', [None])[0]
45
- if not pdf_link :
46
- return "Missing required parameters.", 400
 
 
47
 
48
- # Decode the extracted values
49
- pdf_link = urllib.parse.unquote(pdf_link)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
  print("Extracted PDF Link:", pdf_link)
52
- # print("Extracted Keywords:", keyword)
53
- createDF=False
54
 
55
- pdf_content = InitialMarkups.extract_section_under_header(pdf_link)[0]
56
- if pdf_content is None:
57
- return "PDF content not found.", 404
 
 
 
 
 
 
58
 
59
  pdf_bytes = BytesIO(pdf_content)
60
  return send_file(
@@ -64,6 +168,12 @@ def download_pdf():
64
  download_name=f"annotated_page_{pageNumTextFound}.pdf"
65
  )
66
 
 
 
 
 
 
 
67
 
68
  @app.route('/view-highlight', methods=['GET','POST'])
69
  def download_pdfHighlight():
@@ -105,81 +215,55 @@ def download_pdfHighlight():
105
  as_attachment=False,
106
  download_name=f"annotated_page_{pageNumTextFound}.pdf"
107
  )
108
-
109
- @app.route('/api/process-data', methods=['POST'])
110
- def receive_pdf_data():
111
- global pdf_content, pageNumTextFound
112
 
113
- # Get PDF link and keyword from finddata()
114
- pdfLink = finddata()
115
 
116
- if not pdfLink :
117
- return jsonify({"error": "'pdfLink' must be provided."}), 400
118
-
119
- try:
120
- print(pdfLink)
121
-
122
-
123
- pdfbytes, pdf_document,tablepdfoutput= InitialMarkups.extract_section_under_header(pdfLink)
124
- dbxTeam= tsadropboxretrieval.ADR_Access_DropboxTeam('user')
125
-
126
- # Get metadata using the shared link
127
- metadata = dbxTeam.sharing_get_shared_link_metadata(pdfLink)
128
- dbPath='/TSA JOBS/ADR Test/FIND/'
129
- pdflink= tsadropboxretrieval.uploadanyFile(doc=pdf_document,path=dbPath,pdfname=metadata.name) #doc=doc,pdfname=path,pdfpath=pdfpath+'Measured Plan/
130
- print('LINKS0',pdflink)
131
-
132
- dbPath='/TSA JOBS/ADR Test/FIND/'
133
- tablepdfLink=tsadropboxretrieval.uploadanyFile(doc=tablepdfoutput,path=dbPath,pdfname=metadata.name.rsplit(".pdf", 1)[0] +' Markup Summary'+'.pdf')
134
- print(f"PDF successfully uploaded to Dropbox at")
135
- print('LINKS1',tablepdfLink)
136
- return jsonify({
137
- "message": "PDF processed successfully.",
138
- "PDF_MarkedUp": pdflink,
139
- 'Table_PDF_Markup_Summary': tablepdfLink
140
- })
141
-
142
- except Exception as e:
143
- return jsonify({"error": str(e)}), 500
144
-
145
-
146
- @app.route('/findapi', methods=['GET','POST'])
147
- def findapi():
148
  try:
149
  print('In process [Try]')
150
  data = request.get_json()
151
  # Extracting values
152
  pdfLink = data.get('filePath')
153
- pdfbytes, pdf_document,tablepdfoutput= InitialMarkups.extract_section_under_header(pdfLink)
 
 
 
154
  global jsonoutput
155
  jsonoutput=tablepdfoutput
156
  return jsonify(tablepdfoutput)
157
  except Exception as e:
158
  return jsonify({"error": str(e)}), 500
159
-
160
- @app.route('/findapiFilteredHeadings', methods=['GET','POST'])
161
- def findapiFilteredHeadings():
 
 
 
 
 
 
 
 
162
  try:
163
  print('In process [Try]')
164
  data = request.get_json()
165
  # Extracting values
166
  pdfLink = data.get('filePath')
167
- print(pdfLink)
168
- listofheadings = data.get('listofheadings') #in json format
169
- print(listofheadings)
170
- pdfbytes, pdf_document,tablepdfoutput= InitialMarkups.extract_section_under_headerRawan(pdfLink,listofheadings)
171
  global jsonoutput
172
  jsonoutput=tablepdfoutput
173
  return jsonify(tablepdfoutput)
174
  except Exception as e:
175
  return jsonify({"error": str(e)}), 500
176
 
 
 
177
  def finddata():
178
  pdfLink = 'https://www.dropbox.com/scl/fi/hnp4mqigb51a5kp89kgfa/00801-ARC-20-ZZ-S-A-0002.pdf?rlkey=45abeoebzqw4qwnslnei6dkd6&st=m4yrcjm2&dl=1'
179
  keyword = ['115 INTEGRATED MRI ROOM LININGS', '310 ACCURACY']
180
  return pdfLink, keyword
181
 
182
-
183
  #_________________________________________________________________________________________________________________________
184
  #_________________________________________________________________________________________________________________________
185
 
@@ -190,32 +274,6 @@ def finddata():
190
  #_________________________________________________________________________________________________________________________
191
 
192
 
193
- # def runn():
194
- # from gevent.pywsgi import WSGIServer
195
- # http_server = WSGIServer(('0.0.0.0', 7860), app)
196
- # http_server.serve_forever()
197
-
198
-
199
- # def keep_alive():
200
- # t=Thread(target=runn)
201
- # t.start()
202
-
203
- # dtn = datetime.datetime.now(datetime.timezone.utc)
204
- # print(dtn)
205
- # next_start = datetime.datetime(dtn.year, dtn.month, dtn.day, 21, 0, 0).astimezone(datetime.timezone.utc) #us - 2 = utc time (21 utc is 19:00 our time and 9 is 7 our time , it needs to run 9 utc time ____ )
206
- # print(next_start)
207
- # keep_alive()
208
- # while 1:
209
- # dtnNow = datetime.datetime.now(datetime.timezone.utc)
210
- # print(dtnNow)
211
- # if dtnNow >= next_start:
212
- # next_start += datetime.timedelta(hours=12) # 1 day
213
- # print('YES- 12 hours passed!!',next_start)
214
- # time.sleep(1800)
215
-
216
- # if __name__ == "__main__":
217
- # runn()
218
- # app.run
219
  if __name__ == '__main__':
220
  app.run(host='0.0.0.0', port=7860)
221
 
 
10
  import time
11
  from threading import Thread
12
  import urllib
13
+ from urllib.parse import quote
14
  app = Flask(__name__)
15
 
 
16
  pageNumTextFound = 0
17
+ BASE_URL = "https://findconsole-initialmarkups.hf.space"
18
+ # Simulate a backend readiness flag (replace with actual check if possible)
19
+ backend_ready = False
20
+ # @app.route("/")
21
+ # def thismain():
22
+ # print('Home page loaded')
23
+ # return render_template("gui.html")
24
+
25
@app.route("/keepaliveapii", methods=["GET", "POST"])
def keepaliveapi():
    """Answer a keep-alive ping with the literal body ``'alivee'``.

    Returns:
        ``'alivee'`` on success.  If logging the ping raises, a JSON payload
        ``{"status": "error", "message": <text>}`` together with HTTP 500.
    """
    try:
        print('Keepalive pinged')
    except Exception as error:
        print('Error in keepalive:', error)
        failure = jsonify(status="error", message=str(error))
        return failure, 500
    else:
        return 'alivee'
 
33
 
34
 
35
+
36
@app.route("/")
def home():
    """Serve the landing route: a wake-up page until the backend is ready.

    While the module-level ``backend_ready`` flag is falsy, the
    ``wake_and_redirect.html`` template is rendered.  Once the flag is truthy
    the request is redirected to the ``/view-pdf`` route, forwarding the
    query parameters of the incoming request.

    Returns:
        The rendered template, or a redirect response to ``/view-pdf``.
    """
    # The flag is only read here, so no ``global`` declaration is required.
    if not backend_ready:
        return render_template("wake_and_redirect.html")

    # Flask names an endpoint after its view function unless told otherwise,
    # so the '/view-pdf' route is registered as "download_pdf".  Asking
    # url_for() for "view_pdf" would fail with a BuildError because no
    # endpoint carries that name.
    return redirect(url_for("download_pdf", **request.args))
45
+ ################################################################################################################################################################
46
+ ################################################################################################################################################################
47
+ ##################### Main console ###########################################################################################################
48
+ ################################################################################################################################################################
49
+ ################################################################################################################################################################
50
+
51
@app.route('/view-pdf', methods=['GET'])
def download_pdf():
    """Serve the annotated PDF for the link passed in the ``pdfLink`` query parameter.

    The raw query string is parsed by hand, the first ``pdfLink`` value is
    percent-decoded and handed to
    ``InitialMarkups.extract_section_under_header``; element ``[0]`` of its
    result is sent back inline as ``application/pdf``.

    Returns:
        * ``("Missing pdfLink parameter.", 400)`` when no ``pdfLink`` value is present.
        * ``("PDF could not be processed.", 500)`` when the extraction raises.
        * ``("PDF content not found or broken.", 404)`` when the extracted
          content is ``None`` or does not start with ``%PDF``.
        * Otherwise the ``send_file`` response carrying the PDF bytes.
    """
    raw_query = request.query_string.decode()
    link_values = urllib.parse.parse_qs(raw_query).get('pdfLink', [None])
    encoded_pdf_link = link_values[0]
    if not encoded_pdf_link:
        return "Missing pdfLink parameter.", 400

    # NOTE(review): parse_qs has already percent-decoded the value, so this is
    # a second decoding pass - presumably callers double-encode the link; confirm.
    pdf_link = urllib.parse.unquote(encoded_pdf_link)
    print("Extracted PDF Link:", pdf_link)

    try:
        pdf_content = InitialMarkups.extract_section_under_header(pdf_link)[0]
    except Exception as e:
        print("Error during PDF extraction:", e)
        return "PDF could not be processed.", 500

    # Only payloads that carry the PDF magic header are served.
    has_pdf_header = pdf_content is not None and pdf_content.startswith(b"%PDF")
    if not has_pdf_header:
        return "PDF content not found or broken.", 404

    return send_file(
        BytesIO(pdf_content),
        mimetype='application/pdf',
        as_attachment=False,
        download_name=f"annotated_page_{pageNumTextFound}.pdf",
    )
82
+
83
+
84
@app.route('/api/process-data', methods=['POST'])
def receive_pdf_data():
    """Process the sample PDF from ``finddata()`` and upload both outputs to Dropbox.

    The link returned by ``finddata()`` is run through
    ``InitialMarkups.extract_section_under_header``.  The second and third
    elements of its result are uploaded with
    ``tsadropboxretrieval.uploadanyFile`` under ``/TSA JOBS/ADR Test/FIND/``:
    the former under the shared link's own file name, the latter under that
    name with ``' Markup Summary'`` inserted before ``.pdf``.

    Returns:
        * JSON ``{"error": ...}`` with HTTP 400 when no link is available.
        * JSON with ``message``, ``PDF_MarkedUp`` and
          ``Table_PDF_Markup_Summary`` (the two ``uploadanyFile`` results) on success.
        * JSON ``{"error": <text>}`` with HTTP 500 when any step raises.
    """
    # finddata() returns a (pdfLink, keyword) pair.  Unpack it so that the
    # link string itself - not the whole tuple - reaches the extractor and the
    # Dropbox metadata lookup, and so the emptiness check below is meaningful.
    pdfLink, _keyword = finddata()

    if not pdfLink:
        return jsonify({"error": "'pdfLink' must be provided."}), 400

    try:
        print(pdfLink)

        _pdfbytes, pdf_document, tablepdfoutput = InitialMarkups.extract_section_under_header(pdfLink)
        dbxTeam = tsadropboxretrieval.ADR_Access_DropboxTeam('user')

        # Get metadata using the shared link
        metadata = dbxTeam.sharing_get_shared_link_metadata(pdfLink)
        dbPath = '/TSA JOBS/ADR Test/FIND/'
        pdflink = tsadropboxretrieval.uploadanyFile(doc=pdf_document, path=dbPath, pdfname=metadata.name)
        print('LINKS0', pdflink)

        # Same folder, file name suffixed with ' Markup Summary'.
        summary_name = metadata.name.rsplit(".pdf", 1)[0] + ' Markup Summary' + '.pdf'
        tablepdfLink = tsadropboxretrieval.uploadanyFile(doc=tablepdfoutput, path=dbPath, pdfname=summary_name)
        print("PDF successfully uploaded to Dropbox at")
        print('LINKS1', tablepdfLink)
        return jsonify({
            "message": "PDF processed successfully.",
            "PDF_MarkedUp": pdflink,
            'Table_PDF_Markup_Summary': tablepdfLink
        })

    except Exception as e:
        return jsonify({"error": str(e)}), 500
119
+ ################################################################################################################################################################
120
+ ################################################################################################################################################################
121
+ ##################### Not to billed not markuped up ###########################################################################################################
122
+ ################################################################################################################################################################
123
+ ################################################################################################################################################################
124
@app.route('/findapitobebilled', methods=['GET','POST'])
def findapitobebilled():
    """Run the "to be billed only" extraction for the PDF named in the JSON body.

    Reads ``filePath`` from the request JSON, passes it to
    ``InitialMarkups.extract_section_under_header_tobebilledOnly`` and keeps
    the third element of the result in the module-level ``jsonoutput``.

    Returns:
        That third element as a JSON response, or JSON ``{"error": <text>}``
        with HTTP 500 when anything raises.
    """
    global jsonoutput
    try:
        print('In process [Try]')
        payload = request.get_json()
        # Extracting values
        file_path = payload.get('filePath')
        extraction = InitialMarkups.extract_section_under_header_tobebilledOnly(file_path)
        _, _, summary = extraction
        jsonoutput = summary
        return jsonify(summary)
    except Exception as e:
        return jsonify({"error": str(e)}), 500
137
+
138
+
139
+ @app.route('/view-pdf-tobebilled', methods=['GET'])
140
+ def download_pdf_tobebilled():
141
+ # Parse and decode pdfLink safely
142
+ full_query_string = request.query_string.decode()
143
+ parsed_params = urllib.parse.parse_qs(full_query_string)
144
+ encoded_pdf_link = parsed_params.get('pdfLink', [None])[0]
145
+
146
+ if not encoded_pdf_link:
147
+ return "Missing pdfLink parameter.", 400
148
+
149
+ # Decode the URL-encoded PDF link
150
+ pdf_link = urllib.parse.unquote(encoded_pdf_link)
151
  print("Extracted PDF Link:", pdf_link)
 
 
152
 
153
+ try:
154
+ # Use InitialMarkups to extract content
155
+ pdf_content = InitialMarkups.extract_section_under_header_tobebilledOnly(pdf_link)[0]
156
+ except Exception as e:
157
+ print("Error during PDF extraction:", e)
158
+ return "PDF could not be processed.", 500
159
+
160
+ if pdf_content is None or not pdf_content.startswith(b"%PDF"):
161
+ return "PDF content not found or broken.", 404
162
 
163
  pdf_bytes = BytesIO(pdf_content)
164
  return send_file(
 
168
  download_name=f"annotated_page_{pageNumTextFound}.pdf"
169
  )
170
 
171
+ ################################################################################################################################################################
172
+ ################################################################################################################################################################
173
+ ##################### For final markups - view one highlight at a time - not used yet ###########################################################################################################
174
+ ################################################################################################################################################################
175
+ ################################################################################################################################################################
176
+
177
 
178
  @app.route('/view-highlight', methods=['GET','POST'])
179
  def download_pdfHighlight():
 
215
  as_attachment=False,
216
  download_name=f"annotated_page_{pageNumTextFound}.pdf"
217
  )
 
 
 
 
218
 
 
 
219
 
220
@app.route('/findapiFilteredHeadings', methods=['GET','POST'])
def findapiFilteredHeadings():
    """Run the heading-filtered extraction for the PDF named in the JSON body.

    Reads ``filePath`` and ``listofheadings`` from the request JSON, logs
    both, and passes them to
    ``InitialMarkups.extract_section_under_headerRawan``.  The third element
    of the result is kept in the module-level ``jsonoutput``.

    Returns:
        That third element as a JSON response, or JSON ``{"error": <text>}``
        with HTTP 500 when anything raises.
    """
    global jsonoutput
    try:
        print('In process [Try]')
        payload = request.get_json()
        # Extracting values
        file_path = payload.get('filePath')
        print(file_path)
        headings = payload.get('listofheadings')  # in json format
        print(headings)
        extraction = InitialMarkups.extract_section_under_headerRawan(file_path, headings)
        _, _, summary = extraction
        jsonoutput = summary
        return jsonify(summary)
    except Exception as e:
        return jsonify({"error": str(e)}), 500
236
+
237
+
238
+
239
+ ################################################################################################################################################################
240
+ ################################################################################################################################################################
241
+ ##################### For Rawan - MC Connection ###########################################################################################################
242
+ ################################################################################################################################################################
243
+ ################################################################################################################################################################
244
+
245
@app.route('/findapi', methods=['GET','POST'])
def findapi():
    """Run the default extraction for the PDF named in the JSON body.

    Reads ``filePath`` from the request JSON, passes it to
    ``InitialMarkups.extract_section_under_header`` and keeps the third
    element of the result in the module-level ``jsonoutput``.

    Returns:
        That third element as a JSON response, or JSON ``{"error": <text>}``
        with HTTP 500 when anything raises.
    """
    global jsonoutput
    try:
        print('In process [Try]')
        payload = request.get_json()
        # Extracting values
        file_path = payload.get('filePath')
        extraction = InitialMarkups.extract_section_under_header(file_path)
        _, _, summary = extraction
        jsonoutput = summary
        return jsonify(summary)
    except Exception as e:
        return jsonify({"error": str(e)}), 500
258
 
259
+ ############################################# Testing #################################################
260
+
261
def finddata():
    """Return the hard-coded sample ``(pdfLink, keyword)`` pair used for testing.

    Returns:
        A 2-tuple: the sample Dropbox link as a string, and a list of two
        heading keywords.
    """
    sample = (
        'https://www.dropbox.com/scl/fi/hnp4mqigb51a5kp89kgfa/00801-ARC-20-ZZ-S-A-0002.pdf?rlkey=45abeoebzqw4qwnslnei6dkd6&st=m4yrcjm2&dl=1',
        ['115 INTEGRATED MRI ROOM LININGS', '310 ACCURACY'],
    )
    return sample
265
 
266
+ ########################################### Running #####################################################
267
  #_________________________________________________________________________________________________________________________
268
  #_________________________________________________________________________________________________________________________
269
 
 
274
  #_________________________________________________________________________________________________________________________
275
 
276
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
277
if __name__ == '__main__':
    # Start Flask's built-in server on every interface, port 7860.
    server_options = {'host': '0.0.0.0', 'port': 7860}
    app.run(**server_options)
279