Marthee committed on
Commit
756ad9e
·
verified ·
1 Parent(s): 30e5400

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +42 -50
app.py CHANGED
@@ -3,6 +3,7 @@ import tsadropboxretrieval
3
  import json
4
  import Find_Hyperlinking_text
5
  import findspecsv1
 
6
  import requests
7
  from io import BytesIO
8
  import urllib
@@ -11,7 +12,7 @@ app = Flask(__name__)
11
 
12
  pdf_content = None
13
  pageNumTextFound = 0
14
- BASE_URL = "https://marthee-nbslink.hf.space" # Hugging Face Spaces base URL
15
 
16
  @app.route("/", methods=["GET", "POST"])
17
  def thismain():
@@ -27,26 +28,17 @@ def download_pdf():
27
 
28
  # Extract pdfLink and keyword manually
29
  pdf_link = parsed_params.get('pdfLink', [None])[0]
30
- keyword = parsed_params.get('keyword', [None])[0]
31
-
32
- if not pdf_link or not keyword:
33
  return "Missing required parameters.", 400
34
 
35
  # Decode the extracted values
36
  pdf_link = urllib.parse.unquote(pdf_link)
37
- keyword = urllib.parse.unquote(keyword)
38
-
39
- # If the keyword is a JSON string, convert it back to a list
40
- try:
41
- keyword = json.loads(keyword)
42
- except json.JSONDecodeError:
43
- keyword = [keyword] # Treat it as a single keyword if not JSON
44
 
45
  print("Extracted PDF Link:", pdf_link)
46
- print("Extracted Keywords:", keyword)
47
  createDF=False
48
 
49
- pdf_content = findspecsv1.extract_section_under_header(pdf_link, keyword)[0]
50
  if pdf_content is None:
51
  return "PDF content not found.", 404
52
 
@@ -63,16 +55,16 @@ def receive_pdf_data():
63
  global pdf_content, pageNumTextFound
64
 
65
  # Get PDF link and keyword from finddata()
66
- pdfLink, keyword = finddata()
67
 
68
- if not pdfLink or not keyword:
69
- return jsonify({"error": "Both 'pdfLink' and 'keyword' must be provided."}), 400
70
 
71
  try:
72
- print(pdfLink, keyword)
73
 
74
 
75
- pdfbytes, pdf_document , df ,tablepdfoutput= findspecsv1.extract_section_under_header(pdfLink, keyword)
76
  dbxTeam= tsadropboxretrieval.ADR_Access_DropboxTeam('user')
77
 
78
  # Get metadata using the shared link
@@ -99,44 +91,44 @@ def finddata():
99
  keyword = ['115 INTEGRATED MRI ROOM LININGS', '310 ACCURACY']
100
  return pdfLink, keyword
101
 
102
- @app.route('/apiNBSData', methods=['POST'])
103
- def NBSData():
104
 
105
 
106
- try:
107
- print('In process [Try]')
108
- data = request.get_json()
109
- # Extracting values
110
- pdfLink = data.get('filePath')
111
- keyword = data.get('NBS_List')
112
-
113
- # Checking if both values are provided
114
- if not pdfLink or not keyword:
115
- return jsonify({"error": "Both 'pdfLink' and 'keyword' must be provided."}), 400
116
 
117
- keyword = eval(keyword) # Convert the string representation of the list to an actual list
118
 
119
- # Now you can use the extracted pdfLink and keyword
120
- print(pdfLink, keyword)
121
- pdfbytes, pdf_document , df ,tablepdfoutput= findspecsv1.extract_section_under_header(pdfLink, keyword)
122
- dbxTeam= tsadropboxretrieval.ADR_Access_DropboxTeam('user')
123
-
124
- # Get metadata using the shared link
125
- metadata = dbxTeam.sharing_get_shared_link_metadata(pdfLink)
126
- dbPath='/TSA JOBS/ADR Test/FIND/'
127
- pdflink= tsadropboxretrieval.uploadanyFile(doc=pdf_document,path=dbPath,pdfname=metadata.name) #doc=doc,pdfname=path,pdfpath=pdfpath+'Measured Plan/
128
- print('LINKS0',pdflink)
129
 
130
- dbPath='/TSA JOBS/ADR Test/FIND/'
131
- tablepdfLink=tsadropboxretrieval.uploadanyFile(doc=tablepdfoutput,path=dbPath,pdfname=metadata.name.rsplit(".pdf", 1)[0] +' Markup Summary'+'.pdf')
132
- print(f"PDF successfully uploaded to Dropbox at")
133
- print('LINKS1',tablepdfLink)
134
- return jsonify(tablepdfoutput)
135
 
136
 
137
- except Exception as e:
138
- print(f"Error: {e}")
139
- return jsonify({"error": str(e)}), 500
140
-
141
  if __name__ == '__main__':
142
  app.run(host='0.0.0.0', port=7860)
 
3
  import json
4
  import Find_Hyperlinking_text
5
  import findspecsv1
6
+ import InitialMarkups
7
  import requests
8
  from io import BytesIO
9
  import urllib
 
12
 
13
  pdf_content = None
14
  pageNumTextFound = 0
15
+ BASE_URL = "https://find-initialmarkups.hf.space" # Hugging Face Spaces base URL
16
 
17
  @app.route("/", methods=["GET", "POST"])
18
  def thismain():
 
28
 
29
  # Extract pdfLink and keyword manually
30
  pdf_link = parsed_params.get('pdfLink', [None])[0]
31
+ if not pdf_link :
 
 
32
  return "Missing required parameters.", 400
33
 
34
  # Decode the extracted values
35
  pdf_link = urllib.parse.unquote(pdf_link)
 
 
 
 
 
 
 
36
 
37
  print("Extracted PDF Link:", pdf_link)
38
+ # print("Extracted Keywords:", keyword)
39
  createDF=False
40
 
41
+ pdf_content = InitialMarkups.extract_section_under_header(pdf_link)[0]
42
  if pdf_content is None:
43
  return "PDF content not found.", 404
44
 
 
55
  global pdf_content, pageNumTextFound
56
 
57
  # Get PDF link and keyword from finddata()
58
+ pdfLink = finddata()
59
 
60
+ if not pdfLink :
61
+ return jsonify({"error": "'pdfLink' must be provided."}), 400
62
 
63
  try:
64
+ print(pdfLink)
65
 
66
 
67
+ pdfbytes, pdf_document,tablepdfoutput= InitialMarkups.extract_section_under_header(pdfLink)
68
  dbxTeam= tsadropboxretrieval.ADR_Access_DropboxTeam('user')
69
 
70
  # Get metadata using the shared link
 
91
  keyword = ['115 INTEGRATED MRI ROOM LININGS', '310 ACCURACY']
92
  return pdfLink, keyword
93
 
94
+ # @app.route('/apiNBSData', methods=['POST'])
95
+ # def NBSData():
96
 
97
 
98
+ # try:
99
+ # print('In process [Try]')
100
+ # data = request.get_json()
101
+ # # Extracting values
102
+ # pdfLink = data.get('filePath')
103
+ # keyword = data.get('NBS_List')
104
+
105
+ # # Checking if both values are provided
106
+ # if not pdfLink or not keyword:
107
+ # return jsonify({"error": "Both 'pdfLink' and 'keyword' must be provided."}), 400
108
 
109
+ # keyword = eval(keyword) # Convert the string representation of the list to an actual list
110
 
111
+ # # Now you can use the extracted pdfLink and keyword
112
+ # print(pdfLink, keyword)
113
+ # pdfbytes, pdf_document , df ,tablepdfoutput= findspecsv1.extract_section_under_header(pdfLink, keyword)
114
+ # dbxTeam= tsadropboxretrieval.ADR_Access_DropboxTeam('user')
115
+
116
+ # # Get metadata using the shared link
117
+ # metadata = dbxTeam.sharing_get_shared_link_metadata(pdfLink)
118
+ # dbPath='/TSA JOBS/ADR Test/FIND/'
119
+ # pdflink= tsadropboxretrieval.uploadanyFile(doc=pdf_document,path=dbPath,pdfname=metadata.name) #doc=doc,pdfname=path,pdfpath=pdfpath+'Measured Plan/
120
+ # print('LINKS0',pdflink)
121
 
122
+ # dbPath='/TSA JOBS/ADR Test/FIND/'
123
+ # tablepdfLink=tsadropboxretrieval.uploadanyFile(doc=tablepdfoutput,path=dbPath,pdfname=metadata.name.rsplit(".pdf", 1)[0] +' Markup Summary'+'.pdf')
124
+ # print(f"PDF successfully uploaded to Dropbox at")
125
+ # print('LINKS1',tablepdfLink)
126
+ # return jsonify(tablepdfoutput)
127
 
128
 
129
+ # except Exception as e:
130
+ # print(f"Error: {e}")
131
+ # return jsonify({"error": str(e)}), 500
132
+
133
  if __name__ == '__main__':
134
  app.run(host='0.0.0.0', port=7860)