Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -43,7 +43,8 @@ def download_pdf():
|
|
| 43 |
|
| 44 |
print("Extracted PDF Link:", pdf_link)
|
| 45 |
print("Extracted Keywords:", keyword)
|
| 46 |
-
|
|
|
|
| 47 |
if pdf_content is None:
|
| 48 |
return "PDF content not found.", 404
|
| 49 |
|
|
@@ -54,42 +55,12 @@ def download_pdf():
|
|
| 54 |
as_attachment=False,
|
| 55 |
download_name=f"annotated_page_{pageNumTextFound}.pdf"
|
| 56 |
)
|
| 57 |
-
|
| 58 |
-
@app.route('/api/process-data', methods=['POST'])
|
| 59 |
-
def receive_pdf_data():
|
| 60 |
-
global pdf_content, pageNumTextFound
|
| 61 |
-
|
| 62 |
-
# Get PDF link and keyword from finddata()
|
| 63 |
-
pdfLink, keyword = finddata()
|
| 64 |
-
|
| 65 |
-
if not pdfLink or not keyword:
|
| 66 |
-
return jsonify({"error": "Both 'pdfLink' and 'keyword' must be provided."}), 400
|
| 67 |
-
|
| 68 |
-
try:
|
| 69 |
-
print(pdfLink, keyword)
|
| 70 |
-
|
| 71 |
-
# Call function to process the PDF
|
| 72 |
-
pdf_content, pageNumTextFound, highlight_rect = Find_Hyperlinking_text.annotate_text_from_pdf([pdfLink], keyword)
|
| 73 |
-
|
| 74 |
-
if pdf_content is None:
|
| 75 |
-
return jsonify({"error": "No valid PDF content found."}), 404
|
| 76 |
-
|
| 77 |
-
# Construct the URL with the rectangle coordinates
|
| 78 |
-
download_link = f"{BASE_URL}/view-pdf#page={pageNumTextFound}&zoom={highlight_rect}"
|
| 79 |
-
print('Download Link:', download_link)
|
| 80 |
-
|
| 81 |
-
return jsonify({
|
| 82 |
-
"message": "PDF processed successfully.",
|
| 83 |
-
"download_link": download_link
|
| 84 |
-
})
|
| 85 |
-
|
| 86 |
-
except Exception as e:
|
| 87 |
-
return jsonify({"error": str(e)}), 500
|
| 88 |
-
|
| 89 |
def finddata():
    """Return the hard-coded sample PDF link and keyword list.

    Returns:
        A ``(pdfLink, keyword)`` tuple, where ``pdfLink`` is a Dropbox
        shared-link URL string and ``keyword`` is a list of keyword strings.
        A new list object is built on every call.
    """
    # NOTE(review): the trailing ``dl=1`` presumably asks Dropbox for a direct
    # download instead of the preview page -- confirm before changing the URL.
    pdfLink = 'https://www.dropbox.com/scl/fi/hnp4mqigb51a5kp89kgfa/00801-ARC-20-ZZ-S-A-0002.pdf?rlkey=45abeoebzqw4qwnslnei6dkd6&st=m4yrcjm2&dl=1'
    keyword = ['115 INTEGRATED MRI ROOM LININGS', '710 TRANSPORTATION']
    return pdfLink, keyword
|
|
|
|
|
|
|
| 93 |
@app.route('/apiNBSData', methods=['POST'])
|
| 94 |
def NBSData():
|
| 95 |
|
|
@@ -98,12 +69,33 @@ def NBSData():
|
|
| 98 |
print('In process [Try]')
|
| 99 |
data = request.get_json()
|
| 100 |
# Extracting values
|
| 101 |
-
|
| 102 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 103 |
|
| 104 |
-
print(filePath,groupName)
|
| 105 |
-
# pdftext = pdftotext.texts_from_pdf(filePath,groupName)
|
| 106 |
-
return jsonify('Received!')
|
| 107 |
|
| 108 |
except Exception as e:
|
| 109 |
print(f"Error: {e}")
|
|
|
|
| 43 |
|
| 44 |
print("Extracted PDF Link:", pdf_link)
|
| 45 |
print("Extracted Keywords:", keyword)
|
| 46 |
+
createDF=False
|
| 47 |
+
outputDone = Find_Hyperlinking_text.annotate_text_from_pdf([pdf_link], keyword)
|
| 48 |
if pdf_content is None:
|
| 49 |
return "PDF content not found.", 404
|
| 50 |
|
|
|
|
| 55 |
as_attachment=False,
|
| 56 |
download_name=f"annotated_page_{pageNumTextFound}.pdf"
|
| 57 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
def finddata():
    """Return the hard-coded sample PDF link and keyword list.

    Returns:
        A ``(pdfLink, keyword)`` tuple, where ``pdfLink`` is a Dropbox
        shared-link URL string and ``keyword`` is a list of keyword strings.
        A new list object is built on every call.
    """
    # NOTE(review): the trailing ``dl=1`` presumably asks Dropbox for a direct
    # download instead of the preview page -- confirm before changing the URL.
    pdfLink = 'https://www.dropbox.com/scl/fi/hnp4mqigb51a5kp89kgfa/00801-ARC-20-ZZ-S-A-0002.pdf?rlkey=45abeoebzqw4qwnslnei6dkd6&st=m4yrcjm2&dl=1'
    keyword = ['115 INTEGRATED MRI ROOM LININGS', '710 TRANSPORTATION']
    return pdfLink, keyword
|
| 62 |
+
|
| 63 |
+
|
| 64 |
@app.route('/apiNBSData', methods=['POST'])
|
| 65 |
def NBSData():
|
| 66 |
|
|
|
|
| 69 |
print('In process [Try]')
|
| 70 |
data = request.get_json()
|
| 71 |
# Extracting values
|
| 72 |
+
pdfLink = data.get('filePath')
|
| 73 |
+
keyword = data.get('NBS_List')
|
| 74 |
+
|
| 75 |
+
print(pdfLink,keyword)
|
| 76 |
+
receive_pdf_data( pdfLink, keyword )
|
| 77 |
+
if not pdfLink or not keyword:
|
| 78 |
+
return jsonify({"error": "Both 'pdfLink' and 'keyword' must be provided."}), 400
|
| 79 |
+
|
| 80 |
+
print(pdfLink, keyword)
|
| 81 |
+
|
| 82 |
+
pdf_document ,tablepdf= Find_Hyperlinking_text.annotate_text_from_pdf([pdfLink], keyword)
|
| 83 |
+
dbxTeam= tsadropboxretrieval.ADR_Access_DropboxTeam('user')
|
| 84 |
+
|
| 85 |
+
# Get metadata using the shared link
|
| 86 |
+
metadata = dbxTeam.sharing_get_shared_link_metadata(pdfLink)
|
| 87 |
+
dbPath='/TSA JOBS/ADR Test/FIND/'
|
| 88 |
+
pdflink= tsadropboxretrieval.uploadanyFile(doc=pdf_document,path=dbPath,pdfname=metadata.name) #doc=doc,pdfname=path,pdfpath=pdfpath+'Measured Plan/
|
| 89 |
+
print('LINKS0',pdflink)
|
| 90 |
+
dbPath='/TSA JOBS/ADR Test/FIND/'
|
| 91 |
+
tablepdfLink=tsadropboxretrieval.uploadanyFile(doc=tablepdf,path=dbPath,pdfname=metadata.name+' Markup Summary')
|
| 92 |
+
print(f"PDF successfully uploaded to Dropbox at")
|
| 93 |
+
print('LINKS1',tablepdfLink)
|
| 94 |
+
return jsonify({
|
| 95 |
+
"message": "PDF processed successfully.",
|
| 96 |
+
"download_link": 'done'
|
| 97 |
+
})
|
| 98 |
|
|
|
|
|
|
|
|
|
|
| 99 |
|
| 100 |
except Exception as e:
|
| 101 |
print(f"Error: {e}")
|