Update app.py
Browse files
app.py
CHANGED
|
@@ -8,30 +8,47 @@ import urllib
|
|
| 8 |
|
| 9 |
app = Flask(__name__)
|
| 10 |
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
|
|
|
|
|
|
| 15 |
|
| 16 |
pdf_content = None
|
| 17 |
pageNumTextFound = 0
|
| 18 |
-
BASE_URL = "https://marthee-nbslink.hf.space" #
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
|
| 20 |
@app.route('/view-pdf', methods=['GET'])
|
| 21 |
def download_pdf():
|
| 22 |
global pdf_content, pageNumTextFound
|
| 23 |
-
pdf_link = request.args.get('pdfLink')
|
| 24 |
-
keyword = request.args.get('keyword')
|
| 25 |
|
| 26 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
if not pdf_link or not keyword:
|
| 28 |
return "Missing required parameters.", 400
|
| 29 |
|
| 30 |
-
# Decode
|
| 31 |
-
pdf_link = urllib.parse.unquote(pdf_link)
|
| 32 |
-
keyword =
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
|
| 34 |
-
# Debugging output
|
| 35 |
print("Extracted PDF Link:", pdf_link)
|
| 36 |
print("Extracted Keywords:", keyword)
|
| 37 |
|
|
@@ -49,38 +66,38 @@ def download_pdf():
|
|
| 49 |
@app.route('/api/process-data', methods=['POST'])
|
| 50 |
def receive_pdf_data():
|
| 51 |
global pdf_content, pageNumTextFound
|
| 52 |
-
|
| 53 |
-
# Get PDF link and keyword from
|
| 54 |
-
|
| 55 |
-
# keyword = request.form.get('keyword')
|
| 56 |
|
| 57 |
if not pdfLink or not keyword:
|
| 58 |
return jsonify({"error": "Both 'pdfLink' and 'keyword' must be provided."}), 400
|
| 59 |
|
| 60 |
try:
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
pdf_content, pageNumTextFound,highlight_rect = Find_Hyperlinking_text.annotate_text_from_pdf([pdfLink], keyword)
|
| 66 |
|
| 67 |
if pdf_content is None:
|
| 68 |
return jsonify({"error": "No valid PDF content found."}), 404
|
| 69 |
|
| 70 |
-
# Construct the URL
|
| 71 |
download_link = f"{BASE_URL}/view-pdf#page={pageNumTextFound}&zoom={highlight_rect}"
|
| 72 |
-
print('
|
|
|
|
| 73 |
return jsonify({
|
| 74 |
"message": "PDF processed successfully.",
|
| 75 |
-
"download_link": download_link
|
| 76 |
})
|
| 77 |
|
| 78 |
except Exception as e:
|
| 79 |
return jsonify({"error": str(e)}), 500
|
| 80 |
-
|
| 81 |
def finddata():
|
| 82 |
-
pdfLink = 'https://www.dropbox.com/scl/fi/hnp4mqigb51a5kp89kgfa/00801-ARC-20-ZZ-S-A-0002.pdf?rlkey=45abeoebzqw4qwnslnei6dkd6&st=m4yrcjm2&dl=1'
|
| 83 |
-
keyword = ['115 INTEGRATED MRI ROOM LININGS','710 TRANSPORTATION']
|
| 84 |
-
return pdfLink,keyword
|
|
|
|
| 85 |
if __name__ == '__main__':
|
| 86 |
app.run(host='0.0.0.0', port=7860)
|
|
|
|
| 8 |
|
| 9 |
app = Flask(__name__)
|
| 10 |
|
| 11 |
+
from flask import Flask, request, jsonify, send_file, render_template
|
| 12 |
+
import urllib.parse
|
| 13 |
+
import json
|
| 14 |
+
from io import BytesIO
|
| 15 |
+
|
| 16 |
+
# Flask application object that the route decorators below register against.
app = Flask(__name__)

BASE_URL = "https://marthee-nbslink.hf.space"  # Hugging Face Spaces base URL

# Module-level state that the request handlers declare ``global`` and share.
pdf_content = None
pageNumTextFound = 0
|
| 21 |
+
|
| 22 |
+
@app.route("/", methods=["GET", "POST"])
def thismain():
    """Render the ``gui.html`` template for GET and POST requests to ``/``."""
    print('ayhaga')  # marker written to stdout each time this route is hit
    landing_page = render_template("gui.html")
    return landing_page
|
| 26 |
|
| 27 |
@app.route('/view-pdf', methods=['GET'])
|
| 28 |
def download_pdf():
|
| 29 |
global pdf_content, pageNumTextFound
|
|
|
|
|
|
|
| 30 |
|
| 31 |
+
# Manually parse the query parameters
|
| 32 |
+
full_query_string = request.query_string.decode() # Get raw query string
|
| 33 |
+
parsed_params = urllib.parse.parse_qs(full_query_string) # Parse it
|
| 34 |
+
|
| 35 |
+
# Extract pdfLink and keyword manually
|
| 36 |
+
pdf_link = parsed_params.get('pdfLink', [None])[0]
|
| 37 |
+
keyword = parsed_params.get('keyword', [None])[0]
|
| 38 |
+
|
| 39 |
if not pdf_link or not keyword:
|
| 40 |
return "Missing required parameters.", 400
|
| 41 |
|
| 42 |
+
# Decode the extracted values
|
| 43 |
+
pdf_link = urllib.parse.unquote(pdf_link)
|
| 44 |
+
keyword = urllib.parse.unquote(keyword)
|
| 45 |
+
|
| 46 |
+
# If the keyword is a JSON string, convert it back to a list
|
| 47 |
+
try:
|
| 48 |
+
keyword = json.loads(keyword)
|
| 49 |
+
except json.JSONDecodeError:
|
| 50 |
+
keyword = [keyword] # Treat it as a single keyword if not JSON
|
| 51 |
|
|
|
|
| 52 |
print("Extracted PDF Link:", pdf_link)
|
| 53 |
print("Extracted Keywords:", keyword)
|
| 54 |
|
|
|
|
| 66 |
@app.route('/api/process-data', methods=['POST'])
def receive_pdf_data():
    """Annotate the configured PDF and return a link for viewing the result.

    The PDF link and keywords come from ``finddata()``; nothing is read from
    the incoming request. The annotated content and the page number are
    stored in the module-level ``pdf_content`` and ``pageNumTextFound``.

    Returns:
        A JSON response with ``message`` and ``download_link`` on success;
        a JSON ``error`` with status 400 when the link or keywords are
        missing, 404 when the annotator yields no content, and 500 when any
        exception is raised while processing.
    """
    global pdf_content, pageNumTextFound

    # Get PDF link and keyword from finddata()
    pdfLink, keyword = finddata()

    if not pdfLink or not keyword:
        return jsonify({"error": "Both 'pdfLink' and 'keyword' must be provided."}), 400

    try:
        print(pdfLink, keyword)

        # Call function to process the PDF
        pdf_content, pageNumTextFound, highlight_rect = (
            Find_Hyperlinking_text.annotate_text_from_pdf([pdfLink], keyword)
        )

        if pdf_content is None:
            return jsonify({"error": "No valid PDF content found."}), 404

        # /view-pdf answers 400 unless both 'pdfLink' and 'keyword' are in
        # the query string, so they must be part of the link. The keyword
        # list is sent as JSON because /view-pdf tries json.loads() on it.
        query = urllib.parse.urlencode(
            {"pdfLink": pdfLink, "keyword": json.dumps(keyword)},
            quote_via=urllib.parse.quote,
        )

        # The fragment carries the page number and rectangle coordinates.
        download_link = (
            f"{BASE_URL}/view-pdf?{query}"
            f"#page={pageNumTextFound}&zoom={highlight_rect}"
        )
        print('Download Link:', download_link)

        return jsonify({
            "message": "PDF processed successfully.",
            "download_link": download_link
        })

    except Exception as e:
        # Top-level request boundary: report the failure as JSON instead of
        # letting it propagate.
        return jsonify({"error": str(e)}), 500
|
| 96 |
+
|
| 97 |
def finddata():
    """Return the hard-coded PDF link and keyword list.

    Returns:
        tuple: ``(pdfLink, keyword)`` - a URL string and a list of keyword
        strings. A new list object is built on every call.
    """
    return (
        'https://www.dropbox.com/scl/fi/hnp4mqigb51a5kp89kgfa/00801-ARC-20-ZZ-S-A-0002.pdf?rlkey=45abeoebzqw4qwnslnei6dkd6&st=m4yrcjm2&dl=1',
        ['115 INTEGRATED MRI ROOM LININGS', '710 TRANSPORTATION'],
    )
|
| 101 |
+
|
| 102 |
if __name__ == '__main__':
    # Executed directly as a script: listen on all interfaces, port 7860.
    app.run(port=7860, host='0.0.0.0')
|