Spaces:
Sleeping
Sleeping
File size: 5,773 Bytes
8db659f a0786f3 1abd7ac a0786f3 8db659f 4dbe79f 4b67bf3 8db659f 0da3f4b a0786f3 46886f1 4dbe79f 8db659f bd99369 67c3f41 4dbe79f bd99369 4dbe79f 67c3f41 bd99369 4dbe79f bd99369 4dbe79f bd99369 8db659f bd99369 8db659f 67c3f41 c9e58d1 1abd7ac 67c3f41 1abd7ac 8db659f 1abd7ac c9e58d1 1abd7ac c9e58d1 1abd7ac 8db659f bd99369 1abd7ac 67c3f41 1abd7ac a0786f3 e2a7908 8db659f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 |
from flask import Flask, send_file, render_template, request, jsonify
import requests
from io import BytesIO
import fitz # PyMuPDF
# Module-level state shared by the route handlers so a PDF processed by one
# request can be served by later requests.
# pdf_content holds the value returned by highlight_text_from_pdf (the saved
# PDF, or None when nothing has been processed successfully).
pdf_content = None
# 1-based page number reported by highlight_text_from_pdf; 0 until a PDF
# has been processed.
pageNumTextFound = 0
# Public base URL used to build the "download_link" returned by the
# /api/process-data endpoint.
BASE_URL="https://marthee-navigatetopage.hf.space"
# The Flask application; the @app.route decorators below register on it.
app = Flask(__name__)
@app.route("/", methods=["GET", "POST"])
def getInfotoMeasure():
    """Render the viewer page for the most recently processed PDF.

    Returns:
        The rendered ``gui.html`` template, given the stored page number as
        ``page``, or a plain-text message with status 404 when no PDF has
        been stored yet.
    """
    # Both names are only read here, so the module-level values are used
    # directly.
    if pdf_content is not None:
        # Hand the current page number to the GUI template.
        return render_template("gui.html", page=pageNumTextFound)
    return "No PDF content available.", 404
@app.route("/view-pdf", methods=["GET"])
def download_pdf():
    """Serve the stored, highlighted PDF for inline viewing.

    Returns:
        A ``send_file`` response with mimetype ``application/pdf`` (not sent
        as an attachment), or a plain-text message with status 404 when no
        PDF has been stored yet.
    """
    if pdf_content is not None:
        # Wrap the stored value in a fresh in-memory file for every request
        # so each response reads from position 0.
        stream = BytesIO(pdf_content)
        suggested_name = f"highlighted_page_{pageNumTextFound}.pdf"
        return send_file(
            stream,
            mimetype="application/pdf",
            as_attachment=False,
            download_name=suggested_name,
        )
    return "PDF content not found.", 404
# Webhook endpoint: external callers POST a PDF link and a keyword here.
@app.route("/api/process-data", methods=["POST"])
def receive_pdf_data():
    """Process a PDF named in the posted form and store the result.

    Reads the form fields ``pdf_link`` and ``keyword``, passes them to
    ``highlight_text_from_pdf`` and stores its return value in the
    module-level ``pdf_content`` / ``pageNumTextFound``.

    Returns:
        A JSON response: status 400 when either field is missing or empty,
        404 when no PDF content came back, 500 with the exception text when
        processing raised, otherwise a success message with a
        ``download_link`` pointing at ``/view-pdf`` with a ``#page=``
        fragment.
    """
    global pdf_content, pageNumTextFound

    # Pull the PDF link and the keyword out of the submitted form.
    form = request.form
    pdf_link = form.get('pdf_link')
    keyword = form.get('keyword')
    print('receiveddd', pdf_link, keyword)

    if not (pdf_link and keyword):
        missing = {"error": "Both 'pdf_link' and 'keyword' must be provided."}
        return jsonify(missing), 400

    try:
        # The helper takes a list of links; this endpoint supplies one.
        result = highlight_text_from_pdf([pdf_link], keyword)
        pdf_content, pageNumTextFound = result

        if pdf_content is None:
            return jsonify({"error": "No valid PDF content found."}), 404

        payload = {
            "message": "PDF processed successfully.",
            "download_link": f"{BASE_URL}/view-pdf#page={pageNumTextFound}",
        }
        return jsonify(payload)
    except Exception as e:
        # Any failure while processing is reported to the caller as a 500.
        return jsonify({"error": str(e)}), 500
def _fetch_pdf_bytes(link, timeout=30):
    """Download ``link`` and return the response body, or None if unusable.

    A link is skipped (None) when it is empty or contains neither ``http``
    nor ``dropbox``, and when the server does not answer with status 200.
    Network errors raised by ``requests`` propagate to the caller.
    """
    if not link or not ('http' in link or 'dropbox' in link):
        return None
    # A share link carrying dl=0 is rewritten to dl=1 to request the file
    # itself (presumably the Dropbox direct-download switch).
    if 'dl=0' in link:
        link = link.replace('dl=0', 'dl=1')
    # NOTE(review): the link is caller-supplied, so this is a server-side
    # fetch of an arbitrary URL; consider restricting the allowed hosts.
    # The timeout keeps a stalled remote server from hanging the worker.
    response = requests.get(link, timeout=timeout)
    if response.status_code != 200:
        return None
    return response.content


def highlight_text_from_pdf(pdfshareablelinks, keyword):
    """Download a PDF, highlight every match of ``keyword`` and return it.

    The links are tried in order and the first one that downloads
    successfully is used. All work happens on local variables; module-level
    state is left for the caller to update from the return value, so a
    failure part-way through cannot leave half-processed data behind.

    Args:
        pdfshareablelinks: Iterable of URL strings (may be empty or None).
        keyword: Text to search for on every page.

    Returns:
        A ``(pdf_bytes, page_number)`` tuple. ``pdf_bytes`` is the saved PDF
        including the highlight annotations and ``page_number`` is the
        1-based number of the last page containing a match (1 when nothing
        matched). ``(None, 0)`` is returned when no link yielded a PDF.

    Raises:
        Whatever ``requests`` or PyMuPDF raise on network or parsing
        failures; the caller is expected to handle them.
    """
    source = None
    for link in pdfshareablelinks or ():
        source = _fetch_pdf_bytes(link)
        if source is not None:
            break
    if source is None:
        return None, 0

    page_found = 1
    document = fitz.open(stream=source, filetype="pdf")
    try:
        for page_num in range(document.page_count):
            page = document.load_page(page_num)
            hits = page.search_for(keyword)
            if not hits:
                continue
            for rect in hits:
                page.add_highlight_annot(rect)
            # Later matching pages overwrite earlier ones, as before.
            page_found = page_num + 1

        # Serialise the annotated document to memory.
        output = BytesIO()
        document.save(output)
    finally:
        # Release the document even if searching or saving failed.
        document.close()
    return output.getvalue(), page_found
if __name__ == "__main__":
    # Started directly as a script: serve on every interface, port 7860.
    app.run(port=7860, host="0.0.0.0")
# from flask import Flask, send_file, render_template, request
# import requests
# from io import BytesIO
# import fitz # PyMuPDF
# # Define local variables to retain the PDF content across function calls
# pdf_content = None
# pageNumTextFound = 0
# app = Flask(__name__)
# @app.route("/", methods=["GET", "POST"])
# def getInfotoMeasure():
# global pdf_content, pageNumTextFound
# pdf_link = ['https://www.dropbox.com/scl/fi/fjykwhhn9gu9t3kqrflxd/LA002-NOR-ZZ-ZZ-T-A-2403_Architectural-Specification-F10-Brick-and-Block-Walling_A4-_C01.pdf?rlkey=ek9i66i79m0hwp8z5yjs6rp5p&st=jh05a6qs&dl=0']
# keyword = "To be read with preliminaries/ general conditions"
# # Call the function to process the PDF
# pdf_content, pageNumTextFound = highlight_text_from_pdf(pdf_link, keyword)
# # Render the GUI with the current page number
# return render_template("gui.html", page=pageNumTextFound)
# @app.route('/view-pdf', methods=['GET'])
# def download_pdf():
# global pdf_content, pageNumTextFound
# if pdf_content is None:
# return "PDF content not found.", 404
# pdf_bytes = BytesIO(pdf_content)
# return send_file(
# pdf_bytes,
# mimetype='application/pdf',
# as_attachment=False,
# download_name=f"highlighted_page_{pageNumTextFound}.pdf"
# )
# def highlight_text_from_pdf(pdfshareablelinks, keyword):
# print('PDF Links:', pdfshareablelinks)
# for link in pdfshareablelinks:
# pdf_content = None
# if link and ('http' in link or 'dropbox' in link):
# if 'dl=0' in link:
# link = link.replace('dl=0', 'dl=1')
# response = requests.get(link)
# if response.status_code == 200:
# pdf_content = BytesIO(response.content)
# if pdf_content is None:
# raise ValueError("No valid PDF content found.")
# pageNumTextFound = 1
# pdf_document = fitz.open(stream=pdf_content, filetype="pdf")
# for page_num in range(pdf_document.page_count):
# page = pdf_document.load_page(page_num)
# matched = page.search_for(keyword)
# if matched:
# for word in matched:
# page.add_highlight_annot(word)
# pageNumTextFound = page_num + 1
# # Save PDF content to memory and return it along with the page number
# pdf_bytes = BytesIO()
# pdf_document.save(pdf_bytes)
# pdf_document.close()
# return pdf_bytes.getvalue(), pageNumTextFound
# if __name__ == '__main__':
# app.run(host='0.0.0.0', port=7860)
|