"""Flask service that highlights a keyword in a remote PDF and serves the result.

Workflow:
    1. A client POSTs ``pdf_link`` and ``keyword`` to ``/api/process-data``.
    2. The PDF is downloaded, every occurrence of the keyword is highlighted,
       and the annotated bytes are kept in module-level state.
    3. ``/view-pdf`` streams the annotated PDF; ``/`` renders ``gui.html`` with
       the page number to open.
"""
from io import BytesIO

import fitz  # PyMuPDF
import requests
from flask import Flask, jsonify, render_template, request, send_file

# Most recently processed PDF (raw bytes) and the 1-based page number to open.
# Module-level so the result of one request is available to later requests.
pdf_content = None
pageNumTextFound = 0

BASE_URL = "https://marthee-navigatetopage.hf.space"

# Upper bound, in seconds, for the PDF download. Without it a stalled remote
# host would block the request handler indefinitely.
DOWNLOAD_TIMEOUT_SECONDS = 30

app = Flask(__name__)


@app.route("/", methods=["GET", "POST"])
def getInfotoMeasure():
    """Render ``gui.html`` for the stored PDF.

    Returns:
        The rendered template with ``page`` set to the stored page number, or
        a plain-text 404 response when no PDF has been processed yet.
    """
    if pdf_content is None:
        return "No PDF content available.", 404

    return render_template("gui.html", page=pageNumTextFound)


@app.route('/view-pdf', methods=['GET'])
def download_pdf():
    """Stream the stored, highlighted PDF inline.

    Returns:
        A ``send_file`` response with mimetype ``application/pdf``, or a
        plain-text 404 response when no PDF has been processed yet.
    """
    if pdf_content is None:
        return "PDF content not found.", 404

    return send_file(
        BytesIO(pdf_content),
        mimetype='application/pdf',
        as_attachment=False,
        download_name=f"highlighted_page_{pageNumTextFound}.pdf"
    )


# Route to handle external webhook
@app.route('/api/process-data', methods=['POST'])
def receive_pdf_data():
    """Download the PDF named in the form data and highlight the keyword.

    Form fields:
        pdf_link: URL of the PDF to fetch.
        keyword: Text to search for and highlight.

    Returns:
        JSON with a ``download_link`` on success; JSON with an ``error`` key
        and status 400 (missing field), 404 (no PDF could be downloaded) or
        500 (any exception raised while processing).
    """
    global pdf_content, pageNumTextFound

    pdf_link = request.form.get('pdf_link')
    keyword = request.form.get('keyword')
    print('receiveddd',pdf_link,keyword)

    if not pdf_link or not keyword:
        return jsonify({"error": "Both 'pdf_link' and 'keyword' must be provided."}), 400

    # NOTE(review): pdf_link is caller-supplied, so this endpoint fetches
    # arbitrary URLs server-side. Consider restricting the allowed hosts if
    # the service is reachable by untrusted clients.
    try:
        # The globals are only replaced once processing has fully succeeded
        # (or cleanly reported "no content"); an exception leaves the
        # previously stored PDF untouched.
        pdf_content, pageNumTextFound = highlight_text_from_pdf([pdf_link], keyword)

        if pdf_content is None:
            return jsonify({"error": "No valid PDF content found."}), 404

        return jsonify({
            "message": "PDF processed successfully.",
            "download_link": f"{BASE_URL}/view-pdf#page={pageNumTextFound}"
        })
    except Exception as e:
        return jsonify({"error": str(e)}), 500


def highlight_text_from_pdf(pdfshareablelinks, keyword):
    """Download a PDF and add highlight annotations on every keyword match.

    This function does not touch module-level state; the caller decides what
    to do with the returned values.

    Args:
        pdfshareablelinks: Iterable of URLs. Each link replaces the result of
            the previous one, so the last link determines the outcome.
        keyword: Text passed to ``page.search_for``.

    Returns:
        ``(pdf_bytes, page_number)`` where ``pdf_bytes`` is the annotated PDF
        and ``page_number`` is the 1-based number of the last page containing
        a match (1 when nothing matched). Returns ``(None, 0)`` when no PDF
        could be downloaded.

    Raises:
        requests.RequestException: If the download fails or times out.
        Exception: Whatever PyMuPDF raises for unreadable PDF data.
    """
    downloaded = None
    for link in pdfshareablelinks:
        downloaded = None
        if link and ('http' in link or 'dropbox' in link):
            # Dropbox share links end in dl=0 (preview page); dl=1 requests
            # the file itself.
            if 'dl=0' in link:
                link = link.replace('dl=0', 'dl=1')

            response = requests.get(link, timeout=DOWNLOAD_TIMEOUT_SECONDS)
            if response.status_code == 200:
                downloaded = BytesIO(response.content)

    if downloaded is None:
        return None, 0

    page_found = 1
    pdf_document = fitz.open(stream=downloaded, filetype="pdf")
    try:
        for page_num in range(pdf_document.page_count):
            page = pdf_document.load_page(page_num)
            matched = page.search_for(keyword)
            if matched:
                for rect in matched:
                    page.add_highlight_annot(rect)
                page_found = page_num + 1

        output = BytesIO()
        pdf_document.save(output)
    finally:
        # Release the document even when searching or saving raises.
        pdf_document.close()

    return output.getvalue(), page_found


if __name__ == '__main__':
    app.run(host='0.0.0.0', port=7860)