Spaces:

Marthee
/

NavigateToPage

Sleeping

App Files Files Community

Marthee committed on Dec 7, 2024

Commit

8db659f

verified ·

1 Parent(s): 4dbe79f

Update app.py

Browse files

Files changed (1) hide show

app.py +116 -12

app.py CHANGED Viewed

@@ -1,23 +1,20 @@
-from flask import Flask, send_file, render_template, request
 import requests
 from io import BytesIO
 import fitz  # PyMuPDF
-# Define local variables to retain the PDF content across function calls
 pdf_content = None
 pageNumTextFound = 0
 app = Flask(__name__)
 @app.route("/", methods=["GET", "POST"])
 def getInfotoMeasure():
     global pdf_content, pageNumTextFound
-    pdf_link = ['https://www.dropbox.com/scl/fi/fjykwhhn9gu9t3kqrflxd/LA002-NOR-ZZ-ZZ-T-A-2403_Architectural-Specification-F10-Brick-and-Block-Walling_A4-_C01.pdf?rlkey=ek9i66i79m0hwp8z5yjs6rp5p&st=jh05a6qs&dl=0']
-    keyword = "To be read with preliminaries/ general conditions"
-    # Call the function to process the PDF
-    pdf_content, pageNumTextFound = highlight_text_from_pdf(pdf_link, keyword)
     # Render the GUI with the current page number
     return render_template("gui.html", page=pageNumTextFound)
@@ -37,8 +34,35 @@ def download_pdf():
         download_name=f"highlighted_page_{pageNumTextFound}.pdf"
     )
 def highlight_text_from_pdf(pdfshareablelinks, keyword):
-    print('PDF Links:', pdfshareablelinks)
     for link in pdfshareablelinks:
         pdf_content = None
@@ -53,7 +77,7 @@ def highlight_text_from_pdf(pdfshareablelinks, keyword):
                 pdf_content = BytesIO(response.content)
         if pdf_content is None:
-            raise ValueError("No valid PDF content found.")
         pageNumTextFound = 1
         pdf_document = fitz.open(stream=pdf_content, filetype="pdf")
@@ -65,10 +89,9 @@ def highlight_text_from_pdf(pdfshareablelinks, keyword):
             if matched:
                 for word in matched:
                     page.add_highlight_annot(word)
                 pageNumTextFound = page_num + 1
-        # Save PDF content to memory and return it along with the page number
         pdf_bytes = BytesIO()
         pdf_document.save(pdf_bytes)
         pdf_document.close()
@@ -77,3 +100,84 @@ def highlight_text_from_pdf(pdfshareablelinks, keyword):
 if __name__ == '__main__':
     app.run(host='0.0.0.0', port=7860)

+from flask import Flask, send_file, render_template, request, jsonify
 import requests
 from io import BytesIO
 import fitz  # PyMuPDF
+# Define global variables to retain PDF content across function calls
 pdf_content = None
 pageNumTextFound = 0
+BASE_URL="https://marthee-navigatetopage.hf.space"
 app = Flask(__name__)
 @app.route("/", methods=["GET", "POST"])
 def getInfotoMeasure():
     global pdf_content, pageNumTextFound
+    if pdf_content is None:
+        return "No PDF content available.", 404
     # Render the GUI with the current page number
     return render_template("gui.html", page=pageNumTextFound)
         download_name=f"highlighted_page_{pageNumTextFound}.pdf"
     )
+# Route to handle external webhook
+@app.route('/api/process-data', methods=['POST'])
+def receive_pdf_data():
+    global pdf_content, pageNumTextFound
+    # Extract PDF link and keyword from the request payload
+    pdf_link = request.form.get('pdf_link')
+    keyword = request.form.get('keyword')
+    print('receiveddd',pdf_link,keyword)
+    if not pdf_link or not keyword:
+        return jsonify({"error": "Both 'pdf_link' and 'keyword' must be provided."}), 400
+    try:
+        # Call the function to process the PDF
+        pdf_content, pageNumTextFound = highlight_text_from_pdf([pdf_link], keyword)
+        if pdf_content is None:
+            return jsonify({"error": "No valid PDF content found."}), 404
+        return jsonify({
+            "message": "PDF processed successfully.",
+            "download_link": f"{BASE_URL}/view-pdf#page={pageNumTextFound}"
+        })
+    except Exception as e:
+        return jsonify({"error": str(e)}), 500
 def highlight_text_from_pdf(pdfshareablelinks, keyword):
+    global pdf_content, pageNumTextFound
     for link in pdfshareablelinks:
         pdf_content = None
                 pdf_content = BytesIO(response.content)
         if pdf_content is None:
+            return None, 0
         pageNumTextFound = 1
         pdf_document = fitz.open(stream=pdf_content, filetype="pdf")
             if matched:
                 for word in matched:
                     page.add_highlight_annot(word)
                 pageNumTextFound = page_num + 1
         pdf_bytes = BytesIO()
         pdf_document.save(pdf_bytes)
         pdf_document.close()
 if __name__ == '__main__':
     app.run(host='0.0.0.0', port=7860)
+# from flask import Flask, send_file, render_template, request
+# import requests
+# from io import BytesIO
+# import fitz  # PyMuPDF
+# # Define local variables to retain the PDF content across function calls
+# pdf_content = None
+# pageNumTextFound = 0
+# app = Flask(__name__)
+# @app.route("/", methods=["GET", "POST"])
+# def getInfotoMeasure():
+#     global pdf_content, pageNumTextFound
+#     pdf_link = ['https://www.dropbox.com/scl/fi/fjykwhhn9gu9t3kqrflxd/LA002-NOR-ZZ-ZZ-T-A-2403_Architectural-Specification-F10-Brick-and-Block-Walling_A4-_C01.pdf?rlkey=ek9i66i79m0hwp8z5yjs6rp5p&st=jh05a6qs&dl=0']
+#     keyword = "To be read with preliminaries/ general conditions"
+#     # Call the function to process the PDF
+#     pdf_content, pageNumTextFound = highlight_text_from_pdf(pdf_link, keyword)
+#     # Render the GUI with the current page number
+#     return render_template("gui.html", page=pageNumTextFound)
+# @app.route('/view-pdf', methods=['GET'])
+# def download_pdf():
+#     global pdf_content, pageNumTextFound
+#     if pdf_content is None:
+#         return "PDF content not found.", 404
+#     pdf_bytes = BytesIO(pdf_content)
+#     return send_file(
+#         pdf_bytes,
+#         mimetype='application/pdf',
+#         as_attachment=False,
+#         download_name=f"highlighted_page_{pageNumTextFound}.pdf"
+#     )
+# def highlight_text_from_pdf(pdfshareablelinks, keyword):
+#     print('PDF Links:', pdfshareablelinks)
+#     for link in pdfshareablelinks:
+#         pdf_content = None
+#         if link and ('http' in link or 'dropbox' in link):
+#             if 'dl=0' in link:
+#                 link = link.replace('dl=0', 'dl=1')
+#             response = requests.get(link)
+#             if response.status_code == 200:
+#                 pdf_content = BytesIO(response.content)
+#         if pdf_content is None:
+#             raise ValueError("No valid PDF content found.")
+#         pageNumTextFound = 1
+#         pdf_document = fitz.open(stream=pdf_content, filetype="pdf")
+#         for page_num in range(pdf_document.page_count):
+#             page = pdf_document.load_page(page_num)
+#             matched = page.search_for(keyword)
+#             if matched:
+#                 for word in matched:
+#                     page.add_highlight_annot(word)
+#                 pageNumTextFound = page_num + 1
+#         # Save PDF content to memory and return it along with the page number
+#         pdf_bytes = BytesIO()
+#         pdf_document.save(pdf_bytes)
+#         pdf_document.close()
+#         return pdf_bytes.getvalue(), pageNumTextFound
+# if __name__ == '__main__':
+#     app.run(host='0.0.0.0', port=7860)