Spaces:

Marthee
/

NavigateToPage

Sleeping

File size: 5,773 Bytes

8db659f
a0786f3
 
1abd7ac
a0786f3
8db659f
4dbe79f
4b67bf3
8db659f
0da3f4b
a0786f3
 
46886f1
4dbe79f
 
8db659f
 
bd99369
67c3f41
4dbe79f
bd99369
 
 
4dbe79f
 
 
67c3f41
bd99369
4dbe79f
bd99369
 
 
 
4dbe79f
bd99369
 
8db659f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bd99369
8db659f
67c3f41
c9e58d1
1abd7ac
 
 
 
 
 
 
67c3f41
1abd7ac
 
 
 
8db659f
1abd7ac
 
 
 
 
c9e58d1
1abd7ac
 
c9e58d1
1abd7ac
 
8db659f
bd99369
1abd7ac
67c3f41
 
 
 
 
1abd7ac
a0786f3
e2a7908
8db659f

from flask import Flask, send_file, render_template, request, jsonify
import requests
from io import BytesIO
import fitz  # PyMuPDF

# Module-level state shared by the route handlers in this file.
# receive_pdf_data reassigns both values each time it processes a request.
pdf_content = None  # processed PDF as raw bytes; None until a PDF has been stored
pageNumTextFound = 0  # 1-based page number returned by highlight_text_from_pdf
BASE_URL="https://marthee-navigatetopage.hf.space"  # prefix of the download link sent back to the webhook caller
app = Flask(__name__)

@app.route("/", methods=["GET", "POST"])
def getInfotoMeasure():
    """Serve the viewer page for the PDF currently held in memory.

    Returns:
        The rendered ``gui.html`` template, given the stored page number as
        ``page``, or a plain-text 404 response when no PDF has been stored.
    """
    # Only reads the module-level state, so no ``global`` declaration is needed.
    if pdf_content is not None:
        return render_template("gui.html", page=pageNumTextFound)

    return "No PDF content available.", 404

@app.route('/view-pdf', methods=['GET'])
def download_pdf():
    """Stream the stored PDF bytes to the client for inline display.

    Returns:
        A ``send_file`` response with the ``application/pdf`` MIME type, or a
        plain-text 404 response when no PDF has been stored.
    """
    if pdf_content is None:
        return "PDF content not found.", 404

    # Wrap the raw bytes in a fresh stream for every request.
    stream = BytesIO(pdf_content)
    filename = f"highlighted_page_{pageNumTextFound}.pdf"
    return send_file(
        stream,
        mimetype='application/pdf',
        as_attachment=False,  # display in the browser instead of downloading
        download_name=filename,
    )

# Route to handle external webhook
@app.route('/api/process-data', methods=['POST'])
def receive_pdf_data():
    """Process a PDF named in the POSTed form data and report where to view it.

    Reads the ``pdf_link`` and ``keyword`` form fields, passes them to
    ``highlight_text_from_pdf`` and stores the result in the module-level
    ``pdf_content`` / ``pageNumTextFound``.

    Returns:
        A JSON response: a success message with ``download_link`` on success,
        400 when either field is missing or empty, 404 when no PDF content
        was produced, and 500 carrying the exception text on any other error.
    """
    global pdf_content, pageNumTextFound

    form = request.form
    pdf_link = form.get('pdf_link')
    keyword = form.get('keyword')
    print('receiveddd', pdf_link, keyword)

    if not (pdf_link and keyword):
        return jsonify({"error": "Both 'pdf_link' and 'keyword' must be provided."}), 400

    try:
        # The helper takes a list of links; this endpoint supplies exactly one.
        pdf_content, pageNumTextFound = highlight_text_from_pdf([pdf_link], keyword)

        if pdf_content is None:
            return jsonify({"error": "No valid PDF content found."}), 404

        payload = {
            "message": "PDF processed successfully.",
            # The URL fragment carries the page number for the PDF viewer.
            "download_link": f"{BASE_URL}/view-pdf#page={pageNumTextFound}",
        }
        return jsonify(payload)

    except Exception as exc:
        return jsonify({"error": str(exc)}), 500

def highlight_text_from_pdf(pdfshareablelinks, keyword, *, timeout=30):
    """Download a PDF and add highlight annotations on every keyword match.

    Links are tried in order. The first one that downloads with HTTP 200 is
    processed and returned; links that are unusable or answer with another
    status are skipped.

    Args:
        pdfshareablelinks: Iterable of URL strings. Falsy entries and entries
            containing neither ``"http"`` nor ``"dropbox"`` are skipped.
        keyword: Text handed to ``page.search_for`` on each page.
        timeout: Seconds passed to ``requests.get`` so a stalled server
            cannot block the caller indefinitely.

    Returns:
        ``(pdf_bytes, page_number)`` for the first usable link, where
        ``page_number`` is the 1-based number of the last page containing a
        match (1 when nothing matched). ``(None, 0)`` when no link yielded a
        PDF, including when ``pdfshareablelinks`` is empty.

    Raises:
        Whatever ``requests`` or PyMuPDF raise while downloading or parsing;
        these are propagated to the caller unchanged.
    """
    # Only local names are used here. The caller stores the returned tuple,
    # so a failure midway cannot leave half-built state in module globals.
    for link in pdfshareablelinks:
        if not link or not ('http' in link or 'dropbox' in link):
            continue

        # Share links carrying "dl=0" are switched to "dl=1" before fetching
        # (presumably Dropbox's direct-download flag -- confirm).
        link = link.replace('dl=0', 'dl=1')

        response = requests.get(link, timeout=timeout)
        if response.status_code != 200:
            continue

        document = fitz.open(stream=BytesIO(response.content), filetype="pdf")
        try:
            page_found = 1
            for page_num in range(document.page_count):
                page = document.load_page(page_num)
                matches = page.search_for(keyword)
                if not matches:
                    continue

                for rect in matches:
                    page.add_highlight_annot(rect)

                # Deliberately overwritten on every hit: the last matching
                # page is the one reported.
                page_found = page_num + 1

            output = BytesIO()
            document.save(output)
        finally:
            # Release the document even when searching or saving fails.
            document.close()

        return output.getvalue(), page_found

    return None, 0

if __name__ == "__main__":
    # Run Flask's built-in server on all interfaces, port 7860, when executed as a script.
    app.run(port=7860, host="0.0.0.0")


# from flask import Flask, send_file, render_template, request
# import requests
# from io import BytesIO
# import fitz  # PyMuPDF

# # Define local variables to retain the PDF content across function calls
# pdf_content = None
# pageNumTextFound = 0

# app = Flask(__name__)

# @app.route("/", methods=["GET", "POST"])
# def getInfotoMeasure():
#     global pdf_content, pageNumTextFound

#     pdf_link = ['https://www.dropbox.com/scl/fi/fjykwhhn9gu9t3kqrflxd/LA002-NOR-ZZ-ZZ-T-A-2403_Architectural-Specification-F10-Brick-and-Block-Walling_A4-_C01.pdf?rlkey=ek9i66i79m0hwp8z5yjs6rp5p&st=jh05a6qs&dl=0']
#     keyword = "To be read with preliminaries/ general conditions"

#     # Call the function to process the PDF
#     pdf_content, pageNumTextFound = highlight_text_from_pdf(pdf_link, keyword)

#     # Render the GUI with the current page number
#     return render_template("gui.html", page=pageNumTextFound)

# @app.route('/view-pdf', methods=['GET'])
# def download_pdf():
#     global pdf_content, pageNumTextFound

#     if pdf_content is None:
#         return "PDF content not found.", 404

#     pdf_bytes = BytesIO(pdf_content)
#     return send_file(
#         pdf_bytes,
#         mimetype='application/pdf',
#         as_attachment=False,
#         download_name=f"highlighted_page_{pageNumTextFound}.pdf"
#     )

# def highlight_text_from_pdf(pdfshareablelinks, keyword):
#     print('PDF Links:', pdfshareablelinks)

#     for link in pdfshareablelinks:
#         pdf_content = None

#         if link and ('http' in link or 'dropbox' in link):
#             if 'dl=0' in link:
#                 link = link.replace('dl=0', 'dl=1')

#             response = requests.get(link)

#             if response.status_code == 200:
#                 pdf_content = BytesIO(response.content)

#         if pdf_content is None:
#             raise ValueError("No valid PDF content found.")

#         pageNumTextFound = 1
#         pdf_document = fitz.open(stream=pdf_content, filetype="pdf")

#         for page_num in range(pdf_document.page_count):
#             page = pdf_document.load_page(page_num)
#             matched = page.search_for(keyword)

#             if matched:
#                 for word in matched:
#                     page.add_highlight_annot(word)
                
#                 pageNumTextFound = page_num + 1

#         # Save PDF content to memory and return it along with the page number
#         pdf_bytes = BytesIO()
#         pdf_document.save(pdf_bytes)
#         pdf_document.close()

#         return pdf_bytes.getvalue(), pageNumTextFound

# if __name__ == '__main__':
#     app.run(host='0.0.0.0', port=7860)