File size: 5,773 Bytes
8db659f
a0786f3
 
1abd7ac
a0786f3
8db659f
4dbe79f
4b67bf3
8db659f
0da3f4b
a0786f3
 
46886f1
4dbe79f
 
8db659f
 
bd99369
67c3f41
4dbe79f
bd99369
 
 
4dbe79f
 
 
67c3f41
bd99369
4dbe79f
bd99369
 
 
 
4dbe79f
bd99369
 
8db659f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bd99369
8db659f
67c3f41
c9e58d1
1abd7ac
 
 
 
 
 
 
67c3f41
1abd7ac
 
 
 
8db659f
1abd7ac
 
 
 
 
c9e58d1
1abd7ac
 
c9e58d1
1abd7ac
 
8db659f
bd99369
1abd7ac
67c3f41
 
 
 
 
1abd7ac
a0786f3
e2a7908
8db659f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
from flask import Flask, send_file, render_template, request, jsonify
import requests
from io import BytesIO
import fitz  # PyMuPDF

# Module-level state shared by every request handler in this file.
# pdf_content: bytes of the most recently processed (highlighted) PDF, or None
# until a PDF has been processed successfully.
pdf_content = None
# pageNumTextFound: 1-based page number reported by the last processing run;
# starts at 0 before any PDF has been processed.
pageNumTextFound = 0 
# Public base URL used to build the "download_link" returned by /api/process-data.
BASE_URL="https://marthee-navigatetopage.hf.space"
# The Flask application object that the route decorators below register on.
app = Flask(__name__)

@app.route("/", methods=["GET", "POST"])
def getInfotoMeasure():
    """Serve the viewer page for the most recently processed PDF.

    Returns:
        The rendered ``gui.html`` template, with the stored page number
        passed as ``page``, when a PDF is held in the module-level
        ``pdf_content``; otherwise a plain-text message with status 404.
    """
    # The module globals are only read here, so no ``global`` statement is needed.
    if pdf_content is not None:
        return render_template("gui.html", page=pageNumTextFound)

    return "No PDF content available.", 404

@app.route('/view-pdf', methods=['GET'])
def download_pdf():
    """Stream the stored highlighted PDF back to the client for inline viewing.

    Returns:
        A ``send_file`` response with mimetype ``application/pdf`` and
        ``as_attachment=False`` when the module-level ``pdf_content`` holds
        data; otherwise a plain-text message with status 404.
    """
    stored_pdf = pdf_content
    if stored_pdf is None:
        return "PDF content not found.", 404

    # send_file needs a file-like object, so wrap the stored bytes.
    stream = BytesIO(stored_pdf)
    suggested_name = f"highlighted_page_{pageNumTextFound}.pdf"
    return send_file(
        stream,
        mimetype='application/pdf',
        as_attachment=False,
        download_name=suggested_name,
    )

# Webhook endpoint: receives a PDF link plus a keyword and stores the result
@app.route('/api/process-data', methods=['POST'])
def receive_pdf_data():
    """Process a PDF named in the POSTed form data and store the result.

    Reads the form fields ``pdf_link`` and ``keyword``, runs
    ``highlight_text_from_pdf`` on them and writes its result into the
    module-level ``pdf_content`` / ``pageNumTextFound``.

    Returns:
        * 400 with a JSON error when either form field is missing or empty.
        * 404 with a JSON error when no PDF content was produced.
        * 500 with the exception text when processing raises.
        * Otherwise a JSON body with a success message and a
          ``download_link`` pointing at ``/view-pdf`` with a ``#page=``
          fragment.
    """
    global pdf_content, pageNumTextFound

    form = request.form
    pdf_link = form.get('pdf_link')
    keyword = form.get('keyword')
    print('receiveddd',pdf_link,keyword)
    if not (pdf_link and keyword):
        return jsonify({"error": "Both 'pdf_link' and 'keyword' must be provided."}), 400

    try:
        # The helper takes a list of links; wrap the single link accordingly.
        result = highlight_text_from_pdf([pdf_link], keyword)
        pdf_content, pageNumTextFound = result

        if pdf_content is not None:
            download_link = f"{BASE_URL}/view-pdf#page={pageNumTextFound}"
            return jsonify({
                "message": "PDF processed successfully.",
                "download_link": download_link
            })

        return jsonify({"error": "No valid PDF content found."}), 404

    except Exception as exc:
        return jsonify({"error": str(exc)}), 500

def highlight_text_from_pdf(pdfshareablelinks, keyword, timeout=30):
    """Download the first retrievable PDF and highlight every hit of *keyword*.

    The links are tried in order. A link is skipped when it is empty, does
    not contain ``http`` or ``dropbox``, or does not answer with HTTP 200.
    The first link that downloads is processed and returned; later links are
    not fetched.

    This function works purely on local variables: it does not touch the
    module-level ``pdf_content`` / ``pageNumTextFound``, so a failure halfway
    through cannot leave the served PDF in a half-updated state. The caller
    decides what to store.

    Args:
        pdfshareablelinks: Iterable of URL strings (``None`` or empty is
            treated as "no links").
        keyword: Text to search for on every page.
        timeout: Seconds to wait for the download before ``requests`` raises;
            keeps a stalled host from blocking the request forever.

    Returns:
        A ``(pdf_bytes, page_number)`` tuple. ``pdf_bytes`` is the annotated
        PDF as ``bytes`` and ``page_number`` is the 1-based number of the
        last page containing a match (``1`` when no page matches).
        ``(None, 0)`` when no link yielded a PDF.

    Raises:
        Whatever ``requests.get`` raises on network failure or timeout, and
        whatever ``fitz`` raises when the downloaded data cannot be opened,
        searched or saved. These are deliberately not swallowed so the
        caller can report them.
    """
    for link in pdfshareablelinks or ():
        if not link or not ('http' in link or 'dropbox' in link):
            continue

        # Presumably turns a Dropbox share link (dl=0, preview page) into a
        # direct-download link (dl=1) -- verify against Dropbox's link format.
        download_url = link.replace('dl=0', 'dl=1')

        # NOTE(security): the URL comes from the caller (ultimately from the
        # webhook payload) and is fetched as-is; restrict allowed hosts if
        # this service can reach internal networks.
        response = requests.get(download_url, timeout=timeout)
        if response.status_code != 200:
            continue

        pdf_document = fitz.open(stream=BytesIO(response.content), filetype="pdf")
        try:
            # Defaults to the first page when the keyword is never found.
            page_number = 1

            for page_index in range(pdf_document.page_count):
                page = pdf_document.load_page(page_index)
                hits = page.search_for(keyword)
                if not hits:
                    continue

                for rect in hits:
                    page.add_highlight_annot(rect)

                # Overwritten on every matching page, so the last match wins.
                page_number = page_index + 1

            output = BytesIO()
            pdf_document.save(output)
        finally:
            # Release the document even when searching or saving raises.
            pdf_document.close()

        return output.getvalue(), page_number

    # No link produced a PDF (this also covers an empty or missing link list).
    return None, 0

# Start Flask's built-in server only when this file is executed directly,
# not when it is imported. Host 0.0.0.0 listens on all network interfaces.
# NOTE(review): port 7860 is presumably the port the hosting environment
# expects -- confirm against the deployment configuration.
if __name__ == '__main__':
    app.run(host='0.0.0.0', port=7860)


# from flask import Flask, send_file, render_template, request
# import requests
# from io import BytesIO
# import fitz  # PyMuPDF

# # Define local variables to retain the PDF content across function calls
# pdf_content = None
# pageNumTextFound = 0

# app = Flask(__name__)

# @app.route("/", methods=["GET", "POST"])
# def getInfotoMeasure():
#     global pdf_content, pageNumTextFound

#     pdf_link = ['https://www.dropbox.com/scl/fi/fjykwhhn9gu9t3kqrflxd/LA002-NOR-ZZ-ZZ-T-A-2403_Architectural-Specification-F10-Brick-and-Block-Walling_A4-_C01.pdf?rlkey=ek9i66i79m0hwp8z5yjs6rp5p&st=jh05a6qs&dl=0']
#     keyword = "To be read with preliminaries/ general conditions"

#     # Call the function to process the PDF
#     pdf_content, pageNumTextFound = highlight_text_from_pdf(pdf_link, keyword)

#     # Render the GUI with the current page number
#     return render_template("gui.html", page=pageNumTextFound)

# @app.route('/view-pdf', methods=['GET'])
# def download_pdf():
#     global pdf_content, pageNumTextFound

#     if pdf_content is None:
#         return "PDF content not found.", 404

#     pdf_bytes = BytesIO(pdf_content)
#     return send_file(
#         pdf_bytes,
#         mimetype='application/pdf',
#         as_attachment=False,
#         download_name=f"highlighted_page_{pageNumTextFound}.pdf"
#     )

# def highlight_text_from_pdf(pdfshareablelinks, keyword):
#     print('PDF Links:', pdfshareablelinks)

#     for link in pdfshareablelinks:
#         pdf_content = None

#         if link and ('http' in link or 'dropbox' in link):
#             if 'dl=0' in link:
#                 link = link.replace('dl=0', 'dl=1')

#             response = requests.get(link)

#             if response.status_code == 200:
#                 pdf_content = BytesIO(response.content)

#         if pdf_content is None:
#             raise ValueError("No valid PDF content found.")

#         pageNumTextFound = 1
#         pdf_document = fitz.open(stream=pdf_content, filetype="pdf")

#         for page_num in range(pdf_document.page_count):
#             page = pdf_document.load_page(page_num)
#             matched = page.search_for(keyword)

#             if matched:
#                 for word in matched:
#                     page.add_highlight_annot(word)
                
#                 pageNumTextFound = page_num + 1

#         # Save PDF content to memory and return it along with the page number
#         pdf_bytes = BytesIO()
#         pdf_document.save(pdf_bytes)
#         pdf_document.close()

#         return pdf_bytes.getvalue(), pageNumTextFound

# if __name__ == '__main__':
#     app.run(host='0.0.0.0', port=7860)