File size: 17,108 Bytes
1fca599
 
68c64f2
1fca599
78a602d
1fca599
d99ce5e
1fca599
 
 
 
4784ea6
9f52778
1fca599
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9f52778
 
 
1fca599
9f52778
1fca599
9f52778
1fca599
 
9f52778
1fca599
 
9f52778
 
1fca599
 
 
 
 
9f52778
1fca599
 
ce2c42f
1fca599
 
 
 
 
b87c24f
1fca599
 
9f52778
1fca599
 
 
 
 
 
 
9f52778
1fca599
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9f52778
1fca599
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9f52778
 
1fca599
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9d35b62
1fca599
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9f52778
1fca599
 
 
 
 
 
 
 
 
 
 
fd4cb96
1fca599
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2353004
9933281
a3d32de
1fca599
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
from flask import Flask, request, jsonify, render_template, send_file, redirect, url_for, Response
import tsadropboxretrieval
# import findInitialMarkups
import InitialMarkups
import requests
import fitz
from io import BytesIO   
import datetime
import time
from threading import Thread
from urllib.parse import quote, unquote, parse_qs
# import pdftotext
import json
# -------------------- App & Globals --------------------
# Flask application object; every @app.route in this file registers on it.
app = Flask(__name__)
# Interpolated into the download names of the /view-pdf-tobebilled and
# /view-highlight responses. It is never reassigned in this file, so those
# names always end in "_0".
pageNumTextFound = 0
# Not referenced anywhere else in this file.
BASE_URL = "https://adr.trevorsadd.co.uk/api/testpage" ##changed this only
# Not referenced anywhere else in this file.
backend_ready = False
# Module-level cache shared by all requests in this process: written by
# /findapi and /findapiFilteredHeadings, read by /view-highlight.
jsonoutput = []  # ensure defined before use

# -------------------- Simple Health/Test --------------------
@app.route("/health", methods=["GET"])
def health():
    """Liveness probe: report a fixed ``ok`` status plus the server's current time.

    Returns:
        A JSON response ``{"status": "ok", "time": <ISO-8601 timestamp>}``.
    """
    # Import the class locally under a private alias. Further down, this module
    # runs ``from datetime import datetime``, which rebinds the global name
    # ``datetime`` to the class; ``datetime.datetime.now()`` therefore raised
    # AttributeError at request time.
    from datetime import datetime as _datetime

    return jsonify(status="ok", time=_datetime.now().isoformat())

# -------------------- Root: keep it simple & reliable --------------------
@app.route("/", methods=["GET"])
def root():
    """Landing endpoint that points callers at the probe routes.

    Answers with plain JSON instead of rendering a template, so it cannot fail
    on a missing template file.
    """
    body = jsonify(message="FIND APIs root. Use /health or /testpage.")
    return body, 200

# -------------------- Headers Filtering Find 1 Space --------------------
@app.route('/api/process-data', methods=['POST'])
def process_headers():
    """Return the headers that ``findInitialMarkups.headersfrompdf`` finds in a PDF.

    Request body (JSON object): ``{"filePath": <PDF location>}``.

    Returns:
        200 with the JSON-encoded result of ``headersfrompdf``,
        400 when ``filePath`` is missing or the body is not a JSON object,
        500 with ``{"error": ...}`` when the helper module cannot be imported
        or the extraction raises.
    """
    # silent=True makes a malformed/absent body come back as None instead of
    # raising, so the client gets the 400 below rather than a generic 500.
    payload = request.get_json(force=True, silent=True)
    if not isinstance(payload, dict):
        payload = {}

    filePath = payload.get('filePath')
    if not filePath:
        return jsonify({"error": "Missing 'filePath'"}), 400

    try:
        # The module-level import of findInitialMarkups is commented out, so
        # the bare name raised NameError on every request. Importing it here
        # lets the route work wherever the module is deployed and reports a
        # clear "No module named ..." error otherwise.
        # NOTE(review): confirm findInitialMarkups is still the intended
        # provider of headersfrompdf (vs. InitialMarkups).
        import findInitialMarkups

        headers = findInitialMarkups.headersfrompdf(filePath)
        return jsonify(headers)
    except Exception as e:
        print(f"Error in /api/process-data: {e}")
        return jsonify({"error": str(e)}), 500

# -------------------- PDF to Text 1 Space --------------------
@app.route('/processalltext1', methods=['POST'])
def processalltextTotext():
    """Return the text that ``pdftotext.texts_from_pdfAllText`` extracts from a PDF.

    Request body (JSON object): ``{"filePath": <PDF location>}``.

    Returns:
        200 with ``{"message": "Data received", "input_data": <text>,
        "Filename:": <filename>}`` (the trailing colon in ``"Filename:"`` is
        part of the existing response contract),
        400 when ``filePath`` is missing or the body is not a JSON object,
        500 with ``{"error": ...}`` when the helper module cannot be imported
        or the extraction raises.
    """
    # silent=True makes a malformed/absent body come back as None instead of
    # raising, so the client gets the 400 below rather than a generic 500.
    payload = request.get_json(force=True, silent=True)
    if not isinstance(payload, dict):
        payload = {}

    pdfpath = payload.get('filePath')
    if not pdfpath:
        return jsonify({"error": "Missing 'filePath' in request data"}), 400

    try:
        # The module-level ``import pdftotext`` is commented out, so the bare
        # name raised NameError on every request. Importing it here lets the
        # route work wherever the module is deployed and reports a clear
        # import error otherwise.
        # NOTE(review): this expects the project's own pdftotext module (the
        # one exposing texts_from_pdfAllText) — confirm it is on the path.
        import pdftotext

        pdftext, filename = pdftotext.texts_from_pdfAllText(pdfpath)
        return jsonify({"message": "Data received", "input_data": pdftext, "Filename:": filename})
    except Exception as e:
        print(f"Error in /processalltext1: {e}")
        return jsonify({"error": str(e)}), 500

# -------------------- Keepalive --------------------
@app.route("/keepaliveapii", methods=["GET", "POST"])
def keepaliveapi():
    """Keepalive ping: log the call and answer with the literal text ``alivee``.

    If logging the ping raises, the error is printed and returned as a JSON
    body with status 500.
    """
    try:
        print('Keepalive pinged')
    except Exception as exc:
        print('Error in keepalive:', exc)
        failure = jsonify(status="error", message=str(exc))
        return failure, 500
    else:
        return 'alivee'

# -------------------- View PDF (Marked up) --------------------
def getpdfcontent(pdf_path, timeout=(10, 60)):
    """Download a PDF over HTTP and return it as an in-memory stream.

    Args:
        pdf_path: URL of the PDF. Any ``dl=0`` in an http/dropbox link is
            rewritten to ``dl=1`` before the request (presumably so Dropbox
            serves the file itself rather than a preview page — confirm).
        timeout: Timeout in seconds handed to ``requests.get``, either a single
            number or a ``(connect, read)`` pair.

    Returns:
        An ``io.BytesIO`` holding the downloaded bytes.

    Raises:
        ValueError: If ``pdf_path`` is empty, or the downloaded payload is
            empty or does not start with the ``%PDF`` magic bytes.
        requests.RequestException: On connection failure or timeout.
    """
    # Fail fast with a clear message instead of letting requests choke on None/"".
    if not pdf_path:
        raise ValueError("No PDF link provided.")

    # Handle Dropbox URLs
    if 'http' in pdf_path or 'dropbox' in pdf_path:
        pdf_path = pdf_path.replace('dl=0', 'dl=1')

    # NOTE(review): pdf_path comes straight from the caller; consider
    # restricting the allowed hosts if this is reachable from untrusted clients.
    # Without a timeout a stalled remote server would block this worker forever.
    response = requests.get(pdf_path, timeout=timeout)
    pdf_bytes = response.content

    if not pdf_bytes or not pdf_bytes.startswith(b"%PDF"):
        raise ValueError("No valid PDF content found.")

    # Return a BytesIO stream
    return BytesIO(pdf_bytes)


@app.route('/view-pdf', methods=['GET'])
def view_pdf():
    """Serve the PDF named by the ``pdfLink`` query parameter inline.

    Returns:
        The PDF as ``application/pdf`` (not an attachment) on success,
        400 when ``pdfLink`` is missing,
        500 when fetching the PDF raises,
        404 when the fetch yields ``None``.
    """
    raw_link = request.args.get('pdfLink')
    if not raw_link:
        return "Missing pdfLink parameter.", 400

    pdf_link = unquote(raw_link)
    print("Extracted PDF Link:", pdf_link)

    try:
        stream = getpdfcontent(pdf_link)
    except Exception as exc:
        print("Error during PDF extraction:", exc)
        return "PDF could not be processed.", 500

    if stream is None:
        return "PDF content not found or broken.", 404

    # getpdfcontent already hands back a BytesIO, so pass it through untouched.
    send_options = {
        "mimetype": 'application/pdf',
        "as_attachment": False,
        "download_name": "annotated_page.pdf",
    }
    return send_file(stream, **send_options)

# -------------------- Process PDF -> Upload to Dropbox (renamed to avoid duplicate route) --------------------
@app.route('/api/process-pdf', methods=['POST'])
def process_pdf_and_upload():
    """Mark up a PDF and upload the marked-up file and its summary via Dropbox helpers.

    Request body (JSON object): ``{"filePath": <shared PDF link>}``.

    The link is processed with ``InitialMarkups.extract_section_under_header``;
    the two resulting documents are passed to
    ``tsadropboxretrieval.uploadanyFile`` under ``/TSA JOBS/ADR Test/FIND/``,
    named after the shared link's metadata name (the summary gets a
    ``" Markup Summary.pdf"`` suffix).

    Returns:
        200 with ``{"message", "PDF_MarkedUp", "Table_PDF_Markup_Summary"}``
        where the last two are whatever ``uploadanyFile`` returned,
        400 when ``filePath`` is missing or the body is not a JSON object,
        500 with ``{"error": ...}`` if processing or upload raises.
    """
    # silent=True makes a malformed/absent body come back as None instead of
    # raising, so the client gets the 400 below rather than a generic 500.
    payload = request.get_json(force=True, silent=True)
    if not isinstance(payload, dict):
        payload = {}

    pdfLink = payload.get('filePath')
    if not pdfLink:
        return jsonify({"error": "'filePath' must be provided."}), 400

    try:
        print("Processing PDF:", pdfLink)
        _pdfbytes, pdf_document, tablepdfoutput = InitialMarkups.extract_section_under_header(pdfLink)

        dbxTeam = tsadropboxretrieval.ADR_Access_DropboxTeam('user')
        metadata = dbxTeam.sharing_get_shared_link_metadata(pdfLink)

        dbPath = '/TSA JOBS/ADR Test/FIND/'
        pdflink = tsadropboxretrieval.uploadanyFile(doc=pdf_document, path=dbPath, pdfname=metadata.name)
        # Strip the last ".pdf" from the original name before adding the suffix.
        summary_name = metadata.name.rsplit(".pdf", 1)[0] + ' Markup Summary.pdf'
        tablepdfLink = tsadropboxretrieval.uploadanyFile(
            doc=tablepdfoutput,
            path=dbPath,
            pdfname=summary_name,
        )
        print('Uploaded:', pdflink, tablepdfLink)

        return jsonify({
            "message": "PDF processed successfully.",
            "PDF_MarkedUp": pdflink,
            "Table_PDF_Markup_Summary": tablepdfLink
        })
    except Exception as e:
        print(f"Error in /api/process-pdf: {e}")
        return jsonify({"error": str(e)}), 500

# -------------------- Not billed / Markup subsets --------------------
@app.route('/findapitobebilled1', methods=['GET','POST'])
def findapitobebilled1():
    """Return the ``alltext_tobebilled`` result for a PDF as JSON.

    Request body (JSON object): ``{"filePath": <PDF link>}``.

    Returns:
        200 with the fourth value returned by
        ``InitialMarkups.extract_section_under_header_tobebilledOnly``,
        400 when ``filePath`` is missing or the body is not a JSON object
        (including a GET sent without a body),
        500 with ``{"error": ...}`` if extraction raises.
    """
    # silent=True makes a malformed/absent body come back as None instead of
    # raising, so the client gets the 400 below rather than a generic 500.
    payload = request.get_json(force=True, silent=True)
    if not isinstance(payload, dict):
        payload = {}

    pdfLink = payload.get('filePath')
    if not pdfLink:
        return jsonify({"error": "Missing 'filePath'"}), 400

    try:
        # Only the to-be-billed text is used; the other five results are discarded.
        (_pdfbytes, _pdf_document, _tablepdfoutput,
         alltext_tobebilled, _alltextNoNotbilled, _filename) = (
            InitialMarkups.extract_section_under_header_tobebilledOnly(pdfLink))
        return jsonify(alltext_tobebilled)
    except Exception as e:
        print(f"Error in /findapitobebilled1: {e}")
        return jsonify({"error": str(e)}), 500
    

# ----------------------------------------------------------------------
@app.route('/findapitobebilled_htmlformat', methods=['GET','POST'])
def findapitobebilled_htmlformat():
    """Render the sections extracted from a PDF as one HTML document.

    Request body (JSON object): ``{"filePath": <PDF link>}``.

    The section list is the third value returned by
    ``InitialMarkups.extract_section_under_header_tobebilledOnly`` (a JSON
    string or an already-parsed list). Each section contributes, when the key
    is present: ``head above 2`` as ``<h1>``, ``head above 1`` as ``<h2>``,
    ``Subject`` as ``<h3>`` and, only under a subject, ``BodyText`` joined
    with spaces as ``<p>``.

    Returns:
        200 with ``{"input_data": <html>, "Filename:": <filename>}`` (the
        trailing colon in ``"Filename:"`` is part of the existing contract),
        400 when ``filePath`` is missing or the body is not a JSON object,
        500 with ``{"error": ...}`` if extraction or rendering raises.
    """
    from html import escape  # function-scope import keeps this block self-contained

    def as_text(value):
        # PDF text may contain '<', '>' or '&'; escape it so it cannot break or
        # inject markup. Values are only used as element text, so quotes stay.
        return escape(str(value), quote=False)

    # silent=True makes a malformed/absent body come back as None instead of
    # raising, so the client gets the 400 below rather than a generic 500.
    payload = request.get_json(force=True, silent=True)
    if not isinstance(payload, dict):
        payload = {}

    pdfLink = payload.get('filePath')
    if not pdfLink:
        return jsonify({"error": "Missing 'filePath'"}), 400

    try:
        (_pdfbytes, _pdf_document, tablepdfoutput,
         _alltext_tobebilled, _alltextNoNotbilled, filename) = (
            InitialMarkups.extract_section_under_header_tobebilledOnly(pdfLink))

        # Accept both a JSON string and an already-parsed list, matching the
        # handling in the multiple-PDFs endpoint.
        if isinstance(tablepdfoutput, str):
            sections = json.loads(tablepdfoutput)
        else:
            sections = tablepdfoutput

        # Collect all body parts
        parts = []
        for section in sections:
            if "head above 2" in section:
                parts.append(f"<h1>{as_text(section['head above 2'])}</h1><br>")

            if "head above 1" in section:
                parts.append(f"<h2>{as_text(section['head above 1'])}</h2><br>")

            if "Subject" in section:
                parts.append(f"<h3>{as_text(section['Subject'])}</h3><br>")
                if "BodyText" in section:
                    parts.append(f"<p>{as_text(' '.join(section['BodyText']))}</p><br>")

        html_body = "".join(parts)
        page_title = as_text(filename)

        # Wrap everything into one HTML document
        html_content = f"""
        <!DOCTYPE html>
        <html>
        <head>
            <title>{page_title}</title>
            <meta charset="utf-8">
        </head>
        <body>
            {html_body}
        </body>
        </html>
        """
        return jsonify({"input_data": html_content, "Filename:": filename})
    except Exception as e:
        print(f"Error in /findapitobebilled_htmlformat: {e}")
        return jsonify({"error": str(e)}), 500


@app.route('/view-pdf-tobebilled', methods=['GET'])
def view_pdf_tobebilled():
    """Serve inline the PDF bytes produced for the ``pdfLink`` query parameter.

    The bytes are the first value returned by
    ``InitialMarkups.extract_section_under_header_tobebilledOnly``.

    Returns:
        The PDF as ``application/pdf`` (not an attachment) on success,
        400 when ``pdfLink`` is missing,
        500 when extraction raises,
        404 when the result is ``None`` or lacks the ``%PDF`` magic bytes.
    """
    raw_link = request.args.get('pdfLink')
    if not raw_link:
        return "Missing pdfLink parameter.", 400

    pdf_link = unquote(raw_link)
    print("Extracted PDF Link:", pdf_link)

    try:
        extraction = InitialMarkups.extract_section_under_header_tobebilledOnly(pdf_link)
        pdf_bytes = extraction[0]
    except Exception as exc:
        print("Error during PDF extraction:", exc)
        return "PDF could not be processed.", 500

    looks_like_pdf = pdf_bytes is not None and pdf_bytes.startswith(b"%PDF")
    if not looks_like_pdf:
        return "PDF content not found or broken.", 404

    attachment_name = f"annotated_page_{pageNumTextFound}.pdf"
    return send_file(
        BytesIO(pdf_bytes),
        mimetype='application/pdf',
        as_attachment=False,
        download_name=attachment_name,
    )

# -------------------- Final markups: view one highlight --------------------
@app.route('/view-highlight', methods=['GET','POST'])
def download_pdfHighlight():
    """Serve inline a PDF generated for one keyword of a previously processed document.

    Query parameters:
        pdfLink: Link to the PDF (URL-decoded once more before use).
        keyword: Compared against the ``Subject`` of the records cached in the
            module-level ``jsonoutput``.

    When a record matches, its ``Page`` (minus one) and ``head above 1`` are
    passed to ``InitialMarkups.extract_section_under_headerRawan``; otherwise
    page 0 and ``None`` are used.

    Returns:
        The PDF as ``application/pdf`` (not an attachment) on success,
        400 when a parameter is missing,
        500 when extraction raises,
        404 when extraction yields ``None``.
    """
    pdf_link = request.args.get('pdfLink')
    keyword = request.args.get('keyword')
    if not pdf_link or not keyword:
        return "Missing required parameters.", 400

    pdf_link = unquote(pdf_link)
    print("Extracted PDF Link:", pdf_link)
    print("Extracted Keyword:", keyword)

    # jsonoutput is only read here, so no ``global`` statement is needed.
    # Other endpoints in this file receive their table output as a JSON string,
    # so tolerate that form as well as an already-parsed list.
    records = jsonoutput
    if isinstance(records, str):
        try:
            records = json.loads(records)
        except ValueError:
            records = []
    if not isinstance(records, (list, tuple)):
        records = []

    matching_item = next(
        (item for item in records
         if isinstance(item, dict) and item.get("Subject") == keyword),
        None,
    )

    page_number = 0
    stringtowrite = None
    if matching_item:
        stringtowrite = matching_item.get("head above 1")
        try:
            page_number = int(matching_item.get("Page")) - 1
        except (TypeError, ValueError):
            # A missing or non-numeric page must not crash the request.
            page_number = 0
            print(f"Invalid page for '{keyword}'; defaulting to page 0.")
        else:
            print(f"Page number for '{keyword}': {page_number}")
    else:
        print("No match found in jsonoutput; defaulting to page 0.")

    # Same error contract as the other PDF view routes: a failed extraction
    # becomes a plain 500 instead of an unhandled exception.
    try:
        pdf_content = InitialMarkups.extract_section_under_headerRawan(
            pdf_link, keyword, page_number, stringtowrite
        )[0]
    except Exception as e:
        print("Error during PDF extraction:", e)
        return "PDF could not be processed.", 500

    if pdf_content is None:
        return "PDF content not found.", 404

    return send_file(
        BytesIO(pdf_content),
        mimetype='application/pdf',
        as_attachment=False,
        download_name=f"annotated_page_{pageNumTextFound}.pdf"
    )

@app.route('/findapiFilteredHeadings', methods=['GET','POST'])
def findapiFilteredHeadings():
    """Run the extraction for a caller-supplied list of headings and return its text.

    Request body (JSON object):
        ``{"filePath": <PDF link>, "listofheadings": <JSON array>}``.

    Side effect: the third value returned by
    ``InitialMarkups.extract_section_under_headerRawan`` is stored in the
    module-level ``jsonoutput`` for later use by ``/view-highlight``.

    Returns:
        200 with the fourth returned value (``alltext``) as JSON,
        400 when ``filePath`` or ``listofheadings`` is missing or the body is
        not a JSON object,
        500 with ``{"error": ...}`` if extraction raises.
    """
    global jsonoutput

    # silent=True makes a malformed/absent body come back as None instead of
    # raising, so the client gets the 400 below rather than a generic 500.
    payload = request.get_json(force=True, silent=True)
    if not isinstance(payload, dict):
        payload = {}

    pdfLink = payload.get('filePath')
    listofheadings = payload.get('listofheadings')  # json array
    if not pdfLink or listofheadings is None:
        return jsonify({"error": "Missing 'filePath' or 'listofheadings'"}), 400

    try:
        _pdfbytes, _pdf_document, tablepdfoutput, alltext = (
            InitialMarkups.extract_section_under_headerRawan(pdfLink, listofheadings))
        jsonoutput = tablepdfoutput
        return jsonify(alltext)
    except Exception as e:
        print(f"Error in /findapiFilteredHeadings: {e}")
        return jsonify({"error": str(e)}), 500

@app.route('/findapitobebilledonlyNew', methods=['GET','POST'])
def findapitobebilledonly():
    """Render the sections extracted from a PDF as one HTML document.

    Request body (JSON object): ``{"filePath": <PDF link>}``.

    The section list is the third value returned by
    ``InitialMarkups.extract_section_under_header_tobebilled2`` (a JSON string
    or an already-parsed list). Each section contributes, when the key is
    present: ``head above 2`` as ``<h1>``, ``head above 1`` as ``<h2>``,
    ``Subject`` as ``<h3>`` and, only under a subject, ``BodyText`` joined
    with spaces as ``<p>``.

    Returns:
        200 with ``{"input_data": <html>, "Filename:": <filename>}`` (the
        trailing colon in ``"Filename:"`` is part of the existing contract),
        400 when ``filePath`` is missing or the body is not a JSON object,
        500 with ``{"error": ...}`` if extraction or rendering raises.
    """
    from html import escape  # function-scope import keeps this block self-contained

    def as_text(value):
        # PDF text may contain '<', '>' or '&'; escape it so it cannot break or
        # inject markup. Values are only used as element text, so quotes stay.
        return escape(str(value), quote=False)

    # silent=True makes a malformed/absent body come back as None instead of
    # raising, so the client gets the 400 below rather than a generic 500.
    payload = request.get_json(force=True, silent=True)
    if not isinstance(payload, dict):
        payload = {}

    pdfLink = payload.get('filePath')
    if not pdfLink:
        return jsonify({"error": "Missing 'filePath'"}), 400

    try:
        _pdfbytes, _pdf_document, tablepdfoutput, _alltext, filename = (
            InitialMarkups.extract_section_under_header_tobebilled2(pdfLink))

        # Accept both a JSON string and an already-parsed list, matching the
        # handling in the multiple-PDFs endpoint.
        if isinstance(tablepdfoutput, str):
            sections = json.loads(tablepdfoutput)
        else:
            sections = tablepdfoutput

        # Collect all body parts
        parts = []
        for section in sections:
            if "head above 2" in section:
                parts.append(f"<h1>{as_text(section['head above 2'])}</h1><br>")

            if "head above 1" in section:
                parts.append(f"<h2>{as_text(section['head above 1'])}</h2><br>")

            if "Subject" in section:
                parts.append(f"<h3>{as_text(section['Subject'])}</h3><br>")
                if "BodyText" in section:
                    parts.append(f"<p>{as_text(' '.join(section['BodyText']))}</p><br>")

        html_body = "".join(parts)
        page_title = as_text(filename)

        # Wrap everything into one HTML document
        html_content = f"""
        <!DOCTYPE html>
        <html>
        <head>
            <title>{page_title}</title>
            <meta charset="utf-8">
        </head>
        <body>
            {html_body}
        </body>
        </html>
        """
        return jsonify({"input_data": html_content, "Filename:": filename})
    except Exception as e:
        # Log under this route's real path so it can be told apart from its siblings.
        print(f"Error in /findapitobebilledonlyNew: {e}")
        return jsonify({"error": str(e)}), 500



@app.route('/findapitobebilledonlyNewMultiplePDFS', methods=['GET','POST'])
def findapitobebilledonlymarthe():
    """Render the sections extracted by the multiple-PDFs extractor as one HTML document.

    Request body (JSON object): ``{"filePath": <value forwarded to the extractor>}``.

    The section list is the third value returned by
    ``InitialMarkups.extract_section_under_header_tobebilledMultiplePDFS`` (a
    JSON string or an already-parsed list). Each section contributes, when the
    key is present: ``head above 2`` as ``<h1>``, ``head above 1`` as
    ``<h2>``, ``Subject`` as ``<h3>`` and, only under a subject, ``BodyText``
    joined with spaces as ``<p>``.

    Returns:
        200 with ``{"input_data": <html>, "Filename:": <filename>}`` (the
        trailing colon in ``"Filename:"`` is part of the existing contract),
        400 when ``filePath`` is missing or the body is not a JSON object,
        500 with ``{"error": ...}`` if extraction or rendering raises.
    """
    from html import escape  # function-scope import keeps this block self-contained

    def as_text(value):
        # PDF text may contain '<', '>' or '&'; escape it so it cannot break or
        # inject markup. Values are only used as element text, so quotes stay.
        return escape(str(value), quote=False)

    # silent=True makes a malformed/absent body come back as None instead of
    # raising, so the client gets the 400 below rather than a generic 500.
    payload = request.get_json(force=True, silent=True)
    if not isinstance(payload, dict):
        payload = {}

    pdfLink = payload.get('filePath')
    if not pdfLink:
        return jsonify({"error": "Missing 'filePath'"}), 400

    try:
        _pdfbytes, _pdf_document, tablepdfoutput, _alltext, filename = (
            InitialMarkups.extract_section_under_header_tobebilledMultiplePDFS(pdfLink))

        # Parse JSON string → list of dicts; pass an already-parsed value through.
        if isinstance(tablepdfoutput, str):
            sections = json.loads(tablepdfoutput)
        else:
            sections = tablepdfoutput

        # Collect all body parts
        parts = []
        for section in sections:
            if "head above 2" in section:
                parts.append(f"<h1>{as_text(section['head above 2'])}</h1><br>")

            if "head above 1" in section:
                parts.append(f"<h2>{as_text(section['head above 1'])}</h2><br>")

            if "Subject" in section:
                parts.append(f"<h3>{as_text(section['Subject'])}</h3><br>")
                if "BodyText" in section:
                    parts.append(f"<p>{as_text(' '.join(section['BodyText']))}</p><br>")

        html_body = "".join(parts)
        page_title = as_text(filename)

        # Wrap everything into one HTML document
        html_content = f"""
        <!DOCTYPE html>
        <html>
        <head>
            <title>{page_title}</title>
            <meta charset="utf-8">
        </head>
        <body>
            {html_body}
        </body>
        </html>
        """
        return jsonify({"input_data": html_content, "Filename:": filename})
    except Exception as e:
        # Log under this route's real path so it can be told apart from its siblings.
        print(f"Error in /findapitobebilledonlyNewMultiplePDFS: {e}")
        return jsonify({"error": str(e)}), 500


@app.route('/findapiAllDocNoNotbilled', methods=['GET','POST'])
def findapiAllDocNoNotbilled():
    """Return the ``alltextNoNotbilled`` result for a PDF as JSON.

    Request body (JSON object): ``{"filePath": <PDF link>}``.

    Returns:
        200 with the fifth value returned by
        ``InitialMarkups.extract_section_under_header_tobebilledOnly``,
        400 when ``filePath`` is missing or the body is not a JSON object
        (including a GET sent without a body),
        500 with ``{"error": ...}`` if extraction raises.
    """
    # silent=True makes a malformed/absent body come back as None instead of
    # raising, so the client gets the 400 below rather than a generic 500.
    payload = request.get_json(force=True, silent=True)
    if not isinstance(payload, dict):
        payload = {}

    pdfLink = payload.get('filePath')
    if not pdfLink:
        return jsonify({"error": "Missing 'filePath'"}), 400

    try:
        # Only the "no not-billed" text is used; the other results are discarded.
        (_pdfbytes, _pdf_document, _tablepdfoutput,
         _alltext_tobebilled, alltextNoNotbilled, _filename) = (
            InitialMarkups.extract_section_under_header_tobebilledOnly(pdfLink))
        return jsonify(alltextNoNotbilled)
    except Exception as e:
        print(f"Error in /findapiAllDocNoNotbilled: {e}")
        return jsonify({"error": str(e)}), 500

# -------------------- Rawan - MC Connection --------------------
@app.route('/findapi', methods=['GET','POST'])
def findapi():
    """Run the header extraction on a PDF and return its table output as JSON.

    Request body (JSON object): ``{"filePath": <PDF link>}``.

    Side effect: the third value returned by
    ``InitialMarkups.extract_section_under_header`` is stored in the
    module-level ``jsonoutput`` for later use by ``/view-highlight``.

    Returns:
        200 with that table output,
        400 when ``filePath`` is missing or the body is not a JSON object
        (including a GET sent without a body),
        500 with ``{"error": ...}`` if extraction raises.
    """
    global jsonoutput

    # silent=True makes a malformed/absent body come back as None instead of
    # raising, so the client gets the 400 below rather than a generic 500.
    payload = request.get_json(force=True, silent=True)
    if not isinstance(payload, dict):
        payload = {}

    pdfLink = payload.get('filePath')
    if not pdfLink:
        return jsonify({"error": "Missing 'filePath'"}), 400

    try:
        _pdfbytes, _pdf_document, tablepdfoutput = InitialMarkups.extract_section_under_header(pdfLink)
        jsonoutput = tablepdfoutput
        return jsonify(tablepdfoutput)
    except Exception as e:
        print(f"Error in /findapi: {e}")
        return jsonify({"error": str(e)}), 500

#--------------------testpage-----------------------------
import socket
from datetime import datetime

@app.route('/testpage')
def test_page():
    """Return a small HTML status page showing the host name and the current server time."""
    # Gather the two dynamic values shown on the page.
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    machine = socket.gethostname()

    page = f"""
    <!DOCTYPE html>
    <html>
    <head>
        <title>Server Test Page</title>
        <style>
            body {{ font-family: Arial, sans-serif; text-align: center; margin-top: 50px; }}
            .success {{ color: #2ecc71; font-size: 24px; }}
            .info {{ color: #34495e; margin-top: 10px; }}
            .container {{ max-width: 600px; margin: 0 auto; text-align: left; }}
        </style>
    </head>
    <body>
        <div class="success">🚀 Flask Server is Running!</div>
        <div class="container">
            <p class="info"><strong>Hostname:</strong> {machine}</p>
            <p class="info"><strong>Server Time:</strong> {timestamp}</p>
            <p class="info"><strong>Endpoint:</strong> /testpage</p>
            <p class="info"><strong>Status:</strong> <span style="color: #2ecc71;">Operational ✅</span></p>
        </div>
    </body>
    </html>
    """
    return page

# -------------------- Run --------------------   
if __name__ == "__main__":
    # Debug mode enables the Werkzeug interactive debugger, which lets anyone
    # who can reach the server execute code. Because this binds to every
    # interface (0.0.0.0), debug is opt-in: set FLASK_DEBUG=1 to enable it.
    import os

    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in {"1", "true", "yes", "on"}
    app.run(host="0.0.0.0", port=5000, debug=debug_enabled)