"""Flask service exposing the FIND PDF-markup endpoints.

Most handlers accept a JSON body with a ``filePath`` entry, delegate the PDF
work to the project-local ``InitialMarkups`` module and return the result as
JSON, or as a PDF stream for the ``/view-*`` routes.
"""

import datetime
import json
import socket
import time
from io import BytesIO
from threading import Thread
from urllib.parse import quote, unquote, parse_qs

import fitz
import requests
from flask import Flask, request, jsonify, render_template, send_file, redirect, url_for, Response

import InitialMarkups
import tsadropboxretrieval
# import findInitialMarkups
# import pdftotext

# NOTE: the file previously also ran ``from datetime import datetime`` further
# down. That rebound the name ``datetime`` to the class and made
# ``datetime.datetime.now()`` in health() raise AttributeError. Only the module
# import is kept; every caller uses ``datetime.datetime``.

# -------------------- App & Globals --------------------
app = Flask(__name__)

pageNumTextFound = 0
BASE_URL = "https://adr.trevorsadd.co.uk/api/testpage"  ##changed this only
backend_ready = False
jsonoutput = []  # ensure defined before use

# Seconds to wait on the remote host before giving up on a PDF download.
REQUEST_TIMEOUT_SECONDS = 60


# -------------------- Internal helpers --------------------
def _request_payload():
    """Return the JSON request body as a dict, or ``{}`` if absent or invalid.

    ``silent=True`` stops a missing or malformed body from raising, so callers
    fall through to their own "missing parameter" 400 response instead of the
    generic 500 produced by the blanket ``except`` in each handler.
    """
    payload = request.get_json(force=True, silent=True)
    return payload if isinstance(payload, dict) else {}


def _load_sections(tablepdfoutput):
    """Return the markup sections as Python objects.

    JSON text is decoded with ``json.loads``; anything else is returned
    unchanged, since the extractors may hand back an already-decoded object.
    """
    if isinstance(tablepdfoutput, (str, bytes, bytearray)):
        return json.loads(tablepdfoutput)
    return tablepdfoutput


def _render_sections_html(sections, filename):
    """Build the document string returned by the "to be billed" HTML endpoints.

    For every section the values under ``head above 2``, ``head above 1``,
    ``Subject`` and ``BodyText`` are appended in that order when the key is
    present. ``BodyText`` is joined with single spaces, so it is presumably a
    list of strings (confirm against InitialMarkups).

    NOTE(review): the literals below hold only line breaks around each value,
    and the wrapper holds only spaces. Any HTML tags that may have surrounded
    them are not visible in this file; confirm against the deployed version
    before merging. Values are inserted unescaped.
    """
    parts = []
    for section in sections:
        if "head above 2" in section:
            parts.append(f"\n\n{section['head above 2']}\n\n\n")
        if "head above 1" in section:
            parts.append(f"\n\n{section['head above 1']}\n\n\n")
        if "Subject" in section:
            parts.append(f"\n\n{section['Subject']}\n\n\n")
        if "BodyText" in section:
            parts.append(f"\n\n{' '.join(section['BodyText'])}\n\n\n")
    html_body = "".join(parts)
    # Wrap everything into one document.
    return f""" {filename} {html_body} """


def _find_section_by_subject(sections, keyword):
    """Return the first dict in *sections* whose ``Subject`` equals *keyword*."""
    for item in sections:
        if isinstance(item, dict) and item.get("Subject") == keyword:
            return item
    return None


# -------------------- Simple Health/Test --------------------
@app.route("/health", methods=["GET"])
def health():
    """Report liveness together with the current server time (ISO 8601)."""
    return jsonify(status="ok", time=datetime.datetime.now().isoformat())


# -------------------- Root: keep it simple & reliable --------------------
@app.route("/", methods=["GET"])
def root():
    """Return a short JSON pointer to the diagnostic endpoints."""
    # Avoid missing-template errors. Keep it simple so external access works.
    return jsonify(message="FIND APIs root. Use /health or /testpage."), 200


# -------------------- Headers Filtering Find 1 Space --------------------
@app.route('/api/process-data', methods=['POST'])
def process_headers():
    """Return the headers extracted from the PDF named by ``filePath``.

    NOTE(review): ``findInitialMarkups`` is not imported (its import is
    commented out above), so the call below raises NameError and this handler
    currently always answers 500 once ``filePath`` is supplied.
    """
    try:
        data = _request_payload()
        filePath = data.get('filePath')
        if not filePath:
            return jsonify({"error": "Missing 'filePath'"}), 400
        headers = findInitialMarkups.headersfrompdf(filePath)
        return jsonify(headers)
    except Exception as e:
        print(f"Error in /api/process-data: {e}")
        return jsonify({"error": str(e)}), 500


# -------------------- PDF to Text 1 Space --------------------
@app.route('/processalltext1', methods=['POST'])
def processalltextTotext():
    """Return the full text of the PDF named by ``filePath``.

    NOTE(review): ``pdftotext`` is not imported (its import is commented out
    above), so the call below raises NameError and this handler currently
    always answers 500 once ``filePath`` is supplied.
    """
    try:
        data = _request_payload()
        pdfpath = data.get('filePath')
        if not pdfpath:
            return jsonify({"error": "Missing 'filePath' in request data"}), 400
        pdftext, filename = pdftotext.texts_from_pdfAllText(pdfpath)
        return jsonify({"message": "Data received", "input_data": pdftext, "Filename:": filename})
    except Exception as e:
        print(f"Error in /processalltext1: {e}")
        return jsonify({"error": str(e)}), 500


# -------------------- Keepalive --------------------
@app.route("/keepaliveapii", methods=["GET", "POST"])
def keepaliveapi():
    """Answer keepalive pings with the literal body ``alivee``."""
    try:
        print('Keepalive pinged')
        return 'alivee'
    except Exception as error:
        print('Error in keepalive:', error)
        return jsonify(status="error", message=str(error)), 500


# -------------------- View PDF (Marked up) --------------------
def getpdfcontent(pdf_path):
    """Download *pdf_path* and return its bytes wrapped in a ``BytesIO``.

    Raises:
        ValueError: if the response body is empty or does not start with
            ``%PDF``.
        requests.RequestException: if the download fails or times out.
    """
    # Handle Dropbox URLs: dl=1 asks for the raw file instead of a preview page.
    if pdf_path and ('http' in pdf_path or 'dropbox' in pdf_path):
        pdf_path = pdf_path.replace('dl=0', 'dl=1')

    # Get the PDF bytes. The timeout stops a stalled host from blocking the
    # worker forever.
    response = requests.get(pdf_path, timeout=REQUEST_TIMEOUT_SECONDS)
    pdf_bytes = response.content

    if not pdf_bytes or not pdf_bytes.startswith(b"%PDF"):
        raise ValueError("No valid PDF content found.")

    # Return a BytesIO stream
    return BytesIO(pdf_bytes)


@app.route('/view-pdf', methods=['GET'])
def view_pdf():
    """Stream the PDF referenced by the ``pdfLink`` query parameter inline.

    NOTE(review): the link is fetched server-side without any allow-list, so
    this endpoint will request whatever URL the caller supplies.
    """
    encoded_pdf_link = request.args.get('pdfLink')
    if not encoded_pdf_link:
        return "Missing pdfLink parameter.", 400

    pdf_link = unquote(encoded_pdf_link)
    print("Extracted PDF Link:", pdf_link)

    try:
        pdf_content = getpdfcontent(pdf_link)
    except Exception as e:
        print("Error during PDF extraction:", e)
        return "PDF could not be processed.", 500

    if pdf_content is None:
        return "PDF content not found or broken.", 404

    # ✅ Do NOT wrap again in BytesIO: getpdfcontent already returns a stream.
    return send_file(
        pdf_content,
        mimetype='application/pdf',
        as_attachment=False,
        download_name="annotated_page.pdf"
    )


# -------------------- Process PDF -> Upload to Dropbox (renamed to avoid duplicate route) --------------------
@app.route('/api/process-pdf', methods=['POST'])
def process_pdf_and_upload():
    """Mark up the PDF at ``filePath`` and upload both outputs to Dropbox.

    The marked-up document is uploaded under the shared link's own file name;
    the summary is uploaded with `` Markup Summary.pdf`` replacing the final
    ``.pdf``. The response carries the two values returned by the uploads.
    """
    try:
        data = _request_payload()
        pdfLink = data.get('filePath')
        if not pdfLink:
            return jsonify({"error": "'filePath' must be provided."}), 400

        print("Processing PDF:", pdfLink)
        _pdfbytes, pdf_document, tablepdfoutput = InitialMarkups.extract_section_under_header(pdfLink)

        dbxTeam = tsadropboxretrieval.ADR_Access_DropboxTeam('user')
        metadata = dbxTeam.sharing_get_shared_link_metadata(pdfLink)
        dbPath = '/TSA JOBS/ADR Test/FIND/'

        pdflink = tsadropboxretrieval.uploadanyFile(doc=pdf_document, path=dbPath, pdfname=metadata.name)
        tablepdfLink = tsadropboxretrieval.uploadanyFile(
            doc=tablepdfoutput,
            path=dbPath,
            pdfname=metadata.name.rsplit(".pdf", 1)[0] + ' Markup Summary.pdf'
        )
        print('Uploaded:', pdflink, tablepdfLink)

        return jsonify({
            "message": "PDF processed successfully.",
            "PDF_MarkedUp": pdflink,
            "Table_PDF_Markup_Summary": tablepdfLink
        })
    except Exception as e:
        print(f"Error in /api/process-pdf: {e}")
        return jsonify({"error": str(e)}), 500


# -------------------- Not billed / Markup subsets --------------------
@app.route('/findapitobebilled1', methods=['GET', 'POST'])
def findapitobebilled1():
    """Return the fourth value (``alltext_tobebilled``) of the to-be-billed extractor."""
    try:
        data = _request_payload()
        pdfLink = data.get('filePath')
        if not pdfLink:
            return jsonify({"error": "Missing 'filePath'"}), 400

        (_pdfbytes, _pdf_document, _tablepdfoutput,
         alltext_tobebilled, _alltextNoNotbilled, _filename) = (
            InitialMarkups.extract_section_under_header_tobebilledOnly(pdfLink)
        )
        return jsonify(alltext_tobebilled)
    except Exception as e:
        print(f"Error in /findapitobebilled1: {e}")
        return jsonify({"error": str(e)}), 500


# ----------------------------------------------------------------------
@app.route('/findapitobebilled_htmlformat', methods=['GET', 'POST'])
def findapitobebilled_htmlformat():
    """Return the to-be-billed sections rendered as one document string.

    Responds with ``{"input_data": <document>, "Filename:": <filename>}``.
    """
    try:
        data = _request_payload()
        pdfLink = data.get('filePath')
        if not pdfLink:
            return jsonify({"error": "Missing 'filePath'"}), 400

        (_pdfbytes, _pdf_document, tablepdfoutput,
         _alltext_tobebilled, _alltextNoNotbilled, filename) = (
            InitialMarkups.extract_section_under_header_tobebilledOnly(pdfLink)
        )

        # Parse JSON string → list of dicts
        sections = _load_sections(tablepdfoutput)
        html_content = _render_sections_html(sections, filename)
        return jsonify({"input_data": html_content, "Filename:": filename})
    except Exception as e:
        print(f"Error in /findapitobebilled_htmlformat: {e}")
        return jsonify({"error": str(e)}), 500


@app.route('/view-pdf-tobebilled', methods=['GET'])
def view_pdf_tobebilled():
    """Stream the to-be-billed marked-up PDF for ``pdfLink`` inline."""
    encoded_pdf_link = request.args.get('pdfLink')
    if not encoded_pdf_link:
        return "Missing pdfLink parameter.", 400

    pdf_link = unquote(encoded_pdf_link)
    print("Extracted PDF Link:", pdf_link)

    try:
        # First element of the extractor's result is the PDF as bytes.
        pdf_content = InitialMarkups.extract_section_under_header_tobebilledOnly(pdf_link)[0]
    except Exception as e:
        print("Error during PDF extraction:", e)
        return "PDF could not be processed.", 500

    if pdf_content is None or not pdf_content.startswith(b"%PDF"):
        return "PDF content not found or broken.", 404

    return send_file(
        BytesIO(pdf_content),
        mimetype='application/pdf',
        as_attachment=False,
        download_name=f"annotated_page_{pageNumTextFound}.pdf"
    )


# -------------------- Final markups: view one highlight --------------------
@app.route('/view-highlight', methods=['GET', 'POST'])
def download_pdfHighlight():
    """Stream the PDF for ``pdfLink`` with the ``keyword`` section highlighted.

    The page and heading are looked up in the module-level ``jsonoutput``,
    which is filled by ``/findapi`` or ``/findapiFilteredHeadings``. When no
    section's ``Subject`` equals the keyword, page 0 and no heading are used.
    """
    pdf_link = request.args.get('pdfLink')
    keyword = request.args.get('keyword')
    if not pdf_link or not keyword:
        return "Missing required parameters.", 400

    pdf_link = unquote(pdf_link)
    print("Extracted PDF Link:", pdf_link)
    print("Extracted Keyword:", keyword)

    try:
        # jsonoutput may hold JSON text or decoded objects; normalise first.
        matching_item = _find_section_by_subject(_load_sections(jsonoutput), keyword)

        if matching_item:
            page_number = int(matching_item.get("Page")) - 1
            stringtowrite = matching_item.get("head above 1")
            print(f"Page number for '{keyword}': {page_number}")
        else:
            page_number = 0
            stringtowrite = None
            print("No match found in jsonoutput; defaulting to page 0.")

        pdf_content = InitialMarkups.extract_section_under_headerRawan(
            pdf_link, keyword, page_number, stringtowrite
        )[0]
    except Exception as e:
        # Same failure response as /view-pdf-tobebilled.
        print("Error during PDF extraction:", e)
        return "PDF could not be processed.", 500

    if pdf_content is None:
        return "PDF content not found.", 404

    return send_file(
        BytesIO(pdf_content),
        mimetype='application/pdf',
        as_attachment=False,
        download_name=f"annotated_page_{pageNumTextFound}.pdf"
    )


@app.route('/findapiFilteredHeadings', methods=['GET', 'POST'])
def findapiFilteredHeadings():
    """Extract only the requested headings and cache the section table.

    Expects ``filePath`` and ``listofheadings`` (a JSON array) in the body.
    The section table is stored in ``jsonoutput`` for ``/view-highlight``.
    """
    global jsonoutput
    try:
        data = _request_payload()
        pdfLink = data.get('filePath')
        listofheadings = data.get('listofheadings')  # json array
        if not pdfLink or listofheadings is None:
            return jsonify({"error": "Missing 'filePath' or 'listofheadings'"}), 400

        _pdfbytes, _pdf_document, tablepdfoutput, alltext = (
            InitialMarkups.extract_section_under_headerRawan(pdfLink, listofheadings)
        )
        jsonoutput = tablepdfoutput
        return jsonify(alltext)
    except Exception as e:
        print(f"Error in /findapiFilteredHeadings: {e}")
        return jsonify({"error": str(e)}), 500


@app.route('/findapitobebilledonlyNew', methods=['GET', 'POST'])
def findapitobebilledonly():
    """Return the ``tobebilled2`` sections rendered as one document string.

    Responds with ``{"input_data": <document>, "Filename:": <filename>}``.
    """
    try:
        data = _request_payload()
        pdfLink = data.get('filePath')
        if not pdfLink:
            return jsonify({"error": "Missing 'filePath'"}), 400

        _pdfbytes, _pdf_document, tablepdfoutput, _alltext, filename = (
            InitialMarkups.extract_section_under_header_tobebilled2(pdfLink)
        )

        # Parse JSON string → list of dicts
        sections = _load_sections(tablepdfoutput)
        html_content = _render_sections_html(sections, filename)
        return jsonify({"input_data": html_content, "Filename:": filename})
    except Exception as e:
        print(f"Error in /findapitobebilledonlyNew: {e}")
        return jsonify({"error": str(e)}), 500


@app.route('/findapitobebilledonlyNewMultiplePDFS', methods=['GET', 'POST'])
def findapitobebilledonlymarthe():
    """Return the multi-PDF to-be-billed sections rendered as one document string.

    Responds with ``{"input_data": <document>, "Filename:": <filename>}``.
    """
    try:
        data = _request_payload()
        pdfLink = data.get('filePath')
        if not pdfLink:
            return jsonify({"error": "Missing 'filePath'"}), 400

        _pdfbytes, _pdf_document, tablepdfoutput, _alltext, filename = (
            InitialMarkups.extract_section_under_header_tobebilledMultiplePDFS(pdfLink)
        )

        # The extractor may return JSON text or an already-decoded object.
        sections = _load_sections(tablepdfoutput)
        html_content = _render_sections_html(sections, filename)
        return jsonify({"input_data": html_content, "Filename:": filename})
    except Exception as e:
        print(f"Error in /findapitobebilledonlyNewMultiplePDFS: {e}")
        return jsonify({"error": str(e)}), 500


@app.route('/findapiAllDocNoNotbilled', methods=['GET', 'POST'])
def findapiAllDocNoNotbilled():
    """Return the fifth value (``alltextNoNotbilled``) of the to-be-billed extractor."""
    try:
        data = _request_payload()
        pdfLink = data.get('filePath')
        if not pdfLink:
            return jsonify({"error": "Missing 'filePath'"}), 400

        (_pdfbytes, _pdf_document, _tablepdfoutput,
         _alltext_tobebilled, alltextNoNotbilled, _filename) = (
            InitialMarkups.extract_section_under_header_tobebilledOnly(pdfLink)
        )
        return jsonify(alltextNoNotbilled)
    except Exception as e:
        print(f"Error in /findapiAllDocNoNotbilled: {e}")
        return jsonify({"error": str(e)}), 500


# -------------------- Rawan - MC Connection --------------------
@app.route('/findapi', methods=['GET', 'POST'])
def findapi():
    """Extract the section table for ``filePath``, cache it and return it.

    The table is stored in ``jsonoutput`` for ``/view-highlight``.
    """
    global jsonoutput
    try:
        data = _request_payload()
        pdfLink = data.get('filePath')
        if not pdfLink:
            return jsonify({"error": "Missing 'filePath'"}), 400

        _pdfbytes, _pdf_document, tablepdfoutput = InitialMarkups.extract_section_under_header(pdfLink)
        jsonoutput = tablepdfoutput
        return jsonify(tablepdfoutput)
    except Exception as e:
        print(f"Error in /findapi: {e}")
        return jsonify({"error": str(e)}), 500


# --------------------testpage-----------------------------
@app.route('/testpage')
def test_page():
    """Return a small status page showing the hostname and server time.

    NOTE(review): the body below contains plain text only; any HTML tags that
    may have wrapped these lines are not visible in this file. Confirm against
    the deployed version before merging.
    """
    # Get some system info
    hostname = socket.gethostname()
    current_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    return (
        " Server Test Page\n"
        "🚀 Flask Server is Running!\n"
        "\n"
        f"Hostname: {hostname}\n"
        "\n"
        f"Server Time: {current_time}\n"
        "\n"
        "Endpoint: /testpage\n"
        "\n"
        "Status: Operational ✅\n"
        "\n"
    )


# -------------------- Run --------------------
if __name__ == "__main__":
    # NOTE(review): debug=True on 0.0.0.0 exposes the Werkzeug debugger to the
    # network; Flask's documentation warns against this outside development.
    app.run(host="0.0.0.0", port=5000, debug=True)