Spaces:
Runtime error
Runtime error
| from flask import Flask, request, jsonify, render_template, send_file, redirect, url_for, Response | |
| import tsadropboxretrieval | |
| # import findInitialMarkups | |
| import InitialMarkups | |
| import requests | |
| import fitz | |
| from io import BytesIO | |
| import datetime | |
| import time | |
| from threading import Thread | |
| from urllib.parse import quote, unquote, parse_qs | |
| # import pdftotext | |
| import json | |
| # -------------------- App & Globals -------------------- | |
# Flask application object used by the handlers defined in this file.
app = Flask(__name__)

# Suffix interpolated into the download names built by view_pdf_tobebilled and
# download_pdfHighlight; nothing visible in this file reassigns it.
pageNumTextFound = 0

BASE_URL = "https://adr.trevorsadd.co.uk/api/testpage"  # changed this only

# Not read anywhere in the visible code.
backend_ready = False

# Markup table last stored by findapi / findapiFilteredHeadings and searched by
# download_pdfHighlight; initialised here so it is defined before first use.
jsonoutput = []
| # -------------------- Simple Health/Test -------------------- | |
def health():
    """Report liveness as JSON together with the current server time.

    Returns:
        A JSON response with the keys "status" (always "ok") and "time"
        (the local time as an ISO-8601 string).
    """
    # Import under a private alias: further down this file
    # `from datetime import datetime` rebinds the module-level name `datetime`
    # to the class, which makes `datetime.datetime.now()` raise AttributeError.
    import datetime as _datetime_module

    return jsonify(status="ok", time=_datetime_module.datetime.now().isoformat())
| # -------------------- Root: keep it simple & reliable -------------------- | |
def root():
    """Return a short JSON banner for the service root with HTTP status 200."""
    # Plain JSON on purpose: no template is rendered, so a missing template
    # cannot break this endpoint.
    banner = jsonify(message="FIND APIs root. Use /health or /testpage.")
    return banner, 200
| # -------------------- Headers Filtering Find 1 Space -------------------- | |
def process_headers():
    """Return, as JSON, the headers extracted from the PDF named by 'filePath'.

    The request body is parsed as JSON regardless of its content type.
    Responds 400 when 'filePath' is missing or empty, and 500 carrying the
    exception text when anything inside the handler raises.
    """
    try:
        payload = request.get_json(force=True) or {}
        file_path = payload.get('filePath')
        if file_path:
            # NOTE(review): `findInitialMarkups` is only imported in a
            # commented-out line among the visible imports, so this lookup
            # looks like it raises NameError and lands in the 500 branch below
            # — confirm against the full file.
            return jsonify(findInitialMarkups.headersfrompdf(file_path))
        return jsonify({"error": "Missing 'filePath'"}), 400
    except Exception as exc:
        print(f"Error in /api/process-data: {exc}")
        return jsonify({"error": str(exc)}), 500
| # -------------------- PDF to Text 1 Space -------------------- | |
def processalltextTotext():
    """Return the text extracted from the PDF named by the JSON field 'filePath'.

    On success the JSON response carries "message", "input_data" (the text)
    and "Filename:" (the file name). Responds 400 when 'filePath' is missing
    or empty, and 500 with the exception text when anything raises.
    """
    try:
        payload = request.get_json(force=True) or {}
        pdf_location = payload.get('filePath')
        if pdf_location:
            # NOTE(review): `pdftotext` is only imported in a commented-out
            # line among the visible imports, so this call looks like it
            # raises NameError and ends in the 500 branch — confirm.
            extracted_text, source_name = pdftotext.texts_from_pdfAllText(pdf_location)
            return jsonify({
                "message": "Data received",
                "input_data": extracted_text,
                "Filename:": source_name,
            })
        return jsonify({"error": "Missing 'filePath' in request data"}), 400
    except Exception as exc:
        print(f"Error in /processalltext1: {exc}")
        return jsonify({"error": str(exc)}), 500
| # -------------------- Keepalive -------------------- | |
def keepaliveapi():
    """Log a keepalive ping and answer with the literal body 'alivee'.

    If logging the ping raises, a JSON error payload is returned with
    HTTP status 500 instead.
    """
    try:
        print('Keepalive pinged')
    except Exception as error:
        print('Error in keepalive:', error)
        return jsonify(status="error", message=str(error)), 500
    else:
        return 'alivee'
| # -------------------- View PDF (Marked up) -------------------- | |
def getpdfcontent(pdf_path, timeout=30):
    """Download a PDF and return its bytes wrapped in an in-memory stream.

    Args:
        pdf_path: URL of the PDF. When the value contains 'http' or 'dropbox',
            every 'dl=0' in it is rewritten to 'dl=1' before the request
            (presumably to request a direct download from Dropbox — confirm).
        timeout: Seconds handed to ``requests.get``. Defaults to 30 so a
            stalled remote server cannot block the calling request forever.

    Returns:
        A ``BytesIO`` holding the downloaded bytes.

    Raises:
        ValueError: The response body is empty or does not start with b"%PDF".
        Exceptions raised by ``requests`` (including timeouts) propagate.
    """
    if pdf_path and ('http' in pdf_path or 'dropbox' in pdf_path):
        pdf_path = pdf_path.replace('dl=0', 'dl=1')

    # Without an explicit timeout, requests waits indefinitely for the server.
    response = requests.get(pdf_path, timeout=timeout)
    pdf_bytes = response.content

    # The magic-number check also rejects HTML error pages served in place of
    # the requested document.
    if not pdf_bytes or not pdf_bytes.startswith(b"%PDF"):
        raise ValueError("No valid PDF content found.")

    return BytesIO(pdf_bytes)
def view_pdf():
    """Serve inline the PDF referenced by the 'pdfLink' query parameter.

    Responds 400 when the parameter is missing, 500 when fetching the PDF
    raises, and 404 when no content came back.
    """
    raw_link = request.args.get('pdfLink')
    if not raw_link:
        return "Missing pdfLink parameter.", 400

    pdf_link = unquote(raw_link)
    print("Extracted PDF Link:", pdf_link)

    try:
        pdf_stream = getpdfcontent(pdf_link)
    except Exception as exc:
        print("Error during PDF extraction:", exc)
        return "PDF could not be processed.", 500

    if pdf_stream is None:
        return "PDF content not found or broken.", 404

    # getpdfcontent already hands back a BytesIO, so it is passed through
    # without being wrapped a second time.
    send_options = {
        "mimetype": 'application/pdf',
        "as_attachment": False,
        "download_name": "annotated_page.pdf",
    }
    return send_file(pdf_stream, **send_options)
| # -------------------- Process PDF -> Upload to Dropbox (renamed to avoid duplicate route) -------------------- | |
def process_pdf_and_upload():
    """Mark up the PDF named by 'filePath' and upload the results to Dropbox.

    Two documents are uploaded to '/TSA JOBS/ADR Test/FIND/': the second item
    returned by InitialMarkups.extract_section_under_header under the shared
    link's own name, and the third item under that name with
    ' Markup Summary.pdf' in place of the '.pdf' suffix. The JSON response
    carries both upload results. Responds 400 when 'filePath' is missing or
    empty, and 500 with the exception text when anything raises.
    """
    try:
        payload = request.get_json(force=True) or {}
        source_link = payload.get('filePath')
        if not source_link:
            return jsonify({"error": "'filePath' must be provided."}), 400

        print("Processing PDF:", source_link)
        _pdf_bytes, marked_document, summary_document = (
            InitialMarkups.extract_section_under_header(source_link)
        )

        team_client = tsadropboxretrieval.ADR_Access_DropboxTeam('user')
        link_metadata = team_client.sharing_get_shared_link_metadata(source_link)
        target_folder = '/TSA JOBS/ADR Test/FIND/'

        marked_link = tsadropboxretrieval.uploadanyFile(
            doc=marked_document,
            path=target_folder,
            pdfname=link_metadata.name,
        )
        summary_name = link_metadata.name.rsplit(".pdf", 1)[0] + ' Markup Summary.pdf'
        summary_link = tsadropboxretrieval.uploadanyFile(
            doc=summary_document,
            path=target_folder,
            pdfname=summary_name,
        )
        print('Uploaded:', marked_link, summary_link)

        return jsonify({
            "message": "PDF processed successfully.",
            "PDF_MarkedUp": marked_link,
            "Table_PDF_Markup_Summary": summary_link,
        })
    except Exception as exc:
        print(f"Error in /api/process-pdf: {exc}")
        return jsonify({"error": str(exc)}), 500
| # -------------------- Not billed / Markup subsets -------------------- | |
def findapitobebilled1():
    """Return, as JSON, the fourth item produced for the PDF named by 'filePath'.

    The item comes from InitialMarkups.extract_section_under_header_tobebilledOnly,
    which is expected to return exactly six values. Responds 400 when
    'filePath' is missing or empty, and 500 with the exception text when
    anything raises.
    """
    try:
        payload = request.get_json(force=True) or {}
        pdf_link = payload.get('filePath')
        if pdf_link:
            (_pdf_bytes, _pdf_document, _markup_table,
             billed_text, _unbilled_text, _filename) = (
                InitialMarkups.extract_section_under_header_tobebilledOnly(pdf_link)
            )
            return jsonify(billed_text)
        return jsonify({"error": "Missing 'filePath'"}), 400
    except Exception as exc:
        print(f"Error in /findapitobebilled1: {exc}")
        return jsonify({"error": str(exc)}), 500
| # ---------------------------------------------------------------------- | |
def findapitobebilled_htmlformat():
    """Render the markup table of the PDF named by 'filePath' as one HTML page.

    Each section of the table contributes, when the key is present, an <h1>
    for 'head above 2', an <h2> for 'head above 1', an <h3> for 'Subject' and
    a <p> holding the space-joined 'BodyText'. All values are HTML-escaped
    because they originate from the document content.

    Returns:
        JSON with "input_data" (the HTML document) and "Filename:" (the file
        name returned by the extraction). Responds 400 when 'filePath' is
        missing or empty, and 500 with the exception text when anything raises.
    """
    from html import escape  # local import keeps this handler self-contained

    try:
        payload = request.get_json(force=True) or {}
        pdf_link = payload.get('filePath')
        if not pdf_link:
            return jsonify({"error": "Missing 'filePath'"}), 400

        (_pdf_bytes, _pdf_document, tablepdfoutput,
         _billed_text, _unbilled_text, filename) = (
            InitialMarkups.extract_section_under_header_tobebilledOnly(pdf_link)
        )

        # Accept both a JSON string and an already-decoded list, as
        # findapitobebilledonlymarthe does.
        if isinstance(tablepdfoutput, str):
            sections = json.loads(tablepdfoutput)
        else:
            sections = tablepdfoutput

        heading_tags = (("head above 2", "h1"), ("head above 1", "h2"), ("Subject", "h3"))
        parts = []
        for section in sections:
            for key, tag in heading_tags:
                if key in section:
                    parts.append(f"<{tag}>{escape(str(section[key]), quote=False)}</{tag}><br>")
            if "BodyText" in section:
                body = section['BodyText']
                # A bare string would otherwise be joined character by character.
                if isinstance(body, str):
                    body_text = body
                else:
                    body_text = ' '.join(str(line) for line in body)
                parts.append(f"<p>{escape(body_text, quote=False)}</p><br>")
        html_body = "".join(parts)
        safe_title = escape(str(filename), quote=False)

        # Wrap everything into one HTML document.
        html_content = f"""
<!DOCTYPE html>
<html>
<head>
<title>{safe_title}</title>
<meta charset="utf-8">
</head>
<body>
{html_body}
</body>
</html>
"""
        return jsonify({"input_data": html_content, "Filename:": filename})
    except Exception as e:
        print(f"Error in /findapitobebilled_htmlformat: {e}")
        return jsonify({"error": str(e)}), 500
def view_pdf_tobebilled():
    """Serve inline the first item extracted for the 'pdfLink' query parameter.

    The bytes come from InitialMarkups.extract_section_under_header_tobebilledOnly.
    Responds 400 when the parameter is missing, 500 when extraction raises,
    and 404 when the result is None or does not start with b"%PDF".
    """
    raw_link = request.args.get('pdfLink')
    if not raw_link:
        return "Missing pdfLink parameter.", 400

    pdf_link = unquote(raw_link)
    print("Extracted PDF Link:", pdf_link)

    try:
        pdf_bytes = InitialMarkups.extract_section_under_header_tobebilledOnly(pdf_link)[0]
    except Exception as exc:
        print("Error during PDF extraction:", exc)
        return "PDF could not be processed.", 500

    looks_like_pdf = pdf_bytes is not None and pdf_bytes.startswith(b"%PDF")
    if not looks_like_pdf:
        return "PDF content not found or broken.", 404

    # The download name embeds the module-level pageNumTextFound value.
    attachment_name = f"annotated_page_{pageNumTextFound}.pdf"
    return send_file(
        BytesIO(pdf_bytes),
        mimetype='application/pdf',
        as_attachment=False,
        download_name=attachment_name,
    )
| # -------------------- Final markups: view one highlight -------------------- | |
def download_pdfHighlight():
    """Serve inline the PDF produced for one keyword of a previously processed PDF.

    Query parameters:
        pdfLink: URL of the PDF (URL-unquoted before use).
        keyword: Compared against the "Subject" of each entry in the
            module-level ``jsonoutput`` table.

    The matching entry supplies the page (its "Page" value minus one) and the
    "head above 1" text handed to InitialMarkups.extract_section_under_headerRawan.
    Without a match, or with an unusable "Page" value, page 0 is used.

    Responds 400 when a parameter is missing, 500 when extraction raises
    (matching the other PDF view handlers in this file), and 404 when the
    extraction yields None.
    """
    pdf_link = request.args.get('pdfLink')
    keyword = request.args.get('keyword')
    if not pdf_link or not keyword:
        return "Missing required parameters.", 400

    pdf_link = unquote(pdf_link)
    print("Extracted PDF Link:", pdf_link)
    print("Extracted Keyword:", keyword)

    # Other handlers in this file receive the markup table as a JSON string,
    # so tolerate that form here as well as an already-decoded list.
    entries = jsonoutput
    if isinstance(entries, str):
        try:
            entries = json.loads(entries)
        except ValueError:
            entries = []
    matching_item = next(
        (
            item
            for item in (entries or [])
            if isinstance(item, dict) and item.get("Subject") == keyword
        ),
        None,
    )

    page_number = 0
    stringtowrite = None
    if matching_item:
        stringtowrite = matching_item.get("head above 1")
        try:
            page_number = int(matching_item.get("Page")) - 1
        except (TypeError, ValueError):
            # A missing or non-numeric "Page" falls back to the first page
            # instead of aborting the request.
            page_number = 0
        print(f"Page number for '{keyword}': {page_number}")
    else:
        print("No match found in jsonoutput; defaulting to page 0.")

    try:
        pdf_content = InitialMarkups.extract_section_under_headerRawan(
            pdf_link, keyword, page_number, stringtowrite
        )[0]
    except Exception as e:
        print("Error during PDF extraction:", e)
        return "PDF could not be processed.", 500

    if pdf_content is None:
        return "PDF content not found.", 404

    return send_file(
        BytesIO(pdf_content),
        mimetype='application/pdf',
        as_attachment=False,
        download_name=f"annotated_page_{pageNumTextFound}.pdf"
    )
def findapiFilteredHeadings():
    """Return, as JSON, the text extracted for the requested headings.

    Reads 'filePath' and 'listofheadings' (a JSON array) from the request
    body, calls InitialMarkups.extract_section_under_headerRawan with both,
    stores the third returned item in the module-level ``jsonoutput`` and
    answers with the fourth. Responds 400 when 'filePath' is missing or empty
    or 'listofheadings' is absent, and 500 with the exception text when
    anything raises.
    """
    global jsonoutput
    try:
        payload = request.get_json(force=True) or {}
        pdf_link = payload.get('filePath')
        headings = payload.get('listofheadings')  # json array
        if headings is None or not pdf_link:
            return jsonify({"error": "Missing 'filePath' or 'listofheadings'"}), 400

        _pdf_bytes, _pdf_document, markup_table, extracted_text = (
            InitialMarkups.extract_section_under_headerRawan(pdf_link, headings)
        )
        # Kept at module level so download_pdfHighlight can search it later.
        jsonoutput = markup_table
        return jsonify(extracted_text)
    except Exception as exc:
        print(f"Error in /findapiFilteredHeadings: {exc}")
        return jsonify({"error": str(exc)}), 500
def findapitobebilledonly():
    """Render the markup table of the PDF named by 'filePath' as one HTML page.

    The table is the third item returned by
    InitialMarkups.extract_section_under_header_tobebilled2. Each section
    contributes, when the key is present, an <h1> for 'head above 2', an <h2>
    for 'head above 1', an <h3> for 'Subject' and a <p> holding the
    space-joined 'BodyText'. All values are HTML-escaped because they
    originate from the document content.

    Returns:
        JSON with "input_data" (the HTML document) and "Filename:" (the file
        name returned by the extraction). Responds 400 when 'filePath' is
        missing or empty, and 500 with the exception text when anything raises.
    """
    from html import escape  # local import keeps this handler self-contained

    try:
        payload = request.get_json(force=True) or {}
        pdf_link = payload.get('filePath')
        if not pdf_link:
            return jsonify({"error": "Missing 'filePath'"}), 400

        _pdf_bytes, _pdf_document, tablepdfoutput, _alltext, filename = (
            InitialMarkups.extract_section_under_header_tobebilled2(pdf_link)
        )

        # Accept both a JSON string and an already-decoded list, as
        # findapitobebilledonlymarthe does.
        if isinstance(tablepdfoutput, str):
            sections = json.loads(tablepdfoutput)
        else:
            sections = tablepdfoutput

        heading_tags = (("head above 2", "h1"), ("head above 1", "h2"), ("Subject", "h3"))
        parts = []
        for section in sections:
            for key, tag in heading_tags:
                if key in section:
                    parts.append(f"<{tag}>{escape(str(section[key]), quote=False)}</{tag}><br>")
            if "BodyText" in section:
                body = section['BodyText']
                # A bare string would otherwise be joined character by character.
                if isinstance(body, str):
                    body_text = body
                else:
                    body_text = ' '.join(str(line) for line in body)
                parts.append(f"<p>{escape(body_text, quote=False)}</p><br>")
        html_body = "".join(parts)
        safe_title = escape(str(filename), quote=False)

        # Wrap everything into one HTML document.
        html_content = f"""
<!DOCTYPE html>
<html>
<head>
<title>{safe_title}</title>
<meta charset="utf-8">
</head>
<body>
{html_body}
</body>
</html>
"""
        return jsonify({"input_data": html_content, "Filename:": filename})
    except Exception as e:
        print(f"Error in /findapitobebilledonly: {e}")
        return jsonify({"error": str(e)}), 500
def findapitobebilledonlymarthe():
    """Render the markup table of the PDF(s) named by 'filePath' as one HTML page.

    The table is the third item returned by
    InitialMarkups.extract_section_under_header_tobebilledMultiplePDFS and may
    be a JSON string or an already-decoded list. Each section contributes,
    when the key is present, an <h1> for 'head above 2', an <h2> for
    'head above 1', an <h3> for 'Subject' and a <p> holding the space-joined
    'BodyText'. All values are HTML-escaped because they originate from the
    document content.

    Returns:
        JSON with "input_data" (the HTML document) and "Filename:" (the file
        name returned by the extraction). Responds 400 when 'filePath' is
        missing or empty, and 500 with the exception text when anything raises.
    """
    from html import escape  # local import keeps this handler self-contained

    try:
        payload = request.get_json(force=True) or {}
        pdf_link = payload.get('filePath')
        if not pdf_link:
            return jsonify({"error": "Missing 'filePath'"}), 400

        _pdf_bytes, _pdf_document, tablepdfoutput, _alltext, filename = (
            InitialMarkups.extract_section_under_header_tobebilledMultiplePDFS(pdf_link)
        )

        if isinstance(tablepdfoutput, str):
            sections = json.loads(tablepdfoutput)
        else:
            sections = tablepdfoutput

        heading_tags = (("head above 2", "h1"), ("head above 1", "h2"), ("Subject", "h3"))
        parts = []
        for section in sections:
            for key, tag in heading_tags:
                if key in section:
                    parts.append(f"<{tag}>{escape(str(section[key]), quote=False)}</{tag}><br>")
            if "BodyText" in section:
                body = section['BodyText']
                # A bare string would otherwise be joined character by character.
                if isinstance(body, str):
                    body_text = body
                else:
                    body_text = ' '.join(str(line) for line in body)
                parts.append(f"<p>{escape(body_text, quote=False)}</p><br>")
        html_body = "".join(parts)
        safe_title = escape(str(filename), quote=False)

        # Wrap everything into one HTML document.
        html_content = f"""
<!DOCTYPE html>
<html>
<head>
<title>{safe_title}</title>
<meta charset="utf-8">
</head>
<body>
{html_body}
</body>
</html>
"""
        return jsonify({"input_data": html_content, "Filename:": filename})
    except Exception as e:
        # The original message named /findapitobebilledonly (copy-paste), which
        # made failures of this handler indistinguishable in the logs.
        print(f"Error in /findapitobebilledonlymarthe: {e}")
        return jsonify({"error": str(e)}), 500
def findapiAllDocNoNotbilled():
    """Return, as JSON, the fifth item produced for the PDF named by 'filePath'.

    The item comes from InitialMarkups.extract_section_under_header_tobebilledOnly,
    which is expected to return exactly six values. Responds 400 when
    'filePath' is missing or empty, and 500 with the exception text when
    anything raises.
    """
    try:
        payload = request.get_json(force=True) or {}
        pdf_link = payload.get('filePath')
        if pdf_link:
            (_pdf_bytes, _pdf_document, _markup_table,
             _billed_text, text_without_not_billed, _filename) = (
                InitialMarkups.extract_section_under_header_tobebilledOnly(pdf_link)
            )
            return jsonify(text_without_not_billed)
        return jsonify({"error": "Missing 'filePath'"}), 400
    except Exception as exc:
        print(f"Error in /findapiAllDocNoNotbilled: {exc}")
        return jsonify({"error": str(exc)}), 500
| # -------------------- Rawan - MC Connection -------------------- | |
def findapi():
    """Return, as JSON, the markup table for the PDF named by 'filePath'.

    The table is the third item returned by
    InitialMarkups.extract_section_under_header; it is also stored in the
    module-level ``jsonoutput``. Responds 400 when 'filePath' is missing or
    empty, and 500 with the exception text when anything raises.
    """
    global jsonoutput
    try:
        payload = request.get_json(force=True) or {}
        pdf_link = payload.get('filePath')
        if not pdf_link:
            return jsonify({"error": "Missing 'filePath'"}), 400

        _pdf_bytes, _pdf_document, markup_table = (
            InitialMarkups.extract_section_under_header(pdf_link)
        )
        # Kept at module level so download_pdfHighlight can search it later.
        jsonoutput = markup_table
        return jsonify(markup_table)
    except Exception as exc:
        print(f"Error in /findapi: {exc}")
        return jsonify({"error": str(exc)}), 500
| #--------------------testpage----------------------------- | |
| import socket | |
| from datetime import datetime | |
def test_page():
    """Return a static HTML status page showing the host name and server time."""
    # The only dynamic values on the page.
    server_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    host_name = socket.gethostname()
    # Doubled braces in the CSS are literal braces inside the f-string.
    return f"""
<!DOCTYPE html>
<html>
<head>
<title>Server Test Page</title>
<style>
body {{ font-family: Arial, sans-serif; text-align: center; margin-top: 50px; }}
.success {{ color: #2ecc71; font-size: 24px; }}
.info {{ color: #34495e; margin-top: 10px; }}
.container {{ max-width: 600px; margin: 0 auto; text-align: left; }}
</style>
</head>
<body>
<div class="success">🚀 Flask Server is Running!</div>
<div class="container">
<p class="info"><strong>Hostname:</strong> {host_name}</p>
<p class="info"><strong>Server Time:</strong> {server_time}</p>
<p class="info"><strong>Endpoint:</strong> /testpage</p>
<p class="info"><strong>Status:</strong> <span style="color: #2ecc71;">Operational ✅</span></p>
</div>
</body>
</html>
"""
| # -------------------- Run -------------------- | |
if __name__ == "__main__":
    # The Werkzeug debugger lets anyone who can reach the server execute
    # arbitrary code, so it must not be switched on unconditionally while
    # listening on every interface. Debug mode is now opt-in via FLASK_DEBUG.
    import os

    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in {"1", "true", "yes", "on"}
    app.run(host="0.0.0.0", port=5000, debug=debug_enabled)