InitialMarkups2

Sleeping

App Files Files Community

InitialMarkups2 / app.py

Marthee

Update app.py

11c40be verified 7 months ago

raw

history blame contribute delete

13.1 kB

	from flask import Flask, request, jsonify, abort , render_template , send_file
	import tsadropboxretrieval
	import json
	import Find_Hyperlinking_text
	import findspecsv1
	import InitialMarkups
	import requests
	from io import BytesIO
	import datetime
	import time
	from threading import Thread
	import urllib
	from urllib.parse import quote
	# WSGI application object; every @app.route decorator below registers a view on it.
	app = Flask(__name__)

	# Value interpolated into the download_name of the PDFs served by the view
	# routes. Nothing in the visible code reassigns it, so it stays 0.
	pageNumTextFound = 0
	# NOTE(review): presumably the public URL of this deployment; it is not
	# referenced anywhere else in the visible code.
	BASE_URL = "https://findconsole-initialmarkups2.hf.space"
	# Simulate a backend readiness flag (replace with actual check if possible)
	# home() serves the loading page for as long as this is False.
	backend_ready = False
	# @app.route("/")
	# def thismain():
	# print('Home page loaded')
	# return render_template("gui.html")

	@app.route("/keepaliveapii", methods=["GET", "POST"])
	def keepaliveapi():
	    """Answer a keep-alive ping.

	    Logs the ping and replies with the plain-text body ``'alivee'``. If the
	    logging step raises, the error is printed and a JSON payload of the form
	    ``{"status": "error", "message": ...}`` is returned with HTTP status 500.
	    """
	    try:
	        print('Keepalive pinged')
	    except Exception as exc:
	        print('Error in keepalive:', exc)
	        failure = jsonify(status="error", message=str(exc))
	        return failure, 500
	    else:
	        return 'alivee'



	@app.route("/")
	def home():
	    """Serve the landing route.

	    While the module-level ``backend_ready`` flag is false, renders the
	    ``wake_and_redirect.html`` loading page. Once it is true, redirects the
	    client to the ``/view-pdf`` viewer and forwards the current query
	    parameters to it.

	    Returns:
	        The rendered loading page, or a redirect response to ``/view-pdf``.
	    """
	    # Imported here because the file-level flask import does not bring these
	    # two names into scope; without them the redirect branch is a NameError.
	    from flask import redirect, url_for

	    # Only read, never assigned, so no ``global`` declaration is needed.
	    if not backend_ready:
	        return render_template("wake_and_redirect.html")

	    # Flask names an endpoint after its view function, so the '/view-pdf'
	    # route is reachable as "download_pdf" (there is no "view_pdf" endpoint).
	    return redirect(url_for("download_pdf", **request.args))
	################################################################################################################################################################
	################################################################################################################################################################
	##################### Main console ###########################################################################################################
	################################################################################################################################################################
	################################################################################################################################################################

	@app.route('/view-pdf', methods=['GET'])
	def download_pdf():
	    """Stream the marked-up PDF for the ``pdfLink`` query parameter.

	    The raw query string is parsed by hand, the link is handed to
	    ``InitialMarkups.extract_section_under_header`` and the first element of
	    its result is sent back inline as ``application/pdf``.

	    Returns:
	        * 400 with a text message when ``pdfLink`` is missing or empty.
	        * 500 with a text message when the extraction raises.
	        * 404 with a text message when the result is ``None`` or does not
	          start with the ``%PDF`` magic bytes.
	        * Otherwise the PDF itself via ``send_file``.
	    """
	    query = urllib.parse.parse_qs(request.query_string.decode())
	    raw_link = query.get('pdfLink', [None])[0]
	    if not raw_link:
	        return "Missing pdfLink parameter.", 400

	    # NOTE(review): parse_qs has already percent-decoded the value, so this is
	    # a second decoding pass; kept as-is because callers may rely on it.
	    target_url = urllib.parse.unquote(raw_link)
	    print("Extracted PDF Link:", target_url)

	    try:
	        # Only the first element of the extractor's result is needed here.
	        extraction = InitialMarkups.extract_section_under_header(target_url)
	        payload = extraction[0]
	    except Exception as exc:
	        print("Error during PDF extraction:", exc)
	        return "PDF could not be processed.", 500

	    looks_like_pdf = payload is not None and payload.startswith(b"%PDF")
	    if not looks_like_pdf:
	        return "PDF content not found or broken.", 404

	    return send_file(
	        BytesIO(payload),
	        mimetype='application/pdf',
	        as_attachment=False,
	        download_name=f"annotated_page_{pageNumTextFound}.pdf",
	    )


	@app.route('/api/process-data', methods=['POST'])
	def receive_pdf_data():
	    """Mark up the test PDF and upload both outputs to Dropbox.

	    The PDF link comes from ``finddata()``. The document is processed with
	    ``InitialMarkups.extract_section_under_header``; the marked-up document
	    and the markup-summary document are then uploaded through
	    ``tsadropboxretrieval.uploadanyFile`` under ``/TSA JOBS/ADR Test/FIND/``.

	    Returns:
	        * 400 with a JSON error when no link is available.
	        * 500 with ``{"error": <message>}`` when any processing step raises.
	        * Otherwise a JSON object with ``message``, ``PDF_MarkedUp`` and
	          ``Table_PDF_Markup_Summary`` (the two values returned by the
	          upload helper).
	    """
	    # finddata() returns a (pdfLink, keyword) pair. Unpack it so that the link
	    # itself -- not the whole tuple -- is validated and passed on below.
	    pdfLink, _keyword = finddata()

	    if not pdfLink:
	        return jsonify({"error": "'pdfLink' must be provided."}), 400

	    try:
	        print(pdfLink)

	        _pdfbytes, pdf_document, tablepdfoutput = InitialMarkups.extract_section_under_header(pdfLink)
	        dbxTeam = tsadropboxretrieval.ADR_Access_DropboxTeam('user')

	        # Get metadata using the shared link; its name is reused for the uploads.
	        metadata = dbxTeam.sharing_get_shared_link_metadata(pdfLink)
	        dbPath = '/TSA JOBS/ADR Test/FIND/'
	        pdflink = tsadropboxretrieval.uploadanyFile(doc=pdf_document, path=dbPath, pdfname=metadata.name)
	        print('LINKS0', pdflink)

	        # Summary file name: original name without its '.pdf' suffix plus a marker.
	        summary_name = metadata.name.rsplit(".pdf", 1)[0] + ' Markup Summary' + '.pdf'
	        tablepdfLink = tsadropboxretrieval.uploadanyFile(doc=tablepdfoutput, path=dbPath, pdfname=summary_name)
	        print("PDF successfully uploaded to Dropbox at")
	        print('LINKS1', tablepdfLink)
	        return jsonify({
	            "message": "PDF processed successfully.",
	            "PDF_MarkedUp": pdflink,
	            'Table_PDF_Markup_Summary': tablepdfLink
	        })

	    except Exception as e:
	        return jsonify({"error": str(e)}), 500
	################################################################################################################################################################
	################################################################################################################################################################
	##################### Not to billed not markuped up ###########################################################################################################
	################################################################################################################################################################
	################################################################################################################################################################
	@app.route('/findapitobebilled1', methods=['GET','POST'])
	def findapitobebilled1():
	    """Return the fourth result of the "to be billed only" extractor as JSON.

	    Reads ``filePath`` from the JSON request body, passes it to
	    ``InitialMarkups.extract_section_under_header_tobebilledOnly`` and
	    responds with the last of the four values that call returns.

	    Returns:
	        The JSON-encoded extractor output, or ``{"error": <message>}`` with
	        HTTP status 500 when any step raises.
	    """
	    try:
	        print('In process [Try]')
	        payload = request.get_json()
	        source_link = payload.get('filePath')
	        extraction = InitialMarkups.extract_section_under_header_tobebilledOnly(source_link)
	        # The first three values are not needed by this endpoint.
	        _pdf_bytes, _document, _summary, billed_text = extraction
	        return jsonify(billed_text)
	    except Exception as exc:
	        return jsonify({"error": str(exc)}), 500


	@app.route('/view-pdf-tobebilled', methods=['GET'])
	def download_pdf_tobebilled():
	    """Stream the "to be billed only" PDF for the ``pdfLink`` query parameter.

	    The raw query string is parsed by hand, the link is handed to
	    ``InitialMarkups.extract_section_under_header_tobebilledOnly`` and the
	    first element of its result is sent back inline as ``application/pdf``.

	    Returns:
	        * 400 with a text message when ``pdfLink`` is missing or empty.
	        * 500 with a text message when the extraction raises.
	        * 404 with a text message when the result is ``None`` or does not
	          start with the ``%PDF`` magic bytes.
	        * Otherwise the PDF itself via ``send_file``.
	    """
	    raw_query = request.query_string.decode()
	    link_values = urllib.parse.parse_qs(raw_query).get('pdfLink', [None])
	    quoted_link = link_values[0]
	    if not quoted_link:
	        return "Missing pdfLink parameter.", 400

	    # NOTE(review): parse_qs has already percent-decoded the value, so this is
	    # a second decoding pass; kept as-is because callers may rely on it.
	    source_url = urllib.parse.unquote(quoted_link)
	    print("Extracted PDF Link:", source_url)

	    try:
	        # Only the first element of the extractor's result is needed here.
	        document_bytes = InitialMarkups.extract_section_under_header_tobebilledOnly(source_url)[0]
	    except Exception as exc:
	        print("Error during PDF extraction:", exc)
	        return "PDF could not be processed.", 500

	    if document_bytes is None:
	        return "PDF content not found or broken.", 404
	    if not document_bytes.startswith(b"%PDF"):
	        return "PDF content not found or broken.", 404

	    stream = BytesIO(document_bytes)
	    return send_file(
	        stream,
	        mimetype='application/pdf',
	        as_attachment=False,
	        download_name=f"annotated_page_{pageNumTextFound}.pdf",
	    )

	################################################################################################################################################################
	################################################################################################################################################################
	##################### For final markups - view one highlight at a time - not used yet ###########################################################################################################
	################################################################################################################################################################
	################################################################################################################################################################


	@app.route('/view-highlight', methods=['GET','POST'])
	def download_pdfHighlight():
	    """Stream a PDF highlighting the section that matches ``keyword``.

	    Query parameters (parsed by hand from the raw query string):
	        pdfLink: required link to the source PDF.
	        keyword: optional value compared against the ``"Subject"`` field of
	            the rows cached in the module-level ``jsonoutput``.

	    The matching row supplies the page (``"Page"`` minus one) and the
	    ``"head above 1"`` text passed to
	    ``InitialMarkups.extract_section_under_headerRawan``. When no row matches,
	    or nothing has been cached yet, page 0 and an empty string are used.

	    Returns:
	        * 400 with a text message when ``pdfLink`` is missing or empty.
	        * 500 with a text message when the extraction raises.
	        * 404 with a text message when the extractor yields ``None``.
	        * Otherwise the PDF itself via ``send_file``.
	    """
	    # Manually parse the query parameters from the raw query string.
	    parsed_params = urllib.parse.parse_qs(request.query_string.decode())
	    pdf_link = parsed_params.get('pdfLink', [None])[0]
	    keyword = parsed_params.get('keyword', [None])[0]
	    if not pdf_link:
	        return "Missing required parameters.", 400

	    # NOTE(review): parse_qs has already percent-decoded the value, so this is
	    # a second decoding pass; kept because callers may rely on it.
	    pdf_link = urllib.parse.unquote(pdf_link)

	    print("Extracted PDF Link:", pdf_link)
	    print("Extracted Keywords:", keyword)

	    # ``jsonoutput`` only exists once /findapi or /findapiFilteredHeadings has
	    # run; look it up defensively so a cold start is "no match", not NameError.
	    cached_rows = globals().get("jsonoutput")
	    if cached_rows is None:
	        cached_rows = []
	    matching_item = next((item for item in cached_rows if item.get("Subject") == keyword), None)

	    # Defaults for the no-match case, so both names are always bound.
	    # NOTE(review): assumes the extractor accepts an empty heading string -- confirm.
	    page_number = 0
	    stringtowrite = ""
	    if matching_item:
	        page_number = int(matching_item.get("Page")) - 1
	        stringtowrite = matching_item.get("head above 1")
	        print(f"Page number for '{keyword}': {page_number}")
	    else:
	        print("No match found.")

	    try:
	        pdf_content = InitialMarkups.extract_section_under_headerRawan(pdf_link, keyword, page_number, stringtowrite)[0]
	    except Exception as e:
	        # Same error contract as the other view routes in this file.
	        print("Error during PDF extraction:", e)
	        return "PDF could not be processed.", 500

	    if pdf_content is None:
	        return "PDF content not found.", 404

	    pdf_bytes = BytesIO(pdf_content)
	    return send_file(
	        pdf_bytes,
	        mimetype='application/pdf',
	        as_attachment=False,
	        download_name=f"annotated_page_{pageNumTextFound}.pdf"
	    )


	@app.route('/findapiFilteredHeadings', methods=['GET','POST'])
	def findapiFilteredHeadings():
	    """Run the heading-filtered extraction and return its text output as JSON.

	    Reads ``filePath`` and ``listofheadings`` from the JSON request body and
	    passes both to ``InitialMarkups.extract_section_under_headerRawan``. The
	    third returned value is cached in the module-level ``jsonoutput``; the
	    fourth is sent back to the client.

	    Returns:
	        The JSON-encoded fourth extractor value, or ``{"error": <message>}``
	        with HTTP status 500 when any step raises.
	    """
	    global jsonoutput
	    try:
	        print('In process [Try]')
	        body = request.get_json()
	        source_link = body.get('filePath')
	        print(source_link)
	        headings = body.get('listofheadings')  # in json format
	        print(headings)
	        # NOTE(review): /view-highlight calls this same extractor with four
	        # positional arguments -- confirm the signature supports both forms.
	        extraction = InitialMarkups.extract_section_under_headerRawan(source_link, headings)
	        _pdf_bytes, _document, summary_rows, extracted_text = extraction
	        jsonoutput = summary_rows
	        return jsonify(extracted_text)
	    except Exception as exc:
	        return jsonify({"error": str(exc)}), 500



	@app.route('/findapiAllDocNoNotbilled', methods=['GET','POST'])
	def findapiAllDocNoNotbilled():
	    """Return the output of the ``withoutNot`` extractor as JSON.

	    Reads ``filePath`` from the JSON request body, passes it to
	    ``InitialMarkups.extract_section_under_header_withoutNot``, logs the
	    result and sends it back to the client.

	    Returns:
	        The JSON-encoded extractor output, or ``{"error": <message>}`` with
	        HTTP status 500 when any step raises.
	    """
	    try:
	        print('In process [Try]')
	        body = request.get_json()
	        source_link = body.get('filePath')
	        print(source_link)
	        extracted = InitialMarkups.extract_section_under_header_withoutNot(source_link)
	        print(extracted)
	        response = jsonify(extracted)
	    except Exception as exc:
	        return jsonify({"error": str(exc)}), 500
	    return response




	################################################################################################################################################################
	################################################################################################################################################################
	##################### For Rawan - MC Connection ###########################################################################################################
	################################################################################################################################################################
	################################################################################################################################################################

	@app.route('/findapi', methods=['GET','POST'])
	def findapi():
	    """Run the main extraction and return its third result as JSON.

	    Reads ``filePath`` from the JSON request body and passes it to
	    ``InitialMarkups.extract_section_under_header``. The third returned value
	    is cached in the module-level ``jsonoutput`` (read later by
	    ``/view-highlight``) and is also the response body.

	    Returns:
	        The JSON-encoded third extractor value, or ``{"error": <message>}``
	        with HTTP status 500 when any step raises.
	    """
	    global jsonoutput
	    try:
	        print('In process [Try]')
	        body = request.get_json()
	        source_link = body.get('filePath')
	        extraction = InitialMarkups.extract_section_under_header(source_link)
	        # Only the third value is used by this endpoint.
	        _pdf_bytes, _document, summary_rows = extraction
	        jsonoutput = summary_rows
	        return jsonify(summary_rows)
	    except Exception as exc:
	        return jsonify({"error": str(exc)}), 500

	############################################# Testing #################################################

	def finddata():
	    """Return the hard-coded sample input used for testing.

	    Returns:
	        A ``(pdfLink, keyword)`` tuple: a Dropbox link string followed by a
	        list of two heading strings.
	    """
	    sample_headings = ['115 INTEGRATED MRI ROOM LININGS', '310 ACCURACY']
	    sample_link = 'https://www.dropbox.com/scl/fi/hnp4mqigb51a5kp89kgfa/00801-ARC-20-ZZ-S-A-0002.pdf?rlkey=45abeoebzqw4qwnslnei6dkd6&st=m4yrcjm2&dl=1'
	    return (sample_link, sample_headings)

	########################################### Running #####################################################
	#_________________________________________________________________________________________________________________________
	#_________________________________________________________________________________________________________________________

	#_________________________________________________________________________________________________________________________
	#_________________________________________________________________________________________________________________________

	#_________________________________________________________________________________________________________________________
	#_________________________________________________________________________________________________________________________


	if __name__ == '__main__':
	    # When executed as a script, start the Flask server bound to every
	    # interface on port 7860.
	    app.run(port=7860, host='0.0.0.0')