InitialMarkups2 / app.py
Marthee's picture
Update app.py
72309d1 verified
raw
history blame
4.88 kB
from flask import Flask, request, jsonify, abort , render_template , send_file
import tsadropboxretrieval
import json
import Find_Hyperlinking_text
import findspecsv1
import InitialMarkups
import requests
from io import BytesIO
import datetime
import time
from threading import Thread
import urllib
# Address of the deployed Hugging Face Space (not referenced by the code
# visible in this file).
BASE_URL = "https://findconsole-initialmarkups.hf.space"

# Module-level state shared with the route handlers: /api/process-data
# declares both names global, and /view-pdf interpolates pageNumTextFound
# into the file name it serves.
pdf_content = None
pageNumTextFound = 0

# The Flask application every @app.route handler below registers on.
app = Flask(__name__)
@app.route("/", methods=["GET", "POST"])
def thismain():
    """Render the GUI template for GET and POST requests to the site root."""
    print('ayhaga')  # debug marker written to stdout on every request
    page = render_template("gui.html")
    return page
@app.route('/view-pdf', methods=['GET'])
def download_pdf():
    """Serve the marked-up PDF for the ``pdfLink`` query parameter inline.

    The raw query string is parsed by hand instead of through
    ``request.args``.

    Returns:
        The PDF as an ``application/pdf`` response (not as an attachment),
        a 400 text response when ``pdfLink`` is missing, or a 404 text
        response when the first element returned by the markup step is
        ``None``.
    """
    raw_query = request.query_string.decode()
    query_params = urllib.parse.parse_qs(raw_query)

    link_values = query_params.get('pdfLink')
    if not link_values or not link_values[0]:
        return "Missing required parameters.", 400

    # parse_qs has already percent-decoded the value once; this is a second
    # decoding pass.
    # NOTE(review): presumably callers send the link double-encoded - confirm.
    pdf_link = urllib.parse.unquote(link_values[0])
    print("Extracted PDF Link:", pdf_link)

    createDF = False  # not read anywhere in this handler

    # Only the first element of the markup result is used here.
    marked_up = InitialMarkups.extract_section_under_header(pdf_link)[0]
    if marked_up is None:
        return "PDF content not found.", 404

    return send_file(
        BytesIO(marked_up),
        mimetype='application/pdf',
        as_attachment=False,
        download_name=f"annotated_page_{pageNumTextFound}.pdf",
    )
@app.route('/api/process-data', methods=['POST'])
def receive_pdf_data():
    """Mark up the PDF named by ``finddata()`` and upload the results.

    Runs the markup step on the PDF link, uploads the marked-up document and
    the markup-summary table PDF to the Dropbox folder
    ``/TSA JOBS/ADR Test/FIND/``, and reports what the upload helper
    returned for each.

    Returns:
        A JSON response with ``message``, ``PDF_MarkedUp`` and
        ``Table_PDF_Markup_Summary`` on success; a 400 JSON error when no
        link is available; a 500 JSON error carrying the exception text when
        any processing step raises.
    """
    # finddata() yields a (link, keywords) pair. Only the link is wanted
    # here: binding the whole tuple would make the emptiness check below
    # always pass and would hand a tuple to calls that take a URL.
    source = finddata()
    pdfLink = source[0] if isinstance(source, (tuple, list)) else source
    if not pdfLink:
        return jsonify({"error": "'pdfLink' must be provided."}), 400

    try:
        print(pdfLink)
        # The first element (the PDF bytes) is not needed by this endpoint.
        _pdfbytes, pdf_document, tablepdfoutput = (
            InitialMarkups.extract_section_under_header(pdfLink)
        )

        dbxTeam = tsadropboxretrieval.ADR_Access_DropboxTeam('user')
        # Shared-link metadata supplies the original file name for the uploads.
        metadata = dbxTeam.sharing_get_shared_link_metadata(pdfLink)

        dbPath = '/TSA JOBS/ADR Test/FIND/'
        pdflink = tsadropboxretrieval.uploadanyFile(
            doc=pdf_document, path=dbPath, pdfname=metadata.name
        )
        print('LINKS0', pdflink)

        # "<name>.pdf" -> "<name> Markup Summary.pdf"
        summary_name = metadata.name.rsplit(".pdf", 1)[0] + ' Markup Summary' + '.pdf'
        tablepdfLink = tsadropboxretrieval.uploadanyFile(
            doc=tablepdfoutput, path=dbPath, pdfname=summary_name
        )
        print("PDF successfully uploaded to Dropbox at")
        print('LINKS1', tablepdfLink)

        return jsonify({
            "message": "PDF processed successfully.",
            "PDF_MarkedUp": pdflink,
            'Table_PDF_Markup_Summary': tablepdfLink
        })
    except Exception as e:
        # Route boundary: report any failure to the client as JSON instead
        # of letting the request crash.
        return jsonify({"error": str(e)}), 500
def finddata():
    """Return the hard-coded ``(pdf_link, keywords)`` input pair.

    The first element is a Dropbox shared-link URL string; the second is a
    list of two keyword strings.
    """
    return (
        'https://www.dropbox.com/scl/fi/hnp4mqigb51a5kp89kgfa/00801-ARC-20-ZZ-S-A-0002.pdf?rlkey=45abeoebzqw4qwnslnei6dkd6&st=m4yrcjm2&dl=1',
        ['115 INTEGRATED MRI ROOM LININGS', '310 ACCURACY'],
    )
#_________________________________________________________________________________________________________________________
#_________________________________________________________________________________________________________________________
#_________________________________________________________________________________________________________________________
#_________________________________________________________________________________________________________________________
#_________________________________________________________________________________________________________________________
#_________________________________________________________________________________________________________________________
def runn():
    """Serve ``app`` through gevent's WSGI server on 0.0.0.0:7860."""
    # Function-scope import: gevent is loaded only when the server starts.
    from gevent.pywsgi import WSGIServer

    listen_on = ('0.0.0.0', 7860)
    WSGIServer(listen_on, app).serve_forever()
def keep_alive():
    """Start ``runn`` on a new thread and return without joining it."""
    Thread(target=runn).start()
# Module-level startup: launch the web server on a background thread, then
# keep the main thread alive with a half-hourly heartbeat that logs each
# time a 12-hour checkpoint has been passed.
dtn = datetime.datetime.now(datetime.timezone.utc)
print(dtn)

# First checkpoint: 21:00 UTC on the current UTC date. It is derived from
# the timezone-aware "now" so the hour is unambiguously UTC; a naive
# datetime passed through astimezone() would be read as host-local time.
next_start = dtn.replace(hour=21, minute=0, second=0, microsecond=0)
print(next_start)

keep_alive()

while 1:
    dtnNow = datetime.datetime.now(datetime.timezone.utc)
    print(dtnNow)
    if dtnNow >= next_start:
        # Advance the checkpoint by 12 hours (one step per loop pass).
        next_start += datetime.timedelta(hours=12)
        print('YES- 12 hours passed!!', next_start)
    time.sleep(1800)  # poll every 30 minutes

# NOTE(review): the loop above has no exit condition, so this guard is not
# reached as the file is written; the server is already started by
# keep_alive().
if __name__ == "__main__":
    runn()
# app.run
# if __name__ == '__main__':
# app.run(host='0.0.0.0', port=7860)