Marthee committed on
Commit
820b0b1
·
verified ·
1 Parent(s): 5aa9ee2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +48 -2
app.py CHANGED
@@ -64,6 +64,48 @@ def download_pdf():
64
  download_name=f"annotated_page_{pageNumTextFound}.pdf"
65
  )
66
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
  @app.route('/api/process-data', methods=['POST'])
68
  def receive_pdf_data():
69
  global pdf_content, pageNumTextFound
@@ -115,15 +157,19 @@ def findapi():
115
  except Exception as e:
116
  return jsonify({"error": str(e)}), 500
117
 
118
@app.route('/findapiFilteredHeadings', methods=['GET'])
def findapiFilteredHeadings():
    """Return the table output for the headings listed in the JSON request body.

    Reads ``filePath`` and ``listofheadings`` from the request's JSON payload,
    hands both to ``InitialMarkups.extract_section_under_header`` and responds
    with the third element of its result as JSON. Any exception is reported as
    a JSON ``{"error": ...}`` body with HTTP status 500.
    """
    try:
        print('In process [Try]')
        payload = request.get_json()

        # Pull the two inputs out of the payload.
        source_link = payload.get('filePath')
        headings = payload.get('listofheadings')  # arrives in JSON format

        # The helper yields three values; only the table output is returned.
        _, _, table_output = InitialMarkups.extract_section_under_header(
            source_link, headings
        )
        return jsonify(table_output)
    except Exception as err:
        return jsonify({"error": str(err)}), 500
 
64
  download_name=f"annotated_page_{pageNumTextFound}.pdf"
65
  )
66
 
67
+
68
@app.route('/view-highlight', methods=['GET','POST'])
def download_pdfHighlight():
    """Serve an annotated PDF inline for the heading named in the query string.

    Query parameters:
        pdfLink: link to the source PDF (required).
        keyword: heading to look up. It is compared against the "Subject"
            field of the entries that ``findapiFilteredHeadings`` stores in
            the module-level ``jsonoutput``.

    Returns:
        The annotated PDF as an inline ``application/pdf`` response; a plain
        text 400 response when ``pdfLink`` is missing; a plain text 404
        response when the extraction step yields no PDF content.
    """
    # The raw query string is parsed by hand instead of using request.args.
    raw_query = request.query_string.decode()
    params = urllib.parse.parse_qs(raw_query)
    pdf_link = params.get('pdfLink', [None])[0]
    keyword = params.get('keyword', [None])[0]
    if not pdf_link:
        return "Missing required parameters.", 400

    # NOTE(review): parse_qs has already percent-decoded the value once, so
    # this is a second decode -- presumably the client double-encodes the
    # link; confirm before removing.
    pdf_link = urllib.parse.unquote(pdf_link)

    print("Extracted PDF Link:", pdf_link)
    print("Extracted Keywords:", keyword)

    # ``jsonoutput`` is only assigned by findapiFilteredHeadings; reading it
    # through globals() avoids a NameError when this route is hit first.
    cached_headings = globals().get("jsonoutput")
    if cached_headings is None:
        cached_headings = []
    matching_item = next(
        (item for item in cached_headings if item.get("Subject") == keyword),
        None,
    )

    # Defaults used when nothing matches; previously ``stringtowrite`` was
    # left unbound on that path and the call below raised UnboundLocalError.
    page_number = 0
    stringtowrite = None
    if matching_item:
        stringtowrite = matching_item.get("head above 1")
        try:
            # Cached pages are 1-based; the helper is given a 0-based index.
            page_number = int(matching_item.get("Page")) - 1
        except (TypeError, ValueError):
            # Missing or non-numeric page: fall back to the first page.
            page_number = 0
        print(f"Page number for '{keyword}': {page_number}")
    else:
        print("No match found.")

    # NOTE(review): findapiFilteredHeadings calls this same helper with two
    # positional arguments while this route passes four -- verify that the
    # helper's signature supports both call shapes.
    pdf_content = InitialMarkups.extract_section_under_headerRawan(
        pdf_link, keyword, page_number, stringtowrite
    )[0]
    if pdf_content is None:
        return "PDF content not found.", 404

    # The file name uses this request's own (1-based) page rather than
    # ``pageNumTextFound``, which belongs to receive_pdf_data and may be
    # stale or not yet defined when this route runs.
    return send_file(
        BytesIO(pdf_content),
        mimetype='application/pdf',
        as_attachment=False,
        download_name=f"annotated_page_{page_number + 1}.pdf"
    )
108
+
109
  @app.route('/api/process-data', methods=['POST'])
110
  def receive_pdf_data():
111
  global pdf_content, pageNumTextFound
 
157
  except Exception as e:
158
  return jsonify({"error": str(e)}), 500
159
 
160
@app.route('/findapiFilteredHeadings', methods=['GET','POST'])
def findapiFilteredHeadings():
    """Extract the sections under the requested headings and cache the result.

    Expects a JSON object body with:
        filePath: link to the PDF to process (required).
        listofheadings: the headings to look for, in JSON format.

    The table output returned by
    ``InitialMarkups.extract_section_under_headerRawan`` is stored in the
    module-level ``jsonoutput`` (read by the ``/view-highlight`` route) and
    sent back as the JSON response.

    Returns:
        The table output as JSON; a 400 JSON error when the body is not a
        JSON object or ``filePath`` is missing; a 500 JSON error carrying the
        exception text when processing fails.
    """
    global jsonoutput
    try:
        print('In process [Try]')
        # silent=True returns None for a missing or malformed JSON body
        # (typical for a bare GET) instead of raising, so the client gets a
        # 400 rather than a 500 from dereferencing None.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            return jsonify({"error": "Request body must be a JSON object."}), 400

        # Extracting values
        pdfLink = data.get('filePath')
        print(pdfLink)
        listofheadings = data.get('listofheadings')  # in json format
        print(listofheadings)
        if not pdfLink:
            return jsonify({"error": "Missing required field 'filePath'."}), 400

        # The helper yields three values; only the table output is used here.
        _, _, tablepdfoutput = InitialMarkups.extract_section_under_headerRawan(
            pdfLink, listofheadings
        )
        # Cache for the /view-highlight route, which looks entries up by
        # their "Subject" field.
        jsonoutput = tablepdfoutput
        return jsonify(tablepdfoutput)
    except Exception as e:
        return jsonify({"error": str(e)}), 500