Marthee committed on
Commit
f5d33ec
·
verified ·
1 Parent(s): 42a945d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +45 -28
app.py CHANGED
@@ -8,30 +8,47 @@ import urllib
8
 
9
  app = Flask(__name__)
10
 
11
- @app.route("/",methods=["GET", "POST"])
12
- def thismain():
13
- print('ayhaga')
14
- return render_template("gui.html")
 
 
15
 
16
  pdf_content = None
17
  pageNumTextFound = 0
18
- BASE_URL = "https://marthee-nbslink.hf.space" # Localhost URL for testing
 
 
 
 
 
19
 
20
  @app.route('/view-pdf', methods=['GET'])
21
  def download_pdf():
22
  global pdf_content, pageNumTextFound
23
- pdf_link = request.args.get('pdfLink')
24
- keyword = request.args.get('keyword')
25
 
26
- # Check if parameters exist
 
 
 
 
 
 
 
27
  if not pdf_link or not keyword:
28
  return "Missing required parameters.", 400
29
 
30
- # Decode URL-encoded parameters
31
- pdf_link = urllib.parse.unquote(pdf_link) # Decode URL encoding
32
- keyword = json.loads(urllib.parse.unquote(keyword)) # Decode and convert back to a list
 
 
 
 
 
 
33
 
34
- # Debugging output
35
  print("Extracted PDF Link:", pdf_link)
36
  print("Extracted Keywords:", keyword)
37
 
@@ -49,38 +66,38 @@ def download_pdf():
49
  @app.route('/api/process-data', methods=['POST'])
50
  def receive_pdf_data():
51
  global pdf_content, pageNumTextFound
52
- pdfLink,keyword=finddata()
53
- # Get PDF link and keyword from the request body
54
- # pdfLink = request.form.get('pdfLink')
55
- # keyword = request.form.get('keyword')
56
 
57
  if not pdfLink or not keyword:
58
  return jsonify({"error": "Both 'pdfLink' and 'keyword' must be provided."}), 400
59
 
60
  try:
61
- # # Call the function to process the PDF
62
- # keyword = json.loads(keyword)
63
- print(pdfLink,keyword)
64
- global pdf_content
65
- pdf_content, pageNumTextFound,highlight_rect = Find_Hyperlinking_text.annotate_text_from_pdf([pdfLink], keyword)
66
 
67
  if pdf_content is None:
68
  return jsonify({"error": "No valid PDF content found."}), 404
69
 
70
- # Construct the URL in the desired format with the rectangle coordinates
71
  download_link = f"{BASE_URL}/view-pdf#page={pageNumTextFound}&zoom={highlight_rect}"
72
- print('download_link',download_link)
 
73
  return jsonify({
74
  "message": "PDF processed successfully.",
75
- "download_link": download_link # Return the formatted URL
76
  })
77
 
78
  except Exception as e:
79
  return jsonify({"error": str(e)}), 500
80
-
81
  def finddata():
82
- pdfLink = 'https://www.dropbox.com/scl/fi/hnp4mqigb51a5kp89kgfa/00801-ARC-20-ZZ-S-A-0002.pdf?rlkey=45abeoebzqw4qwnslnei6dkd6&st=m4yrcjm2&dl=1'; # Dropbox link
83
- keyword = ['115 INTEGRATED MRI ROOM LININGS','710 TRANSPORTATION'] ; # Example keyword
84
- return pdfLink,keyword
 
85
  if __name__ == '__main__':
86
  app.run(host='0.0.0.0', port=7860)
 
8
 
9
  app = Flask(__name__)
10
 
11
+ from flask import Flask, request, jsonify, send_file, render_template
12
+ import urllib.parse
13
+ import json
14
+ from io import BytesIO
15
+
16
+ app = Flask(__name__)
17
 
18
  pdf_content = None
19
  pageNumTextFound = 0
20
+ BASE_URL = "https://marthee-nbslink.hf.space" # Hugging Face Spaces base URL
21
+
22
+ @app.route("/", methods=["GET", "POST"])
23
+ def thismain():
24
+ print('ayhaga')
25
+ return render_template("gui.html")
26
 
27
  @app.route('/view-pdf', methods=['GET'])
28
  def download_pdf():
29
  global pdf_content, pageNumTextFound
 
 
30
 
31
+ # Manually parse the query parameters
32
+ full_query_string = request.query_string.decode() # Get raw query string
33
+ parsed_params = urllib.parse.parse_qs(full_query_string) # Parse it
34
+
35
+ # Extract pdfLink and keyword manually
36
+ pdf_link = parsed_params.get('pdfLink', [None])[0]
37
+ keyword = parsed_params.get('keyword', [None])[0]
38
+
39
  if not pdf_link or not keyword:
40
  return "Missing required parameters.", 400
41
 
42
+ # Decode the extracted values
43
+ pdf_link = urllib.parse.unquote(pdf_link)
44
+ keyword = urllib.parse.unquote(keyword)
45
+
46
+ # If the keyword is a JSON string, convert it back to a list
47
+ try:
48
+ keyword = json.loads(keyword)
49
+ except json.JSONDecodeError:
50
+ keyword = [keyword] # Treat it as a single keyword if not JSON
51
 
 
52
  print("Extracted PDF Link:", pdf_link)
53
  print("Extracted Keywords:", keyword)
54
 
 
66
  @app.route('/api/process-data', methods=['POST'])
67
  def receive_pdf_data():
68
  global pdf_content, pageNumTextFound
69
+
70
+ # Get PDF link and keyword from finddata()
71
+ pdfLink, keyword = finddata()
 
72
 
73
  if not pdfLink or not keyword:
74
  return jsonify({"error": "Both 'pdfLink' and 'keyword' must be provided."}), 400
75
 
76
  try:
77
+ print(pdfLink, keyword)
78
+
79
+ # Call function to process the PDF
80
+ pdf_content, pageNumTextFound, highlight_rect = Find_Hyperlinking_text.annotate_text_from_pdf([pdfLink], keyword)
 
81
 
82
  if pdf_content is None:
83
  return jsonify({"error": "No valid PDF content found."}), 404
84
 
85
+ # Construct the URL with the rectangle coordinates
86
  download_link = f"{BASE_URL}/view-pdf#page={pageNumTextFound}&zoom={highlight_rect}"
87
+ print('Download Link:', download_link)
88
+
89
  return jsonify({
90
  "message": "PDF processed successfully.",
91
+ "download_link": download_link
92
  })
93
 
94
  except Exception as e:
95
  return jsonify({"error": str(e)}), 500
96
+
97
  def finddata():
98
+ pdfLink = 'https://www.dropbox.com/scl/fi/hnp4mqigb51a5kp89kgfa/00801-ARC-20-ZZ-S-A-0002.pdf?rlkey=45abeoebzqw4qwnslnei6dkd6&st=m4yrcjm2&dl=1'
99
+ keyword = ['115 INTEGRATED MRI ROOM LININGS', '710 TRANSPORTATION']
100
+ return pdfLink, keyword
101
+
102
  if __name__ == '__main__':
103
  app.run(host='0.0.0.0', port=7860)