Spaces:
Sleeping
Sleeping
Update InitialMarkups.py
Browse files
- InitialMarkups.py +7 -10
InitialMarkups.py
CHANGED
|
@@ -6,10 +6,9 @@ Automatically generated by Colab.
|
|
| 6 |
Original file is located at
|
| 7 |
https://colab.research.google.com/drive/12XfVkmKmN3oVjHhLVE0_GgkftgArFEK2
|
| 8 |
"""
|
|
|
|
| 9 |
|
| 10 |
-
pip install pymupdf
|
| 11 |
|
| 12 |
-
pip install fuzzywuzzy
|
| 13 |
|
| 14 |
from io import BytesIO
|
| 15 |
import re
|
|
@@ -600,7 +599,7 @@ def same_start_word(s1, s2):
|
|
| 600 |
return words1[0].lower() == words2[0].lower()
|
| 601 |
return False
|
| 602 |
|
| 603 |
-
|
| 604 |
def extract_section_under_header(pdf_path):
|
| 605 |
top_margin = 70
|
| 606 |
bottom_margin = 50
|
|
@@ -968,11 +967,9 @@ def extract_section_under_header(pdf_path):
|
|
| 968 |
page_highlights[page_num] = bbox
|
| 969 |
highlight_boxes(docHighlights, page_highlights)
|
| 970 |
|
| 971 |
-
docHighlights.save("highlighted_output.pdf", garbage=4, deflate=True)
|
| 972 |
-
return json_output
|
| 973 |
-
|
| 974 |
-
pdflink='https://www.dropbox.com/scl/fi/jtffcxszwpcnc6wdo61p6/WH007-JAC-RP-XX-SP-AA-8501-Redoak-Pump-House-Specification.pdf?rlkey=unq4ag9eajezv2j6y6ewkkk5u&e=29&st=wu3vsd70&dl=0'
|
| 975 |
-
|
| 976 |
-
jsonOutput=extract_section_under_header(pdflink)
|
| 977 |
-
print(jsonOutput)
|
| 978 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
Original file is located at
|
| 7 |
https://colab.research.google.com/drive/12XfVkmKmN3oVjHhLVE0_GgkftgArFEK2
|
| 8 |
"""
|
| 9 |
+
baselink='https://find-initialmarkups.hf.space/view-pdf?'
|
| 10 |
|
|
|
|
| 11 |
|
|
|
|
| 12 |
|
| 13 |
from io import BytesIO
|
| 14 |
import re
|
|
|
|
| 599 |
return words1[0].lower() == words2[0].lower()
|
| 600 |
return False
|
| 601 |
|
| 602 |
+
|
| 603 |
def extract_section_under_header(pdf_path):
|
| 604 |
top_margin = 70
|
| 605 |
bottom_margin = 50
|
|
|
|
| 967 |
page_highlights[page_num] = bbox
|
| 968 |
highlight_boxes(docHighlights, page_highlights)
|
| 969 |
|
| 970 |
+
# docHighlights.save("highlighted_output.pdf", garbage=4, deflate=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 971 |
|
| 972 |
+
pdf_bytes = BytesIO()
|
| 973 |
+
docHighlights.save(pdf_bytes)
|
| 974 |
+
print('JSONN',json_output)
|
| 975 |
+
return pdf_bytes.getvalue(), docHighlights , json_output
|