Spaces: Runtime error
Update InitialMarkups.py
Browse files
- InitialMarkups.py +3 -3
InitialMarkups.py
CHANGED
|
@@ -1924,7 +1924,7 @@ def extract_section_under_header_tobebilled2(pdf_path):
|
|
| 1924 |
|
| 1925 |
def extract_section_under_header_tobebilled2marthe(multiplePDF_Paths):
|
| 1926 |
# keywordstoSkip=["installation", "execution", "miscellaneous items", "workmanship", "testing", "labeling"]
|
| 1927 |
-
|
| 1928 |
keywords = {'installation', 'execution', 'miscellaneous items', 'workmanship', 'testing', 'labeling'}
|
| 1929 |
top_margin = 70
|
| 1930 |
bottom_margin = 50
|
|
@@ -1939,7 +1939,7 @@ def extract_section_under_header_tobebilled2marthe(multiplePDF_Paths):
|
|
| 1939 |
parsed_url = urlparse(pdf_path)
|
| 1940 |
filename = os.path.basename(parsed_url.path)
|
| 1941 |
filename = unquote(filename) # decode URL-encoded characters
|
| 1942 |
-
|
| 1943 |
# Optimized URL handling
|
| 1944 |
if pdf_path and ('http' in pdf_path or 'dropbox' in pdf_path):
|
| 1945 |
pdf_path = pdf_path.replace('dl=0', 'dl=1')
|
|
@@ -2380,6 +2380,6 @@ def extract_section_under_header_tobebilled2marthe(multiplePDF_Paths):
|
|
| 2380 |
pdf_bytes = BytesIO()
|
| 2381 |
docHighlights.save(pdf_bytes)
|
| 2382 |
|
| 2383 |
-
return pdf_bytes.getvalue(), docHighlights , json_output, Alltexttobebilled ,
|
| 2384 |
|
| 2385 |
|
|
|
|
| 1924 |
|
| 1925 |
def extract_section_under_header_tobebilled2marthe(multiplePDF_Paths):
|
| 1926 |
# keywordstoSkip=["installation", "execution", "miscellaneous items", "workmanship", "testing", "labeling"]
|
| 1927 |
+
filenames=[]
|
| 1928 |
keywords = {'installation', 'execution', 'miscellaneous items', 'workmanship', 'testing', 'labeling'}
|
| 1929 |
top_margin = 70
|
| 1930 |
bottom_margin = 50
|
|
|
|
| 1939 |
parsed_url = urlparse(pdf_path)
|
| 1940 |
filename = os.path.basename(parsed_url.path)
|
| 1941 |
filename = unquote(filename) # decode URL-encoded characters
|
| 1942 |
+
filenames.append(filename)
|
| 1943 |
# Optimized URL handling
|
| 1944 |
if pdf_path and ('http' in pdf_path or 'dropbox' in pdf_path):
|
| 1945 |
pdf_path = pdf_path.replace('dl=0', 'dl=1')
|
|
|
|
| 2380 |
pdf_bytes = BytesIO()
|
| 2381 |
docHighlights.save(pdf_bytes)
|
| 2382 |
|
| 2383 |
+
return pdf_bytes.getvalue(), docHighlights , json_output, Alltexttobebilled , filenames
|
| 2384 |
|
| 2385 |
|