Spaces:
Sleeping
Sleeping
Update scrape_3gpp.py
Browse files- scrape_3gpp.py +4 -1
scrape_3gpp.py
CHANGED
|
@@ -424,11 +424,14 @@ def extractionPrincipale(url, excel_file=None, status_list=None, progress=gr.Pro
|
|
| 424 |
extracted_content.append(discussion_details)
|
| 425 |
|
| 426 |
elif category == "pdf":
|
|
|
|
| 427 |
tabLine = []
|
| 428 |
file = pdfReader
|
| 429 |
pdfNumberPages = len(file.pages)
|
|
|
|
|
|
|
| 430 |
for pdfPage in range(0, pdfNumberPages):
|
| 431 |
-
|
| 432 |
load_page = file.get_page(pdfPage)
|
| 433 |
text = load_page.extract_text()
|
| 434 |
lines = text.split("\n")
|
|
|
|
| 424 |
extracted_content.append(discussion_details)
|
| 425 |
|
| 426 |
elif category == "pdf":
|
| 427 |
+
print("Entered the PDF category")
|
| 428 |
tabLine = []
|
| 429 |
file = pdfReader
|
| 430 |
pdfNumberPages = len(file.pages)
|
| 431 |
+
print(f"This is the number of pages : {pdfNumberPages}")
|
| 432 |
+
|
| 433 |
for pdfPage in range(0, pdfNumberPages):
|
| 434 |
+
|
| 435 |
load_page = file.get_page(pdfPage)
|
| 436 |
text = load_page.extract_text()
|
| 437 |
lines = text.split("\n")
|