Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -5,7 +5,7 @@ import io
|
|
| 5 |
|
| 6 |
|
| 7 |
|
| 8 |
-
def search_pdf(pdf_file, search_term):
|
| 9 |
search_results = []
|
| 10 |
if isinstance(pdf_file, io.BytesIO): # Handling Streamlit case
|
| 11 |
doc = fitz.open(stream=pdf_file.getvalue(), filetype="pdf")
|
|
@@ -17,13 +17,17 @@ def search_pdf(pdf_file, search_term):
|
|
| 17 |
# Split the text into lines and filter out empty lines
|
| 18 |
lines = [line.strip() for line in text.split('\n') if line.strip()]
|
| 19 |
cleaned_text = '\n'.join(lines)
|
| 20 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
search_results.append((page_num + 1, cleaned_text))
|
| 22 |
return search_results
|
| 23 |
|
| 24 |
def final_result(pdf_file, search_term):
|
| 25 |
split_search = search_term.split(' ')
|
| 26 |
-
results = search_pdf(pdf_file, split_search
|
| 27 |
output_text = ""
|
| 28 |
if results:
|
| 29 |
for page_num, text in results:
|
|
|
|
| 5 |
|
| 6 |
|
| 7 |
|
| 8 |
+
def search_pdf(pdf_file, split_search):
|
| 9 |
search_results = []
|
| 10 |
if isinstance(pdf_file, io.BytesIO): # Handling Streamlit case
|
| 11 |
doc = fitz.open(stream=pdf_file.getvalue(), filetype="pdf")
|
|
|
|
| 17 |
# Split the text into lines and filter out empty lines
|
| 18 |
lines = [line.strip() for line in text.split('\n') if line.strip()]
|
| 19 |
cleaned_text = '\n'.join(lines)
|
| 20 |
+
k = 0
|
| 21 |
+
for i in range(len(split_search)):
|
| 22 |
+
if split_search[i].lower() in cleaned_text.lower():
|
| 23 |
+
k = k + 1
|
| 24 |
+
if k == len(split_search):
|
| 25 |
search_results.append((page_num + 1, cleaned_text))
|
| 26 |
return search_results
|
| 27 |
|
| 28 |
def final_result(pdf_file, search_term):
|
| 29 |
split_search = search_term.split(' ')
|
| 30 |
+
results = search_pdf(pdf_file, split_search)
|
| 31 |
output_text = ""
|
| 32 |
if results:
|
| 33 |
for page_num, text in results:
|