Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| # import PyPDF2 | |
| import fitz | |
| import io | |
def search_pdf(pdf_file, split_search):
    """Find the pages of a PDF whose text contains every search term.

    Matching is a case-insensitive substring test against the page text
    after blank lines are dropped and each remaining line is stripped.

    Args:
        pdf_file: Either an ``io.BytesIO`` instance (the Streamlit upload
            case), whose bytes are handed to ``fitz.open`` as a stream, or
            any other value, which is passed to ``fitz.open`` unchanged
            (the local-file case).
        split_search: Sequence of search terms. A page matches only when
            all of them occur in its text. An empty sequence matches every
            page.

    Returns:
        A list of ``(page_number, cleaned_text)`` tuples in page order,
        where ``page_number`` is 1-based.
    """
    # Normalise the terms once rather than once per page.
    wanted_terms = [term.lower() for term in split_search]

    if isinstance(pdf_file, io.BytesIO):  # Handling Streamlit case
        doc = fitz.open(stream=pdf_file.getvalue(), filetype="pdf")
    else:  # Handling local file case
        doc = fitz.open(pdf_file)

    search_results = []
    try:
        for page_num in range(doc.page_count):
            text = doc.load_page(page_num).get_text()

            # Strip every line and drop the ones that end up empty.
            stripped_lines = (line.strip() for line in text.split('\n'))
            cleaned_text = '\n'.join(line for line in stripped_lines if line)

            # Lower-case the page once, then require every term to occur.
            haystack = cleaned_text.lower()
            if all(term in haystack for term in wanted_terms):
                search_results.append((page_num + 1, cleaned_text))
    finally:
        # Release the document even if text extraction raises; the
        # original never closed it.
        doc.close()

    return search_results
def final_result(pdf_file, search_term):
    """Build the text shown to the user for a search over one PDF.

    The search term is split on single spaces and the pieces are passed to
    ``search_pdf``, which returns ``(page_number, text)`` pairs.

    Args:
        pdf_file: The PDF to search; forwarded to ``search_pdf`` as-is.
        search_term: The raw query string entered by the user.

    Returns:
        One entry per matching page, concatenated in the order returned by
        ``search_pdf``, or a "No results found" message when there are no
        matches.
    """
    terms = search_term.split(' ')
    matches = search_pdf(pdf_file, terms)

    if not matches:
        return f"No results found for '{search_term}'."

    return "".join(
        f"'{search_term}' on page {page_no}:\n-{page_text}\n\n"
        for page_no, page_text in matches
    )
# ---- Page setup: wide layout, centred heading, horizontal rule ----
st.set_page_config(
    page_title="Search in PDF",
    layout="wide",
    initial_sidebar_state="expanded",
)
st.markdown(
    "<h3 style='text-align:center; font-size:24px;'>Search in PDF</h3>",
    unsafe_allow_html=True,
)
st.write("---")

# Two-column layout: inputs on the left (40%), results on the right (60%).
inputs_col, results_col = st.columns(spec=[0.4, 0.6])

with inputs_col:
    input_file = st.file_uploader(label="Upload .pdf File", type='pdf')
    search_term = st.text_input(label="Enter Search-term", placeholder="Search here...")

    # NOTE(review): indentation was lost in the source this was recovered
    # from; the Submit button row is assumed to sit inside the left column
    # -- confirm against the running app.
    submit_col, _unused_col = st.columns(spec=[0.5, 0.5])
    with submit_col:
        submitted = st.button("Submit")

with results_col:
    # Only react on the run triggered by the Submit button.
    if submitted:
        if input_file is None:
            st.error("Please upload a PDF file")
        elif search_term.strip() == "":
            st.error("Please enter a search term")
        else:
            result = final_result(input_file, search_term)
            st.text_area("Search Results", result, height=400)