Spaces:
Runtime error
Runtime error
| import streamlit as st | |
| import os | |
| from pathlib import Path | |
| import time | |
| from main import PDFProcessor, SecurityException | |
# Configure the browser tab title/icon and select the wide page layout.
page_settings = {
    "page_title": "PDF Query Engine",
    "page_icon": "📚",
    "layout": "wide",
}
st.set_page_config(**page_settings)
# Initialize processor
@st.cache_resource
def get_processor():
    """Return the ``PDFProcessor`` instance used by this app.

    Streamlit re-executes the whole script on every widget interaction, so
    without caching a brand-new processor would be constructed on each rerun.
    ``st.cache_resource`` keeps a single instance alive and hands it back on
    subsequent calls.

    NOTE(review): the cached object is shared by every session of the app;
    this assumes ``PDFProcessor`` keeps no per-user state -- confirm against
    ``main``.
    """
    return PDFProcessor()


processor = get_processor()
# Make sure the folder that receives uploaded PDFs is present; an already
# existing directory is left as it is.
upload_dir = Path("./uploads")
os.makedirs(upload_dir, exist_ok=True)
# Page heading followed by a short explanation of what the app does.
intro_text = """
This application allows you to extract information from PDF documents using natural language queries.
Upload a PDF, wait for it to be processed, then ask questions about its content!
"""
st.title("PDF Query Engine 🔍")
st.markdown(intro_text)
# Sidebar content: an "About" info box and numbered usage instructions.
# The text is kept at column 0 inside the literals, as it appears in the file.
about_text = """
This tool uses natural language processing to extract and query information from PDFs.
**Features:**
- Extract text from PDFs
- Process into semantic chunks
- Query using natural language
- Get relevant context from the document
"""
instructions_text = """
1. Upload a PDF file (max 26MB)
2. Wait for processing to complete
3. Type your question in the query box
4. Review the results
"""

with st.sidebar:
    st.header("About")
    st.info(about_text)
    st.header("Instructions")
    st.markdown(instructions_text)
# File uploader
uploaded_file = st.file_uploader("Upload a PDF document", type=["pdf"])

# Everything below needs an uploaded file: the processing controls and the
# query interface both rely on `temp_file_path` and `persist_directory`.
if uploaded_file is not None:
    # The file name is supplied by the client. Keep only its final path
    # component so a name such as "../../x.pdf" cannot be written outside
    # the upload directory.
    safe_name = Path(uploaded_file.name).name
    if safe_name in ("", ".", ".."):
        safe_name = "upload.pdf"

    # Save the uploaded file temporarily
    temp_file_path = os.path.join(upload_dir, safe_name)
    with open(temp_file_path, "wb") as f:
        f.write(uploaded_file.getbuffer())

    # Check if file has already been processed: a directory named after the
    # file hash under the configured db directory marks it as processed.
    file_hash = processor.get_file_hash(temp_file_path)
    persist_directory = os.path.join(processor.config["db_directory"], file_hash)
    already_processed = os.path.exists(persist_directory)

    # Display file info
    col1, col2 = st.columns(2)
    with col1:
        st.success(f"File uploaded: {uploaded_file.name}")
        # Show file size
        file_size = os.path.getsize(temp_file_path) / (1024 * 1024)  # Convert to MB
        st.info(f"File size: {file_size:.2f} MB")
    with col2:
        if already_processed:
            st.info("This file has already been processed and is ready for querying.")
            process_button = st.button("Re-process file")
        else:
            st.warning("This file needs to be processed before querying.")
            process_button = st.button("Process file")

    # Process the file when button is clicked
    if process_button:
        try:
            with st.spinner("Processing PDF... This may take a minute."):
                # Process file
                vector_store = processor.process_file(temp_file_path)
            if vector_store:
                st.success("PDF processed successfully! You can now query the document.")
            else:
                st.error("Failed to process PDF. The file might be empty or corrupted.")
        except SecurityException as e:
            st.error(f"Security error: {str(e)}")
        except Exception as e:
            # Top-level UI boundary: report the failure instead of crashing the page.
            st.error(f"Error processing file: {str(e)}")

    # Query interface
    st.header("Ask questions about the document")

    # Re-check after a possible processing run above, which may have just
    # created the persist directory.
    can_query = os.path.exists(persist_directory)
    if can_query:
        query = st.text_input("Enter your question:")
        k_value = st.slider("Number of results to return", min_value=1, max_value=10, value=3)
        search_clicked = st.button("Search")
        if search_clicked and not query.strip():
            # Give feedback instead of silently ignoring the click.
            st.warning("Please enter a question before searching.")
        elif search_clicked:
            with st.spinner("Searching for answers..."):
                try:
                    results = processor.query_document(temp_file_path, query, k=k_value)
                    if not results:
                        st.info("No relevant information found. Try rephrasing your question.")
                    else:
                        st.subheader("Search Results")
                        for i, doc in enumerate(results):
                            with st.expander(f"Result {i+1}"):
                                st.markdown(doc.page_content)
                except Exception as e:
                    # Top-level UI boundary: surface the error to the user.
                    st.error(f"Error during query: {str(e)}")
    else:
        st.info("Please process the document before querying.")
# Footer: a horizontal rule followed by the attribution line.
footer_lines = (
    "---",
    "PDF Query Engine | Built with Streamlit and LangChain",
)
for footer_line in footer_lines:
    st.markdown(footer_line)