Spaces:

nde-dilan
/

nerala_ai_backend

Runtime error

nerala_ai_backend / streamlit.py

Nde Dilan

Add application file

d4e21df 10 months ago

4.46 kB

	import streamlit as st
	import os
	from pathlib import Path
	import time
	from main import PDFProcessor, SecurityException

	# Page-level settings: browser tab title, tab icon, and a wide content layout.
	page_settings = {
	    "page_title": "PDF Query Engine",
	    "page_icon": "📚",
	    "layout": "wide",
	}
	st.set_page_config(**page_settings)

	# Processor factory, registered with Streamlit's resource cache
	@st.cache_resource
	def get_processor():
	    """Create and return the PDFProcessor used by this app.

	    The function is decorated with ``st.cache_resource``; it takes no
	    arguments and constructs ``PDFProcessor`` with its defaults.
	    """
	    pdf_processor = PDFProcessor()
	    return pdf_processor

	# Module-level processor handle used by the upload and query sections below.
	processor = get_processor()

	# Uploaded PDFs are written here; create the folder on first run and
	# tolerate it already existing on later runs.
	upload_dir = Path("./uploads")
	upload_dir.mkdir(parents=False, exist_ok=True)

	# Page heading followed by a short description of the workflow
	st.title("PDF Query Engine 🔍")
	intro_text = """
	This application allows you to extract information from PDF documents using natural language queries.
	Upload a PDF, wait for it to be processed, then ask questions about its content!
	"""
	st.markdown(intro_text)

	# Sidebar: a static "About" blurb followed by step-by-step instructions.
	# Elements are added through the st.sidebar object instead of a with-block.
	sidebar = st.sidebar

	sidebar.header("About")
	sidebar.info("""
	This tool uses natural language processing to extract and query information from PDFs.

	Features:
	- Extract text from PDFs
	- Process into semantic chunks
	- Query using natural language
	- Get relevant context from the document
	""")

	sidebar.header("Instructions")
	sidebar.markdown("""
	1. Upload a PDF file (max 26MB)
	2. Wait for processing to complete
	3. Type your question in the query box
	4. Review the results
	""")

	# File uploader (the widget is restricted to the "pdf" type)
	uploaded_file = st.file_uploader("Upload a PDF document", type=["pdf"])

	# Everything below runs only once a file has been uploaded.
	if uploaded_file is not None:
	    # The file name comes from the client, so keep only its final path
	    # component before joining it onto the upload directory. Otherwise a
	    # name containing separators or ".." could be written outside uploads.
	    safe_name = os.path.basename(uploaded_file.name.replace("\\", "/"))
	    if safe_name in ("", ".", ".."):
	        safe_name = "upload.pdf"

	    # Save the uploaded file temporarily
	    temp_file_path = os.path.join(upload_dir, safe_name)
	    with open(temp_file_path, "wb") as f:
	        f.write(uploaded_file.getbuffer())

	    # A file counts as "already processed" when a directory named after its
	    # hash exists under the processor's configured db_directory.
	    file_hash = processor.get_file_hash(temp_file_path)
	    persist_directory = os.path.join(processor.config["db_directory"], file_hash)
	    already_processed = os.path.exists(persist_directory)

	    # Display file info (left column) and processing controls (right column)
	    col1, col2 = st.columns(2)
	    with col1:
	        st.success(f"File uploaded: {uploaded_file.name}")

	        # Show file size
	        file_size = os.path.getsize(temp_file_path) / (1024 * 1024)  # Convert to MB
	        st.info(f"File size: {file_size:.2f} MB")

	    with col2:
	        if already_processed:
	            st.info("This file has already been processed and is ready for querying.")
	            process_button = st.button("Re-process file")
	        else:
	            st.warning("This file needs to be processed before querying.")
	            process_button = st.button("Process file")

	    # Process the file when button is clicked
	    if process_button:
	        try:
	            with st.spinner("Processing PDF... This may take a minute."):
	                # Process file
	                vector_store = processor.process_file(temp_file_path)

	                if vector_store:
	                    st.success("PDF processed successfully! You can now query the document.")
	                else:
	                    st.error("Failed to process PDF. The file might be empty or corrupted.")
	        except SecurityException as e:
	            st.error(f"Security error: {str(e)}")
	        except Exception as e:
	            # UI boundary: report any other failure to the user instead of
	            # crashing the app.
	            st.error(f"Error processing file: {str(e)}")

	    # Query interface
	    st.header("Ask questions about the document")

	    # Re-check on disk rather than reusing already_processed, because the
	    # processing step above may have just run.
	    can_query = os.path.exists(persist_directory)

	    if can_query:
	        query = st.text_input("Enter your question:")
	        k_value = st.slider("Number of results to return", min_value=1, max_value=10, value=3)

	        search_clicked = st.button("Search")
	        if search_clicked and not query.strip():
	            # Previously an empty query made the button appear to do nothing.
	            st.warning("Please enter a question before searching.")
	        elif search_clicked:
	            with st.spinner("Searching for answers..."):
	                try:
	                    results = processor.query_document(temp_file_path, query, k=k_value)

	                    if not results:
	                        st.info("No relevant information found. Try rephrasing your question.")
	                    else:
	                        st.subheader("Search Results")
	                        for i, doc in enumerate(results):
	                            with st.expander(f"Result {i+1}"):
	                                st.markdown(doc.page_content)
	                except Exception as e:
	                    st.error(f"Error during query: {str(e)}")
	    else:
	        st.info("Please process the document before querying.")

	# Footer: a horizontal rule followed by a credits line.
	st.markdown("---")
	# Raw string: "\|" is not a valid escape sequence in a normal string literal
	# and triggers an invalid-escape warning; the emitted text is unchanged.
	st.markdown(r"PDF Query Engine \| Built with Streamlit and LangChain")