import streamlit as st
import os
from pathlib import Path
import time
from main import PDFProcessor, SecurityException

# Page-level settings: browser tab title, tab icon, and a full-width layout.
# NOTE(review): Streamlit has historically required set_page_config to run
# before any other st.* command on the page, so keep this call first.
page_settings = {
    "page_title": "PDF Query Engine",
    "page_icon": "📚",
    "layout": "wide",
}
st.set_page_config(**page_settings)

# Shared PDF processor, built through a cached factory
@st.cache_resource
def get_processor() -> PDFProcessor:
    """Construct and return a ``PDFProcessor`` with its default arguments.

    The ``st.cache_resource`` decorator caches the returned object, so the
    constructor is not re-run every time Streamlit re-executes this script.
    """
    pdf_processor = PDFProcessor()
    return pdf_processor

processor = get_processor()

# Make sure the folder that receives uploaded PDFs is present; an already
# existing directory is not an error.
upload_dir = Path("./uploads")
os.makedirs(upload_dir, exist_ok=True)

# Page heading followed by a short introduction to the app
intro_markdown = """
This application allows you to extract information from PDF documents using natural language queries. 
Upload a PDF, wait for it to be processed, then ask questions about its content!
"""
st.title("PDF Query Engine 🔍")
st.markdown(intro_markdown)

# Sidebar: a static "About" blurb and step-by-step usage instructions
with st.sidebar:
    about_text = """
    This tool uses natural language processing to extract and query information from PDFs.
    
    **Features:**
    - Extract text from PDFs
    - Process into semantic chunks
    - Query using natural language
    - Get relevant context from the document
    """
    instructions_text = """
    1. Upload a PDF file (max 26MB)
    2. Wait for processing to complete
    3. Type your question in the query box
    4. Review the results
    """

    st.header("About")
    st.info(about_text)

    st.header("Instructions")
    st.markdown(instructions_text)

# File uploader (restricted to files with a .pdf extension)
uploaded_file = st.file_uploader("Upload a PDF document", type=["pdf"])

# Main flow, executed only once a file has been chosen:
#   1. save the upload under upload_dir,
#   2. check whether the processor already has a store for this file,
#   3. let the user (re-)process it,
#   4. let the user query it once a store exists.
if uploaded_file is not None:
    # The file name is supplied by the client, so keep only its final path
    # component. Without this, a name such as "../../app.py" would make the
    # write below land outside upload_dir. Backslashes are normalised first
    # so Windows-style separators are also stripped on POSIX hosts.
    safe_name = Path(uploaded_file.name.replace("\\", "/")).name
    if safe_name in ("", ".", ".."):
        safe_name = "upload.pdf"

    # Save the uploaded file so the processor can read it from disk
    temp_file_path = os.path.join(upload_dir, safe_name)
    with open(temp_file_path, "wb") as f:
        f.write(uploaded_file.getbuffer())

    # Check if file has already been processed: the existence of a directory
    # named after the file hash inside the processor's db_directory is used
    # as the "already processed" flag.
    file_hash = processor.get_file_hash(temp_file_path)
    persist_directory = os.path.join(processor.config["db_directory"], file_hash)
    already_processed = os.path.exists(persist_directory)

    # Display file info (left column) and processing status/button (right)
    col1, col2 = st.columns(2)
    with col1:
        st.success(f"File uploaded: {uploaded_file.name}")

        # Show file size
        file_size = os.path.getsize(temp_file_path) / (1024 * 1024)  # Convert to MB
        st.info(f"File size: {file_size:.2f} MB")

    with col2:
        if already_processed:
            st.info("This file has already been processed and is ready for querying.")
            process_button = st.button("Re-process file")
        else:
            st.warning("This file needs to be processed before querying.")
            process_button = st.button("Process file")

    # Process the file when button is clicked
    if process_button:
        try:
            with st.spinner("Processing PDF... This may take a minute."):
                vector_store = processor.process_file(temp_file_path)

                # A falsy return value is reported to the user as a failure
                if vector_store:
                    st.success("PDF processed successfully! You can now query the document.")
                else:
                    st.error("Failed to process PDF. The file might be empty or corrupted.")
        except SecurityException as e:
            st.error(f"Security error: {e}")
        except Exception as e:
            # UI boundary: show any other failure instead of crashing the app
            st.error(f"Error processing file: {e}")

    # Query interface
    st.header("Ask questions about the document")

    # Re-check after the processing step above, which may have just created
    # the persist directory during this same script run.
    can_query = os.path.exists(persist_directory)

    if can_query:
        query = st.text_input("Enter your question:")
        k_value = st.slider("Number of results to return", min_value=1, max_value=10, value=3)
        search_clicked = st.button("Search")

        if search_clicked and not query.strip():
            # Give feedback instead of silently ignoring the click
            st.warning("Please enter a question before searching.")
        elif search_clicked:
            with st.spinner("Searching for answers..."):
                try:
                    results = processor.query_document(temp_file_path, query, k=k_value)

                    if not results:
                        st.info("No relevant information found. Try rephrasing your question.")
                    else:
                        st.subheader("Search Results")
                        # One collapsible panel per returned document chunk
                        for i, doc in enumerate(results, start=1):
                            with st.expander(f"Result {i}"):
                                st.markdown(doc.page_content)
                except Exception as e:
                    st.error(f"Error during query: {e}")
    else:
        st.info("Please process the document before querying.")

# Footer: a horizontal rule followed by the app credit line
for footer_markdown in ("---", "PDF Query Engine | Built with Streamlit and LangChain"):
    st.markdown(footer_markdown)