# Hugging Face Space: raymondEDS/dev_LMS (status: Sleeping)
# File size: 17,398 Bytes

import streamlit as st
import PyPDF2
import io
import base64
from datetime import datetime
import json
import tempfile
import os

# Browser-tab title/icon and overall page layout
st.set_page_config(
    layout="wide",
    initial_sidebar_state="expanded",
    page_title="Dev LMS",
    page_icon="📚",
)

# Seed per-session defaults the first time a session executes the script;
# later reruns of the same session keep whatever is already stored.
if 'current_user' not in st.session_state:
    st.session_state['current_user'] = "User"
if 'uploaded_documents' not in st.session_state:
    st.session_state['uploaded_documents'] = {}

def save_document_info(filename, file_content, file_type, temp_path=None):
    """Append a record describing an uploaded file to the session's document list.

    Args:
        filename: Name to record for the document.
        file_content: The file's data. ``bytes`` are decoded as latin-1;
            anything else is converted with ``str()``.
        file_type: Label stored as the record's ``file_type`` (e.g. "PDF").
        temp_path: Optional path of a temporary copy of the file on disk.

    Note:
        ``content`` holds the decoded raw file data, not text extracted from
        the document, and ``size`` is ``len(file_content)``.
    """
    store = st.session_state.uploaded_documents
    if 'documents' not in store:
        store['documents'] = []

    # Normalise the payload to text so the record only contains plain strings
    if isinstance(file_content, bytes):
        content_text = file_content.decode('latin-1')
    else:
        content_text = str(file_content)

    record = {
        'filename': filename,
        'upload_time': datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        'file_type': file_type,
        'size': len(file_content),
        'content': content_text,
        'temp_path': temp_path,  # kept so viewers can re-read the file later
    }
    store['documents'].append(record)

def extract_pdf_text_from_temp(temp_path):
    """Read the PDF at ``temp_path`` and return the text of all its pages.

    Each page's text is followed by a newline. Any failure is reported to the
    user with ``st.error`` and an empty string is returned instead.
    """
    try:
        with open(temp_path, "rb") as handle:
            reader = PyPDF2.PdfReader(handle)
            # Extract while the file is still open; the reader needs the handle
            page_chunks = [page.extract_text() + "\n" for page in reader.pages]
        return "".join(page_chunks)
    except Exception as exc:
        st.error(f"Error reading PDF: {str(exc)}")
        return ""

def extract_pdf_text_from_memory(uploaded_file):
    """Return the text of every page of an in-memory PDF.

    ``uploaded_file`` is handed directly to ``PyPDF2.PdfReader``. Each page's
    text is followed by a newline. On any failure the error is shown with
    ``st.error`` and an empty string is returned.
    """
    try:
        reader = PyPDF2.PdfReader(uploaded_file)
        collected = []
        for page in reader.pages:
            collected.append(page.extract_text() + "\n")
        return "".join(collected)
    except Exception as exc:
        st.error(f"Error reading PDF: {str(exc)}")
        return ""

def cleanup_temp_file(temp_path):
    """Delete the file at ``temp_path`` if there is one.

    An empty/``None`` path or a path that does not exist is silently ignored.
    A failed deletion is reported with ``st.warning`` rather than raised.
    """
    try:
        if not temp_path:
            return
        if not os.path.exists(temp_path):
            return
        os.remove(temp_path)
    except Exception as exc:
        st.warning(f"Could not clean up temporary file: {str(exc)}")

def main():
    """Draw the sidebar navigation and render the page the user selected."""
    # Page label -> renderer; insertion order is the order shown in the selectbox
    pages = {
        "Dashboard": show_dashboard,
        "Upload Documents": show_upload_documents,
        "My Documents": show_my_documents,
        "Document Library": show_document_library,
        "Settings": show_settings,
    }

    with st.sidebar:
        st.title("📚 Dev LMS")
        st.markdown("---")
        page = st.selectbox("Navigation", list(pages))

    render_page = pages.get(page)
    if render_page is not None:
        render_page()

def show_dashboard():
    """Render the dashboard: three summary metrics and the latest uploads."""
    st.title("📊 Dashboard")
    st.markdown("---")

    documents = st.session_state.uploaded_documents.get('documents', [])

    # One metric per column, left to right
    summary = (
        ("Total Documents", len(documents)),
        ("System Status", "Active"),
        ("Storage Used", "Session"),
    )
    for column, (label, value) in zip(st.columns(3), summary):
        with column:
            st.metric(label=label, value=value, delta="0")

    st.markdown("---")

    st.subheader("📈 Recent Activity")
    if not documents:
        st.info("No documents uploaded yet. Start by uploading a PDF document!")
        return

    # The five most recently appended records, oldest of them first
    for doc in documents[-5:]:
        with st.container():
            name_col, time_col, type_col = st.columns([3, 2, 1])
            with name_col:
                st.write(f"**{doc['filename']}**")
            with time_col:
                st.write(doc['upload_time'])
            with type_col:
                st.write(f"{doc['file_type']}")
            st.markdown("---")

def show_upload_documents():
    """Render the upload page: choose a PDF, preview its text, and store it.

    The uploaded bytes are written to a temporary ``.pdf`` file so that
    ``extract_pdf_text_from_temp`` can read them. Streamlit re-executes this
    function on every interaction, so the temporary file is deleted again at
    the end of each run unless the user clicked "Upload Document" and the
    document record (which references the file) was saved.

    After a successful save the confirmation text is parked in
    ``st.session_state`` and the uploader's widget key is changed before
    rerunning. The rerun then starts with an empty uploader and shows the
    confirmation, instead of discarding it together with the interrupted run.
    """
    st.title("📤 Upload Documents")
    st.markdown("---")

    # Confirmation left behind by the previous run's successful upload
    if 'upload_flash_message' in st.session_state:
        st.success(st.session_state['upload_flash_message'])
        st.balloons()
        del st.session_state['upload_flash_message']

    # Add information about file upload
    st.info("💡 **Note:** File upload uses temporary storage for better compatibility with Hugging Face Spaces.")

    # A new widget key yields a fresh, empty uploader; the counter is bumped
    # after each successful upload to clear the previous selection.
    if 'upload_widget_generation' not in st.session_state:
        st.session_state['upload_widget_generation'] = 0

    uploaded_file = st.file_uploader(
        "Choose a PDF file",
        type=['pdf'],
        help="Upload PDF documents to the LMS (max 200MB)",
        accept_multiple_files=False,
        key=f"pdf_uploader_{st.session_state['upload_widget_generation']}"
    )

    upload_saved = False

    if uploaded_file is not None:
        temp_path = None
        try:
            # Display file info
            file_details = {
                "Filename": uploaded_file.name,
                "File size": f"{uploaded_file.size / 1024:.2f} KB",
                "File type": uploaded_file.type
            }

            st.write("**File Details:**")
            for key, value in file_details.items():
                st.write(f"- {key}: {value}")

            try:
                # Create temporary file for PDF processing
                bytes_data = uploaded_file.getvalue()
                with tempfile.NamedTemporaryFile(mode="wb", suffix=".pdf", delete=False) as temp:
                    temp.write(bytes_data)
                    temp_path = temp.name

                st.success(f"📁 File temporarily stored at: {temp_path}")

                # Extract and display PDF content using temporary file
                pdf_text = extract_pdf_text_from_temp(temp_path)

                if pdf_text.strip():
                    st.subheader("📄 Document Preview")
                    with st.expander("View extracted text"):
                        st.text_area("PDF Content", pdf_text, height=300)
                else:
                    st.warning("⚠️ Could not extract text from this PDF. The file may be image-based or encrypted.")

                # Upload button
                if st.button("Upload Document", type="primary"):
                    try:
                        # Save document info with temporary file path
                        save_document_info(
                            uploaded_file.name,
                            bytes_data,
                            "PDF",
                            temp_path
                        )
                    except Exception as e:
                        st.error(f"❌ Error uploading document: {str(e)}")
                        st.info("💡 Try uploading a smaller file or refresh the page.")
                    else:
                        # The saved record now owns the temporary file
                        upload_saved = True
                        st.session_state['upload_flash_message'] = (
                            f"✅ Document '{uploaded_file.name}' uploaded successfully!"
                        )
                        st.session_state['upload_widget_generation'] += 1

            except Exception as e:
                st.error(f"❌ Error creating temporary file: {str(e)}")
                st.info("💡 Please try uploading a different PDF file.")

        except Exception as e:
            st.error(f"❌ Error processing file: {str(e)}")
            st.info("💡 Please try uploading a different PDF file.")
        finally:
            # Without this, every rerun of the page would leave another
            # orphaned temporary copy of the PDF on disk.
            if not upload_saved:
                cleanup_temp_file(temp_path)

    if upload_saved:
        # Rerun outside the try blocks so the rerun signal cannot be mistaken
        # for an upload error; the next run shows the stored confirmation.
        st.rerun()

    # Add helpful tips
    with st.expander("💡 Upload Tips"):
        st.markdown("""
        **For best results:**
        - Use PDF files under 200MB
        - Ensure PDFs contain text (not just images)
        - Avoid password-protected PDFs
        - If upload fails, try refreshing the page
        
        **Technical details:**
        - Files are temporarily stored on the server
        - Text extraction uses temporary file processing
        - Automatic cleanup of temporary files
        
        **Supported formats:** PDF only
        """)

def _filter_documents(documents, search_term):
    """Return the documents whose filename or stored content contains ``search_term`` (case-insensitive)."""
    if not search_term:
        return documents
    needle = search_term.lower()
    return [
        doc for doc in documents
        if needle in doc['filename'].lower() or
           needle in doc.get('content', '').lower()
    ]


def _render_document_details(doc, index, key_prefix):
    """Show one document's metadata and text, preferring a fresh extract from its temp file."""
    st.subheader(f"📄 {doc['filename']}")
    st.write(f"**Uploaded:** {doc['upload_time']}")
    st.write(f"**Size:** {doc['size']} bytes")

    stored_content = doc.get('content')
    temp_path = doc.get('temp_path')

    # No usable temporary file: the stored content is all there is to show
    if not (temp_path and os.path.exists(temp_path)):
        if stored_content:
            st.text_area("Document Content", stored_content, height=400, key=f"{key_prefix}content_{index}")
        return

    try:
        # Extract fresh content from temporary file
        fresh_content = extract_pdf_text_from_temp(temp_path)
    except Exception as e:
        st.warning(f"Could not read from temporary file: {str(e)}")
        fresh_content = ""

    if fresh_content.strip():
        st.text_area("Document Content (Fresh Extract)", fresh_content, height=400, key=f"{key_prefix}fresh_content_{index}")
    elif stored_content:
        # Fall back to stored content
        st.text_area("Document Content (Stored)", stored_content, height=400, key=f"{key_prefix}content_{index}")


def _render_document_browser(documents, search_label, key_prefix):
    """Render a search box and the matching documents, each with a View button.

    ``key_prefix`` namespaces the widget keys so the two pages that share this
    helper keep distinct widget identities.
    """
    search_term = st.text_input(search_label, placeholder="Enter filename or content...")

    filtered_docs = _filter_documents(documents, search_term)
    if not filtered_docs:
        st.warning("No documents match your search criteria.")
        return

    for i, doc in enumerate(filtered_docs):
        with st.container():
            name_col, time_col, type_col, action_col = st.columns([3, 2, 1, 1])

            with name_col:
                st.write(f"**{doc['filename']}**")

            with time_col:
                st.write(doc['upload_time'])

            with type_col:
                st.write(f"{doc['file_type']}")

            with action_col:
                view_clicked = st.button(f"View {i}", key=f"{key_prefix}view_{i}")

            # Render details below the row at full width; drawing them inside
            # the button's column would squeeze the text into 1/7 of the page.
            if view_clicked:
                _render_document_details(doc, i, key_prefix)

            st.markdown("---")


def show_my_documents():
    """Show uploaded documents"""
    st.title("📁 My Documents")
    st.markdown("---")

    documents = st.session_state.uploaded_documents.get('documents', [])

    if not documents:
        st.info("You haven't uploaded any documents yet.")
        return

    _render_document_browser(documents, "🔍 Search documents", key_prefix="")


def show_document_library():
    """Show all documents in the system"""
    st.title("📚 Document Library")
    st.markdown("---")

    documents = st.session_state.uploaded_documents.get('documents', [])

    if not documents:
        st.info("No documents have been uploaded to the system yet.")
        return

    _render_document_browser(documents, "🔍 Search all documents", key_prefix="lib_")

def show_settings():
    """Render the settings page: app info, export, clearing, and temp-file status."""
    st.title("⚙️ Settings")
    st.markdown("---")

    st.subheader("🔧 System Information")
    st.write("**Version:** Dev LMS v1.0")
    st.write("**Features:**")
    for feature_line in (
        "- PDF document upload with temporary storage",
        "- Document search and preview",
        "- Document library",
        "- Session-based storage",
    ):
        st.write(feature_line)

    st.markdown("---")

    # Export: offer every stored record as one JSON download
    if st.button("📥 Export All Data"):
        docs_to_export = st.session_state.uploaded_documents.get('documents', [])
        if not docs_to_export:
            st.info("No data to export.")
        else:
            payload = json.dumps(
                {
                    'export_date': datetime.now().isoformat(),
                    'documents': docs_to_export,
                },
                indent=2,
            )
            st.download_button(
                label="Download JSON Export",
                data=payload,
                file_name="lms_data_export.json",
                mime="application/json",
            )

    st.markdown("---")

    # Clear: delete each record's temporary file, then drop all records
    if st.button("🗑️ Clear All Data"):
        existing_docs = st.session_state.uploaded_documents.get('documents')
        if not existing_docs:
            st.info("No documents to clear.")
        else:
            for doc in existing_docs:
                doc_temp_path = doc.get('temp_path')
                if doc_temp_path:
                    cleanup_temp_file(doc_temp_path)

            st.session_state.uploaded_documents['documents'] = []
            st.success("All documents and temporary files have been cleared!")
            st.rerun()

    st.markdown("---")

    # Cleanup: forget temp paths whose file no longer exists on disk
    if st.button("🧹 Cleanup Temporary Files"):
        known_docs = st.session_state.uploaded_documents.get('documents', [])
        stale_docs = [
            doc for doc in known_docs
            if doc.get('temp_path') and not os.path.exists(doc['temp_path'])
        ]
        for doc in stale_docs:
            doc.pop('temp_path', None)

        cleaned_count = len(stale_docs)
        if cleaned_count > 0:
            st.success(f"Cleaned up {cleaned_count} missing temporary file references!")
        else:
            st.info("No cleanup needed - all temporary files are properly managed.")

    st.markdown("---")

    # Status: record count and how many temp files are still present on disk
    st.subheader("📊 System Status")
    all_docs = st.session_state.uploaded_documents.get('documents', [])
    live_temp_files = [
        doc for doc in all_docs
        if doc.get('temp_path') and os.path.exists(doc['temp_path'])
    ]

    docs_col, temp_col = st.columns(2)
    with docs_col:
        st.metric("Total Documents", len(all_docs))
    with temp_col:
        st.metric("Active Temp Files", len(live_temp_files))

# Script entry point: render the app only when this file is executed as the
# main module, not when it is imported.
if __name__ == "__main__":
    main()