"""Dev LMS: a small Streamlit app for uploading, previewing and searching PDFs.

All state lives in ``st.session_state``; uploaded PDFs are additionally kept
as temporary files on disk so their text can be re-extracted on demand.

Session-state keys used by this module:

* ``uploaded_documents`` -- dict holding the ``'documents'`` list.
* ``current_user``       -- display name of the current user.
* ``pending_temp``       -- temp file staged for the upload currently shown
                            in the uploader (``None`` when nothing is staged).
* ``uploader_generation``-- counter baked into the uploader's widget key;
                            bumping it resets the uploader.
* ``flash``              -- one-shot message shown at the top of the next run.
"""

import base64
import io
import json
import os
import tempfile
import zlib
from datetime import datetime

import PyPDF2
import streamlit as st

# NOTE(review): several emoji in the string literals below look mis-encoded
# (mojibake). They are reproduced unchanged here -- confirm the intended
# characters against the original file before repairing them.

# Page configuration
st.set_page_config(
    page_title="Dev LMS",
    page_icon="๐Ÿ“š",
    layout="wide",
    initial_sidebar_state="expanded"
)

# Initialize session state
if 'uploaded_documents' not in st.session_state:
    st.session_state.uploaded_documents = {}
if 'current_user' not in st.session_state:
    st.session_state.current_user = "User"
if 'pending_temp' not in st.session_state:
    st.session_state.pending_temp = None
if 'uploader_generation' not in st.session_state:
    st.session_state.uploader_generation = 0
if 'flash' not in st.session_state:
    st.session_state.flash = None


def save_document_info(filename, file_content, file_type, temp_path=None,
                       extracted_text=None):
    """Append a document record to ``st.session_state.uploaded_documents``.

    Args:
        filename: Name shown for the document.
        file_content: Raw file payload (``bytes``) or any other object.
        file_type: Short type label, e.g. ``"PDF"``.
        temp_path: Optional path of the on-disk temporary copy.
        extracted_text: Optional text extracted from the document. When given
            it is stored as the searchable ``'content'``; when omitted the
            payload itself is stored (bytes decoded as latin-1, anything else
            via ``str``), which is the historical behaviour.
    """
    if 'documents' not in st.session_state.uploaded_documents:
        st.session_state.uploaded_documents['documents'] = []

    if extracted_text is not None:
        content = extracted_text
    elif isinstance(file_content, bytes):
        content = file_content.decode('latin-1')
    else:
        content = str(file_content)

    document_info = {
        'filename': filename,
        'upload_time': datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        'file_type': file_type,
        'size': len(file_content),
        'content': content,
        'temp_path': temp_path,  # Store temp path for later use
    }
    st.session_state.uploaded_documents['documents'].append(document_info)


def _collect_page_text(pdf_reader):
    """Return the text of every page, each followed by a newline."""
    # ``or ""`` keeps the join safe should a page yield no text object.
    return "".join((page.extract_text() or "") + "\n" for page in pdf_reader.pages)


def extract_pdf_text_from_temp(temp_path):
    """Extract text from the PDF at ``temp_path``.

    Returns the extracted text, or ``""`` after reporting the problem with
    ``st.error`` when the file cannot be read or parsed.
    """
    try:
        with open(temp_path, "rb") as pdf_file:
            return _collect_page_text(PyPDF2.PdfReader(pdf_file))
    except Exception as e:  # UI boundary: report any failure, never crash the page
        st.error(f"Error reading PDF: {str(e)}")
        return ""


def extract_pdf_text_from_memory(uploaded_file):
    """Extract text from an in-memory PDF (a file-like object).

    Returns the extracted text, or ``""`` after reporting the problem with
    ``st.error`` when the data cannot be parsed.
    """
    try:
        return _collect_page_text(PyPDF2.PdfReader(uploaded_file))
    except Exception as e:  # UI boundary: report any failure, never crash the page
        st.error(f"Error reading PDF: {str(e)}")
        return ""


def cleanup_temp_file(temp_path):
    """Delete ``temp_path`` if it is set and exists; warn instead of raising."""
    try:
        if temp_path and os.path.exists(temp_path):
            os.remove(temp_path)
    except OSError as e:
        st.warning(f"Could not clean up temporary file: {str(e)}")


def _discard_pending_temp():
    """Delete the temp file staged for a not-yet-confirmed upload, if any."""
    pending = st.session_state.pending_temp
    if pending:
        cleanup_temp_file(pending['path'])
        st.session_state.pending_temp = None


def _stage_pending_temp(filename, bytes_data):
    """Return a temp file holding ``bytes_data``, reusing the staged one.

    Streamlit re-runs the whole script on every interaction, so without reuse
    each rerun would leave another ``delete=False`` temp file behind.

    Raises:
        OSError: if the temporary file cannot be created or written.
    """
    fingerprint = (filename, len(bytes_data), zlib.crc32(bytes_data))
    pending = st.session_state.pending_temp
    if (pending and pending['fingerprint'] == fingerprint
            and os.path.exists(pending['path'])):
        return pending['path']

    # A different file was selected (or the old copy vanished): start afresh.
    _discard_pending_temp()
    temp = tempfile.NamedTemporaryFile(mode="wb", suffix=".pdf", delete=False)
    try:
        with temp:
            temp.write(bytes_data)
    except OSError:
        cleanup_temp_file(temp.name)
        raise
    st.session_state.pending_temp = {'fingerprint': fingerprint, 'path': temp.name}
    return temp.name


def _queue_flash(message, balloons=False):
    """Remember a success message to show after the upcoming ``st.rerun()``."""
    st.session_state.flash = {'message': message, 'balloons': balloons}


def _show_flash():
    """Display and clear the message queued by ``_queue_flash``, if any."""
    flash = st.session_state.flash
    if flash:
        st.session_state.flash = None
        st.success(flash['message'])
        if flash['balloons']:
            st.balloons()


def main():
    """Render the sidebar navigation and dispatch to the selected page."""
    pages = {
        "Dashboard": show_dashboard,
        "Upload Documents": show_upload_documents,
        "My Documents": show_my_documents,
        "Document Library": show_document_library,
        "Settings": show_settings,
    }

    # Sidebar for navigation
    with st.sidebar:
        st.title("๐Ÿ“š Dev LMS")
        st.markdown("---")

        # Navigation
        page = st.selectbox("Navigation", list(pages))

    if page != "Upload Documents":
        # Leaving the upload page abandons any unconfirmed upload.
        _discard_pending_temp()

    _show_flash()

    # Main content area
    pages[page]()


def show_dashboard():
    """Show the main dashboard"""
    st.title("๐Ÿ“Š Dashboard")
    st.markdown("---")

    documents = st.session_state.uploaded_documents.get('documents', [])

    col1, col2, col3 = st.columns(3)
    with col1:
        st.metric(label="Total Documents", value=len(documents), delta="0")
    with col2:
        st.metric(label="System Status", value="Active", delta="0")
    with col3:
        st.metric(label="Storage Used", value="Session", delta="0")

    st.markdown("---")

    # Recent activity
    st.subheader("๐Ÿ“ˆ Recent Activity")
    if not documents:
        st.info("No documents uploaded yet. Start by uploading a PDF document!")
        return

    for doc in documents[-5:]:
        with st.container():
            col1, col2, col3 = st.columns([3, 2, 1])
            with col1:
                st.write(f"**{doc['filename']}**")
            with col2:
                st.write(doc['upload_time'])
            with col3:
                st.write(f"{doc['file_type']}")
            st.markdown("---")


def _handle_uploaded_pdf(uploaded_file):
    """Preview ``uploaded_file`` and store it once the user confirms."""
    try:
        bytes_data = uploaded_file.getvalue()

        # Display file info
        file_details = {
            "Filename": uploaded_file.name,
            "File size": f"{uploaded_file.size / 1024:.2f} KB",
            "File type": uploaded_file.type
        }
        st.write("**File Details:**")
        for key, value in file_details.items():
            st.write(f"- {key}: {value}")
    except Exception as e:
        st.error(f"โŒ Error processing file: {str(e)}")
        st.info("๐Ÿ’ก Please try uploading a different PDF file.")
        return

    # Create (or reuse) the temporary file for better PDF processing
    try:
        temp_path = _stage_pending_temp(uploaded_file.name, bytes_data)
    except OSError as e:
        st.error(f"โŒ Error creating temporary file: {str(e)}")
        st.info("๐Ÿ’ก Please try uploading a different PDF file.")
        return

    st.success(f"๐Ÿ“ File temporarily stored at: {temp_path}")

    # Extract and display PDF content using temporary file
    pdf_text = extract_pdf_text_from_temp(temp_path)
    if pdf_text.strip():
        st.subheader("๐Ÿ“„ Document Preview")
        with st.expander("View extracted text"):
            st.text_area("PDF Content", pdf_text, height=300)
    else:
        st.warning("โš ๏ธ Could not extract text from this PDF. The file may be image-based or encrypted.")

    # Upload button
    if not st.button("Upload Document", type="primary"):
        return

    try:
        # Save document info with temporary file path
        save_document_info(
            uploaded_file.name,
            bytes_data,
            "PDF",
            temp_path,
            extracted_text=pdf_text,
        )
    except Exception as e:
        st.error(f"โŒ Error uploading document: {str(e)}")
        st.info("๐Ÿ’ก Try uploading a smaller file or refresh the page.")
        # Clean up temp file on error
        _discard_pending_temp()
        return

    # The stored document now owns the temp file; stop tracking it as pending.
    st.session_state.pending_temp = None
    # A new widget key is what actually clears the file uploader.
    st.session_state.uploader_generation += 1
    # Shown after the rerun; anything rendered right before st.rerun() is lost.
    _queue_flash(
        f"โœ… Document '{uploaded_file.name}' uploaded successfully!",
        balloons=True,
    )
    st.rerun()


def show_upload_documents():
    """Show document upload interface"""
    st.title("๐Ÿ“ค Upload Documents")
    st.markdown("---")

    # Add information about file upload
    st.info("๐Ÿ’ก **Note:** File upload uses temporary storage for better compatibility with Hugging Face Spaces.")

    uploaded_file = st.file_uploader(
        "Choose a PDF file",
        type=['pdf'],
        help="Upload PDF documents to the LMS (max 200MB)",
        accept_multiple_files=False,
        key=f"pdf_uploader_{st.session_state.uploader_generation}",
    )

    if uploaded_file is None:
        # The selection was removed: drop the staged copy as well.
        _discard_pending_temp()
    else:
        _handle_uploaded_pdf(uploaded_file)

    # Add helpful tips
    with st.expander("๐Ÿ’ก Upload Tips"):
        st.markdown("""
        **For best results:**
        - Use PDF files under 200MB
        - Ensure PDFs contain text (not just images)
        - Avoid password-protected PDFs
        - If upload fails, try refreshing the page

        **Technical details:**
        - Files are temporarily stored on the server
        - Text extraction uses temporary file processing
        - Automatic cleanup of temporary files

        **Supported formats:** PDF only
        """)


def _filter_documents(documents, search_term):
    """Return the documents whose filename or content contains ``search_term``.

    Matching is case-insensitive; an empty term returns ``documents`` as is.
    """
    if not search_term:
        return documents
    needle = search_term.lower()
    return [
        doc for doc in documents
        if needle in doc['filename'].lower()
        or needle in doc.get('content', '').lower()
    ]


def _render_document_detail(doc, index, key_prefix):
    """Show metadata and text of ``doc``.

    Text freshly extracted from the temp file is preferred; the stored
    content is the fallback when that file is gone or yields no text.
    """
    st.subheader(f"๐Ÿ“„ {doc['filename']}")
    st.write(f"**Uploaded:** {doc['upload_time']}")
    st.write(f"**Size:** {doc['size']} bytes")

    stored = doc.get('content')
    temp_path = doc.get('temp_path')

    # Check if we have a temporary file path for better content extraction
    if temp_path and os.path.exists(temp_path):
        # extract_pdf_text_from_temp reports its own errors and returns "".
        fresh_content = extract_pdf_text_from_temp(temp_path)
        if fresh_content.strip():
            st.text_area("Document Content (Fresh Extract)", fresh_content,
                         height=400, key=f"{key_prefix}fresh_content_{index}")
        elif stored:
            # Fall back to stored content
            st.text_area("Document Content (Stored)", stored,
                         height=400, key=f"{key_prefix}content_{index}")
    elif stored:
        # Display stored content
        st.text_area("Document Content", stored,
                     height=400, key=f"{key_prefix}content_{index}")


def _render_document_list(documents, search_label, key_prefix):
    """Render a search box and one row per matching document.

    ``key_prefix`` namespaces the widget keys so that the two pages using
    this helper keep independent widget state.
    """
    # Search functionality
    search_term = st.text_input(search_label, placeholder="Enter filename or content...")

    # Filter documents based on search
    filtered_docs = _filter_documents(documents, search_term)
    if not filtered_docs:
        st.warning("No documents match your search criteria.")
        return

    # Display documents
    for i, doc in enumerate(filtered_docs):
        with st.container():
            col1, col2, col3, col4 = st.columns([3, 2, 1, 1])
            with col1:
                st.write(f"**{doc['filename']}**")
            with col2:
                st.write(doc['upload_time'])
            with col3:
                st.write(f"{doc['file_type']}")
            with col4:
                clicked = st.button(f"View {i}", key=f"{key_prefix}view_{i}")
            # Rendered below the row so the text area gets the full width.
            if clicked:
                _render_document_detail(doc, i, key_prefix)
            st.markdown("---")


def show_my_documents():
    """Show uploaded documents"""
    st.title("๐Ÿ“ My Documents")
    st.markdown("---")

    documents = st.session_state.uploaded_documents.get('documents', [])
    if not documents:
        st.info("You haven't uploaded any documents yet.")
        return

    _render_document_list(documents, "๐Ÿ” Search documents", key_prefix="")


def show_document_library():
    """Show all documents in the system"""
    st.title("๐Ÿ“š Document Library")
    st.markdown("---")

    documents = st.session_state.uploaded_documents.get('documents', [])
    if not documents:
        st.info("No documents have been uploaded to the system yet.")
        return

    _render_document_list(documents, "๐Ÿ” Search all documents", key_prefix="lib_")


def show_settings():
    """Show user settings"""
    st.title("โš™๏ธ Settings")
    st.markdown("---")

    st.subheader("๐Ÿ”ง System Information")
    st.write("**Version:** Dev LMS v1.0")
    st.write("**Features:**")
    st.write("- PDF document upload with temporary storage")
    st.write("- Document search and preview")
    st.write("- Document library")
    st.write("- Session-based storage")

    st.markdown("---")

    # Export data option
    if st.button("๐Ÿ“ฅ Export All Data"):
        documents = st.session_state.uploaded_documents.get('documents', [])
        if documents:
            # Create JSON export
            export_data = {
                'export_date': datetime.now().isoformat(),
                'documents': documents
            }
            st.download_button(
                label="Download JSON Export",
                data=json.dumps(export_data, indent=2),
                file_name="lms_data_export.json",
                mime="application/json"
            )
        else:
            st.info("No data to export.")

    st.markdown("---")

    # Clear data option
    if st.button("๐Ÿ—‘๏ธ Clear All Data"):
        documents = st.session_state.uploaded_documents.get('documents')
        if documents:
            # Clean up temporary files before clearing data
            for doc in documents:
                cleanup_temp_file(doc.get('temp_path'))
            st.session_state.uploaded_documents['documents'] = []
            # Queued, because st.rerun() discards anything rendered in this run.
            _queue_flash("All documents and temporary files have been cleared!")
            st.rerun()
        else:
            st.info("No documents to clear.")

    st.markdown("---")

    # Cleanup temporary files option
    if st.button("๐Ÿงน Cleanup Temporary Files"):
        documents = st.session_state.uploaded_documents.get('documents', [])
        cleaned_count = 0
        for doc in documents:
            if doc.get('temp_path') and not os.path.exists(doc['temp_path']):
                # Remove temp_path reference if file doesn't exist
                doc.pop('temp_path', None)
                cleaned_count += 1

        if cleaned_count > 0:
            st.success(f"Cleaned up {cleaned_count} missing temporary file references!")
        else:
            st.info("No cleanup needed - all temporary files are properly managed.")

    st.markdown("---")

    # System status
    st.subheader("๐Ÿ“Š System Status")
    documents = st.session_state.uploaded_documents.get('documents', [])
    temp_files_count = sum(
        1 for doc in documents
        if doc.get('temp_path') and os.path.exists(doc['temp_path'])
    )

    col1, col2 = st.columns(2)
    with col1:
        st.metric("Total Documents", len(documents))
    with col2:
        st.metric("Active Temp Files", temp_files_count)


if __name__ == "__main__":
    main()