Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import mailbox | |
| from fpdf import FPDF | |
| import os | |
| from datetime import datetime | |
| # Function to convert mbox to PDF | |
# Longest run of characters kept from a single body line before truncation.
_MAX_LINE_CHARS = 180


def _latin1_safe(text):
    """Return *text* with every non-latin-1 character replaced by '?'.

    The PDF is written with the built-in "Arial" font, which can only encode
    latin-1; an unsupported character would otherwise abort the conversion.
    """
    return text.encode("latin-1", errors="replace").decode("latin-1")


def _header_text(value, fallback):
    """Return a header value as readable text, or *fallback* when missing.

    RFC 2047 encoded words (``=?utf-8?B?...?=``) are decoded, and folded
    header whitespace is collapsed so the value fits on a single cell line.
    """
    from email.errors import HeaderParseError
    from email.header import decode_header, make_header

    if not value:
        return fallback
    try:
        text = str(make_header(decode_header(value)))
    except (HeaderParseError, LookupError, UnicodeError, ValueError):
        # Malformed or unknown-charset header: show it undecoded instead of
        # failing the whole message.
        text = str(value)
    return " ".join(text.split()) or fallback


def _payload_text(part):
    """Decode the payload of *part* to ``str``; return ``None`` if empty.

    The charset declared by the part is tried first, then UTF-8, and finally
    latin-1 with replacement, which cannot fail.
    """
    raw = part.get_payload(decode=True)
    if not raw:
        return None
    for charset in (part.get_content_charset(), "utf-8"):
        if not charset:
            continue
        try:
            return raw.decode(charset)
        except (LookupError, UnicodeDecodeError):
            continue
    return raw.decode("latin-1", errors="replace")


def _write_body(pdf, text):
    """Write *text* to *pdf* line by line, truncating over-long lines."""
    for line in text.split('\n'):
        # rstrip drops the CR left behind by CRLF line endings.
        clipped = line.rstrip('\r')[:_MAX_LINE_CHARS]
        pdf.multi_cell(0, 10, _latin1_safe(clipped))


def mbox_to_pdf(mbox_file, output_pdf):
    """Render every message of an mbox file into a single PDF.

    Each message starts on a new page showing its Subject, From and Date
    headers followed by its plain-text content. For multipart messages only
    the ``text/plain`` parts are written; a single-part message is written
    whatever its content type is.

    Args:
        mbox_file: Path of the mbox file to read.
        output_pdf: Path the PDF is written to.

    Returns:
        ``output_pdf``, unchanged.
    """
    pdf = FPDF()
    pdf.set_auto_page_break(auto=True, margin=15)

    mbox = mailbox.mbox(mbox_file)
    try:
        for message in mbox:
            pdf.add_page()
            pdf.set_font("Arial", size=12)

            subject = _header_text(message['subject'], 'No Subject')
            from_ = _header_text(message['from'], 'Unknown Sender')
            date = _header_text(message['date'], 'No Date')

            pdf.cell(0, 10, _latin1_safe(f"Subject: {subject}"), ln=True)
            pdf.cell(0, 10, _latin1_safe(f"From: {from_}"), ln=True)
            pdf.cell(0, 10, _latin1_safe(f"Date: {date}"), ln=True)
            pdf.cell(0, 10, "----------------------------------------", ln=True)

            # Best effort per message: a broken body is reported inside the
            # PDF instead of aborting the conversion of the other messages.
            try:
                if message.is_multipart():
                    parts = [
                        part for part in message.walk()
                        if part.get_content_type() == 'text/plain'
                    ]
                else:
                    parts = [message]
                for part in parts:
                    text = _payload_text(part)
                    if text:
                        _write_body(pdf, text)
            except Exception as e:
                pdf.multi_cell(
                    0, 10,
                    _latin1_safe(f"Error processing message content: {str(e)}"),
                )
    finally:
        # Release the file handle held by the mailbox.
        mbox.close()

    pdf.output(output_pdf)
    return output_pdf
| # Streamlit app | |
def main():
    """Run the Streamlit page: upload an mbox file and download it as a PDF.

    All intermediate files live in a private temporary directory that is
    removed as soon as the PDF bytes have been read. Concurrent sessions
    therefore cannot overwrite each other's files, and nothing is left on
    disk whether the conversion succeeds or fails.
    """
    import tempfile  # function-scope import keeps this block self-contained

    st.title("MBOX to PDF Converter")
    st.write("Upload an MBOX file to convert it to PDF format")

    uploaded_file = st.file_uploader("Choose an MBOX file", type=['mbox'])
    if uploaded_file is None:
        return

    # Nothing is written to disk until the user actually asks for it.
    if not st.button("Convert to PDF"):
        return

    with st.spinner("Converting..."):
        try:
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            output_pdf = f"converted_email_{timestamp}.pdf"

            with tempfile.TemporaryDirectory() as workdir:
                temp_mbox = os.path.join(workdir, "upload.mbox")
                with open(temp_mbox, "wb") as f:
                    f.write(uploaded_file.getvalue())

                pdf_path = mbox_to_pdf(temp_mbox, os.path.join(workdir, output_pdf))

                # Read the PDF into memory before the directory is deleted.
                with open(pdf_path, "rb") as f:
                    pdf_bytes = f.read()

            st.download_button(
                label="Download PDF",
                data=pdf_bytes,
                file_name=output_pdf,
                mime="application/pdf",
            )
            st.success("Conversion completed successfully!")
        except Exception as e:
            # Top-level UI boundary: report the failure to the user.
            st.error(f"Error during conversion: {str(e)}")
# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()