Spaces:

Suryacoder
/

Smart-Email-Sorter

Sleeping

Smart-Email-Sorter / backend /gmail_fetcher.py

Surya8663

Reduce number of emails fetched to 3 for performance

5c04706 4 months ago

3.9 kB

	import imaplib
	import email
	from email.header import decode_header
	import os
	from dotenv import load_dotenv
	from bs4 import BeautifulSoup # A library for parsing HTML

	# Load variables from a local .env file at import time so that the
	# os.getenv("GMAIL_USER") / os.getenv("GMAIL_PASSWORD") lookups inside
	# fetch_latest_emails() can find the credentials.
	load_dotenv()

	def _decode_header_text(raw_header):
	    """Return an RFC 2047 encoded header value as plain text.

	    A missing header (``None``) yields ``""``.  Every fragment produced by
	    ``decode_header`` is decoded and joined, so a subject made of several
	    encoded words is not truncated to its first fragment.
	    """
	    if raw_header is None:
	        return ""
	    pieces = []
	    for fragment, charset in decode_header(raw_header):
	        if isinstance(fragment, bytes):
	            try:
	                fragment = fragment.decode(charset or "utf-8", errors="ignore")
	            except LookupError:
	                # Unknown or bogus charset label (e.g. "unknown-8bit").
	                fragment = fragment.decode("utf-8", errors="ignore")
	        pieces.append(fragment)
	    return "".join(pieces)


	def _decode_part_text(part):
	    """Return the decoded text of one MIME part, or ``None`` if it has none.

	    The charset declared on the part is honoured (UTF-8 when absent or
	    unknown) and undecodable bytes are replaced instead of discarding the
	    whole part.
	    """
	    payload = part.get_payload(decode=True)
	    if payload is None:
	        return None
	    charset = part.get_content_charset() or "utf-8"
	    try:
	        return payload.decode(charset, errors="replace")
	    except LookupError:
	        return payload.decode("utf-8", errors="replace")


	def _html_to_text(html):
	    """Strip markup from *html*; return ``""`` if the parser fails."""
	    try:
	        return BeautifulSoup(html, "html.parser").get_text()
	    except Exception:
	        # Parser failures are library-specific; body extraction is
	        # best-effort, so one bad part must not abort the whole fetch.
	        return ""


	def _extract_body(msg):
	    """Return the best plain-text rendering of the body of *msg*.

	    For multipart messages the first non-attachment ``text/plain`` part
	    wins; otherwise the last non-attachment ``text/html`` part is used with
	    its tags stripped.  Single-part messages return their decoded payload
	    (tags stripped when the message itself is ``text/html``).
	    """
	    if not msg.is_multipart():
	        text = _decode_part_text(msg) or ""
	        if msg.get_content_type() == "text/html":
	            text = _html_to_text(text)
	        return text

	    html_fallback = None
	    for part in msg.walk():
	        # Skip attachments
	        if "attachment" in str(part.get("Content-Disposition")).lower():
	            continue
	        content_type = part.get_content_type()
	        if content_type == "text/plain":
	            text = _decode_part_text(part)
	            if text is not None:
	                return text  # Found plain text, stop searching
	        elif content_type == "text/html":
	            html = _decode_part_text(part)
	            if html is not None:
	                html_fallback = html

	    # HTML is only parsed when no plain-text alternative exists.
	    if html_fallback is None:
	        return ""
	    return _html_to_text(html_fallback)


	def fetch_latest_emails(num_emails=5):
	    """Fetch the most recent messages from the Gmail inbox over IMAP.

	    Credentials are read from the ``GMAIL_USER`` and ``GMAIL_PASSWORD``
	    environment variables.

	    Args:
	        num_emails: Maximum number of messages to return.  Zero or a
	            negative value returns an empty list without connecting.

	    Returns:
	        A list of dicts with the keys ``"From"``, ``"Subject"`` and
	        ``"Body"``, newest message first.  An empty list is returned when
	        credentials are missing, the inbox is empty, or any error occurs
	        (the error is printed, not raised).

	    NOTE: fetching ``RFC822`` from a mailbox selected read-write sets the
	    ``\\Seen`` flag, i.e. fetched messages are marked as read on the server.
	    """
	    try:
	        # Get credentials securely from the .env file
	        username = os.getenv("GMAIL_USER")
	        password = os.getenv("GMAIL_PASSWORD")

	        if not username or not password:
	            print("Error: GMAIL_USER and GMAIL_PASSWORD must be set in the .env file.")
	            return []

	        # mail_ids[-0:] would select the *entire* mailbox, so bail out early.
	        if num_emails <= 0:
	            return []

	        emails = []
	        # The context manager logs out even if login/search/fetch raises.
	        with imaplib.IMAP4_SSL("imap.gmail.com") as mail:
	            mail.login(username, password)
	            mail.select("inbox")

	            # Search for all emails and get their IDs
	            status, data = mail.search(None, "ALL")
	            if status != "OK" or not data or not data[0]:
	                return []
	            mail_ids = data[0].split()

	            # Walk the newest IDs first so the result is already ordered.
	            for mail_id in reversed(mail_ids[-num_emails:]):
	                status, fetched = mail.fetch(mail_id, "(RFC822)")
	                # A usable response starts with an (envelope, bytes) tuple.
	                if status != "OK" or not fetched or not isinstance(fetched[0], tuple):
	                    continue
	                msg = email.message_from_bytes(fetched[0][1])

	                emails.append({
	                    "From": str(msg.get("From", "")),
	                    "Subject": _decode_header_text(msg["Subject"]),
	                    "Body": _extract_body(msg).strip(),
	                })

	        return emails

	    except Exception as e:
	        # Top-level boundary: callers expect a list, never an exception.
	        print(f"An error occurred while fetching emails: {e}")
	        return []

	# Manual smoke test: fetch the latest emails and print a short preview.
	if __name__ == "__main__":
	    import sys

	    # Round-trip text through the terminal's own encoding: characters the
	    # console cannot represent become "?" instead of making print() raise
	    # UnicodeEncodeError (a UTF-8 round-trip cannot guarantee that).
	    console_encoding = sys.stdout.encoding or "utf-8"

	    def _console_safe(text):
	        """Return *text* with characters the console cannot show replaced."""
	        return text.encode(console_encoding, "replace").decode(console_encoding)

	    latest_emails = fetch_latest_emails()
	    if latest_emails:
	        print(f"Successfully fetched {len(latest_emails)} emails.")
	        for e in latest_emails:
	            safe_from = _console_safe(e['From'])
	            safe_subject = _console_safe(e['Subject'])
	            # Only the first 100 characters of the body are previewed.
	            safe_body = _console_safe(e['Body'][:100])

	            print(f"From: {safe_from}\nSubject: {safe_subject}\nBody: {safe_body}...\n")
	    else:
	        print("No emails were fetched.")