Spaces:

Agents-MCP-Hackathon
/

MailQuery

Sleeping

App Files Community

Da-123 committed on Jun 9, 2025

Commit

5c85daa

1 Parent(s): 810398e

clean

Browse files

Files changed (2) hide show

agentic_implementation/email_scraper.py +2 -166
agentic_implementation/tools.py +1 -1

agentic_implementation/email_scraper.py CHANGED Viewed

@@ -13,7 +13,7 @@ from dotenv import load_dotenv
 from zoneinfo import ZoneInfo
 from email.utils import parsedate_to_datetime
 from typing import List, Dict
 load_dotenv()
 # Email credentials
@@ -26,33 +26,7 @@ def validate_email_setup():
     """Validate email setup and credentials"""
     print("=== Email Setup Validation ===")
-    # Check .env file existence
-    # env_file_exists = os.path.exists('.env')
-    # print(f".env file exists: {'✅ Yes' if env_file_exists else '❌ No'}")
-    # if not env_file_exists:
-    #     print("❌ No .env file found! Create one with:")
-    #     print("   EMAIL_ID=your_email@gmail.com")
-    #     print("   APP_PASSWORD=your_16_char_app_password")
-    #     print("   OPENAI_API_KEY=your_openai_key")
-    #     return False
-    # Check environment variables
     issues = []
-    # if not EMAIL_ID:
-    #     issues.append("EMAIL_ID not set or empty")
-    # elif '@' not in EMAIL_ID:
-    #     issues.append("EMAIL_ID doesn't look like an email address")
-    # elif not EMAIL_ID.endswith('@gmail.com'):
-    #     issues.append("EMAIL_ID should be a Gmail address (@gmail.com)")
-    # if not APP_PASSWORD:
-    #     issues.append("APP_PASSWORD not set or empty")
-    # elif len(APP_PASSWORD) != 16:
-    #     issues.append(f"APP_PASSWORD should be 16 characters, got {len(APP_PASSWORD)}")
-    # elif ' ' in APP_PASSWORD:
-    #     issues.append("APP_PASSWORD should not contain spaces (remove spaces from app password)")
     if not os.getenv("OPENAI_API_KEY"):
         issues.append("OPENAI_API_KEY not set (needed for query processing)")
@@ -76,9 +50,6 @@ def _imap_connect():
     if EMAIL_ID:
         print(f"Email ID: {EMAIL_ID[:5]}...@{EMAIL_ID.split('@')[1] if '@' in EMAIL_ID else 'INVALID'}")
-    # if APP_PASSWORD:
-    #     print(f"App Password length: {len(APP_PASSWORD)} characters")
-    #     print(f"App Password format: {'✅ Looks correct (16 chars)' if len(APP_PASSWORD) == 16 else f'❌ Expected 16 chars, got {len(APP_PASSWORD)}'}")
     if not EMAIL_ID or not APP_PASSWORD:
         error_msg = "Missing credentials in environment variables!"
@@ -198,141 +169,6 @@ def _is_date_in_range(email_date: str, start_date: str, end_date: str) -> bool:
     except ValueError:
         return False
-def scrape_emails_from_sender(sender_email: str, start_date: str, end_date: str) -> List[Dict]:
-    """
-    Scrape emails from specific sender within date range
-    Uses intelligent caching to avoid re-scraping
-    """
-    print(f"Scraping emails from {sender_email} between {start_date} and {end_date}")
-    # Load existing database
-    db = _load_email_db()
-    sender_email = sender_email.lower().strip()
-    # Check if we have cached emails for this sender
-    if sender_email in db:
-        cached_emails = db[sender_email].get("emails", [])
-        # Filter cached emails by date range
-        filtered_emails = [
-            email for email in cached_emails
-            if _is_date_in_range(email["date"], start_date, end_date)
-        ]
-        # Check if we need to scrape more recent emails
-        last_scraped = db[sender_email].get("last_scraped", "01-Jan-2020")
-        today = datetime.today().strftime("%d-%b-%Y")
-        if last_scraped == today and filtered_emails:
-            print(f"Using cached emails (last scraped: {last_scraped})")
-            return filtered_emails
-    # Need to scrape emails
-    try:
-        mail = _imap_connect()
-        # Prepare IMAP search criteria
-        start_imap = _date_to_imap_format(start_date)
-        # Add one day to end_date for BEFORE criteria (IMAP BEFORE is exclusive)
-        end_dt = datetime.strptime(end_date, "%d-%b-%Y") + timedelta(days=1)
-        end_imap = end_dt.strftime("%d-%b-%Y")
-        search_criteria = f'(FROM "{sender_email}") SINCE "{start_imap}" BEFORE "{end_imap}"'
-        print(f"IMAP search: {search_criteria}")
-        # Search for emails
-        status, data = mail.search(None, search_criteria)
-        if status != 'OK':
-            raise Exception(f"IMAP search failed: {status}")
-        email_ids = data[0].split()
-        print(f"Found {len(email_ids)} emails")
-        scraped_emails = []
-        # Process each email
-        for i, email_id in enumerate(email_ids):
-            try:
-                print(f"Processing email {i+1}/{len(email_ids)}")
-                # Fetch email
-                status, msg_data = mail.fetch(email_id, "(RFC822)")
-                if status != 'OK':
-                    continue
-                # Parse email
-                msg = message_from_bytes(msg_data[0][1])
-                # Extract information
-                subject = msg.get("Subject", "No Subject")
-                content = _email_to_clean_text(msg)
-                # Parse date
-                date_header = msg.get("Date", "")
-                if date_header:
-                    try:
-                        dt_obj = parsedate_to_datetime(date_header)
-                        # Convert to IST
-                        ist_dt = dt_obj.astimezone(ZoneInfo("Asia/Kolkata"))
-                        email_date = ist_dt.strftime("%d-%b-%Y")
-                        email_time = ist_dt.strftime("%H:%M:%S")
-                    except:
-                        email_date = datetime.today().strftime("%d-%b-%Y")
-                        email_time = "00:00:00"
-                else:
-                    email_date = datetime.today().strftime("%d-%b-%Y")
-                    email_time = "00:00:00"
-                # Get message ID for deduplication
-                message_id = msg.get("Message-ID", f"missing-{email_id.decode()}")
-                scraped_emails.append({
-                    "date": email_date,
-                    "time": email_time,
-                    "subject": subject,
-                    "content": content[:2000],  # Limit content length
-                    "message_id": message_id
-                })
-            except Exception as e:
-                print(f"Error processing email {email_id}: {e}")
-                continue
-        mail.logout()
-        # Update database
-        if sender_email not in db:
-            db[sender_email] = {"emails": [], "last_scraped": ""}
-        # Merge with existing emails (avoid duplicates)
-        existing_emails = db[sender_email].get("emails", [])
-        existing_ids = {email.get("message_id") for email in existing_emails}
-        new_emails = [
-            email for email in scraped_emails
-            if email["message_id"] not in existing_ids
-        ]
-        # Update database
-        db[sender_email]["emails"] = existing_emails + new_emails
-        db[sender_email]["last_scraped"] = datetime.today().strftime("%d-%b-%Y")
-        # Save database
-        _save_email_db(db)
-        # Return filtered results
-        all_emails = db[sender_email]["emails"]
-        filtered_emails = [
-            email for email in all_emails
-            if _is_date_in_range(email["date"], start_date, end_date)
-        ]
-        print(f"Scraped {len(new_emails)} new emails, returning {len(filtered_emails)} in date range")
-        return filtered_emails
-    except Exception as e:
-        print(f"Email scraping failed: {e}")
-        raise
 def scrape_emails_by_text_search(keyword: str, start_date: str, end_date: str) -> List[Dict]:
     """
@@ -455,7 +291,7 @@ def scrape_emails_by_text_search(keyword: str, start_date: str, end_date: str) -
 if __name__ == "__main__":
     # Test scraping
     try:
-        emails = scrape_emails_from_sender(
             "noreply@example.com",
             "01-Jun-2025",
             "07-Jun-2025"

 from zoneinfo import ZoneInfo
 from email.utils import parsedate_to_datetime
 from typing import List, Dict
+from logger import logger
 load_dotenv()
 # Email credentials
     """Validate email setup and credentials"""
     print("=== Email Setup Validation ===")
     issues = []
     if not os.getenv("OPENAI_API_KEY"):
         issues.append("OPENAI_API_KEY not set (needed for query processing)")
     if EMAIL_ID:
         print(f"Email ID: {EMAIL_ID[:5]}...@{EMAIL_ID.split('@')[1] if '@' in EMAIL_ID else 'INVALID'}")
     if not EMAIL_ID or not APP_PASSWORD:
         error_msg = "Missing credentials in environment variables!"
     except ValueError:
         return False
 def scrape_emails_by_text_search(keyword: str, start_date: str, end_date: str) -> List[Dict]:
     """
 if __name__ == "__main__":
     # Test scraping
     try:
+        emails = scrape_emails_by_text_search(
             "noreply@example.com",
             "01-Jun-2025",
             "07-Jun-2025"

agentic_implementation/tools.py CHANGED Viewed

@@ -6,7 +6,7 @@ from schemas import (
     SendReplyParams,
 )
 from typing import Any, Dict
-from email_scraper import scrape_emails_from_sender, scrape_emails_by_text_search, _load_email_db, _save_email_db, _is_date_in_range
 from datetime import datetime, timedelta
 from typing import List
 from openai import OpenAI

     SendReplyParams,
 )
 from typing import Any, Dict
+from email_scraper import scrape_emails_by_text_search, _load_email_db, _save_email_db, _is_date_in_range
 from datetime import datetime, timedelta
 from typing import List
 from openai import OpenAI