Spaces:
Sleeping
Sleeping
| #!/usr/bin/env python3 | |
| """ | |
| Query Parser with Intent Classification and Name-to-Email Resolution | |
| """ | |
| import json | |
| import os | |
| from datetime import datetime, timedelta | |
| from openai import OpenAI | |
| from typing import Dict, Optional, Tuple | |
| from dotenv import load_dotenv # <-- Add this | |
# Pull settings from a local .env file into the process environment
# before anything below reads them.
load_dotenv()

# Shared OpenAI client used by every LLM call in this module.
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

# On-disk JSON stores, given relative to the current working directory.
NAME_MAPPING_FILE = "name_mapping.json"
EMAIL_DB_FILE = "email_db.json"
def _llm(messages, model="gpt-4o-mini", temperature=0):
    """Send a chat-completion request and return the reply text.

    Args:
        messages: Chat messages as a list of ``{"role": ..., "content": ...}``
            dicts, passed to the API unchanged.
        model: Name of the chat model to query.
        temperature: Sampling temperature forwarded to the API.

    Returns:
        The first choice's message content with surrounding whitespace
        removed, or an empty string when the reply carries no text content.
    """
    rsp = client.chat.completions.create(
        model=model,
        temperature=temperature,
        messages=messages,
    )
    # The API declares ``content`` as optional, so it can be None; guard
    # against that instead of failing with AttributeError on ``.strip()``.
    content = rsp.choices[0].message.content
    return content.strip() if content else ""
def _load_name_mapping() -> Dict[str, str]:
    """Load the name-to-email mapping from ``NAME_MAPPING_FILE``.

    Returns:
        The JSON object stored in the file, or an empty dict when the file
        is missing, unreadable, not valid JSON, or holds something other
        than a JSON object.
    """
    try:
        # Open directly instead of checking existence first: one code path
        # covers "missing" and "unreadable" without a check-then-use race.
        with open(NAME_MAPPING_FILE, "r", encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError):
        # ValueError covers both json.JSONDecodeError and decoding errors.
        return {}
    # Callers iterate the result with ``.items()``; never hand them a list
    # or scalar that happened to be valid JSON.
    return data if isinstance(data, dict) else {}
def _save_name_mapping(mapping: Dict[str, str]):
    """Write the name-to-email mapping to ``NAME_MAPPING_FILE`` as JSON.

    Args:
        mapping: Mapping to persist; it must be JSON-serializable.

    Raises:
        TypeError: If ``mapping`` contains values JSON cannot encode.
        OSError: If the file cannot be written or replaced.
    """
    # Serialize before touching the disk so an encoding failure cannot leave
    # a truncated file behind (the loader would read that as "no mappings").
    payload = json.dumps(mapping, indent=2)
    tmp_path = f"{NAME_MAPPING_FILE}.tmp"
    with open(tmp_path, "w", encoding="utf-8") as f:
        f.write(payload)
    # Swap the finished file into place in a single step.
    os.replace(tmp_path, NAME_MAPPING_FILE)
def _load_email_db() -> Dict:
    """Load the email database from ``EMAIL_DB_FILE``.

    Returns:
        The JSON object stored in the file, or an empty dict when the file
        is missing, unreadable, not valid JSON, or holds something other
        than a JSON object.
    """
    try:
        # Open directly instead of checking existence first: one code path
        # covers "missing" and "unreadable" without a check-then-use race.
        with open(EMAIL_DB_FILE, "r", encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError):
        # ValueError covers both json.JSONDecodeError and decoding errors.
        return {}
    # The declared return type is a dict; reject any other JSON payload.
    return data if isinstance(data, dict) else {}
def _save_email_db(db: Dict):
    """Write the email database to ``EMAIL_DB_FILE`` as JSON.

    Args:
        db: Database contents to persist; they must be JSON-serializable.

    Raises:
        TypeError: If ``db`` contains values JSON cannot encode.
        OSError: If the file cannot be written or replaced.
    """
    # Serialize before touching the disk so an encoding failure cannot leave
    # a truncated file behind (the loader would read that as an empty DB).
    payload = json.dumps(db, indent=2)
    tmp_path = f"{EMAIL_DB_FILE}.tmp"
    with open(tmp_path, "w", encoding="utf-8") as f:
        f.write(payload)
    # Swap the finished file into place in a single step.
    os.replace(tmp_path, EMAIL_DB_FILE)
def _parse_json_reply(reply: str) -> Dict:
    """Decode the JSON object contained in an LLM reply.

    Tolerates Markdown code fences or prose around the object by decoding
    only the span from the first ``{`` to the last ``}``. When no such span
    exists the whole reply is decoded, so invalid input still raises
    ``json.JSONDecodeError`` exactly as a plain ``json.loads`` would.
    """
    start = reply.find("{")
    end = reply.rfind("}")
    candidate = reply[start:end + 1] if 0 <= start < end else reply
    return json.loads(candidate)


def extract_query_info(query: str) -> Dict:
    """Extract the sender intent and date range from a user query via the LLM.

    Args:
        query: Free-text request such as "emails from amazon last month".

    Returns:
        The JSON object decoded from the model's reply. The prompt asks for
        the keys ``sender_intent``, ``start_date`` and ``end_date``; their
        presence is not verified here.

    Raises:
        json.JSONDecodeError: If the reply contains no decodable JSON.
    """
    # The current date is injected so the model can resolve relative ranges.
    today_str = datetime.today().strftime("%d-%b-%Y")
    system_prompt = f"""
You are an email query parser. Today is {today_str}.
Given a user query, extract:
1. sender_intent: The person/entity they want emails from (could be name or email)
2. start_date and end_date: Date range in DD-MMM-YYYY format
For relative dates:
- "last week" = 7 days ago to today
- "yesterday" = yesterday only
- "last month" = 30 days ago to today
- "last 3 days" = 3 days ago to today
Examples:
- "emails from dev agarwal last week" → sender_intent: "dev agarwal"
- "show amazon emails from last month" → sender_intent: "amazon"
- "emails from john@company.com yesterday" → sender_intent: "john@company.com"
Return ONLY valid JSON:
{{
"sender_intent": "extracted name or email",
"start_date": "DD-MMM-YYYY",
"end_date": "DD-MMM-YYYY"
}}
"""
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": query},
    ]
    result = _llm(messages)
    # Models sometimes wrap the object in ```json fences despite the
    # "ONLY valid JSON" instruction; strip any wrapper before decoding.
    return _parse_json_reply(result)
def resolve_sender_email(sender_intent: str) -> Tuple[Optional[str], bool]:
    """Resolve a sender intent (name or email) to an email address.

    Args:
        sender_intent: Name or email address extracted from the user query.

    Returns:
        A ``(email_address, needs_user_input)`` tuple. ``email_address`` is
        the lowercased, stripped intent when it already contains ``@``,
        otherwise the stored address for an exact or partial name match.
        When nothing matches, or the intent is blank or not a string, the
        result is ``(None, True)``.
    """
    # The intent comes from decoded LLM output, so it may be null or
    # another non-text value; ask the user instead of crashing.
    if not isinstance(sender_intent, str):
        return None, True

    normalized_intent = sender_intent.lower().strip()
    # A blank intent is a substring of every stored name and would resolve
    # to whichever mapping entry happens to come first.
    if not normalized_intent:
        return None, True

    # Already an email address: nothing to look up.
    if "@" in normalized_intent:
        return normalized_intent, False

    name_mapping = _load_name_mapping()

    # An exact match wins over any partial match.
    if normalized_intent in name_mapping:
        return name_mapping[normalized_intent], False

    # Partial match: either string contains the other.
    for name, email in name_mapping.items():
        candidate = name.lower()
        # A blank key would be a substring of every intent; ignore it.
        if not candidate.strip():
            continue
        if normalized_intent in candidate or candidate in normalized_intent:
            return email, False

    # No match found: the caller has to ask the user for the address.
    return None, True
def store_name_email_mapping(name: str, email: str):
    """Persist a new name-to-email mapping.

    Both values are lowercased and stripped before being stored.

    Args:
        name: Human-readable sender name to use as the lookup key.
        email: Email address to associate with ``name``.

    Raises:
        ValueError: If ``name`` is blank or ``email`` contains no ``@``.
    """
    key = name.lower().strip()
    address = email.lower().strip()
    # A blank key is a substring of every query, so the partial matching in
    # resolve_sender_email would return this address for any sender.
    if not key:
        raise ValueError("name must not be empty")
    # Stored values are handed back as resolved email addresses.
    if "@" not in address:
        raise ValueError(f"'{email}' is not a valid email address")

    name_mapping = _load_name_mapping()
    name_mapping[key] = address
    _save_name_mapping(name_mapping)
def parse_email_query(query: str) -> Dict:
    """Parse an email query and report what the caller should do next.

    The result always has ``status`` and ``message`` keys. ``status`` is
    ``"ready_to_scrape"`` when a sender address was resolved,
    ``"need_email_input"`` when the user must supply one, and ``"error"``
    (with an ``error`` key holding the exception text) when any step raised.
    """
    try:
        # Step 1: have the LLM pull out the sender and the date range.
        parsed = extract_query_info(query)
        who = parsed["sender_intent"]
        date_from = parsed["start_date"]
        date_to = parsed["end_date"]

        # Step 2: turn the sender into a concrete address if we know one.
        resolved, ask_user = resolve_sender_email(who)

        if not ask_user:
            # Address known: the caller can go straight to scraping.
            return {
                "status": "ready_to_scrape",
                "sender_intent": who,
                "resolved_email": resolved,
                "start_date": date_from,
                "end_date": date_to,
                "message": f"Found email: {resolved} for '{who}'",
            }

        # Unknown sender: hand the question back to the user.
        return {
            "status": "need_email_input",
            "sender_intent": who,
            "start_date": date_from,
            "end_date": date_to,
            "message": f"I don't have an email address for '{who}'. Please provide the email address.",
        }
    except Exception as exc:
        # Any failure is reported as a structured error rather than raised.
        return {
            "status": "error",
            "error": str(exc),
            "message": "Failed to parse query",
        }
# Manual smoke test: run a few sample queries through the parser.
if __name__ == "__main__":
    sample_queries = (
        "Show me emails from dev agarwal last week",
        "emails from amazon in the last month",
        "get john@company.com emails yesterday",
        "emails from new person last 3 days",
    )

    for sample in sample_queries:
        print(f"\nQuery: {sample}")
        outcome = parse_email_query(sample)
        print(f"Result: {json.dumps(outcome, indent=2)}")