# entity_extraction.py
"""Rule-based extraction of products, dates and complaints from support tickets."""

import re

try:
    import dateparser
except ImportError:
    # dateparser is only used to validate date candidates. Without it the
    # module still works: candidates are accepted on the strength of the
    # regex alone (see _is_valid_date).
    dateparser = None

# Extend your product list based on your dataset or domain
PRODUCT_LIST = [
    "productA",
    "productB",
    "productC",
    "laptop",
    "phone",
    "router",
    "headphones",
]

# Keywords indicating complaints or issues
COMPLAINT_KEYWORDS = [
    "broken",
    "late",
    "error",
    "delay",
    "fault",
    "not working",
    "slow",
    "missing",
    "haven’t received",
]

# Candidate date phrases: a few relative expressions, "on <word> <day>",
# and numeric dd/mm/yyyy-style dates (one- or two-digit day and month).
_DATE_PATTERN = re.compile(
    r'\b(?:last week|yesterday|today|on \w+ \d{1,2}|\d{1,2}/\d{1,2}/\d{4})\b'
)


def _normalize(text):
    """Lowercase *text* and replace curly apostrophes with straight ones."""
    return text.lower().replace("\u2019", "'").replace("\u2018", "'")


def _find_terms(terms, normalized_text):
    """Return the entries of *terms* that occur at the start of a word.

    Anchoring on the left word boundary keeps inflected forms ("laptops",
    "delayed") while rejecting hits buried inside unrelated words
    ("phone" in "headphones", "fault" in "default", "late" in "template").
    Entries are returned exactly as they are spelled in *terms*.
    """
    return [
        term
        for term in terms
        if re.search(r'(?<!\w)' + re.escape(_normalize(term)), normalized_text)
    ]


def _is_valid_date(phrase):
    """Return True if *phrase* should be kept as a date.

    Uses dateparser when it is installed; otherwise every regex candidate
    is accepted.
    """
    if dateparser is None:
        return True
    return bool(dateparser.parse(phrase))


def extract_entities(text):
    """
    Extracts products, dates, and complaint keywords from the input text.

    Matching is case-insensitive and treats straight and curly apostrophes
    as equivalent. Products and complaints are reported using the spelling
    from PRODUCT_LIST / COMPLAINT_KEYWORDS; dates are reported as the
    lowercased phrase found in the text.

    Args:
        text (str): Customer support ticket text. ``None`` or an empty
            string yields empty result lists.

    Returns:
        dict: Dictionary with lists of extracted 'products', 'dates',
            and 'complaints'.
    """
    if not text:
        return {'products': [], 'dates': [], 'complaints': []}

    normalized = _normalize(text)

    # Product extraction - listed product names appearing at a word start
    products_found = _find_terms(PRODUCT_LIST, normalized)

    # Date extraction - regex proposes candidates, dateparser filters them
    date_phrases = _DATE_PATTERN.findall(normalized)
    dates_found = [phrase for phrase in date_phrases if _is_valid_date(phrase)]

    # Complaint extraction - listed keywords appearing at a word start
    complaints_found = _find_terms(COMPLAINT_KEYWORDS, normalized)

    return {
        'products': products_found,
        'dates': dates_found,
        'complaints': complaints_found,
    }


# Example usage
if __name__ == "__main__":
    sample_text = (
        "I ordered a laptop last week but still haven’t received it. "
        "This delay is frustrating and I need help."
    )
    entities = extract_entities(sample_text)
    print("Extracted Entities:", entities)