CustomerSupportTicketClassifier / src /entity_extraction.py
Satyam0077's picture
Upload 4 files
b5c1242 verified
# entity_extraction.py
import re
import dateparser
# Product names searched for in ticket text.
# Extend this list to fit your own dataset or domain.
PRODUCT_LIST = [
    "productA",
    "productB",
    "productC",
    "laptop",
    "phone",
    "router",
    "headphones",
]
# Words and phrases that signal a complaint or an issue in a ticket.
# NOTE: the last entry is spelled with a typographic apostrophe (U+2019),
# not the ASCII one.
COMPLAINT_KEYWORDS = [
    "broken",
    "late",
    "error",
    "delay",
    "fault",
    "not working",
    "slow",
    "missing",
    "haven’t received",
]
# Typographic apostrophes are folded to the ASCII one so that "haven't" and
# "haven’t" compare equal regardless of how the ticket was typed.
_APOSTROPHE_MAP = str.maketrans({"\u2018": "'", "\u2019": "'", "\u02bc": "'"})

# Candidate date phrases: a few relative expressions, "on <word> <day>",
# and numeric dd/dd/dddd dates. Compiled once instead of on every call.
_DATE_PHRASE_RE = re.compile(
    r'\b(?:last week|yesterday|today|on \w+ \d{1,2}|\d{2}/\d{2}/\d{4})\b'
)


def _normalize(text):
    """Lower-case *text* and fold typographic apostrophes to ASCII."""
    return text.lower().translate(_APOSTROPHE_MAP)


def _starts_word(term, normalized_text):
    """Return True if *term* occurs in *normalized_text* at the start of a word."""
    # A negative lookbehind (rather than \b) rejects mid-word hits such as
    # "late" inside "related" while still accepting inflected forms such as
    # "delayed" or "errors".
    pattern = r"(?<!\w)" + re.escape(_normalize(term))
    return re.search(pattern, normalized_text) is not None


def extract_entities(text):
    """
    Extract products, date phrases, and complaint keywords from ticket text.

    Matching is case-insensitive and treats typographic and ASCII
    apostrophes as equivalent.

    Args:
        text (str): Customer support ticket text. ``None`` or an empty
            string yields an empty result instead of raising.

    Returns:
        dict: Dictionary with three lists:
            'products'   - entries of PRODUCT_LIST contained in the text,
            'dates'      - lower-cased date phrases found in the text that
                           dateparser was able to parse,
            'complaints' - entries of COMPLAINT_KEYWORDS that occur in the
                           text starting at a word boundary.
    """
    # Nothing to scan: return fresh empty lists rather than failing on None.
    if not text:
        return {'products': [], 'dates': [], 'complaints': []}

    text_lower = text.lower()
    normalized = text_lower.translate(_APOSTROPHE_MAP)

    # Products use plain substring containment on purpose, so compound words
    # such as "smartphone" still report "phone".
    products_found = [p for p in PRODUCT_LIST if _normalize(p) in normalized]

    # The regex only proposes candidates; dateparser decides which are dates.
    date_phrases = _DATE_PHRASE_RE.findall(text_lower)
    dates_found = [d for d in date_phrases if dateparser.parse(d)]

    # Complaint keywords must begin a word to avoid false positives such as
    # "fault" in "default" or "error" in "terror".
    complaints_found = [
        keyword for keyword in COMPLAINT_KEYWORDS
        if _starts_word(keyword, normalized)
    ]

    return {
        'products': products_found,
        'dates': dates_found,
        'complaints': complaints_found,
    }
# Demo: when run as a script, extract entities from a sample ticket and print them.
if __name__ == "__main__":
    demo_ticket = (
        "I ordered a laptop last week but still haven’t received it. "
        "This delay is frustrating and I need help."
    )
    print("Extracted Entities:", extract_entities(demo_ticket))