|
|
import pandas as pd
|
|
|
import re
|
|
|
import nltk
|
|
|
from nltk.corpus import stopwords
|
|
|
from nltk.stem import WordNetLemmatizer
|
|
|
|
|
|
# One-time fetch of the NLTK corpora/models this module depends on.
# `quiet=True` suppresses the per-resource progress chatter; downloads
# are skipped automatically when the resource is already cached locally.
# NOTE(review): newer NLTK releases (>=3.8.2) also require the
# 'punkt_tab' resource for word_tokenize — confirm against the pinned
# nltk version before deploying.
nltk.download('stopwords', quiet=True)

nltk.download('punkt', quiet=True)

nltk.download('wordnet', quiet=True)


# Module-level singletons shared by clean_text(): built once at import
# time so per-row preprocessing does not rebuild them.
stop_words = set(stopwords.words('english'))

lemmatizer = WordNetLemmatizer()
|
|
|
|
|
|
def clean_text(text):
    """Normalize one raw ticket string for downstream NLP.

    Pipeline: lowercase -> strip non-alphanumerics -> tokenize ->
    drop English stopwords -> lemmatize -> re-join with single spaces.

    Parameters
    ----------
    text : str, number, or NaN
        Raw ticket text. Missing values (NaN/None) yield "".
        Non-string scalars are coerced with str() before cleaning.

    Returns
    -------
    str
        Space-joined cleaned tokens ("" for missing input).
    """
    if pd.isna(text):
        return ""

    # Coerce non-string cells (e.g. numeric values read from Excel) so
    # .lower() cannot raise AttributeError.
    text = str(text).lower()

    # Replace anything that is not a lowercase letter, digit, or
    # whitespace with a space, preserving token boundaries.
    text = re.sub(r'[^a-z0-9\s]', ' ', text)

    tokens = nltk.word_tokenize(text)

    # Filter stopwords before lemmatizing so we only pay the
    # lemmatizer cost for tokens we keep.
    tokens = [w for w in tokens if w not in stop_words]

    tokens = [lemmatizer.lemmatize(w) for w in tokens]

    return " ".join(tokens)
|
|
|
|
|
|
def load_and_preprocess_data(filepath):
    """Load the support-ticket spreadsheet and prepare it for modeling.

    Parameters
    ----------
    filepath : str or path-like
        Path to an Excel file containing at least the columns
        'ticket_text', 'issue_type', and 'urgency_level'
        (and optionally 'product').

    Returns
    -------
    pandas.DataFrame
        Data with rows missing text or either label dropped, a new
        'clean_text' column added, and 'product' NaNs replaced by "".

    Raises
    ------
    KeyError
        If any of the required columns is absent from the file.
    """
    df = pd.read_excel(filepath)

    # Rows without text or either target label are unusable for
    # training, so drop them up front.
    df = df.dropna(subset=['ticket_text', 'issue_type', 'urgency_level'])

    df['clean_text'] = df['ticket_text'].apply(clean_text)

    # 'product' is an optional feature column: guard against input
    # files that omit it entirely (the original fillna would raise
    # KeyError in that case).
    if 'product' in df.columns:
        df['product'] = df['product'].fillna('')
    else:
        df['product'] = ''

    return df
|
|
|
|