import re

import gradio as gr
import nltk
import pandas as pd
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from sklearn.base import clone
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline

# Download the NLTK data needed for stopword removal and lemmatization.
nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('omw-1.4')


def load_data(file_path):
    """Load the ticket dataset from an Excel file and report its shape."""
    df = pd.read_excel(file_path)
    print(f"Loaded data shape: {df.shape}")
    return df


def clean_text(text):
    """Lowercase text and strip everything except letters, digits, and whitespace."""
    if not isinstance(text, str):
        return ""
    return re.sub(r'[^a-z0-9\s]', '', text.lower())


# Build these once at import time; reconstructing the lemmatizer and stopword
# set on every call (once per row under DataFrame.apply) is needlessly slow.
LEMMATIZER = WordNetLemmatizer()
STOP_WORDS = set(stopwords.words('english'))


def tokenize_lemmatize(text):
    """Split on whitespace, drop English stopwords, and lemmatize each token."""
    return [LEMMATIZER.lemmatize(word) for word in text.split() if word not in STOP_WORDS]
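
# Example behavior (assuming the NLTK data downloaded above is available):
#   tokenize_lemmatize("the cables were broken")  ->  ['cable', 'broken']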


def preprocess_data(df):
    """Fill missing fields within each product group, then clean and lemmatize text."""
    # Forward- then back-fill missing values within each product group, so a
    # ticket inherits values only from other tickets for the same product.
    df[['ticket_text', 'issue_type', 'urgency_level']] = (
        df.groupby('product')[['ticket_text', 'issue_type', 'urgency_level']]
        .transform(lambda group: group.ffill().bfill())
    )
    df['clean_text'] = df['ticket_text'].apply(clean_text)
    df['processed_text'] = df['clean_text'].apply(lambda x: ' '.join(tokenize_lemmatize(x)))
    return df


def simple_sentiment(text):
    """Tiny lexicon-based sentiment score: (positive hits - negative hits) / tokens."""
    pos = {'good', 'great', 'excellent', 'thanks'}
    neg = {'bad', 'broken', 'late', 'error', 'issue', 'problem'}
    tokens = text.split()
    # `len(tokens) or 1` avoids division by zero on empty input.
    return (sum(w in pos for w in tokens) - sum(w in neg for w in tokens)) / (len(tokens) or 1)


def feature_engineering(df):
    """Add simple numeric features: character length, word count, and sentiment."""
    df['ticket_length'] = df['clean_text'].apply(len)
    df['word_count'] = df['clean_text'].apply(lambda x: len(x.split()))
    df['sentiment'] = df['clean_text'].apply(simple_sentiment)
    return df


def train_models(df):
    """Train one random forest for issue type and one for urgency level."""
    X = df[['processed_text', 'ticket_length', 'word_count', 'sentiment']]
    y_issue = df['issue_type']
    y_urgency = df['urgency_level']

    # One shared split keeps both targets aligned with the same feature rows.
    X_train, X_test, y_issue_train, y_issue_test, y_urgency_train, y_urgency_test = train_test_split(
        X, y_issue, y_urgency, test_size=0.2, random_state=42
    )

    # TF-IDF over the lemmatized text; the numeric features pass through unchanged.
    preprocessor = ColumnTransformer([
        ('text', TfidfVectorizer(max_features=500), 'processed_text'),
        ('numeric', 'passthrough', ['ticket_length', 'word_count', 'sentiment'])
    ])

    issue_model = Pipeline([
        ('pre', preprocessor),
        ('clf', RandomForestClassifier(n_estimators=100, random_state=42))
    ])
    # clone() gives the urgency model its own unfitted copy; reusing the same
    # ColumnTransformer instance in two pipelines would let the second fit
    # silently refit (and mutate) the first model's preprocessor.
    urgency_model = clone(issue_model)

    issue_model.fit(X_train, y_issue_train)
    urgency_model.fit(X_train, y_urgency_train)

    print("Issue Classification:\n", classification_report(y_issue_test, issue_model.predict(X_test)))
    print("Urgency Classification:\n", classification_report(y_urgency_test, urgency_model.predict(X_test)))

    return issue_model, urgency_model
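
# For a more robust estimate than a single 80/20 split, the same pipelines
# could be cross-validated (a sketch, not part of this script's flow; X and
# y_issue refer to the frames built inside train_models):
#
#   from sklearn.model_selection import cross_val_score
#   scores = cross_val_score(issue_model, X, y_issue, cv=5)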


def predict_ticket(ticket_text, issue_model, urgency_model):
    """Run one raw ticket through the training-time preprocessing and predict."""
    cleaned = clean_text(ticket_text)
    processed = ' '.join(tokenize_lemmatize(cleaned))
    # A single-row DataFrame with the same columns the pipelines were fit on.
    features = pd.DataFrame([{
        'processed_text': processed,
        'ticket_length': len(cleaned),
        'word_count': len(cleaned.split()),
        'sentiment': simple_sentiment(cleaned)
    }])
    return issue_model.predict(features)[0], urgency_model.predict(features)[0]
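
# Quick smoke test (hypothetical sample text; assumes the models returned by
# train_models are in scope):
#   issue, urgency = predict_ticket("order arrived broken, please help",
#                                   issue_model, urgency_model)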


def create_gradio_interface(issue_model, urgency_model):
    """Build a Gradio UI that predicts a ticket's issue type and urgency level."""
    def wrapped(ticket_text):
        # Surface prediction errors in the UI instead of crashing the app.
        try:
            issue, urgency = predict_ticket(ticket_text, issue_model, urgency_model)
            return issue, urgency
        except Exception as e:
            return f"Error: {e}", ""

    return gr.Interface(
        fn=wrapped,
        inputs=gr.Textbox(label="Ticket Text", lines=4),
        outputs=[
            gr.Textbox(label="Predicted Issue Type"),
            gr.Textbox(label="Predicted Urgency Level")
        ],
        title="Support Ticket Classifier",
        description="Enter a ticket to classify its issue type and urgency level.",
        examples=[
            ["payment issue with smartwatch v2, underbilled order 29224"],
            ["Router stopped working after update, need immediate help"],
            ["Received damaged headphones in shipment, request refund"],
            ["ordered smartwatch v2 got protab x1 instead order number 76301"],
            ["cant log account keep showing error help"],
            ["tell ecobreeze ac warranty also available black"]
        ]
    )


if __name__ == "__main__":
    # End-to-end flow: load, fill/clean, featurize, train, then serve a demo UI.
    df = load_data("ai_dev_assignment_tickets_complex_1000.xls")
    df = preprocess_data(df)
    df = feature_engineering(df)
    issue_model, urgency_model = train_models(df)
    iface = create_gradio_interface(issue_model, urgency_model)
    # share=True also exposes a temporary public Gradio link, not just localhost.
    iface.launch(share=True)
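
# To reuse the trained pipelines without retraining, they could be persisted
# with joblib (a sketch; joblib ships with scikit-learn but is not used above):
#
#   import joblib
#   joblib.dump(issue_model, "issue_model.joblib")
#   issue_model = joblib.load("issue_model.joblib")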