|
|
|
|
|
import gradio as gr |
|
|
import numpy as np |
|
|
import pandas as pd |
|
|
import seaborn as sns |
|
|
import matplotlib.pyplot as plt |
|
|
import nltk |
|
|
nltk.download('stopwords', quiet=True) |
|
|
from nltk.corpus import stopwords |
|
|
from nltk.stem.porter import PorterStemmer |
|
|
from sklearn import metrics |
|
|
from sklearn.multiclass import OneVsRestClassifier |
|
|
from textblob import TextBlob |
|
|
from wordcloud import WordCloud |
|
|
|
|
|
# Load the labelled tweet dataset and normalise it for the pipeline below.
# Expected columns: 'clean_text' (tweet body) and 'category' (-1/0/1 code).
label_names = {-1: 'negative', 0: 'neutral', 1: 'positive'}

twitter = pd.read_csv("Twitter_Data.csv")

# Map the numeric sentiment codes onto human-readable labels.
twitter['category'] = twitter['category'].replace(label_names)

# Rows missing either the text or the label are useless for training.
twitter = twitter.dropna(subset=['clean_text', 'category'])
|
|
|
|
|
|
|
|
|
|
|
# Word cloud for tweets labelled "positive".
# " ".join(...) builds the corpus in a single pass; the previous
# `text += f" {tweet}"` loop was quadratic in the number of tweets.
text = " ".join(twitter[twitter['category'] == "positive"]['clean_text'])

wordcloud = WordCloud(
    width=3000, height=2000, background_color='black',
    stopwords=set(stopwords.words("english"))).generate(text)

# Uncomment to render the cloud:
# fig = plt.figure(figsize=(40, 30), facecolor='k', edgecolor='k')
# plt.imshow(wordcloud, interpolation='bilinear')
# plt.axis('off')
# plt.tight_layout(pad=0)
# plt.show()

# Release the (potentially very large) concatenated corpus.
del text
|
|
|
|
|
# Word cloud for tweets labelled "neutral".
# " ".join(...) builds the corpus in a single pass; the previous
# `text += f" {tweet}"` loop was quadratic in the number of tweets.
text = " ".join(twitter[twitter['category'] == "neutral"]['clean_text'])

wordcloud = WordCloud(
    width=3000, height=2000, background_color='black',
    stopwords=set(stopwords.words("english"))).generate(text)

# Uncomment to render the cloud:
# fig = plt.figure(figsize=(40, 30), facecolor='k', edgecolor='k')
# plt.imshow(wordcloud, interpolation='bilinear')
# plt.axis('off')
# plt.tight_layout(pad=0)
# plt.show()

# Release the (potentially very large) concatenated corpus.
del text
|
|
|
|
|
# Word cloud for tweets labelled "negative".
# " ".join(...) builds the corpus in a single pass; the previous
# `text += f" {tweet}"` loop was quadratic in the number of tweets.
text = " ".join(twitter[twitter['category'] == "negative"]['clean_text'])

wordcloud = WordCloud(
    width=3000, height=2000, background_color='black',
    stopwords=set(stopwords.words("english"))).generate(text)

# Uncomment to render the cloud:
# fig = plt.figure(figsize=(40, 30), facecolor='k', edgecolor='k')
# plt.imshow(wordcloud, interpolation='bilinear')
# plt.axis('off')
# plt.tight_layout(pad=0)
# plt.show()

# Release the (potentially very large) concatenated corpus.
del text
|
|
|
|
|
|
|
|
|
|
|
# How many training examples fall into each sentiment class?
dist = twitter['category'].value_counts()


def distribution_plot(x, y, name):
    """Render a bar chart of *y* against *x*, titled *name*, and show it."""
    fig, ax = plt.subplots(figsize=(10, 6))
    sns.barplot(x=x, y=y, ax=ax)
    ax.set_title(name)
    plt.show()


distribution_plot(x=dist.index, y=dist.values, name="Class Distribution Train")
|
|
|
|
|
# Lexicon-based sentiment features from TextBlob.
# Defined with `def` rather than `lambda` assignment (PEP 8 E731); the
# names `pol` / `sub` are kept so any other callers keep working.
def pol(x):
    """TextBlob polarity of *x*, in [-1.0, 1.0] (negative .. positive)."""
    return TextBlob(x).sentiment.polarity


def sub(x):
    """TextBlob subjectivity of *x*, in [0.0, 1.0] (objective .. subjective)."""
    return TextBlob(x).sentiment.subjectivity


twitter['polarity'] = twitter['clean_text'].apply(pol)
twitter['subjectivity'] = twitter['clean_text'].apply(sub)

# (A bare `twitter` expression — notebook-style display residue with no
# effect in a script — was removed here.)

# Uncomment to plot the polarity distribution:
# plt.figure(figsize=(10, 6))
# plt.hist(twitter['polarity'], bins=20, color='skyblue', edgecolor='black')
# plt.title("Distribution of Polarity")
# plt.xlabel("Polarity")
# plt.ylabel("Frequency")
# plt.grid(True)
# plt.show()

# Uncomment to plot the subjectivity distribution:
# plt.figure(figsize=(10, 6))
# plt.hist(twitter['subjectivity'], bins=20, color='lightgreen', edgecolor='black')
# plt.title("Distribution of Subjectivity")
# plt.xlabel("Subjectivity")
# plt.ylabel("Frequency")
# plt.grid(True)
# plt.show()
|
|
from sklearn.linear_model import LogisticRegression |
|
|
from sklearn.model_selection import train_test_split |
|
|
from sklearn.metrics import accuracy_score, f1_score, classification_report, confusion_matrix, roc_curve,auc |
|
|
from sklearn.feature_extraction.text import TfidfVectorizer |
|
|
|
|
|
# Vectorise the cleaned tweets into TF-IDF features (top 5000 terms).
vectorizer = TfidfVectorizer(max_features=5000)
X = vectorizer.fit_transform(twitter['clean_text'])

# Encode the string labels as integers for scikit-learn.
y = twitter['category'].map({'negative': 0, 'neutral': 1, 'positive': 2})

# Hold out 20% of the data for evaluation; fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42)

# Baseline model: logistic regression on the TF-IDF features.
lr = LogisticRegression(max_iter=1000)
lr.fit(X_train, y_train)
y_pred = lr.predict(X_test)
|
|
# The disabled experiments below were previously kept inside a no-op
# triple-quoted string literal (evaluated and discarded at runtime);
# converted to real comments.

# Evaluation of the Logistic Regression baseline (disabled):
# print("Accuracy:", accuracy_score(y_test, y_pred))
# print("F1 Score:", f1_score(y_test, y_pred, average='weighted'))
# print("Classification Report:\n", classification_report(y_test, y_pred))
# print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))

# Random Forest experiment (disabled):
# from sklearn.ensemble import RandomForestClassifier
# classifier = RandomForestClassifier(n_estimators=100, random_state=42)
# classifier.fit(X_train, y_train)
# y_pred = classifier.predict(X_test)
# print("Accuracy:", accuracy_score(y_test, y_pred))
# print("F1 Score:", f1_score(y_test, y_pred, average='weighted'))
# print("Classification Report:\n", classification_report(y_test, y_pred))
# print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))

# Linear SVM experiment (disabled):
# from sklearn.svm import SVC
# classifier = SVC(kernel='linear', random_state=42)
# classifier.fit(X_train, y_train)
# y_pred = classifier.predict(X_test)
# print("Accuracy:", accuracy_score(y_test, y_pred))
# print("F1 Score:", f1_score(y_test, y_pred, average='weighted'))
# print("Classification Report:\n", classification_report(y_test, y_pred))
# print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))

from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier  # imported for experimentation; currently unused

# Boosted ensemble of shallow trees on the same TF-IDF split.
classifier = AdaBoostClassifier(n_estimators=100, random_state=42)
classifier.fit(X_train, y_train)

y_pred = classifier.predict(X_test)

# (A bare `AdaBoostClassifier` expression statement — a no-op — was
# removed here.)
|
|
|
|
|
|
|
|
|
|
|
import numpy as np |
|
|
import matplotlib.pyplot as plt |
|
|
from sklearn.feature_extraction.text import TfidfVectorizer |
|
|
from sklearn.model_selection import train_test_split |
|
|
from sklearn.ensemble import AdaBoostClassifier |
|
|
from sklearn .tree import DecisionTreeClassifier |
|
|
from sklearn.metrics import accuracy_score, f1_score, classification_report, confusion_matrix, roc_curve,auc |
|
|
|
|
|
|
|
|
# Re-fit the TF-IDF features and rebuild the train/test split so this
# section stands on its own (it repeats the earlier preprocessing).
vectorizer = TfidfVectorizer(max_features=5000)
X = vectorizer.fit_transform(twitter['clean_text'])

y = twitter['category'].map({'negative': 0, 'neutral': 1, 'positive': 2})

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42)

# AdaBoost model; `vectorizer` and `classifier` from this section are the
# objects used by the Gradio app further down.
classifier = AdaBoostClassifier(n_estimators=100, random_state=42)
classifier.fit(X_train, y_train)

# Per-class probability estimates on the held-out set (for ROC analysis).
y_probs = classifier.predict_proba(X_test)
|
|
# ROC analysis (disabled). Previously stored in a no-op triple-quoted
# string literal; converted to comments. NOTE(review): the original code
# declared `roc_auc = {}` but never filled it, so the plot labels would
# raise KeyError if re-enabled — the AUC computation is added below.
#
# fpr, tpr, roc_auc = {}, {}, {}
# num_classes = 3  # negative, neutral, positive
#
# for i in range(num_classes):
#     # One-vs-rest ROC curve for class i.
#     fpr[i], tpr[i], _ = roc_curve(y_test == i, y_probs[:, i])
#     roc_auc[i] = auc(fpr[i], tpr[i])  # was missing in the original
#
# plt.figure()
# for i in range(num_classes):
#     plt.plot(fpr[i], tpr[i], label=f"Class {i} (AUC = {roc_auc[i]:.2f})")
#
# plt.plot([0, 1], [0, 1], 'k--')  # chance diagonal
# plt.xlim([0.0, 1.0])
# plt.ylim([0.0, 1.05])
# plt.xlabel("False Positive Rate")
# plt.ylabel("True Positive Rate")
# plt.title("ROC Curves for Multi-Class Classification")
# plt.legend(loc='lower right')
# plt.show()
#
# Final evaluation of the AdaBoost model (disabled):
# y_pred = classifier.predict(X_test)
# print("Accuracy:", accuracy_score(y_test, y_pred))
# print("F1 Score:", f1_score(y_test, y_pred, average='weighted'))
# print("Classification Report:\n", classification_report(y_test, y_pred))
# print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def predict_sentiment(text):
    """Classify one tweet with the trained TF-IDF + AdaBoost model.

    Returns "Negative", "Neutral" or "Positive", or a prompt string when
    the input is blank. Relies on the module-level `vectorizer` and
    `classifier` fitted above.
    """
    if not text.strip():
        return "Please enter some text."

    # Class indices 0/1/2 map positionally onto these labels.
    labels = ("Negative", "Neutral", "Positive")
    features = vectorizer.transform([text])
    return labels[classifier.predict(features)[0]]
|
|
|
|
|
|
|
|
# Gradio front-end: a single text box wired to predict_sentiment.
# NOTE: component creation order defines the page layout.
with gr.Blocks() as demo:
    gr.Markdown("## Twitter Sentiment Analyzer")
    gr.Markdown("Enter a tweet and get its predicted sentiment:")

    with gr.Row():
        # Multi-line input for the tweet text.
        input_text = gr.Textbox(lines=3, placeholder="Type your tweet here...", label="Tweet")

    # Box displaying the model's verdict.
    output = gr.Textbox(label="Predicted Sentiment")

    analyze_btn = gr.Button("Analyze Sentiment")
    # Clicking the button routes the textbox contents through predict_sentiment.
    analyze_btn.click(fn=predict_sentiment, inputs=input_text, outputs=output)

# Start the local web server (blocks until interrupted).
demo.launch()
|
|
|