Update SentimentAnalyzerUsingDistilbert.py
Browse files
SentimentAnalyzerUsingDistilbert.py
CHANGED
|
@@ -1,99 +1,99 @@
|
|
| 1 |
-
import torch
|
| 2 |
-
import gradio as gr
|
| 3 |
-
import pandas as pd
|
| 4 |
-
import matplotlib.pyplot as plt
|
| 5 |
-
from transformers import pipeline
|
| 6 |
-
from docx import Document
|
| 7 |
-
from PyPDF2 import PdfReader
|
| 8 |
-
|
| 9 |
-
# Initialize the sentiment analysis pipeline
|
| 10 |
-
# Module-level pipeline object; sentiment_analyzer() calls it for every text.
analyzer = pipeline(
    task="text-classification",
    model="distilbert/distilbert-base-uncased-finetuned-sst-2-english",
)
|
| 11 |
-
|
| 12 |
-
# Function to analyze sentiment for a single sentence
|
| 13 |
-
def sentiment_analyzer(review):
    """Classify one piece of text and return the model's top label.

    Args:
        review: Text to classify. Values that are not already ``str`` (for
            example a number read from a spreadsheet cell) are converted
            with ``str()`` before being sent to the model.

    Returns:
        The ``'label'`` entry of the first result produced by ``analyzer``.
    """
    text = review if isinstance(review, str) else str(review)
    # truncation=True asks the tokenizer to cut over-long inputs down to the
    # model's maximum sequence length instead of failing on them.
    sentiment = analyzer(text, truncation=True)
    return sentiment[0]['label']
|
| 16 |
-
|
| 17 |
-
# Create a pie chart visualization for the sentiment
|
| 18 |
-
def sentiment_pie_chart(sentiment_labels):
    """Build a pie chart showing how often each sentiment label occurs.

    Args:
        sentiment_labels: Sequence of label strings, one per analysed review.

    Returns:
        The matplotlib figure holding the chart. When there are no labels the
        figure contains a "No data" placeholder instead of a pie.
    """
    sentiment_counts = pd.Series(sentiment_labels, dtype="object").value_counts()

    # value_counts() orders labels by frequency, so colours are looked up per
    # label; a fixed ['green', 'red'] list would paint NEGATIVE green whenever
    # negative reviews are the majority.
    # Assumes the model emits 'POSITIVE' / 'NEGATIVE'; other labels are gray.
    palette = {'POSITIVE': 'green', 'NEGATIVE': 'red'}
    slice_colors = [
        palette.get(str(label).upper(), 'gray')
        for label in sentiment_counts.index
    ]

    fig, ax = plt.subplots()
    if sentiment_counts.empty:
        # Nothing to plot: draw a placeholder rather than asking pandas to
        # render an empty series.
        ax.text(0.5, 0.5, 'No data', ha='center', va='center')
        ax.axis('off')
    else:
        sentiment_counts.plot(kind='pie', autopct='%1.1f%%', colors=slice_colors, ax=ax)
        ax.set_ylabel('')
    ax.set_title('Sentiment Distribution')

    # Unregister the figure from pyplot so repeated calls in a long-running
    # app do not pile up open figures; the Figure object itself is returned
    # to the caller for rendering.
    plt.close(fig)
    return fig
|
| 27 |
-
|
| 28 |
-
# Function to analyze a single input sentence
|
| 29 |
-
def analyze_single_sentence(sentence):
    """Return a display message reporting the sentiment label of ``sentence``.

    The label is obtained from ``sentiment_analyzer``; this wrapper only
    embeds it in a human-readable sentence.
    """
    return f"The sentiment of the sentence is: {sentiment_analyzer(sentence)}"
|
| 32 |
-
|
| 33 |
-
# Function to read and analyze reviews from Excel, PDF, or DOCX files
|
| 34 |
-
def read_reviews_and_analyze_sentiment(file_object):
    """Extract reviews from an uploaded file and label each one.

    Args:
        file_object: The uploaded file. Its ``name`` attribute (or the value
            itself when it is a plain path string) decides the format; the
            object is handed unchanged to the matching reader.

    Returns:
        A ``(DataFrame, figure)`` tuple: the frame has ``Reviews`` and
        ``Sentiment`` columns, the figure comes from ``sentiment_pie_chart``.

    Raises:
        ValueError: If the extension is not .xlsx/.docx/.pdf, an Excel file
            has no ``Reviews`` column, or the file yields no review text.
    """
    # Compare extensions case-insensitively so "REVIEWS.XLSX" is accepted too.
    file_name = str(getattr(file_object, 'name', file_object)).lower()

    if file_name.endswith('.xlsx'):
        df = pd.read_excel(file_object)
        if 'Reviews' not in df.columns:
            raise ValueError("Excel file must contain a 'Reviews' column.")
        # Empty cells load as NaN; drop them and stringify non-text cells so
        # only real text reaches the model.
        candidates = [str(cell) for cell in df['Reviews'].dropna()]

    elif file_name.endswith('.docx'):
        doc = Document(file_object)
        candidates = [para.text for para in doc.paragraphs]

    elif file_name.endswith('.pdf'):
        reader = PdfReader(file_object)
        # A page may yield no text at all; fall back to "" for it. Joining
        # pages with '\n' keeps the last line of one page from fusing with
        # the first line of the next.
        page_texts = [page.extract_text() or "" for page in reader.pages]
        candidates = '\n'.join(page_texts).split('\n')  # Assuming reviews are newline-separated

    else:
        raise ValueError("Unsupported file format. Please upload .xlsx, .pdf, or .docx files.")

    # Blank entries carry no sentiment; skip them for every format.
    reviews = [text for text in candidates if text.strip()]
    if not reviews:
        raise ValueError("No reviews were found in the uploaded file.")

    # Analyze the sentiment of each review
    sentiments = [sentiment_analyzer(review) for review in reviews]
    df = pd.DataFrame({'Reviews': reviews, 'Sentiment': sentiments})

    # Generate pie chart
    chart_object = sentiment_pie_chart(sentiments)

    return df, chart_object
|
| 66 |
-
|
| 67 |
-
# Gradio interface combining single sentence analysis and file-based review sentiment analysis
|
| 68 |
-
def main_interface(input_option, sentence=None, file=None):
    """Route a request to single-sentence or file-based analysis.

    Args:
        input_option: "Single Sentence" or "File Upload".
        sentence: Text to analyse when the sentence mode is selected.
        file: Uploaded file to analyse when the file mode is selected.

    Returns:
        A ``(dataframe, chart, message)`` triple. A successful file analysis
        fills the first two slots; sentence results and every user-facing
        problem are reported through ``message`` only.
    """
    if input_option == "Single Sentence":
        # Whitespace-only text counts as "nothing entered".
        if not sentence or not sentence.strip():
            return None, None, "Please enter a sentence."
        return None, None, analyze_single_sentence(sentence)  # Single sentence output

    if input_option == "File Upload":
        if not file:
            return None, None, "Please upload a file."
        try:
            df, chart_object = read_reviews_and_analyze_sentiment(file)
        except ValueError as error:
            # Unsupported format / missing column: show the explanation in
            # the message slot instead of failing the whole request.
            return None, None, str(error)
        return df, chart_object, None  # File output

    # Unknown option: still hand back three values so every output is filled.
    return None, None, "Please choose a valid input type."
|
| 81 |
-
|
| 82 |
-
# Gradio interface
|
| 83 |
-
# One Interface serves both modes: the radio selects which of the two inputs
# main_interface() reads, and the unused outputs are returned as None.
demo = gr.Interface(
    fn=main_interface,
    inputs=[
        gr.Radio(label="Choose Input Type", choices=["Single Sentence", "File Upload"], value="Single Sentence"),
        gr.Textbox(label="Enter a sentence for sentiment analysis (if selected)", placeholder="Type your sentence here..."),
        # Gradio's file_types takes extensions with a leading dot (e.g. ".json").
        gr.File(file_types=[".xlsx", ".pdf", ".docx"], label="Upload your review comment file (if selected)"),
    ],
    outputs=[
        gr.Dataframe(label="Sentiment Analysis Results (For File Uploads)"),
        gr.Plot(label="Sentiment Distribution Chart (For File Uploads)"),
        gr.Textbox(label="Single Sentence Sentiment Result (For Single Sentence Input)"),
    ],
    title="Sentiment Analyzer",
    description="This application analyzes the sentiment of either a single sentence or reviews in uploaded files (Excel, PDF, DOCX).",
)

demo.launch()
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import gradio as gr
|
| 3 |
+
import pandas as pd
|
| 4 |
+
import matplotlib.pyplot as plt
|
| 5 |
+
from transformers import pipeline
|
| 6 |
+
from docx import Document
|
| 7 |
+
from PyPDF2 import PdfReader
|
| 8 |
+
|
| 9 |
+
# Initialize the sentiment analysis pipeline
|
| 10 |
+
# Module-level pipeline object; sentiment_analyzer() calls it for every text.
analyzer = pipeline(
    task="text-classification",
    model="distilbert/distilbert-base-uncased-finetuned-sst-2-english",
)
|
| 11 |
+
|
| 12 |
+
# Function to analyze sentiment for a single sentence
|
| 13 |
+
def sentiment_analyzer(review):
    """Classify one piece of text and return the model's top label.

    Args:
        review: Text to classify. Values that are not already ``str`` (for
            example a number read from a spreadsheet cell) are converted
            with ``str()`` before being sent to the model.

    Returns:
        The ``'label'`` entry of the first result produced by ``analyzer``.
    """
    text = review if isinstance(review, str) else str(review)
    # truncation=True asks the tokenizer to cut over-long inputs down to the
    # model's maximum sequence length instead of failing on them.
    sentiment = analyzer(text, truncation=True)
    return sentiment[0]['label']
|
| 16 |
+
|
| 17 |
+
# Create a pie chart visualization for the sentiment
|
| 18 |
+
def sentiment_pie_chart(sentiment_labels):
    """Build a pie chart showing how often each sentiment label occurs.

    Args:
        sentiment_labels: Sequence of label strings, one per analysed review.

    Returns:
        The matplotlib figure holding the chart. When there are no labels the
        figure contains a "No data" placeholder instead of a pie.
    """
    sentiment_counts = pd.Series(sentiment_labels, dtype="object").value_counts()

    # value_counts() orders labels by frequency, so colours are looked up per
    # label; a fixed ['green', 'red'] list would paint NEGATIVE green whenever
    # negative reviews are the majority.
    # Assumes the model emits 'POSITIVE' / 'NEGATIVE'; other labels are gray.
    palette = {'POSITIVE': 'green', 'NEGATIVE': 'red'}
    slice_colors = [
        palette.get(str(label).upper(), 'gray')
        for label in sentiment_counts.index
    ]

    fig, ax = plt.subplots()
    if sentiment_counts.empty:
        # Nothing to plot: draw a placeholder rather than asking pandas to
        # render an empty series.
        ax.text(0.5, 0.5, 'No data', ha='center', va='center')
        ax.axis('off')
    else:
        sentiment_counts.plot(kind='pie', autopct='%1.1f%%', colors=slice_colors, ax=ax)
        ax.set_ylabel('')
    ax.set_title('Sentiment Distribution')

    # Unregister the figure from pyplot so repeated calls in a long-running
    # app do not pile up open figures; the Figure object itself is returned
    # to the caller for rendering.
    plt.close(fig)
    return fig
|
| 27 |
+
|
| 28 |
+
# Function to analyze a single input sentence
|
| 29 |
+
def analyze_single_sentence(sentence):
    """Return a display message reporting the sentiment label of ``sentence``.

    The label is obtained from ``sentiment_analyzer``; this wrapper only
    embeds it in a human-readable sentence.
    """
    return f"The sentiment of the sentence is: {sentiment_analyzer(sentence)}"
|
| 32 |
+
|
| 33 |
+
# Function to read and analyze reviews from Excel, PDF, or DOCX files
|
| 34 |
+
def read_reviews_and_analyze_sentiment(file_object):
    """Extract reviews from an uploaded file and label each one.

    Args:
        file_object: The uploaded file. Its ``name`` attribute (or the value
            itself when it is a plain path string) decides the format; the
            object is handed unchanged to the matching reader.

    Returns:
        A ``(DataFrame, figure)`` tuple: the frame has ``Reviews`` and
        ``Sentiment`` columns, the figure comes from ``sentiment_pie_chart``.

    Raises:
        ValueError: If the extension is not .xlsx/.docx/.pdf, an Excel file
            has no ``Reviews`` column, or the file yields no review text.
    """
    # Compare extensions case-insensitively so "REVIEWS.XLSX" is accepted too.
    file_name = str(getattr(file_object, 'name', file_object)).lower()

    if file_name.endswith('.xlsx'):
        df = pd.read_excel(file_object)
        if 'Reviews' not in df.columns:
            raise ValueError("Excel file must contain a 'Reviews' column.")
        # Empty cells load as NaN; drop them and stringify non-text cells so
        # only real text reaches the model.
        candidates = [str(cell) for cell in df['Reviews'].dropna()]

    elif file_name.endswith('.docx'):
        doc = Document(file_object)
        candidates = [para.text for para in doc.paragraphs]

    elif file_name.endswith('.pdf'):
        reader = PdfReader(file_object)
        # A page may yield no text at all; fall back to "" for it. Joining
        # pages with '\n' keeps the last line of one page from fusing with
        # the first line of the next.
        page_texts = [page.extract_text() or "" for page in reader.pages]
        candidates = '\n'.join(page_texts).split('\n')  # Assuming reviews are newline-separated

    else:
        raise ValueError("Unsupported file format. Please upload .xlsx, .pdf, or .docx files.")

    # Blank entries carry no sentiment; skip them for every format.
    reviews = [text for text in candidates if text.strip()]
    if not reviews:
        raise ValueError("No reviews were found in the uploaded file.")

    # Analyze the sentiment of each review
    sentiments = [sentiment_analyzer(review) for review in reviews]
    df = pd.DataFrame({'Reviews': reviews, 'Sentiment': sentiments})

    # Generate pie chart
    chart_object = sentiment_pie_chart(sentiments)

    return df, chart_object
|
| 66 |
+
|
| 67 |
+
# Gradio interface combining single sentence analysis and file-based review sentiment analysis
|
| 68 |
+
def main_interface(input_option, sentence=None, file=None):
    """Route a request to single-sentence or file-based analysis.

    Args:
        input_option: "Single Sentence" or "File Upload".
        sentence: Text to analyse when the sentence mode is selected.
        file: Uploaded file to analyse when the file mode is selected.

    Returns:
        A ``(dataframe, chart, message)`` triple. A successful file analysis
        fills the first two slots; sentence results and every user-facing
        problem are reported through ``message`` only.
    """
    if input_option == "Single Sentence":
        # Whitespace-only text counts as "nothing entered".
        if not sentence or not sentence.strip():
            return None, None, "Please enter a sentence."
        return None, None, analyze_single_sentence(sentence)  # Single sentence output

    if input_option == "File Upload":
        if not file:
            return None, None, "Please upload a file."
        try:
            df, chart_object = read_reviews_and_analyze_sentiment(file)
        except ValueError as error:
            # Unsupported format / missing column: show the explanation in
            # the message slot instead of failing the whole request.
            return None, None, str(error)
        return df, chart_object, None  # File output

    # Unknown option: still hand back three values so every output is filled.
    return None, None, "Please choose a valid input type."
|
| 81 |
+
|
| 82 |
+
# Gradio interface
|
| 83 |
+
# One Interface serves both modes: the radio selects which of the two inputs
# main_interface() reads, and the unused outputs are returned as None.
demo = gr.Interface(
    fn=main_interface,
    inputs=[
        gr.Radio(label="Choose Input Type", choices=["Single Sentence", "File Upload"], value="Single Sentence"),
        gr.Textbox(label="Enter a sentence for sentiment analysis (if selected)", placeholder="Type your sentence here..."),
        # Gradio's file_types takes extensions with a leading dot (e.g. ".json").
        gr.File(file_types=[".xlsx", ".pdf", ".docx"], label="Upload your review comment file (if selected)"),
    ],
    outputs=[
        gr.Dataframe(label="Sentiment Analysis Results (For File Uploads)"),
        gr.Plot(label="Sentiment Distribution Chart (For File Uploads)"),
        gr.Textbox(label="Single Sentence Sentiment Result (For Single Sentence Input)"),
    ],
    title="Sentiment Analyzer",
    description="This application analyzes the sentiment of either a single sentence or reviews in uploaded files (Excel, PDF, DOCX).",
)

demo.launch()
|