|
|
import torch
|
|
|
import gradio as gr
|
|
|
import pandas as pd
|
|
|
import matplotlib.pyplot as plt
|
|
|
from transformers import pipeline
|
|
|
from docx import Document
|
|
|
from PyPDF2 import PdfReader
|
|
|
|
|
|
|
|
|
# Module-level sentiment classifier: a Hugging Face text-classification
# pipeline created once at import time and called by sentiment_analyzer().
analyzer = pipeline(
    task="text-classification",
    model="distilbert/distilbert-base-uncased-finetuned-sst-2-english",
)
|
|
|
|
|
|
|
|
|
def sentiment_analyzer(review):
    """Classify one piece of text and return its sentiment label.

    Args:
        review: The text to classify.

    Returns:
        The ``'label'`` entry of the first prediction produced by the
        module-level ``analyzer`` pipeline.
    """
    # truncation=True is forwarded to the tokenizer so that text longer than
    # the model's maximum sequence length is cut instead of raising an error.
    predictions = analyzer(review, truncation=True)
    return predictions[0]['label']
|
|
|
|
|
|
|
|
|
def sentiment_pie_chart(sentiment_labels):
    """Build a pie chart showing the share of each sentiment label.

    Args:
        sentiment_labels: Iterable of sentiment label strings.

    Returns:
        The matplotlib figure holding the chart. When no labels are given,
        the figure contains a "No data" placeholder instead of a pie.
    """
    # dtype is pinned so an empty input still yields a well-defined Series.
    sentiment_counts = pd.Series(list(sentiment_labels), dtype="object").value_counts()

    fig, ax = plt.subplots()

    if sentiment_counts.empty:
        # There is nothing to draw as a pie, so show a placeholder rather
        # than letting the plotting call fail on empty data.
        ax.text(0.5, 0.5, 'No data', ha='center', va='center', transform=ax.transAxes)
        ax.axis('off')
        ax.set_title('Sentiment Distribution')
        return fig

    # value_counts() orders slices by frequency, so colours must be looked up
    # per label; a fixed ['green', 'red'] list would paint whichever label is
    # most common in green.
    label_colors = {'POSITIVE': 'green', 'NEGATIVE': 'red'}
    slice_colors = [
        label_colors.get(str(label).upper(), 'gray')
        for label in sentiment_counts.index
    ]

    sentiment_counts.plot(kind='pie', autopct='%1.1f%%', colors=slice_colors, ax=ax)
    ax.set_ylabel('')
    ax.set_title('Sentiment Distribution')

    return fig
|
|
|
|
|
|
|
|
|
def analyze_single_sentence(sentence):
    """Return a one-line, human-readable sentiment report for *sentence*."""
    label = sentiment_analyzer(sentence)
    return "The sentiment of the sentence is: {}".format(label)
|
|
|
|
|
|
|
|
|
def _read_excel_reviews(file_object):
    """Return the non-empty entries of the 'Reviews' column as strings."""
    df = pd.read_excel(file_object)
    if 'Reviews' not in df.columns:
        raise ValueError("Excel file must contain a 'Reviews' column.")
    # Empty cells are read back as NaN; drop them and coerce the rest to text
    # so that only real strings reach the classifier.
    texts = (str(value) for value in df['Reviews'].dropna())
    return [text for text in texts if text.strip()]


def _read_docx_reviews(file_object):
    """Return every non-blank paragraph of a Word document."""
    doc = Document(file_object)
    return [para.text for para in doc.paragraphs if para.text.strip()]


def _read_pdf_reviews(file_object):
    """Return every non-blank text line of a PDF, one review per line."""
    reader = PdfReader(file_object)
    # Guard against a page yielding no text (None or an empty string).
    page_texts = [page.extract_text() or "" for page in reader.pages]
    # Pages are joined with a newline so the last line of one page is not
    # glued onto the first line of the next.
    lines = "\n".join(page_texts).split('\n')
    return [line for line in lines if line.strip()]


def read_reviews_and_analyze_sentiment(file_object):
    """Read reviews from an uploaded file and classify each one.

    Supported formats are chosen by file extension (case-insensitive):
    ``.xlsx`` (needs a 'Reviews' column), ``.docx`` (one review per
    paragraph) and ``.pdf`` (one review per text line).

    Args:
        file_object: The uploaded file. Either an object exposing a ``name``
            attribute or a plain path string.

    Returns:
        A ``(df, chart_object)`` tuple: a DataFrame with 'Reviews' and
        'Sentiment' columns, and the figure returned by
        ``sentiment_pie_chart`` for those sentiments.

    Raises:
        ValueError: If the extension is unsupported, the Excel file has no
            'Reviews' column, or the file contains no reviews.
    """
    # Fall back to the object itself when it has no .name (plain path string).
    file_name = str(getattr(file_object, 'name', file_object)).lower()

    if file_name.endswith('.xlsx'):
        reviews = _read_excel_reviews(file_object)
    elif file_name.endswith('.docx'):
        reviews = _read_docx_reviews(file_object)
    elif file_name.endswith('.pdf'):
        reviews = _read_pdf_reviews(file_object)
    else:
        raise ValueError("Unsupported file format. Please upload .xlsx, .pdf, or .docx files.")

    if not reviews:
        # Fail with a clear message instead of an obscure plotting error.
        raise ValueError("No reviews were found in the uploaded file.")

    sentiments = [sentiment_analyzer(review) for review in reviews]
    df = pd.DataFrame({'Reviews': reviews, 'Sentiment': sentiments})
    chart_object = sentiment_pie_chart(sentiments)

    return df, chart_object
|
|
|
|
|
|
|
|
|
def main_interface(input_option, sentence=None, file=None):
    """Dispatch a UI request to single-sentence or file-based analysis.

    Args:
        input_option: ``"Single Sentence"`` or ``"File Upload"``.
        sentence: Text to analyze when the single-sentence mode is selected.
        file: Uploaded file to analyze when the file-upload mode is selected.

    Returns:
        A 3-tuple ``(dataframe, chart, message)``. File analysis fills the
        first two slots and leaves the message ``None``; every other outcome
        (single-sentence result, missing input, invalid file, unknown mode)
        fills only the message.
    """
    if input_option == "Single Sentence":
        # Treat whitespace-only input the same as no input at all.
        if sentence and sentence.strip():
            return None, None, analyze_single_sentence(sentence)
        return None, None, "Please enter a sentence."

    if input_option == "File Upload":
        if not file:
            return None, None, "Please upload a file."
        try:
            df, chart_object = read_reviews_and_analyze_sentiment(file)
        except ValueError as exc:
            # Report validation problems (bad format, missing column, ...)
            # through the message output, like the other user-facing notices.
            return None, None, str(exc)
        return df, chart_object, None

    # Always return three values so the caller never receives a bare None.
    return None, None, f"Unknown input type: {input_option}"
|
|
|
|
|
|
|
|
|
# Gradio UI: one form with a mode selector, a sentence box and a file upload,
# wired to main_interface, which returns (dataframe, chart, message).
demo = gr.Interface(
    fn=main_interface,
    inputs=[
        gr.Radio(label="Choose Input Type", choices=["Single Sentence", "File Upload"], value="Single Sentence"),
        gr.Textbox(label="Enter a sentence for sentiment analysis (if selected)", placeholder="Type your sentence here..."),
        # Gradio documents extension filters with a leading dot (e.g. ".json");
        # bare words are used for file-type categories such as "image".
        gr.File(file_types=[".xlsx", ".pdf", ".docx"], label="Upload your review comment file (if selected)"),
    ],
    outputs=[
        gr.Dataframe(label="Sentiment Analysis Results (For File Uploads)"),
        gr.Plot(label="Sentiment Distribution Chart (For File Uploads)"),
        gr.Textbox(label="Single Sentence Sentiment Result (For Single Sentence Input)"),
    ],
    title="@GenAILearniverse Project 3: Sentiment Analyzer",
    description="This application analyzes the sentiment of either a single sentence or reviews in uploaded files (Excel, PDF, DOCX).",
)

demo.launch()
|
|
|
|