# test / Sentiment_Analysis_gradio.py
# DengJunTTT's picture
# Add application file
# 850a0c3
import collections
import datasets
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import tqdm
import transformers
import gradio as gr
import torch
import matplotlib.pyplot as plt
import numpy as np
class Transformer(nn.Module):
    """Sequence classifier: a transformer encoder followed by a linear head.

    The hidden state of the first token of every sequence is passed through
    ``tanh`` and a linear layer to produce ``output_dim`` scores.
    """

    def __init__(self, transformer, output_dim, freeze):
        """Wrap ``transformer`` and attach a linear classification head.

        Args:
            transformer: Encoder module exposing ``config.hidden_size`` and
                returning an object with ``last_hidden_state`` when called.
            output_dim: Number of scores produced per sequence.
            freeze: When truthy, gradients are disabled for every encoder
                parameter so that only the linear head remains trainable.
        """
        super().__init__()
        self.transformer = transformer
        hidden_dim = transformer.config.hidden_size
        self.fc = nn.Linear(hidden_dim, output_dim)
        if freeze:
            for param in transformer.parameters():
                param.requires_grad = False

    def forward(self, ids):
        """Return unnormalised class scores for a batch of token ids.

        Args:
            ids: Integer tensor of token ids, ``[batch_size, ids_dim]``.

        Returns:
            Tensor of scores with ``output_dim`` values per sequence.
        """
        # Attention maps are not requested: nothing downstream consumes them.
        output = self.transformer(ids)
        hidden = output.last_hidden_state
        # Only the first position of each sequence feeds the classifier.
        cls_hidden = hidden[:, 0, :]
        return self.fc(torch.tanh(cls_hidden))
# Checkpoint name shared by the tokenizer and the encoder.
transformers_name = "bert-base-uncased"
tokenizer = transformers.AutoTokenizer.from_pretrained(transformers_name)
transformer = transformers.AutoModel.from_pretrained(transformers_name)
output_dim = 2
freeze = False
model = Transformer(transformer, output_dim, freeze)
# map_location="cpu" keeps the load working on hosts without a GPU even when
# the checkpoint was saved from one; nothing in this file moves the model to
# another device afterwards.
model.load_state_dict(torch.load("transformer.pt", map_location="cpu"))
# This script only runs inference, so put the module in evaluation mode.
model.eval()
# Human-readable label for each predicted class index.
pos_or_neg = ["negative", "positive"]
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import pandas
import os
import tempfile
import matplotlib.pyplot as plt
import numpy as np
# Sentiment analysis for a single sentence
def analyze_single_sentiment(single_sentence):
    """Classify one sentence and describe the result as text.

    Args:
        single_sentence: Text to classify.

    Returns:
        A message naming the predicted label (from ``pos_or_neg``) and the
        softmax probability of that label, or ``None`` when the input is
        empty or contains only whitespace.
    """
    if not single_sentence or not single_sentence.strip():
        return None

    # truncation=True caps the input at the tokenizer's maximum length so an
    # over-long comment cannot overflow the encoder.
    ids = tokenizer(single_sentence, truncation=True)["input_ids"]
    tensor = torch.LongTensor(ids).unsqueeze(dim=0)
    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        prediction = model(tensor).squeeze(dim=0)
    predicted_class = prediction.argmax(dim=-1).item()
    probability = torch.softmax(prediction, dim=-1)
    predicted_probability = probability[predicted_class].item()
    print("predicted class is :", predicted_class)
    return f"The comment is {pos_or_neg[predicted_class]}. Confidence is {predicted_probability:.2f}"
# Sentiment analysis for multiple sentences stored in a file
def _predict_sentiment_label(text):
    """Return 'Positive' or 'Negative' for one non-empty sentence."""
    # truncation=True caps the input at the tokenizer's maximum length.
    ids = tokenizer(text, truncation=True)["input_ids"]
    tensor = torch.LongTensor(ids).unsqueeze(dim=0)
    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        prediction = model(tensor).squeeze(dim=0)
    predicted_class = prediction.argmax(dim=-1).item()
    return 'Positive' if predicted_class == 1 else 'Negative'


def analyze_sentiment(file_path):
    """Classify every non-blank sentence found in a ``.txt`` or ``.xlsx`` file.

    A ``.txt`` file is read as UTF-8 with one sentence per line. An ``.xlsx``
    file is read with pandas and must contain a ``Sentence`` column; empty
    cells are skipped and other cells are converted to text.

    Both formats go through the same prediction helper. The module-level
    ``model`` accepts a tensor of token ids and returns raw scores, so it
    cannot be called with tokenizer keyword output or queried for ``.logits``.

    Args:
        file_path: Path of the file to analyse; the extension selects the
            reader and is matched case-insensitively.

    Returns:
        A list of ``(sentence, sentiment)`` tuples in file order, where
        ``sentiment`` is ``'Positive'`` or ``'Negative'``.

    Raises:
        ValueError: If the extension is neither ``.txt`` nor ``.xlsx``.
    """
    results = []
    file_extension = os.path.splitext(file_path)[1].lower()

    if file_extension == '.txt':
        with open(file_path, 'r', encoding='utf-8') as file:
            for line in file:
                line = line.strip()
                if line:
                    results.append((line, _predict_sentiment_label(line)))
        return results

    if file_extension == '.xlsx':
        df = pandas.read_excel(file_path)
        # Sentences are assumed to be in the 'Sentence' column; adjust as needed.
        for cell in df['Sentence'].dropna():
            # Cells may hold numbers or other non-string values.
            sentence = str(cell).strip()
            if sentence:
                results.append((sentence, _predict_sentiment_label(sentence)))
        return results

    raise ValueError("不支持的文件格式")
# Save the analysis results to a file in the same format as the original file
def save_results_to_file(results, file_path, file_extension):
    """Write analysis results to a new temporary file and return its path.

    Args:
        results: Iterable of ``(sentence, sentiment)`` pairs.
        file_path: Path of the analysed input file; not used by this function.
        file_extension: ``'.txt'`` for tab-separated lines or ``'.xlsx'`` for
            a sheet with ``Sentence`` and ``Sentiment`` columns.

    Returns:
        Path of the temporary file, which is not deleted automatically.

    Raises:
        ValueError: If ``file_extension`` is not one of the two formats.
    """
    if file_extension not in ('.txt', '.xlsx'):
        raise ValueError("不支持的文件格式")

    if file_extension == '.xlsx':
        table = pandas.DataFrame(results, columns=['Sentence', 'Sentiment'])
        with tempfile.NamedTemporaryFile(mode='w+b', delete=False, suffix='.xlsx') as out_file:
            table.to_excel(out_file, index=False)
        return out_file.name

    with tempfile.NamedTemporaryFile(mode='w', delete=False, encoding='utf-8', suffix='.txt') as out_file:
        out_file.writelines(
            f"{sentence}\t{sentiment}\n" for sentence, sentiment in results
        )
    return out_file.name
# Draw a pie chart from the sentiment analysis results
def plot_sentiment_pie_chart(results):
    """Build a pie chart of the Positive/Negative share in ``results``.

    Args:
        results: Sequence of ``(sentence, sentiment)`` pairs whose sentiment
            is ``'Positive'`` or ``'Negative'``.

    Returns:
        A matplotlib ``Figure``, or ``None`` when ``results`` is empty.

    Raises:
        KeyError: If a sentiment label is neither ``'Positive'`` nor
            ``'Negative'``.
    """
    sentiment_counts = {'Positive': 0, 'Negative': 0}
    for _, sentiment in results:
        sentiment_counts[sentiment] += 1
    total_count = len(results)
    if total_count == 0:
        return None

    # Build the figure directly instead of through pyplot: pyplot keeps a
    # reference to every figure it creates until it is closed, so a server
    # that plots on each request would accumulate figures.
    from matplotlib.figure import Figure

    # Leave out classes with no samples so no empty "0.0%" wedge is drawn.
    labels = [label for label, count in sentiment_counts.items() if count]
    sizes = [sentiment_counts[label] / total_count for label in labels]
    colors = ['green' if label == 'Positive' else 'red' for label in labels]

    fig = Figure()
    ax = fig.subplots()
    ax.pie(sizes, labels=labels, colors=colors, autopct='%1.1f%%', startangle=90)
    # Equal aspect ratio keeps the pie circular.
    ax.axis('equal')
    return fig
def _analyze_uploaded_file(uploaded_file):
    """Analyse an uploaded file once and return (result file path, pie chart).

    Returns ``(None, None)`` when nothing was uploaded so both outputs are
    simply left empty.
    """
    if uploaded_file is None:
        return None, None
    # NOTE(review): this app treats the upload as a path string; objects
    # exposing ``.name`` are accepted too in case the installed Gradio version
    # passes a temp-file wrapper instead — confirm against the pinned version.
    file_path = uploaded_file if isinstance(uploaded_file, str) else uploaded_file.name
    file_extension = os.path.splitext(file_path)[1].lower()
    # Run the model once and reuse the results for both outputs.
    results = analyze_sentiment(file_path)
    return (
        save_results_to_file(results, file_path, file_extension),
        plot_sentiment_pie_chart(results),
    )


# Two-tab UI: one tab classifies a single sentence, the other a whole file.
with gr.Blocks() as demo:
    with gr.Tab("单条记录"):
        gr.Markdown("### 单条语句情感分析")
        gr.Markdown("在此处输入单条语句,点击分析按钮,即可获取该语句的情感分析结果。")
        with gr.Column():
            text_input = gr.Textbox(label="输入单条语句", lines=2, max_lines=5, min_width=500)
            text_button = gr.Button("分析", variant="primary")
            text_output = gr.Textbox(label="单条语句情感分析结果", lines=1, min_width=500)
        text_button.click(
            fn=analyze_single_sentiment,
            inputs=text_input,
            outputs=text_output
        )
    with gr.Tab("文件"):
        gr.Markdown("### 文件中语句情感分析")
        gr.Markdown("上传包含多条语句的文件(支持.txt和.xlsx格式),点击分析按钮,将得到包含分析结果的对应格式文件,以及情感分布饼图。")
        with gr.Column():
            file_input = gr.File(label="上传包含语句的文件", min_width=500)
            file_button = gr.Button("分析", variant="primary")
            file_output = gr.File(label="下载文件中语句情感分析结果文件", min_width=500)
            pie_chart_output = gr.Plot(label="情感分布饼图")
        file_button.click(
            fn=_analyze_uploaded_file,
            inputs=file_input,
            outputs=[file_output, pie_chart_output]
        )
demo.launch(share=True)