Spaces:

DengJunTTT
/

test

No application file

App Files Files Community

test / Sentiment_Analysis_gradio.py

DengJunTTT

Add application file

850a0c3 about 1 year ago

raw

history blame contribute delete

6.99 kB

	import collections

	import datasets
	import matplotlib.pyplot as plt
	import numpy as np
	import torch
	import torch.nn as nn
	import torch.optim as optim
	import tqdm
	import transformers

	import gradio as gr
	import torch
	import matplotlib.pyplot as plt
	import numpy as np

	class Transformer(nn.Module):
	    """Sequence classifier: a transformer encoder followed by a linear head.

	    The hidden state of the first token of every sequence is passed through
	    ``tanh`` and one linear layer to produce ``output_dim`` logits.
	    """

	    def __init__(self, transformer, output_dim, freeze):
	        """Wrap ``transformer`` and attach the classification head.

	        Args:
	            transformer: Encoder module exposing ``config.hidden_size`` and
	                returning an object with ``last_hidden_state`` when called.
	            output_dim: Number of logits produced per input sequence.
	            freeze: When truthy, gradients are disabled for every encoder
	                parameter so that only the linear head remains trainable.
	        """
	        super().__init__()
	        self.transformer = transformer
	        hidden_dim = transformer.config.hidden_size
	        self.fc = nn.Linear(hidden_dim, output_dim)

	        if freeze:
	            for param in transformer.parameters():
	                param.requires_grad = False

	    def forward(self, ids):
	        """Return logits of shape [batch_size, output_dim].

	        Args:
	            ids: Token-id tensor of shape [batch_size, ids_dim].
	        """
	        # Attention maps used to be requested here and then discarded; they
	        # are no longer computed because nothing consumed them.
	        output = self.transformer(ids)
	        hidden = output.last_hidden_state
	        # Position 0 of each sequence is used as the sentence summary.
	        cls_hidden = hidden[:, 0, :]
	        return self.fc(torch.tanh(cls_hidden))



	# Checkpoint name that supplies both the tokenizer and the encoder weights.
	transformers_name = "bert-base-uncased"
	tokenizer = transformers.AutoTokenizer.from_pretrained(transformers_name)

	transformer = transformers.AutoModel.from_pretrained(transformers_name)
	output_dim = 2
	freeze = False
	model = Transformer(transformer, output_dim, freeze)

	# map_location="cpu" lets a checkpoint that was saved from a GPU be restored
	# on a host without CUDA; without it torch.load fails on such a host.
	model.load_state_dict(torch.load("transformer.pt", map_location="cpu"))
	# The app only runs inference, so put every submodule in evaluation mode.
	model.eval()

	# Human-readable label for each predicted class index.
	pos_or_neg = ["negative", "positive"]



	import gradio as gr
	from transformers import AutoTokenizer, AutoModelForSequenceClassification
	import torch
	import pandas
	import os
	import tempfile
	import matplotlib.pyplot as plt
	import numpy as np


	# Sentiment analysis for a single sentence.
	def analyze_single_sentiment(single_sentence):
	    """Classify one sentence and describe the result as a short message.

	    Args:
	        single_sentence: Text entered by the user; may be empty or ``None``.

	    Returns:
	        A message naming the predicted label and the softmax probability of
	        that label, or ``None`` when the input is empty or whitespace only.
	    """
	    if not single_sentence or not single_sentence.strip():
	        return None

	    # truncation=True keeps the input within the tokenizer's maximum length
	    # so that a very long comment cannot overflow the encoder.
	    ids = tokenizer(single_sentence, truncation=True)["input_ids"]
	    tensor = torch.LongTensor(ids).unsqueeze(dim=0)
	    # Inference only: skip building the autograd graph.
	    with torch.no_grad():
	        prediction = model(tensor).squeeze(dim=0)
	    probability = torch.softmax(prediction, dim=-1)
	    predicted_class = prediction.argmax(dim=-1).item()
	    predicted_probability = probability[predicted_class].item()
	    print("predicted class is :", predicted_class)
	    return f"The comment is {pos_or_neg[predicted_class]}. Confidence is {predicted_probability:.2f}"

	# Sentiment analysis for every sentence contained in a file.
	def _classify_sentence(sentence):
	    """Return 'Positive' or 'Negative' for one non-empty sentence."""
	    # truncation=True keeps the input within the tokenizer's maximum length.
	    ids = tokenizer(sentence, truncation=True)["input_ids"]
	    tensor = torch.LongTensor(ids).unsqueeze(dim=0)
	    # Inference only: skip building the autograd graph.
	    with torch.no_grad():
	        prediction = model(tensor).squeeze(dim=0)
	    predicted_class = prediction.argmax(dim=-1).item()
	    return 'Positive' if predicted_class == 1 else 'Negative'


	def analyze_sentiment(file_path):
	    """Classify each sentence stored in a ``.txt`` or ``.xlsx`` file.

	    A ``.txt`` file is read one sentence per line. An ``.xlsx`` file is
	    expected to hold its sentences in a column named ``Sentence``. Blank
	    lines and empty cells are skipped.

	    Args:
	        file_path: Path of the file to analyze; the extension selects the
	            reader and is matched case-insensitively.

	    Returns:
	        A list of ``(sentence, sentiment)`` tuples in file order, where
	        ``sentiment`` is ``'Positive'`` or ``'Negative'``.

	    Raises:
	        ValueError: If the extension is neither ``.txt`` nor ``.xlsx``.
	    """
	    file_extension = os.path.splitext(file_path)[1].lower()
	    if file_extension == '.txt':
	        # utf-8-sig also reads plain UTF-8 and drops a leading BOM, which
	        # would otherwise become part of the first sentence.
	        with open(file_path, 'r', encoding='utf-8-sig') as file:
	            sentences = [line.strip() for line in file]
	    elif file_extension == '.xlsx':
	        df = pandas.read_excel(file_path)
	        # Sentences are assumed to be in the 'Sentence' column; adjust if
	        # the sheet uses a different header. Empty cells are dropped and
	        # non-text cells are converted so that .strip() cannot fail.
	        sentences = [str(cell).strip() for cell in df['Sentence'].dropna()]
	    else:
	        raise ValueError("不支持的文件格式")

	    # Both formats share one classification path, so the spreadsheet branch
	    # feeds the model exactly like the text branch does.
	    return [(sentence, _classify_sentence(sentence)) for sentence in sentences if sentence]

	# Save the analysis results to a file in the same format as the upload.
	def save_results_to_file(results, file_path, file_extension):
	    """Write ``results`` to a new temporary file and return its path.

	    Args:
	        results: Iterable of ``(sentence, sentiment)`` pairs.
	        file_path: Path of the uploaded file; not used by this function.
	        file_extension: ``'.txt'`` for tab-separated lines, or ``'.xlsx'``
	            for a sheet with ``Sentence`` and ``Sentiment`` columns.

	    Returns:
	        The name of the temporary file, which is left on disk for download.

	    Raises:
	        ValueError: If ``file_extension`` is neither of the two formats.
	    """
	    if file_extension == '.xlsx':
	        table = pandas.DataFrame(results, columns=['Sentence', 'Sentiment'])
	        with tempfile.NamedTemporaryFile(mode='w+b', delete=False, suffix='.xlsx') as out:
	            table.to_excel(out, index=False)
	            return out.name

	    if file_extension != '.txt':
	        raise ValueError("不支持的文件格式")

	    with tempfile.NamedTemporaryFile(mode='w', delete=False, encoding='utf-8', suffix='.txt') as out:
	        out.writelines(f"{sentence}\t{sentiment}\n" for sentence, sentiment in results)
	        return out.name

	# Draw a pie chart of the sentiment distribution.
	def plot_sentiment_pie_chart(results):
	    """Build a pie chart showing the share of each sentiment label.

	    Args:
	        results: Sequence of ``(sentence, sentiment)`` pairs where
	            ``sentiment`` is ``'Positive'`` or ``'Negative'``.

	    Returns:
	        A matplotlib figure, or ``None`` when ``results`` is empty.
	    """
	    total_count = len(results)
	    if total_count == 0:
	        return None

	    # The figure is created without pyplot so it is not registered in
	    # pyplot's global figure list. Figures made with plt.subplots() stay
	    # alive until explicitly closed, which accumulates in a server process.
	    from matplotlib.figure import Figure

	    sentiment_counts = {'Positive': 0, 'Negative': 0}
	    for _, sentiment in results:
	        sentiment_counts[sentiment] += 1

	    labels = list(sentiment_counts)
	    sizes = [sentiment_counts[label] / total_count for label in labels]
	    colors = ['green' if label == 'Positive' else 'red' for label in labels]

	    fig = Figure()
	    ax = fig.subplots()
	    ax.pie(sizes, labels=labels, colors=colors, autopct='%1.1f%%', startangle=90)
	    # Equal aspect ratio keeps the pie circular.
	    ax.axis('equal')
	    return fig


	def _analyze_uploaded_file(file_path):
	    """Analyze an uploaded file once and return (result file path, pie chart).

	    Returns ``(None, None)`` when no file has been uploaded, leaving both
	    outputs empty instead of raising.
	    """
	    if not file_path:
	        return None, None
	    # Run the model over the file a single time and reuse the results for
	    # both the downloadable file and the chart.
	    results = analyze_sentiment(file_path)
	    file_extension = os.path.splitext(file_path)[1].lower()
	    return (save_results_to_file(results, file_path, file_extension),
	            plot_sentiment_pie_chart(results))


	# Two-tab interface: one tab for a single sentence, one for a whole file.
	with gr.Blocks() as demo:
	    with gr.Tab("单条记录"):
	        gr.Markdown("### 单条语句情感分析")
	        gr.Markdown("在此处输入单条语句，点击分析按钮，即可获取该语句的情感分析结果。")
	        with gr.Column():
	            text_input = gr.Textbox(label="输入单条语句", lines=2, max_lines=5, min_width=500)
	            text_button = gr.Button("分析", variant="primary")
	            text_output = gr.Textbox(label="单条语句情感分析结果", lines=1, min_width=500)
	        text_button.click(
	            fn=analyze_single_sentiment,
	            inputs=text_input,
	            outputs=text_output
	        )
	    with gr.Tab("文件"):
	        gr.Markdown("### 文件中语句情感分析")
	        gr.Markdown("上传包含多条语句的文件（支持.txt和.xlsx格式），点击分析按钮，将得到包含分析结果的对应格式文件，以及情感分布饼图。")
	        with gr.Column():
	            file_input = gr.File(label="上传包含语句的文件", min_width=500)
	            file_button = gr.Button("分析", variant="primary")
	            file_output = gr.File(label="下载文件中语句情感分析结果文件", min_width=500)
	            pie_chart_output = gr.Plot(label="情感分布饼图")
	        file_button.click(
	            fn=_analyze_uploaded_file,
	            inputs=file_input,
	            outputs=[file_output, pie_chart_output]
	        )

	demo.launch(share=True)