"""Gradio demo for binary sentiment analysis with a fine-tuned BERT classifier.

The script loads a ``bert-base-uncased`` encoder wrapped by ``Transformer``,
restores fine-tuned weights from ``transformer.pt`` and serves two tabs:
one for a single sentence and one for a ``.txt`` / ``.xlsx`` file of sentences.
"""

import collections
import os
import tempfile

import datasets
import gradio as gr
import matplotlib.pyplot as plt
import numpy as np
import pandas
import torch
import torch.nn as nn
import torch.optim as optim
import tqdm
import transformers
from transformers import AutoTokenizer, AutoModelForSequenceClassification


class Transformer(nn.Module):
    """Pretrained transformer encoder followed by a linear classification head."""

    def __init__(self, transformer, output_dim, freeze):
        """Wrap ``transformer`` and attach a ``hidden_size -> output_dim`` head.

        Args:
            transformer: Encoder exposing ``config.hidden_size`` and returning
                an output with ``last_hidden_state``.
            output_dim: Number of output classes.
            freeze: When truthy, the encoder parameters are excluded from
                gradient updates.
        """
        super().__init__()
        self.transformer = transformer
        hidden_dim = transformer.config.hidden_size
        self.fc = nn.Linear(hidden_dim, output_dim)
        if freeze:
            for param in transformer.parameters():
                param.requires_grad = False

    def forward(self, ids):
        """Return class logits computed from the first-token hidden state.

        Args:
            ids: Token id tensor, ``[batch_size, ids_dim]``.

        Returns:
            Tensor of logits with one row per batch element.
        """
        # Attention maps were requested but never used by the original code,
        # so they are no longer computed.
        output = self.transformer(ids)
        hidden = output.last_hidden_state
        cls_hidden = hidden[:, 0, :]
        pred = self.fc(torch.tanh(cls_hidden))
        return pred


transformers_name = "bert-base-uncased"
tokenizer = transformers.AutoTokenizer.from_pretrained(transformers_name)
transformer = transformers.AutoModel.from_pretrained(transformers_name)
output_dim = 2
freeze = False
model = Transformer(transformer, output_dim, freeze)
# NOTE(review): torch.load unpickles the file; only load checkpoints you trust.
# map_location keeps a GPU-saved checkpoint loadable on a CPU-only host
# (inference below runs on CPU tensors).
model.load_state_dict(torch.load("transformer.pt", map_location="cpu"))
model.eval()
pos_or_neg = ["negative", "positive"]


def _predict_logits(text):
    """Tokenize ``text`` and return the model's logits as a 1-D tensor."""
    # truncation=True caps the sequence at the tokenizer's model max length so
    # very long inputs do not overflow the encoder's position embeddings.
    ids = tokenizer(text, truncation=True)["input_ids"]
    tensor = torch.LongTensor(ids).unsqueeze(dim=0)
    with torch.no_grad():
        return model(tensor).squeeze(dim=0)


def _sentiment_label(text):
    """Return ``'Positive'`` or ``'Negative'`` for ``text``."""
    predicted_class = _predict_logits(text).argmax(dim=-1).item()
    return 'Positive' if predicted_class == 1 else 'Negative'


# Sentiment analysis for a single sentence.
def analyze_single_sentiment(single_sentence):
    """Classify one sentence and describe the result.

    Args:
        single_sentence: Text to classify.

    Returns:
        A message with the predicted label and its softmax probability, or
        ``None`` when the input is empty or whitespace only.
    """
    if not single_sentence or not single_sentence.strip():
        return None
    prediction = _predict_logits(single_sentence)
    predicted_class = prediction.argmax(dim=-1).item()
    probability = torch.softmax(prediction, dim=-1)
    predicted_probability = probability[predicted_class].item()
    print("predicted class is :", predicted_class)
    return (
        f"The comment is {pos_or_neg[predicted_class]}. "
        f"Confidence is {predicted_probability:.2f}"
    )


# Sentiment analysis for every sentence in a file.
def analyze_sentiment(file_path):
    """Classify each non-empty sentence found in a ``.txt`` or ``.xlsx`` file.

    Args:
        file_path: Path to the input file. ``.txt`` files are read line by
            line; ``.xlsx`` files are read from their ``'Sentence'`` column.

    Returns:
        List of ``(sentence, sentiment)`` tuples where sentiment is
        ``'Positive'`` or ``'Negative'``.

    Raises:
        ValueError: If the file extension is neither ``.txt`` nor ``.xlsx``.
        KeyError: If an ``.xlsx`` file has no ``'Sentence'`` column.
    """
    file_extension = os.path.splitext(file_path)[1].lower()
    if file_extension == '.txt':
        with open(file_path, 'r', encoding='utf-8') as file:
            sentences = [line.strip() for line in file]
    elif file_extension == '.xlsx':
        df = pandas.read_excel(file_path)
        # Sentences are expected in the 'Sentence' column; adjust as needed.
        # Empty cells are dropped and remaining values are converted to text
        # so numeric cells do not break .strip().
        sentences = [
            str(value).strip() for value in df['Sentence'].dropna().tolist()
        ]
    else:
        raise ValueError("不支持的文件格式")

    # Both formats go through the same prediction path; the custom model takes
    # only token ids and returns raw logits.
    return [
        (sentence, _sentiment_label(sentence))
        for sentence in sentences
        if sentence
    ]


# Save the analysis results in the same format as the original file.
def save_results_to_file(results, file_path, file_extension):
    """Write ``results`` to a new temporary file and return its path.

    Args:
        results: Iterable of ``(sentence, sentiment)`` tuples.
        file_path: Path of the analysed input file (not used for writing).
        file_extension: ``'.txt'`` for tab-separated text or ``'.xlsx'`` for
            a spreadsheet with ``Sentence`` / ``Sentiment`` columns.

    Returns:
        Path of the temporary result file; it is not deleted automatically.

    Raises:
        ValueError: If ``file_extension`` is not supported.
    """
    if file_extension == '.txt':
        with tempfile.NamedTemporaryFile(
            mode='w', delete=False, encoding='utf-8', suffix='.txt'
        ) as temp_result_file:
            temp_result_file.writelines(
                f"{sentence}\t{sentiment}\n" for sentence, sentiment in results
            )
        return temp_result_file.name
    elif file_extension == '.xlsx':
        df_result = pandas.DataFrame(results, columns=['Sentence', 'Sentiment'])
        with tempfile.NamedTemporaryFile(
            mode='w+b', delete=False, suffix='.xlsx'
        ) as temp_result_file:
            df_result.to_excel(temp_result_file, index=False)
        return temp_result_file.name
    else:
        raise ValueError("不支持的文件格式")


# Draw a pie chart from the sentiment analysis results.
def plot_sentiment_pie_chart(results):
    """Build a pie chart of the Positive / Negative share in ``results``.

    Args:
        results: Sequence of ``(sentence, sentiment)`` tuples.

    Returns:
        A matplotlib figure, or ``None`` when ``results`` is empty.
    """
    total_count = len(results)
    if total_count == 0:
        return None

    sentiment_counts = {'Positive': 0, 'Negative': 0}
    for _, sentiment in results:
        sentiment_counts[sentiment] += 1

    labels = list(sentiment_counts)
    sizes = [sentiment_counts[label] / total_count for label in labels]
    colors = ['green' if label == 'Positive' else 'red' for label in labels]
    fig, ax = plt.subplots()
    ax.pie(sizes, labels=labels, colors=colors, autopct='%1.1f%%', startangle=90)
    ax.axis('equal')
    return fig


def _analyze_uploaded_file(uploaded_file):
    """Gradio handler: analyse an uploaded file once and build both outputs.

    Args:
        uploaded_file: Value passed by the ``gr.File`` input. It is accepted
            either as a path string or as an object exposing the path via
            ``.name`` (NOTE(review): which one arrives depends on the Gradio
            version — confirm against the installed release).

    Returns:
        ``(result_file_path, figure)``; ``(None, None)`` when nothing was
        uploaded.
    """
    if uploaded_file is None:
        return None, None
    file_path = getattr(uploaded_file, "name", uploaded_file)
    file_extension = os.path.splitext(file_path)[1].lower()
    # Run inference a single time and reuse the results for both outputs.
    results = analyze_sentiment(file_path)
    result_path = save_results_to_file(results, file_path, file_extension)
    return result_path, plot_sentiment_pie_chart(results)


with gr.Blocks() as demo:
    with gr.Tab("单条记录"):
        gr.Markdown("### 单条语句情感分析")
        gr.Markdown("在此处输入单条语句,点击分析按钮,即可获取该语句的情感分析结果。")
        with gr.Column():
            text_input = gr.Textbox(label="输入单条语句", lines=2, max_lines=5, min_width=500)
            text_button = gr.Button("分析", variant="primary")
            text_output = gr.Textbox(label="单条语句情感分析结果", lines=1, min_width=500)
        text_button.click(
            fn=analyze_single_sentiment,
            inputs=text_input,
            outputs=text_output
        )
    with gr.Tab("文件"):
        gr.Markdown("### 文件中语句情感分析")
        gr.Markdown("上传包含多条语句的文件(支持.txt和.xlsx格式),点击分析按钮,将得到包含分析结果的对应格式文件,以及情感分布饼图。")
        with gr.Column():
            file_input = gr.File(label="上传包含语句的文件", min_width=500)
            file_button = gr.Button("分析", variant="primary")
            file_output = gr.File(label="下载文件中语句情感分析结果文件", min_width=500)
            pie_chart_output = gr.Plot(label="情感分布饼图")
        file_button.click(
            fn=_analyze_uploaded_file,
            inputs=file_input,
            outputs=[file_output, pie_chart_output]
        )

demo.launch(share=True)