Spaces:
No application file
No application file
| import collections | |
| import datasets | |
| import matplotlib.pyplot as plt | |
| import numpy as np | |
| import torch | |
| import torch.nn as nn | |
| import torch.optim as optim | |
| import tqdm | |
| import transformers | |
| import gradio as gr | |
| import torch | |
| import matplotlib.pyplot as plt | |
| import numpy as np | |
class Transformer(nn.Module):
    """Sequence classifier: a transformer encoder followed by a linear head.

    The hidden state at position 0 of the encoder's last layer is passed
    through ``tanh`` and projected to ``output_dim`` scores.

    Args:
        transformer: Encoder module. It must expose ``config.hidden_size``
            and, when called on a tensor of token ids, return an object with
            a ``last_hidden_state`` attribute.
        output_dim: Number of scores produced per input sequence.
        freeze: When truthy, gradients are disabled for every encoder
            parameter, leaving only the linear head trainable.
    """

    def __init__(self, transformer, output_dim, freeze):
        super().__init__()
        self.transformer = transformer
        hidden_dim = transformer.config.hidden_size
        self.fc = nn.Linear(hidden_dim, output_dim)
        if freeze:
            for param in transformer.parameters():
                param.requires_grad = False

    def forward(self, ids):
        """Return the raw (unnormalised) scores for a batch of token ids.

        Args:
            ids: Token-id tensor laid out as ``[batch_size, ids_dim]``.

        Returns:
            Tensor of scores with ``output_dim`` entries per sequence.
        """
        # ids = [batch_size, ids_dim]
        # Only the last hidden state is consumed here, so attention maps are
        # not requested from the encoder.
        output = self.transformer(ids)
        hidden = output.last_hidden_state
        # Position 0 along the sequence axis summarises the whole sequence.
        cls_hidden = hidden[:, 0, :]
        return self.fc(torch.tanh(cls_hidden))
# Checkpoint name shared by the tokenizer and the encoder.
transformers_name = "bert-base-uncased"
tokenizer = transformers.AutoTokenizer.from_pretrained(transformers_name)
transformer = transformers.AutoModel.from_pretrained(transformers_name)
output_dim = 2
freeze = False
model = Transformer(transformer, output_dim, freeze)
# map_location="cpu" lets a checkpoint that was saved from a GPU load on a
# CPU-only host; nothing in the code visible here moves the model to another
# device afterwards.
model.load_state_dict(torch.load("transformer.pt", map_location="cpu"))
# The model is only used for prediction, so put every submodule in
# evaluation mode.
model.eval()
# Index i holds the label for predicted class i.
pos_or_neg = ["negative", "positive"]
| import gradio as gr | |
| from transformers import AutoTokenizer, AutoModelForSequenceClassification | |
| import torch | |
| import pandas | |
| import os | |
| import tempfile | |
| import matplotlib.pyplot as plt | |
| import numpy as np | |
# Sentiment analysis for a single sentence.
def analyze_single_sentiment(single_sentence):
    """Classify one sentence and describe the prediction.

    Args:
        single_sentence: Text to classify.

    Returns:
        A message naming the predicted label and its softmax probability, or
        ``None`` when the input is missing, empty, or only whitespace.
    """
    if not single_sentence or not single_sentence.strip():
        return None
    # Truncate to the tokenizer's maximum length so an over-long input cannot
    # exceed what the encoder accepts.
    ids = tokenizer(single_sentence, truncation=True)["input_ids"]
    tensor = torch.LongTensor(ids).unsqueeze(dim=0)
    # Prediction only: skip autograd bookkeeping.
    with torch.no_grad():
        prediction = model(tensor).squeeze(dim=0)
    predicted_class = prediction.argmax(dim=-1).item()
    probability = torch.softmax(prediction, dim=-1)
    predicted_probability = probability[predicted_class].item()
    print("predicted class is :", predicted_class)
    return f"The comment is {pos_or_neg[predicted_class]}. Confidence is {predicted_probability:.2f}"
# Sentiment analysis for every sentence stored in a file.
def _predict_sentiment_label(text):
    """Return 'Positive' or 'Negative' for one piece of text."""
    # Truncate to the tokenizer's maximum length so an over-long line cannot
    # exceed what the encoder accepts.
    ids = tokenizer(text, truncation=True)["input_ids"]
    tensor = torch.LongTensor(ids).unsqueeze(dim=0)
    # Prediction only: skip autograd bookkeeping.
    with torch.no_grad():
        prediction = model(tensor).squeeze(dim=0)
    predicted_class = prediction.argmax(dim=-1).item()
    return 'Positive' if predicted_class == 1 else 'Negative'


def analyze_sentiment(file_path):
    """Classify each non-empty sentence found in a ``.txt`` or ``.xlsx`` file.

    A ``.txt`` file is read as UTF-8 with one sentence per line. An ``.xlsx``
    file is read with pandas and the sentences are taken from its
    ``'Sentence'`` column. Surrounding whitespace is stripped and blank
    entries are skipped. Both formats go through the same prediction helper.

    Args:
        file_path: Path of the file to analyse; the extension selects the
            reader (case-insensitive).

    Returns:
        A list of ``(sentence, sentiment)`` tuples in file order, where
        ``sentiment`` is ``'Positive'`` or ``'Negative'``.

    Raises:
        ValueError: If the extension is neither ``.txt`` nor ``.xlsx``.
    """
    file_extension = os.path.splitext(file_path)[1].lower()
    if file_extension == '.txt':
        with open(file_path, 'r', encoding='utf-8') as file:
            sentences = [line.strip() for line in file]
    elif file_extension == '.xlsx':
        df = pandas.read_excel(file_path)
        # Assumes the sentences are in the 'Sentence' column; adjust as needed.
        # Empty cells are dropped and remaining cells are converted to text so
        # that non-string values do not break stripping.
        sentences = [str(cell).strip() for cell in df['Sentence'].dropna()]
    else:
        raise ValueError("不支持的文件格式")
    return [
        (sentence, _predict_sentiment_label(sentence))
        for sentence in sentences
        if sentence
    ]
# Save the analysis results to a file in the same format as the input file.
def save_results_to_file(results, file_path, file_extension):
    """Write ``(sentence, sentiment)`` pairs to a new temporary file.

    Args:
        results: Iterable of ``(sentence, sentiment)`` pairs.
        file_path: Path of the analysed input file; not used by this function.
        file_extension: ``'.txt'`` for tab-separated text (one pair per line)
            or ``'.xlsx'`` for a sheet with ``Sentence`` and ``Sentiment``
            columns.

    Returns:
        The path of the temporary file, which is left on disk for the caller.

    Raises:
        ValueError: If ``file_extension`` is neither ``'.txt'`` nor ``'.xlsx'``.
    """
    if file_extension == '.xlsx':
        table = pandas.DataFrame(results, columns=['Sentence', 'Sentiment'])
        with tempfile.NamedTemporaryFile(mode='w+b', delete=False, suffix='.xlsx') as workbook:
            table.to_excel(workbook, index=False)
        return workbook.name

    if file_extension != '.txt':
        raise ValueError("不支持的文件格式")

    with tempfile.NamedTemporaryFile(mode='w', delete=False, encoding='utf-8', suffix='.txt') as report:
        report.writelines(
            f"{sentence}\t{sentiment}\n" for sentence, sentiment in results
        )
    return report.name
# Draw a pie chart from the sentiment analysis results.
def plot_sentiment_pie_chart(results):
    """Build a pie chart of the Positive/Negative share in ``results``.

    Args:
        results: Sequence of ``(sentence, sentiment)`` pairs whose sentiment
            is ``'Positive'`` or ``'Negative'``.

    Returns:
        The matplotlib figure, or ``None`` when ``results`` is empty.

    Raises:
        KeyError: If a pair carries any other sentiment label.
    """
    tally = dict.fromkeys(('Positive', 'Negative'), 0)
    for pair in results:
        _, label = pair
        tally[label] = tally[label] + 1

    n_results = len(results)
    if not n_results:
        return None

    names = list(tally)
    shares = [tally[name] / n_results for name in names]
    # Positive is drawn in green, everything else in red.
    palette = {'Positive': 'green'}
    wedge_colors = [palette.get(name, 'red') for name in names]

    figure, axes = plt.subplots()
    axes.pie(shares, labels=names, colors=wedge_colors, autopct='%1.1f%%', startangle=90)
    axes.axis('equal')
    return figure
def _analyze_uploaded_file(uploaded_path):
    """Gradio handler for the file tab: analyse an upload once, reuse the result.

    Args:
        uploaded_path: Path of the uploaded file, or ``None`` when the button
            is pressed without an upload.

    Returns:
        A ``(result_file_path, figure)`` pair for the file and plot outputs;
        ``(None, None)`` when nothing was uploaded.
    """
    if uploaded_path is None:
        return None, None
    # Run the model over the file a single time and feed the same results to
    # both the saved report and the chart.
    results = analyze_sentiment(uploaded_path)
    extension = os.path.splitext(uploaded_path)[1].lower()
    result_file = save_results_to_file(results, uploaded_path, extension)
    return result_file, plot_sentiment_pie_chart(results)


with gr.Blocks() as demo:
    # Tab 1: classify a single sentence typed by the user.
    with gr.Tab("单条记录"):
        gr.Markdown("### 单条语句情感分析")
        gr.Markdown("在此处输入单条语句,点击分析按钮,即可获取该语句的情感分析结果。")
        with gr.Column():
            text_input = gr.Textbox(label="输入单条语句", lines=2, max_lines=5, min_width=500)
            text_button = gr.Button("分析", variant="primary")
            text_output = gr.Textbox(label="单条语句情感分析结果", lines=1, min_width=500)
        text_button.click(
            fn=analyze_single_sentiment,
            inputs=text_input,
            outputs=text_output
        )
    # Tab 2: classify every sentence in an uploaded .txt or .xlsx file.
    with gr.Tab("文件"):
        gr.Markdown("### 文件中语句情感分析")
        gr.Markdown("上传包含多条语句的文件(支持.txt和.xlsx格式),点击分析按钮,将得到包含分析结果的对应格式文件,以及情感分布饼图。")
        with gr.Column():
            file_input = gr.File(label="上传包含语句的文件", min_width=500)
            file_button = gr.Button("分析", variant="primary")
            file_output = gr.File(label="下载文件中语句情感分析结果文件", min_width=500)
            pie_chart_output = gr.Plot(label="情感分布饼图")
        file_button.click(
            fn=_analyze_uploaded_file,
            inputs=file_input,
            outputs=[file_output, pie_chart_output]
        )
demo.launch(share=True)