# Spaces:
# No application file
# No application file
# File size: 6,992 Bytes
# 850a0c3
import collections
import datasets
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import tqdm
import transformers
import gradio as gr
import torch
import matplotlib.pyplot as plt
import numpy as np
class Transformer(nn.Module):
    """Sequence classifier: a transformer encoder followed by a linear head.

    The prediction is computed from the encoder's last-layer hidden state at
    position 0 of each sequence, passed through ``tanh`` and a linear layer.

    Args:
        transformer: Encoder module exposing ``config.hidden_size`` and
            returning an object with a ``last_hidden_state`` attribute.
        output_dim: Number of outputs produced by the linear head.
        freeze: When truthy, gradients are disabled for every encoder
            parameter so that only the linear head remains trainable.
    """

    def __init__(self, transformer, output_dim, freeze):
        super().__init__()
        self.transformer = transformer
        hidden_dim = transformer.config.hidden_size
        self.fc = nn.Linear(hidden_dim, output_dim)
        if freeze:
            for param in transformer.parameters():
                param.requires_grad = False

    def forward(self, ids):
        """Return the class scores for a batch of token ids.

        Args:
            ids: Token ids, ``[batch_size, ids_dim]``.

        Returns:
            Unnormalised scores, ``[batch_size, output_dim]``.
        """
        # Attention weights are not used for the prediction, so they are not
        # requested from the encoder.
        output = self.transformer(ids)
        cls_hidden = output.last_hidden_state[:, 0, :]
        return self.fc(torch.tanh(cls_hidden))
# Pretrained checkpoint used for both the tokenizer and the encoder.
transformers_name = "bert-base-uncased"
tokenizer = transformers.AutoTokenizer.from_pretrained(transformers_name)
transformer = transformers.AutoModel.from_pretrained(transformers_name)
output_dim = 2
freeze = False
model = Transformer(transformer, output_dim, freeze)
# map_location="cpu" lets a checkpoint that was saved from a GPU be loaded on
# a CPU-only host; the model itself is never moved off the CPU in this file.
model.load_state_dict(torch.load("transformer.pt", map_location="cpu"))
# The model is only used for inference here, so switch it to eval mode.
model.eval()
# Label text indexed by predicted class id.
pos_or_neg = ["negative", "positive"]
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import pandas
import os
import tempfile
import matplotlib.pyplot as plt
import numpy as np
# Sentiment analysis for a single sentence.
def analyze_single_sentiment(single_sentence):
    """Classify one sentence and report its label and confidence.

    Args:
        single_sentence: Text to classify.

    Returns:
        A message containing the predicted label (looked up in
        ``pos_or_neg``) and the softmax probability of that label, or
        ``None`` when the input is empty or contains only whitespace.
    """
    if not single_sentence or not single_sentence.strip():
        return None

    # truncation=True keeps the input within the tokenizer's maximum length
    # so over-long text does not overflow the encoder's position embeddings.
    ids = tokenizer(single_sentence, truncation=True)["input_ids"]
    tensor = torch.LongTensor(ids).unsqueeze(dim=0)
    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        prediction = model(tensor).squeeze(dim=0)
    predicted_class = prediction.argmax(dim=-1).item()
    probability = torch.softmax(prediction, dim=-1)
    predicted_probability = probability[predicted_class].item()
    print("predicted class is :", predicted_class)
    return f"The comment is {pos_or_neg[predicted_class]}. Confidence is {predicted_probability:.2f}"
# Sentiment analysis for every sentence contained in a file.
def _classify_sentence(sentence):
    """Return 'Positive' or 'Negative' for one non-empty sentence."""
    # truncation=True keeps the input within the tokenizer's maximum length.
    ids = tokenizer(sentence, truncation=True)["input_ids"]
    tensor = torch.LongTensor(ids).unsqueeze(dim=0)
    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        prediction = model(tensor).squeeze(dim=0)
    predicted_class = prediction.argmax(dim=-1).item()
    return 'Positive' if predicted_class == 1 else 'Negative'


def analyze_sentiment(file_path):
    """Classify each sentence found in a ``.txt`` or ``.xlsx`` file.

    A ``.txt`` file is read as UTF-8 with one sentence per line. An ``.xlsx``
    file is read with pandas and the sentences are taken from its
    ``'Sentence'`` column. Blank entries are skipped in both cases.

    Args:
        file_path: Path of the file to analyse; its extension selects the
            reader (case-insensitive).

    Returns:
        A list of ``(sentence, sentiment)`` tuples in file order, where
        ``sentiment`` is ``'Positive'`` or ``'Negative'``.

    Raises:
        ValueError: If the extension is neither ``.txt`` nor ``.xlsx``.
    """
    file_extension = os.path.splitext(file_path)[1].lower()
    if file_extension == '.txt':
        with open(file_path, 'r', encoding='utf-8') as file:
            sentences = [line.strip() for line in file]
    elif file_extension == '.xlsx':
        df = pandas.read_excel(file_path)
        # Assumes the sentences are in the 'Sentence' column; adjust as needed.
        # Empty cells are dropped and other cells are converted to text so
        # that numeric or missing values do not break ``strip``.
        sentences = [str(cell).strip() for cell in df['Sentence'].dropna()]
    else:
        raise ValueError("不支持的文件格式")

    # Both formats go through the same classifier, which calls the model with
    # the token-id tensor that ``Transformer.forward`` expects.
    return [
        (sentence, _classify_sentence(sentence))
        for sentence in sentences
        if sentence
    ]
# Save the analysis results to a file in the same format as the input file.
def save_results_to_file(results, file_path, file_extension):
    """Write the results to a persistent temporary file and return its path.

    Args:
        results: ``(sentence, sentiment)`` pairs to write.
        file_path: Path of the analysed file; not used by this function.
        file_extension: ``'.txt'`` for tab-separated lines or ``'.xlsx'`` for
            a spreadsheet with ``Sentence`` and ``Sentiment`` columns.

    Returns:
        The name of the temporary file that was written. The file is created
        with ``delete=False``, so it remains on disk after it is closed.

    Raises:
        ValueError: If ``file_extension`` is neither ``'.txt'`` nor ``'.xlsx'``.
    """
    if file_extension not in ('.txt', '.xlsx'):
        raise ValueError("不支持的文件格式")

    if file_extension == '.xlsx':
        frame = pandas.DataFrame(results, columns=['Sentence', 'Sentiment'])
        sheet_file = tempfile.NamedTemporaryFile(mode='w+b', delete=False, suffix='.xlsx')
        with sheet_file:
            frame.to_excel(sheet_file, index=False)
        return sheet_file.name

    text_file = tempfile.NamedTemporaryFile(
        mode='w', delete=False, encoding='utf-8', suffix='.txt'
    )
    with text_file:
        text_file.writelines(f"{text}\t{label}\n" for text, label in results)
    return text_file.name
# Draw a pie chart from the sentiment analysis results.
def plot_sentiment_pie_chart(results):
    """Build a pie chart of the Positive/Negative share in ``results``.

    Args:
        results: Sequence of ``(sentence, sentiment)`` pairs where
            ``sentiment`` is ``'Positive'`` or ``'Negative'``.

    Returns:
        A matplotlib ``Figure`` holding the pie chart, or ``None`` when
        ``results`` is empty.

    Raises:
        KeyError: If a sentiment other than ``'Positive'`` or ``'Negative'``
            is encountered.
    """
    sentiment_counts = {'Positive': 0, 'Negative': 0}
    for _, sentiment in results:
        sentiment_counts[sentiment] += 1
    total_count = len(results)
    if total_count == 0:
        return None

    # The figure is created directly instead of through pyplot so that it is
    # not kept alive by pyplot's global figure registry; this handler runs
    # once per request and nothing ever closes the figures it returns.
    from matplotlib.figure import Figure

    labels = list(sentiment_counts)
    sizes = [sentiment_counts[label] / total_count for label in labels]
    colors = ['green' if label == 'Positive' else 'red' for label in labels]
    fig = Figure()
    ax = fig.subplots()
    ax.pie(sizes, labels=labels, colors=colors, autopct='%1.1f%%', startangle=90)
    # Equal aspect ratio so the pie is drawn as a circle.
    ax.axis('equal')
    return fig
def _analyze_uploaded_file(uploaded_file):
    """Gradio handler: analyse an uploaded file once and build both outputs.

    Args:
        uploaded_file: Value delivered by the ``gr.File`` input.

    Returns:
        A ``(result_file_path, pie_chart_figure)`` tuple, or ``(None, None)``
        when no file has been uploaded.
    """
    if uploaded_file is None:
        return None, None
    # NOTE(review): depending on the Gradio version/configuration, gr.File
    # passes either a path string or a file-like object exposing ``.name``;
    # both are accepted here - confirm against the deployed Gradio version.
    file_path = uploaded_file if isinstance(uploaded_file, str) else uploaded_file.name
    file_extension = os.path.splitext(file_path)[1].lower()
    # Run the model once and reuse the results for the file and the chart.
    results = analyze_sentiment(file_path)
    result_file = save_results_to_file(results, file_path, file_extension)
    return result_file, plot_sentiment_pie_chart(results)


# Two-tab UI: one tab for a single sentence, one for a file of sentences.
with gr.Blocks() as demo:
    with gr.Tab("单条记录"):
        gr.Markdown("### 单条语句情感分析")
        gr.Markdown("在此处输入单条语句,点击分析按钮,即可获取该语句的情感分析结果。")
        with gr.Column():
            text_input = gr.Textbox(label="输入单条语句", lines=2, max_lines=5, min_width=500)
            text_button = gr.Button("分析", variant="primary")
            text_output = gr.Textbox(label="单条语句情感分析结果", lines=1, min_width=500)
        text_button.click(
            fn=analyze_single_sentiment,
            inputs=text_input,
            outputs=text_output
        )
    with gr.Tab("文件"):
        gr.Markdown("### 文件中语句情感分析")
        gr.Markdown("上传包含多条语句的文件(支持.txt和.xlsx格式),点击分析按钮,将得到包含分析结果的对应格式文件,以及情感分布饼图。")
        with gr.Column():
            file_input = gr.File(label="上传包含语句的文件", min_width=500)
            file_button = gr.Button("分析", variant="primary")
            file_output = gr.File(label="下载文件中语句情感分析结果文件", min_width=500)
            pie_chart_output = gr.Plot(label="情感分布饼图")
        file_button.click(
            fn=_analyze_uploaded_file,
            inputs=file_input,
            outputs=[file_output, pie_chart_output]
        )
demo.launch(share=True)