# Extraction residue (file metadata), commented out so the module stays parseable:
# File size: 6,992 Bytes
# 850a0c3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
import collections

import datasets
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import tqdm
import transformers

import gradio as gr
import torch
import matplotlib.pyplot as plt
import numpy as np

class Transformer(nn.Module):
    """Binary sentiment classifier: a pretrained encoder plus a linear head.

    The head reads the [CLS] position of the encoder's last hidden state,
    applies tanh, and projects to ``output_dim`` logits.

    Args:
        transformer: a Hugging Face encoder exposing ``config.hidden_size``
            and returning an object with ``last_hidden_state``.
        output_dim: number of output classes (2 for positive/negative here).
        freeze: when truthy, the encoder's weights are excluded from
            gradient updates; only the linear head trains.
    """

    def __init__(self, transformer, output_dim, freeze):
        super().__init__()
        self.transformer = transformer
        hidden_dim = transformer.config.hidden_size
        self.fc = nn.Linear(hidden_dim, output_dim)

        if freeze:  # idiomatic truthiness check instead of `== True`
            for param in transformer.parameters():
                param.requires_grad = False

    def forward(self, ids):
        """Return class logits of shape [batch_size, output_dim].

        Args:
            ids: token-id tensor of shape [batch_size, seq_len].
        """
        # NOTE: the original requested output_attentions=True and captured the
        # last attention layer but never used it; dropped as dead work.
        output = self.transformer(ids)
        hidden = output.last_hidden_state

        # [CLS] token representation (position 0) summarizes the sequence.
        cls_hidden = hidden[:, 0, :]

        pred = self.fc(torch.tanh(cls_hidden))
        return pred



# --- Model bootstrap: build the tokenizer/encoder pair and load the trained head. ---
transformers_name = "bert-base-uncased"
tokenizer = transformers.AutoTokenizer.from_pretrained(transformers_name)

transformer = transformers.AutoModel.from_pretrained(transformers_name)
output_dim = 2  # binary classification: negative vs positive
freeze = False  # encoder weights stay trainable (matters only if retraining)
model = Transformer(transformer, output_dim, freeze)

# Load fine-tuned weights; presumably saved from the same Transformer class —
# TODO confirm. NOTE(review): no map_location here, so loading a GPU-saved
# checkpoint on a CPU-only machine would fail — consider map_location="cpu".
model.load_state_dict(torch.load("transformer.pt"))

# Index ↔ label mapping: class 0 = negative, class 1 = positive.
pos_or_neg = ["negative", "positive"]



import gradio as gr
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import pandas
import os
import tempfile
import matplotlib.pyplot as plt
import numpy as np


# Sentiment analysis for a single sentence.
def analyze_single_sentiment(single_sentence):
    """Classify one sentence as positive/negative with the local model.

    Args:
        single_sentence: raw text from the Gradio textbox. Falsy input
            (empty string / None) returns None so the output box stays empty.

    Returns:
        A human-readable result string with the predicted label and its
        softmax confidence, or None for empty input.
    """
    if not single_sentence:
        return None
    # truncation=True guards against inputs longer than the model's maximum
    # sequence length, which would otherwise crash the position embeddings.
    ids = tokenizer(single_sentence, truncation=True)["input_ids"]
    tensor = torch.LongTensor(ids).unsqueeze(dim=0)
    # Inference only: skip autograd bookkeeping (the original built a graph
    # per request and also left a debug print behind).
    with torch.no_grad():
        prediction = model(tensor).squeeze(dim=0)
    predicted_class = prediction.argmax(dim=-1).item()
    probability = torch.softmax(prediction, dim=-1)
    predicted_probability = probability[predicted_class].item()
    return f"The comment is {pos_or_neg[predicted_class]}. Confidence is {predicted_probability:.2f}"

# Sentiment analysis for multiple sentences read from a file.
def analyze_sentiment(file_path):
    """Classify every sentence in a .txt or .xlsx file.

    Args:
        file_path: path to a UTF-8 text file (one sentence per line) or an
            Excel workbook with a 'Sentence' column.

    Returns:
        A list of (sentence, sentiment) tuples, sentiment being
        'Positive' or 'Negative'.

    Raises:
        ValueError: for any other file extension.
    """
    def _classify(text):
        # Single shared inference path: tokenize (truncated to the model's
        # max length), run the local classifier head, map argmax 1/0 to
        # Positive/Negative.
        ids = tokenizer(text, truncation=True)["input_ids"]
        tensor = torch.LongTensor(ids).unsqueeze(dim=0)
        with torch.no_grad():
            prediction = model(tensor).squeeze(dim=0)
        return 'Positive' if prediction.argmax(dim=-1).item() == 1 else 'Negative'

    results = []
    file_extension = os.path.splitext(file_path)[1].lower()
    if file_extension == '.txt':
        with open(file_path, 'r', encoding='utf-8') as file:
            for line in file:
                line = line.strip()
                if line:
                    results.append((line, _classify(line)))
        return results
    elif file_extension == '.xlsx':
        # BUG FIX: the original xlsx branch called model(**inputs) and read
        # outputs.logits, but the custom Transformer accepts only `ids` and
        # returns a raw tensor — that branch crashed at runtime. Both
        # formats now use the same working path.
        df = pandas.read_excel(file_path)
        # Sentences are expected in the 'Sentence' column; adjust if needed.
        for sentence in df['Sentence'].tolist():
            if not isinstance(sentence, str):
                continue  # skip NaN/numeric cells that would crash .strip()
            sentence = sentence.strip()
            if sentence:
                results.append((sentence, _classify(sentence)))
        return results
    else:
        raise ValueError("不支持的文件格式")

# Save the analysis results to a file matching the original file's format.
def save_results_to_file(results, file_path, file_extension):
    """Write (sentence, sentiment) pairs to a temp file and return its path.

    Args:
        results: iterable of (sentence, sentiment) tuples.
        file_path: original upload path (kept for interface compatibility;
            not read here).
        file_extension: '.txt' (tab-separated lines) or '.xlsx' (two-column
            sheet via pandas).

    Returns:
        Path of the newly created temporary result file.

    Raises:
        ValueError: for any other extension.
    """
    if file_extension == '.txt':
        lines = [f"{sentence}\t{sentiment}\n" for sentence, sentiment in results]
        with tempfile.NamedTemporaryFile(mode='w', delete=False,
                                         encoding='utf-8', suffix='.txt') as out:
            out.writelines(lines)
            return out.name
    if file_extension == '.xlsx':
        frame = pandas.DataFrame(results, columns=['Sentence', 'Sentiment'])
        with tempfile.NamedTemporaryFile(mode='w+b', delete=False,
                                         suffix='.xlsx') as out:
            frame.to_excel(out, index=False)
            return out.name
    raise ValueError("不支持的文件格式")

# Draw a pie chart of the sentiment distribution.
def plot_sentiment_pie_chart(results):
    """Build a matplotlib pie chart of Positive vs Negative proportions.

    Args:
        results: list of (sentence, sentiment) tuples where sentiment is
            'Positive' or 'Negative'.

    Returns:
        The matplotlib Figure, or None when ``results`` is empty.
    """
    counts = {'Positive': 0, 'Negative': 0}
    for _, sentiment in results:
        counts[sentiment] += 1

    total = len(results)
    if total == 0:
        return None

    labels = list(counts)
    fractions = [counts[name] / total for name in labels]
    # Positive slices in green, Negative in red.
    palette = ['green' if name == 'Positive' else 'red' for name in labels]

    fig, ax = plt.subplots()
    ax.pie(fractions, labels=labels, colors=palette,
           autopct='%1.1f%%', startangle=90)
    ax.axis('equal')  # keep the pie circular
    return fig


# Gradio UI: one tab for single-sentence analysis, one for file analysis.
def _analyze_uploaded_file(file_path):
    """Analyze an uploaded file ONCE and return (result file, pie chart).

    BUG FIX: the original click lambda called analyze_sentiment(x) twice —
    doubling full-model inference over the whole file per click.
    """
    results = analyze_sentiment(file_path)
    file_extension = os.path.splitext(file_path)[1].lower()
    return (save_results_to_file(results, file_path, file_extension),
            plot_sentiment_pie_chart(results))


with gr.Blocks() as demo:
    with gr.Tab("单条记录"):
        gr.Markdown("### 单条语句情感分析")
        gr.Markdown("在此处输入单条语句,点击分析按钮,即可获取该语句的情感分析结果。")
        with gr.Column():
            text_input = gr.Textbox(label="输入单条语句", lines=2, max_lines=5, min_width=500)
            text_button = gr.Button("分析", variant="primary")
            text_output = gr.Textbox(label="单条语句情感分析结果", lines=1, min_width=500)
            text_button.click(
                fn=analyze_single_sentiment,
                inputs=text_input,
                outputs=text_output
            )
    with gr.Tab("文件"):
        gr.Markdown("### 文件中语句情感分析")
        gr.Markdown("上传包含多条语句的文件(支持.txt和.xlsx格式),点击分析按钮,将得到包含分析结果的对应格式文件,以及情感分布饼图。")
        with gr.Column():
            file_input = gr.File(label="上传包含语句的文件", min_width=500)
            file_button = gr.Button("分析", variant="primary")
            file_output = gr.File(label="下载文件中语句情感分析结果文件", min_width=500)
            pie_chart_output = gr.Plot(label="情感分布饼图")
            file_button.click(
                fn=_analyze_uploaded_file,
                inputs=file_input,
                outputs=[file_output, pie_chart_output]
            )

demo.launch(share=True)