Spaces:
Sleeping
Sleeping
Commit ·
55dd648
1
Parent(s): b4b8d2a
progress more 21
Browse files
app.py
CHANGED
|
@@ -16,10 +16,11 @@ import torch
|
|
| 16 |
mystem = Mystem()
|
| 17 |
|
| 18 |
# Set up the sentiment analyzers
|
| 19 |
-
|
| 20 |
finbert = pipeline("sentiment-analysis", model="ProsusAI/finbert")
|
| 21 |
roberta = pipeline("sentiment-analysis", model="cardiffnlp/twitter-roberta-base-sentiment")
|
| 22 |
finbert_tone = pipeline("sentiment-analysis", model="yiyanghkust/finbert-tone")
|
|
|
|
| 23 |
|
| 24 |
# Function for lemmatizing Russian text
|
| 25 |
def lemmatize_text(text):
|
|
@@ -59,16 +60,6 @@ def translate(text):
|
|
| 59 |
return translated_text
|
| 60 |
|
| 61 |
|
| 62 |
-
|
| 63 |
-
# Function for VADER sentiment analysis with label mapping
|
| 64 |
-
def get_vader_sentiment(text):
|
| 65 |
-
score = vader_analyzer.polarity_scores(text)["compound"]
|
| 66 |
-
if score > 0.2:
|
| 67 |
-
return "Positive"
|
| 68 |
-
elif score < -0.2:
|
| 69 |
-
return "Negative"
|
| 70 |
-
return "Neutral"
|
| 71 |
-
|
| 72 |
# Functions for FinBERT, RoBERTa, and FinBERT-Tone with label mapping
|
| 73 |
def get_mapped_sentiment(result):
|
| 74 |
label = result['label'].lower()
|
|
@@ -78,6 +69,11 @@ def get_mapped_sentiment(result):
|
|
| 78 |
return "Negative"
|
| 79 |
return "Neutral"
|
| 80 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 81 |
def get_finbert_sentiment(text):
|
| 82 |
result = finbert(text, truncation=True, max_length=512)[0]
|
| 83 |
return get_mapped_sentiment(result)
|
|
@@ -135,26 +131,26 @@ def process_file(uploaded_file):
|
|
| 135 |
progress_text.text(f"{i + 1} из {total_news} сообщений переведено")
|
| 136 |
|
| 137 |
# Perform sentiment analysis
|
| 138 |
-
|
| 139 |
finbert_results = [get_finbert_sentiment(text) for text in translated_texts]
|
| 140 |
roberta_results = [get_roberta_sentiment(text) for text in translated_texts]
|
| 141 |
finbert_tone_results = [get_finbert_tone_sentiment(text) for text in translated_texts]
|
| 142 |
|
| 143 |
# Add results to DataFrame
|
| 144 |
-
df['
|
| 145 |
df['FinBERT'] = finbert_results
|
| 146 |
df['RoBERTa'] = roberta_results
|
| 147 |
df['FinBERT-Tone'] = finbert_tone_results
|
| 148 |
df['Translated']
|
| 149 |
|
| 150 |
# Reorder columns
|
| 151 |
-
columns_order = ['Объект', '
|
| 152 |
df = df[columns_order]
|
| 153 |
|
| 154 |
return df
|
| 155 |
|
| 156 |
def main():
|
| 157 |
-
st.title("... приступим к анализу... версия
|
| 158 |
|
| 159 |
uploaded_file = st.file_uploader("Выбирайте Excel-файл", type="xlsx")
|
| 160 |
|
|
@@ -168,7 +164,7 @@ def main():
|
|
| 168 |
fig, axs = plt.subplots(2, 2, figsize=(12, 8))
|
| 169 |
fig.suptitle("Распределение окраски по моделям")
|
| 170 |
|
| 171 |
-
models = ['
|
| 172 |
for i, model in enumerate(models):
|
| 173 |
ax = axs[i // 2, i % 2]
|
| 174 |
sentiment_counts = df[model].value_counts()
|
|
|
|
| 16 |
mystem = Mystem()
|
| 17 |
|
| 18 |
# Set up the sentiment analyzers
|
| 19 |
+
|
| 20 |
finbert = pipeline("sentiment-analysis", model="ProsusAI/finbert")
|
| 21 |
roberta = pipeline("sentiment-analysis", model="cardiffnlp/twitter-roberta-base-sentiment")
|
| 22 |
finbert_tone = pipeline("sentiment-analysis", model="yiyanghkust/finbert-tone")
|
| 23 |
+
sberubert = pipeline("sentiment-analysis", model = "ai-forever/ruBert-base")
|
| 24 |
|
| 25 |
# Function for lemmatizing Russian text
|
| 26 |
def lemmatize_text(text):
|
|
|
|
| 60 |
return translated_text
|
| 61 |
|
| 62 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 63 |
# Functions for FinBERT, RoBERTa, and FinBERT-Tone with label mapping
|
| 64 |
def get_mapped_sentiment(result):
|
| 65 |
label = result['label'].lower()
|
|
|
|
| 69 |
return "Negative"
|
| 70 |
return "Neutral"
|
| 71 |
|
| 72 |
+
def get_sberubert_sentiment(text):
|
| 73 |
+
result = sberubert(text, truncation=True, max_length=512)[0]
|
| 74 |
+
return get_mapped_sentiment(result)
|
| 75 |
+
|
| 76 |
+
|
| 77 |
def get_finbert_sentiment(text):
|
| 78 |
result = finbert(text, truncation=True, max_length=512)[0]
|
| 79 |
return get_mapped_sentiment(result)
|
|
|
|
| 131 |
progress_text.text(f"{i + 1} из {total_news} сообщений переведено")
|
| 132 |
|
| 133 |
# Perform sentiment analysis
|
| 134 |
+
rubert_results = [get_sberubert_sentiment(text) for text in translated_texts]
|
| 135 |
finbert_results = [get_finbert_sentiment(text) for text in translated_texts]
|
| 136 |
roberta_results = [get_roberta_sentiment(text) for text in translated_texts]
|
| 137 |
finbert_tone_results = [get_finbert_tone_sentiment(text) for text in translated_texts]
|
| 138 |
|
| 139 |
# Add results to DataFrame
|
| 140 |
+
df['ruBERT'] = rubert_results
|
| 141 |
df['FinBERT'] = finbert_results
|
| 142 |
df['RoBERTa'] = roberta_results
|
| 143 |
df['FinBERT-Tone'] = finbert_tone_results
|
| 144 |
df['Translated']
|
| 145 |
|
| 146 |
# Reorder columns
|
| 147 |
+
columns_order = ['Объект', 'ruBERT', 'FinBERT', 'RoBERTa', 'FinBERT-Tone', 'Выдержки из текста', 'Translated' ]
|
| 148 |
df = df[columns_order]
|
| 149 |
|
| 150 |
return df
|
| 151 |
|
| 152 |
def main():
|
| 153 |
+
st.title("... приступим к анализу... версия 21")
|
| 154 |
|
| 155 |
uploaded_file = st.file_uploader("Выбирайте Excel-файл", type="xlsx")
|
| 156 |
|
|
|
|
| 164 |
fig, axs = plt.subplots(2, 2, figsize=(12, 8))
|
| 165 |
fig.suptitle("Распределение окраски по моделям")
|
| 166 |
|
| 167 |
+
models = ['ruBERT', 'FinBERT', 'RoBERTa', 'FinBERT-Tone']
|
| 168 |
for i, model in enumerate(models):
|
| 169 |
ax = axs[i // 2, i % 2]
|
| 170 |
sentiment_counts = df[model].value_counts()
|