Spaces:
Runtime error
Runtime error
File size: 2,489 Bytes
d88070f 7b8ab01 6da12cd d88070f 3df8d53 59e39cd 3df8d53 6da12cd d88070f 6da12cd 6c24e56 59e39cd 7b8ab01 73d67e3 7b8ab01 cb7bb0d 7b8ab01 3df8d53 d88070f 6da12cd 3df8d53 6da12cd 20f7681 3df8d53 20f7681 3df8d53 20f7681 3df8d53 20f7681 3df8d53 20f7681 3df8d53 20f7681 3df8d53 20f7681 3df8d53 20f7681 3df8d53 20f7681 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 |
import gradio as gr
import tensorflow as tf
import numpy as np
import pickle
import torch
from transformers import AutoTokenizer, AutoModel
# добавляем нужные импорты
import re
import string
import emoji
import pymorphy2
import joblib
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
# ---------------------------
# LOAD BERT (Russian BERT encoder used to embed input text)
# ---------------------------
MODEL_NAME = 'sberbank-ai/ruBert-base'
# Downloads/loads the tokenizer and encoder from the Hugging Face hub
# (cached locally after the first run).
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
bert_model = AutoModel.from_pretrained(MODEL_NAME)
# ---------------------------
# LOAD SCALER and KERAS MODEL
# ---------------------------
# Feature scaler fitted at training time (joblib-serialized).
with open("scaler.joblib", "rb") as f:
    scaler = joblib.load(f)
# Classification head; compile=False since we only run inference.
keras_model = tf.keras.models.load_model("tf.keras", compile=False)
# Output class labels, in the order of the model's output units:
# fear, anger, sadness, joy.
EMOTIONS = ["страх", "гнев", "грусть", "радость"]
# ---------------------------------------
# ФУНКЦИИ ДЛЯ ОБРАБОТКИ ЭМОДЗИ (добавь свои)
# ---------------------------------------
def remove_duplicate_emojis(text):
    """Collapse runs of repeated emojis in *text*.

    Placeholder implementation: returns the input unchanged.
    Plug in the real de-duplication logic here.
    """
    return text
def is_emoji_spam(text):
    """Report whether *text* is dominated by emojis (emoji spam).

    Placeholder implementation: always answers False.
    Plug in the real detection logic here.
    """
    return False
def remove_all_emojis(text):
    """Strip every emoji character from *text*.

    Placeholder implementation: returns the input unchanged.
    Plug in the real stripping logic here.
    """
    return text
# ---------------------------
# ПРЕДОБРАБОТКА ТЕКСТА
# ---------------------------
# Lazily-constructed, shared morphological analyzer.  Building a
# pymorphy2.MorphAnalyzer loads its dictionaries and is expensive, so we
# create it once instead of on every preprocess_text() call.
_MORPH_ANALYZER = None

def _get_morph():
    """Return the cached pymorphy2.MorphAnalyzer, creating it on first use."""
    global _MORPH_ANALYZER
    if _MORPH_ANALYZER is None:
        _MORPH_ANALYZER = pymorphy2.MorphAnalyzer()
    return _MORPH_ANALYZER

def preprocess_text(text):
    """Normalize a (Russian) text string for the emotion classifier.

    Pipeline: emoji de-duplication / spam stripping (via the stub helpers
    above), lowercasing, URL/mention/hashtag removal, punctuation
    stripping, emoji demojizing, digit removal, tokenization, stop-word
    and short-token filtering, then pymorphy2 lemmatization.

    Returns the cleaned tokens joined by single spaces ('' for empty input).
    """
    text = remove_duplicate_emojis(text)
    if is_emoji_spam(text):
        text = remove_all_emojis(text)
    text = str(text).lower()
    # Strip URLs, then @mentions and #hashtags.
    text = re.sub(r'http\S+|www\S+|https\S+', '', text)
    text = re.sub(r'@\w+|#\w+', '', text)
    text = text.translate(str.maketrans('', '', string.punctuation))
    # Convert remaining emojis to :alias: tokens so they survive the
    # alpha-only filter below.
    text = emoji.demojize(text)
    text = re.sub(r'\d+', '', text)
    try:
        tokens = word_tokenize(text, language="russian")
    except LookupError:
        # NLTK 'punkt' data not downloaded — fall back to a plain split.
        tokens = text.split()
    try:
        stop_words = set(stopwords.words('russian'))
    except LookupError:
        # NLTK 'stopwords' corpus missing — skip stop-word filtering.
        stop_words = set()
    # Keep alphabetic words and demojized :alias: tokens; drop stop words
    # and very short tokens.
    tokens = [
        word for word in tokens
        if (word.isalpha() or (word.startswith(':') and word.endswith(':')))
        and word not in stop_words
        and len(word) > 2
    ]
    try:
        morph = _get_morph()
        tokens = [morph.parse(word)[0].normal_form for word in tokens]
    except Exception:
        # Lemmatization is best-effort: on any failure keep the
        # unlemmatized tokens rather than crashing the request.
        pass
    return ' '.join(tokens)
|