Spaces:
Build error
Build error
movie kdnv update
Browse files- app.py +3 -3
- models/bert.pt +3 -0
- models/kdnv_models.py +71 -0
- models/logistic_regression_pipeline.pkl +3 -0
- models/lstm.pt +3 -0
- models/word2int.json +0 -0
- pages/kudinov_films.py +94 -0
app.py
CHANGED
|
@@ -11,11 +11,11 @@ col1, col2, col3 = st.columns(3)
|
|
| 11 |
# st.page_link('pages/chernyshov_learning.py', label='Обучение', icon='💀')
|
| 12 |
|
| 13 |
with col2:
|
| 14 |
-
st.page_link('pages/natasha_model.py', label='
|
| 15 |
# st.page_link('pages/bond_learning.py', label='Обучение', icon='ℹ️')
|
| 16 |
|
| 17 |
with col3:
|
| 18 |
-
st.page_link('pages/kdnv_model.py', label='
|
| 19 |
-
|
| 20 |
|
| 21 |
st.divider()
|
|
|
|
| 11 |
# st.page_link('pages/chernyshov_learning.py', label='Обучение', icon='💀')
|
| 12 |
|
| 13 |
with col2:
|
| 14 |
+
st.page_link('pages/natasha_model.py', label='Токсикметр Наташи', icon='🤬')
|
| 15 |
# st.page_link('pages/bond_learning.py', label='Обучение', icon='ℹ️')
|
| 16 |
|
| 17 |
with col3:
|
| 18 |
+
st.page_link('pages/kdnv_model.py', label='Ночной собутыльник Серёжи', icon='🍻')
|
| 19 |
+
st.page_link('pages/kudinov_films.py', label='Оценщик фильмов Серёжи', icon='🎥')
|
| 20 |
|
| 21 |
st.divider()
|
models/bert.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:177ac13ff0731ee3aa8ba9e4ec0b09f177f558cd9b0c0c596c73470d85dab44b
|
| 3 |
+
size 117119720
|
models/kdnv_models.py
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from torch import nn
|
| 2 |
+
import torch
|
| 3 |
+
import torch.nn.functional as F
|
| 4 |
+
|
| 5 |
+
# Architecture hyperparameters baked into the shipped checkpoint (models/lstm.pt);
# they must match the values used at training time or load_state_dict will fail.
HIDDEN_SIZE = 128    # LSTM hidden-state width (also the attention projection width)
EMBEDDING_DIM = 128  # token embedding dimension
VOCAB_SIZE = 1980    # embedding rows — presumably the size of models/word2int.json; verify
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
class Bandanau(nn.Module):
    """Additive (Bahdanau-style) attention over LSTM outputs.

    The class name keeps the original "Bandanau" spelling: the saved
    checkpoint and importing modules reference it by this name.
    """

    def __init__(self, HIDDEN_SIZE) -> None:
        super().__init__()
        self.hidden_size = HIDDEN_SIZE
        # Attribute names are load-bearing: state-dict keys in lstm.pt use them.
        self.linearwk = nn.Linear(self.hidden_size, self.hidden_size)  # key projection (encoder states)
        self.linearwa = nn.Linear(self.hidden_size, self.hidden_size)  # query projection (final hidden)
        self.linearwv = nn.Linear(self.hidden_size, 1)                 # energy: H -> scalar score per step

    def forward(
        self,
        lstm_outputs: torch.Tensor,  # BATCH_SIZE x SEQ_LEN x HIDDEN_SIZE
        final_hidden: torch.Tensor   # BATCH_SIZE x HIDDEN_SIZE
    ):
        """Return (context, attention_weights).

        context           -- BATCH_SIZE x HIDDEN_SIZE attended summary
        attention_weights -- BATCH_SIZE x SEQ_LEN softmax weights
        """
        final_hidden = final_hidden.unsqueeze(1)  # -> B x 1 x H, broadcasts against B x S x H

        wk_out = self.linearwk(lstm_outputs)  # B x S x H
        wa_out = self.linearwa(final_hidden)  # B x 1 x H

        # torch.tanh replaces the deprecated F.tanh — identical math.
        plus = torch.tanh(wk_out + wa_out)

        wv_out = self.linearwv(plus)  # B x S x 1 unnormalized energies

        attention_weights = F.softmax(wv_out, dim=1)  # normalize over the sequence axis

        attention_weights = attention_weights.transpose(1, 2)  # -> B x 1 x S for bmm

        # NOTE(review): the context is taken over the *projected* keys (wk_out)
        # rather than the raw lstm_outputs of the textbook formulation. Left
        # unchanged: the shipped checkpoint was trained with this exact forward.
        context = torch.bmm(attention_weights, wk_out)  # B x 1 x H

        context = context.squeeze(1)                      # -> B x H
        attention_weights = attention_weights.squeeze(1)  # -> B x S

        return context, attention_weights
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
# %%
|
| 45 |
+
class LSTMConcatAttention(nn.Module):
    """Embedding -> LSTM -> additive attention -> MLP head (3 classes).

    Returns both the class logits and the attention weights so callers can
    inspect which tokens drove the prediction.
    """

    def __init__(self) -> None:
        super().__init__()

        # Submodule attribute names (and Sequential layer indices) are fixed:
        # the state-dict keys in models/lstm.pt depend on them.
        self.embedding = nn.Embedding(VOCAB_SIZE, EMBEDDING_DIM)
        self.lstm = nn.LSTM(EMBEDDING_DIM, HIDDEN_SIZE, batch_first=True)
        self.attn = Bandanau(HIDDEN_SIZE)

        # Classifier head: three (Linear, Dropout, Tanh) stages, then the
        # output projection — indices 0..9 match the original Sequential.
        widths = (HIDDEN_SIZE, 512, 256, 128)
        stages = []
        for w_in, w_out in zip(widths, widths[1:]):
            stages += [nn.Linear(w_in, w_out), nn.Dropout(0.3), nn.Tanh()]
        stages.append(nn.Linear(128, 3))
        self.clf = nn.Sequential(*stages)

    def forward(self, x):
        emb = self.embedding(x)
        seq_states, (last_hidden, _) = self.lstm(emb)
        # Single-layer LSTM: squeeze the num_layers axis of h_n.
        context, att_weights = self.attn(seq_states, last_hidden.squeeze(0))
        logits = self.clf(context)
        return logits, att_weights
|
models/logistic_regression_pipeline.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3613a44d3c9b524869e84b21eb720ac071e1188473def7a5ead3b14fafadde16
|
| 3 |
+
size 5808993
|
models/lstm.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:08546effc4bb9a268e1ec53b9ce41bd721673fb9053cff07a24dd62a87061bba
|
| 3 |
+
size 2602410
|
models/word2int.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
pages/kudinov_films.py
ADDED
|
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import streamlit as st
|
| 3 |
+
from models.kdnv_models import LSTMConcatAttention
|
| 4 |
+
from models.kdnv_preprocess import preprocess_single_string, data_preprocessing
|
| 5 |
+
import json
|
| 6 |
+
import transformers
|
| 7 |
+
from torch import nn
|
| 8 |
+
import joblib
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
# Tokenizer for the same rubert-tiny2 base model whose fine-tuned weights
# are loaded from models/bert.pt in load_model_bert().
autotoken = transformers.AutoTokenizer.from_pretrained(
    "cointegrated/rubert-tiny2"
)
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
@st.cache_resource()
def load_model_lstm():
    """Build the attention-LSTM classifier and load its fine-tuned weights.

    Cached by Streamlit so the checkpoint is read once per process, not on
    every script rerun.

    Returns:
        LSTMConcatAttention in eval mode. The original left the model in
        train mode, so its Dropout(0.3) layers stayed active at inference
        and predictions were stochastic — model.eval() fixes that.
    """
    model = LSTMConcatAttention()
    # map_location forces CPU regardless of the device the checkpoint was
    # saved on; weights_only avoids unpickling arbitrary objects.
    model.load_state_dict(torch.load('models/lstm.pt', map_location=torch.device('cpu'), weights_only=True))
    model.eval()  # disable Dropout for deterministic inference
    return model
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
@st.cache_resource()
def load_model_bert():
    """Build rubert-tiny2 with a custom 3-class head and load tuned weights.

    Cached by Streamlit so the checkpoint is read once per process.

    Returns:
        The sequence-classification model in eval mode. The original left
        the model in train mode, so the Dropout(0.5) in the replacement
        classifier stayed active at inference and predictions were
        stochastic — model.eval() fixes that.
    """
    model = transformers.AutoModelForSequenceClassification.from_pretrained(
        "cointegrated/rubert-tiny2"
    )
    # Replace the stock head before loading: the state dict in bert.pt was
    # saved with this exact classifier architecture.
    model.classifier = nn.Sequential(
        nn.Linear(in_features=312, out_features=256),
        nn.Sigmoid(),
        nn.Dropout(0.5),
        nn.Linear(in_features=256, out_features=3)
    )
    model.load_state_dict(torch.load('models/bert.pt', map_location=torch.device('cpu'), weights_only=True))
    model.eval()  # disable Dropout for deterministic inference
    return model
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
# Instantiate all three models at import time; the torch models are cached
# across Streamlit reruns by st.cache_resource.
model_bert = load_model_bert()
model_lstm = load_model_lstm()
# scikit-learn pipeline loaded via joblib — NOTE(review): joblib.load
# unpickles arbitrary code; only safe because the file ships with the app.
model_lr = joblib.load('models/logistic_regression_pipeline.pkl')

# Class index -> human-readable label (Russian: negative/neutral/positive);
# shared by all three predictors.
labels_dict = {
    0: 'Негативный',
    1: 'Нейтральный',
    2: 'Позитивный'
}


# word -> integer id vocabulary used by the LSTM preprocessing.
with open('models/word2int.json', 'r') as f:
    vocab2int = json.load(f)
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
def predict_lstm(text):
    """Classify a review with the attention-LSTM; return the label string."""
    # Fixed sequence length of 32 tokens, mapped through the shared vocabulary.
    token_ids = preprocess_single_string(text, 32, vocab2int).long()
    with torch.no_grad():
        # The model returns (logits, attention_weights); only logits matter here.
        logits, _ = model_lstm(token_ids.unsqueeze(0))
        class_idx = logits.argmax(dim=1).item()
    return labels_dict[class_idx]
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
def predict_bert(text):
    """Classify a review with the fine-tuned BERT; return the label string."""
    cleaned = data_preprocessing(text)
    encoded = autotoken(cleaned, padding=True, truncation=True, return_tensors='pt')
    with torch.no_grad():
        logits = model_bert(**encoded).logits
    return labels_dict[logits.argmax(dim=1).item()]
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
def predict_lr(text):
    """Classify a review with the scikit-learn pipeline; return the label string."""
    # The pipeline expects an iterable of documents, hence the single-item list.
    class_idx = model_lr.predict([data_preprocessing(text)])[0]
    return labels_dict[class_idx]
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
st.title('Аналитик отзывов на фильмы')
st.caption('От Серёжи')
st.divider()


# Submission form: batches the text area and button so the script only
# reruns the predictions on submit, not on every keystroke.
with st.form(key='Отзыв'):
    prompt = st.text_area("Ваш отзыв")
    submit = st.form_submit_button('Оценивай!')

if submit:
    # Score the same review with all three models side by side.
    ans_lstm = predict_lstm(prompt)
    ans_bert = predict_bert(prompt)
    ans_lr = predict_lr(prompt)

    col1, col2, col3 = st.columns(3)

    with col1:
        st.metric(label="LSTM Prediction", value=ans_lstm)
    with col2:
        st.metric(label="BERT Prediction", value=ans_bert)
    with col3:
        st.metric(label="Logistic Regression Prediction", value=ans_lr)
|
| 94 |
+
|