for logreg custom class

Browse files

Files changed (11) hide show

models/__init__.py +0 -0
models/__pycache__/__init__.cpython-310.pyc +0 -0
models/model1/Custom_class.py +40 -0
models/model1/__init__.py +0 -0
models/model1/__pycache__/Custom_class.cpython-310.pyc +0 -0
models/model1/__pycache__/__init__.cpython-310.pyc +0 -0
models/model1/model_weights.pth +0 -3
models/model2/__init__.py +0 -0
models/model2/model.py +5 -0
pages/comments.py +0 -0
pages/policlinic.py +3 -1

models/__init__.py ADDED Viewed

File without changes

models/__pycache__/__init__.cpython-310.pyc ADDED Viewed

Binary file (148 Bytes). View file

models/model1/Custom_class.py ADDED Viewed

	@@ -0,0 +1,40 @@

+import re
+import pandas as pd
+import numpy as np
+from sklearn.base import BaseEstimator, TransformerMixin
+from sklearn.feature_extraction.text import TfidfVectorizer
+from sklearn.decomposition import TruncatedSVD
+from sklearn.pipeline import Pipeline, FeatureUnion
+from sklearn.linear_model import LogisticRegression
+from sklearn.preprocessing import Normalizer
+import joblib
+import nltk
+from nltk.corpus import stopwords
+from pymorphy2 import MorphAnalyzer
+import string
+nltk.download('stopwords')  # runs at import time; provides the list read by stopwords.words('russian') in TextPreprocessor.__init__
+nltk.download('punkt')  # runs at import time; NOTE(review): no tokenizer call is visible in this file - confirm this download is still needed
+class TextPreprocessor(BaseEstimator, TransformerMixin):
+    def __init__(self):
+        self.stop_words = set(stopwords.words('russian'))
+        self.morph = MorphAnalyzer()
+    def preprocess_text(self, text):
+        # Удаление всего, что не является буквами или знаками препинания
+        clean_pattern = re.compile(r'[^a-zA-Zа-яА-ЯёЁ0-9.,!?;:\s]')
+        text = clean_pattern.sub('', text)
+        url_pattern = re.compile(r'http\S+|www\S+|https\S+')
+        text = url_pattern.sub(r'', text)
+        text = text.translate(str.maketrans('', '', string.punctuation))
+        text = text.lower()
+        tokens = text.split()
+        lemmatized_text = ' '.join([self.morph.parse(word)[0].normal_form for word in tokens if word not in self.stop_words])
+        return lemmatized_text
+    def fit(self, X, y=None):
+        return self
+    def transform(self, X, y=None):
+        return X.apply(self.preprocess_text)

models/model1/__init__.py ADDED Viewed

File without changes

models/model1/__pycache__/Custom_class.cpython-310.pyc ADDED Viewed

Binary file (2.13 kB). View file

models/model1/__pycache__/__init__.cpython-310.pyc ADDED Viewed

Binary file (155 Bytes). View file

models/model1/model_weights.pth DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:da7fd2151d6a5446fc178462ff93ee61c24f98cb0aa41343e2e8c36802e2170b
-size 47712485

models/model2/__init__.py ADDED Viewed

File without changes

models/model2/model.py CHANGED Viewed

@@ -1,3 +1,8 @@
 class BERTClassifier(nn.Module):
     def __init__(self):
         super().__init__()

+import torch
+from torch import nn
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
 class BERTClassifier(nn.Module):
     def __init__(self):
         super().__init__()

pages/comments.py ADDED Viewed

File without changes

pages/policlinic.py CHANGED Viewed

@@ -1,9 +1,10 @@
 import streamlit as st
 import joblib
 import pandas as pd
 # Load the trained pipeline
-pipeline = joblib.load('logistic_regression_pipeline.pkl')
 # Streamlit application
 st.title('Классификация отзывов на русском языке')
@@ -13,3 +14,4 @@ input_text = st.text_area('Введите текст отзыва')
 if st.button('Предсказать'):
     prediction = pipeline.predict(pd.Series([input_text]))
     st.write(f'Предсказанный класс с помощью логрег: {prediction[0]}')

 import streamlit as st
 import joblib
 import pandas as pd
+from models.model1.Custom_class import TextPreprocessor
 # Load the trained pipeline
+pipeline = joblib.load('models/model1/logistic_regression_pipeline.pkl')
 # Streamlit application
 st.title('Классификация отзывов на русском языке')
 if st.button('Предсказать'):
     prediction = pipeline.predict(pd.Series([input_text]))
     st.write(f'Предсказанный класс с помощью логрег: {prediction[0]}')
+    st.write(f'1 - negative, 0 - positive')