osheina committed on
Commit
ddeaa8b
·
verified ·
1 Parent(s): 6cd99d3

Upload strim_nlp.py

Browse files
Files changed (1) hide show
  1. strim_nlp.py +58 -0
strim_nlp.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ from catboost import CatBoostClassifier
4
+ import re
5
+ import string
6
+ from nltk.corpus import stopwords
7
+ from pymystem3 import Mystem
8
+ from joblib import load
9
+ import nltk
10
+
11
def data_preprocessing(text):
    """Normalize a raw review string before lemmatization.

    Steps, in order: lower-case the text; delete HTML-like ``<...>`` tags;
    replace URLs, @mentions, #hashtags and digit runs with a space; delete
    every character listed in ``string.punctuation``; drop tokens found in
    NLTK's Russian stop-word list.

    Returns the remaining whitespace-separated tokens joined by single spaces.
    """
    russian_stops = set(stopwords.words('russian'))

    cleaned = re.sub("<.*?>", "", text.lower())
    # Each of these noise patterns becomes a single space so neighbouring
    # words are not glued together.
    for noise_pattern in (r'http\S+', r'@\w+', r'#\w+', r'\d+'):
        cleaned = re.sub(noise_pattern, " ", cleaned)

    # Punctuation is removed outright (not replaced by a space).
    cleaned = cleaned.translate(str.maketrans("", "", string.punctuation))

    kept_tokens = [token for token in cleaned.split() if token not in russian_stops]
    return " ".join(kept_tokens)
21
+
22
def lemmatize_text(text):
    """Lemmatize *text* with Mystem and return the lemmas as one string.

    A new ``Mystem`` analyzer is created on every call. Every item returned
    by ``Mystem.lemmatize`` is kept and the items are joined with one space.
    """
    analyzer = Mystem()
    return ' '.join(analyzer.lemmatize(text))
26
+
27
# Both trained artifacts are loaded once, at import time.
# NOTE(review): the paths below are absolute and point into one user's home
# directory, so importing this module fails wherever those files are absent --
# confirm the intended location before deploying.
_CATBOOST_MODEL_FILE = '/Users/olgaseina/Desktop/NLP_project/cat_model4.cbm'
_TFIDF_VECTORIZER_FILE = '/Users/olgaseina/Desktop/NLP_project/tfidf_vectorizer.joblib'

# CatBoost classifier used by classic_ml_page() to label a review.
model = CatBoostClassifier()
model.load_model(_CATBOOST_MODEL_FILE)

# Vectorizer deserialized with joblib; classic_ml_page() calls its
# .transform() on the lemmatized review before prediction.
tfidf_vectorizer = load(_TFIDF_VECTORIZER_FILE)
31
+
32
def classic_ml_page():
    """Render the Streamlit page that classifies a review as positive or negative.

    The page shows a title, a text area and a button. When the button is
    pressed, the entered text is cleaned with ``data_preprocessing``,
    lemmatized with ``lemmatize_text``, vectorized with the module-level
    ``tfidf_vectorizer`` and passed to the module-level ``model``. A
    prediction equal to 1 is reported as a positive review; anything else is
    reported as negative.

    If the text area is empty or contains only whitespace, the user is asked
    to enter a review and no prediction is made.

    Returns:
        None. All output is written to the Streamlit page.
    """
    st.title("Классификация отзывов о медицинских учреждениях")
    user_review = st.text_area("Введите ваш отзыв здесь:")

    # Nothing to do until the user presses the button.
    if not st.button("Классифицировать"):
        return

    # Strip before testing: a review made only of spaces or newlines is a
    # truthy string, but it has no content to classify.
    if not user_review or not user_review.strip():
        st.write("Пожалуйста, введите отзыв для классификации.")
        return

    preprocessed_review = data_preprocessing(user_review)
    lemmatized_review = lemmatize_text(preprocessed_review)
    # The vectorizer takes a collection of documents, hence the one-item list.
    vectorized_review = tfidf_vectorizer.transform([lemmatized_review])
    prediction = model.predict(vectorized_review)

    if prediction[0] == 1:
        st.write("Позитивный отзыв 😀")
    else:
        st.write("Негативный отзыв 😟")
49
+
50
+
51
+
52
+
53
+
54
+
55
+
56
+
57
+
58
+