Spaces:

osheina
/

NLP_project

Sleeping

App Files Files Community

osheina committed on Apr 12, 2024

Commit

ddeaa8b

verified ·

1 Parent(s): 6cd99d3

Upload strim_nlp.py

Browse files

Files changed (1) hide show

strim_nlp.py +58 -0

strim_nlp.py ADDED Viewed

	@@ -0,0 +1,58 @@

+import re
+import string
+from pathlib import Path
+
+import nltk
+import pandas as pd
+import streamlit as st
+from catboost import CatBoostClassifier
+from joblib import load
+from nltk.corpus import stopwords
+from pymystem3 import Mystem
+def data_preprocessing(text):
+    stop_words = set(stopwords.words('russian'))
+    text = text.lower()
+    text = re.sub("<.*?>", "", text)
+    text = re.sub(r'http\S+', " ", text)
+    text = re.sub(r'@\w+', ' ', text)
+    text = re.sub(r'#\w+', ' ', text)
+    text = re.sub(r'\d+', ' ', text)
+    text = "".join([c for c in text if c not in string.punctuation])
+    return " ".join([word for word in text.split() if word not in stop_words])
+def lemmatize_text(text):
+    mystem = Mystem()
+    lemmas = mystem.lemmatize(text)
+    return ' '.join(lemmas)
+model = CatBoostClassifier()
+model.load_model('/Users/olgaseina/Desktop/NLP_project/cat_model4.cbm')
+tfidf_vectorizer = load('/Users/olgaseina/Desktop/NLP_project/tfidf_vectorizer.joblib')
+def classic_ml_page():
+    st.title("Классификация отзывов о медицинских учреждениях")
+    user_review = st.text_area("Введите ваш отзыв здесь:")
+    if st.button("Классифицировать"):
+        if user_review:
+            preprocessed_review = data_preprocessing(user_review)
+            lemmatized_review = lemmatize_text(preprocessed_review)
+            vectorized_review = tfidf_vectorizer.transform([lemmatized_review])
+            prediction = model.predict(vectorized_review)
+            if prediction[0] == 1:
+                st.write("Позитивный отзыв 😀")
+            else:
+                st.write("Негативный отзыв 😟")
+        else:
+            st.write("Пожалуйста, введите отзыв для классификации.")