| import nltk |
| import re |
| import pandas as pd |
| import numpy as np |
| from nltk.corpus import stopwords |
| from nltk.stem import WordNetLemmatizer |
| from nltk.tokenize import word_tokenize |
| from sklearn.feature_extraction.text import TfidfVectorizer |
| import gradio as gr |
| import joblib |
| from joblib import dump, load |
| import os |
|
|
# Fetch the NLTK data packages this script relies on (tokenizer models,
# the stop-word list and WordNet) before any text is processed.
for _resource in ('punkt', 'punkt_tab', 'stopwords', 'wordnet'):
    nltk.download(_resource)
|
|
# Lazily populated (stop_words, lemmatizer) pair shared by every call.
_text_resources = None


def _get_text_resources():
    """Return the cached ``(stop_words, lemmatizer)`` pair, building it once."""
    global _text_resources
    if _text_resources is None:
        # Built on first use rather than at import time so that importing the
        # module does not read the NLTK corpora. The pair is assigned in a
        # single statement so a concurrent caller never sees a half-built cache.
        _text_resources = (
            frozenset(stopwords.words('english')),
            WordNetLemmatizer(),
        )
    return _text_resources


def preprocess_text(text):
    """Normalize raw English text into a space-joined string of lemmas.

    Steps, in order: lowercase the text, delete every character that is not
    an ASCII letter or whitespace, tokenize with NLTK's ``word_tokenize``,
    drop English stop words, and lemmatize each remaining token.

    Args:
        text: The raw input string.

    Returns:
        The surviving lemmatized tokens joined by single spaces (an empty
        string when nothing survives).
    """
    # NOTE(review): presumably this must mirror the preprocessing used when
    # the model and vectorizer were trained - confirm before altering a step.
    text = text.lower()
    text = re.sub(r'[^a-zA-Z\s]', '', text)
    tokens = word_tokenize(text, language='english')
    # The stop-word set and lemmatizer are loaded once and reused instead of
    # being rebuilt on every call.
    stop_words, lemmatizer = _get_text_resources()
    return ' '.join(
        lemmatizer.lemmatize(word) for word in tokens if word not in stop_words
    )
|
|
def custom_tokenizer(text):
    """Split *text* on runs of whitespace and return the list of pieces."""
    # NOTE(review): nothing in this file calls this function; presumably the
    # pickled vectorizer refers to it by name - confirm before renaming.
    tokens = text.split()
    return tokens
|
|
| |
# Load the persisted classifier and TF-IDF vectorizer from the current
# working directory.
# NOTE: joblib.load unpickles arbitrary Python objects, so these files must
# come from a trusted source.
loaded_model_nb = joblib.load('best_nb_model.joblib')
loaded_vectorizer = joblib.load('tfidf_vectorizer.joblib')
|
|
def predict_sentiment(text):
    """Classify a review and return its sentiment label as a string.

    The text is cleaned with ``preprocess_text``, vectorized with the loaded
    vectorizer and passed to the loaded model.

    Args:
        text: The raw review text.

    Returns:
        "Положительный" when the model predicts ``1``; "Отрицательный" for
        any other prediction.
    """
    # transform() is given a one-element list, i.e. a single document.
    features = loaded_vectorizer.transform([preprocess_text(text)])
    predicted_class = loaded_model_nb.predict(features)[0]
    if predicted_class == 1:
        return "Положительный"
    return "Отрицательный"
|
|
# Three-line free-text input for the review.
review_input = gr.Textbox(
    lines=3,
    placeholder="Введите отзыв на английском...",
)

# Wire the classifier into a simple one-input, one-label web UI.
iface = gr.Interface(
    fn=predict_sentiment,
    inputs=review_input,
    outputs="label",
    title="Анализатор отзывов о ресторанах",
)

# Start the web server with ssr_mode turned off.
iface.launch(ssr_mode=False)
|
|