# Page header captured with this file (not part of the program):
#   EagleOfEmpire's picture
#   Update app.py
#   6e45b18 verified
import os
import re
from functools import lru_cache

import gradio as gr
import joblib
import nltk
import numpy as np
import pandas as pd
from joblib import dump, load
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
from sklearn.feature_extraction.text import TfidfVectorizer
# Download the NLTK data packages at start-up, in a fixed order.
# NOTE(review): presumably these back word_tokenize, the stop-word list and
# the WordNet lemmatizer used by the preprocessing code — confirm.
for _nltk_package in ('punkt', 'punkt_tab', 'stopwords', 'wordnet'):
    nltk.download(_nltk_package)
# Matches every character that is neither an ASCII letter nor whitespace.
_NON_LETTER_RE = re.compile(r'[^a-zA-Z\s]')


@lru_cache(maxsize=1)
def _english_stop_words():
    """Return the NLTK English stop words as a frozenset, loaded only once."""
    return frozenset(stopwords.words('english'))


@lru_cache(maxsize=1)
def _shared_lemmatizer():
    """Return a single WordNetLemmatizer instance reused across calls."""
    return WordNetLemmatizer()


def preprocess_text(text):
    """Normalize raw review text into a space-separated string of lemmas.

    Steps, in order: lower-case the text, delete every character that is
    not an ASCII letter or whitespace (digits and punctuation, including
    apostrophes, are dropped), tokenize with NLTK's ``word_tokenize``,
    remove English stop words, and lemmatize each remaining token.

    Args:
        text: The raw input string. ``None`` is treated like an empty
            string.

    Returns:
        The processed tokens joined by single spaces; ``''`` when the input
        is ``None``, empty, or contains no surviving tokens.
    """
    # Nothing to process; also avoids AttributeError on a missing value.
    if text is None or text == '':
        return ''

    letters_only = _NON_LETTER_RE.sub('', text.lower())

    # The stop-word set and lemmatizer do not depend on the input, so they
    # are built once and reused instead of being recreated on every call.
    stop_words = _english_stop_words()
    lemmatize = _shared_lemmatizer().lemmatize

    return ' '.join(
        lemmatize(token)
        for token in word_tokenize(letters_only, language='english')
        if token not in stop_words
    )
def custom_tokenizer(text):
    """Split ``text`` on runs of whitespace and return the list of pieces.

    NOTE(review): nothing in this file calls this function; presumably the
    pickled vectorizer refers to it by name, so keep the name and module
    location unchanged — confirm before renaming or removing.
    """
    pieces = text.split()
    return pieces
# Load the model and the vectorizer from the working directory.
# NOTE(review): joblib.load unpickles the file contents, so these artifacts
# must come from a trusted source.
loaded_model_nb = joblib.load('best_nb_model.joblib')
loaded_vectorizer = joblib.load('tfidf_vectorizer.joblib')
def predict_sentiment(text):
    """Classify a review and return its sentiment label as a string.

    The text is cleaned with ``preprocess_text``, converted to features by
    ``loaded_vectorizer``, and classified by ``loaded_model_nb``.

    Args:
        text: The raw review text entered by the user.

    Returns:
        The positive label string when the predicted class equals ``1``,
        otherwise the negative label string.
    """
    cleaned = preprocess_text(text)

    # The vectorizer receives a one-element batch, so only the first (and
    # only) prediction is read back.
    features = loaded_vectorizer.transform([cleaned])
    predicted_class = loaded_model_nb.predict(features)[0]

    if predicted_class == 1:
        return "Положительный"
    return "Отрицательный"
# Gradio UI: a three-line text box whose content is passed to
# predict_sentiment, with the result shown in a label component.
review_box = gr.Textbox(lines=3, placeholder="Введите отзыв на английском...")

iface = gr.Interface(
    fn=predict_sentiment,
    inputs=review_box,
    outputs="label",
    title="Анализатор отзывов о ресторанах",
    # The `examples` argument is deliberately left commented out:
    # examples=["The food was amazing!", "Service was terrible."]
)

# Start the app; ssr_mode=False is passed through to Gradio unchanged.
iface.launch(ssr_mode=False)