# ebhon's picture
# Update app.py
# 4ad696c verified
import streamlit as st
import joblib
import json
import re
import string
import numpy as np
import os
import nltk
from tensorflow.keras.models import load_model
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize, sent_tokenize
from sklearn.feature_extraction.text import CountVectorizer
# Directory that holds the NLTK resources this app downloads and reads.
nltk_data_path = '/home/user/nltk_data'
os.makedirs(nltk_data_path, exist_ok=True)

# Download every NLTK resource the app requests into that directory.
for _nltk_resource in ('stopwords', 'punkt', 'punkt_tab', 'wordnet'):
    nltk.download(_nltk_resource, download_dir=nltk_data_path)

# Add the download directory to NLTK's resource search path.
nltk.data.path.append(nltk_data_path)
# Classifier loaded through Keras and the vectorizer loaded through joblib.
model = load_model('model_improved.keras')
vectorizer = joblib.load('vectorizer.joblib')

# Mapping read from JSON; its values are used as lookup keys below.
with open('product_mapping.json', 'r') as mapping_file:
    product_mapping = json.load(mapping_file)

# Inverted mapping (value -> key), used to turn a predicted class index
# back into its label.
reverse_mapping = dict(
    (value, key) for key, value in product_mapping.items()
)

# Shared text-normalization helpers used by the preprocessing functions.
lemmatizer = WordNetLemmatizer()
stop_words = set(stopwords.words('english'))
def clean_text(text):
    """Tidy raw text into capitalized, properly terminated sentences.

    Steps:
      1. Remove standalone runs of lowercase ``x`` (e.g. ``xxxx``).
      2. Collapse a word repeated three or more times in a row (separated
         by single spaces) into a single occurrence.
      3. Split the text into sentences with ``sent_tokenize``, strip and
         capitalize each one, and make sure it ends with terminal
         punctuation.

    Args:
        text: Raw input string, or ``None``.

    Returns:
        The cleaned sentences joined by single spaces, or ``""`` when
        *text* is ``None`` or empty.
    """
    if not text:
        return ""

    # NOTE(review): this pattern is case-sensitive, so uppercase runs such
    # as "XXXX" are left in place -- confirm this matches the preprocessing
    # used when the model was trained.
    text = re.sub(r'\bx+\b', '', text)
    text = re.sub(r'\b(\w+)( \1){2,}\b', r'\1', text)

    cleaned_sentences = []
    for sentence in sent_tokenize(text):
        sentence = sentence.strip()
        if not sentence:
            # Skip whitespace-only pieces instead of emitting a lone ".".
            continue
        sentence = sentence.capitalize()
        # sent_tokenize keeps each sentence's own terminator, so only add a
        # period when one is missing (avoids "Hello.." / "Why?.").
        if not sentence.endswith(('.', '!', '?')):
            sentence += '.'
        cleaned_sentences.append(sentence)
    return ' '.join(cleaned_sentences)
def preprocessing_text(text):
    """Normalize text into a space-separated string of unique lemmas.

    The text is passed through ``clean_text``, lowercased, stripped of all
    ``string.punctuation`` characters, and tokenized with ``word_tokenize``.
    Tokens found in ``stop_words`` are dropped, the rest are lemmatized,
    and duplicate lemmas are removed while keeping first-seen order.

    Args:
        text: Raw input text (forwarded to ``clean_text``).

    Returns:
        The surviving lemmas joined by single spaces.
    """
    punctuation_table = str.maketrans('', '', string.punctuation)
    normalized = clean_text(text).lower().translate(punctuation_table)

    # A dict keeps insertion order, so it doubles as an ordered set here.
    unique_lemmas = {}
    for token in word_tokenize(normalized):
        if token in stop_words:
            continue
        unique_lemmas.setdefault(lemmatizer.lemmatize(token), None)
    return ' '.join(unique_lemmas)
def make_prediction(input_text):
    """Classify a piece of text and return its label.

    The text is preprocessed with ``preprocessing_text``, vectorized with
    the module-level ``vectorizer``, and scored by the module-level
    ``model``. The index of the highest score in the first (only) row is
    looked up in ``reverse_mapping``.

    Args:
        input_text: Raw text to classify.

    Returns:
        The ``reverse_mapping`` entry for the top-scoring class index.
    """
    features = vectorizer.transform([preprocessing_text(input_text)])
    class_scores = model.predict(features)
    # One input row was submitted, so take the argmax of row 0.
    best_index = np.argmax(class_scores, axis=1)[0]
    return reverse_mapping[best_index]
# Streamlit page: collect a complaint and show the predicted category.
st.title("Text Classification with NLP")
st.write("Please type the customer's complaint into this text area")
user_input = st.text_area("Write here!", "")

if st.button("Classify"):
    # Treat whitespace-only input as empty so blank text is never sent to
    # the classifier.
    if user_input.strip():
        result = make_prediction(user_input)
        st.write(f"Predicted Category: {result}")
    else:
        st.write("Please enter text to classify.")