"""Streamlit app that classifies a customer complaint typed by the user.

The text is cleaned and normalised, vectorised with a saved vectorizer, fed to
a saved Keras model, and the winning class index is translated back to a label
using ``product_mapping.json``.
"""

import json
import os
import re
import string

import joblib
import nltk
import numpy as np
import streamlit as st
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import sent_tokenize, word_tokenize
from sklearn.feature_extraction.text import CountVectorizer
from tensorflow.keras.models import load_model

nltk_data_path = '/home/user/nltk_data'


@st.cache_resource
def _ensure_nltk_data(download_dir):
    """Fetch the NLTK resources used below into *download_dir*.

    Streamlit re-executes this script on every widget interaction; caching the
    call keeps the downloads (and their network round-trips) to one run per
    server process instead of one per interaction.
    """
    os.makedirs(download_dir, exist_ok=True)
    for package in ('stopwords', 'punkt', 'punkt_tab', 'wordnet'):
        nltk.download(package, download_dir=download_dir)
    return download_dir


@st.cache_resource
def _load_artifacts():
    """Load the Keras model, the fitted vectorizer and the label mapping once.

    Returns:
        A ``(model, vectorizer, product_mapping)`` tuple. The same objects are
        handed back on every script rerun rather than being re-read from disk.
    """
    loaded_model = load_model('model_improved.keras')
    loaded_vectorizer = joblib.load('vectorizer.joblib')
    # Explicit encoding so the read does not depend on the platform locale.
    with open('product_mapping.json', 'r', encoding='utf-8') as mapping_file:
        loaded_mapping = json.load(mapping_file)
    return loaded_model, loaded_vectorizer, loaded_mapping


_ensure_nltk_data(nltk_data_path)
# nltk.data.path is a process-wide list that survives script reruns, so only
# register the directory when it is not already present.
if nltk_data_path not in nltk.data.path:
    nltk.data.path.append(nltk_data_path)

model, vectorizer, product_mapping = _load_artifacts()
# Invert the mapping so a predicted value can be looked up to get its label.
reverse_mapping = {v: k for k, v in product_mapping.items()}

lemmatizer = WordNetLemmatizer()
stop_words = set(stopwords.words('english'))


def clean_text(text):
    """Strip placeholder tokens and repeated words, then re-punctuate sentences.

    Args:
        text: Raw input string, or ``None``.

    Returns:
        The cleaned text with sentences joined by single spaces, or ``""``
        when *text* is ``None``.
    """
    if text is None:
        return ""

    # Drop whole words made only of lowercase "x" (presumably redaction
    # placeholders in the training data -- confirm against the training set).
    text = re.sub(r'\bx+\b', '', text)
    # Collapse a word repeated three or more times in a row (single-space
    # separated) into one occurrence.
    text = re.sub(r'\b(\w+)( \1){2,}\b', r'\1', text)

    sentences = sent_tokenize(text)
    # capitalize() upper-cases the first character and lower-cases the rest;
    # a period is appended unconditionally, even if one is already there.
    cleaned_sentences = [
        sentence.strip().capitalize() + '.'
        for sentence in sentences
        if sentence
    ]
    return ' '.join(cleaned_sentences)


def preprocessing_text(text) -> str:
    """Normalise *text* into the space-separated token string fed to the vectorizer.

    Steps, in order: ``clean_text``, lower-casing, punctuation removal, word
    tokenisation, stop-word removal, lemmatisation, and de-duplication that
    keeps the first occurrence of each token.

    NOTE(review): this must stay in sync with the preprocessing used when the
    saved vectorizer and model were trained -- verify before changing any step.
    """
    text = clean_text(text)
    text = text.lower()
    text = text.translate(str.maketrans('', '', string.punctuation))

    words = word_tokenize(text)
    words = [lemmatizer.lemmatize(word) for word in words if word not in stop_words]
    # dict.fromkeys removes duplicates while preserving first-seen order.
    words = list(dict.fromkeys(words))
    return ' '.join(words)


def make_prediction(input_text):
    """Return the label predicted by the model for *input_text*.

    Args:
        input_text: Raw complaint text entered by the user.

    Returns:
        The key of ``product_mapping`` whose value equals the index of the
        highest-scoring model output.

    Raises:
        KeyError: If the predicted index is not a value in ``product_mapping``.
    """
    preprocessed_text = preprocessing_text(input_text)
    vectorized_input = vectorizer.transform([preprocessed_text])
    predictions = model.predict(vectorized_input)
    # One input row was supplied, so take the arg-max of the first row.
    predicted_class = np.argmax(predictions, axis=1)
    predicted_label = reverse_mapping[predicted_class[0]]
    return predicted_label


st.title("Text Classification with NLP")
st.write("Please type the customer's complaint into this text area")
user_input = st.text_area("Write here!", "")

if st.button("Classify"):
    # Treat whitespace-only input as empty: it would otherwise reach the model
    # as an empty document and yield a meaningless prediction.
    if user_input.strip():
        result = make_prediction(user_input)
        st.write(f"Predicted Category: {result}")
    else:
        st.write("Please enter text to classify.")