Spaces:
Build error
Build error
| import streamlit as st | |
| import joblib | |
| import json | |
| import re | |
| import string | |
| import numpy as np | |
| import os | |
| import nltk | |
| from tensorflow.keras.models import load_model | |
| from nltk.corpus import stopwords | |
| from nltk.stem import WordNetLemmatizer | |
| from nltk.tokenize import word_tokenize, sent_tokenize | |
| from sklearn.feature_extraction.text import CountVectorizer | |
# Directory that receives the NLTK packages downloaded below.
nltk_data_path = '/home/user/nltk_data'
os.makedirs(nltk_data_path, exist_ok=True)

# Download every NLTK package this script relies on into that directory.
for _package in ('stopwords', 'punkt', 'punkt_tab', 'wordnet'):
    nltk.download(_package, download_dir=nltk_data_path)

# Add the download directory to the list of locations NLTK searches for data.
nltk.data.path.append(nltk_data_path)
# Load the saved Keras model and the joblib-serialised vectorizer; both
# paths are relative to the current working directory.
model = load_model('model_improved.keras')
vectorizer = joblib.load('vectorizer.joblib')

# product_mapping is whatever JSON object product_mapping.json contains.
# The encoding is given explicitly so the file is decoded as UTF-8
# regardless of the platform's locale default.
with open('product_mapping.json', 'r', encoding='utf-8') as file1:
    product_mapping = json.load(file1)

# Inverted view of product_mapping (values become keys); make_prediction
# indexes it with the predicted class index.
reverse_mapping = {v: k for k, v in product_mapping.items()}

# Shared helpers for preprocessing_text: one lemmatizer instance and the
# English stop-word list as a set for fast membership tests.
lemmatizer = WordNetLemmatizer()
stop_words = set(stopwords.words('english'))
def clean_text(text):
    """Remove masking tokens and repeated words, then tidy each sentence.

    Args:
        text: The raw text to clean, or None.

    Returns:
        A single string in which every sentence has been stripped,
        passed through ``str.capitalize()`` and ends with terminal
        punctuation, with sentences joined by one space. Returns ``""``
        when ``text`` is None.
    """
    if text is None:
        return ""

    # Drop whole words made up only of lowercase "x" (e.g. "xxxx").
    text = re.sub(r'\bx+\b', '', text)
    # Collapse a word repeated three or more times in a row into one copy.
    text = re.sub(r'\b(\w+)( \1){2,}\b', r'\1', text)

    cleaned_sentences = []
    for sentence in sent_tokenize(text):
        sentence = sentence.strip()
        # Skip fragments that are empty once whitespace is removed, so
        # they do not turn into a lone ".".
        if not sentence:
            continue
        sentence = sentence.capitalize()
        # The tokenizer keeps a sentence's own closing punctuation, so a
        # period is added only when none is present (avoids "..", "?.").
        if not sentence.endswith(('.', '!', '?')):
            sentence += '.'
        cleaned_sentences.append(sentence)
    return ' '.join(cleaned_sentences)
def preprocessing_text(text):
    """Reduce raw text to a space-separated string of unique lemmas.

    The text goes through clean_text, is lower-cased, has every
    ``string.punctuation`` character removed and is split with
    word_tokenize. Tokens present in stop_words are discarded, the
    remaining ones are lemmatised, and duplicates are dropped while the
    order of first appearance is kept.

    Args:
        text: The raw text (forwarded unchanged to clean_text).

    Returns:
        The processed tokens joined by single spaces.
    """
    punctuation_table = str.maketrans('', '', string.punctuation)
    normalized = clean_text(text).lower().translate(punctuation_table)

    lemmas = []
    for token in word_tokenize(normalized):
        if token in stop_words:
            continue
        lemmas.append(lemmatizer.lemmatize(token))

    # dict.fromkeys keeps only the first occurrence of each lemma, in order.
    unique_lemmas = dict.fromkeys(lemmas)
    return ' '.join(unique_lemmas)
def make_prediction(input_text):
    """Classify one piece of text and return its entry from reverse_mapping.

    The text is run through preprocessing_text, transformed by the
    module-level vectorizer and passed to the module-level model.

    Args:
        input_text: The text to classify.

    Returns:
        The value stored in reverse_mapping under the index of the
        highest model output for the submitted text.
    """
    features = vectorizer.transform([preprocessing_text(input_text)])
    scores = model.predict(features)
    # argmax runs per row; a single text was submitted, so take row 0.
    best_index = np.argmax(scores, axis=1)[0]
    return reverse_mapping[best_index]
# Streamlit page: title, prompt, a text box and a "Classify" button.
st.title("Text Classification with NLP")
st.write("Please type the customer's complaint into this text area")
user_input = st.text_area("Write here!", "")

if st.button("Classify"):
    # Treat whitespace-only input as empty: it carries no text to
    # classify, so show the prompt instead of calling the model.
    if user_input.strip():
        result = make_prediction(user_input)
        st.write(f"Predicted Category: {result}")
    else:
        st.write("Please enter text to classify.")