Spaces:
Build error
Build error
File size: 2,399 Bytes
faa09ba 27366d7 faa09ba c2c1ab9 2343a02 4ad696c c2c1ab9 faa09ba 1ac510e faa09ba 3c49663 faa09ba 3c49663 faa09ba 5164d8f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 |
import streamlit as st
import joblib
import json
import re
import string
import numpy as np
import os
import nltk
from tensorflow.keras.models import load_model
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize, sent_tokenize
from sklearn.feature_extraction.text import CountVectorizer
# Directory that holds the NLTK resources used by this app.
nltk_data_path = '/home/user/nltk_data'
os.makedirs(nltk_data_path, exist_ok=True)

# Register the directory on NLTK's search path only once: Streamlit re-executes
# this script on every interaction while the imported ``nltk`` module (and its
# ``nltk.data.path`` list) stays alive, so an unconditional append would add a
# duplicate entry on each rerun.
if nltk_data_path not in nltk.data.path:
    nltk.data.path.append(nltk_data_path)

# Fetch every resource the preprocessing code relies on: the stop-word list,
# the sentence/word tokenizer models and the WordNet data for lemmatization.
for _resource in ('stopwords', 'punkt', 'punkt_tab', 'wordnet'):
    nltk.download(_resource, download_dir=nltk_data_path)
@st.cache_resource
def _load_artifacts():
    """Load the trained model, the fitted vectorizer and the label mapping.

    Wrapped in ``st.cache_resource`` so the files are read once per server
    process instead of on every Streamlit rerun.

    Returns:
        A ``(model, vectorizer, product_mapping)`` tuple.
    """
    keras_model = load_model('model_improved.keras')
    # NOTE: joblib.load unpickles the file; only load artifacts you trust.
    fitted_vectorizer = joblib.load('vectorizer.joblib')
    # Explicit encoding so the mapping is decoded identically on every host.
    with open('product_mapping.json', 'r', encoding='utf-8') as mapping_file:
        mapping = json.load(mapping_file)
    return keras_model, fitted_vectorizer, mapping


model, vectorizer, product_mapping = _load_artifacts()

# Inverted mapping (value -> key) used to turn a predicted class index back
# into its label. Presumably product_mapping maps label -> integer index;
# verify against the JSON file.
reverse_mapping = {v: k for k, v in product_mapping.items()}

lemmatizer = WordNetLemmatizer()
stop_words = set(stopwords.words('english'))
def clean_text(text):
    """Normalize raw text into capitalized, period-terminated sentences.

    Processing steps:
      1. Remove standalone runs of lowercase ``x`` (for example ``xxxx``).
      2. Collapse a word repeated three or more times in a row (separated by
         single spaces) into a single occurrence.
      3. Split the text with ``sent_tokenize`` and rebuild each sentence as
         its capitalized form followed by exactly one period.

    Args:
        text: The text to clean, or ``None``.

    Returns:
        The cleaned sentences joined by single spaces, or ``""`` when
        ``text`` is ``None`` or empty.
    """
    if not text:
        return ""

    # NOTE(review): this pattern is case-sensitive, so uppercase runs such as
    # "XXXX" are left in place -- confirm that this is intended.
    text = re.sub(r'\bx+\b', '', text)
    text = re.sub(r'\b(\w+)( \1){2,}\b', r'\1', text)

    cleaned_sentences = []
    for sentence in sent_tokenize(text):
        # sent_tokenize keeps the sentence's own terminator; drop it so that
        # appending '.' does not produce ".." or "!.".
        sentence = sentence.strip().rstrip('.!?').rstrip()
        if sentence:
            cleaned_sentences.append(sentence.capitalize() + '.')
    return ' '.join(cleaned_sentences)
def preprocessing_text(text):
    """Convert raw text into a space-joined string of unique lemmas.

    The text is cleaned with ``clean_text``, lower-cased, stripped of every
    character in ``string.punctuation`` and tokenized with ``word_tokenize``.
    Tokens found in ``stop_words`` are dropped, the rest are lemmatized, and
    duplicate lemmas are removed while keeping first-occurrence order.

    Args:
        text: The raw input text (``None`` is accepted by ``clean_text``).

    Returns:
        The remaining lemmas joined by single spaces.
    """
    normalized = clean_text(text).lower()
    punctuation_stripper = str.maketrans('', '', string.punctuation)
    tokens = word_tokenize(normalized.translate(punctuation_stripper))

    # A dict keeps insertion order, so it doubles as an ordered set here.
    unique_lemmas = {}
    for token in tokens:
        if token in stop_words:
            continue
        unique_lemmas.setdefault(lemmatizer.lemmatize(token), None)
    return ' '.join(unique_lemmas)
def make_prediction(input_text):
    """Classify a piece of text and return its label.

    The text is preprocessed with ``preprocessing_text``, vectorized with the
    module-level ``vectorizer`` and passed to the module-level ``model``. The
    index of the highest score is looked up in ``reverse_mapping``.

    Args:
        input_text: The raw text to classify.

    Returns:
        The ``reverse_mapping`` entry for the predicted class index.
    """
    features = vectorizer.transform([preprocessing_text(input_text)])
    class_scores = model.predict(features)
    # A single document was submitted, so only the first row is relevant.
    best_index = np.argmax(class_scores, axis=1)[0]
    return reverse_mapping[best_index]
# --- Streamlit user interface ---
st.title("Text Classification with NLP")
st.write("Please type the customer's complaint into this text area")
user_input = st.text_area("Write here!", "")

if st.button("Classify"):
    # Whitespace-only input contains nothing to classify, so treat it the
    # same as an empty text area.
    if user_input.strip():
        result = make_prediction(user_input)
        st.write(f"Predicted Category: {result}")
    else:
        st.write("Please enter text to classify.")