Spaces:

ebhon
/

text-classification-nlp

Build error

File size: 2,399 Bytes

faa09ba
 
 
 
 
 
 
27366d7
faa09ba
 
 
 
 
 
 
c2c1ab9
 
 
 
 
 
2343a02
4ad696c
c2c1ab9
 
 
faa09ba
 
 
 
 
 
 
 
1ac510e
faa09ba
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3c49663
faa09ba
3c49663
faa09ba
 
 
 
 
5164d8f

import streamlit as st
import joblib
import json
import re
import string
import numpy as np
import os
import nltk

from tensorflow.keras.models import load_model
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize, sent_tokenize
from sklearn.feature_extraction.text import CountVectorizer

# Directory the NLTK packages are downloaded into and later looked up from.
nltk_data_path = '/home/user/nltk_data'


@st.cache_resource(show_spinner=False)
def _ensure_nltk_data(download_dir):
    """Download the NLTK packages this app uses into ``download_dir``.

    Streamlit re-executes this script on every widget interaction; caching
    keeps the downloads to one run per server process instead of one per
    rerun.

    Returns:
        The directory the packages were downloaded into.
    """
    os.makedirs(download_dir, exist_ok=True)
    for package in ('stopwords', 'punkt', 'punkt_tab', 'wordnet'):
        nltk.download(package, download_dir=download_dir)
    return download_dir


@st.cache_resource(show_spinner=False)
def _load_artifacts():
    """Load the Keras model, the joblib vectorizer and the label mapping.

    Cached so the files are read once per server process rather than on
    every script rerun.

    Returns:
        A ``(model, vectorizer, product_mapping)`` tuple.
    """
    loaded_model = load_model('model_improved.keras')
    loaded_vectorizer = joblib.load('vectorizer.joblib')
    with open('product_mapping.json', 'r', encoding='utf-8') as mapping_file:
        mapping = json.load(mapping_file)
    return loaded_model, loaded_vectorizer, mapping


_ensure_nltk_data(nltk_data_path)
# nltk.data.path is process-global state: register the directory only once so
# that script reruns do not keep appending duplicate entries.
if nltk_data_path not in nltk.data.path:
    nltk.data.path.append(nltk_data_path)

model, vectorizer, product_mapping = _load_artifacts()
# Inverse lookup: mapping value -> mapping key, used to turn a predicted
# class index back into its label.
reverse_mapping = {v: k for k, v in product_mapping.items()}

lemmatizer = WordNetLemmatizer()
stop_words = set(stopwords.words('english'))

def clean_text(text):
    """Tidy raw text and return it as a string of normalised sentences.

    Steps, in order:
      1. Remove whole-word runs of lowercase ``x`` (the match is
         case-sensitive, so uppercase runs are left in place).
      2. Collapse a word repeated three or more times in a row, separated
         by single spaces, into one occurrence.
      3. Split into sentences with ``sent_tokenize``; strip each one, apply
         ``str.capitalize`` and append a ``'.'``.

    Args:
        text: The raw text, or ``None``.

    Returns:
        The processed sentences joined by single spaces, or ``""`` when
        ``text`` is ``None``.
    """
    if text is None:
        return ""

    without_masks = re.sub(r'\bx+\b', '', text)
    collapsed = re.sub(r'\b(\w+)( \1){2,}\b', r'\1', without_masks)

    pieces = []
    for chunk in sent_tokenize(collapsed):
        if not chunk:
            continue
        pieces.append(chunk.strip().capitalize() + '.')
    return ' '.join(pieces)

def preprocessing_text(text):
    """Turn raw text into a space-separated string of unique lemmas.

    The text is passed through ``clean_text``, lowercased, stripped of every
    character in ``string.punctuation`` and tokenised with ``word_tokenize``.
    Tokens found in ``stop_words`` are dropped, the rest are lemmatised, and
    duplicates are removed while keeping first-occurrence order.

    Args:
        text: The raw text (``None`` is handled by ``clean_text``).

    Returns:
        The remaining lemmas joined by single spaces.
    """
    normalized = clean_text(text).lower()
    punctuation_table = str.maketrans('', '', string.punctuation)
    tokens = word_tokenize(normalized.translate(punctuation_table))

    # A dict keeps insertion order, so it doubles as an ordered set here.
    unique_lemmas = {}
    for token in tokens:
        if token in stop_words:
            continue
        unique_lemmas.setdefault(lemmatizer.lemmatize(token), None)
    return ' '.join(unique_lemmas)

def make_prediction(input_text):
    """Classify ``input_text`` and return the label of the top-scoring class.

    The text is run through ``preprocessing_text``, vectorised as a
    one-document batch with the module-level ``vectorizer`` and scored by the
    module-level ``model``. The arg-max along axis 1 of the prediction is
    looked up in ``reverse_mapping``.

    Args:
        input_text: The raw text to classify.

    Returns:
        The ``reverse_mapping`` entry for the predicted class index.

    Raises:
        KeyError: If the predicted index is not a key of ``reverse_mapping``.
    """
    features = vectorizer.transform([preprocessing_text(input_text)])
    # presumably one row of per-class scores for the single document -- TODO confirm
    class_scores = model.predict(features)
    best_index = np.argmax(class_scores, axis=1)[0]
    return reverse_mapping[best_index]

# Page layout: a title, a prompt, a text box and a button that triggers
# classification of whatever is in the box.
st.title("Text Classification with NLP")
st.write("Please type the customer's complaint into this text area")

user_input = st.text_area("Write here!", "")
if st.button("Classify"):
    # Whitespace-only input has nothing to classify; treat it like an empty
    # box instead of displaying a prediction for it.
    if user_input.strip():
        result = make_prediction(user_input)
        st.write(f"Predicted Category:  {result}")
    else:
        st.write("Please enter text to classify.")