File size: 2,399 Bytes
faa09ba
 
 
 
 
 
 
27366d7
faa09ba
 
 
 
 
 
 
c2c1ab9
 
 
 
 
 
2343a02
4ad696c
c2c1ab9
 
 
faa09ba
 
 
 
 
 
 
 
1ac510e
faa09ba
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3c49663
faa09ba
3c49663
faa09ba
 
 
 
 
5164d8f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import streamlit as st
import joblib
import json
import re
import string
import numpy as np
import os
import nltk

from tensorflow.keras.models import load_model
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize, sent_tokenize
from sklearn.feature_extraction.text import CountVectorizer

# Directory that holds the NLTK corpora and tokenizer models used by this app.
nltk_data_path = '/home/user/nltk_data'

os.makedirs(nltk_data_path, exist_ok=True)

# Resources needed by stopwords.words, sent_tokenize/word_tokenize and
# WordNetLemmatizer further down in this file.
for _resource in ('stopwords', 'punkt', 'punkt_tab', 'wordnet'):
    nltk.download(_resource, download_dir=nltk_data_path)

# Streamlit re-executes this script on every widget interaction while the
# imported nltk module (and its search path list) stays alive, so only
# register the directory when it is not already present.
if nltk_data_path not in nltk.data.path:
    nltk.data.path.append(nltk_data_path)


@st.cache_resource
def _load_artifacts():
    """Load the Keras model, the saved vectorizer and the product mapping.

    Wrapped in ``st.cache_resource`` so the files are read once per server
    process and the same objects are reused on later script reruns, instead
    of reloading the model every time the user interacts with the page.

    Returns:
        A ``(model, vectorizer, product_mapping)`` tuple, where
        ``product_mapping`` is the dict parsed from ``product_mapping.json``.
    """
    loaded_model = load_model('model_improved.keras')
    loaded_vectorizer = joblib.load('vectorizer.joblib')
    # JSON is UTF-8 by specification; do not depend on the platform default.
    with open('product_mapping.json', 'r', encoding='utf-8') as mapping_file:
        mapping = json.load(mapping_file)
    return loaded_model, loaded_vectorizer, mapping


model, vectorizer, product_mapping = _load_artifacts()
# Inverted view of the mapping (value -> key), used to turn the predicted
# class index back into its label.
reverse_mapping = {v: k for k, v in product_mapping.items()}

lemmatizer = WordNetLemmatizer()
stop_words = set(stopwords.words('english'))

def clean_text(text):
    """Strip placeholder tokens and repeated words, then re-punctuate sentences.

    Args:
        text: Raw input string, or ``None``.

    Returns:
        An empty string when ``text`` is ``None``. Otherwise the text with
        whole words made only of lowercase ``x`` removed, any word repeated
        three or more times in a row (separated by single spaces) collapsed
        to one occurrence, and every sentence stripped, capitalized and
        suffixed with ``'.'``, joined by single spaces.
    """
    if text is None:
        return ""

    # Drop whole words consisting solely of the letter "x".
    without_placeholders = re.sub(r'\bx+\b', '', text)
    # Collapse "word word word ..." (3+ consecutive repeats) into "word".
    collapsed = re.sub(r'\b(\w+)( \1){2,}\b', r'\1', without_placeholders)

    pieces = []
    for sentence in sent_tokenize(collapsed):
        if not sentence:
            continue
        pieces.append(sentence.strip().capitalize() + '.')
    return ' '.join(pieces)

def preprocessing_text(text):
    """Normalize raw text into a space-separated string of unique lemmas.

    The text is passed through ``clean_text``, lower-cased, stripped of all
    ``string.punctuation`` characters and tokenized. Tokens found in
    ``stop_words`` are discarded, the rest are lemmatized, and duplicate
    lemmas are dropped while keeping first-occurrence order.

    Args:
        text: Raw input string (``None`` is handled by ``clean_text``).

    Returns:
        The remaining lemmas joined by single spaces.
    """
    lowered = clean_text(text).lower()
    punctuation_stripper = str.maketrans('', '', string.punctuation)
    tokens = word_tokenize(lowered.translate(punctuation_stripper))

    # A dict keeps insertion order, so it doubles as an ordered set here.
    unique_lemmas = {}
    for token in tokens:
        if token in stop_words:
            continue
        unique_lemmas.setdefault(lemmatizer.lemmatize(token), None)
    return ' '.join(unique_lemmas)

def make_prediction(input_text):
    """Classify one piece of text and return its label.

    Args:
        input_text: Raw text to classify.

    Returns:
        The ``reverse_mapping`` entry for the class index with the highest
        score in the model's prediction.
    """
    # The vectorizer expects a collection of documents, hence the 1-item list.
    features = vectorizer.transform([preprocessing_text(input_text)])
    class_scores = model.predict(features)
    # One input row was given, so take the winning index of the first row.
    best_index = np.argmax(class_scores, axis=1)[0]
    return reverse_mapping[best_index]

# --- Streamlit page: a text area plus a button that triggers classification ---
st.title("Text Classification with NLP")
st.write("Please type the customer's complaint into this text area")

user_input = st.text_area("Write here!", "")
if st.button("Classify"):
    # Treat whitespace-only input as empty: it would otherwise be classified
    # even though it carries no text for the model to work with.
    if user_input.strip():
        result = make_prediction(user_input)
        st.write(f"Predicted Category:  {result}")
    else:
        st.write("Please enter text to classify.")