ebhon committed on
Commit
5164d8f
·
verified ·
1 Parent(s): 9d79ce5

Upload 5 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ model_improved.keras filter=lfs diff=lfs merge=lfs -text
app.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import streamlit as st
import joblib
import json
import re
import string
import numpy as np
import nltk

from tensorflow.keras.models import load_model
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize, sent_tokenize
from sklearn.feature_extraction.text import CountVectorizer

# Fetch the NLTK corpora this app needs (tokenizer models, stopword list,
# WordNet for lemmatization). Best-effort: quiet no-op when already present,
# and a failure here (e.g. no network) surfaces later as a LookupError.
for _pkg in ("punkt", "stopwords", "wordnet"):
    try:
        nltk.download(_pkg, quiet=True)
    except Exception:
        pass


@st.cache_resource
def _load_artifacts():
    """Load the trained classifier, fitted vectorizer, and label mapping.

    Cached with st.cache_resource so the (large) Keras model and joblib
    vectorizer are loaded once per process instead of on every Streamlit
    script rerun triggered by a widget interaction.

    Returns:
        (model, vectorizer, product_mapping) where product_mapping maps
        category name -> integer class index (from product_mapping.json).
    """
    model = load_model('model_improved.keras')
    vectorizer = joblib.load('vectorizer.joblib')
    with open('product_mapping.json', 'r') as file1:
        product_mapping = json.load(file1)
    return model, vectorizer, product_mapping


model, vectorizer, product_mapping = _load_artifacts()
# Invert name -> index into index -> name for decoding model predictions.
reverse_mapping = {v: k for k, v in product_mapping.items()}

lemmatizer = WordNetLemmatizer()
stop_words = set(stopwords.words('english'))
24
def clean_text(text):
    """Normalize raw complaint text before tokenization.

    Removes anonymization masks (runs of 'x' such as 'xxxx'), collapses a
    word repeated three or more times in a row to a single occurrence, then
    re-joins the sentences with each one capitalized and terminated.

    Args:
        text: Input string, or None.

    Returns:
        Cleaned string; "" when text is None.
    """
    if text is None:
        return ""
    # Drop redaction placeholders like "xx" / "xxxx" left by anonymization.
    text = re.sub(r'\bx+\b', '', text)
    # Collapse 3+ consecutive repetitions of the same word to one.
    text = re.sub(r'\b(\w+)( \1){2,}\b', r'\1', text)
    cleaned_sentences = []
    for sentence in sent_tokenize(text):
        sentence = sentence.strip()
        if not sentence:
            continue
        sentence = sentence.capitalize()
        # Bug fix: only append a terminator when the sentence lacks one,
        # so "Some sentence." no longer becomes "Some sentence..".
        if sentence[-1] not in '.!?':
            sentence += '.'
        cleaned_sentences.append(sentence)
    return ' '.join(cleaned_sentences)
32
+
33
def preprocessing_text(text):
    """Produce the model-ready token string for *text*.

    Pipeline: clean_text -> lowercase -> strip punctuation -> tokenize ->
    drop English stopwords -> lemmatize -> de-duplicate (keeping the first
    occurrence of each lemma). Returns the surviving tokens joined by spaces.
    """
    normalized = clean_text(text).lower()
    normalized = normalized.translate(str.maketrans('', '', string.punctuation))
    lemmas = [
        lemmatizer.lemmatize(token)
        for token in word_tokenize(normalized)
        if token not in stop_words
    ]
    # dict.fromkeys de-duplicates while preserving first-seen order.
    unique_lemmas = dict.fromkeys(lemmas)
    return ' '.join(unique_lemmas)
41
+
42
def make_prediction(input_text):
    """Classify *input_text* and return the predicted category name.

    Preprocesses the text, vectorizes it with the fitted CountVectorizer,
    runs the Keras model, and maps the argmax class index back to its
    label via reverse_mapping.
    """
    features = vectorizer.transform([preprocessing_text(input_text)])
    probabilities = model.predict(features)
    best_class = np.argmax(probabilities, axis=1)[0]
    return reverse_mapping[best_class]
49
+
50
# --- Streamlit UI ---------------------------------------------------------
st.title("Text Classification with NLP")
st.write("Enter text to classify into predefined categories")

user_input = st.text_area("Input Text", "")

if st.button("Classify"):
    # Guard clause: prompt the user when the text area is empty.
    if not user_input:
        st.write("Please enter text to classify.")
    else:
        st.write(f"Predicted Category: {make_prediction(user_input)}")
model_improved.keras ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c8cf1b8b6de272d5285f95c91a5fd545163792e58f8002ce6edab5977afe1567
3
+ size 6838141
product_mapping.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"credit_reporting": 0, "debt_collection": 1, "mortgages_and_loans": 2, "credit_card": 3, "retail_banking": 4}
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ streamlit
2
+ tensorflow
3
+ joblib
4
+ nltk
5
+ scikit-learn
vectorizer.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e51b2f37d9f6e2386789075da4c2eea2866ddab70f71b57fcf921bc1e094c7d9
3
+ size 21015637