ebhon committed on
Commit
faa09ba
·
verified ·
1 Parent(s): 5164d8f

Update app.py

Browse files

add extra file

Files changed (1) hide show
  1. app.py +62 -58
app.py CHANGED
@@ -1,59 +1,63 @@
1
- import streamlit as st
2
- import joblib
3
- import json
4
- import re
5
- import string
6
- import numpy as np
7
-
8
- from tensorflow.keras.models import load_model
9
- from nltk.corpus import stopwords
10
- from nltk.stem import WordNetLemmatizer
11
- from nltk.tokenize import word_tokenize, sent_tokenize
12
- from sklearn.feature_extraction.text import CountVectorizer
13
-
14
- model = load_model('model_improved.keras')
15
- vectorizer = joblib.load('vectorizer.joblib')
16
-
17
- with open('product_mapping.json', 'r') as file1:
18
- product_mapping = json.load(file1)
19
- reverse_mapping = {v: k for k, v in product_mapping.items()}
20
-
21
- lemmatizer = WordNetLemmatizer()
22
- stop_words = set(stopwords.words('english'))
23
-
24
- def clean_text(text):
25
- if text is None:
26
- return ""
27
- text = re.sub(r'\bx+\b', '', text)
28
- text = re.sub(r'\b(\w+)( \1){2,}\b', r'\1', text)
29
- sentences = sent_tokenize(text)
30
- cleaned_sentences = [sentence.strip().capitalize() + '.' for sentence in sentences if sentence]
31
- return ' '.join(cleaned_sentences)
32
-
33
- def preprocessing_text(text):
34
- text = clean_text(text)
35
- text = text.lower()
36
- text = text.translate(str.maketrans('', '', string.punctuation))
37
- words = word_tokenize(text)
38
- words = [lemmatizer.lemmatize(word) for word in words if word not in stop_words]
39
- words = list(dict.fromkeys(words))
40
- return ' '.join(words)
41
-
42
- def make_prediction(input_text):
43
- preprocessed_text = preprocessing_text(input_text)
44
- vectorized_input = vectorizer.transform([preprocessed_text])
45
- predictions = model.predict(vectorized_input)
46
- predicted_class = np.argmax(predictions, axis=1)
47
- predicted_label = reverse_mapping[predicted_class[0]]
48
- return predicted_label
49
-
50
- st.title("Text Classification with NLP")
51
- st.write("Enter text to classify into predefined categories")
52
-
53
- user_input = st.text_area("Input Text", "")
54
- if st.button("Classify"):
55
- if user_input:
56
- result = make_prediction(user_input)
57
- st.write(f"Predicted Category: {result}")
58
- else:
 
 
 
 
59
  st.write("Please enter text to classify.")
 
1
+ import streamlit as st
2
+ import joblib
3
+ import json
4
+ import re
5
+ import string
6
+ import numpy as np
7
+ import os
8
+
9
+ from tensorflow.keras.models import load_model
10
+ from nltk.corpus import stopwords
11
+ from nltk.stem import WordNetLemmatizer
12
+ from nltk.tokenize import word_tokenize, sent_tokenize
13
+ from sklearn.feature_extraction.text import CountVectorizer
14
+
15
# Ensure the NLTK corpora (punkt, stopwords, wordnet) are present before the
# tokenizer/lemmatizer below are used; download them on first run.
# BUG FIX: the original called os.path.exist, which does not exist — the
# correct name is os.path.exists. The typo raised AttributeError at startup.
if not os.path.exists('/root/nltk_data'):
    # NOTE(review): assumes download_nltk_data.py sits in the working
    # directory and that 'python' is on PATH — confirm in the deploy image.
    os.system("python download_nltk_data.py")
17
+
18
# Load the trained Keras classifier and the fitted text vectorizer that were
# produced at training time. Both artifacts are loaded once at app startup.
# NOTE(review): paths are relative — assumes both files sit next to app.py;
# confirm the deployment layout.
model = load_model('model_improved.keras')
vectorizer = joblib.load('vectorizer.joblib')
20
+
21
# Load the category-name -> class-index mapping saved at training time, and
# build the inverse index -> name table used to decode model predictions.
with open('product_mapping.json', 'r') as mapping_file:
    product_mapping = json.load(mapping_file)
reverse_mapping = {class_idx: label for label, class_idx in product_mapping.items()}
24
+
25
# Shared NLP resources used by preprocessing_text():
# - WordNet lemmatizer (requires the NLTK 'wordnet' corpus)
# - English stopword list as a set for O(1) membership tests
lemmatizer = WordNetLemmatizer()
stop_words = set(stopwords.words('english'))
27
+
28
def clean_text(text):
    """Normalize raw text before tokenization.

    Drops anonymization placeholders (runs of 'x'), collapses a word repeated
    three or more times into one occurrence, then rebuilds each sentence as
    "Capitalized text." joined by single spaces. Returns "" for None input.
    """
    if text is None:
        return ""
    # Remove masked tokens such as "xx" / "xxxx" left by anonymization.
    text = re.sub(r'\bx+\b', '', text)
    # "word word word" (3+ repeats) -> "word".
    text = re.sub(r'\b(\w+)( \1){2,}\b', r'\1', text)
    rebuilt = []
    for sentence in sent_tokenize(text):
        if sentence:
            rebuilt.append(sentence.strip().capitalize() + '.')
    return ' '.join(rebuilt)
36
+
37
def preprocessing_text(text):
    """Full preprocessing pipeline for one document.

    Cleans, lowercases, strips punctuation, tokenizes, removes stopwords,
    lemmatizes, and deduplicates tokens (keeping first occurrences), then
    returns the surviving tokens joined by single spaces.
    """
    lowered = clean_text(text).lower()
    # One C-level pass removing all ASCII punctuation.
    depunctuated = lowered.translate(str.maketrans('', '', string.punctuation))
    tokens = word_tokenize(depunctuated)
    lemmas = [lemmatizer.lemmatize(tok) for tok in tokens if tok not in stop_words]
    # dict preserves insertion order, so this dedupes while keeping order.
    return ' '.join(dict.fromkeys(lemmas))
45
+
46
def make_prediction(input_text):
    """Classify one text: preprocess, vectorize, predict, decode the label.

    Returns the category name from reverse_mapping for the highest-probability
    class produced by the Keras model.
    """
    cleaned = preprocessing_text(input_text)
    features = vectorizer.transform([cleaned])
    probabilities = model.predict(features)
    best_class = np.argmax(probabilities, axis=1)[0]
    return reverse_mapping[best_class]
53
+
54
# --- Streamlit UI ---
# Single text box + button; on click, runs the prediction pipeline and shows
# the decoded category, or a prompt if the box is empty.
st.title("Text Classification with NLP")
st.write("Enter text to classify into predefined categories")

user_input = st.text_area("Input Text", "")
if st.button("Classify"):
    if user_input:
        result = make_prediction(user_input)
        st.write(f"Predicted Category: {result}")
    else:
        # Empty string is falsy — ask the user for input instead of predicting.
        st.write("Please enter text to classify.")