koushikvkr484 committed on
Commit
9a311f2
·
verified ·
1 Parent(s): 93f5bcb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +112 -111
app.py CHANGED
@@ -1,111 +1,112 @@
1
- import re
2
- import nltk
3
- import pickle
4
- import numpy as np
5
- import pandas as pd
6
- import streamlit as st
7
- from nltk.corpus import stopwords
8
- from nltk.tokenize import word_tokenize
9
- from keras.models import load_model
10
- from keras.preprocessing.text import Tokenizer
11
- from keras.preprocessing.sequence import pad_sequences
12
-
13
- # -----------------------------
14
- # NLTK Requirements
15
- # -----------------------------
16
- try:
17
- nltk.data.find('tokenizers/punkt')
18
- except:
19
- nltk.download('punkt')
20
-
21
- try:
22
- nltk.data.find('corpora/stopwords')
23
- except:
24
- nltk.download('stopwords')
25
-
26
- stop_english = set(stopwords.words("english"))
27
-
28
- # -----------------------------
29
- # Streamlit UI
30
- # -----------------------------
31
- st.title("Ticket Classification App")
32
-
33
- col1, col2 = st.columns(2)
34
- with col1:
35
- subject = st.text_input("Enter your subject:")
36
- with col2:
37
- body = st.text_input("Enter your body:")
38
-
39
- # -----------------------------
40
- # Load Model
41
- # -----------------------------
42
- model_path = "model.h5"
43
- model = load_model(model_path)
44
-
45
- # -----------------------------
46
- # Load Tokenizer (IMPORTANT)
47
- # -----------------------------
48
- with open("tokenizer.pkl", "rb") as f:
49
- tokenizer = pickle.load(f)
50
-
51
- MAX_SEQ_LEN = 107 # ← Must match training
52
-
53
-
54
- # -----------------------------
55
- # Clean Text
56
- # -----------------------------
57
- def clean_text(t):
58
- if pd.isna(t):
59
- return ""
60
-
61
- t = t.lower()
62
- tokens = word_tokenize(t)
63
- tokens = [w for w in tokens if w not in stop_english and len(w) > 2]
64
- t = " ".join(tokens)
65
-
66
- # regex cleaning
67
- t = re.sub(r"<.*?>", " ", t)
68
- t = re.sub(r"\\n", " ", t)
69
- t = re.sub(r"http\S+|www\.\S+", " ", t)
70
- t = re.sub(r"\S+@\S+", " ", t)
71
- t = re.sub(r"[%\[\]_\\<\(\]#\?\'\":\)\-\;\+\!\/,>\.\n\r]", " ", t)
72
- t = re.sub(r"\s+", " ", t).strip()
73
-
74
- return t
75
-
76
-
77
- # -----------------------------
78
- # Convert Text → Sequence
79
- # -----------------------------
80
- def convert_to_sequence(txt):
81
- seq = tokenizer.texts_to_sequences([txt]) # must be list
82
- padded = pad_sequences(seq, maxlen=MAX_SEQ_LEN, padding="pre", truncating="pre")
83
- return padded
84
-
85
-
86
- # -----------------------------
87
- # Example text for display
88
- # -----------------------------
89
- st.write("Account Disruption")
90
- st.write("""Dear Customer Support Team,
91
-
92
- I am writing to report a significant problem with the centralized account management portal,
93
- which currently appears to be offline.
94
- This outage is blocking access to account settings...
95
-
96
- """)
97
-
98
- # -----------------------------
99
- # Prediction
100
- # -----------------------------
101
- if st.button("Submit"):
102
- # combine subject & body
103
- raw_text = subject + " " + body
104
-
105
- cleaned = clean_text(raw_text)
106
- st.write("Cleaned Text:", cleaned)
107
-
108
- seq = convert_to_sequence(cleaned)
109
-
110
- preds = model.predict(seq)
111
- st.write("Model Output:", preds)
 
 
1
import re
import nltk
import pickle
import numpy as np
import pandas as pd
import streamlit as st
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from keras.models import load_model
from keras.preprocessing.text import Tokenizer
from keras.utils import pad_sequences


# -----------------------------
# NLTK Requirements
# -----------------------------
# Download tokenizer/stopword data only when missing; nltk.data.find
# raises LookupError for an absent resource, so catch exactly that
# instead of a bare except that would hide unrelated failures.
try:
    nltk.data.find('tokenizers/punkt')
except LookupError:
    nltk.download('punkt')

try:
    nltk.data.find('corpora/stopwords')
except LookupError:
    nltk.download('stopwords')

stop_english = set(stopwords.words("english"))

# -----------------------------
# Streamlit UI
# -----------------------------
st.title("Ticket Classification App")

col1, col2 = st.columns(2)
with col1:
    subject = st.text_input("Enter your subject:")
with col2:
    body = st.text_input("Enter your body:")


# -----------------------------
# Load Model & Tokenizer
# -----------------------------
@st.cache_resource
def _load_artifacts(model_file, tokenizer_file):
    """Load the trained Keras model and its fitted tokenizer from disk.

    Cached with st.cache_resource so the heavyweight artifacts are not
    reloaded on every widget interaction (Streamlit reruns the whole
    script on each interaction).
    """
    loaded_model = load_model(model_file)
    # NOTE(review): pickle.load can execute arbitrary code — only ship a
    # tokenizer.pkl produced by your own training pipeline.
    with open(tokenizer_file, "rb") as f:
        loaded_tokenizer = pickle.load(f)
    return loaded_model, loaded_tokenizer


model_path = "model.h5"
model, tokenizer = _load_artifacts(model_path, "tokenizer.pkl")

MAX_SEQ_LEN = 107  # ← Must match the sequence length used at training time
53
+
54
+
55
+ # -----------------------------
56
+ # Clean Text
57
+ # -----------------------------
58
def clean_text(t):
    """Normalize raw ticket text for the classifier.

    Lowercases *t*, strips HTML tags, URLs, email addresses and
    punctuation, then removes English stopwords and tokens of length
    <= 2. Returns "" for missing (NaN/None) input.
    """
    if pd.isna(t):
        return ""

    t = t.lower()

    # Regex cleaning BEFORE tokenization: the original ran word_tokenize
    # first, which split URLs/emails into fragments ("http", "www", the
    # domain parts) that then survived the URL/email patterns below.
    # NOTE(review): confirm the training pipeline cleaned in this order
    # too — inference preprocessing must match training.
    t = re.sub(r"<.*?>", " ", t)              # HTML tags
    t = re.sub(r"\\n", " ", t)                # literal "\n" escape sequences
    t = re.sub(r"http\S+|www\.\S+", " ", t)   # URLs
    t = re.sub(r"\S+@\S+", " ", t)            # email addresses
    # (duplicate "\]" removed from the original character class)
    t = re.sub(r"[%\[\]_\\<\(#\?\'\":\)\-\;\+\!\/,>\.\n\r]", " ", t)
    t = re.sub(r"\s+", " ", t).strip()

    # Tokenize, then drop stopwords and very short tokens.
    tokens = word_tokenize(t)
    tokens = [w for w in tokens if w not in stop_english and len(w) > 2]

    return " ".join(tokens)
76
+
77
+
78
+ # -----------------------------
79
+ # Convert Text → Sequence
80
+ # -----------------------------
81
def convert_to_sequence(txt):
    """Encode *txt* as a padded integer sequence of length MAX_SEQ_LEN."""
    # texts_to_sequences expects an iterable of documents, hence the
    # single-element list wrapper.
    encoded = tokenizer.texts_to_sequences([txt])
    return pad_sequences(
        encoded,
        maxlen=MAX_SEQ_LEN,
        padding="pre",
        truncating="pre",
    )
85
+
86
+
87
# -----------------------------
# Example text for display
# -----------------------------
st.write("Account Disruption")
st.write("""Dear Customer Support Team,

I am writing to report a significant problem with the centralized account management portal,
which currently appears to be offline.
This outage is blocking access to account settings...

""")

# -----------------------------
# Prediction
# -----------------------------
if st.button("Submit"):
    # Combine subject & body into one document for the model.
    raw_text = subject + " " + body

    cleaned = clean_text(raw_text)
    if not cleaned:
        # Guard: nothing survived cleaning (e.g. both fields empty);
        # predicting on an all-padding sequence would be meaningless.
        st.warning("Please enter a subject and/or body before submitting.")
    else:
        st.write("Cleaned Text:", cleaned)

        seq = convert_to_sequence(cleaned)

        preds = model.predict(seq)
        st.write("Model Output:", preds)
        # Surface a concrete decision, not just the raw probability row.
        # NOTE(review): map this index to a human-readable label if the
        # training label encoder is available.
        st.write("Predicted class index:", int(np.argmax(preds, axis=1)[0]))