shubham680 committed on
Commit
5899ff7
·
verified ·
1 Parent(s): cc7b82b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +200 -185
app.py CHANGED
@@ -1,185 +1,200 @@
1
- import os
2
- import base64
3
- import traceback
4
- import streamlit as st
5
- import numpy as np
6
- import pickle
7
- import tensorflow as tf
8
- from tensorflow.keras.preprocessing.sequence import pad_sequences
9
- from tensorflow.keras import layers
10
- from tensorflow.keras.models import load_model
11
- from gensim.models import FastText
12
- import nltk
13
- import re
14
- from nltk.corpus import stopwords
15
- from nltk.tokenize import TreebankWordTokenizer
16
-
17
- # ------------------- Config -------------------
18
- MODEL_PATH = "multi_task_bilstm_attention.h5"
19
- FASTTEXT_PATH = "fasttext_domain.model"
20
- TOKENIZER_PKL = "tokenizer.pkl"
21
- LE_TYPE_PKL = "le_type.pkl"
22
- LE_QUEUE_PKL = "le_queue.pkl"
23
- MLB_PKL = "mlb.pkl"
24
- META_PKL = "hierarchy_meta.pkl"
25
- MAX_LEN = 120
26
-
27
- # ------------------- NLTK -------------------
28
- try: _ = stopwords.words("english")
29
- except: nltk.download("stopwords")
30
- try: _ = nltk.word_tokenize("test")
31
- except: nltk.download("punkt")
32
-
33
- stop_words = set(stopwords.words("english"))
34
- tokenizer_nltk = TreebankWordTokenizer()
35
-
36
- def clean_text(text):
37
- text = str(text)
38
- text = re.sub(r"<.*?>", " ", text)
39
- text = re.sub(r"[^A-Za-z0-9 ]", " ", text)
40
- text = re.sub(r"\s+", " ", text).strip()
41
- return text.lower()
42
-
43
- def preprocess_text(text):
44
- toks = tokenizer_nltk.tokenize(clean_text(text))
45
- toks = [t for t in toks if t not in stop_words and len(t) > 1]
46
- return " ".join(toks)
47
-
48
- # ------------------- Custom Attention -------------------
49
- class AttentionLayer(layers.Layer):
50
- def build(self, input_shape):
51
- self.W = self.add_weight(shape=(input_shape[-1], input_shape[-1]), initializer="glorot_uniform", trainable=True)
52
- self.v = self.add_weight(shape=(input_shape[-1],), initializer="glorot_uniform", trainable=True)
53
- super().build(input_shape)
54
- def call(self, x):
55
- u = tf.tanh(tf.tensordot(x, self.W, axes=1))
56
- a = tf.nn.softmax(tf.tensordot(u, self.v, axes=1), axis=1)
57
- return tf.reduce_sum(x * tf.expand_dims(a, -1), axis=1)
58
-
59
- # ------------------- Safe Loaders -------------------
60
- def safe_pickle(p):
61
- return pickle.load(open(p, "rb")) if os.path.exists(p) else None
62
-
63
- def safe_model(p):
64
- if not os.path.exists(p): return None
65
- with tf.keras.utils.custom_object_scope({"AttentionLayer": AttentionLayer}):
66
- return load_model(p, compile=False)
67
-
68
- def safe_fasttext(p):
69
- return FastText.load(p) if os.path.exists(p) else None
70
-
71
- tokenizer = safe_pickle(TOKENIZER_PKL)
72
- le_type = safe_pickle(LE_TYPE_PKL)
73
- le_queue = safe_pickle(LE_QUEUE_PKL)
74
- mlb = safe_pickle(MLB_PKL)
75
- meta = safe_pickle(META_PKL)
76
-
77
- model = safe_model(MODEL_PATH)
78
- fasttext = safe_fasttext(FASTTEXT_PATH)
79
-
80
- if meta is None:
81
- type_queue_mask = None; type_queue_tag_mask = None; best_thr = 0.5
82
- else:
83
- type_queue_mask = meta.get("type_queue_mask", None)
84
- type_queue_tag_mask = meta.get("type_queue_tag_mask", None)
85
- best_thr = float(meta.get("best_thr", 0.5))
86
-
87
- # Fallbacks
88
- class DummyLE:
89
- def inverse_transform(self, X): return [str(int(x)) for x in X]
90
- class DummyMLB:
91
- def inverse_transform(self, X): return [tuple()]
92
-
93
- if tokenizer is None:
94
- from tensorflow.keras.preprocessing.text import Tokenizer
95
- tokenizer = Tokenizer(num_words=20000, oov_token="<OOV>")
96
- if le_type is None: le_type = DummyLE()
97
- if le_queue is None: le_queue = DummyLE()
98
- if mlb is None: mlb = DummyMLB()
99
-
100
- # ------------------- Inference -------------------
101
- def infer(text):
102
- if model is None: raise RuntimeError("Model not loaded")
103
- seq = tokenizer.texts_to_sequences([preprocess_text(text)])
104
- seq = pad_sequences(seq, maxlen=MAX_LEN)
105
-
106
- extra = np.zeros((1,2), dtype=np.int32)
107
- preds = model.predict([seq, extra], verbose=0) if len(model.inputs) > 1 else model.predict(seq, verbose=0)
108
- if isinstance(preds, (list,tuple)):
109
- p_type, p_queue, p_tags = preds[0][0], preds[1][0], preds[2][0]
110
- else:
111
- arr = preds[0]; n=len(arr); t=max(1,n//3)
112
- p_type, p_queue, p_tags = arr[:t], arr[t:2*t], arr[2*t:]
113
-
114
- t_idx = np.argmax(p_type)
115
- type_lbl = le_type.inverse_transform([t_idx])[0]
116
-
117
- q_idx = np.argmax(p_queue)
118
- queue_lbl = le_queue.inverse_transform([q_idx])[0]
119
-
120
- if type_queue_tag_mask is not None:
121
- mask = type_queue_tag_mask[t_idx, q_idx]
122
- mod = p_tags * mask if mask.sum() != 0 else p_tags
123
- else:
124
- mod = p_tags
125
-
126
- pred_bin = (mod >= best_thr).astype(int).reshape(1,-1)
127
- try: tags = mlb.inverse_transform(pred_bin)[0]
128
- except: tags = ()
129
-
130
- return type_lbl, queue_lbl, list(tags)
131
-
132
- # ------------------- UI -------------------
133
- st.set_page_config(page_title="Multilingual Ticket Classification")
134
-
135
- # Background + UI styling + BLACK fonts
136
- if os.path.exists("bg.jpg"):
137
- b64 = base64.b64encode(open("bg.jpg","rb").read()).decode()
138
- st.markdown(f"""
139
- <style>
140
- .stApp {{
141
- background-image: url("data:image/jpg;base64,{b64}");
142
- background-size: cover;
143
- }}
144
- * {{ color: black !important; }}
145
- .card {{
146
- background: rgba(255,255,255,0.92);
147
- border-radius: 12px;
148
- padding: 22px;
149
- }}
150
- </style>
151
- """, unsafe_allow_html=True)
152
-
153
- st.markdown("<h1 style='text-align:center;'>Multilingual Ticket Classification</h1>", unsafe_allow_html=True)
154
- st.markdown("<div class='card'>", unsafe_allow_html=True)
155
-
156
- message = st.text_area("Enter ticket message:", height=200)
157
-
158
- if st.button("Predict"):
159
- if not message.strip():
160
- st.warning("Please enter a ticket message.")
161
- else:
162
- try:
163
- t, q, tg = infer(message)
164
- st.subheader("TYPE")
165
- st.success(t)
166
-
167
- st.subheader("QUEUE")
168
- st.success(q)
169
-
170
- st.subheader("TAGS")
171
- st.success(", ".join(tg) if tg else "No tags predicted.")
172
- except Exception:
173
- st.error("Prediction failed — model or artifacts missing.")
174
- st.text(traceback.format_exc())
175
-
176
- st.markdown("</div>", unsafe_allow_html=True)
177
-
178
- # Invisible debug exists internally but 100% hidden
179
- st.markdown("""
180
- <style>
181
- div[data-testid="stExpander"] {visibility: hidden; height: 0px;}
182
- </style>s
183
- """, unsafe_allow_html=True)
184
- with st.expander("debug_info_hidden"):
185
- st.write("hidden diagnostics active")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import base64
3
+ import traceback
4
+ import streamlit as st
5
+ import numpy as np
6
+ import pickle
7
+ import tensorflow as tf
8
+ from tensorflow.keras.preprocessing.sequence import pad_sequences
9
+ from tensorflow.keras import layers
10
+ from tensorflow.keras.models import load_model
11
+ from gensim.models import FastText
12
+ import nltk
13
+ import re
14
+ from nltk.corpus import stopwords
15
+ from nltk.tokenize import TreebankWordTokenizer
16
+
17
# ------------------- Config -------------------
# File names of the trained artifacts, expected in the working directory.
MODEL_PATH = "multi_task_bilstm_attention.h5"  # multi-task BiLSTM + attention model
FASTTEXT_PATH = "fasttext_domain.model"        # domain FastText embeddings (gensim)
TOKENIZER_PKL = "tokenizer.pkl"                # fitted Keras tokenizer
LE_TYPE_PKL = "le_type.pkl"                    # label encoder for ticket TYPE
LE_QUEUE_PKL = "le_queue.pkl"                  # label encoder for ticket QUEUE
MLB_PKL = "mlb.pkl"                            # multi-label binarizer for TAGS
META_PKL = "hierarchy_meta.pkl"                # hierarchy masks + tuned tag threshold
MAX_LEN = 120                                  # padded token-sequence length fed to the model
26
+
27
# ------------------- NLTK -------------------
NLTK_DIR = "/root/nltk_data"
STOPWORDS_DIR = os.path.join(NLTK_DIR, "corpora", "stopwords")

# Make sure NLTK searches the directory we download into.
# NOTE(review): /root/nltk_data is only on the default search path when
# running as root — appending explicitly makes this robust either way.
if NLTK_DIR not in nltk.data.path:
    nltk.data.path.append(NLTK_DIR)

os.makedirs(NLTK_DIR, exist_ok=True)

# Download corpora only when not already present on disk.
if not os.path.exists(STOPWORDS_DIR):
    nltk.download("stopwords", download_dir=NLTK_DIR)
if not os.path.exists(os.path.join(NLTK_DIR, "tokenizers", "punkt")):
    nltk.download("punkt", download_dir=NLTK_DIR)

# Single authoritative assignments (the original assigned these twice and
# re-downloaded punkt to the default dir behind a bare `except:`).
stop_words = set(stopwords.words("english"))
tokenizer_nltk = TreebankWordTokenizer()
50
+
51
def clean_text(text):
    """Strip markup and punctuation from *text* and lowercase it.

    HTML-like tags and any non-alphanumeric characters become spaces,
    runs of whitespace collapse to a single space.
    """
    raw = str(text)
    no_tags = re.sub(r"<.*?>", " ", raw)
    alnum_only = re.sub(r"[^A-Za-z0-9 ]", " ", no_tags)
    collapsed = re.sub(r"\s+", " ", alnum_only).strip()
    return collapsed.lower()
57
+
58
def preprocess_text(text):
    """Clean *text*, tokenize it, and drop stopwords and 1-char tokens."""
    tokens = tokenizer_nltk.tokenize(clean_text(text))
    kept = (tok for tok in tokens if tok not in stop_words and len(tok) > 1)
    return " ".join(kept)
62
+
63
# ------------------- Custom Attention -------------------
class AttentionLayer(layers.Layer):
    """Additive self-attention pooling: (batch, time, feat) -> (batch, feat).

    Scores each timestep, softmax-normalizes over the time axis, and
    returns the attention-weighted sum of the inputs.
    """

    def build(self, input_shape):
        # Named weights so checkpoints get stable, meaningful variable names
        # (the original used anonymous add_weight calls).
        self.W = self.add_weight(
            name="att_W",
            shape=(input_shape[-1], input_shape[-1]),
            initializer="glorot_uniform",
            trainable=True,
        )
        self.v = self.add_weight(
            name="att_v",
            shape=(input_shape[-1],),
            initializer="glorot_uniform",
            trainable=True,
        )
        super().build(input_shape)

    def call(self, x):
        # u: per-timestep hidden representation; a: attention weights over time.
        u = tf.tanh(tf.tensordot(x, self.W, axes=1))
        a = tf.nn.softmax(tf.tensordot(u, self.v, axes=1), axis=1)
        # Weighted sum across the time axis.
        return tf.reduce_sum(x * tf.expand_dims(a, -1), axis=1)
73
+
74
# ------------------- Safe Loaders -------------------
def safe_pickle(p):
    """Unpickle the file at *p*, or return None when it does not exist.

    Uses a context manager so the file handle is closed promptly
    (the original leaked the handle from a bare open()).
    """
    if not os.path.exists(p):
        return None
    with open(p, "rb") as fh:
        return pickle.load(fh)
77
+
78
def safe_model(p):
    """Load the Keras model at *p* (with the custom attention layer
    registered), or return None when the file is absent."""
    if os.path.exists(p):
        with tf.keras.utils.custom_object_scope({"AttentionLayer": AttentionLayer}):
            return load_model(p, compile=False)
    return None
82
+
83
def safe_fasttext(p):
    """Load a gensim FastText model from *p*, or None when absent."""
    if not os.path.exists(p):
        return None
    return FastText.load(p)
85
+
86
# Load every artifact defensively; each is None when its file is missing.
tokenizer = safe_pickle(TOKENIZER_PKL)
le_type = safe_pickle(LE_TYPE_PKL)
le_queue = safe_pickle(LE_QUEUE_PKL)
mlb = safe_pickle(MLB_PKL)
meta = safe_pickle(META_PKL)

model = safe_model(MODEL_PATH)
fasttext = safe_fasttext(FASTTEXT_PATH)

# Hierarchy metadata: masks restricting queue/tag choices per predicted
# type, plus the tuned tag-probability threshold (default 0.5).
_meta = meta if meta is not None else {}
type_queue_mask = _meta.get("type_queue_mask", None)
type_queue_tag_mask = _meta.get("type_queue_tag_mask", None)
best_thr = float(_meta.get("best_thr", 0.5))
101
+
102
# Fallbacks
class DummyLE:
    """Stand-in label encoder: maps class indices to their string form."""

    def inverse_transform(self, X):
        return [str(int(idx)) for idx in X]


class DummyMLB:
    """Stand-in multi-label binarizer: always yields one empty tag tuple."""

    def inverse_transform(self, X):
        return [tuple()]
107
+
108
# Substitute harmless stand-ins for any artifact that failed to load, so
# the UI can still render and report errors instead of crashing on import.
if tokenizer is None:
    from tensorflow.keras.preprocessing.text import Tokenizer

    # NOTE(review): an unfitted Tokenizer produces empty sequences —
    # predictions will be meaningless until the real tokenizer.pkl exists.
    tokenizer = Tokenizer(num_words=20000, oov_token="<OOV>")
if le_type is None:
    le_type = DummyLE()
if le_queue is None:
    le_queue = DummyLE()
if mlb is None:
    mlb = DummyMLB()
114
+
115
# ------------------- Inference -------------------
def infer(text):
    """Classify one ticket message with the multi-task model.

    Returns a (type_label, queue_label, tags_list) triple.
    Raises RuntimeError when the model artifact was not loaded.
    """
    if model is None:
        raise RuntimeError("Model not loaded")
    seq = tokenizer.texts_to_sequences([preprocess_text(text)])
    seq = pad_sequences(seq, maxlen=MAX_LEN)

    # Second input (two placeholder features) is only fed when the model
    # actually declares more than one input tensor.
    extra = np.zeros((1, 2), dtype=np.int32)
    if len(model.inputs) > 1:
        preds = model.predict([seq, extra], verbose=0)
    else:
        preds = model.predict(seq, verbose=0)

    if isinstance(preds, (list, tuple)):
        # Three separate heads: type, queue, tags.
        p_type, p_queue, p_tags = preds[0][0], preds[1][0], preds[2][0]
    else:
        # Single concatenated output: split evenly into thirds.
        # NOTE(review): assumes the three heads have equal width — confirm.
        arr = preds[0]
        third = max(1, len(arr) // 3)
        p_type, p_queue, p_tags = arr[:third], arr[third:2 * third], arr[2 * third:]

    t_idx = np.argmax(p_type)
    type_lbl = le_type.inverse_transform([t_idx])[0]

    q_idx = np.argmax(p_queue)
    queue_lbl = le_queue.inverse_transform([q_idx])[0]

    # Restrict tag probabilities to those valid for the predicted
    # (type, queue) pair; fall back to raw scores for an all-zero mask.
    if type_queue_tag_mask is not None:
        mask = type_queue_tag_mask[t_idx, q_idx]
        mod = p_tags * mask if mask.sum() != 0 else p_tags
    else:
        mod = p_tags

    pred_bin = (mod >= best_thr).astype(int).reshape(1, -1)
    # Narrowed from a bare `except:` — tolerate an mlb fitted on a
    # different tag width, but never swallow KeyboardInterrupt/SystemExit.
    try:
        tags = mlb.inverse_transform(pred_bin)[0]
    except Exception:
        tags = ()

    return type_lbl, queue_lbl, list(tags)
146
+
147
# ------------------- UI -------------------
st.set_page_config(page_title="Multilingual Ticket Classification")

# Background + UI styling + BLACK fonts
if os.path.exists("bg.jpg"):
    # Context manager closes the image handle promptly (the original
    # leaked it via a bare open()).
    with open("bg.jpg", "rb") as _bg:
        b64 = base64.b64encode(_bg.read()).decode()
    st.markdown(f"""
    <style>
    .stApp {{
        background-image: url("data:image/jpg;base64,{b64}");
        background-size: cover;
    }}
    * {{ color: black !important; }}
    .card {{
        background: rgba(255,255,255,0.92);
        border-radius: 12px;
        padding: 22px;
    }}
    </style>
    """, unsafe_allow_html=True)
167
+
168
# Page title plus an opening "card" container for the form.
st.markdown(
    "<h1 style='text-align:center;'>Multilingual Ticket Classification</h1>",
    unsafe_allow_html=True,
)
st.markdown("<div class='card'>", unsafe_allow_html=True)

# Free-text ticket message entered by the user.
message = st.text_area("Enter ticket message:", height=200)
172
+
173
if st.button("Predict"):
    if not message.strip():
        st.warning("Please enter a ticket message.")
    else:
        # Run inference and render the three prediction sections; any
        # failure (missing model/artifacts) is reported with a traceback.
        try:
            t, q, tg = infer(message)
            sections = (
                ("TYPE", t),
                ("QUEUE", q),
                ("TAGS", ", ".join(tg) if tg else "No tags predicted."),
            )
            for heading, value in sections:
                st.subheader(heading)
                st.success(value)
        except Exception:
            st.error("Prediction failed — model or artifacts missing.")
            st.text(traceback.format_exc())

st.markdown("</div>", unsafe_allow_html=True)
192
+
193
# Invisible debug — exists internally but 100% hidden
# (The original had a stray "s" after </style>, which Streamlit rendered
# as literal page text.)
st.markdown("""
    <style>
    div[data-testid="stExpander"] {visibility: hidden; height: 0px;}
    </style>
    """, unsafe_allow_html=True)
with st.expander("debug_info_hidden"):
    st.write("hidden diagnostics active")