| import re |
| import os |
| import nltk |
| import pickle |
| import numpy as np |
| import pandas as pd |
| import streamlit as st |
| import tensorflow as tf |
| from nltk.corpus import stopwords |
| from nltk.tokenize import word_tokenize |
| from tensorflow.keras.preprocessing.sequence import pad_sequences |
| |
| |
|
|
| |
| |
# Short alias for the Keras model loader; bound once at import time.
load_model = tf.keras.models.load_model
|
|
| |
| |
| |
@st.cache_resource
def setup_nltk():
    """Ensure required NLTK data is present and return the English stopword set.

    Each resource is probed locally first so the download only happens
    when the data is actually missing. Cached by Streamlit so it runs
    once per process.
    """
    needed = (
        ("tokenizers/punkt", "punkt"),
        ("corpora/stopwords", "stopwords"),
    )
    for locator, package in needed:
        try:
            nltk.data.find(locator)
        except LookupError:
            nltk.download(package)

    return set(stopwords.words("english"))


stop_english = setup_nltk()
|
|
| |
| |
| |
# Paths to the trained model and the preprocessing artifacts fitted at
# training time (all loaded in load_resources below).
MODEL_PATH = "model.h5"
LE_TYPE_PATH = "le_type.pkl"      # label encoder for the ticket "type" head
LE_QUEUE_PATH = "le_queue.pkl"    # label encoder for the ticket "queue" head
MLB_PATH = "mlb.pkl"              # multi-label binarizer for the tags head
TOKENIZER_PATH = "tokenizer.pkl"  # Keras tokenizer fitted on the training text
MAX_SEQ_LEN = 107                 # padded input length the model expects
|
|
@st.cache_resource
def load_resources():
    """Load the Keras model and all pickled preprocessors.

    Returns a (model, le_type, le_queue, mlb, tokenizer) tuple. On any
    failure an error is shown in the UI and the script run is stopped.
    """
    try:
        model = load_model(MODEL_PATH, compile=False)

        # NOTE(review): pickle.load is only safe on trusted artifact files.
        loaded = []
        for path in (LE_TYPE_PATH, LE_QUEUE_PATH, MLB_PATH, TOKENIZER_PATH):
            with open(path, "rb") as fh:
                loaded.append(pickle.load(fh))
        le_type, le_queue, mlb, tokenizer = loaded

        return model, le_type, le_queue, mlb, tokenizer

    except FileNotFoundError as e:
        st.error(f"Required file not found: {e}. Please ensure all artifacts (model.h5, *.pkl) are uploaded.")
        st.stop()
    except Exception as e:
        st.error(f"An error occurred while loading resources: {e}")
        st.stop()


model, le_type, le_queue, mlb, tokenizer = load_resources()
|
|
| |
|
|
def clean_text(t):
    """Clean raw ticket text for model input.

    Lowercases, strips URLs / @mentions / literal "\\n" escapes and other
    non-alphanumeric characters, then tokenizes and keeps alphanumeric
    tokens longer than 2 characters that are not English stopwords.

    Args:
        t: Raw text string, or a missing value (None / NaN).

    Returns:
        A cleaned, space-separated token string ("" for missing input).
    """
    if pd.isna(t) or t is None:
        return ""

    t = t.lower()

    # BUG FIX: run the regex cleanup BEFORE tokenizing. In the original
    # order these patterns could never match — word_tokenize had already
    # split "http://..." apart and the isalnum() filter dropped the
    # punctuation-bearing pieces, leaving stray "http" tokens behind.
    t = re.sub(r"http\S+|www\.\S+|@\S+|\\n", " ", t)
    t = re.sub(r"[^a-zA-Z0-9\s]", " ", t)
    t = re.sub(r"\s+", " ", t).strip()

    tokens = word_tokenize(t)
    # Drop stopwords, very short tokens, and anything non-alphanumeric.
    tokens = [w for w in tokens if w not in stop_english and len(w) > 2 and w.isalnum()]

    return " ".join(tokens)
|
|
def convert_to_sequence(txt):
    """Encode cleaned text with the fitted tokenizer and pad to MAX_SEQ_LEN.

    Returns a 2-D array of shape (1, MAX_SEQ_LEN) ready for model.predict.
    """
    encoded = tokenizer.texts_to_sequences([txt])
    return pad_sequences(
        encoded,
        maxlen=MAX_SEQ_LEN,
        padding="pre",
        truncating="pre",
    )
|
|
| |
|
|
# --- Page chrome and a static example shown above the input form ---
st.set_page_config(page_title="Ticket Classification")
st.title("🎫 Ticket Classification App")


st.header("Example Input")
st.markdown("**Subject:** Account Disruption")
st.code("""Dear Customer Support Team,
I am writing to report a significant problem with the centralized account management portal...""")
st.write("---")


# Side-by-side text inputs for the ticket subject and body.
col1, col2 = st.columns(2)
with col1:
    subject = st.text_input("Enter your **Subject**:", key="subject_input")
with col2:
    body = st.text_area("Enter your **Body**:", key="body_input", height=100)
|
|
| |
|
|
if st.button("Submit"):
    if not subject and not body:
        st.warning("Please enter a subject or body text to classify.")
    else:
        # Combine subject and body and apply the same cleaning pipeline
        # the model was trained with.
        cleaned = clean_text(subject + " " + body)

        st.subheader("Preprocessing Results")
        st.info(f"**Cleaned Text:** {cleaned}")

        padded = convert_to_sequence(cleaned)

        with st.spinner("Classifying ticket..."):
            type_probs, queue_probs, tags_probs = model.predict(padded, verbose=0)

        # Single-label heads: arg-max class index, mapped back to its label.
        pred_type = le_type.inverse_transform([np.argmax(type_probs)])[0]
        pred_queue = le_queue.inverse_transform([np.argmax(queue_probs)])[0]

        # Multi-label head: threshold each tag probability at 0.5 before
        # inverting the binarizer.
        pred_tags = mlb.inverse_transform((tags_probs >= 0.5).astype(int))[0]

        st.success("✅ Classification Complete!")

        st.subheader("Prediction Results")
        st.metric("Predicted Type", pred_type)
        st.metric("Predicted Queue", pred_queue)

        if pred_tags:
            st.markdown(f"**Predicted Tags:** {', '.join(pred_tags)}")
        else:
            st.markdown("**Predicted Tags:** No significant tags found.")
|
|