| | import os |
| | os.environ["CUDA_VISIBLE_DEVICES"] = "-1" |
| | os.environ["TF_ENABLE_ONEDNN_OPTS"] = "0" |
| | import re |
| | import nltk |
| | import pickle |
| | import numpy as np |
| | import pandas as pd |
| | import streamlit as st |
| | import tensorflow as tf |
| | from nltk.corpus import stopwords |
| | from nltk.tokenize import word_tokenize |
| | from tensorflow.keras.preprocessing.sequence import pad_sequences |
| | |
| | |
| |
|
| | |
| | |
# Short alias for the Keras loader; used by load_resources() below.
load_model = tf.keras.models.load_model
| |
|
| | |
| | |
| | |
@st.cache_resource
def setup_nltk():
    """Ensure the required NLTK data packages are available locally.

    Checks for the punkt tokenizer and the stopwords corpus, downloading
    each one only if it is missing, then returns the English stopword set.
    Cached by Streamlit so the lookup/download happens once per process.

    Returns:
        set[str]: English stopwords from the NLTK corpus.
    """
    # (download-name, on-disk locator) pairs checked via nltk.data.find.
    required = (
        ("punkt", "tokenizers/punkt"),
        ("stopwords", "corpora/stopwords"),
    )
    for package, locator in required:
        try:
            nltk.data.find(locator)
        except LookupError:
            nltk.download(package)

    return set(stopwords.words("english"))
| |
|
# Module-level stopword set, fetched once at import time (cached resource).
stop_english = setup_nltk()
| |
|
| | |
| | |
| | |
# Paths to the trained artifacts expected next to this script.
MODEL_PATH = "model.h5"          # Keras model with three output heads (type, queue, tags)
LE_TYPE_PATH = "le_type.pkl"     # LabelEncoder for the ticket "type" head
LE_QUEUE_PATH = "le_queue.pkl"   # LabelEncoder for the ticket "queue" head
MLB_PATH = "mlb.pkl"             # MultiLabelBinarizer for the tag head
TOKENIZER_PATH = "tokenizer.pkl" # Fitted Keras text tokenizer
MAX_SEQ_LEN = 107  # padded sequence length; presumably matches training — TODO confirm
| |
|
@st.cache_resource
def load_resources():
    """Load the Keras model and every pickled preprocessor from disk.

    Returns:
        tuple: (model, le_type, le_queue, mlb, tokenizer).

    On a missing file or any other loading failure, shows a Streamlit
    error message and halts the script via st.stop().

    NOTE(review): pickle.load on these artifact files assumes they come
    from a trusted source — unpickling untrusted data is unsafe.
    """
    try:
        # compile=False: inference only, no optimizer/loss state needed.
        model = load_model(MODEL_PATH, compile=False)

        # Unpickle the preprocessors in a fixed order.
        loaded = []
        for artifact_path in (LE_TYPE_PATH, LE_QUEUE_PATH, MLB_PATH, TOKENIZER_PATH):
            with open(artifact_path, "rb") as fh:
                loaded.append(pickle.load(fh))
        le_type, le_queue, mlb, tokenizer = loaded

        return model, le_type, le_queue, mlb, tokenizer

    except FileNotFoundError as e:
        st.error(f"Required file not found: {e}. Please ensure all artifacts (model.h5, *.pkl) are uploaded.")
        st.stop()
    except Exception as e:
        st.error(f"An error occurred while loading resources: {e}")
        st.stop()
| |
|
# Load the model and preprocessors once at startup (cached by Streamlit).
model, le_type, le_queue, mlb, tokenizer = load_resources()
| |
|
| | |
| |
|
def clean_text(t):
    """Lowercase, strip noise, and filter a raw ticket string for the model.

    Args:
        t: Raw text (subject + body). May be None/NaN.

    Returns:
        str: Space-joined lowercase tokens with URLs, mentions, literal
        "\\n" escapes, punctuation, stopwords, and short/non-alphanumeric
        tokens removed. Empty string for missing input.
    """
    if pd.isna(t) or t is None:
        return ""

    t = t.lower()

    # BUGFIX: the noise-removal regexes previously ran AFTER tokenization,
    # where the isalnum() filter had already split URLs/mentions into bare
    # words, so patterns like http\S+ could no longer match their targets.
    # Normalize the raw text first, then tokenize.
    t = re.sub(r"http\S+|www\.\S+|@\S+|\\n", " ", t)  # URLs, mentions, literal "\n"
    t = re.sub(r"[^a-zA-Z0-9\s]", " ", t)             # drop punctuation/symbols
    t = re.sub(r"\s+", " ", t).strip()                # collapse whitespace

    # Keep alphanumeric, non-stopword tokens longer than 2 characters.
    tokens = word_tokenize(t)
    tokens = [w for w in tokens if w not in stop_english and len(w) > 2 and w.isalnum()]

    # NOTE(review): inference-time cleaning must mirror the training
    # pipeline — confirm training applied the same regex-then-tokenize order.
    return " ".join(tokens)
| |
|
def convert_to_sequence(txt):
    """Encode cleaned text as a model-ready padded integer sequence.

    Args:
        txt: A single cleaned text string.

    Returns:
        ndarray of shape (1, MAX_SEQ_LEN): tokenizer indices, left-padded
        and left-truncated to MAX_SEQ_LEN.
    """
    encoded = tokenizer.texts_to_sequences([txt])
    return pad_sequences(encoded, maxlen=MAX_SEQ_LEN, padding="pre", truncating="pre")
| |
|
| | |
| |
|
# --- Page chrome plus a worked example so users know what to enter. ---
st.set_page_config(page_title="Ticket Classification")
st.title("🎫 Ticket Classification App")

st.header("Example Input")
st.markdown("**Subject:** Account Disruption")
st.code("""Dear Customer Support Team,
I am writing to report a significant problem with the centralized account management portal...""")
st.write("---")
| |
|
| | |
# Two-column input form: subject on the left, free-text body on the right.
# `subject` and `body` are read by the Submit handler below.
col1, col2 = st.columns(2)
with col1:
    subject = st.text_input("Enter your **Subject**:", key="subject_input")
with col2:
    body = st.text_area("Enter your **Body**:", key="body_input", height=100)
| |
|
| | |
| |
|
# Submit handler: clean the input, run the model, and render all three heads.
if st.button("Submit"):
    if not subject and not body:
        st.warning("Please enter a subject or body text to classify.")
    else:
        # Concatenate subject and body into one document before cleaning.
        combined = f"{subject} {body}"
        cleaned_text = clean_text(combined)

        st.subheader("Preprocessing Results")
        st.info(f"**Cleaned Text:** {cleaned_text}")

        padded_input = convert_to_sequence(cleaned_text)

        with st.spinner("Classifying ticket..."):
            # The model exposes three heads: type, queue, and tag probabilities.
            type_probs, queue_probs, tags_probs = model.predict(padded_input, verbose=0)

        # Single-label heads: arg-max class index mapped back to its label.
        predicted_type = le_type.inverse_transform([np.argmax(type_probs)])[0]
        predicted_queue = le_queue.inverse_transform([np.argmax(queue_probs)])[0]

        # Multi-label head: threshold each tag probability at 0.5,
        # then decode the indicator row back into tag names.
        tag_indicator = (tags_probs >= 0.5).astype(int)
        predicted_tags = mlb.inverse_transform(tag_indicator)[0]

        st.success("✅ Classification Complete!")

        st.subheader("Prediction Results")
        st.metric("Predicted Type", predicted_type)
        st.metric("Predicted Queue", predicted_queue)

        if predicted_tags:
            st.markdown(f"**Predicted Tags:** {', '.join(predicted_tags)}")
        else:
            st.markdown("**Predicted Tags:** No significant tags found.")