"""Streamlit app that classifies IT tickets from an uploaded CSV/Excel file.

Flow: load the Keras model from ``model.h5``, let the user upload a file and
pick the text column, run the model on the tokenized text, then show the
distribution of predicted categories as a table and a horizontal bar chart.
"""
import re

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import streamlit as st
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer

from text_cleaning import preprocess_text

# Tokenization settings used when turning ticket text into model input.
MAX_FEATURES = 5000
MAX_LEN = 150

# Sentinel entry shown first in the column selector.
COLUMN_PLACEHOLDER = 'Choose column'


def _read_upload(uploaded):
    """Read the uploaded CSV or Excel file into a DataFrame.

    The extension check is case-insensitive so that e.g. ``DATA.CSV`` is
    handled the same way as ``data.csv``.

    Raises:
        ValueError: if the file name has neither a .csv nor an .xlsx suffix.
    """
    name = uploaded.name.lower()
    if name.endswith('.csv'):
        frame = pd.read_csv(uploaded, header=0)
        st.success('CSV file successfully loaded!')
    elif name.endswith('.xlsx'):
        frame = pd.read_excel(uploaded, engine='openpyxl')
        st.success('Excel file successfully loaded!')
    else:
        raise ValueError(f"Unsupported file type: {uploaded.name}")
    return frame


def _encode_texts(texts):
    """Convert cleaned ticket texts into padded integer sequences."""
    # NOTE(review): the tokenizer is fitted on the uploaded data itself, so
    # its word indices probably differ from the ones the model was trained
    # with. If the training tokenizer was saved, load it here instead of
    # re-fitting -- confirm against the training pipeline.
    tokenizer = Tokenizer(num_words=MAX_FEATURES, split=' ')
    tokenizer.fit_on_texts(texts)
    sequences = tokenizer.texts_to_sequences(texts)
    return pad_sequences(sequences, maxlen=MAX_LEN)


def _load_categories():
    """Return the list of category labels stored in ``Cat.csv``.

    Raises:
        ValueError: if the file has no 'Main Category' column.
    """
    cat = pd.read_csv('Cat.csv')
    if 'Main Category' not in cat.columns:
        raise ValueError("Cat.csv must contain a 'Main Category' column")
    return list(cat['Main Category'])


def _summarize(labels):
    """Build a table with the count and percentage of each predicted label."""
    df_reversed = pd.DataFrame(labels, columns=['Main Category'])
    predicted_counts = df_reversed['Main Category'].value_counts()
    predicted_percentages = (predicted_counts / len(df_reversed)) * 100
    return pd.DataFrame({
        'Category': predicted_counts.index,
        'Count': predicted_counts.values,
        'Percentage': np.round(predicted_percentages.values, 2),
    })


def _plot_distribution(results):
    """Render a horizontal bar chart of the predicted category counts."""
    fig, ax = plt.subplots(figsize=[10, 10])
    try:
        bars = ax.barh(results['Category'].astype(str), results['Count'],
                       color='skyblue')
        ax.set_title('Predicted Category Distribution', fontsize=15)
        # barh draws the categories along the y-axis and the counts along
        # the x-axis, so the labels must follow that orientation.
        ax.set_xlabel('Count')
        ax.set_ylabel('Category')

        # Annotate each bar with its percentage, just past the bar's end.
        for bar, percentage in zip(bars, results['Percentage']):
            ax.text(bar.get_width(), bar.get_y() + bar.get_height() / 2,
                    f'{percentage:.1f}%', ha='left', va='center', fontsize=10)

        ax.tick_params(axis='x', labelsize=10)
        ax.tick_params(axis='y', labelsize=10)

        st.pyplot(fig)
    finally:
        # pyplot keeps a reference to every figure it creates; close it so
        # figures do not pile up across Streamlit reruns.
        plt.close(fig)


def _classify_and_report(df, column, model):
    """Predict a category for every row of ``df[column]`` and display results."""
    texts = df[column].astype('str').apply(preprocess_text)
    if texts.empty:
        st.warning('The selected column contains no rows to classify.')
        return

    X = _encode_texts(texts.values)

    with st.spinner('Analyzing data and will predict soon...'):
        pred = model.predict(X)

    categories = _load_categories()

    # Reverse the one-hot encoding: pick the highest-scoring class per row.
    labels = [categories[np.argmax(row)] for row in pred]

    results = _summarize(labels)
    st.write('Predicted Category Distribution:')
    st.dataframe(results)

    _plot_distribution(results)


st.header('IT Ticket Analysis and Classification')
st.write(':blue[Model accuracy ranges from 85% to 95% depending on data quality.]')

# Load the pre-trained model.
# NOTE(review): this runs on every Streamlit rerun; consider caching the load
# (e.g. st.cache_resource) if the installed Streamlit version supports it.
try:
    model = load_model('model.h5')
    st.success('Model is successfully loaded and ready for data analysis!')
except Exception as e:
    st.error(f"Failed to load model: {e}")
    # Nothing below can work without a model; halt instead of failing later
    # with a NameError on the undefined `model`.
    st.stop()

# File uploader for user to upload a CSV or Excel file
file = st.file_uploader('Upload file as CSV or Excel format', type=['csv', 'xlsx'])

if file is not None:
    try:
        df = _read_upload(file)

        # Display first few rows of the dataframe
        st.write('Here is a preview of your data:')
        st.dataframe(df.head())
        st.write(f"Data Shape: {df.shape}")

        # Let user select the column for prediction
        column = st.selectbox('Select the Issue/Symptom Column:',
                              [COLUMN_PLACEHOLDER] + list(df.columns))

        if column != COLUMN_PLACEHOLDER:
            st.write(f'You selected the column: **{column}**')
            _classify_and_report(df, column, model)
    except Exception as e:
        # Top-level UI boundary: surface any failure to the user.
        st.error(f"An error occurred: {e}")
else:
    st.info('Please upload a file to proceed.')