# cat_pred / app.py
# Pradeep Kumar
# Upload 5 files
# 7a65842 verified
from tensorflow.keras.models import load_model
import re
import pandas as pd
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import streamlit as st
import matplotlib.pyplot as plt
from text_cleaning import preprocess_text
st.header('IT Ticket Analysis and Classification')
st.write(':blue[Model accuracy ranges from 85% to 95% depending on data quality.]')


@st.cache_resource
def _load_classifier(path):
    """Load the Keras model stored at *path*.

    Streamlit re-executes this script on every widget interaction; caching
    the loaded model as a shared resource avoids re-reading the model file
    on each rerun.
    """
    return load_model(path)


# Load the pre-trained model. Every later step needs it, so stop this script
# run when loading fails instead of continuing with `model` unbound (which
# would only surface later as a confusing NameError).
try:
    model = _load_classifier('model.h5')
except Exception as e:
    st.error(f"Failed to load model: {e}")
    st.stop()
else:
    st.success('Model is successfully loaded and ready for data analysis!')
def _read_uploaded_table(uploaded):
    """Parse the uploaded CSV or Excel file into a DataFrame.

    Args:
        uploaded: The object returned by ``st.file_uploader``; its ``name``
            attribute is used to choose the parser.

    Returns:
        The parsed ``pandas.DataFrame``.

    Raises:
        ValueError: If the file name ends in neither ``.csv`` nor ``.xlsx``.
    """
    # Compare case-insensitively so "TICKETS.CSV" is handled like "tickets.csv"
    # instead of silently matching no branch.
    name = uploaded.name.lower()
    if name.endswith('.csv'):
        table = pd.read_csv(uploaded, header=0)
        st.success('CSV file successfully loaded!')
        return table
    if name.endswith('.xlsx'):
        table = pd.read_excel(uploaded, engine='openpyxl')
        st.success('Excel file successfully loaded!')
        return table
    raise ValueError(f"Unsupported file type: {uploaded.name}")


def _predict_categories(texts):
    """Return the predicted 'Main Category' label for each text in *texts*.

    Tokenizes and pads the texts, runs the module-level ``model`` on them and
    maps each row's highest-scoring output index to the label at the same
    position in the 'Main Category' column of ``Cat.csv``.
    """
    max_features = 5000
    max_len = 150

    # NOTE(review): the tokenizer is fitted on the uploaded texts, so the word
    # indices fed to the model are derived from this upload alone. If the
    # model was trained with a different fitted tokenizer, that tokenizer
    # should be persisted and loaded here instead — confirm against the
    # training code.
    tokenizer = Tokenizer(num_words=max_features, split=' ')
    tokenizer.fit_on_texts(texts)
    sequences = tokenizer.texts_to_sequences(texts)
    padded = pad_sequences(sequences, maxlen=max_len)

    pred = model.predict(padded)

    # Load category mapping ('Main Category' must exist in Cat.csv).
    cat = pd.read_csv('Cat.csv')
    categories = list(cat['Main Category'])

    # Reverse one-hot encoding: pick the highest-scoring index per row.
    return [categories[np.argmax(row)] for row in pred]


def _summarize_predictions(labels):
    """Tabulate the count and percentage share of each predicted label."""
    counts = pd.Series(labels, name='Main Category').value_counts()
    percentages = counts / len(labels) * 100
    return pd.DataFrame({
        'Category': counts.index,
        'Count': counts.values,
        'Percentage': np.round(percentages.values, 2),
    })


def _plot_distribution(summary):
    """Build a horizontal bar chart of category counts with percentage labels."""
    figure, ax = plt.subplots(figsize=[10, 10])
    bars = ax.barh(summary['Category'].astype(str), summary['Count'], color='skyblue')
    ax.set_title('Predicted Category Distribution', fontsize=15)
    # barh draws counts along the x-axis and categories along the y-axis.
    ax.set_xlabel('Count')
    ax.set_ylabel('Category')

    # Annotate each bar with its percentage, just past the bar's right end.
    for bar, percentage in zip(bars, summary['Percentage']):
        ax.text(bar.get_width(), bar.get_y() + bar.get_height() / 2,
                f'{percentage:.1f}%', ha='left', va='center', fontsize=10)

    ax.tick_params(axis='both', labelsize=10)
    return figure


# File uploader for user to upload a CSV or Excel file
file = st.file_uploader('Upload file as CSV or Excel format', type=['csv', 'xlsx'])

if file is not None:
    # Top-level UI boundary: any failure below is reported in the page rather
    # than crashing the app.
    try:
        df = _read_uploaded_table(file)

        # Display first few rows of the dataframe
        st.write('Here is a preview of your data:')
        st.dataframe(df.head())
        st.write(f"Data Shape: {df.shape}")

        # Let user select the column for prediction
        column = st.selectbox('Select the Issue/Symptom Column:', ['Choose column'] + list(df.columns))

        if column != 'Choose column':
            st.write(f'You selected the column: **{column}**')

            if df.empty:
                # Nothing to classify; avoid handing the model an empty batch.
                st.warning('The uploaded file contains no rows to classify.')
            else:
                # Blank out missing cells first so they are not turned into
                # the literal word "nan" by the string conversion.
                df[column] = df[column].fillna('').astype('str').apply(preprocess_text)

                # Show spinner while processing the predictions
                with st.spinner('Analyzing data and will predict soon...'):
                    labels = _predict_categories(df[column].values)

                results = _summarize_predictions(labels)
                st.write('Predicted Category Distribution:')
                st.dataframe(results)

                fig = _plot_distribution(results)
                st.pyplot(fig)
                # Figures made via pyplot stay registered until closed;
                # release this one so reruns do not accumulate figures.
                plt.close(fig)
    except Exception as e:
        st.error(f"An error occurred: {e}")
else:
    st.info('Please upload a file to proceed.')