# Spaces: Build error
import re

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import streamlit as st
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer

from text_cleaning import preprocess_text
# Streamlit page: load a Keras text classifier, let the user upload a ticket
# table (CSV/XLSX), pick the free-text column, classify every row, and show the
# predicted category distribution as a table and a horizontal bar chart.
st.header('IT Ticket Analysis and Classification')
st.write(':blue[Model accuracy ranges from 85% to 95% depending on data quality.]')

# Load the pre-trained model. `model` is always bound afterwards (None on
# failure) so the prediction step can report the problem clearly instead of
# failing with a NameError.
# NOTE(review): Streamlit re-executes this script on every widget interaction,
# so the model is read from disk again each time; consider caching the loader.
model = None
try:
    model = load_model('model.h5')
except Exception as e:
    st.error(f"Failed to load model: {e}")
else:
    st.success('Model is successfully loaded and ready for data analysis!')

# File uploader for user to upload a CSV or Excel file
file = st.file_uploader('Upload file as CSV or Excel format', type=['csv', 'xlsx'])

if file is None:
    st.info('Please upload a file to proceed.')
else:
    # Top-level UI boundary: any failure below is shown to the user rather than
    # crashing the page.
    try:
        # Pick the reader from the extension. Compare case-insensitively so
        # "REPORT.CSV" is handled like "report.csv", and fail explicitly on
        # anything else instead of leaving `df` unbound.
        filename = file.name.lower()
        if filename.endswith('.csv'):
            df = pd.read_csv(file, header=0)
            st.success('CSV file successfully loaded!')
        elif filename.endswith('.xlsx'):
            df = pd.read_excel(file, engine='openpyxl')
            st.success('Excel file successfully loaded!')
        else:
            raise ValueError(f"unsupported file type: {file.name}")

        # Display first few rows of the dataframe
        st.write('Here is a preview of your data:')
        st.dataframe(df.head())
        st.write(f"Data Shape: {df.shape}")

        # Let user select the column for prediction; the placeholder entry
        # means "nothing chosen yet".
        column = st.selectbox(
            'Select the Issue/Symptom Column:',
            ['Choose column'] + list(df.columns),
        )
        if column != 'Choose column':
            st.write(f'You selected the column: **{column}**')

            if model is None:
                raise RuntimeError(
                    'the classification model failed to load, '
                    'so predictions are unavailable'
                )
            if df.empty:
                raise ValueError('the uploaded file contains no rows to classify')

            # Clean the text. Missing cells become empty strings first;
            # otherwise the string cast would turn them into the literal
            # text "nan". The uploaded frame itself is left untouched.
            texts = df[column].fillna('').astype('str').apply(preprocess_text)

            # Tokenization and padding
            # NOTE(review): the tokenizer is fitted on the uploaded data, so the
            # word indices it produces are derived from this file only.
            # Presumably model.h5 was trained with a different fitted
            # vocabulary that should be persisted and loaded here - confirm.
            max_features = 5000
            max_len = 150
            tokenizer = Tokenizer(num_words=max_features, split=' ')
            tokenizer.fit_on_texts(texts.values)
            X = tokenizer.texts_to_sequences(texts.values)
            X = pad_sequences(X, maxlen=max_len)

            # Show spinner while processing the predictions
            with st.spinner('Analyzing data and will predict soon...'):
                pred = model.predict(X)

            # Load category mapping (requires a 'Main Category' column).
            cat = pd.read_csv('Cat.csv')
            categories = list(cat['Main Category'])

            # Map every prediction row to the category at its highest-scoring
            # index. Assumes the row order of Cat.csv matches the model's
            # output order - TODO confirm.
            Y_reversed = [categories[np.argmax(row)] for row in pred]
            df_reversed = pd.DataFrame(Y_reversed, columns=['Main Category'])

            # Count and percentage for each category
            predicted_counts = df_reversed['Main Category'].value_counts()
            predicted_percentages = (predicted_counts / len(df_reversed)) * 100
            results = pd.DataFrame({
                'Category': predicted_counts.index,
                'Count': predicted_counts.values,
                'Percentage': np.round(predicted_percentages.values, 2),
            })
            st.write('Predicted Category Distribution:')
            st.dataframe(results)

            # Horizontal bar chart: categories run along the y-axis and counts
            # along the x-axis, so the axis labels must follow that layout.
            fig, ax = plt.subplots(figsize=[10, 10])
            try:
                bars = ax.barh(
                    results['Category'].astype(str),
                    results['Count'],
                    color='skyblue',
                )
                ax.set_title('Predicted Category Distribution', fontsize=15)
                ax.set_xlabel('Count')
                ax.set_ylabel('Category')

                # Annotate each bar with its percentage, just past the bar end.
                for bar, percentage in zip(bars, results['Percentage']):
                    ax.text(
                        bar.get_width(),
                        bar.get_y() + bar.get_height() / 2,
                        f'{percentage:.1f}%',
                        ha='left', va='center', fontsize=10,
                    )

                # Adjust the font size for the tick labels
                ax.tick_params(axis='x', labelsize=10)
                ax.tick_params(axis='y', labelsize=10)
                st.pyplot(fig)
            finally:
                # pyplot keeps every figure it creates until it is closed;
                # release it so figures do not pile up across script reruns.
                plt.close(fig)
    except Exception as e:
        st.error(f"An error occurred: {e}")