# louiecerv's picture
# sync with remote
# 3035c77
import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Load dataset
def load_data(file_path):
    """Read a CSV source into a DataFrame, tolerating non-UTF-8 text.

    The source is first parsed as UTF-8. If that raises
    ``UnicodeDecodeError`` it is parsed again as ISO-8859-1.

    Args:
        file_path: Path of the CSV file, or an open file-like object
            accepted by ``pandas.read_csv``.

    Returns:
        pandas.DataFrame: The parsed contents.
    """
    try:
        return pd.read_csv(file_path, encoding='utf-8')
    except UnicodeDecodeError:
        # A file-like source has already been (partly) consumed by the
        # failed UTF-8 attempt; rewind it so the retry sees the whole file.
        if hasattr(file_path, 'seek'):
            file_path.seek(0)
        return pd.read_csv(file_path, encoding='ISO-8859-1')
# Convert 'Yes'/'No' to 1/0
def convert_yes_no(df, start_idx):
    """Encode 'Yes'/'No' answers as 1/0 from column ``start_idx`` onwards.

    Text values are stripped of surrounding whitespace and lower-cased
    before mapping, so ' Yes ' and 'NO' are recognised. Any other value
    (including missing ones) becomes NaN.

    Columns that are already numeric or boolean are left untouched: the
    ``.str`` accessor raises ``AttributeError`` on them, which happens for
    example when a column is entirely empty and was parsed as float.

    Args:
        df: DataFrame to convert. It is modified in place.
        start_idx: Position of the first column to convert.

    Returns:
        pandas.DataFrame: The same ``df`` object, for call chaining.
    """
    answer_codes = {'yes': 1, 'no': 0}
    for col in df.columns[start_idx:]:
        if pd.api.types.is_numeric_dtype(df[col]):
            continue
        df[col] = df[col].str.strip().str.lower().map(answer_codes)
    return df
# Calculate AI Competency Score
def calculate_ai_score(df, start_idx):
    """Add an 'AI Competency Score' column holding each row's total.

    The score is the row-wise sum of the columns from position
    ``start_idx`` onwards. Missing values are skipped by the sum, so they
    contribute nothing to the total.

    An existing 'AI Competency Score' column is excluded from the sum, so
    calling this function again on its own output recomputes the same
    score instead of adding the old score into the new one.

    Args:
        df: DataFrame with the answer columns. It is modified in place.
        start_idx: Position of the first column included in the sum.

    Returns:
        pandas.DataFrame: The same ``df`` object with the score column set.
    """
    score_col = 'AI Competency Score'
    answers = df.iloc[:, start_idx:].drop(columns=score_col, errors='ignore')
    df[score_col] = answers.sum(axis=1)
    return df
# Plot distribution for columns 0 to 9
def plot_distributions(df):
    """Render a count plot for each of the first ten columns of ``df``.

    Each plot is drawn on its own Figure, which is passed explicitly to
    ``st.pyplot`` and closed afterwards. pyplot keeps every figure it
    creates alive until it is closed, so figures would otherwise pile up
    with each column plotted.

    Args:
        df: DataFrame whose columns at positions 0-9 are plotted. Fewer
            than ten columns simply yields fewer plots.
    """
    st.subheader("Distributions for Columns 0 to 9")
    for col in df.columns[:10]:
        fig, ax = plt.subplots(figsize=(8, 4))
        sns.countplot(data=df, x=col, ax=ax)
        ax.tick_params(axis='x', labelrotation=45)
        st.pyplot(fig)
        plt.close(fig)
# Plot average AI Competency Score grouped by selected column
def plot_grouped_avg(df, group_col):
    """Render a bar chart of the mean AI Competency Score per group.

    Args:
        df: DataFrame containing ``group_col`` and an
            'AI Competency Score' column.
        group_col: Name of the column whose distinct values form the bars.
    """
    grouped = df.groupby(group_col)['AI Competency Score'].mean().reset_index()
    fig, ax = plt.subplots(figsize=(8, 4))
    sns.barplot(data=grouped, x=group_col, y='AI Competency Score', ax=ax)
    ax.tick_params(axis='x', labelrotation=45)
    st.pyplot(fig)
    # pyplot keeps figures alive until closed; release this one once drawn.
    plt.close(fig)
# Main app
def main():
    """Build the Streamlit page for the AI competency dataset.

    Steps, in display order: dataset preview, count plots for columns 0-9,
    unique values of column 10, Yes/No -> 1/0 conversion of columns 11
    onwards, the per-row AI Competency Score, a bar chart of the mean score
    grouped by a user-selected column, descriptive statistics and a
    correlation heatmap.

    A missing dataset file, or a dataset with too few columns for the
    positional layout above, is reported on the page instead of ending in
    an unhandled traceback.
    """
    response_col_idx = 10   # column whose unique values are listed
    first_yes_no_idx = 11   # first column holding Yes/No answers

    st.title("AI Competency Analysis App")

    # Load file directly from local directory
    file_path = 'AI_Competency_DIT.csv'
    try:
        df = load_data(file_path)
    except FileNotFoundError:
        st.error(f"Dataset file not found: {file_path}")
        return

    st.subheader("Dataset Preview")
    st.dataframe(df)

    # Distributions for columns 0 to 9
    plot_distributions(df)

    # Everything below addresses columns by position, so stop early when
    # the file does not have a column at index 10.
    if len(df.columns) <= response_col_idx:
        st.error(
            f"Expected at least {response_col_idx + 1} columns, "
            f"found {len(df.columns)}."
        )
        return

    # Unique responses for column 10
    st.subheader(f"Unique Responses for: {df.columns[response_col_idx]}")
    unique_responses = df.iloc[:, response_col_idx].dropna().unique()
    st.write(unique_responses)

    # Convert Yes/No to 1/0 for columns 11 onwards
    df = convert_yes_no(df, first_yes_no_idx)

    # Calculate AI Competency Score
    df = calculate_ai_score(df, first_yes_no_idx)
    st.subheader("Dataset with AI Competency Score")
    st.dataframe(df)

    # Grouped average AI Competency Score
    st.subheader("Average AI Competency Score by Group")
    group_by_col = st.selectbox("Select column to group by", df.columns[:9])
    plot_grouped_avg(df, group_by_col)

    # Additional Descriptive Statistics
    st.subheader("Descriptive Statistics")
    st.write(df.describe())

    # Correlation heatmap (answer columns plus the score column)
    st.subheader("Correlation Heatmap")
    fig, ax = plt.subplots(figsize=(12, 8))
    sns.heatmap(
        df.iloc[:, first_yes_no_idx:].corr(),
        annot=True,
        cmap='coolwarm',
        ax=ax,
    )
    st.pyplot(fig)
    # pyplot keeps figures alive until closed; release this one once drawn.
    plt.close(fig)
# Build the page only when this file is run as a script, not when imported.
if __name__ == "__main__":
    main()