# Spaces:
# Sleeping
# Sleeping
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import streamlit as st

from data_cleaning import clean_data
from data_labelling import label_dataset
from dataset_import import load_data
# AnalyzeYT Streamlit page.
#
# Flow: load a dataset, clean it, show previews and summary statistics, let
# the user draw one chart, and optionally run the labelling step and offer
# the result as a CSV download.  Streamlit re-executes this whole script on
# every widget interaction, so everything below runs top to bottom each time.

_DEFAULT_OUTPUT_NAME = "labeled_dataset.csv"


def _safe_csv_name(raw_name, fallback=_DEFAULT_OUTPUT_NAME):
    """Reduce a user-typed file name to a bare file name.

    Directory components are dropped so the value cannot point outside the
    working directory when it is used to write the CSV on the server.

    Args:
        raw_name: Text entered by the user.
        fallback: Name returned when nothing usable is left.

    Returns:
        The final path component of ``raw_name``, or ``fallback`` when that
        component is empty or is ``".."``.
    """
    name = Path(raw_name.strip()).name
    return fallback if name in ("", "..") else name


def _render_xy_chart(frame, chart_type, draw_chart):
    """Ask for X/Y columns and a title, then draw ``frame[y]`` against ``frame[x]``.

    Args:
        frame: DataFrame to plot from.
        chart_type: Selected chart type label; also used as the default
            title and as the prefix of the caption written under the chart.
        draw_chart: Streamlit chart callable (``st.line_chart`` or
            ``st.bar_chart``).
    """
    x_axis = st.selectbox("Select X-axis Column", frame.columns)
    # Default Y to the second column so the initial selection is not the same
    # column twice.
    y_default = 1 if len(frame.columns) > 1 else 0
    y_axis = st.selectbox("Select Y-axis Column", frame.columns, index=y_default)
    title = st.text_input("Enter Chart Title", chart_type)

    if x_axis == y_axis:
        # Selecting the same column twice would yield duplicate column names.
        st.warning("Select two different columns for the X and Y axes.")
        return

    # Streamlit's built-in charts use the index as the x axis, so move the
    # chosen X column into the index instead of plotting it as a second series.
    draw_chart(frame.set_index(x_axis)[[y_axis]])
    st.write(f"{chart_type}: {title}")


def _render_histogram(frame):
    """Ask for a column, bin count and title, then draw a histogram.

    Args:
        frame: DataFrame to plot from.
    """
    selected_column = st.selectbox("Select Column for Histogram", frame.columns)
    bins = st.slider("Number of Bins", min_value=10, max_value=100, value=30)
    title = st.text_input("Enter Chart Title", "Histogram")

    # Use an explicit figure: the argument-less st.pyplot() relies on pyplot's
    # global figure, which Streamlit deprecates and which is not thread-safe.
    fig, ax = plt.subplots()
    # Missing values are dropped so the bin range can be computed.
    ax.hist(frame[selected_column].dropna(), bins=bins)
    ax.set_title(title)
    ax.set_xlabel(selected_column)
    ax.set_ylabel('Frequency')
    st.pyplot(fig)
    # The script reruns on every interaction; close the figure so they do not
    # accumulate in memory.
    plt.close(fig)


def _render_labeling(frame):
    """Show the labelling options and, on request, run labelling and offer a download.

    Args:
        frame: Cleaned DataFrame passed to ``label_dataset``.
    """
    st.write("Labeling Options:")
    label_option = st.radio("Do you want to label your dataset?", ('No', 'Yes'))
    if label_option != 'Yes':
        return

    # Select columns for labeling
    columns_to_label = st.multiselect("Select Columns to Label", frame.columns)

    # One text input per selected column for the name of its labeled output.
    new_column_names = []
    for col in columns_to_label:
        new_name = st.text_input(
            f"Enter new name for labeled column '{col}':", f"{col}_labeled"
        )
        new_column_names.append(new_name)

    # The name is user input and is used as a server-side path below, so keep
    # only its final component.
    output_name = _safe_csv_name(
        st.text_input("Enter Output File Name", "labeled_dataset.csv")
    )

    if st.button("Run Labeling"):
        labeled_data = label_dataset(frame, columns_to_label, new_column_names)
        labeled_data.to_csv(output_name, index=False)
        st.write("Labeling Completed. Download your file below:")
        # Same index=False as the saved file so both outputs are identical.
        st.download_button(
            label="Download Labeled Dataset",
            data=labeled_data.to_csv(index=False),
            file_name=output_name,
            mime='text/csv',
        )


st.title("AnalyzeYT Dataset Analysis")

# Load and clean dataset
data = load_data()

# load_data() may return None; nothing further is rendered in that case.
if data is not None:
    st.write("Loaded Data Preview:")
    st.write(data.head())

    # Clean data
    data = clean_data(data)
    st.write("Cleaned Data Preview:")
    st.write(data.head())

    # Show data description
    st.write("Data Description:")
    st.write(data.describe())

    # Data visualization options
    st.write("Data Visualization:")
    chart_type = st.selectbox(
        "Select Chart Type", ['Line Chart', 'Bar Chart', 'Histogram']
    )
    if chart_type == 'Line Chart':
        _render_xy_chart(data, chart_type, st.line_chart)
    elif chart_type == 'Bar Chart':
        _render_xy_chart(data, chart_type, st.bar_chart)
    elif chart_type == 'Histogram':
        _render_histogram(data)

    # Add option for labeling the dataset
    _render_labeling(data)