| import streamlit as st | |
| import pandas as pd | |
| import numpy as np | |
| from scipy import stats | |
class Analyzer:
    """Streamlit panel that runs a user-selected statistical analysis on a DataFrame."""

    def perform_analysis(self, df: pd.DataFrame) -> None:
        """Render the analysis selector and the output of the chosen analysis.

        All results are written to the Streamlit page; nothing is returned.

        Args:
            df: The data to analyse.
        """
        analysis_type = st.selectbox(
            "Select analysis type",
            ["Descriptive Statistics", "Correlation Analysis", "Hypothesis Testing", "Custom Query"],
        )
        if analysis_type == "Descriptive Statistics":
            self._show_descriptive_statistics(df)
        elif analysis_type == "Correlation Analysis":
            self._show_correlation(df)
        elif analysis_type == "Hypothesis Testing":
            self._show_hypothesis_test(df)
        elif analysis_type == "Custom Query":
            self._show_custom_query(df)

    @staticmethod
    def _numeric_part(df: pd.DataFrame) -> pd.DataFrame:
        """Return only the numeric columns of *df*."""
        return df.select_dtypes(include="number")

    def _show_descriptive_statistics(self, df: pd.DataFrame) -> None:
        """Write ``describe()`` and, on request, skewness and kurtosis."""
        st.write(df.describe())
        if st.checkbox("Show additional statistics"):
            # Skewness/kurtosis are only defined for numeric data; calling them
            # on a mixed-type frame raises in recent pandas releases.
            numeric = self._numeric_part(df)
            if numeric.shape[1] == 0:
                st.warning("No numeric columns available for additional statistics.")
                return
            st.write("Skewness:")
            st.write(numeric.skew())
            st.write("Kurtosis:")
            st.write(numeric.kurtosis())

    def _show_correlation(self, df: pd.DataFrame) -> None:
        """Write the correlation matrix of the numeric columns and an optional heatmap."""
        # Restrict to numeric columns so string/object columns do not make
        # ``corr()`` raise.
        numeric = self._numeric_part(df)
        if numeric.shape[1] == 0:
            st.warning("No numeric columns available for correlation analysis.")
            return
        corr_matrix = numeric.corr()
        st.write(corr_matrix)
        if st.checkbox("Show heatmap"):
            # ``px`` was referenced without ever being imported; import it
            # lazily so the rest of the analyzer works without Plotly installed.
            try:
                import plotly.express as px
            except ImportError:
                st.error("Plotly is required to display the heatmap.")
                return
            fig = px.imshow(corr_matrix, color_continuous_scale='RdBu_r')
            st.plotly_chart(fig)

    def _show_hypothesis_test(self, df: pd.DataFrame) -> None:
        """Let the user pick a test type and dispatch to the matching helper."""
        test_type = st.selectbox("Select test type", ["T-Test", "ANOVA", "Chi-Square"])
        if test_type == "T-Test":
            self._show_t_test(df)
        elif test_type == "ANOVA":
            self._show_anova(df)
        elif test_type == "Chi-Square":
            self._show_chi_square(df)

    def _show_t_test(self, df: pd.DataFrame) -> None:
        """Run an independent two-sample t-test between two numeric columns."""
        numeric_cols = list(self._numeric_part(df).columns)
        if not numeric_cols:
            st.warning("A T-Test requires at least one numeric column.")
            return
        col1 = st.selectbox("Select first column", numeric_cols)
        col2 = st.selectbox("Select second column", numeric_cols)
        # Missing values would otherwise propagate and yield NaN statistics.
        sample1 = df[col1].dropna()
        sample2 = df[col2].dropna()
        if len(sample1) < 2 or len(sample2) < 2:
            st.warning("Each column needs at least two non-missing values for a T-Test.")
            return
        t_stat, p_value = stats.ttest_ind(sample1, sample2)
        st.write(f"T-statistic: {t_stat}")
        st.write(f"P-value: {p_value}")

    def _show_anova(self, df: pd.DataFrame) -> None:
        """Run a one-way ANOVA of a numeric column across the levels of a grouping column."""
        numeric_cols = list(self._numeric_part(df).columns)
        if not numeric_cols:
            st.warning("ANOVA requires at least one numeric value column.")
            return
        grouping_col = st.selectbox("Select grouping column", df.columns)
        value_col = st.selectbox("Select value column", numeric_cols)
        # Drop missing values per group and discard groups left empty.
        groups = [
            values.dropna()
            for _, values in df.groupby(grouping_col)[value_col]
        ]
        groups = [values for values in groups if len(values) > 0]
        # f_oneway cannot be computed with fewer than two samples.
        if len(groups) < 2:
            st.warning("ANOVA requires at least two non-empty groups.")
            return
        f_stat, p_value = stats.f_oneway(*groups)
        st.write(f"F-statistic: {f_stat}")
        st.write(f"P-value: {p_value}")

    def _show_chi_square(self, df: pd.DataFrame) -> None:
        """Run a chi-square test of independence on the cross-tabulation of two columns."""
        col1 = st.selectbox("Select first column", df.columns)
        col2 = st.selectbox("Select second column", df.columns)
        contingency_table = pd.crosstab(df[col1], df[col2])
        if contingency_table.size == 0:
            st.warning("The contingency table is empty; nothing to test.")
            return
        try:
            chi2, p_value, _dof, _expected = stats.chi2_contingency(contingency_table)
        except ValueError as exc:
            # Raised by SciPy for tables it cannot evaluate (e.g. a zero
            # expected frequency); report it instead of crashing the page.
            st.error(f"Chi-square test could not be computed: {exc}")
            return
        st.write(f"Chi-square statistic: {chi2}")
        st.write(f"P-value: {p_value}")

    def _show_custom_query(self, df: pd.DataFrame) -> None:
        """Filter *df* with a user-typed ``DataFrame.query`` expression and show the rows."""
        query = st.text_input("Enter a custom query (e.g., 'column_name > 5')")
        if not query:
            return
        # SECURITY NOTE(review): the expression comes straight from the user and
        # is evaluated by pandas' expression engine. Confirm this app is only
        # exposed to trusted users, or validate/whitelist the expression.
        try:
            result = df.query(query)
        except Exception as e:  # UI boundary: surface any query error to the user
            st.error(f"Error in query: {str(e)}")
        else:
            st.write(result)