Spaces:

Ashar086
/

hackathon

Sleeping

App Files Files Community

Ashar086 committed on Oct 7, 2024

Commit

1123969

verified ·

1 Parent(s): b81d37e

Update analyzer.py

Browse files

Files changed (1) hide show

analyzer.py +57 -11

analyzer.py CHANGED Viewed

@@ -1,15 +1,61 @@
 import pandas as pd
 import numpy as np
 class Analyzer:
-    def analyze_data(self, df, prompt):
-        # This is a simple implementation. In a real-world scenario,
-        # you might want to use more sophisticated NLP techniques.
-        if "correlation" in prompt.lower():
-            return df.corr().to_string()
-        elif "summary" in prompt.lower():
-            return df.describe().to_string()
-        elif "unique" in prompt.lower():
-            return {col: df[col].nunique() for col in df.columns}
-        else:
-            return "I'm sorry, I couldn't understand your analysis request. Please try asking about correlation, summary statistics, or unique values."

+import streamlit as st
 import pandas as pd
 import numpy as np
+from scipy import stats
 class Analyzer:
+    """Interactive Streamlit front end for exploratory analysis of a DataFrame."""
+
+    def perform_analysis(self, df):
+        """Let the user pick an analysis type and render its results.
+
+        Args:
+            df: The pandas DataFrame to analyse.
+
+        Returns:
+            None. All output is written to the Streamlit page.
+        """
+        if df is None or df.empty:
+            # With no rows/columns the column pickers below would return None
+            # and the subsequent df[...] lookups would fail with a KeyError.
+            st.warning("No data available for analysis.")
+            return
+        # Dispatch table: the dict order is also the order shown in the picker.
+        handlers = {
+            "Descriptive Statistics": self._descriptive_statistics,
+            "Correlation Analysis": self._correlation_analysis,
+            "Hypothesis Testing": self._hypothesis_testing,
+            "Custom Query": self._custom_query,
+        }
+        analysis_type = st.selectbox("Select analysis type", list(handlers))
+        handlers[analysis_type](df)
+
+    def _descriptive_statistics(self, df):
+        """Show describe() output and, on request, skewness and kurtosis."""
+        st.write(df.describe())
+        if st.checkbox("Show additional statistics"):
+            # numeric_only=True: since pandas 2.0 these reductions raise a
+            # TypeError when the frame contains non-numeric columns.
+            st.write("Skewness:")
+            st.write(df.skew(numeric_only=True))
+            st.write("Kurtosis:")
+            st.write(df.kurtosis(numeric_only=True))
+
+    def _correlation_analysis(self, df):
+        """Show the correlation matrix of the numeric columns, optionally as a heatmap."""
+        # numeric_only=True: DataFrame.corr no longer drops non-numeric columns
+        # silently in pandas 2.0+, it raises instead.
+        corr_matrix = df.corr(numeric_only=True)
+        st.write(corr_matrix)
+        if st.checkbox("Show heatmap"):
+            # The module never imported plotly, so `px` was an undefined name
+            # here. Import it lazily so only the heatmap path depends on it.
+            try:
+                import plotly.express as px
+            except ImportError:
+                st.error("Plotly is required to display the heatmap.")
+                return
+            fig = px.imshow(corr_matrix, color_continuous_scale='RdBu_r')
+            st.plotly_chart(fig)
+
+    def _hypothesis_testing(self, df):
+        """Let the user choose a statistical test and run it."""
+        tests = {
+            "T-Test": self._t_test,
+            "ANOVA": self._anova,
+            "Chi-Square": self._chi_square,
+        }
+        test_type = st.selectbox("Select test type", list(tests))
+        tests[test_type](df)
+
+    def _t_test(self, df):
+        """Run an independent two-sample t-test on two numeric columns."""
+        numeric_cols = df.select_dtypes(include="number").columns
+        if len(numeric_cols) == 0:
+            st.warning("T-Test requires at least one numeric column.")
+            return
+        col1 = st.selectbox("Select first column", numeric_cols)
+        col2 = st.selectbox("Select second column", numeric_cols)
+        # Drop missing values: with the default nan_policy a single NaN turns
+        # both the statistic and the p-value into NaN.
+        t_stat, p_value = stats.ttest_ind(df[col1].dropna(), df[col2].dropna())
+        st.write(f"T-statistic: {t_stat}")
+        st.write(f"P-value: {p_value}")
+
+    def _anova(self, df):
+        """Run a one-way ANOVA of a numeric column across the groups of another column."""
+        numeric_cols = df.select_dtypes(include="number").columns
+        if len(numeric_cols) == 0:
+            st.warning("ANOVA requires at least one numeric column.")
+            return
+        grouping_col = st.selectbox("Select grouping column", df.columns)
+        value_col = st.selectbox("Select value column", numeric_cols)
+        samples = (group.dropna() for _, group in df.groupby(grouping_col)[value_col])
+        groups = [sample for sample in samples if not sample.empty]
+        if len(groups) < 2:
+            # f_oneway needs at least two samples to compare.
+            st.warning("ANOVA requires at least two non-empty groups.")
+            return
+        f_stat, p_value = stats.f_oneway(*groups)
+        st.write(f"F-statistic: {f_stat}")
+        st.write(f"P-value: {p_value}")
+
+    def _chi_square(self, df):
+        """Run a chi-square test of independence on two columns."""
+        col1 = st.selectbox("Select first column", df.columns)
+        col2 = st.selectbox("Select second column", df.columns)
+        contingency_table = pd.crosstab(df[col1], df[col2])
+        try:
+            chi2, p_value, *_ = stats.chi2_contingency(contingency_table)
+        except ValueError as e:
+            # Raised e.g. for an empty table or zero expected frequencies.
+            st.error(f"Chi-square test could not be computed: {e}")
+            return
+        st.write(f"Chi-square statistic: {chi2}")
+        st.write(f"P-value: {p_value}")
+
+    def _custom_query(self, df):
+        """Filter the frame with a user-supplied DataFrame.query expression."""
+        query = st.text_input("Enter a custom query (e.g., 'column_name > 5')")
+        if query:
+            # NOTE(security): DataFrame.query evaluates the user-typed
+            # expression; review before exposing this app to untrusted users.
+            try:
+                result = df.query(query)
+                st.write(result)
+            except Exception as e:
+                # UI boundary: report any parse/evaluation failure to the
+                # user instead of crashing the app.
+                st.error(f"Error in query: {str(e)}")