import pandas as pd


def _validate(df, columns, numeric=()):
    """Return the first validation error message, or ``None`` if all is well.

    Every name in *columns* is checked for existence first; only then is
    every name in *numeric* checked for a numeric dtype. The messages and
    their order match what callers of the public helpers already receive.
    """
    for name in columns:
        if name not in df.columns:
            return f"Column '{name}' not found."
    for name in numeric:
        if not pd.api.types.is_numeric_dtype(df[name]):
            return f"Column '{name}' is not numeric."
    return None


def _extreme_row(df, column, use_max):
    """Return the row holding the max (or min) of *column* as a dict.

    Returns an error string when the column is missing, is not numeric,
    or contains no non-null values.
    """
    error = _validate(df, [column], numeric=[column])
    if error:
        return error

    values = df[column]
    # An empty or all-NaN column has no extreme value; idxmax/idxmin would
    # raise (or return NaN) there, so report it like the other errors.
    if not values.notna().any():
        return f"Column '{column}' has no non-null values."

    # Work on positions rather than index labels: with duplicate labels,
    # ``df.loc[label]`` would return several rows instead of one.
    by_position = values.reset_index(drop=True)
    pos = by_position.idxmax() if use_max else by_position.idxmin()
    return df.iloc[pos].to_dict()


# BASIC INFO
def get_shape(df):
    """Return the row and column counts as ``{"rows": ..., "columns": ...}``."""
    rows, columns = df.shape
    return {"rows": rows, "columns": columns}


def get_columns(df):
    """Return the column labels of *df* as a list."""
    return df.columns.tolist()


def get_dtypes(df):
    """Return a mapping of column label to dtype name (as a string)."""
    return df.dtypes.astype(str).to_dict()


# COUNT UNIQUE VALUES
def count_unique(df, column):
    """Return the number of distinct non-null values in *column*.

    Returns an error string if the column does not exist.
    """
    error = _validate(df, [column])
    if error:
        return error
    return int(df[column].nunique())


# VALUE COUNTS
def get_value_counts(df, column, top_n=10):
    """Return the *top_n* most frequent values of *column* with their counts.

    Returns an error string if the column does not exist.
    """
    error = _validate(df, [column])
    if error:
        return error
    return df[column].value_counts().head(top_n).to_dict()


# column mean
def get_mean(df, column):
    """Return the mean of a numeric *column* as a float.

    Returns an error string if the column is missing or not numeric.
    """
    error = _validate(df, [column], numeric=[column])
    if error:
        return error
    return float(df[column].mean())


# groupby mean
def groupby_mean(df, group_col, value_col, ascending=False, top_n=10):
    """Return the mean of *value_col* per *group_col* group, sorted by mean.

    Only the first *top_n* groups after sorting are returned (default 10,
    matching the previous fixed limit). Returns an error string if either
    column is missing or *value_col* is not numeric.
    """
    error = _validate(df, [group_col, value_col], numeric=[value_col])
    if error:
        return error

    means = df.groupby(group_col)[value_col].mean()
    ordered = means.sort_values(ascending=ascending)
    return ordered.head(top_n).to_dict()


# correlation
def calculate_correlation(df, col1, col2):
    """Return the correlation between two numeric columns as a float.

    Uses ``Series.corr`` with its default method. Returns an error string
    if either column is missing or not numeric.
    """
    error = _validate(df, [col1, col2], numeric=[col1, col2])
    if error:
        return error
    return float(df[col1].corr(df[col2]))


# max value row
def get_max_row(df, column):
    """Return the row where *column* is largest, as a column -> value dict.

    Returns an error string if the column is missing, not numeric, or has
    no non-null values.
    """
    return _extreme_row(df, column, use_max=True)


# min value row
def get_min_row(df, column):
    """Return the row where *column* is smallest, as a column -> value dict.

    Returns an error string if the column is missing, not numeric, or has
    no non-null values.
    """
    return _extreme_row(df, column, use_max=False)