RAG / Information.py
1MR's picture
Upload 4 files
5c802bc verified
import streamlit as st
import pandas as pd
import numpy as np
import io
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
import seaborn as sns
import base64
def show_general_data_statistics():
    """Render summary statistics for the dataset stored in the session.

    Reads ``st.session_state["data"]`` and writes the number of variables
    and rows, the count and percentage of missing cells, the count and
    percentage of duplicate rows, and the per-dtype column counts to the
    Streamlit page. If no dataset is stored under ``"data"``, a warning is
    shown instead.

    NOTE(review): assumes the stored object is a pandas DataFrame (it is
    used via ``.columns``, ``.isnull()``, ``.duplicated()``, ``.dtypes``
    and ``.size``) -- confirm against the upload code.
    """
    if "data" not in st.session_state:
        st.warning("Please upload a dataset first.")
        return

    data = st.session_state["data"]
    num_var = len(data.columns)
    num_rows = len(data)
    total_cells = data.size

    missing_cells = data.isnull().sum().sum()
    duplicate_rows = data.duplicated().sum()

    # An empty dataset (no rows and/or no columns) makes the denominators
    # zero; report 0% rather than dividing by zero and showing "nan%".
    if total_cells:
        missing_cells_percent = (missing_cells / total_cells) * 100
    else:
        missing_cells_percent = 0.0
    if num_rows:
        duplicate_rows_percent = (duplicate_rows / num_rows) * 100
    else:
        duplicate_rows_percent = 0.0

    var_types = data.dtypes.value_counts()

    st.write("### General Data Statistics:")
    st.write(f"- **Number of Variables:** {num_var}")
    st.write(f"- **Number of Rows:** {num_rows}")
    st.write(f"- **Missing Cells:** {missing_cells}")
    st.write(f"- **Missing Cells (%):** {missing_cells_percent:.2f}%")
    st.write(f"- **Duplicate Rows:** {duplicate_rows}")
    st.write(f"- **Duplicate Rows (%):** {duplicate_rows_percent:.2f}%")
    st.write("#### Variable Types:")
    st.write(var_types)
def describe_data():
    """Show the ``describe()`` summary of the session dataset.

    Always renders the page title first. When ``"data"`` is present in
    ``st.session_state`` the result of calling ``.describe()`` on it is
    written to the page; otherwise a warning asks the user to upload a
    dataset.
    """
    st.title("Describe Data")

    if "data" not in st.session_state:
        st.warning("Please upload a dataset first.")
        return

    dataset = st.session_state["data"]
    summary = dataset.describe()
    st.write("Dataset Description:")
    st.write(summary)
def info_data():
    """Show the textual ``info()`` report of the session dataset.

    Always renders the page title first. When ``"data"`` is present in
    ``st.session_state`` its ``.info()`` output is captured into an
    in-memory text buffer and displayed as plain text; otherwise a warning
    asks the user to upload a dataset.
    """
    st.title("Dataset Info")

    if "data" not in st.session_state:
        st.warning("Please upload a dataset first.")
        return

    dataset = st.session_state["data"]
    # info() writes to a stream rather than returning a string, so it is
    # pointed at a StringIO and the captured text is read back.
    with io.StringIO() as sink:
        dataset.info(buf=sink)
        report = sink.getvalue()
    st.text(report)