Spaces:
Build error
Build error
| import pandas as pd | |
| import numpy as np | |
| import streamlit as st | |
| import sdv | |
| from sdv.datasets.local import load_csvs | |
| from sdv.metadata import MultiTableMetadata | |
| from sdv.multi_table import HMASynthesizer | |
| import time | |
| import os | |
| import gc | |
| import warnings | |
| import plotly.express as px | |
| from PIL import Image | |
| from io import BytesIO | |
| from IPython.display import Image as IPImage, display | |
| from PyPDF2 import PdfReader | |
| from fpdf import FPDF | |
| import statsmodels.api as sm | |
| import matplotlib.pyplot as plt | |
| import seaborn as sns | |
| import plotly.express as px | |
| from IPython.display import Markdown | |
| import kaleido | |
| st.set_page_config(page_title='DATA DISCOVERY') | |
| st.title('AUTOMATED DATA CATALOGUE') | |
| st.subheader('SELECT DATABASE') | |
| select1=st.selectbox('DATABASE NAME',('DB_10001','Marcopolo_db'),key='dbname',index=None,placeholder='Select database name') | |
| if select1=='DB_10001': | |
| with st.spinner('Performing Data Discovery'): | |
| time.sleep(2) | |
| st.success('Data cataloguing complete!') | |
| datasets = load_csvs( | |
| folder_name='BIKE_STORE_DATABASE/', | |
| read_csv_parameters={ | |
| 'skipinitialspace': True, | |
| 'encoding': 'utf_8' | |
| }) | |
| metadata = MultiTableMetadata() | |
| metadata.detect_from_csvs(folder_name='BIKE_STORE_DATABASE/') | |
| python_dict = metadata.to_dict() | |
| st.markdown('---') | |
| st.subheader('DATA CATALOG') | |
| # st.json(python_dict) | |
| brands=datasets['brands'] | |
| categories=datasets['categories'] | |
| customers=datasets['customers'] | |
| orderitems=datasets['order_items'] | |
| orders=datasets['orders'] | |
| products=datasets['products'] | |
| staffs=datasets['staffs'] | |
| stocks=datasets['stocks'] | |
| stores=datasets['stores'] | |
| tables=python_dict['tables'] | |
| table_names=[*tables] | |
| col1, col2, col3 = st.columns([2,2,2]) | |
| with col1: | |
| def view_callback(): | |
| st.session_state.tdet = False | |
| view= st.button("LIST TABLES",key='view',on_click=view_callback) | |
| with col2: | |
| if 'tdet' not in st.session_state: | |
| st.session_state.tdet = False | |
| tdet1 = st.button("SHOW TABLE DETAILS") | |
| with col3: | |
| rel=st.button('SHOW RELATIONSHIPS',key='rel',on_click=view_callback) | |
| if tdet1: | |
| st.session_state.tdet = tdet1 | |
| if view: | |
| #st.write(python_dict) | |
| st.write(pd.DataFrame(table_names,columns=['TABLE NAME'])) | |
| if rel: | |
| rlist1=python_dict['relationships'] | |
| rdf=pd.DataFrame(columns=['PARENT TABLE','CHILD TABLE','PARENT PRIMARY KEY','CHILD FOREIGN KEY']) | |
| for i in range(len(rlist1)): | |
| rlist=rlist1[i] | |
| nrow=pd.DataFrame({'PARENT TABLE':rlist['parent_table_name'],'CHILD TABLE':rlist['child_table_name'],'PARENT PRIMARY KEY':rlist['parent_primary_key'],'CHILD FOREIGN KEY':rlist['child_foreign_key']},index=[i]) | |
| rdf=pd.concat([rdf,nrow],ignore_index=True) | |
| st.write(rdf) | |
| if st.session_state.tdet is True: | |
| def tdet_callback(): | |
| st.session_state.tdet=True | |
| st.subheader('Select table name to view') | |
| sbox1=st.selectbox('TABLE NAME',table_names,index=None,placeholder='Select table name',on_change=tdet_callback) | |
| col4, col5 = st.columns([1, 3]) | |
| with col4: | |
| preview= st.button("PREVIEW TABLE",key='preview') | |
| with col5: | |
| cdet = st.button("GET COLUMN DETAILS",key='prof') | |
| if preview: | |
| st.write(datasets[sbox1]) | |
| if cdet: | |
| cdetails=pd.DataFrame(columns=['Column Name','Data Type','Personal Identifiable Information']) | |
| t_dict=tables[sbox1] | |
| c_dict=t_dict['columns'] | |
| i=0 | |
| for key in c_dict: | |
| e_dict=c_dict[key] | |
| if 'pii' in e_dict: | |
| p='YES' | |
| else: | |
| p='NO' | |
| if e_dict['sdtype']=='datetime': | |
| v=e_dict['sdtype']+': '+e_dict['datetime_format'] | |
| else: | |
| v=e_dict['sdtype'] | |
| new_row=pd.DataFrame({'Column Name':key,'Data Type':v,'Personal Identifiable Information':p},index=[i]) | |
| cdetails=pd.concat([cdetails, new_row],ignore_index=True) | |
| i=i+1 | |
| if 'primary_key' in t_dict: | |
| st.write('Primary Key:',t_dict['primary_key']) | |
| else: | |
| st.write('Primary Key: No key can be detected') | |
| st.write(cdetails) | |