Spaces:
Sleeping
Sleeping
| import pandas as pd | |
| from pygwalker.api.streamlit import StreamlitRenderer | |
| from io import BytesIO | |
| import requests | |
| import streamlit as st | |
| from pymongo import MongoClient | |
| import os | |
| from dotenv import load_dotenv | |
| import json | |
# Pull settings from a local .env file (if present) into the process
# environment before reading them.
load_dotenv()

# Connection settings; each is None when the variable is not set.
MONGO_URI = os.environ.get("MONGO_URI")
DB_NAME = os.environ.get("DB_NAME")
COLLECTION_NAME = os.environ.get("COLLECTION_NAME")

# Shared MongoDB handles used by the page functions below.
mongo_client = MongoClient(MONGO_URI)
db = mongo_client[DB_NAME]
collection = db[COLLECTION_NAME]
def load_csv_from_url(csv_url, timeout=30):
    """Download a CSV file over HTTP and parse it into a DataFrame.

    Args:
        csv_url: URL of the CSV file to fetch.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` may block indefinitely on a stalled
            connection, which would hang the page.

    Returns:
        The parsed CSV content as a ``pandas.DataFrame``.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    response = requests.get(csv_url, timeout=timeout)
    response.raise_for_status()  # Ensure the request was successful
    return pd.read_csv(BytesIO(response.content))
| # Column Analysis Function | |
def analyze_column_data(df):
    """Build a per-column summary of ``df``.

    Numeric columns get "Mean", "Median", "Mode", "Unique Values" and
    "Null Values". All other columns get "Unique Values", "Null Values"
    and "Top Categories" (the five most frequent values with their counts).

    Args:
        df: The ``pandas.DataFrame`` to analyse.

    Returns:
        A dict mapping each column name to its summary dict. "Mode" is
        ``None`` when the column has no non-null values. The mode and the
        null count are returned as native Python scalars rather than NumPy
        scalars so the summary can be serialised or rendered as plain data.
    """
    analysis = {}
    for col in df.columns:
        series = df[col]
        unique_count = series.nunique()
        # ``sum()`` yields a NumPy integer; cast so the value is a plain int.
        null_count = int(series.isnull().sum())

        if pd.api.types.is_numeric_dtype(series):
            # Compute the mode once instead of twice per column.
            modes = series.mode()
            # ``tolist()`` converts the first mode to a native Python scalar.
            mode_value = None if modes.empty else modes.iloc[:1].tolist()[0]
            analysis[col] = {
                "Mean": series.mean(),
                "Median": series.median(),
                "Mode": mode_value,
                "Unique Values": unique_count,
                "Null Values": null_count,
            }
        else:
            analysis[col] = {
                "Unique Values": unique_count,
                "Null Values": null_count,
                "Top Categories": series.value_counts().head(5).to_dict(),
            }
    return analysis
| # Streamlit Interface | |
def display_csv_analysis(object_url):
    """Render the CSV analysis page for the record matching ``object_url``.

    Shows a "Back" button, a preview of the CSV, per-column statistics laid
    out in two columns, and an interactive PyGWalker explorer.

    Args:
        object_url: Value of the ``object_url`` field used to look up the
            record whose ``csv_object_url`` points at the CSV file.

    Returns:
        None. If no matching record exists, or the record has no
        ``csv_object_url``, an error message is shown and nothing else is
        rendered.
    """
    if st.button("Back", key="back_button"):
        st.session_state.page = "view_excel"
        st.rerun()

    # ``find_one`` returns None when nothing matches; guard before ``.get``
    # so a missing record shows a message instead of an AttributeError.
    record = collection.find_one({"object_url": object_url})
    csv_url = record.get("csv_object_url") if record else None
    if not csv_url:
        st.error("No CSV file is available for this document.")
        return

    st.title("CSV File Analysis")

    # Load and display CSV data
    df = load_csv_from_url(csv_url)
    st.subheader("CSV Preview")
    st.dataframe(df)

    # Perform and display analysis
    st.subheader("Column Analysis")
    column_analysis = analyze_column_data(df)
    col1, col2 = st.columns(2)
    for idx, (col_name, col_data) in enumerate(column_analysis.items()):
        # Alternate entries between the left and right layout columns.
        with col1 if idx % 2 == 0 else col2:
            st.markdown(f"**{col_name}**")
            st.write(col_data)
            # NOTE(review): the source's indentation was lost; the rule is
            # assumed to separate each entry rather than follow the whole
            # loop - confirm against the original file.
            st.markdown("<hr>", unsafe_allow_html=True)

    st.subheader("Graphical Analysis of Table")
    pyg_app = StreamlitRenderer(df)
    pyg_app.explorer()