| import streamlit as st | |
| import pandas as pd | |
| import numpy as np | |
| import io | |
| import matplotlib.pyplot as plt | |
| from sklearn.preprocessing import LabelEncoder | |
| import seaborn as sns | |
| import base64 | |
| import json | |
| from langchain.docstore.document import Document | |
| from langchain.vectorstores import Chroma | |
| from langchain.embeddings import HuggingFaceEmbeddings | |
| from langchain.llms import HuggingFaceHub | |
| from langchain.chains import RetrievalQA | |
| from Information import show_general_data_statistics, describe_data, info_data | |
| from Preprocessing1 import preview_data, data_cleaning, modify_column_names | |
| from Preprocessing2 import handle_categorical_values, missing_values, handle_duplicates, handle_outliers | |
| from Virtualization import visualize_data | |
def upload_data():
    """Render the upload page and load the chosen file into session state.

    Shows a file uploader limited to CSV and XLSX files. When a file is
    provided it is parsed with pandas and the resulting DataFrame is stored
    under ``st.session_state["data"]``. Problems are reported through
    ``st.error`` rather than raised.

    Returns:
        Whatever ``st.file_uploader`` returned (falsy when nothing has been
        uploaded yet).
    """
    st.title("Upload Dataset")
    file = st.file_uploader(
        "Upload your dataset",
        type=["csv", "xlsx"],
        key="file_uploader_1",
    )
    if not file:
        return file

    # Compare case-insensitively so names such as "DATA.CSV" are recognised.
    filename = file.name.lower()
    if filename.endswith(".csv"):
        reader = pd.read_csv
    elif filename.endswith(".xlsx"):
        reader = pd.read_excel
    else:
        # Without this branch the code fell through with no DataFrame bound
        # and the user saw an unrelated "referenced before assignment" error.
        st.error(f"Error loading file: unsupported file type '{file.name}'")
        return file

    try:
        data = reader(file)
    except Exception as e:  # UI boundary: show any parser failure to the user
        st.error(f"Error loading file: {e}")
    else:
        st.session_state["data"] = data
        st.success("Dataset uploaded successfully!")
    return file
def download_data():
    """Offer the DataFrame held in session state as a CSV download.

    Reads ``st.session_state["data"]``. If the key is missing or the frame
    is empty, a warning is shown and nothing else is rendered. Otherwise a
    download button is rendered, and a click on it triggers balloons plus a
    success message.
    """
    has_data = (
        "data" in st.session_state
        and not st.session_state["data"].empty
    )
    if not has_data:
        st.warning(
            "No data available to download. Please modify or upload a dataset first.")
        return

    # Serialise without the index column and hand Streamlit raw UTF-8 bytes.
    csv_text = st.session_state["data"].to_csv(index=False)
    payload = csv_text.encode('utf-8')

    clicked = st.download_button(
        label="Download Cleaned Dataset",
        data=payload,
        file_name="cleaned_data.csv",
        mime="text/csv",
    )
    if clicked:
        st.balloons()
        st.success("Dataset is ready for download!")
def rag_chatbot():
    """Placeholder for the "RAG Chatbot" page; it currently renders nothing.

    TODO: implement. The langchain imports at the top of the file are
    presumably meant for this page - confirm before removing them.
    """
    return None
def main():
    """Draw the sidebar navigation and render the page the user selected."""
    # Sidebar label -> function that renders that page. Insertion order is
    # the order in which the choices are listed in the radio widget.
    pages = {
        "Upload": upload_data,
        "Preview": preview_data,
        "Data Cleaning": data_cleaning,
        "Modify Column Names": modify_column_names,
        "General Data Statistics": show_general_data_statistics,
        "Describe": describe_data,
        "Info": info_data,
        "Handle Categorical": handle_categorical_values,
        "Missing Values": missing_values,
        "Handle Duplicates": handle_duplicates,
        "Handle Outliers": handle_outliers,
        "Visualize Data": visualize_data,
        "Download": download_data,
        "RAG Chatbot": rag_chatbot,
    }

    st.sidebar.title("Navigation")
    selection = st.sidebar.radio(
        "Go to",
        list(pages),
        key="unique_navigation_key",
    )

    render_page = pages.get(selection)
    if render_page is None:
        # Fallback for a selection that matches no page; not expected to
        # trigger while the radio always yields one of the labels above.
        st.warning("Please upload a dataset first.")
        return
    render_page()
# Build the UI only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()