1MR committed on
Commit
13e10ec
·
verified ·
1 Parent(s): 1f1f213

Create Main.py

Browse files
Files changed (1) hide show
  1. Main.py +145 -0
Main.py ADDED
@@ -0,0 +1,145 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ import io
5
+ import matplotlib.pyplot as plt
6
+ from sklearn.preprocessing import LabelEncoder
7
+ import seaborn as sns
8
+ import base64
9
+ import json
10
+ from langchain.docstore.document import Document
11
+ from langchain.vectorstores import Chroma
12
+ from langchain.embeddings import HuggingFaceEmbeddings
13
+ from langchain.llms import HuggingFaceHub
14
+ from langchain.chains import RetrievalQA
15
+ from Information import show_general_data_statistics, describe_data, info_data
16
+ from Preprocessing1 import preview_data, data_cleaning, modify_column_names
17
+ from Preprocessing2 import handle_categorical_values, missing_values, handle_duplicates, handle_outliers
18
+ from RAG import create_doucment, ask_me, load_models_embedding, load_models_llm, create_database
19
+
20
+
21
def upload_data():
    """Render the upload page and store the uploaded dataset in session state.

    Shows a file uploader limited to ``csv`` and ``xlsx`` files. When a file
    is supplied it is parsed with pandas and saved under
    ``st.session_state["data"]``. Any failure while reading the file is
    reported through ``st.error`` rather than raised.

    Returns:
        Whatever ``st.file_uploader`` returned; the value is falsy when no
        file has been provided, in which case nothing is loaded.
    """
    st.title("Upload Dataset")
    file = st.file_uploader(
        "Upload your dataset",
        type=["csv", "xlsx"],
        key="file_uploader_1",
    )

    if file:
        # Compare the extension case-insensitively so a name such as
        # "DATA.CSV" is still parsed instead of leaving `data` unassigned.
        filename = file.name.lower()
        try:
            if filename.endswith(".csv"):
                data = pd.read_csv(file)
            elif filename.endswith(".xlsx"):
                data = pd.read_excel(file)
            else:
                # Surface a clear message instead of an unbound-variable error.
                raise ValueError(f"Unsupported file type: {file.name}")

            st.session_state["data"] = data
            st.success("Dataset uploaded successfully!")
        except Exception as e:
            # UI boundary: show the problem to the user instead of crashing
            # the page.
            st.error(f"Error loading file: {e}")
    return file
38
+
39
+
40
def download_data():
    """Offer the dataset stored in session state as a CSV download.

    Shows a warning instead of the download button when no dataset is stored
    under ``st.session_state["data"]`` or when the stored dataset is empty.
    """
    has_data = (
        "data" in st.session_state
        and not st.session_state["data"].empty
    )
    if not has_data:
        st.warning(
            "No data available to download. Please modify or upload a dataset first."
        )
        return

    # Serialise without the index column and encode to bytes for the button.
    frame = st.session_state["data"]
    payload = frame.to_csv(index=False).encode("utf-8")

    st.download_button(
        label="Download Cleaned Dataset",
        data=payload,
        file_name="cleaned_data.csv",
        mime="text/csv",
    )
55
+
56
+ # Upload, download, main ==> project
57
+ # Preview , Data cleaning Modify ==> Preprocessing1
58
+ # Data , Describe, info ==> Information
59
+ # Handle, Missing ==> Preprocessing2
60
+
61
+
62
def rag_chatbot():
    """Render the RAG chatbot page for the dataset held in session state.

    When ``st.session_state["data"]`` is a pandas DataFrame, the function
    converts it to documents, loads the embedding model and the LLM, builds
    a retriever from the resulting database, and answers the question typed
    by the user. Otherwise it shows a warning asking for an upload.

    NOTE(review): documents, models and the database are rebuilt on every
    call of this function; if that proves slow, consider caching them —
    confirm against how the helpers in ``RAG`` behave.
    """
    st.title("RAG Chatbot")

    # Guard: a DataFrame must already be present in session state.
    frame = st.session_state["data"] if "data" in st.session_state else None
    if not isinstance(frame, pd.DataFrame):
        st.warning("Please upload a dataset to proceed.")
        return

    # Turn the dataset into documents.
    st.write("Processing the dataset...")
    docs = create_doucment(frame)
    st.write(f"Created {len(docs)} documents.")

    # Load the embedding model and the language model.
    st.write("Loading models...")
    embedding_model = load_models_embedding()
    language_model = load_models_llm()

    # Build the database and expose it as a retriever.
    vector_store = create_database(embedding_model, docs)
    retriever = vector_store.as_retriever()

    # Answer only once the user has typed a question.
    question = st.text_input("Ask a question about your dataset:")
    if not question:
        return

    answer = ask_me(question, retriever, language_model)
    st.write(f"Answer: {answer}")
89
+
90
+
91
def main():
    """Draw the sidebar navigation and render the page the user selected.

    Each sidebar entry maps to one page function. A selection that matches
    no entry falls back to a warning.
    """
    # Insertion order defines the order of the sidebar entries.
    pages = {
        "Upload": upload_data,
        "Preview": preview_data,
        "Data Cleaning": data_cleaning,
        "Modify Column Names": modify_column_names,
        "General Data Statistics": show_general_data_statistics,
        "Describe": describe_data,
        "Info": info_data,
        "Handle Categorical": handle_categorical_values,
        "Missing Values": missing_values,
        "Handle Duplicates": handle_duplicates,
        "Handle Outliers": handle_outliers,
        "Download": download_data,
        "RAG Chatbot": rag_chatbot,
    }

    st.sidebar.title("Navigation")
    selected = st.sidebar.radio(
        "Go to",
        list(pages),
        key="unique_navigation_key",
    )

    render_page = pages.get(selected)
    if render_page is None:
        st.warning("Please upload a dataset first.")
    else:
        render_page()
142
+
143
+
144
# Start the app only when this file is run as the entry script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()