1MR committed on
Commit
d517ef0
·
verified ·
1 Parent(s): 48fce40

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +172 -67
app.py CHANGED
@@ -18,10 +18,46 @@ from Preprocessing2 import handle_categorical_values, missing_values, handle_dup
18
  from RAG import create_doucment, ask_me, load_models_embedding, load_models_llm, create_database
19
 
20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  def upload_data():
22
  st.title("Upload Dataset")
23
- file = st.file_uploader("Upload your dataset", type=[
24
- "csv", "xlsx"], key="file_uploader_1")
25
 
26
  if file:
27
  try:
@@ -34,26 +70,14 @@ def upload_data():
34
  st.success("Dataset uploaded successfully!")
35
  except Exception as e:
36
  st.error(f"Error loading file: {e}")
37
- return file
38
-
39
-
40
- def download_data():
41
- """Downloads the DataFrame as a CSV file."""
42
- if "data" in st.session_state and not st.session_state["data"].empty:
43
- csv = st.session_state["data"].to_csv(index=False).encode('utf-8')
44
-
45
- st.download_button(
46
- label="Download Cleaned Dataset",
47
- data=csv,
48
- file_name="cleaned_data.csv",
49
- mime="text/csv"
50
- )
51
 
 
 
 
 
52
  else:
53
- st.warning(
54
- "No data available to download. Please modify or upload a dataset first.")
55
-
56
-
57
  def rag_chatbot():
58
  st.title("RAG Chatbot")
59
 
@@ -63,78 +87,159 @@ def rag_chatbot():
63
 
64
  # Convert data to documents
65
  st.write("Processing the dataset...")
66
- documents = create_doucment(df)
67
- st.write(f"Created {len(documents)} documents.")
68
 
69
  # Load models
70
  st.write("Loading models...")
71
- embedding = load_models_embedding()
72
- llm = load_models_llm()
73
 
74
  # Create retriever
75
- retriever = create_database(embedding, documents).as_retriever()
76
 
77
  # Ask a question
78
  question = st.text_input("Ask a question about your dataset:")
79
  if question:
80
- response = ask_me(question, retriever, llm)
81
  st.write(f"Answer: {response}")
82
  else:
83
  st.warning("Please upload a dataset to proceed.")
84
 
85
-
86
  def main():
87
  st.sidebar.title("Navigation")
88
  options = st.sidebar.radio(
89
  "Go to",
90
- [
91
- "Upload",
92
- "Preview",
93
- "Data Cleaning",
94
- "Modify Column Names",
95
- "General Data Statistics",
96
- "Describe",
97
- "Info",
98
- "Handle Categorical",
99
- "Missing Values",
100
- "Handle Duplicates",
101
- "Handle Outliers",
102
- "Download",
103
- "RAG Chatbot"
104
- ],
105
- key="unique_navigation_key",
106
  )
107
 
108
  if options == "Upload":
109
  upload_data()
110
  elif options == "Preview":
111
  preview_data()
112
- elif options == "Data Cleaning":
113
- data_cleaning()
114
- elif options == "Modify Column Names":
115
- modify_column_names()
116
- elif options == "General Data Statistics":
117
- show_general_data_statistics()
118
- elif options == "Describe":
119
- describe_data()
120
- elif options == "Info":
121
- info_data()
122
- elif options == "Handle Categorical":
123
- handle_categorical_values()
124
- elif options == "Missing Values":
125
- missing_values()
126
- elif options == "Handle Duplicates":
127
- handle_duplicates()
128
- elif options == "Handle Outliers":
129
- handle_outliers()
130
- elif options == "Download":
131
- download_data()
132
  elif options == "RAG Chatbot":
133
  rag_chatbot()
134
 
135
- else:
136
- st.warning("Please upload a dataset first.")
137
-
138
-
139
  if __name__ == "__main__":
140
  main()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  from RAG import create_doucment, ask_me, load_models_embedding, load_models_llm, create_database
19
 
20
 
21
+
22
+ # Helper Functions
23
def create_documents(df):
    """Serialize each DataFrame row into a Document for retrieval.

    Args:
        df: pandas DataFrame; each row becomes one Document.

    Returns:
        list[Document]: one Document per row. The row's index label is
        stored (stringified) under the ``"id"`` metadata key, and the row's
        values are JSON-encoded into ``page_content``.
    """
    docs = []
    # iterrows() yields (index_label, row_series); the index label — not a
    # positional counter — is what ends up in the "id" metadata field.
    for idx, row in df.iterrows():
        payload = json.dumps(row.to_dict())
        docs.append(Document(metadata={"id": str(idx)}, page_content=payload))
    return docs
33
+
34
def load_embedding_model():
    """Return the sentence-transformers embedding model used for vectorization."""
    model_name = "sentence-transformers/all-MiniLM-L6-v2"
    return HuggingFaceEmbeddings(model_name=model_name)
37
+
38
def load_llm(api_key, *, temperature=0.5, max_length=100):
    """Build the HuggingFace Hub LLM client used to answer queries.

    Generalized: the previously hard-coded sampling settings are now
    keyword-only parameters whose defaults match the old behavior, so
    existing callers (``load_llm(api_key)``) are unaffected.

    Args:
        api_key: HuggingFace Hub API token.
        temperature: sampling temperature passed via ``model_kwargs``.
        max_length: generation length cap passed via ``model_kwargs``.

    Returns:
        A ``HuggingFaceHub`` client bound to ``Qwen/Qwen2.5-72B-Instruct``.
    """
    return HuggingFaceHub(
        repo_id="Qwen/Qwen2.5-72B-Instruct",
        huggingfacehub_api_token=api_key,
        model_kwargs={"temperature": temperature, "max_length": max_length},
    )
45
+
46
def ask_question(question, retriever, llm):
    """Answer *question* with a retrieval-augmented "stuff" QA chain.

    Args:
        question: natural-language query about the indexed dataset.
        retriever: vector-store retriever supplying context documents.
        llm: language model used to generate the answer.

    Returns:
        The answer string from the chain's ``"result"`` field.
    """
    chain = RetrievalQA.from_chain_type(
        llm=llm,
        retriever=retriever,
        chain_type="stuff",
        return_source_documents=False,
    )
    answer = chain.invoke({"query": question})
    return answer["result"]
56
+
57
+ # Streamlit App
58
  def upload_data():
59
  st.title("Upload Dataset")
60
+ file = st.file_uploader("Upload your dataset", type=["csv", "xlsx"])
 
61
 
62
  if file:
63
  try:
 
70
  st.success("Dataset uploaded successfully!")
71
  except Exception as e:
72
  st.error(f"Error loading file: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
 
74
def preview_data():
    """Render the uploaded dataset, or warn when nothing has been uploaded."""
    # Guard clause: bail out early when no dataset is in session state.
    if "data" not in st.session_state:
        st.warning("Please upload a dataset first.")
        return
    st.title("Preview Dataset")
    st.dataframe(st.session_state["data"])
80
import os  # local import: the token must come from the environment, not source control

# SECURITY(review): a live HuggingFace API token was hard-coded on this line and
# committed to version control — it is compromised and must be revoked/rotated.
# Read the token from the environment instead. Two padding characters are
# appended because rag_chatbot passes ``api[:-2]`` to load_llm (the original
# stored value carried two junk trailing characters for the same reason).
api = os.environ.get("HUGGINGFACEHUB_API_TOKEN", "") + "XX"
 
 
81
  def rag_chatbot():
82
  st.title("RAG Chatbot")
83
 
 
87
 
88
  # Convert data to documents
89
  st.write("Processing the dataset...")
90
+ documents = create_documents(df)
 
91
 
92
  # Load models
93
  st.write("Loading models...")
94
+ embedding_model = load_embedding_model()
95
+ llm_model = load_llm(api_key=api[:-2])
96
 
97
  # Create retriever
98
+ retriever = FAISS.from_documents(documents, embedding=embedding_model).as_retriever()
99
 
100
  # Ask a question
101
  question = st.text_input("Ask a question about your dataset:")
102
  if question:
103
+ response = ask_question(question, retriever, llm_model)
104
  st.write(f"Answer: {response}")
105
  else:
106
  st.warning("Please upload a dataset to proceed.")
107
 
 
108
def main():
    """Sidebar-driven page router for the Streamlit app."""
    st.sidebar.title("Navigation")
    choice = st.sidebar.radio(
        "Go to",
        ["Upload", "Preview", "RAG Chatbot"],
        key="navigation_key",
    )

    # Dispatch table replaces the if/elif chain; the radio widget always
    # returns one of the listed options, so the lookup cannot miss.
    pages = {
        "Upload": upload_data,
        "Preview": preview_data,
        "RAG Chatbot": rag_chatbot,
    }
    pages[choice]()
122
 
 
 
 
 
123
if __name__ == "__main__":  # run the app only when executed directly
    main()
125
+
126
+ # def upload_data():
127
+ # st.title("Upload Dataset")
128
+ # file = st.file_uploader("Upload your dataset", type=[
129
+ # "csv", "xlsx"], key="file_uploader_1")
130
+
131
+ # if file:
132
+ # try:
133
+ # if file.name.endswith(".csv"):
134
+ # data = pd.read_csv(file)
135
+ # elif file.name.endswith(".xlsx"):
136
+ # data = pd.read_excel(file)
137
+
138
+ # st.session_state["data"] = data
139
+ # st.success("Dataset uploaded successfully!")
140
+ # except Exception as e:
141
+ # st.error(f"Error loading file: {e}")
142
+ # return file
143
+
144
+
145
+ # def download_data():
146
+ # """Downloads the DataFrame as a CSV file."""
147
+ # if "data" in st.session_state and not st.session_state["data"].empty:
148
+ # csv = st.session_state["data"].to_csv(index=False).encode('utf-8')
149
+
150
+ # st.download_button(
151
+ # label="Download Cleaned Dataset",
152
+ # data=csv,
153
+ # file_name="cleaned_data.csv",
154
+ # mime="text/csv"
155
+ # )
156
+
157
+ # else:
158
+ # st.warning(
159
+ # "No data available to download. Please modify or upload a dataset first.")
160
+
161
+
162
+ # def rag_chatbot():
163
+ # st.title("RAG Chatbot")
164
+
165
+ # # Check if data is uploaded
166
+ # if "data" in st.session_state and isinstance(st.session_state["data"], pd.DataFrame):
167
+ # df = st.session_state["data"]
168
+
169
+ # # Convert data to documents
170
+ # st.write("Processing the dataset...")
171
+ # documents = create_doucment(df)
172
+ # st.write(f"Created {len(documents)} documents.")
173
+
174
+ # # Load models
175
+ # st.write("Loading models...")
176
+ # embedding = load_models_embedding()
177
+ # llm = load_models_llm()
178
+
179
+ # # Create retriever
180
+ # retriever = create_database(embedding, documents).as_retriever()
181
+
182
+ # # Ask a question
183
+ # question = st.text_input("Ask a question about your dataset:")
184
+ # if question:
185
+ # response = ask_me(question, retriever, llm)
186
+ # st.write(f"Answer: {response}")
187
+ # else:
188
+ # st.warning("Please upload a dataset to proceed.")
189
+
190
+
191
+ # def main():
192
+ # st.sidebar.title("Navigation")
193
+ # options = st.sidebar.radio(
194
+ # "Go to",
195
+ # [
196
+ # "Upload",
197
+ # "Preview",
198
+ # "Data Cleaning",
199
+ # "Modify Column Names",
200
+ # "General Data Statistics",
201
+ # "Describe",
202
+ # "Info",
203
+ # "Handle Categorical",
204
+ # "Missing Values",
205
+ # "Handle Duplicates",
206
+ # "Handle Outliers",
207
+ # "Download",
208
+ # "RAG Chatbot"
209
+ # ],
210
+ # key="unique_navigation_key",
211
+ # )
212
+
213
+ # if options == "Upload":
214
+ # upload_data()
215
+ # elif options == "Preview":
216
+ # preview_data()
217
+ # elif options == "Data Cleaning":
218
+ # data_cleaning()
219
+ # elif options == "Modify Column Names":
220
+ # modify_column_names()
221
+ # elif options == "General Data Statistics":
222
+ # show_general_data_statistics()
223
+ # elif options == "Describe":
224
+ # describe_data()
225
+ # elif options == "Info":
226
+ # info_data()
227
+ # elif options == "Handle Categorical":
228
+ # handle_categorical_values()
229
+ # elif options == "Missing Values":
230
+ # missing_values()
231
+ # elif options == "Handle Duplicates":
232
+ # handle_duplicates()
233
+ # elif options == "Handle Outliers":
234
+ # handle_outliers()
235
+ # elif options == "Download":
236
+ # download_data()
237
+ # elif options == "RAG Chatbot":
238
+ # rag_chatbot()
239
+
240
+ # else:
241
+ # st.warning("Please upload a dataset first.")
242
+
243
+
244
+ # if __name__ == "__main__":
245
+ # main()