Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import pandas as pd | |
| import numpy as np | |
| import io | |
| import matplotlib.pyplot as plt | |
| from sklearn.preprocessing import LabelEncoder | |
| import seaborn as sns | |
| import base64 | |
| import json | |
| from langchain.docstore.document import Document | |
| from langchain.vectorstores import Chroma | |
| from langchain.embeddings import HuggingFaceEmbeddings | |
| from langchain.llms import HuggingFaceHub | |
| from langchain.chains import RetrievalQA | |
| from Information import show_general_data_statistics, describe_data, info_data | |
| from Preprocessing1 import preview_data, data_cleaning, modify_column_names | |
| from Preprocessing2 import handle_categorical_values, missing_values, handle_duplicates, handle_outliers | |
| from RAG import create_doucment, ask_me, load_models_embedding, load_models_llm, create_database | |
| from langchain.vectorstores import FAISS | |
| # Helper Functions | |
def create_documents(df):
    """Convert each row of a DataFrame into a LangChain ``Document``.

    Args:
        df: DataFrame whose rows become documents.

    Returns:
        A list with one ``Document`` per row. ``page_content`` is the row
        serialized as a JSON object keyed by column name, and
        ``metadata["id"]`` is the row's index label rendered as a string.
    """
    return [
        Document(
            metadata={"id": str(index)},
            # default=str: cells that are not JSON-native (e.g. timestamps
            # parsed from an Excel sheet) are rendered as text instead of
            # making json.dumps raise TypeError for the whole dataset.
            page_content=json.dumps(record.to_dict(), default=str),
        )
        for index, record in df.iterrows()
    ]
def load_embedding_model():
    """Build the Hugging Face embedding model used to vectorize documents."""
    model_id = "sentence-transformers/all-MiniLM-L6-v2"
    embeddings = HuggingFaceEmbeddings(model_name=model_id)
    return embeddings
def load_llm(api_key):
    """Build the hosted LLM client used to answer queries.

    Args:
        api_key: Hugging Face Hub API token passed through to the client.

    Returns:
        A ``HuggingFaceHub`` instance configured for the Mixtral instruct model.
    """
    generation_settings = {"temperature": 0.5, "max_length": 100}
    hub_llm = HuggingFaceHub(
        repo_id="mistralai/Mixtral-8x7B-Instruct-v0.1",
        huggingfacehub_api_token=api_key,
        model_kwargs=generation_settings,
    )
    return hub_llm
def ask_question(question, retriever, llm):
    """Answer a question with a retrieval-augmented QA chain.

    Args:
        question: The user's query text.
        retriever: Retriever supplying context documents to the chain.
        llm: Language model the chain uses to produce the answer.

    Returns:
        The value stored under ``"result"`` in the chain's response.
    """
    chain_options = {
        "retriever": retriever,
        "chain_type": "stuff",
        "llm": llm,
        "return_source_documents": False,
    }
    chain = RetrievalQA.from_chain_type(**chain_options)
    return chain.invoke({"query": question})["result"]
| # Streamlit App | |
def upload_data():
    """Render the upload page and store the parsed file in session state.

    Accepts CSV and XLSX uploads. On success the resulting DataFrame is saved
    under ``st.session_state["data"]`` and a success message is shown. On
    failure an error message is shown and session state is not modified.
    """
    st.title("Upload Dataset")
    file = st.file_uploader("Upload your dataset", type=["csv", "xlsx"])
    if not file:
        return

    # Compare case-insensitively so an upper-case extension is still matched.
    filename = file.name.lower()
    if filename.endswith(".csv"):
        reader = pd.read_csv
    elif filename.endswith(".xlsx"):
        reader = pd.read_excel
    else:
        # Without this branch an unmatched name left `data` unbound and the
        # user saw an unrelated "referenced before assignment" error.
        st.error(f"Error loading file: unsupported file type: {file.name}")
        return

    try:
        data = reader(file)
    except Exception as e:  # parser failures are reported to the user, not raised
        st.error(f"Error loading file: {e}")
        return

    st.session_state["data"] = data
    st.success("Dataset uploaded successfully!")
# NOTE: this definition replaces the `preview_data` name imported from
# Preprocessing1 near the top of the file.
def preview_data():
    """Show the uploaded dataset, or a warning when none has been uploaded."""
    if "data" not in st.session_state:
        st.warning("Please upload a dataset first.")
        return

    st.title("Preview Dataset")
    st.dataframe(st.session_state["data"])
def _hf_api_token():
    """Return the Hugging Face API token from the environment ("" if unset)."""
    import os  # function-scope import keeps this section self-contained

    return os.environ.get("HUGGINGFACEHUB_API_TOKEN", "").strip()


# The token used to be hard-coded on this line (rag_chatbot dropped its last
# two characters before use). Credentials must not live in source, so the
# value is now read from the HUGGINGFACEHUB_API_TOKEN environment variable.
# The module-level name `api` is kept for backward compatibility.
api = _hf_api_token()


def rag_chatbot():
    """Render the chatbot page for asking questions about the uploaded data.

    Requires a DataFrame under ``st.session_state["data"]`` and a Hugging Face
    API token in the ``HUGGINGFACEHUB_API_TOKEN`` environment variable. Each
    row is turned into a document, indexed with FAISS, and the user's question
    is answered through ``ask_question``.
    """
    st.title("RAG Chatbot")

    # Check that a dataset has been uploaded.
    has_data = "data" in st.session_state and isinstance(
        st.session_state["data"], pd.DataFrame
    )
    if not has_data:
        st.warning("Please upload a dataset to proceed.")
        return

    df = st.session_state["data"]
    if df.empty:
        # An index cannot be built from zero documents; stop with a clear message.
        st.warning("The uploaded dataset is empty; there is nothing to query.")
        return

    # Re-read the environment in case the variable was set after import.
    token = api or _hf_api_token()
    if not token:
        st.error(
            "Hugging Face API token not found. "
            "Set the HUGGINGFACEHUB_API_TOKEN environment variable."
        )
        return

    # Convert data to documents.
    st.write("Processing the dataset...")
    documents = create_documents(df)

    # Load models.
    st.write("Loading models...")
    embedding_model = load_embedding_model()
    llm_model = load_llm(api_key=token)

    # Build a FAISS index over the row documents and expose it as a retriever.
    retriever = FAISS.from_documents(
        documents, embedding=embedding_model
    ).as_retriever()

    # Ask a question.
    question = st.text_input("Ask a question about your dataset:")
    if question:
        response = ask_question(question, retriever, llm_model)
        st.write(f"Answer: {response}")
def main():
    """Draw the sidebar navigation and render the page the user selected."""
    # Page label -> function that renders it; dict order is the menu order.
    pages = {
        "Upload": upload_data,
        "Preview": preview_data,
        "RAG Chatbot": rag_chatbot,
    }

    st.sidebar.title("Navigation")
    choice = st.sidebar.radio("Go to", list(pages), key="navigation_key")

    render = pages.get(choice)
    if render is not None:
        render()


if __name__ == "__main__":
    main()
| # def upload_data(): | |
| # st.title("Upload Dataset") | |
| # file = st.file_uploader("Upload your dataset", type=[ | |
| # "csv", "xlsx"], key="file_uploader_1") | |
| # if file: | |
| # try: | |
| # if file.name.endswith(".csv"): | |
| # data = pd.read_csv(file) | |
| # elif file.name.endswith(".xlsx"): | |
| # data = pd.read_excel(file) | |
| # st.session_state["data"] = data | |
| # st.success("Dataset uploaded successfully!") | |
| # except Exception as e: | |
| # st.error(f"Error loading file: {e}") | |
| # return file | |
| # def download_data(): | |
| # """Downloads the DataFrame as a CSV file.""" | |
| # if "data" in st.session_state and not st.session_state["data"].empty: | |
| # csv = st.session_state["data"].to_csv(index=False).encode('utf-8') | |
| # st.download_button( | |
| # label="Download Cleaned Dataset", | |
| # data=csv, | |
| # file_name="cleaned_data.csv", | |
| # mime="text/csv" | |
| # ) | |
| # else: | |
| # st.warning( | |
| # "No data available to download. Please modify or upload a dataset first.") | |
| # def rag_chatbot(): | |
| # st.title("RAG Chatbot") | |
| # # Check if data is uploaded | |
| # if "data" in st.session_state and isinstance(st.session_state["data"], pd.DataFrame): | |
| # df = st.session_state["data"] | |
| # # Convert data to documents | |
| # st.write("Processing the dataset...") | |
| # documents = create_doucment(df) | |
| # st.write(f"Created {len(documents)} documents.") | |
| # # Load models | |
| # st.write("Loading models...") | |
| # embedding = load_models_embedding() | |
| # llm = load_models_llm() | |
| # # Create retriever | |
| # retriever = create_database(embedding, documents).as_retriever() | |
| # # Ask a question | |
| # question = st.text_input("Ask a question about your dataset:") | |
| # if question: | |
| # response = ask_me(question, retriever, llm) | |
| # st.write(f"Answer: {response}") | |
| # else: | |
| # st.warning("Please upload a dataset to proceed.") | |
| # def main(): | |
| # st.sidebar.title("Navigation") | |
| # options = st.sidebar.radio( | |
| # "Go to", | |
| # [ | |
| # "Upload", | |
| # "Preview", | |
| # "Data Cleaning", | |
| # "Modify Column Names", | |
| # "General Data Statistics", | |
| # "Describe", | |
| # "Info", | |
| # "Handle Categorical", | |
| # "Missing Values", | |
| # "Handle Duplicates", | |
| # "Handle Outliers", | |
| # "Download", | |
| # "RAG Chatbot" | |
| # ], | |
| # key="unique_navigation_key", | |
| # ) | |
| # if options == "Upload": | |
| # upload_data() | |
| # elif options == "Preview": | |
| # preview_data() | |
| # elif options == "Data Cleaning": | |
| # data_cleaning() | |
| # elif options == "Modify Column Names": | |
| # modify_column_names() | |
| # elif options == "General Data Statistics": | |
| # show_general_data_statistics() | |
| # elif options == "Describe": | |
| # describe_data() | |
| # elif options == "Info": | |
| # info_data() | |
| # elif options == "Handle Categorical": | |
| # handle_categorical_values() | |
| # elif options == "Missing Values": | |
| # missing_values() | |
| # elif options == "Handle Duplicates": | |
| # handle_duplicates() | |
| # elif options == "Handle Outliers": | |
| # handle_outliers() | |
| # elif options == "Download": | |
| # download_data() | |
| # elif options == "RAG Chatbot": | |
| # rag_chatbot() | |
| # else: | |
| # st.warning("Please upload a dataset first.") | |
| # if __name__ == "__main__": | |
| # main() | |