# app.py — Streamlit app: upload a CSV and chat with its contents
# (LangChain + FAISS + local Llama-2 via ctransformers)
# Standard library
from io import StringIO

# Third-party
import pandas as pd
import streamlit as st
from langchain.chains import ConversationalRetrievalChain
from langchain.document_loaders.csv_loader import CSVLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import CTransformers
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
# --- Page setup -------------------------------------------------------------
st.set_page_config(
    page_title="Conversational AI with CSV Data",
    page_icon="🤖",
    layout="wide",
)

# Sidebar: the user uploads the CSV file the chatbot will answer questions about.
st.sidebar.title("Upload CSV and Ask Questions")
uploaded_file = st.sidebar.file_uploader("Upload your CSV file", type="csv")

if uploaded_file is not None:
    # Decode the uploaded bytes once and parse them in memory.
    string_data = StringIO(uploaded_file.getvalue().decode("utf-8"))
    df = pd.read_csv(string_data)

    # Show a preview so the user can confirm the right file was parsed.
    st.write("Data successfully loaded!")
    st.write(df.head())

    # CSVLoader reads from a file path, so persist the DataFrame to disk first.
    df.to_csv("temp.csv", index=False)
    loader = CSVLoader(file_path="temp.csv", encoding="utf-8", csv_args={"delimiter": ","})
    data = loader.load()

    # Split the row-documents into overlapping chunks sized for embedding.
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=20)
    text_chunks = text_splitter.split_documents(data)
    st.write(f"Number of text chunks created: {len(text_chunks)}")

    # Embed the chunks with a sentence-transformers model and persist the
    # FAISS index locally for reuse.
    embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')
    docsearch = FAISS.from_documents(text_chunks, embeddings)
    docsearch.save_local("vectorstore/db_faiss")

    # Local quantized Llama-2 chat model served through ctransformers.
    llm = CTransformers(
        model="models/llama-2-7b-chat.ggmlv3.q4_0.bin",
        model_type="llama",
        max_new_tokens=512,
        temperature=0.1,
    )

    # Retrieval-augmented QA chain over the FAISS index.
    qa = ConversationalRetrievalChain.from_llm(llm, retriever=docsearch.as_retriever())

    # BUGFIX: Streamlit reruns this script top-to-bottom on every widget
    # interaction, so a plain `chat_history = []` here would wipe the
    # conversation each time the user clicks "Ask". Persist the history in
    # st.session_state so follow-up questions keep their context.
    if "chat_history" not in st.session_state:
        st.session_state.chat_history = []

    user_input = st.text_input("Ask a question about the data:")
    if st.button("Ask"):
        if user_input:
            result = qa({
                "question": user_input,
                "chat_history": st.session_state.chat_history,
            })
            st.write(f"Response: {result['answer']}")
            st.session_state.chat_history.append((user_input, result['answer']))
        else:
            st.write("Please enter a question.")
else:
    st.write("Please upload a CSV file to get started.")