LangChain_HF / Chat_with_Excel_Dataset /LangChain_with_CSV.py
EddyGiusepe's picture
Chat com seu doc
5be7935
"""
Data Scientist.: Dr.Eddy Giusepe Chirinos Isidro
LangChain_with_CSV.py: In this script we run queries against the questions
====================== stored in the Excel file (which were loaded into the vector DB).
This study is based on the wonderful tutorial by Shabeel Kandi:
Link --> https://shabeelkandi.medium.com/chat-with-an-excel-dataset-with-openai-and-langchain-5520ce2ac5d3
"""
from langchain.document_loaders import CSVLoader
from langchain.indexes import VectorstoreIndexCreator
from langchain.chains import RetrievalQA
from langchain.llms import OpenAI
import openai
import os
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from dotenv import find_dotenv, load_dotenv
# Locate a .env file, load it, and hand the OpenAI key found in the
# environment to the openai module.
dotenv_path = find_dotenv()
_ = load_dotenv(dotenv_path)
openai.api_key = os.environ.get('OPENAI_API_KEY')
# Example dataset:
# link to download data : https://www.kaggle.com/datasets/ashishraut64/indian-startups-top-300?resource=download
# NOTE(review): the file loaded below is a different, local CSV -- confirm
# whether the link above is still relevant.
csv_path = './Q&A_operadora_vivo.csv'
loader = CSVLoader(file_path=csv_path)

# Create an index from the loaded documents, using Chroma as the vector
# store class and OpenAI embeddings.
embeddings = OpenAIEmbeddings()
index_creator = VectorstoreIndexCreator(
    vectorstore_cls=Chroma,
    embedding=embeddings,
)
docsearch = index_creator.from_loaders([loader])
# LLM used by the chain, configured with temperature 0.0 and max_tokens=256.
llm = OpenAI(
    temperature=0.0,
    max_tokens=256,
    verbose=False,
)

# Build a Q&A chain on top of the index.
# See --> https://python.langchain.com/docs/use_cases/question_answering/
#
# The number of documents to retrieve has to be passed through
# `search_kwargs`; a bare `k=1` keyword given to `as_retriever` is not a
# search setting, so the original call did not limit retrieval to one document.
retriever = docsearch.vectorstore.as_retriever(search_kwargs={"k": 1})
chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    input_key="question",  # the chain is called with {"question": ...}
)
# Interactive Q&A loop: read a question, run it through the chain and print
# the answer. An empty question ends the session.
while True:
    try:
        query = input("question: ")
    except (EOFError, KeyboardInterrupt):
        # Exhausted stdin or Ctrl-D / Ctrl-C at the prompt: leave the loop
        # instead of terminating with a traceback.
        break
    # Test the exit condition BEFORE calling the chain, so that a blank line
    # does not trigger a retrieval + LLM request for an empty question.
    if not query:
        break
    response = chain({"question": query})
    print("\033[033manswer: \033[m" + response['result'])