Spaces:
No application file
No application file
| """ | |
| Data Scientist.: Dr.Eddy Giusepe Chirinos Isidro | |
| LangChain_with_CSV.py: Neste script realizamos queries com base nas perguntas que temos | |
| ====================== armazenadas no arquivo CSV (as quais foram armazenadas no DB de Vetores). | |
| Este estudo foi baseado no maravilhoso tutorial de Shabeel Kandi: | |
| Link --> https://shabeelkandi.medium.com/chat-with-an-excel-dataset-with-openai-and-langchain-5520ce2ac5d3 | |
| """ | |
| from langchain.document_loaders import CSVLoader | |
| from langchain.indexes import VectorstoreIndexCreator | |
| from langchain.chains import RetrievalQA | |
| from langchain.llms import OpenAI | |
| import openai | |
| import os | |
| from langchain.embeddings import OpenAIEmbeddings | |
| from langchain.vectorstores import Chroma | |
| from dotenv import find_dotenv, load_dotenv | |
# Read the local .env file into the environment, then pass the OpenAI key
# from the environment to the openai client.
_ = load_dotenv(find_dotenv())
openai.api_key = os.environ.get('OPENAI_API_KEY')

# Example dataset:
# link to download data : https://www.kaggle.com/datasets/ashishraut64/indian-startups-top-300?resource=download
# NOTE(review): the dataset named in the link above does not match the CSV
# file name loaded below -- confirm which data source is intended.
loader = CSVLoader(file_path='./Q&A_operadora_vivo.csv')

# Create an index from the loaded documents, using Chroma as the vector store
# class and OpenAIEmbeddings as the embedding.
index_creator = VectorstoreIndexCreator(
    embedding=OpenAIEmbeddings(),
    vectorstore_cls=Chroma,
)
docsearch = index_creator.from_loaders([loader])
# LLM used to write the answers: temperature 0.0 and at most 256 tokens per
# completion.
llm = OpenAI(
    temperature=0.0,
    max_tokens=256,
    verbose=False,
)

# Create a Q&A chain on top of the index.
# Read --> https://python.langchain.com/docs/use_cases/question_answering/
#
# The number of documents to retrieve is a search parameter and is passed
# through `search_kwargs`. The previous call used a bare `k=1` keyword on
# `as_retriever`, which is not forwarded to the similarity search, so the
# intended limit of one document was not applied.
chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=docsearch.vectorstore.as_retriever(search_kwargs={"k": 1}),
    input_key="question",
)
# Interactive loop: read a question from stdin, answer it with the chain, and
# repeat. An empty line (or end of input) ends the session.
while True:
    try:
        query = input("question: ")
    except EOFError:
        # stdin was closed (Ctrl-D or an exhausted pipe): treat it as "quit"
        # instead of crashing with a traceback.
        break

    # Test the exit condition BEFORE calling the chain, so that an empty
    # query never triggers a retrieval/LLM request or prints a bogus answer.
    if not query:
        break

    response = chain({"question": query})
    # The escape sequences colour the "answer: " label and then reset the
    # terminal attributes before the answer text is printed.
    print("\033[033manswer: \033[m" + response['result'])