Spaces:
Runtime error
Runtime error
# Dependencies must be installed from a shell BEFORE running this script.
# A bare `pip install ...` line is a shell command, not Python, and makes the
# whole file fail with a SyntaxError:
#
#   pip install -qU cassio datasets langchain openai tiktoken
#   pip install PyPDF2

# LangChain components to use
from langchain.vectorstores.cassandra import Cassandra
from langchain.indexes.vectorstore import VectorStoreIndexWrapper
from langchain.llms import OpenAI
from langchain.embeddings import OpenAIEmbeddings

# Support for dataset retrieval with Hugging Face
from datasets import load_dataset

# With CassIO, the engine powering the Astra DB integration in LangChain,
# you will also initialize the DB connection:
import cassio

# PDF parsing
from PyPDF2 import PdfReader
import os
import sys

# Credentials are read from the environment so that no secret is stored in the
# source file. Set these variables (or the hosting platform's secrets) before
# running the script:
#   ASTRA_DB_APPLICATION_TOKEN  - Astra DB application token
#   ASTRA_DB_ID                 - Astra DB database ID
#   OPENAI_API_KEY              - OpenAI API key
# NOTE: any credential that was previously committed in this file must be
# treated as compromised and revoked/rotated.
ASTRA_DB_APPLICATION_TOKEN = os.environ.get("ASTRA_DB_APPLICATION_TOKEN", "")
ASTRA_DB_ID = os.environ.get("ASTRA_DB_ID", "")
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")

# Warn early and by name, instead of leaving the user with an opaque
# authentication failure from a downstream client.
_missing_settings = [
    name
    for name, value in (
        ("ASTRA_DB_APPLICATION_TOKEN", ASTRA_DB_APPLICATION_TOKEN),
        ("ASTRA_DB_ID", ASTRA_DB_ID),
        ("OPENAI_API_KEY", OPENAI_API_KEY),
    )
    if not value
]
if _missing_settings:
    print(
        "Warning: missing environment variable(s): "
        + ", ".join(_missing_settings),
        file=sys.stderr,
    )
# Path of the PDF file to index.
pdfreader = PdfReader('Ethics.pdf')
from typing_extensions import Concatenate

# Gather the extractable text of every page into one string; pages for which
# extract_text() returns nothing are skipped.
page_texts = (page.extract_text() for page in pdfreader.pages)
raw_text = ''.join(text for text in page_texts if text)
# Open the Astra DB connection first: the vector store below is created with
# session=None / keyspace=None, so it presumably falls back to the connection
# registered by cassio.init() -- confirm against the CassIO documentation.
cassio.init(database_id=ASTRA_DB_ID, token=ASTRA_DB_APPLICATION_TOKEN)

# OpenAI-backed embedding model (handed to the vector store) and LLM.
embedding = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)
llm = OpenAI(openai_api_key=OPENAI_API_KEY)

# Vector store backed by the "qa_mini_demo" table.
astra_vector_store = Cassandra(
    table_name="qa_mini_demo",
    embedding=embedding,
    keyspace=None,
    session=None,
)
from langchain.text_splitter import CharacterTextSplitter

# Split the text on newlines into overlapping chunks so that each piece sent
# to the embedding model stays small. Sizes are measured in characters
# because length_function is len.
text_splitter = CharacterTextSplitter(
    separator="\n",
    chunk_size=800,
    chunk_overlap=200,
    length_function=len,
)
texts = text_splitter.split_text(raw_text)

# Embed every chunk and store it in the vector table. The list is passed
# directly; the former texts[:] slices only made needless copies.
astra_vector_store.add_texts(texts)
print(f"Inserted {len(texts)} text chunks.")
# Wrap the vector store so a question can be answered with one query() call.
astra_vector_index = VectorStoreIndexWrapper(vectorstore=astra_vector_store)

# Interactive question/answer loop: runs until the user types 'quit' or the
# input stream ends.
first_question = True
while True:
    prompt = (
        "\nEnter your question (or type 'quit' to exit): "
        if first_question
        else "\nWhat's your next question (or type 'quit' to exit): "
    )
    try:
        query_text = input(prompt).strip()
    except (EOFError, KeyboardInterrupt):
        # No more input (closed or non-interactive stdin, Ctrl-D) or Ctrl-C:
        # leave the loop instead of crashing with a traceback.
        break

    if query_text.lower() == "quit":
        break
    if not query_text:
        # Blank line: ask again without consuming the "first question" prompt.
        continue

    first_question = False
    print(f'\nQUESTION: "{query_text}"')
    answer = astra_vector_index.query(query_text, llm=llm).strip()
    print(f'ANSWER: "{answer}"\n')