# NOTE(review): the original paste carried a "Spaces: Runtime error" banner —
# a scrape artifact from the hosting page, not part of the program.
| from langchain.document_loaders import ApifyDatasetLoader | |
| from langchain.utilities import ApifyWrapper | |
| from langchain.docstore.document import Document | |
| from langchain.text_splitter import RecursiveCharacterTextSplitter | |
| from langchain.embeddings.cohere import CohereEmbeddings | |
| from langchain.vectorstores.deeplake import DeepLake | |
| from langchain_cohere import CohereRerank | |
| from langchain.retrievers import ContextualCompressionRetriever | |
| from langchain.memory import ConversationBufferWindowMemory | |
| import os | |
| from langchain.chains.conversational_retrieval.base import ConversationalRetrievalChain | |
| from langchain_groq import ChatGroq | |
| from dotenv import load_dotenv | |
| load_dotenv() | |
def get_and_load_data():
    """Crawl the Wikipedia AI article with Apify, split it into chunks, and
    store Cohere embeddings in a Deep Lake dataset.

    Side effects: runs an Apify actor (network call), then writes the split
    documents into ``hub://gneyapandya1234/educational_chatbot``.

    Returns:
        None.
    """
    # BUG FIX: the key was read into a local and never used. ApifyWrapper
    # authenticates via the APIFY_API_TOKEN env var, so export the key
    # stored under "apify" (without clobbering an already-set token).
    apify_key = os.getenv("apify")
    if apify_key:
        os.environ.setdefault("APIFY_API_TOKEN", apify_key)
    apify = ApifyWrapper()
    loader = apify.call_actor(
        actor_id="apify/website-content-crawler",
        run_input={"startUrls": [{"url": "https://en.wikipedia.org/wiki/Artificial_intelligence"}]},
        # Use .get() so dataset items missing "text"/"url"/"metadata" don't
        # raise KeyError; empty text falls back to a placeholder as before.
        dataset_mapping_function=lambda dataset_item: Document(
            page_content=dataset_item.get("text") or "No content available",
            metadata={
                "source": dataset_item.get("url", ""),
                "title": (dataset_item.get("metadata") or {}).get("title", ""),
            },
        ),
    )
    docs = loader.load()
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000, chunk_overlap=20, length_function=len
    )
    docs_split = text_splitter.split_documents(docs)
    embeddings = CohereEmbeddings(model="embed-english-v2.0")
    username = "gneyapandya1234"
    db_id = "educational_chatbot"
    dbs = DeepLake(dataset_path=f"hub://{username}/{db_id}", embedding_function=embeddings)
    dbs.add_documents(docs_split)
def deeplake():
    """Open the Deep Lake dataset read-only and build a reranked retriever.

    Returns:
        A ``(dbs, compressor_retriever, retriever)`` tuple: the vector store,
        the Cohere-reranked compression retriever, and the raw retriever.
    """
    embeddings = CohereEmbeddings(model="embed-english-v2.0")
    dbs = DeepLake(
        dataset_path="hub://gneyapandya1234/educational_chatbot",
        read_only=True,
        embedding_function=embeddings,
    )
    retriever = dbs.as_retriever()
    # Cosine distance; over-fetch 20 candidates so the reranker below has a
    # wide pool to pick its top 5 from.
    for option, value in (("distance_metric", "cos"), ("fetch_k", 20), ("k", 20)):
        retriever.search_kwargs[option] = value
    compressor = CohereRerank(model="rerank-english-v2.0", top_n=5)
    compressor_retriever = ContextualCompressionRetriever(
        base_compressor=compressor,
        base_retriever=retriever,
    )
    print("DOne")
    return dbs, compressor_retriever, retriever
def memory():
    """Return a windowed conversation memory keeping the last 3 exchanges.

    ``output_key="answer"`` matches the key produced by
    ConversationalRetrievalChain, and messages are returned as objects.
    """
    return ConversationBufferWindowMemory(
        k=3,
        memory_key="chat_history",
        return_messages=True,
        output_key="answer",
    )
def create_llm():
    """Build the Groq-hosted Llama 3 70B chat model from the env API key."""
    return ChatGroq(api_key=os.getenv("GROQ_API_KEY"), model="llama3-70b-8192")
def chain(llm, compression_retriever, memory):
    """Assemble the conversational retrieval QA chain.

    Args:
        llm: Chat model used to generate answers.
        compression_retriever: Retriever supplying context documents.
        memory: Conversation memory object (keyed on "answer").

    Returns:
        A ConversationalRetrievalChain that also returns source documents.
    """
    return ConversationalRetrievalChain.from_llm(
        llm=llm,
        memory=memory,
        retriever=compression_retriever,
        verbose=True,
        return_source_documents=True,
    )
def final_function():
    """Wire LLM, memory and retriever together.

    Returns:
        A ``(qa, mem)`` tuple: the ready-to-use QA chain and its memory.
    """
    llm = create_llm()
    mem = memory()
    # Only the reranked retriever is needed here; the store and raw
    # retriever from deeplake() are unused by the chain.
    _dbs, compressor_retriever, _retriever = deeplake()
    qa = chain(llm, compressor_retriever, mem)
    return qa, mem