# NOTE: recovered from a Hugging Face Spaces page scrape (the Space reported
# status "Runtime error"); markdown table artifacts have been stripped below.
"""Retrieval QA over a local document: GPT4All LLM + LlamaCpp embeddings + FAISS index."""
import os.path
from pathlib import Path
from typing import List, Tuple

import langchain
from langchain import PromptTemplate, LLMChain
from langchain.cache import InMemoryCache
from langchain.chains import ConversationalRetrievalChain
from langchain.document_loaders import TextLoader
from langchain.embeddings import HuggingFaceEmbeddings, LlamaCppEmbeddings
from langchain.llms import GPT4All, LlamaCpp
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores.faiss import FAISS
from pydantic import BaseModel, Field

# Cache identical prompts in memory so repeated questions skip LLM calls.
langchain.llm_cache = InMemoryCache()

# Constants
local_path = "./models/gpt4all-converted.bin"  # GPT4All model weights (option 1)
# local_path = "./models/ggml-gpt4all-l13b-snoozy.bin"
model_path = "./models/ggml-model-q4_0.bin"  # 1st embeddings model
# model_path = './models/ggjt-model.bin'  # 2nd embeddings model
text_path = "./docs/acapglobal.txt"  # source document to index
index_path = "./acapglobal_index"    # directory holding the saved FAISS index
# Functions
def initialize_embeddings() -> LlamaCppEmbeddings:
    """Create the LlamaCpp embedding model (used for both indexing and querying).

    Reads the module-level ``model_path`` constant.
    """
    return LlamaCppEmbeddings(model_path=model_path)
def load_documents() -> List:
    """Load the source text file at ``text_path`` as a list of LangChain documents."""
    loader = TextLoader(text_path, encoding="utf-8")
    return loader.load()
def split_chunks(sources: List) -> List:
    """Split documents into overlapping chunks (512 chars, 32-char overlap).

    :param sources: documents as returned by ``load_documents``.
    :return: list of chunked documents.
    """
    splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=32)
    # split_documents already yields the chunks; no manual append loop needed.
    return list(splitter.split_documents(sources))
def generate_index(chunks: List, embeddings: LlamaCppEmbeddings) -> FAISS:
    """Embed chunk texts and build a FAISS vector store, preserving per-chunk metadata.

    :param chunks: chunked documents from ``split_chunks``.
    :param embeddings: embedding model from ``initialize_embeddings``.
    :return: in-memory FAISS vector store.
    """
    texts = [doc.page_content for doc in chunks]
    metadatas = [doc.metadata for doc in chunks]
    return FAISS.from_texts(texts, embeddings, metadatas=metadatas)
# Main execution
llm = GPT4All(model=local_path, n_ctx=512, verbose=True, cache=True, embedding=True)
print('llm GPT4All set.')
embeddings = initialize_embeddings()

# Rebuild the index only when the source document changes, then uncomment:
#
# print('initialize_embeddings.')
# sources = load_documents()
# print('load_documents.')
# chunks = split_chunks(sources)
# print('split_chunks')
# vectorstore = generate_index(chunks, embeddings)
# print('generate_index')
# vectorstore.save_local("acapglobal_index")
# print('vectorstore: save_local')
#
# End when refreshing the document.

chat_history = []
# NOTE(review): FAISS.load_local deserializes pickled data from disk — only load
# an index this app created itself; never point index_path at untrusted files.
index = FAISS.load_local(index_path, embeddings)
qa = ConversationalRetrievalChain.from_llm(llm, index.as_retriever(), max_tokens_limit=400)
def search_query_data(s_query):
    """Run a retrieval-augmented query against the indexed document.

    Uses the module-level ``qa`` chain and shared ``chat_history`` list.

    :param s_query: the user's question as a string.
    :return: the chain's result dict (contains the generated answer).
    """
    print("search_query:")
    print(s_query)
    return qa({"question": s_query, "chat_history": chat_history})