import os
from typing import Any, Dict

import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    pipeline,
)

from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_community.vectorstores.utils import DistanceStrategy

EMBEDDING_MODEL_NAME = "mixedbread-ai/mxbai-embed-large-v1"
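
# Separator patterns (regexes), tried in order: Markdown headings, code
# fences, and horizontal rules first, then paragraphs, lines, words, and
# single characters as a last resort.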
MARKDOWN_SEPARATORS = [
    "\n#{1,6} ",
    "```\n",
    "\n\\*\\*\\*+\n",
    "\n---+\n",
    "\n___+\n",
    "\n\n",
    "\n",
    " ",
    "",
]


class EndpointHandler:

    def __init__(self, path=""):
        # Enable LangSmith tracing. Supply the key through the endpoint's
        # secrets/environment configuration; never commit a real key.
        os.environ["LANGCHAIN_TRACING_V2"] = "true"
        os.environ["LANGCHAIN_API_KEY"] = "<LANGSMITH_API_KEY>"

        # Source pages for the retrieval corpus.
        urls = [
            "https://scholars.cityu.edu.hk/en/persons/man-hon-michael-cheung(0f913a96-a28d-47ea-848c-f444804c16f2).html",
            "https://scholars.cityu.edu.hk/en/persons/man-hon-michael-cheung(0f913a96-a28d-47ea-848c-f444804c16f2)/publications.html",
            "https://www.cityu.edu.hk/media/press-release/2022/05/17/cityu-council-announces-appointment-professor-freddy-boey-next-president",
            "https://www.cityu.edu.hk/media/press-release/2023/05/18/professor-freddy-boey-installed-5th-president-cityu",
        ]

        loader = WebBaseLoader(urls)
        docs = loader.load()

        # Chunk by the embedding model's own token count so no chunk exceeds
        # what the embedder can encode in one pass.
        text_splitter = RecursiveCharacterTextSplitter.from_huggingface_tokenizer(
            AutoTokenizer.from_pretrained(EMBEDDING_MODEL_NAME),
            chunk_size=512,
            chunk_overlap=512 // 10,
            add_start_index=True,
            strip_whitespace=True,
            separators=MARKDOWN_SEPARATORS,
            is_separator_regex=True,  # the separators above are regex patterns
        )

        docs_processed = []
        for doc in docs:
            docs_processed += text_splitter.split_documents([doc])

        # Drop chunks with duplicate text (the pages share boilerplate).
        unique_texts = {}
        docs_processed_unique = []
        for doc in docs_processed:
            if doc.page_content not in unique_texts:
                unique_texts[doc.page_content] = True
                docs_processed_unique.append(doc)

        embedding_model = HuggingFaceEmbeddings(
            model_name=EMBEDDING_MODEL_NAME,
            multi_process=True,
            model_kwargs={"device": "cuda"},
            encode_kwargs={"normalize_embeddings": True},  # needed for cosine similarity
        )

        self.vectorstore = FAISS.from_documents(
            docs_processed_unique,
            embedding_model,
            distance_strategy=DistanceStrategy.COSINE,
        )

        # `path` points at the model weights deployed alongside this handler.
        READER_MODEL_NAME = path

        # Load the reader in 4-bit NF4 quantization so it fits on a single GPU.
        bnb_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_use_double_quant=True,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_compute_dtype=torch.bfloat16,
        )
        model = AutoModelForCausalLM.from_pretrained(
            READER_MODEL_NAME, quantization_config=bnb_config
        )
        tokenizer = AutoTokenizer.from_pretrained(READER_MODEL_NAME)

        self.READER_LLM = pipeline(
            model=model,
            tokenizer=tokenizer,
            task="text-generation",
            do_sample=True,
            temperature=0.2,
            repetition_penalty=1.1,
            return_full_text=False,
            max_new_tokens=256,
        )

        prompt_in_chat_format = [
            {
                "role": "system",
                "content": """Using the information contained in the context, respond only to the question asked. The response should be concise and relevant to the question. If the answer cannot be deduced from the context, do not give an answer.""",
            },
            {
                "role": "user",
                "content": """Context: {context}
Now here is the question you need to answer.
Question: {question}""",
            },
        ]

        # Render the chat template once; {context} and {question} remain
        # literal placeholders that are filled per request in __call__.
        self.RAG_PROMPT_TEMPLATE = tokenizer.apply_chat_template(
            prompt_in_chat_format, tokenize=False, add_generation_prompt=True
        )

    def __call__(self, data: Dict[str, Any]) -> str:
        # Inference Endpoints send the request body as {"inputs": ...};
        # fall back to the raw payload if the key is absent.
        inputs = data.pop("inputs", data)
        date = data.pop("date", None)  # optional request field, currently unused

        # Retrieve the two chunks most similar to the query.
        retrieved_docs = self.vectorstore.similarity_search(query=inputs, k=2)

        retrieved_docs_text = [doc.page_content for doc in retrieved_docs]
        context = "\nExtracted documents:\n"
        context += "".join(
            f"Document {i}:::\n{doc}" for i, doc in enumerate(retrieved_docs_text)
        )

        final_prompt = self.RAG_PROMPT_TEMPLATE.format(
            question=inputs, context=context
        )

        # return_full_text=False means only the newly generated answer comes
        # back, without the prompt.
        answer = self.READER_LLM(final_prompt)[0]["generated_text"]

        return answer
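

# A minimal local smoke test (a sketch, not part of the endpoint contract).
# The model id below is a hypothetical stand-in: any instruction-tuned causal
# LM with a chat template should work, and a CUDA GPU is assumed for the
# 4-bit reader and the embedding model.
if __name__ == "__main__":
    handler = EndpointHandler(path="HuggingFaceH4/zephyr-7b-beta")
    print(handler({"inputs": "Who is the 5th President of CityU?"}))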