import gradio as gr import os import openai import requests import csv import faiss import tiktoken from langchain.llms import OpenAI from langchain.chat_models import ChatOpenAI from langchain.prompts import PromptTemplate from langchain.document_loaders import PyPDFLoader from langchain.vectorstores import FAISS from langchain.embeddings import OpenAIEmbeddings from langchain.embeddings import HuggingFaceEmbeddings from langchain.embeddings import HuggingFaceHubEmbeddings from langchain.agents.agent_toolkits import create_retriever_tool from langchain.agents.agent_toolkits import create_conversational_retrieval_agent from langchain.memory import ChatMessageHistory from langchain.schema.messages import SystemMessage from sentence_transformers import SentenceTransformer, util LLM_MODEL = 'gpt-4-1106-preview' OPEN_AI_KEY = os.environ.get('OPEN_AI_KEY') class AASLDConversationalAgent(): def __init__(self) -> None: # loader = PyPDFLoader( # 'docs/aasld_practice_guidance_on_prevention_diagnosis441.pdf') loader = PyPDFLoader( 'aasld_practice_guidance_on_prevention_diagnosis441.pdf') pages = loader.load_and_split() embeddings_model = OpenAIEmbeddings(openai_api_key=OPEN_AI_KEY) # embeddings_model = HuggingFaceEmbeddings( # model_name='sentence-transformers/all-mpnet-base-v2', # model_kwargs={'device': 'cpu'}, # encode_kwargs={'normalize_embeddings': False} # ) self.doc_search = FAISS.from_documents(pages, embeddings_model) self.doc_retriever = self.doc_search.as_retriever( search_type="mmr", search_kwargs={"k": 10}) self.llm = ChatOpenAI(temperature = 0, model_name=LLM_MODEL, openai_api_key=OPEN_AI_KEY) tool = create_retriever_tool( self.doc_retriever, "SearchAASLDDocs", """Searches and returns documents regarding the AASLD Practice Guidance on Prevention, Diagnosis, and Treatment of Hepatocellular Carcinoma.""", ) self.tools = [tool] system_message = SystemMessage ( content=""" You are a chat assistant who searches the document for the given question and returns the summary of the results as a HTML body with formatting. The results text should be left aligned. If the document does not have the answers please let the users know it is unavailable and do not try to answer. """ ) self.rag_chat_agent = create_conversational_retrieval_agent( self.llm, self.tools, system_message=system_message, verbose=False, remember_intermediate_steps=False, max_token_limit=8000) def _get_html_content(self, res:str): print(res) return res.split('```html')[1].replace('```', '') def get_answer(self, question: str): res = self.rag_chat_agent(question) if '```html' in res['output']: html_res = self._get_html_content(res['output']) else: html_res = res['output'] return html_res AASLD_CONVERSATIONAL_AGENT = AASLDConversationalAgent() PAGE_TITLE_HTML = """