import gradio as gr import os import openai import requests import csv import faiss import tiktoken from langchain.llms import OpenAI from langchain.chat_models import ChatOpenAI from langchain.prompts import PromptTemplate from langchain.document_loaders import PyPDFLoader from langchain.vectorstores import FAISS from langchain.embeddings import OpenAIEmbeddings from langchain.embeddings import HuggingFaceEmbeddings from langchain.embeddings import HuggingFaceHubEmbeddings from langchain.agents.agent_toolkits import create_retriever_tool from langchain.agents.agent_toolkits import create_conversational_retrieval_agent from langchain.memory import ChatMessageHistory from langchain.schema.messages import SystemMessage from sentence_transformers import SentenceTransformer, util LLM_MODEL = 'gpt-4-1106-preview' OPEN_AI_KEY = os.environ.get('OPEN_AI_KEY') class AASLDConversationalAgent(): def __init__(self) -> None: # loader = PyPDFLoader( # 'docs/aasld_practice_guidance_on_prevention_diagnosis441.pdf') loader = PyPDFLoader( 'aasld_practice_guidance_on_prevention_diagnosis441.pdf') pages = loader.load_and_split() embeddings_model = OpenAIEmbeddings(openai_api_key=OPEN_AI_KEY) # embeddings_model = HuggingFaceEmbeddings( # model_name='sentence-transformers/all-mpnet-base-v2', # model_kwargs={'device': 'cpu'}, # encode_kwargs={'normalize_embeddings': False} # ) self.doc_search = FAISS.from_documents(pages, embeddings_model) self.doc_retriever = self.doc_search.as_retriever( search_type="mmr", search_kwargs={"k": 10}) self.llm = ChatOpenAI(temperature = 0, model_name=LLM_MODEL, openai_api_key=OPEN_AI_KEY) tool = create_retriever_tool( self.doc_retriever, "SearchAASLDDocs", """Searches and returns documents regarding the AASLD Practice Guidance on Prevention, Diagnosis, and Treatment of Hepatocellular Carcinoma.""", ) self.tools = [tool] system_message = SystemMessage ( content=""" You are a chat assistant who searches the document for the given question and returns the summary of the results as a HTML body with formatting. The results text should be left aligned. If the document does not have the answers please let the users know it is unavailable and do not try to answer. """ ) self.rag_chat_agent = create_conversational_retrieval_agent( self.llm, self.tools, system_message=system_message, verbose=False, remember_intermediate_steps=False, max_token_limit=8000) def _get_html_content(self, res:str): print(res) return res.split('```html')[1].replace('```', '') def get_answer(self, question: str): res = self.rag_chat_agent(question) if '```html' in res['output']: html_res = self._get_html_content(res['output']) else: html_res = res['output'] return html_res AASLD_CONVERSATIONAL_AGENT = AASLDConversationalAgent() PAGE_TITLE_HTML = """

Arithmedics - AASLD Guidelines Chat Assistant

I am AASLD-GPT, trained on the AASLD guidelines for Hepatocellular Carcinoma (HCC). My role is to assist you in navigating and extracting information from these guidelines. As an early prototype, your patience and feedback are essential for my development and improvement.

""" def get_empty_state(): return [] def clear_conversation(): return (gr.update(value=None, visible=True), None, '', get_empty_state(), '', '') def submit_question(question): print(question) try: answer = AASLD_CONVERSATIONAL_AGENT.get_answer(question) print(answer) except Exception as e: print(e) answer = e['error']['message'] question = '{}