Spaces:

nklomp
/

rag

Runtime error

App Files Community

nklomp committed on Apr 9, 2024

Commit

5bb9c79

verified ·

1 Parent(s): b32c4b4

Update app.py

Browse files

Files changed (1) hide show

app.py +26 -11

app.py CHANGED Viewed

@@ -10,7 +10,8 @@ from langchain_community.vectorstores import FAISS
 from langchain.memory import ConversationBufferMemory
 from langchain.chains import ConversationalRetrievalChain
 from htmlTemplates import css, bot_template, user_template
-from langchain_community.llms import HuggingFaceHub
 #Llama2
 import torch
@@ -33,8 +34,8 @@ def get_pdf_text(pdf_docs):
 def get_text_chunks(text):
     text_splitter = CharacterTextSplitter(
         separator="\n",
-        chunk_size=1000, # the character length of the chunck
-        chunk_overlap=200, # the character length of the overlap between chuncks
         length_function=len # the length function - in this case, character length (aka the python len() fn.)
     )
     chunks = text_splitter.split_text(text)
@@ -76,14 +77,14 @@ def load_vectorstore(text_chunks,selected_embedding):
     return vectorstore
 def get_conversation_chain(vectorstore,selected_llm,selected_temperature):
-    print('Seleted LLM: ' + selected_llm)
     print('Selected Temperature: ' + str(selected_temperature))
     if selected_llm == 'GPT 3.5':
         #openai_model = "gpt-4-turbo-preview"
         openai_model = "gpt-3.5-turbo"
         llm = ChatOpenAI(model=openai_model,temperature=selected_temperature)
-    elif selected_llm == 'Llama2':
         model_id = 'meta-llama/Llama-2-7b-chat-hf'
         hf_auth = os.environ.get("HUGGINGFACEHUB_API_TOKEN")
@@ -144,18 +145,25 @@ def get_conversation_chain(vectorstore,selected_llm,selected_temperature):
         llm = HuggingFacePipeline(pipeline=pipeline)
     # Generic LLM
     memory = ConversationBufferMemory(
-    memory_key='chat_history', return_messages=True)
     conversation_chain = ConversationalRetrievalChain.from_llm(
         llm=llm,
         retriever=vectorstore.as_retriever(),
         memory=memory,
-        return_source_documents=False
     )
     #print(conversation_chain)
@@ -164,10 +172,17 @@ def get_conversation_chain(vectorstore,selected_llm,selected_temperature):
 def handle_userinput(user_question):
-    print('Question: ' + user_question)
     response = st.session_state.conversation({'question': user_question})
-    st.session_state.chat_history = response['chat_history']
     for i, message in enumerate(st.session_state.chat_history):
         if i % 2 == 0:
@@ -197,7 +212,7 @@ def main():
             "Upload your new PDFs here and click on 'Process' or load the last upload by clicking on 'Load'", accept_multiple_files=True)
         selected_embedding = st.radio("Which Embedding?",["Cohere-multilingual-v3.0","OpenAI", "Instructor-xl"])
-        selected_llm = st.radio("Which LLM?",["GPT 3.5", "Llama2"])
         selected_temperature = st.slider('Temperature?', 0.0, 1.0, 0.1)
         if st.button("Process"):

 from langchain.memory import ConversationBufferMemory
 from langchain.chains import ConversationalRetrievalChain
 from htmlTemplates import css, bot_template, user_template
+from langchain_community.llms import HuggingFaceHub,HuggingFaceTextGenInference
 #Llama2
 import torch
 def get_text_chunks(text):
     text_splitter = CharacterTextSplitter(
         separator="\n",
+        chunk_size=500, # the character length of the chunck
+        chunk_overlap=100, # the character length of the overlap between chuncks
         length_function=len # the length function - in this case, character length (aka the python len() fn.)
     )
     chunks = text_splitter.split_text(text)
     return vectorstore
 def get_conversation_chain(vectorstore,selected_llm,selected_temperature):
+    print('Selected LLM: ' + selected_llm)
     print('Selected Temperature: ' + str(selected_temperature))
     if selected_llm == 'GPT 3.5':
         #openai_model = "gpt-4-turbo-preview"
         openai_model = "gpt-3.5-turbo"
         llm = ChatOpenAI(model=openai_model,temperature=selected_temperature)
+    elif selected_llm == 'Llama2 local':
         model_id = 'meta-llama/Llama-2-7b-chat-hf'
         hf_auth = os.environ.get("HUGGINGFACEHUB_API_TOKEN")
         llm = HuggingFacePipeline(pipeline=pipeline)
+    elif selected_llm == 'Llama2 inference':
+        llm = HuggingFaceTextGenInference(
+        inference_server_url=os.environ.get("INFERENCE_URL"),
+        max_new_tokens=50,
+        timeout=1200,
+        temperature=selected_temperature
+)
     # Generic LLM
     memory = ConversationBufferMemory(
+        memory_key='chat_history', return_messages=True, output_key='answer')
     conversation_chain = ConversationalRetrievalChain.from_llm(
         llm=llm,
         retriever=vectorstore.as_retriever(),
         memory=memory,
+        return_source_documents=True,
+        verbose=True,
     )
     #print(conversation_chain)
 def handle_userinput(user_question):
+    #print('Question: ' + user_question)
     response = st.session_state.conversation({'question': user_question})
+    anser = response.get("answer")
+    sources = response.get("source_documents", [])
+    #print('Answer: ' + anser)
+    #print('Sources: ' + str(sources))
+    with st.expander("Sources"):
+        st.write(str(sources))
+    st.session_state.chat_history = response['chat_history']
     for i, message in enumerate(st.session_state.chat_history):
         if i % 2 == 0:
             "Upload your new PDFs here and click on 'Process' or load the last upload by clicking on 'Load'", accept_multiple_files=True)
         selected_embedding = st.radio("Which Embedding?",["Cohere-multilingual-v3.0","OpenAI", "Instructor-xl"])
+        selected_llm = st.radio("Which LLM?",["GPT 3.5", "Llama2 local" ,"Llama2 inference"])
         selected_temperature = st.slider('Temperature?', 0.0, 1.0, 0.1)
         if st.button("Process"):