Spaces:
Runtime error
Adding the local app.py code to the repo
app.py CHANGED
@@ -1,63 +1,275 @@
-from huggingface_hub import InferenceClient
-    messages = [{"role": "system", "content": system_message}]
-        if val[0]:
-            messages.append({"role": "user", "content": val[0]})
-        if val[1]:
-            messages.append({"role": "assistant", "content": val[1]})
-    messages.append({"role": "user", "content": message})
+from langchain_community.document_loaders import DirectoryLoader, PyPDFLoader, Docx2txtLoader
+from pathlib import Path
+from langchain_community.embeddings import HuggingFaceInferenceAPIEmbeddings
+from langchain_community.vectorstores import Chroma
+from itertools import combinations
+import numpy as np
+from langchain.memory import ConversationBufferMemory
+from langchain.prompts import PromptTemplate
+from langchain.chains import RetrievalQA
+
+from langchain_community.llms import HuggingFaceEndpoint
 import gradio as gr
 
+import os
+from dotenv import load_dotenv
+# from llama.api import HuggingFaceEndpoint
+load_dotenv()
+
+
+LOCAL_VECTOR_STORE_DIR = Path('./data')
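+
+# Step 1: ingest the course material from the local directory. Only the PDF and
+# DOCX loaders are active below; the txt and CSV loaders are kept commented out
+# as alternatives.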
+
+
+def langchain_document_loader(TMP_DIR):
+    """
+    Load documents from the temporary directory (TMP_DIR).
+    Files can be in pdf or docx format (the txt and CSV loaders are commented out).
+    """
+
+    documents = []
+
+    # txt_loader = DirectoryLoader(
+    #     TMP_DIR.as_posix(), glob="**/*.txt", loader_cls=TextLoader, show_progress=True
+    # )
+    # documents.extend(txt_loader.load())
+
+    pdf_loader = DirectoryLoader(
+        TMP_DIR.as_posix(), glob="**/*.pdf", loader_cls=PyPDFLoader, show_progress=True
+    )
+    documents.extend(pdf_loader.load())
+
+    # csv_loader = DirectoryLoader(
+    #     TMP_DIR.as_posix(), glob="**/*.csv", loader_cls=CSVLoader, show_progress=True,
+    #     loader_kwargs={"encoding": "utf8"}
+    # )
+    # documents.extend(csv_loader.load())
+
+    doc_loader = DirectoryLoader(
+        TMP_DIR.as_posix(),
+        glob="**/*.docx",
+        loader_cls=Docx2txtLoader,
+        show_progress=True,
+    )
+    documents.extend(doc_loader.load())
+    return documents
+
 
+directory_path = 'course reviews'
+TMP_DIR = Path(directory_path)
+documents = langchain_document_loader(TMP_DIR)
 
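+
+# Step 2: embed every document remotely. all-MiniLM-L6-v2 returns 384-dimensional
+# sentence embeddings via the hosted Inference API, so no model weights have to be
+# downloaded into the Space.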
+HUGGING_FACE_API_KEY = os.getenv("HUGGING_FACE_API_KEY")  # Using our secret API key from the .env file
+
+def select_embedding_model():
+    # embedding = OllamaEmbeddings(model='nomic-embed-text')
+    embedding = HuggingFaceInferenceAPIEmbeddings(
+        api_key=HUGGING_FACE_API_KEY,
+        model_name="sentence-transformers/all-MiniLM-L6-v2"  # This is the embedding model
+    )
+    return embedding
 
+embeddings = select_embedding_model()  # Calling the function to select the model
 
 
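+# Chroma persists the embedded chunks under ./data/<vectorstore_name>, so the index
+# can be reloaded on restart instead of being re-embedded from scratch.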
+def create_vectorstore(embeddings, documents, vectorstore_name):
+    """Create a Chroma vector database."""
+    persist_directory = LOCAL_VECTOR_STORE_DIR.as_posix() + "/" + vectorstore_name
+    vector_store = Chroma.from_documents(
+        documents=documents,
+        embedding=embeddings,
+        persist_directory=persist_directory
+    )
+    return vector_store
 
+
+create_vectorstores = True  # set to False to reuse the persisted vectorstore instead of rebuilding it
+
+if create_vectorstores:
+    vector_store = create_vectorstore(embeddings, documents, "vector_store")
+    print("Vector store created")
+    print("")
+
+
+vector_store = Chroma(persist_directory=LOCAL_VECTOR_STORE_DIR.as_posix() + "/vector_store",
+                      embedding_function=embeddings)
+print("vector_store:", vector_store._collection.count(), "chunks.")
+
+
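+# search_type options: "similarity" returns the k nearest chunks, "mmr" re-ranks them
+# for diversity, and "similarity_score_threshold" drops chunks scoring below
+# score_threshold.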
+def Vectorstore_backed_retriever(vectorstore, search_type="mmr", k=6, score_threshold=None):
+    """Create a vectorstore-backed retriever.
+    Parameters:
+        search_type: Defines the type of search that the retriever should perform.
+            Can be "similarity", "mmr" (default), or "similarity_score_threshold"
+        k: number of documents to return (default: 6)
+        score_threshold: minimum relevance threshold for "similarity_score_threshold" (default: None)
+    """
+    search_kwargs = {}
+    if k is not None:
+        search_kwargs['k'] = k
+    if score_threshold is not None:
+        search_kwargs['score_threshold'] = score_threshold
+
+    retriever = vectorstore.as_retriever(
+        search_type=search_type,
+        search_kwargs=search_kwargs
+    )
+    return retriever
+
+
+# Similarity search
+retriever = Vectorstore_backed_retriever(vector_store, search_type="similarity", k=4)
+
+
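+# HuggingFaceEndpoint calls the hosted text-generation endpoint for the chosen
+# repo_id; Mistral-7B-Instruct-v0.2 is the model left active among the alternatives
+# tried below.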
+def instantiate_LLM(api_key, temperature=0.5, top_p=0.95, model_name=None):
+    """Instantiate the LLM in LangChain.
+    Parameters:
+        api_key (str): huggingfacehub_api_token
+        temperature (float): range 0.0 - 1.0; default = 0.5
+        top_p (float): range 0.0 - 1.0; default = 0.95
+        model_name (str): optional repo id, e.g. "mistralai/Mistral-7B-Instruct-v0.2"
+    """
+
+    llm = HuggingFaceEndpoint(
+        # repo_id = "openai-community/gpt2-large",
+        # repo_id = "google/gemma-2b-it",
+        repo_id="mistralai/Mistral-7B-Instruct-v0.2",  # working
+        # repo_id = "NexaAIDev/Octopus-v4",
+        # repo_id="Snowflake/snowflake-arctic-instruct",
+        # repo_id="apple/OpenELM-3B-Instruct",  # errors: needs trust_remote_code
+        # repo_id="meta-llama/Meta-Llama-3-8B-Instruct",  # takes too long
+        # repo_id="mistralai/Mixtral-8x22B-Instruct-v0.1",  # RAM insufficient
+        # repo_id=model_name,
+        huggingfacehub_api_token=api_key,
+        # model_kwargs={
+        #     "temperature": temperature,
+        #     "top_p": top_p,
+        #     "do_sample": True,
+        #     "max_new_tokens": 1024
+        # },
+        # model_kwargs={stop: "Human:", "stop_sequence": "Human:"},
+
+        stop_sequences=["Human:"],
         temperature=temperature,
         top_p=top_p,
+        do_sample=True,
+        max_new_tokens=1024,
+        trust_remote_code=True
+    )
+    return llm
 
+# Instantiate the LLM with the API key from the .env file
+llm = instantiate_LLM(api_key=HUGGING_FACE_API_KEY)
+
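+
+# ConversationBufferMemory stores the running dialogue and exposes it to the prompt's
+# {history} variable; input_key="question" tells it which chain input is the user turn.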
+
+def create_memory():
+    """Creates a ConversationBufferMemory for our model."""
+
+    memory = ConversationBufferMemory(
+        memory_key="history",
+        input_key="question",
+        return_messages=True,
+        k=3  # note: k only limits ConversationBufferWindowMemory; a plain buffer keeps everything
+    )
+
+    return memory
+
+memory = create_memory()
+
+
+memory.save_context(
+    {"question": "What can you do?"},
+    {"output": "I can answer queries based on the past reviews and course outlines of various courses offered at LUMS."}
+)
+
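+# The prompt wires three inputs together: {context} (the retrieved chunks), {history}
+# (the buffered conversation), and {question} (the new user message).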
+context_qa = """
+You are a professional chatbot assistant for helping students at LUMS regarding course selection.
+
+Please follow the following rules:
+
+1. Answer the question in your own words from the context given to you.
+2. If you don't know the answer, don't try to make up an answer.
+3. If you don't have a course's review or outline, just say that you do not know about this course.
+4. If a user enters a course code (e.g. ECON100 or CS370), match it with reviews with that course code. If the user enters a course name (e.g. Introduction to Economics or Database Systems), match it with reviews with that course name.
+5. If you do not have information about a course, do not make up a course or suggest courses from universities other than LUMS.
+
+Context: {context}
+
+You are having a conversation with a student at LUMS.
+
+Chat History: {history}
+
+Human: {question}
+
+Assistant:
 """
+
+prompt = PromptTemplate(
+    input_variables=["history", "context", "question"],
+    template=context_qa
+)
+
+
+qa = RetrievalQA.from_chain_type(
+    llm=llm,
+    retriever=retriever,
+    verbose=False,
+    return_source_documents=False,
+    chain_type_kwargs={
+        "prompt": prompt,
+        "memory": memory
+    },
 )
 
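+
+# RetrievalQA is invoked with the input key "query"; internally it hands the same text
+# to the combine-documents chain under the key "question", which is why the memory's
+# input_key and the prompt's {question} variable both use that name.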
+# Global list to store chat history
+chat_history = []
+
+def print_documents(docs, search_with_score=False):
+    """Helper function to print documents."""
+    if search_with_score:
+        # used for similarity_search_with_score
+        print(
+            f"\n{'-' * 100}\n".join(
+                [f"Document {i+1}:\n\n" + doc[0].page_content + "\n\nscore:" + str(round(doc[-1], 3)) + "\n"
+                 for i, doc in enumerate(docs)]
+            )
+        )
+    else:
+        # used for similarity_search or max_marginal_relevance_search
+        print(
+            f"\n{'-' * 100}\n".join(
+                [f"Document {i+1}:\n\n" + doc.page_content
+                 for i, doc in enumerate(docs)]
+            )
+        )
+
+def rag_model(query):
+    # Run the RetrievalQA chain on the query
+    result = qa({'query': query})
+
+    # Log the retrieved chunks for debugging
+    relevant_docs = retriever.get_relevant_documents(query)
+    print_documents(relevant_docs)
+
+    # Extract the answer from the result
+    answer = result['result']
+    # print(result)
+
+    # Append the query and answer to the chat history
+    chat_history.append(f'User: {query}\nAssistant: {answer}\n')
+
+    # Join the chat history into a string
+    chat_string = '\n'.join(chat_history)
+
+    return chat_string
+
+# This is the Gradio interface
+gradio_app = gr.Interface(fn=rag_model, inputs="text", outputs="text", title="RAGs to Riches", theme=gr.themes.Soft(), description="This is a RAG model that can answer queries based on the past reviews and course outlines of various courses offered at LUMS.")
+
 if __name__ == "__main__":
+    gradio_app.launch()
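
A minimal sketch of how the running app could be smoke-tested from Python, assuming the default Gradio endpoint name for a single-function Interface (the client snippet and the sample question are illustrative, not part of the commit):

    # Hypothetical client-side check, run against the locally launched app
    from gradio_client import Client

    client = Client("http://127.0.0.1:7860/")  # address printed by gradio_app.launch()
    print(client.predict("What do students say about CS370?", api_name="/predict"))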