Spaces: basic RAG
- app.py +202 -0
- custom_llm.py +65 -0
app.py
ADDED
@@ -0,0 +1,202 @@
import os
import time
from llama_index.core import SimpleDirectoryReader
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core import VectorStoreIndex
from custom_llm import CustomLLM
import gradio as gr
# import shutil
import tempfile

# default model
repo_id = "mistralai/Mistral-7B-Instruct-v0.1"
model_type = 'text-generation'

API_TOKEN = os.getenv('HF_INFER_API')
temp_dir = tempfile.TemporaryDirectory()


embedding_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
llm = CustomLLM(repo_id=repo_id, model_type=model_type, api_token=API_TOKEN)

def add_text(history, text):
    history = history + [(text, None)]
    return history, gr.Textbox(value="", interactive=False)


def hasFile(history):
    for user_prompt, bot_response in history:
        if '.pdf' in user_prompt.lower():
            return True

    return False

def modelChanged(history, drop):
    history = history + [(f'===> {drop}', None)]
    return history, drop


def getEngine(llm):
    loader = SimpleDirectoryReader(
        input_dir=temp_dir.name,
        recursive=True,
        required_exts=[".pdf", ".PDF"],
    )

    # Load files as documents
    documents = loader.load_data()

    # create an index in the memory
    index = VectorStoreIndex.from_documents(
        documents,
        embed_model=embedding_model,
    )

    # create query_engine
    query_engine = index.as_query_engine(llm=llm)
    return query_engine

def copy_pdf(source_path, destination_path):
    # Open the source PDF file in binary read mode
    with open(source_path, "rb") as source_file:
        # Read the entire content of the source file
        data = source_file.read()

    # Open the destination file in binary write mode
    with open(destination_path, "wb") as destination_file:
        # Write the copied data to the destination file
        destination_file.write(data)

    # Print a success message
    print(f"PDF copied successfully from {source_path} to {destination_path}")


def add_file(history, file):
    file_path = os.path.join(temp_dir.name, os.path.basename(file))
    # shutil.copyfile(file.name, file_path)  # <--- Asynchronous
    copy_pdf(file.name, file_path)

    history = history + [(os.path.basename(file), None)]
    return history


def format_prompt(message, history, model):
    if model is None or 'mistral' in model.lower():
        prompt = "<s>"
        for user_prompt, bot_response in history:
            prompt += f"[INST] {user_prompt} [/INST]"
            prompt += f" {bot_response}</s> "
        prompt += f"[INST] {message} [/INST]"

    elif 'google' in model.lower():
        prompt = "<bos>"
        for user_prompt, bot_response in history:
            prompt += f"<start_of_turn>user {user_prompt} <end_of_turn><start_of_turn>model {bot_response}"
        prompt += f"<start_of_turn>user {message} <end_of_turn><start_of_turn>model"

    else:
        prompt = ""

    return prompt

def bot(history, model=None):
    print("===> model: ", model)
    local_llm = llm
    if model:
        local_llm = CustomLLM(repo_id=model, model_type=model_type, api_token=API_TOKEN)

    if len(history) > 0 and len(history[-1]) > 0 and '.pdf' in history[-1][0]:
        response = "You uploaded a PDF file. You can ask questions from the file."

    elif len(history) > 0 and len(history[-1]) > 0 and '===>' in history[-1][0]:
        new_model = history[-1][0].replace("===>", "")
        response = f"You have changed the model to {new_model}"

    else:
        prompt = history[-1][0]

        if hasFile(history):
            query_engine = getEngine(local_llm)
            response = query_engine.query(prompt)
            print("Response from file")
        else:
            response = local_llm.predict(format_prompt(prompt, history, model))
            print("Response from Model")

    # print(response)
    # response = "Thats cool!"

    history[-1][1] = ""
    for character in str(response):
        history[-1][1] += character
        # time.sleep(0.05)
        yield history


with gr.Blocks() as demo:

    gr.Markdown(
        """
        <div style="display: grid; justify-content: center;">
        <h1>Basic RAG with Huggingface Inference API</h1>
        <h4>For best performance start with small PDF files (less than 20 pages). </h4>
        </div>
        """
    )

    chatbot = gr.Chatbot(
        [],
        elem_id="chatbot",
        bubble_full_width=False,
        # avatar_images=(None, (os.path.join(os.path.dirname(__file__), "avatar.png"))),
    )

    with gr.Row():
        drop = gr.Dropdown(
            [
                ("Mixtral-8x7B-Instruct-v0.1", "mistralai/Mixtral-8x7B-Instruct-v0.1"),
                ("Mistral-7B-Instruct-v0.2", "mistralai/Mistral-7B-Instruct-v0.2"),
                ("gemma-7b-it", "google/gemma-7b-it"),
                ("gemma-2b-it", "google/gemma-2b-it")
            ],
            value="mistralai/Mixtral-8x7B-Instruct-v0.1",
            label="Model",
            info=""
        )

    with gr.Row():
        txt = gr.Textbox(
            scale=4,
            show_label=False,
            placeholder="Type your question and press enter",
            container=False,
        )
        btn = gr.UploadButton("📁", file_types=[".pdf"])

    drop.change(modelChanged, [chatbot, drop], [chatbot, drop], queue=False).then(
        bot, [chatbot, drop], chatbot
    )

    txt_msg = txt.submit(add_text, [chatbot, txt], [chatbot, txt], queue=False).then(
        bot, [chatbot, drop], chatbot, api_name="bot_response"
    )
    txt_msg.then(lambda: gr.Textbox(interactive=True), None, [txt], queue=False)

    file_msg = btn.upload(add_file, [chatbot, btn], [chatbot], queue=False).then(
        bot, [chatbot, drop], chatbot
    )


demo.queue()
demo.launch()
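The retrieval flow that getEngine wires into the Gradio UI can also be exercised on its own. The following is a minimal sketch, not part of the commit: it mirrors what getEngine does (SimpleDirectoryReader -> VectorStoreIndex -> query engine) but reads a single local file; "sample.pdf" and the environment variable HF_INFER_API are assumptions.

# Minimal sketch (not part of the commit): the RAG path without the Gradio UI.
# Assumes HF_INFER_API holds a valid Hugging Face token and that a local file
# named "sample.pdf" (hypothetical) exists next to this script.
import os
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from custom_llm import CustomLLM

llm = CustomLLM(
    repo_id="mistralai/Mistral-7B-Instruct-v0.1",
    model_type="text-generation",
    api_token=os.getenv("HF_INFER_API"),
)

# Load one PDF, embed it with the same BGE model the app uses, and build an in-memory index.
documents = SimpleDirectoryReader(input_files=["sample.pdf"]).load_data()
index = VectorStoreIndex.from_documents(
    documents,
    embed_model=HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5"),
)

# Ask a question against the indexed document, exactly as bot() does when a PDF is present.
print(index.as_query_engine(llm=llm).query("Summarize the document in two sentences."))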
custom_llm.py
ADDED
@@ -0,0 +1,65 @@
from langchain_core.language_models.llms import LLM
from langchain_core.callbacks.manager import CallbackManagerForLLMRun
import requests
from typing import Any, List, Mapping, Optional, Literal

class CustomLLM(LLM):
    # Properties
    repo_id: str
    api_token: str
    model_type: Literal["text2text-generation", "text-generation"]
    max_new_tokens: Optional[int] = None
    temperature: float = 0.001
    timeout: Optional[float] = None
    top_p: Optional[float] = None
    top_k: Optional[int] = None
    repetition_penalty: Optional[float] = None
    stop: List[str] = []


    @property
    def _llm_type(self) -> str:
        return "custom"

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:

        headers = {"Authorization": f"Bearer {self.api_token}"}
        API_URL = f"https://api-inference.huggingface.co/models/{self.repo_id}"

        parameters_dict = {
            'max_new_tokens': self.max_new_tokens,
            'temperature': self.temperature,
            'timeout': self.timeout,
            'top_p': self.top_p,
            'top_k': self.top_k,
            'repetition_penalty': self.repetition_penalty,
            'stop': self.stop
        }

        if self.model_type == 'text-generation':
            parameters_dict["return_full_text"] = False

        data = {"inputs": prompt, "parameters": parameters_dict, "options": {"wait_for_model": True}}
        data = requests.post(API_URL, headers=headers, json=data).json()
        return data[0]['generated_text']

    @property
    def _identifying_params(self) -> Mapping[str, Any]:
        """Get the identifying parameters."""
        return {
            'repo_id': self.repo_id,
            'model_type': self.model_type,
            'stop_sequences': self.stop,
            'max_new_tokens': self.max_new_tokens,
            'temperature': self.temperature,
            'timeout': self.timeout,
            'top_p': self.top_p,
            'top_k': self.top_k,
            'repetition_penalty': self.repetition_penalty
        }
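Because CustomLLM is a standard LangChain LLM subclass, it can be sanity-checked on its own before wiring it into the index. A minimal sketch, assuming HF_INFER_API holds a valid Hugging Face token (the repo_id and prompt below are illustrative):

# Minimal sketch (not part of the commit): call CustomLLM directly.
import os
from custom_llm import CustomLLM

llm = CustomLLM(
    repo_id="mistralai/Mistral-7B-Instruct-v0.1",
    model_type="text-generation",
    api_token=os.getenv("HF_INFER_API"),
    max_new_tokens=128,
)

# invoke() routes the prompt through _call(), which posts to the HF Inference API
# and returns the 'generated_text' field of the response.
print(llm.invoke("<s>[INST] What does RAG stand for? [/INST]"))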