Spaces:

GaborToth2
/

chatbot

Sleeping

App Files Files Community

GaborToth2 committed on Mar 28, 2025

Commit

1a7ff48

1 Parent(s): 317ef9d

implementing rag

Browse files

Files changed (2) hide show

app.py +29 -9
original.py +1 -9

app.py CHANGED Viewed

@@ -2,10 +2,27 @@ import gradio as gr
 from huggingface_hub import InferenceClient
 import os
 import cohere
-"""
-For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
-"""
 client = InferenceClient("meta-llama/Llama-3.2-3B-Instruct")
 COHERE_API_KEY = os.getenv("COHERE_API_KEY")
 client_cohere = cohere.Client(COHERE_API_KEY)
@@ -20,7 +37,14 @@ def respond(
     top_p,
     use_cohere_api,
 ):
-    messages = [{"role": "system", "content": system_message}]
     for val in history:
         if val[0]:
@@ -32,6 +56,7 @@ def respond(
     response = ""
     if use_cohere_api:
         cohere_response = client_cohere.chat(
             message=message,
@@ -53,10 +78,6 @@ def respond(
             response += token
             yield response
-"""
-For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
-"""
 demo = gr.ChatInterface(
     respond,
     additional_inputs=[
@@ -74,6 +95,5 @@ demo = gr.ChatInterface(
     ],
 )
 if __name__ == "__main__":
     demo.launch()

 from huggingface_hub import InferenceClient
 import os
 import cohere
+import faiss
+from transformers import pipeline
+from sentence_transformers import SentenceTransformer
+documents = [
+    "The capital of France is Paris.",
+    "Python is a popular programming language.",
+    "The Eiffel Tower is located in Paris.",
+    "Llama is a type of animal found in South America.",
+    "Paris is known for its art, fashion, and culture.",
+    "Gabor Toth is the author of this document."
+]
+embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
+document_embeddings = embedding_model.encode(documents, convert_to_tensor=True)
+document_embeddings_np = document_embeddings.cpu().numpy()
+index = faiss.IndexFlatL2(document_embeddings_np.shape[1])
+index.add(document_embeddings_np)
 client = InferenceClient("meta-llama/Llama-3.2-3B-Instruct")
 COHERE_API_KEY = os.getenv("COHERE_API_KEY")
 client_cohere = cohere.Client(COHERE_API_KEY)
     top_p,
     use_cohere_api,
 ):
+    query_embedding = embedding_model.encode([message], convert_to_tensor=True)
+    query_embedding_np = query_embedding.cpu().numpy()
+    distances, indices = index.search(query_embedding_np, k=1)
+    relevant_document = documents[indices[0][0]]
+    messages = [{"role": "system", "content": system_message}, {"role": "system", "content": f"context: {relevant_document}"}]
     for val in history:
         if val[0]:
     response = ""
     if use_cohere_api:
         cohere_response = client_cohere.chat(
             message=message,
             response += token
             yield response
 demo = gr.ChatInterface(
     respond,
     additional_inputs=[
     ],
 )
 if __name__ == "__main__":
     demo.launch()

original.py CHANGED Viewed

@@ -1,11 +1,8 @@
 import gradio as gr
 from huggingface_hub import InferenceClient
-"""
-For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
-"""
-client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
 def respond(
     message,
@@ -39,10 +36,6 @@ def respond(
         response += token
         yield response
-"""
-For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
-"""
 demo = gr.ChatInterface(
     respond,
     additional_inputs=[
@@ -59,6 +52,5 @@ demo = gr.ChatInterface(
     ],
 )
 if __name__ == "__main__":
     demo.launch()

 import gradio as gr
 from huggingface_hub import InferenceClient
+client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
 def respond(
     message,
         response += token
         yield response
 demo = gr.ChatInterface(
     respond,
     additional_inputs=[
     ],
 )
 if __name__ == "__main__":
     demo.launch()