Load and process the knowledge.txt file
app.py
CHANGED
@@ -7,6 +7,42 @@ import numpy as np
 # this client will handle making requests to the model to generate responses
 client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
 
+# Load and process the knowledge base text file
+with open("knowledge.txt", "r", encoding="utf-8") as f:
+    knowledge_text = f.read()
+
+# Split the text into chunks (for example, by paragraphs)
+chunks = [chunk.strip() for chunk in knowledge_text.split("\n\n") if chunk.strip()]
+
+# Load an embedding model (this one is light and fast)
+embedder = SentenceTransformer('all-MiniLM-L6-v2')
+
+# Precompute embeddings for all chunks (as a tensor for fast similarity search)
+chunk_embeddings = embedder.encode(chunks, convert_to_tensor=True)
+
+def get_relevant_context(query, top_k=3):
+    """
+    Compute the embedding for the query, compare it against all chunk embeddings,
+    and return the top_k most similar chunks concatenated into a context string.
+    """
+
+    # Compute and normalize the query embedding
+    query_embedding = embedder.encode(query, convert_to_tensor=True)
+    query_embedding = query_embedding / query_embedding.norm()
+
+    # Normalize chunk embeddings along the embedding dimension
+    norm_chunk_embeddings = chunk_embeddings / chunk_embeddings.norm(dim=1, keepdim=True)
+
+    # Compute cosine similarity between the query and each chunk
+    similarities = torch.matmul(norm_chunk_embeddings, query_embedding)
+
+    # Get the indices of the top_k most similar chunks
+    top_k_indices = torch.topk(similarities, k=top_k).indices.cpu().numpy()
+
+    # Concatenate the top chunks into a single context string
+    context = "\n\n".join([chunks[i] for i in top_k_indices])
+    return context
+
 def respond(message, history):
 
     system_message = "You are a kitchen sous chef. You always respond with a knowledgeable and upbeat attitude!"

@@ -66,7 +102,7 @@ with gr.Blocks() as chatbot:
     with gr.Column(scale=2):
         gr.ChatInterface(respond,
             type="messages",
-            examples = ["
+            examples = ["What are some cooking tips for beginners?", "What toppings should I add to my ramen?", "What's the best recipe for homemade pizza dough?"],
             theme='d8ahazard/material_design_rd'
             )
     with gr.Row():
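Two things sit outside these hunks: the imports the new code depends on (torch and SentenceTransformer; only import numpy as np appears in the hunk context, so both presumably live in the unchanged lines 1-6 of app.py), and how respond actually consumes get_relevant_context. Below is a minimal sketch of one plausible wiring; build_system_message is a hypothetical helper for illustration, not part of this commit.

    # Assumed to already exist near the top of app.py (not shown in the diff)
    import torch
    from sentence_transformers import SentenceTransformer

    def build_system_message(user_message):
        # Hypothetical helper: ground the sous-chef persona in knowledge.txt
        base = "You are a kitchen sous chef. You always respond with a knowledgeable and upbeat attitude!"
        # Pull the three chunks most similar to the user's question
        context = get_relevant_context(user_message, top_k=3)
        return f"{base}\n\nUse this background information when it is relevant:\n{context}"

    # Example usage:
    # build_system_message("What's the best recipe for homemade pizza dough?")

On the retrieval itself: since both the query embedding and the chunk embeddings are normalized before the matmul, the dot products in similarities are cosine similarities; sentence_transformers.util.cos_sim would compute the same scores in one call.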
|