Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -78,22 +78,22 @@ client = InferenceClient("google/gemma-3-27b-it")
|
|
| 78 |
|
| 79 |
def respond(message, history):
|
| 80 |
# Step 1: Embed the user's question
|
| 81 |
-
|
| 82 |
|
| 83 |
# Step 2: Calculate similarity with knowledge chunks
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
|
| 88 |
# Step 3: Retrieve the top relevant knowledge chunks
|
| 89 |
retrieved_knowledge = "\n".join([chunks[i] for i in top_results.indices])
|
| 90 |
|
| 91 |
-
|
| 92 |
system_message = (
|
| 93 |
"You are a helpful chatbot named Scooby, kind of like the cartoon character but not too much.
|
| 94 |
You know a lot about pets and their diets, and you only answer questions about pets.
|
| 95 |
Use the following relevant knowledge to help answer the user's question"
|
| 96 |
-
|
| 97 |
)
|
| 98 |
|
| 99 |
# Step 5: Compose message list for the LLM
|
|
|
|
| 78 |
|
| 79 |
def respond(message, history):
|
| 80 |
# Step 1: Embed the user's question
|
| 81 |
+
message_embedding = model.encode(message, convert_to_tensor=True)
|
| 82 |
|
| 83 |
# Step 2: Calculate similarity with knowledge chunks
|
| 84 |
+
scores = util.cos_sim(message_embedding, chunk_embeddings)[0]
|
| 85 |
+
top_k = 3 # You can adjust how many chunks you want to include
|
| 86 |
+
top_results = torch.topk(scores, k=top_k)
|
| 87 |
|
| 88 |
# Step 3: Retrieve the top relevant knowledge chunks
|
| 89 |
retrieved_knowledge = "\n".join([chunks[i] for i in top_results.indices])
|
| 90 |
|
| 91 |
+
# Step 4: Build system message with retrieved knowledge
|
| 92 |
system_message = (
|
| 93 |
"You are a helpful chatbot named Scooby, kind of like the cartoon character but not too much.
|
| 94 |
You know a lot about pets and their diets, and you only answer questions about pets.
|
| 95 |
Use the following relevant knowledge to help answer the user's question"
|
| 96 |
+
+ retrieved_knowledge
|
| 97 |
)
|
| 98 |
|
| 99 |
# Step 5: Compose message list for the LLM
|