Update app.py
app.py CHANGED
@@ -69,32 +69,46 @@ client = InferenceClient("google/gemma-3-27b-it")
 # messages.extend(history)
 
 # messages.append({"role": "user", "content": message})
-
-
-
-
-
+#response = ""
+#for messages in client.chat_completion(messages,max_tokens = 2500, stream = True):
+#    token = messages.choices[0].delta.content
+#    response += token
+#    yield response
 
 def respond(message, history):
-#
-
-
-
-
-
-
-
-
-
+    # Step 1: Embed the user's question
+    message_embedding = model.encode(message, convert_to_tensor=True)
+
+    # Step 2: Calculate similarity with knowledge chunks
+    scores = util.cos_sim(message_embedding, chunk_embeddings)[0]
+    top_k = 3  # You can adjust how many chunks you want to include
+    top_results = torch.topk(scores, k=top_k)
+
+    # Step 3: Retrieve the top relevant knowledge chunks
+    retrieved_knowledge = "\n".join([chunks[i] for i in top_results.indices])
+
+    # Step 4: Build system message with retrieved knowledge
+    system_message = (
+        "You are a helpful chatbot named Scooby, kind of like the cartoon character but not too much. "
+        "You know a lot about pets and their diets, and you only answer questions about pets. "
+        "Use the following relevant knowledge to help answer the user's question:\n\n"
+        + retrieved_knowledge
     )
 
-#
-
-
-
-
+    # Step 5: Compose message list for the LLM
+    messages = [{"role": "system", "content": system_message}]
+
+    if history:
+        messages.extend(history)
 
+    messages.append({"role": "user", "content": message})
 
+    # Step 6: Stream response
+    response = ""
+    for chunk in client.chat_completion(messages, max_tokens=2500, stream=True):
+        token = chunk.choices[0].delta.content
+        response += token
+        yield response
 
 #theme = gr.themes.Origin(primary_hue="orange",secondary_hue="indigo", neutral_hue="teal")
 
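For context, the new respond() depends on several names defined earlier in app.py, outside this hunk: model (a sentence-transformers embedding model), chunks (the knowledge base split into text snippets), chunk_embeddings (their precomputed embeddings), plus the torch and sentence_transformers.util imports. Below is a minimal sketch of what that setup might look like; the variable names come from the diff, but the embedding model id and the sample chunks are assumptions, not the Space's actual code.

import torch
from sentence_transformers import SentenceTransformer, util
from huggingface_hub import InferenceClient

client = InferenceClient("google/gemma-3-27b-it")  # matches the hunk header

# Hypothetical knowledge base; the real app presumably loads its own pet-diet text.
chunks = [
    "Dogs should not eat chocolate; theobromine is toxic to them.",
    "Adult cats are often lactose intolerant, so avoid giving them milk.",
    "Rabbits need a diet that is mostly hay, supplemented with fresh greens.",
]

model = SentenceTransformer("all-MiniLM-L6-v2")  # assumed embedding model
chunk_embeddings = model.encode(chunks, convert_to_tensor=True)

One caveat on the new streaming loop: chunk.choices[0].delta.content can be None on some stream events, in which case response += token raises a TypeError; guarding with token or "" is a common hardening.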