AnaviJoshi committed on
Commit
3b463dc
·
verified ·
1 Parent(s): 907704a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -20
app.py CHANGED
@@ -69,32 +69,46 @@ client = InferenceClient("google/gemma-3-27b-it")
69
  # messages.extend(history)
70
 
71
  # messages.append({"role": "user", "content": message})
72
- # response = ""
73
- # for messages in client.chat_completion(messages,max_tokens = 2500, stream = True):
74
- # token = messages.choices[0].delta.content
75
- # response += token
76
- # yield response
77
 
78
  def respond(message, history):
79
- # Get relevant information from knowledge base
80
- relevant_chunks = get_top_chunks(message)
81
- context = "\n".join(relevant_chunks)
82
-
83
- # Build prompt
84
- prompt = (
85
- "You are Scooby, a helpful chatbot that only answers questions about pets and their diets.\n"
86
- "Use the following relevant information to help answer the user's question:\n\n"
87
- f"{context}\n\n"
88
- f"User: {message}\nScooby:"
 
 
 
 
 
 
 
89
  )
90
 
91
- # Stream response
92
- response = ""
93
- for chunk in client.text_generation(prompt=prompt, max_new_tokens=300, stream=True):
94
- response += chunk.token.text
95
- yield response
96
 
 
97
 
 
 
 
 
 
 
98
 
99
  #theme = gr.themes.Origin(primary_hue="orange",secondary_hue="indigo", neutral_hue="teal")
100
 
 
69
  # messages.extend(history)
70
 
71
  # messages.append({"role": "user", "content": message})
72
+ #response = ""
73
+ #for messages in client.chat_completion(messages,max_tokens = 2500, stream = True):
74
+ # token = messages.choices[0].delta.content
75
+ # response += token
76
+ # yield response
77
 
78
def respond(message, history):
    """Answer *message* with retrieval-augmented generation, streaming the reply.

    Embeds the user's question, retrieves the most similar knowledge chunks,
    builds a system prompt around them, and streams the LLM completion.
    Yields the accumulated response text after each token so a Gradio
    ChatInterface can render it incrementally.

    Parameters:
        message: the user's current question (str).
        history: prior chat turns as a list of {"role", "content"} dicts,
            or a falsy value when the conversation is new.
    """
    # Step 1: Embed the user's question.
    message_embedding = model.encode(message, convert_to_tensor=True)

    # Step 2: Rank knowledge chunks by cosine similarity to the question.
    scores = util.cos_sim(message_embedding, chunk_embeddings)[0]
    top_k = 3  # Number of knowledge chunks to include in the prompt.
    top_results = torch.topk(scores, k=top_k)

    # Step 3: Retrieve the top relevant knowledge chunks.
    retrieved_knowledge = "\n".join([chunks[i] for i in top_results.indices])

    # Step 4: Build the system message with the retrieved knowledge.
    system_message = (
        "You are a helpful chatbot named Scooby, kind of like the cartoon character but not too much. "
        "You know a lot about pets and their diets, and you only answer questions about pets. "
        "Use the following relevant knowledge to help answer the user's question:\n\n"
        + retrieved_knowledge
    )

    # Step 5: Compose the message list for the LLM.
    messages = [{"role": "system", "content": system_message}]
    if history:
        messages.extend(history)
    messages.append({"role": "user", "content": message})

    # Step 6: Stream the response.
    response = ""
    for chunk in client.chat_completion(messages, max_tokens=2500, stream=True):
        token = chunk.choices[0].delta.content
        # BUGFIX: the final streamed chunk can carry content=None, which would
        # raise TypeError on string concatenation; skip empty/None tokens.
        if token:
            response += token
            yield response
112
 
113
  #theme = gr.themes.Origin(primary_hue="orange",secondary_hue="indigo", neutral_hue="teal")
114