mgetz committed on
Commit
4da32b5
·
verified ·
1 Parent(s): e115bbd

Added second dataset/checkboxes

Browse files
Files changed (1) hide show
  1. app.py +19 -9
app.py CHANGED
@@ -12,8 +12,14 @@ with open("quentins_knowledge.txt", "r", encoding="utf-8") as file:
12
  # Read the entire contents of the file and store it in a variable
13
  quentins_knowledge = file.read()
14
 
 
 
 
 
 
15
  # Print the text below
16
  print(quentins_knowledge)
 
17
 
18
  #STEP 3 FROM SEMANTIC SEARCH
19
  def preprocess_text(text):
@@ -44,6 +50,9 @@ def preprocess_text(text):
44
  # Call the preprocess_text function and store the result in a cleaned_chunks variable
45
  cleaned_chunks = preprocess_text(quentins_knowledge)
46
 
 
 
 
47
  #STEP 4 FROM SEMANTIC SEARCH
48
  # Load the pre-trained embedding model that converts text to vectors
49
  model = SentenceTransformer('all-MiniLM-L6-v2')
@@ -64,6 +73,9 @@ def create_embeddings(text_chunks):
64
  # Call the create_embeddings function and store the result in a new chunk_embeddings variable
65
  chunk_embeddings = create_embeddings(cleaned_chunks)
66
 
 
 
 
67
  #STEP 5 FROM SEMANTIC SEARCH
68
  # Define a function to find the most relevant text chunks for a given query, chunk_embeddings, and text_chunks
69
  def get_top_chunks(query, chunk_embeddings, text_chunks):
@@ -99,18 +111,15 @@ def get_top_chunks(query, chunk_embeddings, text_chunks):
99
  # Return the list of most relevant chunks
100
  return top_chunks
101
 
102
- #STEP 6 FROM SEMANTIC SEARCH
103
- # Call the get_top_chunks function with the original query
104
- top_results = get_top_chunks("How does water get into the sky?", chunk_embeddings, cleaned_chunks)
105
-
106
- # Print the top results
107
- print(top_results)
108
-
109
  client = InferenceClient("google/gemma-3-27b-it")
110
 
111
  def respond(message, history, name, mood):
112
- duck_chunks = get_top_chunks(message, chunk_embeddings, cleaned_chunks)
113
- print(duck_chunks)
 
 
 
 
114
  duck_info = "\n".join(duck_chunks)
115
  messages = [{"role": "system", "content": f"You are an extremely {mood} chatbot named Quentin. You are a rubber duck, with strong human emotions who helps the user with their problem. You talk to the user, whose name is {name}, in a way that reflects your {mood} mood. Make sure to use duck-themed references in your responses. Refer to the user by name as much as possible. Base your response on the provided context: {duck_info}. Always end your response with a brief, punchy tagline."}]
116
 
@@ -161,6 +170,7 @@ with gr.Blocks(theme=gr.themes.Citrus(
161
  )) as chatbot:
162
  with gr.Row(scale=1):
163
  gr.Image("ask_quentin_banner.jpg", show_label = False, show_share_button = False, show_download_button = False)
 
164
  with gr.Row(scale=3):
165
  with gr.Column(scale=1):
166
  gr.Image("Quentin.png", show_label = False, show_share_button = False, show_download_button = False)
 
12
  # Read the entire contents of the file and store it in a variable
13
  quentins_knowledge = file.read()
14
 
15
+ #SECOND FEATURE
16
+ with open("quentins_alt_knowledge.txt", "r", encoding="utf-8") as file:
17
+ # Read the entire contents of the file and store it in a variable
18
+ quentins_alt_knowledge = file.read()
19
+
20
  # Print the text below
21
  print(quentins_knowledge)
22
+ print(quentins_alt_knowledge)
23
 
24
  #STEP 3 FROM SEMANTIC SEARCH
25
  def preprocess_text(text):
 
50
  # Call the preprocess_text function and store the result in a cleaned_chunks variable
51
  cleaned_chunks = preprocess_text(quentins_knowledge)
52
 
53
+ #SECOND FEATURE
54
+ cleaned_alt_chunks = preprocess_text(quentins_alt_knowledge)
55
+
56
  #STEP 4 FROM SEMANTIC SEARCH
57
  # Load the pre-trained embedding model that converts text to vectors
58
  model = SentenceTransformer('all-MiniLM-L6-v2')
 
73
  # Call the create_embeddings function and store the result in a new chunk_embeddings variable
74
  chunk_embeddings = create_embeddings(cleaned_chunks)
75
 
76
+ #SECOND FEATURE
77
+ alt_chunk_embeddings = create_embeddings(cleaned_alt_chunks)
78
+
79
  #STEP 5 FROM SEMANTIC SEARCH
80
  # Define a function to find the most relevant text chunks for a given query, chunk_embeddings, and text_chunks
81
  def get_top_chunks(query, chunk_embeddings, text_chunks):
 
111
  # Return the list of most relevant chunks
112
  return top_chunks
113
 
 
 
 
 
 
 
 
114
  client = InferenceClient("google/gemma-3-27b-it")
115
 
116
  def respond(message, history, name, mood):
117
+ if quentin_topic == "Self Help":
118
+ duck_chunks = get_top_chunks(message, chunk_embeddings, cleaned_chunks)
119
+ print(duck_chunks)
120
+ elif quentin_topic == "Duck Facts":
121
+ duck_chunks = get_top_chunks(message, chunk_embeddings, cleaned_alt_chunks)
122
+ print(duck_chunks)
123
  duck_info = "\n".join(duck_chunks)
124
  messages = [{"role": "system", "content": f"You are an extremely {mood} chatbot named Quentin. You are a rubber duck, with strong human emotions who helps the user with their problem. You talk to the user, whose name is {name}, in a way that reflects your {mood} mood. Make sure to use duck-themed references in your responses. Refer to the user by name as much as possible. Base your response on the provided context: {duck_info}. Always end your response with a brief, punchy tagline."}]
125
 
 
170
  )) as chatbot:
171
  with gr.Row(scale=1):
172
  gr.Image("ask_quentin_banner.jpg", show_label = False, show_share_button = False, show_download_button = False)
173
+ quentin_topic = gr.CheckboxGroup(["Self Help", "Duck Facts"], label="What do you want help with?")
174
  with gr.Row(scale=3):
175
  with gr.Column(scale=1):
176
  gr.Image("Quentin.png", show_label = False, show_share_button = False, show_download_button = False)