Spaces:
Sleeping
Sleeping
Added second dataset/checkboxes
Browse files
app.py
CHANGED
|
@@ -12,8 +12,14 @@ with open("quentins_knowledge.txt", "r", encoding="utf-8") as file:
|
|
| 12 |
# Read the entire contents of the file and store it in a variable
|
| 13 |
quentins_knowledge = file.read()
|
| 14 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
# Print the text below
|
| 16 |
print(quentins_knowledge)
|
|
|
|
| 17 |
|
| 18 |
#STEP 3 FROM SEMANTIC SEARCH
|
| 19 |
def preprocess_text(text):
|
|
@@ -44,6 +50,9 @@ def preprocess_text(text):
|
|
| 44 |
# Call the preprocess_text function and store the result in a cleaned_chunks variable
|
| 45 |
cleaned_chunks = preprocess_text(quentins_knowledge)
|
| 46 |
|
|
|
|
|
|
|
|
|
|
| 47 |
#STEP 4 FROM SEMANTIC SEARCH
|
| 48 |
# Load the pre-trained embedding model that converts text to vectors
|
| 49 |
model = SentenceTransformer('all-MiniLM-L6-v2')
|
|
@@ -64,6 +73,9 @@ def create_embeddings(text_chunks):
|
|
| 64 |
# Call the create_embeddings function and store the result in a new chunk_embeddings variable
|
| 65 |
chunk_embeddings = create_embeddings(cleaned_chunks)
|
| 66 |
|
|
|
|
|
|
|
|
|
|
| 67 |
#STEP 5 FROM SEMANTIC SEARCH
|
| 68 |
# Define a function to find the most relevant text chunks for a given query, chunk_embeddings, and text_chunks
|
| 69 |
def get_top_chunks(query, chunk_embeddings, text_chunks):
|
|
@@ -99,18 +111,15 @@ def get_top_chunks(query, chunk_embeddings, text_chunks):
|
|
| 99 |
# Return the list of most relevant chunks
|
| 100 |
return top_chunks
|
| 101 |
|
| 102 |
-
#STEP 6 FROM SEMANTIC SEARCH
|
| 103 |
-
# Call the get_top_chunks function with the original query
|
| 104 |
-
top_results = get_top_chunks("How does water get into the sky?", chunk_embeddings, cleaned_chunks)
|
| 105 |
-
|
| 106 |
-
# Print the top results
|
| 107 |
-
print(top_results)
|
| 108 |
-
|
| 109 |
client = InferenceClient("google/gemma-3-27b-it")
|
| 110 |
|
| 111 |
def respond(message, history, name, mood):
|
| 112 |
-
|
| 113 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 114 |
duck_info = "\n".join(duck_chunks)
|
| 115 |
messages = [{"role": "system", "content": f"You are an extremely {mood} chatbot named Quentin. You are a rubber duck, with strong human emotions who helps the user with their problem. You talk to the user, whose name is {name}, in a way that reflects your {mood} mood. Make sure to use duck-themed references in your responses. Refer to the user by name as much as possible. Base your response on the provided context: {duck_info}. Always end your response with a brief, punchy tagline."}]
|
| 116 |
|
|
@@ -161,6 +170,7 @@ with gr.Blocks(theme=gr.themes.Citrus(
|
|
| 161 |
)) as chatbot:
|
| 162 |
with gr.Row(scale=1):
|
| 163 |
gr.Image("ask_quentin_banner.jpg", show_label = False, show_share_button = False, show_download_button = False)
|
|
|
|
| 164 |
with gr.Row(scale=3):
|
| 165 |
with gr.Column(scale=1):
|
| 166 |
gr.Image("Quentin.png", show_label = False, show_share_button = False, show_download_button = False)
|
|
|
|
| 12 |
# Read the entire contents of the file and store it in a variable
|
| 13 |
quentins_knowledge = file.read()
|
| 14 |
|
| 15 |
+
#SECOND FEATURE
|
| 16 |
+
with open("quentins_alt_knowledge.txt", "r", encoding="utf-8") as file:
|
| 17 |
+
# Read the entire contents of the file and store it in a variable
|
| 18 |
+
quentins_alt_knowledge = file.read()
|
| 19 |
+
|
| 20 |
# Print the text below
|
| 21 |
print(quentins_knowledge)
|
| 22 |
+
print(quentins_alt_knowledge)
|
| 23 |
|
| 24 |
#STEP 3 FROM SEMANTIC SEARCH
|
| 25 |
def preprocess_text(text):
|
|
|
|
| 50 |
# Call the preprocess_text function and store the result in a cleaned_chunks variable
|
| 51 |
cleaned_chunks = preprocess_text(quentins_knowledge)
|
| 52 |
|
| 53 |
+
#SECOND FEATURE
|
| 54 |
+
cleaned_alt_chunks = preprocess_text(quentins_alt_knowledge)
|
| 55 |
+
|
| 56 |
#STEP 4 FROM SEMANTIC SEARCH
|
| 57 |
# Load the pre-trained embedding model that converts text to vectors
|
| 58 |
model = SentenceTransformer('all-MiniLM-L6-v2')
|
|
|
|
| 73 |
# Call the create_embeddings function and store the result in a new chunk_embeddings variable
|
| 74 |
chunk_embeddings = create_embeddings(cleaned_chunks)
|
| 75 |
|
| 76 |
+
#SECOND FEATURE
|
| 77 |
+
alt_chunk_embeddings = create_embeddings(cleaned_alt_chunks)
|
| 78 |
+
|
| 79 |
#STEP 5 FROM SEMANTIC SEARCH
|
| 80 |
# Define a function to find the most relevant text chunks for a given query, chunk_embeddings, and text_chunks
|
| 81 |
def get_top_chunks(query, chunk_embeddings, text_chunks):
|
|
|
|
| 111 |
# Return the list of most relevant chunks
|
| 112 |
return top_chunks
|
| 113 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 114 |
client = InferenceClient("google/gemma-3-27b-it")
|
| 115 |
|
| 116 |
def respond(message, history, name, mood):
|
| 117 |
+
if "Self Help" in quentin_topic:
|
| 118 |
+
duck_chunks = get_top_chunks(message, chunk_embeddings, cleaned_chunks)
|
| 119 |
+
print(duck_chunks)
|
| 120 |
+
elif "Duck Facts" in quentin_topic:
|
| 121 |
+
duck_chunks = get_top_chunks(message, alt_chunk_embeddings, cleaned_alt_chunks)
|
| 122 |
+
print(duck_chunks)
|
| 123 |
duck_info = "\n".join(duck_chunks)
|
| 124 |
messages = [{"role": "system", "content": f"You are an extremely {mood} chatbot named Quentin. You are a rubber duck, with strong human emotions who helps the user with their problem. You talk to the user, whose name is {name}, in a way that reflects your {mood} mood. Make sure to use duck-themed references in your responses. Refer to the user by name as much as possible. Base your response on the provided context: {duck_info}. Always end your response with a brief, punchy tagline."}]
|
| 125 |
|
|
|
|
| 170 |
)) as chatbot:
|
| 171 |
with gr.Row(scale=1):
|
| 172 |
gr.Image("ask_quentin_banner.jpg", show_label = False, show_share_button = False, show_download_button = False)
|
| 173 |
+
quentin_topic = gr.CheckboxGroup(["Self Help", "Duck Facts"], label="What do you want help with?")
|
| 174 |
with gr.Row(scale=3):
|
| 175 |
with gr.Column(scale=1):
|
| 176 |
gr.Image("Quentin.png", show_label = False, show_share_button = False, show_download_button = False)
|