Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -7,10 +7,10 @@ import torch
|
|
| 7 |
|
| 8 |
#STEP 2 FROM SEMANTIC SEARCH
|
| 9 |
# Open the water_cycle.txt file in read mode with UTF-8 encoding
|
| 10 |
-
with open("
|
| 11 |
# Read the entire contents of the file and store it in a variable
|
| 12 |
-
|
| 13 |
-
|
| 14 |
|
| 15 |
#STEP 3 FROM SEMANTIC SEARCH
|
| 16 |
def preprocess_text(text):
|
|
@@ -18,7 +18,7 @@ def preprocess_text(text):
|
|
| 18 |
cleaned_text = text.strip()
|
| 19 |
|
| 20 |
# Split the cleaned_text by every newline character (\n)
|
| 21 |
-
chunks = cleaned_text.split("
|
| 22 |
|
| 23 |
# Create an empty list to store cleaned chunks
|
| 24 |
cleaned_chunks = []
|
|
@@ -40,7 +40,7 @@ def preprocess_text(text):
|
|
| 40 |
return cleaned_chunks
|
| 41 |
|
| 42 |
# Call the preprocess_text function and store the result in a cleaned_chunks variable
|
| 43 |
-
cleaned_chunks = preprocess_text(
|
| 44 |
|
| 45 |
#STEP 4 FROM SEMANTIC SEARCH
|
| 46 |
# Load the pre-trained embedding model that converts text to vectors
|
|
@@ -98,7 +98,9 @@ def get_top_chunks(query, chunk_embeddings, text_chunks):
|
|
| 98 |
|
| 99 |
#STEP 6 FROM SEMANTIC SEARCH
|
| 100 |
# Call the get_top_chunks function with the original query
|
| 101 |
-
|
|
|
|
|
|
|
| 102 |
|
| 103 |
# Print the top results
|
| 104 |
print(top_results)
|
|
@@ -107,11 +109,11 @@ print(top_results)
|
|
| 107 |
client = InferenceClient("Qwen/Qwen2.5-72B-Instruct")
|
| 108 |
|
| 109 |
def respond(message, history):
|
| 110 |
-
|
| 111 |
-
print(
|
| 112 |
-
|
| 113 |
|
| 114 |
-
messages = [{"role": "system", "content": f"You're a friendly and gen z chatbot. Base your response on the provided context: {
|
| 115 |
|
| 116 |
if history:
|
| 117 |
messages.extend(history)
|
|
@@ -128,4 +130,6 @@ def respond(message, history):
|
|
| 128 |
|
| 129 |
chatbot = gr.ChatInterface(respond, type = 'messages')
|
| 130 |
chatbot.launch(debug = True)
|
|
|
|
|
|
|
| 131 |
|
|
|
|
| 7 |
|
| 8 |
#STEP 2 FROM SEMANTIC SEARCH
|
| 9 |
# Open the weather.txt file in read mode with UTF-8 encoding
|
| 10 |
+
with open("weather.txt", "r", encoding="utf-8") as file:
|
| 11 |
# Read the entire contents of the file and store it in a variable
|
| 12 |
+
weather_text = file.read()
|
| 13 |
+
|
| 14 |
|
| 15 |
#STEP 3 FROM SEMANTIC SEARCH
|
| 16 |
def preprocess_text(text):
|
|
|
|
| 18 |
cleaned_text = text.strip()
|
| 19 |
|
| 20 |
# Split the cleaned_text on every "***" delimiter
|
| 21 |
+
chunks = cleaned_text.split("***")
|
| 22 |
|
| 23 |
# Create an empty list to store cleaned chunks
|
| 24 |
cleaned_chunks = []
|
|
|
|
| 40 |
return cleaned_chunks
|
| 41 |
|
| 42 |
# Call the preprocess_text function and store the result in a cleaned_chunks variable
|
| 43 |
+
cleaned_chunks = preprocess_text(weather_text) # Complete this line
|
| 44 |
|
| 45 |
#STEP 4 FROM SEMANTIC SEARCH
|
| 46 |
# Load the pre-trained embedding model that converts text to vectors
|
|
|
|
| 98 |
|
| 99 |
#STEP 6 FROM SEMANTIC SEARCH
|
| 100 |
# Call the get_top_chunks function with the original query
|
| 101 |
+
top_weather = get_top_chunks("How do you make banana bread?", chunk_embeddings, cleaned_chunks)
|
| 102 |
+
|
| 103 |
+
|
| 104 |
|
| 105 |
# Print the top results
|
| 106 |
print(top_weather)  # the get_top_chunks result above is bound to top_weather; top_results is never assigned
|
|
|
|
| 109 |
client = InferenceClient("Qwen/Qwen2.5-72B-Instruct")
|
| 110 |
|
| 111 |
def respond(message, history):
|
| 112 |
+
top_weather = get_top_chunks(message, chunk_embeddings, cleaned_chunks)
|
| 113 |
+
print(top_weather)
|
| 114 |
+
str_top_weather = "\n".join(top_weather)
|
| 115 |
|
| 116 |
+
messages = [{"role": "system", "content": f"You're a friendly and gen z chatbot. Base your response on the provided context: {top_weather}."}]
|
| 117 |
|
| 118 |
if history:
|
| 119 |
messages.extend(history)
|
|
|
|
| 130 |
|
| 131 |
chatbot = gr.ChatInterface(respond, type = 'messages')
|
| 132 |
chatbot.launch(debug = True)
|
| 133 |
+
|
| 134 |
+
|
| 135 |
|