Spaces:
Sleeping
Sleeping
help
Browse files
app.py
CHANGED
|
@@ -53,7 +53,9 @@ def preprocess_text(text):
|
|
| 53 |
return cleaned_chunks
|
| 54 |
|
| 55 |
# Call the preprocess_text function and store the result in a cleaned_chunks variable
|
| 56 |
-
|
|
|
|
|
|
|
| 57 |
|
| 58 |
#STEP 4 FROM SEMANTIC SEARCH
|
| 59 |
|
|
@@ -74,13 +76,14 @@ def create_embeddings(text_chunks):
|
|
| 74 |
return chunk_embeddings
|
| 75 |
|
| 76 |
# Call the create_embeddings function and store the result in a new chunk_embeddings variable
|
| 77 |
-
|
| 78 |
-
|
|
|
|
| 79 |
|
| 80 |
#STEP 5 FROM SEMANTIC SEARCH
|
| 81 |
|
| 82 |
# Define a function to find the most relevant text chunks for a given query, chunk_embeddings, and text_chunks
|
| 83 |
-
def
|
| 84 |
# Convert the query text into a vector embedding
|
| 85 |
query_embedding = model.encode(query, convert_to_tensor=True) # Complete this line
|
| 86 |
|
|
@@ -112,11 +115,13 @@ def get_top_chunks(query, chunk_embeddings, text_chunks):
|
|
| 112 |
top_chunks.append(relevant_chunk)
|
| 113 |
|
| 114 |
# Return the list of most relevant chunks
|
| 115 |
-
return
|
|
|
|
|
|
|
| 116 |
|
| 117 |
#STEP 6 FROM SEMANTIC SEARCH
|
| 118 |
# Call the get_top_chunks function with the original query
|
| 119 |
-
top_results =
|
| 120 |
|
| 121 |
# Print the top results
|
| 122 |
print(top_results)
|
|
|
|
| 53 |
return cleaned_chunks
|
| 54 |
|
| 55 |
# Call the preprocess_text function and store the result in a cleaned_chunks variable
|
| 56 |
+
# STEP 3: preprocess each source text into a list of cleaned chunks
cleaned_slang_chunks = preprocess_text(slang_text)
cleaned_sejal_chunks = preprocess_text(sejal_text)
cleaned_shanvi_chunks = preprocess_text(shanvi_text)
|
| 59 |
|
| 60 |
#STEP 4 FROM SEMANTIC SEARCH
|
| 61 |
|
|
|
|
| 76 |
return chunk_embeddings
|
| 77 |
|
| 78 |
# Call the create_embeddings function and store the result in a new chunk_embeddings variable
|
| 79 |
+
# STEP 4: embed each cleaned chunk list so chunks can be compared to queries
chunk_embeddings_slang_text = create_embeddings(cleaned_slang_chunks)
chunk_embeddings_sejal_text = create_embeddings(cleaned_sejal_chunks)
chunk_embeddings_shanvi_text = create_embeddings(cleaned_shanvi_chunks)
|
| 82 |
|
| 83 |
#STEP 5 FROM SEMANTIC SEARCH
|
| 84 |
|
| 85 |
# Define a function to find the most relevant text chunks for a given query, chunk_embeddings, and text_chunks
|
| 86 |
+
def get_slang_top_chunks(query, chunk_embeddings, text_chunks):
|
| 87 |
# Convert the query text into a vector embedding
|
| 88 |
query_embedding = model.encode(query, convert_to_tensor=True) # Complete this line
|
| 89 |
|
|
|
|
| 115 |
top_chunks.append(relevant_chunk)
|
| 116 |
|
| 117 |
# Return the list of most relevant chunks
|
| 118 |
+
return top_slang_chunks
|
| 119 |
+
|
| 120 |
+
|
| 121 |
|
| 122 |
#STEP 6 FROM SEMANTIC SEARCH
|
| 123 |
# Call the get_top_chunks function with the original query
|
| 124 |
+
# BUG FIX: the function is defined as `get_slang_top_chunks` (not `get_top_slang_chunks`),
# and the chunk list paired with `chunk_embeddings_slang_text` is `cleaned_slang_chunks`
# (`cleaned_chunks` is never defined in this file).
top_results = get_slang_top_chunks("How does water get into the sky?", chunk_embeddings_slang_text, cleaned_slang_chunks)
|
| 125 |
|
| 126 |
# Print the top results
|
| 127 |
print(top_results)  # Display the chunks judged most relevant to the sample query
|