Update app.py
Browse files
app.py
CHANGED
|
@@ -10,7 +10,7 @@ with open("knowledge.txt", "r", encoding="utf-8") as file:
|
|
| 10 |
# opens the text, saves as "file"
|
| 11 |
# reads the text and saves as water_cycle_text variable
|
| 12 |
|
| 13 |
-
|
| 14 |
|
| 15 |
cleaned_text = recent.strip()
|
| 16 |
# cleaning up the text
|
|
@@ -24,13 +24,13 @@ for chunk in chunks:
|
|
| 24 |
if stripped_chunk:
|
| 25 |
cleaned_chunks.append(stripped_chunk)
|
| 26 |
# loop through chunks and add non-empty chunks to the cleaned_chunks list
|
| 27 |
-
|
| 28 |
|
| 29 |
model = SentenceTransformer('all-MiniLM-L6-v2')
|
| 30 |
|
| 31 |
chunk_embeddings = model.encode(cleaned_chunks, convert_to_tensor=True)
|
| 32 |
# encode the model, pass through my cleaned chunks and convert to vector embeddings (not arrays)
|
| 33 |
-
|
| 34 |
|
| 35 |
def get_top_chunks(query):
|
| 36 |
# create my function taking query as parameter
|
|
@@ -42,10 +42,10 @@ def get_top_chunks(query):
|
|
| 42 |
# normalizing chunks for comparison of meaning
|
| 43 |
|
| 44 |
similarities = torch.matmul(chunk_embeddings_normalized, query_embedding_normalized)
|
| 45 |
-
|
| 46 |
# using matmul (matrix multiplication method) to compare query to chunks
|
| 47 |
top_indices = torch.topk(similarities, k=3).indices
|
| 48 |
-
|
| 49 |
# get the indices of the chunks that are most similar to query
|
| 50 |
|
| 51 |
top_chunks = []
|
|
|
|
| 10 |
# opens the text, saves as "file"
|
| 11 |
# reads the text and saves as water_cycle_text variable
|
| 12 |
|
| 13 |
+
|
| 14 |
|
| 15 |
cleaned_text = recent.strip()
|
| 16 |
# cleaning up the text
|
|
|
|
| 24 |
if stripped_chunk:
|
| 25 |
cleaned_chunks.append(stripped_chunk)
|
| 26 |
# loop through chunks and add non-empty chunks to the cleaned_chunks list
|
| 27 |
+
|
| 28 |
|
| 29 |
model = SentenceTransformer('all-MiniLM-L6-v2')
|
| 30 |
|
| 31 |
chunk_embeddings = model.encode(cleaned_chunks, convert_to_tensor=True)
|
| 32 |
# encode the model, pass through my cleaned chunks and convert to vector embeddings (not arrays)
|
| 33 |
+
|
| 34 |
|
| 35 |
def get_top_chunks(query):
|
| 36 |
# create my function taking query as parameter
|
|
|
|
| 42 |
# normalizing chunks for comparison of meaning
|
| 43 |
|
| 44 |
similarities = torch.matmul(chunk_embeddings_normalized, query_embedding_normalized)
|
| 45 |
+
|
| 46 |
# using matmul (matrix multiplication method) to compare query to chunks
|
| 47 |
top_indices = torch.topk(similarities, k=3).indices
|
| 48 |
+
|
| 49 |
# get the indices of the chunks that are most similar to query
|
| 50 |
|
| 51 |
top_chunks = []
|