Spaces:

mingbaer
/

CapstoneProject

Runtime error

mingbaer committed on May 11, 2025

Commit

a50a5bf

verified ·

1 Parent(s): 29f636b

Coded Pull Relevant Info function

Files changed (1) hide show

app.py CHANGED Viewed

@@ -15,3 +15,27 @@ import numpy as np
 with open("essay_writing.txt", "r", encoding="utf-8") as file:
     essay_writing = file.read()

 with open("essay_writing.txt", "r", encoding="utf-8") as file:
     essay_writing = file.read()
+# split the text into chunks
+cleaned_text = essay_writing.strip()
+chunks = cleaned_text.split("\n")
+cleaned_chunks = [chunk.strip() for chunk in chunks if stripped_chunk]
+# load an embedding model
+model = SentenceTransformer('all-MiniLM-L6-v2')
+chunk_embeddings = model.encode(cleaned_chunks, convert_to_tensor=True)
+def pull_relevant_info(query):
+    query_embedding = model.encode(query, convert_to_tensor=True)
+    query_embedding_normalized = query_embedding / query_embedding.norm()
+    chunk_embeddings_normalized = chunk_embeddings / chunk_embeddings.norm(dim=1, keepdim=True)
+    similarities = torch.matmul(chunk_embeddings_normalized, query_embedding_normalized)
+    top_indices = torch.topk(similarities, k=3).indices.cpu().numpy()
+    relevant_info = "\n".join([chunks[i] for i in top_indices])
+    return relevant_info