Spaces:

NHZ
/

First_Aid_Kit

Sleeping

NHZ committed on Jan 4, 2025

Commit

b8b3983

verified ·

1 Parent(s): 3fd2783

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -64,7 +64,7 @@ def extract_pdf_content(drive_url):
 def create_vector_store(text):
     # Split the text into sentences and clean it
     sentences = [sentence.strip() for sentence in text.split(". ") if sentence.strip()]
     # Use Hugging Face transformer model for embeddings
     model_name = "sentence-transformers/all-MiniLM-L6-v2"
     tokenizer = AutoTokenizer.from_pretrained(model_name)
@@ -73,17 +73,14 @@ def create_vector_store(text):
     def embed(sentence):
         tokens = tokenizer(sentence, return_tensors="pt", truncation=True, padding=True)
         with torch.no_grad():
-            embeddings = model(**tokens).last_hidden_state.mean(dim=1).numpy()
         return embeddings
-    # Generate embeddings for cleaned sentences
-    embeddings = [embed(sentence)[0] for sentence in sentences]
-    # Convert embeddings to a numpy array
-    embeddings = np.array(embeddings, dtype=np.float32)
     # Create a FAISS vector store with sentences and their embeddings
-    vector_store = FAISS.from_embeddings(embeddings=embeddings, texts=sentences)
     return vector_store, sentences
 # Streamlit app

 def create_vector_store(text):
     # Split the text into sentences and clean it
     sentences = [sentence.strip() for sentence in text.split(". ") if sentence.strip()]
     # Use Hugging Face transformer model for embeddings
     model_name = "sentence-transformers/all-MiniLM-L6-v2"
     tokenizer = AutoTokenizer.from_pretrained(model_name)
     def embed(sentence):
         tokens = tokenizer(sentence, return_tensors="pt", truncation=True, padding=True)
         with torch.no_grad():
+            embeddings = model(**tokens).last_hidden_state.mean(dim=1).squeeze().numpy()
         return embeddings
     # Create a FAISS vector store with sentences and their embeddings
+    vector_store = FAISS.from_texts(
+        texts=sentences,
+        embedding_function=embed
+    )
     return vector_store, sentences
 # Streamlit app