NHZ committed on
Commit
f8ec047
·
verified ·
1 Parent(s): 7eecbbb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -3
app.py CHANGED
@@ -59,8 +59,10 @@ def extract_pdf_content(drive_url):
59
 
60
  # Function to create a FAISS vector store
61
  def create_vector_store(text):
 
62
  sentences = [sentence.strip() for sentence in text.split(". ") if sentence.strip()]
63
 
 
64
  model_name = "sentence-transformers/all-MiniLM-L6-v2"
65
  tokenizer = AutoTokenizer.from_pretrained(model_name)
66
  model = AutoModel.from_pretrained(model_name)
@@ -71,9 +73,11 @@ def create_vector_store(text):
71
  embeddings = model(**tokens).last_hidden_state.mean(dim=1).squeeze().numpy()
72
  return embeddings
73
 
74
- embeddings = [embed(sentence) for sentence in sentences]
75
- text_embeddings = [(sentences[i], embeddings[i]) for i in range(len(sentences))]
76
- vector_store = FAISS.from_embeddings(text_embeddings)
 
 
77
 
78
  return vector_store, sentences
79
 
 
59
 
60
  # Function to create a FAISS vector store
61
  def create_vector_store(text):
62
+ # Split the text into sentences and clean it
63
  sentences = [sentence.strip() for sentence in text.split(". ") if sentence.strip()]
64
 
65
+ # Load the model and tokenizer from Hugging Face
66
  model_name = "sentence-transformers/all-MiniLM-L6-v2"
67
  tokenizer = AutoTokenizer.from_pretrained(model_name)
68
  model = AutoModel.from_pretrained(model_name)
 
73
  embeddings = model(**tokens).last_hidden_state.mean(dim=1).squeeze().numpy()
74
  return embeddings
75
 
76
+ # Create a function that directly returns embeddings
77
+ embedding_function = lambda x: embed(x)
78
+
79
+ # Create a FAISS vector store
80
+ vector_store = FAISS.from_texts(texts=sentences, embedding=embedding_function)
81
 
82
  return vector_store, sentences
83