Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -59,8 +59,10 @@ def extract_pdf_content(drive_url):
|
|
| 59 |
|
| 60 |
# Function to create a FAISS vector store
|
| 61 |
def create_vector_store(text):
|
|
|
|
| 62 |
sentences = [sentence.strip() for sentence in text.split(". ") if sentence.strip()]
|
| 63 |
|
|
|
|
| 64 |
model_name = "sentence-transformers/all-MiniLM-L6-v2"
|
| 65 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
| 66 |
model = AutoModel.from_pretrained(model_name)
|
|
@@ -71,9 +73,11 @@ def create_vector_store(text):
|
|
| 71 |
embeddings = model(**tokens).last_hidden_state.mean(dim=1).squeeze().numpy()
|
| 72 |
return embeddings
|
| 73 |
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
|
|
|
|
|
|
| 77 |
|
| 78 |
return vector_store, sentences
|
| 79 |
|
|
|
|
| 59 |
|
| 60 |
# Function to create a FAISS vector store
|
| 61 |
def create_vector_store(text):
|
| 62 |
+
# Split the text into sentences and clean it
|
| 63 |
sentences = [sentence.strip() for sentence in text.split(". ") if sentence.strip()]
|
| 64 |
|
| 65 |
+
# Load the model and tokenizer from Hugging Face
|
| 66 |
model_name = "sentence-transformers/all-MiniLM-L6-v2"
|
| 67 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
| 68 |
model = AutoModel.from_pretrained(model_name)
|
|
|
|
| 73 |
embeddings = model(**tokens).last_hidden_state.mean(dim=1).squeeze().numpy()
|
| 74 |
return embeddings
|
| 75 |
|
| 76 |
+
# Create a function that directly returns embeddings
|
| 77 |
+
embedding_function = lambda x: embed(x)
|
| 78 |
+
|
| 79 |
+
# Create a FAISS vector store
|
| 80 |
+
vector_store = FAISS.from_texts(texts=sentences, embedding=embedding_function)
|
| 81 |
|
| 82 |
return vector_store, sentences
|
| 83 |
|