Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -64,7 +64,7 @@ def extract_pdf_content(drive_url):
|
|
| 64 |
def create_vector_store(text):
|
| 65 |
# Split the text into sentences and clean it
|
| 66 |
sentences = [sentence.strip() for sentence in text.split(". ") if sentence.strip()]
|
| 67 |
-
|
| 68 |
# Use Hugging Face transformer model for embeddings
|
| 69 |
model_name = "sentence-transformers/all-MiniLM-L6-v2"
|
| 70 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
|
@@ -73,17 +73,14 @@ def create_vector_store(text):
|
|
| 73 |
def embed(sentence):
|
| 74 |
tokens = tokenizer(sentence, return_tensors="pt", truncation=True, padding=True)
|
| 75 |
with torch.no_grad():
|
| 76 |
-
embeddings = model(**tokens).last_hidden_state.mean(dim=1).numpy()
|
| 77 |
return embeddings
|
| 78 |
|
| 79 |
-
# Generate embeddings for cleaned sentences
|
| 80 |
-
embeddings = [embed(sentence)[0] for sentence in sentences]
|
| 81 |
-
|
| 82 |
-
# Convert embeddings to a numpy array
|
| 83 |
-
embeddings = np.array(embeddings, dtype=np.float32)
|
| 84 |
-
|
| 85 |
# Create a FAISS vector store with sentences and their embeddings
|
| 86 |
-
vector_store = FAISS.
|
|
|
|
|
|
|
|
|
|
| 87 |
return vector_store, sentences
|
| 88 |
|
| 89 |
# Streamlit app
|
|
|
|
| 64 |
def create_vector_store(text):
|
| 65 |
# Split the text into sentences and clean it
|
| 66 |
sentences = [sentence.strip() for sentence in text.split(". ") if sentence.strip()]
|
| 67 |
+
|
| 68 |
# Use Hugging Face transformer model for embeddings
|
| 69 |
model_name = "sentence-transformers/all-MiniLM-L6-v2"
|
| 70 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
|
|
|
| 73 |
def embed(sentence):
|
| 74 |
tokens = tokenizer(sentence, return_tensors="pt", truncation=True, padding=True)
|
| 75 |
with torch.no_grad():
|
| 76 |
+
embeddings = model(**tokens).last_hidden_state.mean(dim=1).squeeze().numpy()
|
| 77 |
return embeddings
|
| 78 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 79 |
# Create a FAISS vector store with sentences and their embeddings
|
| 80 |
+
vector_store = FAISS.from_texts(
|
| 81 |
+
texts=sentences,
|
| 82 |
+
embedding_function=embed
|
| 83 |
+
)
|
| 84 |
return vector_store, sentences
|
| 85 |
|
| 86 |
# Streamlit app
|