Update app.py
Browse files
app.py
CHANGED
|
@@ -5,6 +5,7 @@ from PyPDF2 import PdfReader
|
|
| 5 |
import gradio as gr
|
| 6 |
from datasets import Dataset, load_from_disk
|
| 7 |
from sentence_transformers import SentenceTransformer
|
|
|
|
| 8 |
|
| 9 |
# Extract text from PDF
|
| 10 |
def extract_text_from_pdf(pdf_path):
|
|
@@ -45,7 +46,19 @@ os.makedirs(index_path, exist_ok=True)
|
|
| 45 |
# Save the dataset to disk and create an index
|
| 46 |
dataset.save_to_disk(dataset_path)
|
| 47 |
dataset = load_from_disk(dataset_path)
|
| 48 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
|
| 50 |
# Custom retriever
|
| 51 |
def retrieve(query):
|
|
|
|
| 5 |
import gradio as gr
|
| 6 |
from datasets import Dataset, load_from_disk
|
| 7 |
from sentence_transformers import SentenceTransformer
|
| 8 |
+
import numpy as np
|
| 9 |
|
| 10 |
# Extract text from PDF
|
| 11 |
def extract_text_from_pdf(pdf_path):
|
|
|
|
| 46 |
# Save the dataset to disk and create an index
|
| 47 |
dataset.save_to_disk(dataset_path)
|
| 48 |
dataset = load_from_disk(dataset_path)
|
| 49 |
+
|
| 50 |
+
# Attach a FAISS index to the dataset's embedding column
def add_faiss_index(dataset, column):
    """Attach a FAISS index for ``column`` to ``dataset`` and return it.

    Index construction is delegated entirely to the dataset's own
    ``add_faiss_index`` method. An earlier version also built a separate
    ``faiss.IndexFlatL2`` by hand from ``np.array(dataset[column])`` and then
    discarded it; that duplicated the embeddings in memory and could fail
    (e.g. on an empty column) before the real index was ever created, so it
    has been removed.

    Args:
        dataset: Object exposing ``add_faiss_index(column=...)``
            (presumably a ``datasets.Dataset`` -- confirm against caller).
        column: Name of the column holding the embedding vectors.

    Returns:
        The same ``dataset`` object that was passed in.
    """
    dataset.add_faiss_index(column=column)
    return dataset
|
| 59 |
+
|
| 60 |
+
dataset = add_faiss_index(dataset, column="embeddings")
# `Dataset` has no `save()` method; persist the FAISS index with
# `save_faiss_index`, which expects a file path. `index_path` is a directory,
# so the index is written to a file inside it.
# NOTE(review): any code that reloads the index must use this same file path.
dataset.save_faiss_index("embeddings", os.path.join(index_path, "embeddings.faiss"))
|
| 62 |
|
| 63 |
# Custom retriever
|
| 64 |
def retrieve(query):
|