Spaces:
Runtime error
Runtime error
Added scikit-learn to requirements
Browse files
- app.py +2 -3
- requirements.txt +2 -1
app.py
CHANGED
|
@@ -6,7 +6,6 @@ import tensorflow_hub as hub
|
|
| 6 |
import openai
|
| 7 |
import gradio as gr
|
| 8 |
import os
|
| 9 |
-
from tqdm.auto import tqdm
|
| 10 |
from sklearn.neighbors import NearestNeighbors
|
| 11 |
|
| 12 |
|
|
@@ -29,7 +28,7 @@ def pdf_to_text(path, start_page=1, end_page=None):
|
|
| 29 |
|
| 30 |
text_list = []
|
| 31 |
|
| 32 |
-
for i in tqdm(range(start_page-1, end_page)):
|
| 33 |
text = doc.load_page(i).get_text("text")
|
| 34 |
text = preprocess(text)
|
| 35 |
text_list.append(text)
|
|
@@ -84,7 +83,7 @@ class SemanticSearch:
|
|
| 84 |
|
| 85 |
def get_text_embedding(self, texts, batch=1000):
|
| 86 |
embeddings = []
|
| 87 |
-
for i in tqdm(range(0, len(texts), batch)):
|
| 88 |
text_batch = texts[i:(i+batch)]
|
| 89 |
emb_batch = self.use(text_batch)
|
| 90 |
embeddings.append(emb_batch)
|
|
|
|
| 6 |
import openai
|
| 7 |
import gradio as gr
|
| 8 |
import os
|
|
|
|
| 9 |
from sklearn.neighbors import NearestNeighbors
|
| 10 |
|
| 11 |
|
|
|
|
| 28 |
|
| 29 |
text_list = []
|
| 30 |
|
| 31 |
+
for i in range(start_page-1, end_page):
|
| 32 |
text = doc.load_page(i).get_text("text")
|
| 33 |
text = preprocess(text)
|
| 34 |
text_list.append(text)
|
|
|
|
| 83 |
|
| 84 |
def get_text_embedding(self, texts, batch=1000):
|
| 85 |
embeddings = []
|
| 86 |
+
for i in range(0, len(texts), batch):
|
| 87 |
text_batch = texts[i:(i+batch)]
|
| 88 |
emb_batch = self.use(text_batch)
|
| 89 |
embeddings.append(emb_batch)
|
requirements.txt
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
PyMuPDF
|
| 2 |
openai
|
| 3 |
tensorflow==2.9.2
|
| 4 |
-
tensorflow-hub==0.12.0
|
|
|
|
|
|
| 1 |
PyMuPDF
|
| 2 |
openai
|
| 3 |
tensorflow==2.9.2
|
| 4 |
+
tensorflow-hub==0.12.0
|
| 5 |
+
scikit-learn==1.0.2
|