Spaces:
Runtime error
Runtime error
Remove inflect
Browse files
- extractor/_utils.py +5 -3
- extractor/extract.py +0 -3
extractor/_utils.py
CHANGED
|
@@ -1,12 +1,12 @@
|
|
| 1 |
import nmslib
|
| 2 |
import numpy as np
|
| 3 |
import streamlit as st
|
| 4 |
-
import inflect
|
| 5 |
import torch
|
| 6 |
|
| 7 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 8 |
|
| 9 |
-
p = inflect.engine()
|
| 10 |
|
| 11 |
class FewDocumentsError(Exception):
|
| 12 |
def __init__(self, documents, size, msg):
|
|
@@ -18,7 +18,9 @@ class FewDocumentsError(Exception):
|
|
| 18 |
return repr(self.msg)
|
| 19 |
|
| 20 |
def document_extraction(dataset, query, keywords, min_document_size, min_just_one_paragraph_size):
|
| 21 |
-
|
|
|
|
|
|
|
| 22 |
lower_dataset = [document.lower() for document in dataset]
|
| 23 |
lower_query = query.lower()
|
| 24 |
lower_keywords = [keyword.lower() for keyword in keywords]
|
|
|
|
| 1 |
import nmslib
|
| 2 |
import numpy as np
|
| 3 |
import streamlit as st
|
| 4 |
+
# import inflect
|
| 5 |
import torch
|
| 6 |
|
| 7 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 8 |
|
| 9 |
+
# p = inflect.engine()
|
| 10 |
|
| 11 |
class FewDocumentsError(Exception):
|
| 12 |
def __init__(self, documents, size, msg):
|
|
|
|
| 18 |
return repr(self.msg)
|
| 19 |
|
| 20 |
def document_extraction(dataset, query, keywords, min_document_size, min_just_one_paragraph_size):
|
| 21 |
+
# TODO: compare inflected forms
|
| 22 |
+
# word_in_text = lambda word, text: any([p.compare(word, w) for w in text.split()])
|
| 23 |
+
word_in_text = lambda word, text: word in set(text.split())
|
| 24 |
lower_dataset = [document.lower() for document in dataset]
|
| 25 |
lower_query = query.lower()
|
| 26 |
lower_keywords = [keyword.lower() for keyword in keywords]
|
extractor/extract.py
CHANGED
|
@@ -61,9 +61,6 @@ def extract(query: str, n: int=3, extracted_documents: list=None) -> str:
|
|
| 61 |
number_of_similar_files=10
|
| 62 |
)
|
| 63 |
|
| 64 |
-
from pprint import pprint
|
| 65 |
-
pprint(selected_paragraphs[:n])
|
| 66 |
-
|
| 67 |
text = '\n'.join(selected_paragraphs[:n])
|
| 68 |
|
| 69 |
return text
|
|
|
|
| 61 |
number_of_similar_files=10
|
| 62 |
)
|
| 63 |
|
|
|
|
|
|
|
|
|
|
| 64 |
text = '\n'.join(selected_paragraphs[:n])
|
| 65 |
|
| 66 |
return text
|