Spaces:
Sleeping
Sleeping
caching the data loading
Browse files
app.py
CHANGED
|
@@ -6,19 +6,24 @@ from haystack.nodes import FARMReader, TfidfRetriever
|
|
| 6 |
import validators
|
| 7 |
import json
|
| 8 |
|
| 9 |
-
|
| 10 |
document_store = InMemoryDocumentStore()
|
| 11 |
-
docs = convert_files_to_docs(dir_path=doc_dir, clean_func=clean_wiki_text, split_paragraphs=True)
|
| 12 |
-
|
| 13 |
-
document_store.write_documents(docs)
|
| 14 |
retriever = TfidfRetriever(document_store=document_store)
|
| 15 |
reader = FARMReader(model_name_or_path="deepset/roberta-base-squad2", use_gpu=True)
|
| 16 |
-
|
| 17 |
pipeline = ExtractiveQAPipeline(reader, retriever)
|
| 18 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
#Streamlit App
|
| 20 |
|
| 21 |
-
st.
|
|
|
|
|
|
|
| 22 |
|
| 23 |
def ask_question(question):
|
| 24 |
prediction = pipeline.run(query=question, params={"Retriever": {"top_k": 10}, "Reader": {"top_k": 5}})
|
|
|
|
| 6 |
import validators
|
| 7 |
import json
|
| 8 |
|
| 9 |
+
#Haystack Components
|
| 10 |
document_store = InMemoryDocumentStore()
|
|
|
|
|
|
|
|
|
|
| 11 |
retriever = TfidfRetriever(document_store=document_store)
|
| 12 |
reader = FARMReader(model_name_or_path="deepset/roberta-base-squad2", use_gpu=True)
|
|
|
|
| 13 |
pipeline = ExtractiveQAPipeline(reader, retriever)
|
| 14 |
|
| 15 |
+
@st.cache
|
| 16 |
+
def load_and_write_data():
|
| 17 |
+
doc_dir = './article_txt_got'
|
| 18 |
+
docs = convert_files_to_docs(dir_path=doc_dir, clean_func=clean_wiki_text, split_paragraphs=True)
|
| 19 |
+
|
| 20 |
+
document_store.write_documents(docs)
|
| 21 |
+
|
| 22 |
#Streamlit App
|
| 23 |
|
| 24 |
+
st.title('Game of Thrones QA with Haystack')
|
| 25 |
+
|
| 26 |
+
load_and_write_data()
|
| 27 |
|
| 28 |
def ask_question(question):
|
| 29 |
prediction = pipeline.run(query=question, params={"Retriever": {"top_k": 10}, "Reader": {"top_k": 5}})
|