Spaces:

Tuana
/

GoT-QA-Haystack

Sleeping

Tuana committed on May 12, 2022

Commit

4ef8a52

1 Parent(s): f4082ea

caching the data loading

Files changed (1) hide show

app.py CHANGED Viewed

@@ -6,19 +6,24 @@ from haystack.nodes import FARMReader, TfidfRetriever
 import validators
 import json
-doc_dir = './article_txt_got'
 document_store = InMemoryDocumentStore()
-docs = convert_files_to_docs(dir_path=doc_dir, clean_func=clean_wiki_text, split_paragraphs=True)
-document_store.write_documents(docs)
 retriever = TfidfRetriever(document_store=document_store)
 reader = FARMReader(model_name_or_path="deepset/roberta-base-squad2", use_gpu=True)
 pipeline = ExtractiveQAPipeline(reader, retriever)
 #Streamlit App
-st.set_page_config(page_title='Game of Thrones QA with Haystack')
 def ask_question(question):
     prediction = pipeline.run(query=question, params={"Retriever": {"top_k": 10}, "Reader": {"top_k": 5}})

 import validators
 import json
+#Haystack Components
 document_store = InMemoryDocumentStore()
 retriever = TfidfRetriever(document_store=document_store)
 reader = FARMReader(model_name_or_path="deepset/roberta-base-squad2", use_gpu=True)
 pipeline = ExtractiveQAPipeline(reader, retriever)
+@st.cache
+def load_and_write_data():
+    doc_dir = './article_txt_got'
+    docs = convert_files_to_docs(dir_path=doc_dir, clean_func=clean_wiki_text, split_paragraphs=True)
+    document_store.write_documents(docs)
 #Streamlit App
+st.title('Game of Thrones QA with Haystack')
+load_and_write_data()
 def ask_question(question):
     prediction = pipeline.run(query=question, params={"Retriever": {"top_k": 10}, "Reader": {"top_k": 5}})