Nevermined
/

test_haystack

Model card Files Files and versions

josepablonevermined committed on Aug 17, 2023

Commit

cd7089a

·

1 Parent(s): f794aff

Upload 2 files

Files changed (2) hide show

handler.py +54 -0
requirements.txt +3 -0

handler.py ADDED Viewed

	@@ -0,0 +1,54 @@

+import os
+from haystack.utils import fetch_archive_from_http, clean_wiki_text, convert_files_to_docs
+from haystack.schema import Answer
+from haystack.document_stores import InMemoryDocumentStore
+from haystack.pipelines import ExtractiveQAPipeline
+from haystack.nodes import FARMReader, TfidfRetriever
+import logging
+import json
+# Set the TOKENIZERS_PARALLELISM environment variable to "false" for this process.
+# NOTE(review): presumably this silences the Hugging Face tokenizers parallelism/fork warning — confirm.
+os.environ['TOKENIZERS_PARALLELISM'] ="false"
+# Haystack components: pipeline construction and data loading.
+def start_haystack():
+    """Build and return the extractive question-answering pipeline.
+
+    An in-memory document store is created and filled by ``load_and_write_data``;
+    a TF-IDF retriever is then constructed on that store and combined with a
+    FARM reader ("deepset/roberta-base-squad2-distilled", GPU requested) into an
+    ``ExtractiveQAPipeline``.
+    """
+    store = InMemoryDocumentStore()
+    # Populate the store first; the retriever is constructed on the already-filled store.
+    load_and_write_data(store)
+    tfidf_retriever = TfidfRetriever(document_store=store)
+    farm_reader = FARMReader(
+        model_name_or_path="deepset/roberta-base-squad2-distilled",
+        use_gpu=True,
+    )
+    return ExtractiveQAPipeline(reader=farm_reader, retriever=tfidf_retriever)
+def load_and_write_data(document_store):
+    """Convert the files under ``./dao_data`` to documents and write them to ``document_store``.
+
+    Conversion uses ``clean_wiki_text`` as the cleaning function and splits the
+    text into paragraphs. A progress message is printed before loading starts.
+    """
+    print("Loading data ...")
+    document_store.write_documents(
+        convert_files_to_docs(
+            dir_path='./dao_data',
+            clean_func=clean_wiki_text,
+            split_paragraphs=True,
+        )
+    )
+class EndpointHandler():
+    """Callable request handler that answers questions with the Haystack QA pipeline.
+
+    The pipeline is built once, when the handler is constructed, via ``start_haystack``.
+    """
+    def __init__(self, path=""):
+        # ``path`` is accepted for interface compatibility but is not used by this handler.
+        self.pipeline = start_haystack()
+    def __call__(self, data):
+        """Answer the question carried by ``data``.
+
+        Args:
+            data (:obj:`dict`):
+                request payload; the query is read from its ``"question"`` key.
+                The payload is not modified.
+        Return:
+            ``{}`` (an empty dict) when no ``"question"`` is supplied. Otherwise a
+            JSON string of the form ``{"answer": ...}`` holding the ``answer`` of the
+            first entry in the pipeline's ``"answers"`` list, or ``null`` when the
+            pipeline returned no answers.
+        """
+        # Read without popping so the caller's payload is left intact.
+        question = data.get("question")
+        if question is None:
+            return {}
+        prediction = self.pipeline.run(query=question, params={"Retriever": {"top_k": 10}, "Reader": {"top_k": 5}})
+        # Guard against a missing or empty answer list instead of raising IndexError.
+        answers = prediction.get("answers") or []
+        first_answer = answers[0].answer if answers else None
+        return json.dumps({"answer": first_answer})

requirements.txt ADDED Viewed

	@@ -0,0 +1,3 @@

+farm-haystack==1.19.0
+farm-haystack[inference]==1.19.0
+validators==0.21.1