shaina committed on
Commit
50dc97e
·
1 Parent(s): a435f62

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +108 -9
README.md CHANGED
@@ -1,12 +1,111 @@
1
  ---
2
- language:
3
- - en
4
-
5
- tags:
6
- - COVID-19
7
- - MPNet
8
- license: "MIT"
9
  datasets:
10
- - https://huggingface.co/datasets/shaina/covid19
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
- ---
 
1
  ---
2
+ language: en
3
+ license: mit
 
 
 
 
 
4
  datasets:
5
+ - covid19
6
+ ---
7
+
8
+ # CoQUAD_MPNet: MPNet model for COVID-19
9
+
10
+ ## Introduction
11
+
12
+ CoQUAD_MPNet is a state-of-the-art MPNet-based language model fine-tuned on a COVID-19 dataset, with a focus on post-COVID conditions.
13
+ ## How to use for Deepset Haystack
14
+
15
+ %cd /content/drive/MyDrive
16
+ !sudo apt-get install git-lfs
17
+
18
+ !git lfs install
19
+
20
+ !git clone https://huggingface.co/shaina/CoQUAD_MPNet
21
+ # if you want to clone without large files – just their pointers
22
+ # prepend your git clone with the following env var:
23
+ GIT_LFS_SKIP_SMUDGE=1
24
+
25
+
26
+
27
+
28
+ from haystack.utils import clean_wiki_text, convert_files_to_dicts, fetch_archive_from_http, print_answers
29
+ from haystack.nodes import FARMReader, TransformersReader
30
+ # Recommended: Start Elasticsearch using Docker via the Haystack utility function
31
+ from haystack.utils import launch_es
32
+
33
+ launch_es()
34
+ # In Colab / No Docker environments: Start Elasticsearch from source
35
+ ! wget https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-7.9.2-linux-x86_64.tar.gz -q
36
+ ! tar -xzf elasticsearch-7.9.2-linux-x86_64.tar.gz
37
+ ! chown -R daemon:daemon elasticsearch-7.9.2
38
+
39
+ import os
40
+ from subprocess import Popen, PIPE, STDOUT
41
+ es_server = Popen(['elasticsearch-7.9.2/bin/elasticsearch'],
42
+ stdout=PIPE, stderr=STDOUT,
43
+ preexec_fn=lambda: os.setuid(1) # as daemon
44
+ )
45
+ # wait until ES has started
46
+ ! sleep 30
47
+ # Connect to Elasticsearch
48
+
49
+ from haystack.document_stores import ElasticsearchDocumentStore
50
+ document_store = ElasticsearchDocumentStore(host="localhost", username="", password="", index="document")
51
+
52
+ import pandas as pd
53
+ df=pd.read_excel('/content/covid.xlsx')
54
+ df.fillna(value="", inplace=True)
55
+
56
+ print(df.head())
57
+
58
+ from typing import List
59
+ import requests
60
+ import pandas as pd
61
+ from haystack import Document
62
+ from haystack.document_stores import FAISSDocumentStore
63
+ from haystack.nodes import RAGenerator, DensePassageRetriever
64
+
65
+ # Use data to initialize Document objects
66
+ titles = list(df["document_identifier"].values)
67
+ texts = list(df["document_text"].values)
68
+ documents: List[Document] = []
69
+ for title, text in zip(titles, texts):
70
+ documents.append(
71
+ Document(
72
+ content=text,
73
+ meta={
74
+ "name": title or ""
75
+ }
76
+ )
77
+ )
78
+
79
+
80
+
81
+
82
+ # Now, let's write the dicts containing documents to our DB.
83
+ document_store.write_documents(documents)
84
+
85
+ from haystack.nodes import ElasticsearchRetriever
86
+ retriever = ElasticsearchRetriever(document_store=document_store)
87
+ reader = FARMReader(model_name_or_path="/content/drive/MyDrive/CoQUAD_MPNet", use_gpu=True)
88
+
89
+ from haystack.pipelines import ExtractiveQAPipeline
90
+ pipe = ExtractiveQAPipeline(reader, retriever)
91
+ # You can configure how many candidates the reader and retriever shall return
92
+ # The higher top_k_retriever, the better (but also the slower) your answers.
93
+
94
+
95
+ prediction = pipe.run(
96
+ query="What is post-COVID?", params={"Retriever": {"top_k": 10}, "Reader": {"top_k": 5}}
97
+ )
98
+
99
+ # Now you can either print the object directly...
100
+ from pprint import pprint
101
+
102
+ pprint(prediction)
103
+
104
+
105
+
106
+ ## Authors
107
+
108
+ Shaina Raza
109
+
110
 
111
+ ```