File size: 4,648 Bytes
19b8775 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 |
"""
Defines a base class that can be used to annotate.
"""
import io
from multiprocessing import Process
from http.server import BaseHTTPRequestHandler, HTTPServer
from http import client as HTTPStatus
from stanza.protobuf import Document, parseFromDelimitedString, writeToDelimitedString
class Annotator(Process):
    """
    Base class for annotators that CoreNLP calls over HTTP.

    This annotator base class hosts a lightweight server that accepts
    annotation requests from CoreNLP. Each annotator defines ``name``,
    ``requires``, ``provides`` and ``annotate``; this class takes care of
    defining appropriate endpoints to interface with CoreNLP:

    * ``GET /ping`` answers ``pong``.
    * ``POST /annotate`` reads a delimited protobuf ``Document``, passes it
      to :meth:`annotate` and writes the (mutated) document back.

    The class is a ``multiprocessing.Process``; the server is started by
    ``start()`` (which executes :meth:`run` in the child process).
    """

    @property
    def name(self):
        """
        Name of the annotator (used by CoreNLP).

        Must be overridden by subclasses. Note that this replaces the
        ``name`` attribute normally provided by ``Process``.
        """
        raise NotImplementedError()

    @property
    def requires(self):
        """
        Requires has to specify all the annotations required before we
        are called.

        Must be overridden by subclasses with an iterable of strings.
        """
        raise NotImplementedError()

    @property
    def provides(self):
        """
        The set of annotations guaranteed to be provided when we are done.

        NOTE: these annotations are either fully qualified Java class
        names or refer to nested classes of
        edu.stanford.nlp.ling.CoreAnnotations.

        Must be overridden by subclasses with an iterable of strings.
        """
        raise NotImplementedError()

    def annotate(self, ann):
        """
        Populate ``ann`` in place.

        @ann: is a protobuf annotation object. The return value is ignored
        by the server; whatever state ``ann`` has after this call is what
        gets serialized back to CoreNLP.
        """
        raise NotImplementedError()

    @property
    def properties(self):
        """
        Defines the Java properties that declare this annotator to CoreNLP.

        Returns a dict mapping property names to string values.
        """
        # An empty host is a valid *bind* address (all interfaces) but not a
        # valid hostname inside a URL, so advertise localhost in that case.
        endpoint_host = self.host or "localhost"
        return {
            "customAnnotatorClass.{}".format(self.name): "edu.stanford.nlp.pipeline.GenericWebServiceAnnotator",
            "generic.endpoint": "http://{}:{}".format(endpoint_host, self.port),
            "generic.requires": ",".join(self.requires),
            "generic.provides": ",".join(self.provides),
        }

    class _Handler(BaseHTTPRequestHandler):
        """HTTP request handler that forwards requests to ``annotator``."""

        # The Annotator instance served by this handler class.
        annotator = None

        def _send_status_only(self, status):
            """Send a body-less response with the given status code."""
            self.send_response(status)
            self.end_headers()

        def _send_payload(self, content_type, payload):
            """Send a 200 response carrying ``payload`` (bytes)."""
            self.send_response(HTTPStatus.OK)
            self.send_header("Content-Type", content_type)
            self.send_header("Content-Length", str(len(payload)))
            self.end_headers()
            self.wfile.write(payload)

        def _normalize_path(self):
            """Make ``self.path`` end with a slash so both spellings match."""
            if not self.path.endswith("/"):
                self.path += "/"

        def do_GET(self):
            """
            Handle a ping request
            """
            self._normalize_path()
            if self.path == "/ping/":
                self._send_payload("text/application", "pong".encode("UTF-8"))
            else:
                self._send_status_only(HTTPStatus.BAD_REQUEST)

        def do_POST(self):
            """
            Handle an annotate request
            """
            self._normalize_path()
            if self.path != "/annotate/":
                self._send_status_only(HTTPStatus.BAD_REQUEST)
                return

            # Read message. A missing, malformed or negative Content-Length
            # is a client error; without this check the handler would raise
            # (or block reading until EOF) and never answer.
            try:
                length = int(self.headers.get('content-length'))
            except (TypeError, ValueError):
                length = -1
            if length < 0:
                self._send_status_only(HTTPStatus.BAD_REQUEST)
                return
            msg = self.rfile.read(length)

            # Do the annotation
            doc = Document()
            parseFromDelimitedString(doc, msg)
            self.annotator.annotate(doc)

            with io.BytesIO() as stream:
                writeToDelimitedString(doc, stream)
                msg = stream.getvalue()

            # write message
            self._send_payload("application/x-protobuf", msg)

    def __init__(self, host="", port=8432):
        """
        Launches a server endpoint to communicate with CoreNLP

        @host: address to bind the server to ("" binds every interface).
        @port: TCP port to listen on.
        """
        Process.__init__(self)
        self.host, self.port = host, port
        # Kept only for backward compatibility with code that reads
        # ``_Handler.annotator`` directly. The running server does not rely
        # on it: this attribute is shared by every Annotator, so it always
        # points at the most recently constructed one.
        self._Handler.annotator = self

    def _make_handler(self):
        """
        Return a handler class bound to this particular annotator.

        A fresh subclass is created so that several annotators can coexist
        without overwriting each other's binding, and so that the binding is
        established inside the process that actually serves requests.
        """
        return type("_BoundHandler", (self._Handler,), {"annotator": self})

    def run(self):
        """
        Runs the server using Python's simple HTTPServer.

        Serves until interrupted, then releases the listening socket.
        TODO: make this multithreaded.
        """
        httpd = HTTPServer((self.host, self.port), self._make_handler())
        try:
            sa = httpd.socket.getsockname()
            serve_message = "Serving HTTP on {host} port {port} (http://{host}:{port}/) ..."
            print(serve_message.format(host=sa[0], port=sa[1]))
            httpd.serve_forever()
        except KeyboardInterrupt:
            print("\nKeyboard interrupt received, exiting.")
        finally:
            # serve_forever() has already returned here, so shutdown() would
            # have nothing to stop; server_close() is what frees the socket.
            httpd.server_close()
|