File size: 31,375 Bytes
af1acfc |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 
528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 |
// This schema is written against the proto2 language version.
syntax = "proto2";
// Protobuf package that scopes every type declared in this file.
package edu.stanford.nlp.pipeline;
// Java package used for the generated Java code.
option java_package = "edu.stanford.nlp.pipeline";
// Name of the generated Java outer (wrapper) class containing the messages.
option java_outer_classname = "CoreNLPProtos";
//
// From JAVANLP_HOME, you can build me with the command:
//
// protoc -I=src/edu/stanford/nlp/pipeline/ --java_out=src src/edu/stanford/nlp/pipeline/CoreNLP.proto
//
//
// To do the python version:
//
// protoc -I=./doc --python_out=./stanza/protobuf ./doc/CoreNLP.proto
//
//
// An enumeration for the valid languages allowed in CoreNLP
//
enum Language {
  // Generic values.  Unknown is the zero value and is also the declared
  // default of DependencyGraph.Edge.language.
  Unknown = 0;
  Any = 1;

  // Individual languages.
  Arabic = 2;
  Chinese = 3;
  English = 4;
  German = 5;
  French = 6;
  Hebrew = 7;
  Spanish = 8;

  // "Universal" variants.
  // NOTE(review): presumably the Universal Dependencies flavours - confirm.
  UniversalEnglish = 9;
  UniversalChinese = 10;
}
//
// A document; that is, the equivalent of an Annotation.
//
message Document {
  // Core content: the raw text plus its sentences and coreference chains.
  required string text = 1;
  repeated Sentence sentence = 2;
  repeated CorefChain corefChain = 3;

  // Document-level metadata.
  optional string docID = 4;
  optional string docDate = 7;
  optional uint64 calendar = 8;

  // A peculiar field, for the corner case when a Document is
  // serialized without any sentences.
  repeated Token sentencelessToken = 5;
  repeated Token character = 10;
  repeated Quote quote = 6;

  // Entity mentions across the whole document.
  repeated NERMention mentions = 9;
  // Used to differentiate between a null and an empty mention list.
  optional bool hasEntityMentionsAnnotation = 13;

  // XML information.
  optional bool xmlDoc = 11;
  repeated Section sections = 12;

  // Coref mentions for the entire document.
  repeated Mention mentionsForCoref = 14;
  optional bool hasCorefMentionAnnotation = 15;
  optional bool hasCorefAnnotation = 16;
  repeated int32 corefMentionToEntityMentionMappings = 17;
  repeated int32 entityMentionToCorefMentionMappings = 18;

  // Field numbers set aside for extensions of this message.
  extensions 100 to 255;
}
//
// The serialized version of a CoreMap representing a sentence.
//
message Sentence {
  // Tokens and offsets.
  repeated Token token = 1;
  required uint32 tokenOffsetBegin = 2;
  required uint32 tokenOffsetEnd = 3;
  optional uint32 sentenceIndex = 4;
  optional uint32 characterOffsetBegin = 5;
  optional uint32 characterOffsetEnd = 6;

  // Constituency parses and sentiment.
  optional ParseTree parseTree = 7;
  optional ParseTree binarizedParseTree = 31;
  optional ParseTree annotatedParseTree = 32;
  optional string sentiment = 33;
  repeated ParseTree kBestParseTrees = 34;

  // Dependency graphs.
  optional DependencyGraph basicDependencies = 8;
  optional DependencyGraph collapsedDependencies = 9;
  optional DependencyGraph collapsedCCProcessedDependencies = 10;
  optional DependencyGraph alternativeDependencies = 13;

  // The OpenIE triples in the sentence.
  repeated RelationTriple openieTriple = 14;
  // The KBP triples in this sentence.
  repeated RelationTriple kbpTriple = 16;
  // The entailed sentences, by natural logic.
  repeated SentenceFragment entailedSentence = 15;
  // The entailed clauses, by natural logic.
  repeated SentenceFragment entailedClause = 35;

  optional DependencyGraph enhancedDependencies = 17;
  optional DependencyGraph enhancedPlusPlusDependencies = 18;
  repeated Token character = 19;
  optional uint32 paragraph = 11;
  // Only needed if we're only saving the sentence.
  optional string text = 12;
  optional uint32 lineNumber = 20;

  // Fields set by other annotators in CoreNLP.
  optional bool hasRelationAnnotations = 51;
  repeated Entity entity = 52;
  repeated Relation relation = 53;
  optional bool hasNumerizedTokensAnnotation = 54;
  repeated NERMention mentions = 55;
  repeated Mention mentionsForCoref = 56;
  optional bool hasCorefMentionsAnnotation = 57;
  // Useful when storing sentences (e.g. ForEach).
  optional string sentenceID = 58;
  // Date of the section.
  optional string sectionDate = 59;
  // Section index for this sentence's section.
  optional uint32 sectionIndex = 60;
  // Name of the section.
  optional string sectionName = 61;
  // Author of the section.
  optional string sectionAuthor = 62;
  // Doc id.
  optional string docID = 63;
  // Is this sentence in an xml quote in a post?
  optional bool sectionQuoted = 64;
  // Check if there are entity mentions.
  optional bool hasEntityMentionsAnnotation = 65;
  // Check if there are KBP triples.
  optional bool hasKBPTriplesAnnotation = 68;
  // Check if there are OpenIE triples.
  optional bool hasOpenieTriplesAnnotation = 69;

  // Quote-related fields.
  optional uint32 chapterIndex = 66;
  optional uint32 paragraphIndex = 67;
  // The quote annotator can sometimes add merged sentences.
  optional Sentence enhancedSentence = 70;

  // Speaker-related fields.
  // The speaker speaking this sentence.
  optional string speaker = 71;
  // The type of speaker speaking this sentence.
  optional string speakerType = 72;

  // Field numbers set aside for extensions of this message.
  extensions 100 to 255;
}
//
// The serialized version of a Token (a CoreLabel).
//
message Token {
  // Fields set by the default annotators [new CoreNLP(new Properties())]
  optional string word = 1;            // The word's gloss (post-tokenization)
  optional string pos = 2;             // The word's part of speech tag
  optional string value = 3;           // The word's 'value', (e.g., parse tree node)
  optional string category = 4;        // The word's 'category' (e.g., parse tree node)
  optional string before = 5;          // The whitespace/xml before the token
  optional string after = 6;           // The whitespace/xml after the token
  optional string originalText = 7;    // The original text for this token
  optional string ner = 8;             // The word's NER tag
  optional string coarseNER = 62;      // The word's coarse NER tag
  optional string fineGrainedNER = 63; // The word's fine-grained NER tag
  repeated string nerLabelProbs = 66;  // Listing of probs
  optional string normalizedNER = 9;   // The word's normalized NER tag
  optional string lemma = 10;          // The word's lemma
  optional uint32 beginChar = 11;      // The character offset begin, in the document
  optional uint32 endChar = 12;        // The character offset end, in the document
  optional uint32 utterance = 13;      // The utterance tag used in dcoref
  optional string speaker = 14;        // The speaker speaking this word
  optional string speakerType = 77;    // The type of speaker speaking this word
  optional uint32 beginIndex = 15;     // The begin index of, e.g., a span
  optional uint32 endIndex = 16;       // The end index of, e.g., a span
  optional uint32 tokenBeginIndex = 17; // The begin index of the token
  optional uint32 tokenEndIndex = 18;  // The end index of the token
  optional Timex timexValue = 19;      // The time this word refers to
  optional bool hasXmlContext = 21;    // Used by clean xml annotator
  repeated string xmlContext = 22;     // Used by clean xml annotator
  optional uint32 corefClusterID = 23; // The [primary] cluster id for this token
  optional string answer = 24;         // A temporary annotation which is occasionally left in

  // Field 25 used to be "optional string projectedCategory" (the syntactic
  // category of the maximal constituent headed by the word).  It was not used
  // anywhere, so it was deleted.  The number and the name are reserved so that
  // neither can be reused for a different field by accident.
  reserved 25;
  reserved "projectedCategory";

  optional uint32 headWordIndex = 26;  // The index of the head word of this word.
  // If this is an operator, which one is it and what is its scope (as per Natural Logic)?
  optional Operator operator = 27;
  optional Polarity polarity = 28;     // The polarity of this word, according to Natural Logic
  optional string polarity_dir = 39;   // The polarity of this word, either "up", "down", or "flat"
  optional Span span = 29;             // The span of a leaf node of a tree
  optional string sentiment = 30;      // The final sentiment of the sentence
  optional int32 quotationIndex = 31;  // The index of the quotation this token refers to
  optional MapStringString conllUFeatures = 32;
  optional string coarseTag = 33;      // The coarse POS tag (used to store the UPOS tag)
  optional Span conllUTokenSpan = 34;
  optional string conllUMisc = 35;
  optional MapStringString conllUSecondaryDeps = 36;
  optional string wikipediaEntity = 37;
  optional bool isNewline = 38;

  // Fields set by other annotators in CoreNLP
  optional string gender = 51;         // Gender annotation (machine reading)
  optional string trueCase = 52;       // True case type of token
  optional string trueCaseText = 53;   // True case gloss of token

  // Chinese character info
  optional string chineseChar = 54;
  optional string chineseSeg = 55;
  optional string chineseXMLChar = 60;

  // Arabic character info
  optional string arabicSeg = 76;

  // Section info
  optional string sectionName = 56;
  optional string sectionAuthor = 57;
  optional string sectionDate = 58;
  optional string sectionEndLabel = 59;

  // French tokens have parents
  optional string parent = 61;

  // Mention index info
  repeated uint32 corefMentionIndex = 64;
  optional uint32 entityMentionIndex = 65;

  // MWT stuff
  optional bool isMWT = 67;
  optional bool isFirstMWT = 68;
  optional string mwtText = 69;
  // Setting this to a map might be nice, but there are a couple issues:
  // for one, there can be values with no key;
  // for another, it's a pain to correctly parse, since different treebanks
  // can have different standards for how to write out the misc field.
  optional string mwtMisc = 78;

  // Number info
  optional uint64 numericValue = 70;
  optional string numericType = 71;
  optional uint64 numericCompositeValue = 72;
  optional string numericCompositeType = 73;

  optional uint32 codepointOffsetBegin = 74;
  optional uint32 codepointOffsetEnd = 75;

  // Fields in the CoreLabel java class that are moved elsewhere
  // string text          @see Document#text + character offsets
  // uint32 sentenceIndex @see Sentence#sentenceIndex
  // string docID         @see Document#docID
  // uint32 paragraph     @see Sentence#paragraph

  // Most serialized annotations will not have this.
  // Some code paths may not correctly process this if serialized,
  // since many places will read the index off the position in a sentence.
  // In particular, deserializing a Document using ProtobufAnnotationSerializer
  // will clobber any index value.
  // But Semgrex and Ssurgeon in particular need a way
  // to pass around nodes where the node's index is not strictly 1, 2, 3, ...
  // thanks to the empty nodes in UD treebanks such as
  // English EWT or Estonian EWT (not related to each other).
  optional uint32 index = 79;
  optional uint32 emptyIndex = 80;

  // Field numbers set aside for extensions of this message.
  extensions 100 to 255;
}
//
// An enumeration of valid sentiment values for the sentiment classifier.
//
enum Sentiment {
  // Negative values.
  STRONG_NEGATIVE = 0;
  WEAK_NEGATIVE = 1;

  // Neutral value.
  NEUTRAL = 2;

  // Positive values.
  WEAK_POSITIVE = 3;
  STRONG_POSITIVE = 4;
}
//
// A quotation marker in text
//
message Quote {
  // The quote and where it is located.
  // NOTE(review): field number 4 is not declared in this message; if it was
  // removed at some point, consider reserving it.
  optional string text = 1;
  optional uint32 begin = 2;
  optional uint32 end = 3;
  optional uint32 sentenceBegin = 5;
  optional uint32 sentenceEnd = 6;
  optional uint32 tokenBegin = 7;
  optional uint32 tokenEnd = 8;
  optional string docid = 9;
  optional uint32 index = 10;

  // Attribution: author, mention and speaker information.
  optional string author = 11;
  optional string mention = 12;
  optional uint32 mentionBegin = 13;
  optional uint32 mentionEnd = 14;
  optional string mentionType = 15;
  optional string mentionSieve = 16;
  optional string speaker = 17;
  optional string speakerSieve = 18;
  optional string canonicalMention = 19;
  optional uint32 canonicalMentionBegin = 20;
  optional uint32 canonicalMentionEnd = 21;
  optional DependencyGraph attributionDependencyGraph = 22;
}
//
// A syntactic parse tree, with scores.
//
message ParseTree {
  // Subtrees; the message is recursive, so nesting depth follows tree depth.
  repeated ParseTree child = 1;

  // Label of this node.
  optional string value = 2;

  // Indices of the yield of this subtree.
  optional uint32 yieldBeginIndex = 3;
  optional uint32 yieldEndIndex = 4;

  // Score and sentiment attached to this node.
  optional double score = 5;
  optional Sentiment sentiment = 6;
}
//
// A dependency graph representation.
//
message DependencyGraph {
  // A single node of the graph.
  message Node {
    required uint32 sentenceIndex = 1;
    required uint32 index = 2;
    optional uint32 copyAnnotation = 3;
    optional uint32 emptyIndex = 4;
  }

  // A single edge of the graph, from `source` to `target`.
  message Edge {
    required uint32 source = 1;
    required uint32 target = 2;
    optional string dep = 3;
    optional bool isExtra = 4;
    optional uint32 sourceCopy = 5;
    optional uint32 targetCopy = 6;
    optional uint32 sourceEmpty = 8;
    optional uint32 targetEmpty = 9;
    optional Language language = 7 [default = Unknown];
  }

  repeated Node node = 1;
  repeated Edge edge = 2;
  repeated uint32 root = 3 [packed = true];

  // Optional: if this graph message is not part of a larger context,
  // the tokens will help reconstruct the actual sentence.
  repeated Token token = 4;

  // The values in this field will index directly into the node list.
  // This is useful so that additional information such as emptyIndex
  // can be considered without having to pass it around a second time.
  repeated uint32 rootNode = 5 [packed = true];
}
//
// A coreference chain.
// These fields are not *really* optional. CoreNLP will crash without them.
//
message CorefChain {
  // One mention belonging to the chain.
  message CorefMention {
    optional int32 mentionID = 1;
    optional string mentionType = 2;
    optional string number = 3;
    optional string gender = 4;
    optional string animacy = 5;
    optional uint32 beginIndex = 6;
    optional uint32 endIndex = 7;
    optional uint32 headIndex = 9;
    optional uint32 sentenceIndex = 10;
    // The second element of position.
    optional uint32 position = 11;
  }

  required int32 chainID = 1;
  repeated CorefMention mention = 2;
  // NOTE(review): presumably an index into `mention` - confirm against the
  // serializer.
  required uint32 representative = 3;
}
//
// a mention
//
message Mention {
  // Identity and basic attributes of the mention.
  optional int32 mentionID = 1;
  optional string mentionType = 2;
  optional string number = 3;
  optional string gender = 4;
  optional string animacy = 5;
  optional string person = 6;

  // Position of the mention and of its head.
  optional uint32 startIndex = 7;
  optional uint32 endIndex = 9;
  optional int32 headIndex = 10;
  optional string headString = 11;
  optional string nerString = 12;

  // Cluster and numbering information.
  optional int32 originalRef = 13;
  optional int32 goldCorefClusterID = 14;
  optional int32 corefClusterID = 15;
  optional int32 mentionNum = 16;
  optional int32 sentNum = 17;
  optional int32 utter = 18;
  optional int32 paragraph = 19;

  // Boolean properties.
  optional bool isSubject = 20;
  optional bool isDirectObject = 21;
  optional bool isIndirectObject = 22;
  optional bool isPrepositionObject = 23;
  optional bool hasTwin = 24;
  optional bool generic = 25;
  optional bool isSingleton = 26;
  optional bool hasBasicDependency = 27;
  optional bool hasEnhancedDependency = 28;
  optional bool hasContextParseTree = 29;

  // References to words, plus speaker information.
  optional IndexedWord headIndexedWord = 30;
  optional IndexedWord dependingVerb = 31;
  optional IndexedWord headWord = 32;
  optional SpeakerInfo speakerInfo = 33;

  // Repeated word and term lists.
  repeated IndexedWord sentenceWords = 50;
  repeated IndexedWord originalSpan = 51;
  repeated string dependents = 52;
  repeated string preprocessedTerms = 53;

  // Integer lists referring to related mentions.
  // NOTE(review): presumably mention ids - confirm against the serializer.
  repeated int32 appositions = 54;
  repeated int32 predicateNominatives = 55;
  repeated int32 relativePronouns = 56;
  repeated int32 listMembers = 57;
  repeated int32 belongToLists = 58;
}
//
// store the position (sentence, token index) of a CoreLabel
//
message IndexedWord {
  // Position of the word: sentence number and token index.
  optional int32 sentenceNum = 1;
  optional int32 tokenIndex = 2;

  // Document id (an integer here).
  optional int32 docID = 3;

  optional uint32 copyCount = 4;
}
//
// speaker info, this is used for Mentions
//
message SpeakerInfo {
  // Name of the speaker.
  optional string speakerName = 1;

  // Mentions associated with the speaker.
  // NOTE(review): presumably mention ids - confirm against the serializer.
  repeated int32 mentions = 2;
}
//
// A Span of text
//
message Span {
  // Both ends are required.
  // NOTE(review): whether `end` is inclusive or exclusive is not stated
  // here - confirm against callers.
  required uint32 begin = 1;
  required uint32 end = 2;
}
//
// A Timex object, representing a temporal expression (TIMe EXpression)
// These fields are not *really* optional. CoreNLP will crash without them.
//
message Timex {
  // Value of the expression and an alternative value.
  optional string value = 1;
  optional string altValue = 2;

  // Text, type and id of the expression.
  optional string text = 3;
  optional string type = 4;
  optional string tid = 5;

  // Begin and end points.
  optional uint32 beginPoint = 6;
  optional uint32 endPoint = 7;
}
//
// A representation of an entity in a relation.
// This corresponds to the EntityMention, and more broadly the
// ExtractionObject classes.
//
message Entity {
  // Fields specific to the entity mention.
  optional uint32 headStart = 6;
  optional uint32 headEnd = 7;
  optional string mentionType = 8;
  optional string normalizedName = 9;
  optional uint32 headTokenIndex = 10;
  optional string corefID = 11;

  // Fields inherited from ExtractionObject.
  optional string objectID = 1;
  optional uint32 extentStart = 2;
  optional uint32 extentEnd = 3;
  optional string type = 4;
  optional string subtype = 5;

  // Implicit
  // uint32 sentence @see implicit in sentence
}
//
// A representation of a relation, mirroring RelationMention
//
message Relation {
  // Fields specific to the relation mention.
  // NOTE(review): argName and arg look like parallel lists - confirm.
  repeated string argName = 6;
  repeated Entity arg = 7;
  optional string signature = 8;

  // Fields inherited from ExtractionObject.
  optional string objectID = 1;
  optional uint32 extentStart = 2;
  optional uint32 extentEnd = 3;
  optional string type = 4;
  optional string subtype = 5;

  // Implicit
  // uint32 sentence @see implicit in sentence
}
//
// A Natural Logic operator
//
message Operator {
  // Name of the operator.
  required string name = 1;

  // Span of the quantifier.
  required int32 quantifierSpanBegin = 2;
  required int32 quantifierSpanEnd = 3;

  // Span of the subject.
  required int32 subjectSpanBegin = 4;
  required int32 subjectSpanEnd = 5;

  // Span of the object.
  required int32 objectSpanBegin = 6;
  required int32 objectSpanEnd = 7;
}
//
// The seven informative Natural Logic relations
//
enum NaturalLogicRelation {
  // Seven values, numbered 0 through 6; EQUIVALENCE is the zero value.
  EQUIVALENCE = 0;
  FORWARD_ENTAILMENT = 1;
  REVERSE_ENTAILMENT = 2;
  NEGATION = 3;
  ALTERNATION = 4;
  COVER = 5;
  INDEPENDENCE = 6;
}
//
// The polarity of a word, according to Natural Logic
//
message Polarity {
  // One required field per NaturalLogicRelation value.
  // NOTE(review): presumably each field holds the relation that the named
  // input relation is projected to - confirm against the Java Polarity class.
  required NaturalLogicRelation projectEquivalence = 1;
  required NaturalLogicRelation projectForwardEntailment = 2;
  required NaturalLogicRelation projectReverseEntailment = 3;
  required NaturalLogicRelation projectNegation = 4;
  required NaturalLogicRelation projectAlternation = 5;
  required NaturalLogicRelation projectCover = 6;
  required NaturalLogicRelation projectIndependence = 7;
}
//
// An NER mention in the text
//
message NERMention {
  // Location of the mention: sentence index plus a token range within the
  // sentence (start inclusive, end exclusive, as the field names say).
  optional uint32 sentenceIndex = 1;
  required uint32 tokenStartInSentenceInclusive = 2;
  required uint32 tokenEndInSentenceExclusive = 3;

  // NER tag (required) and further annotations of the mention.
  required string ner = 4;
  optional string normalizedNER = 5;
  optional string entityType = 6;
  optional Timex timex = 7;
  optional string wikipediaEntity = 8;
  optional string gender = 9;

  // Entity mention indices and text.
  optional uint32 entityMentionIndex = 10;
  optional uint32 canonicalEntityMentionIndex = 11;
  optional string entityMentionText = 12;
}
//
// An entailed sentence fragment.
// Created by the openie annotator.
//
message SentenceFragment {
  // Indices of the tokens in the fragment.
  repeated uint32 tokenIndex = 1;

  // Root of the fragment.
  optional uint32 root = 2;

  optional bool assumedTruth = 3;
  optional double score = 4;
}
//
// The index of a token in a document, including the sentence
// index and the offset.
//
message TokenLocation {
  // Index of the sentence containing the token.
  optional uint32 sentenceIndex = 1;

  // Index of the token.
  optional uint32 tokenIndex = 2;
}
//
// An OpenIE relation triple.
// Created by the openie annotator.
//
message RelationTriple {
  // The surface form of the subject.
  optional string subject = 1;
  // The surface form of the relation (required).
  optional string relation = 2;
  // The surface form of the object.
  optional string object = 3;
  // The [optional] confidence of the extraction.
  optional double confidence = 4;

  // The tokens comprising the subject of the triple.
  repeated TokenLocation subjectTokens = 13;
  // The tokens comprising the relation of the triple.
  repeated TokenLocation relationTokens = 14;
  // The tokens comprising the object of the triple.
  repeated TokenLocation objectTokens = 15;

  // The dependency graph fragment for this triple.
  optional DependencyGraph tree = 8;

  // If true, this expresses an implicit tmod relation.
  optional bool istmod = 9;
  // If true, this relation string is missing a 'be' prefix.
  optional bool prefixBe = 10;
  // If true, this relation string is missing a 'be' suffix.
  optional bool suffixBe = 11;
  // If true, this relation string is missing an 'of' suffix.
  optional bool suffixOf = 12;
}
//
// A map from strings to strings.
// Used, minimally, in the CoNLLU featurizer
//
message MapStringString {
  // Keys and values are stored as two separate lists.
  // NOTE(review): presumably key[i] pairs with value[i] - confirm against
  // the serializer.
  repeated string key = 1;
  repeated string value = 2;
}
//
// A map from integers to strings.
// Used, minimally, in the CoNLLU featurizer
//
message MapIntString {
  // Keys and values are stored as two separate lists.
  // NOTE(review): presumably key[i] pairs with value[i] - confirm against
  // the serializer.
  repeated uint32 key = 1;
  repeated string value = 2;
}
//
// Store section info
//
message Section {
  // Character range of the section (required).
  required uint32 charBegin = 1;
  required uint32 charEnd = 2;

  // Author, member sentences, date/time and quotes.
  optional string author = 3;
  repeated uint32 sentenceIndexes = 4;
  optional string datetime = 5;
  repeated Quote quotes = 6;

  // Character range of the author.
  optional uint32 authorCharBegin = 7;
  optional uint32 authorCharEnd = 8;

  // The xml tag, stored as a Token (required).
  required Token xmlTag = 9;
}
// A message for requesting a semgrex
// Each sentence stores information about the tokens making up the
// corresponding graph
// An alternative would have been to use the existing Document or
// Sentence classes, but the problem with that is it would be
// ambiguous which dependency object to use.
message SemgrexRequest {
  // One graph to search, together with its tokens.
  message Dependencies {
    repeated Token token = 1;
    required DependencyGraph graph = 2;
  }

  // The semgrex expressions.
  repeated string semgrex = 1;

  // The graphs to query.
  repeated Dependencies query = 2;
}
// The response from running a semgrex
// If you pass in M semgrex expressions and N dependency graphs,
// this returns MxN nested results. Each SemgrexResult can match
// multiple times in one graph
//
// You may want to send multiple semgrexes per query because
// translating large numbers of dependency graphs to protobufs
// will be expensive, so doing several queries at once will save time
message SemgrexResponse {
  // A named node in a match.
  message NamedNode {
    required string name = 1;
    required int32 matchIndex = 2;
  }

  // A named relation in a match.
  message NamedRelation {
    required string name = 1;
    required string reln = 2;
  }

  // A named edge in a match.
  message NamedEdge {
    required string name = 1;
    required int32 source = 2;
    required int32 target = 3;
    optional string reln = 4;
    optional bool isExtra = 5;
    optional uint32 sourceCopy = 6;
    optional uint32 targetCopy = 7;
  }

  // A single match.
  message Match {
    required int32 matchIndex = 1;
    repeated NamedNode node = 2;
    repeated NamedRelation reln = 3;
    repeated NamedEdge edge = 6;

    // When processing multiple dependency graphs at once,
    // which dependency graph this applies to.
    // Indexed from 0.
    optional int32 graphIndex = 4;

    // Index of the semgrex expression this match applies to.
    // Indexed from 0.
    optional int32 semgrexIndex = 5;
  }

  // A list of matches.
  message SemgrexResult {
    repeated Match match = 1;
  }

  // A list of SemgrexResults.
  message GraphResult {
    repeated SemgrexResult result = 1;
  }

  // Top level of the nested results.
  repeated GraphResult result = 1;
}
// A message for processing an Ssurgeon
// Each sentence stores information about the tokens making up the
// corresponding graph
// An alternative would have been to use the existing Document or
// Sentence classes, but the problem with that is it would be
// ambiguous which dependency object to use. Another problem
// is that if the intent is to use multiple graphs from a
// Sentence, then edits to the nodes of one graph would show up
// in the nodes of the other graph (same backing CoreLabels)
// and the operations themselves may not have the intended effect.
// The Ssurgeon is composed of two pieces, the semgrex and the
// ssurgeon operations, along with some optional documentation.
message SsurgeonRequest {
  // One Ssurgeon: a semgrex, operations, and optional descriptive fields.
  message Ssurgeon {
    optional string semgrex = 1;
    repeated string operation = 2;
    optional string id = 3;
    optional string notes = 4;
    optional string language = 5;
  }

  // The Ssurgeons in this request.
  repeated Ssurgeon ssurgeon = 1;

  // The dependency graphs in this request.
  repeated DependencyGraph graph = 2;
}
// The response to an SsurgeonRequest.
message SsurgeonResponse {
  // A graph plus a `changed` flag.
  message SsurgeonResult {
    optional DependencyGraph graph = 1;
    optional bool changed = 2;
  }

  repeated SsurgeonResult result = 1;
}
// It's possible to send in a whole document, but we
// only care about the Sentences and Tokens
message TokensRegexRequest {
  // The document (required).
  required Document doc = 1;

  // The patterns.
  repeated string pattern = 2;
}
// The result will be a nested structure:
// repeated PatternMatch, one for each pattern
// each PatternMatch has a repeated Match,
// which tells you which sentence matched and where
message TokensRegexResponse {
  // Text of a match together with its begin and end.
  message MatchLocation {
    optional string text = 1;
    optional int32 begin = 2;
    optional int32 end = 3;
  }

  // A match: the sentence, the match location, and the groups.
  message Match {
    required int32 sentence = 1;
    required MatchLocation match = 2;
    repeated MatchLocation group = 3;
  }

  // A list of matches.
  message PatternMatch {
    repeated Match match = 1;
  }

  repeated PatternMatch match = 1;
}
// A protobuf which allows passing in a document with basic
// dependencies to be converted to enhanced dependencies
message DependencyEnhancerRequest {
  // The document (required).
  required Document document = 1;

  // At most one member of this oneof can be set at a time.
  oneof ref {
    Language language = 2;

    // The expected value of this is a regex which matches relative pronouns.
    string relativePronouns = 3;
  }
}
// A version of ParseTree with a flattened structure so that deep trees
// don't exceed the protobuf stack depth
message FlattenedParseTree {
  // One element of the flattened tree.
  message Node {
    // At most one member of this oneof can be set at a time.
    oneof contents {
      bool openNode = 1;
      bool closeNode = 2;
      string value = 3;
    }

    optional double score = 4;
  }

  // The flat list of nodes.
  repeated Node nodes = 1;
}
// A protobuf for calling the java constituency parser evaluator from elsewhere
message EvaluateParserRequest {
  // A gold tree together with the predicted tree(s).
  message ParseResult {
    required FlattenedParseTree gold = 1;

    // Repeated so you can send in kbest parses, if your parser handles that.
    // Note that this already includes a score field.
    repeated FlattenedParseTree predicted = 2;
  }

  repeated ParseResult treebank = 1;
}
// The response to an EvaluateParserRequest.
message EvaluateParserResponse {
  // F1 score (required).
  required double f1 = 1;

  // k-best F1 score.
  optional double kbestF1 = 2;

  // Keep track of the individual tree F1 scores.
  repeated double treeF1 = 3;
}
// A protobuf for running Tsurgeon operations on constituency trees
message TsurgeonRequest {
  // A required tregex plus a list of tsurgeon strings.
  message Operation {
    required string tregex = 1;
    repeated string tsurgeon = 2;
  }

  // The operations in this request.
  repeated Operation operations = 1;

  // The trees in this request.
  repeated FlattenedParseTree trees = 2;
}
// The results of the Tsurgeon operation
message TsurgeonResponse {
  // The trees in the response.
  repeated FlattenedParseTree trees = 1;
}
// Sent in Morphology requests - a stream of sentences with tagged words
message MorphologyRequest {
  // A word (required) with an optional xpos tag.
  message TaggedWord {
    required string word = 1;
    optional string xpos = 2;
  }

  // The tagged words in this request.
  repeated TaggedWord words = 1;
}
// Sent back from the Morphology request - the words and their tags
message MorphologyResponse {
  // A word and its lemma (both required) with an optional xpos tag.
  message WordTagLemma {
    required string word = 1;
    optional string xpos = 2;
    required string lemma = 3;
  }

  // The words in this response.
  repeated WordTagLemma words = 1;
}
// A request for converting constituency trees to dependency graphs
message DependencyConverterRequest {
  // The trees in this request.
  repeated FlattenedParseTree trees = 1;
}
// The result of using the CoreNLP dependency converter.
// One graph per tree
message DependencyConverterResponse {
  // A graph (required) with an optional tree.
  message DependencyConversion {
    required DependencyGraph graph = 1;
    optional FlattenedParseTree tree = 2;
  }

  // The conversions in this response.
  repeated DependencyConversion conversions = 1;
}
|