File size: 8,679 Bytes
07ef7ab
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
syntax = "proto2";

package object_detection.protos;


// Configuration proto for defining input readers that generate Object Detection
// Examples from input sources. Input readers are expected to generate a
// dictionary of tensors, with the following fields populated:
//
// 'image': an [image_height, image_width, channels] image tensor that detection
//    will be run on.
// 'groundtruth_classes': a [num_boxes] int32 tensor storing the class
//    labels of the groundtruth boxes in the image.
// 'groundtruth_boxes': a [num_boxes, 4] float tensor storing the coordinates of
//    the groundtruth boxes in the image.
// 'groundtruth_instance_masks': (Optional), a [num_boxes, image_height,
//    image_width] float tensor storing the binary masks of the objects in the
//    boxes.

// Format in which instance masks are stored. Note that PNG masks are much more
// space efficient.
enum InstanceMaskType {
  // Default implementation, currently NUMERICAL_MASKS.
  DEFAULT = 0;

  // Float32 binary masks of shape [num_masks, H, W].
  NUMERICAL_MASKS = 1;

  // Encoded PNG masks.
  PNG_MASKS = 2;
}

// Format of the input records: whether inputs are TfExamples or
// TfSequenceExamples.
enum InputType {
  // Default implementation, currently TF_EXAMPLE.
  INPUT_DEFAULT = 0;

  // Inputs are TfExample records.
  TF_EXAMPLE = 1;

  // Inputs are TfSequenceExample records.
  TF_SEQUENCE_EXAMPLE = 2;
}

// Configuration of a single input reader: which data source is read (the
// `input_reader` oneof) and how records are shuffled, sampled, batched,
// prefetched and decoded.
//
// Next id: 39
message InputReader {
  // Field number 28 is not assigned to any field in this message even though
  // the numbers on either side of it are in use. It is reserved so that it
  // cannot be picked up by accident for a new field.
  // NOTE(review): presumably 28 belonged to a field that was removed - confirm.
  reserved 28;

  // Name of input reader. Typically used to describe the dataset that is read
  // by this input reader.
  optional string name = 23 [default = ""];

  // Path to StringIntLabelMap pbtxt file specifying the mapping from string
  // labels to integer ids.
  optional string label_map_path = 1 [default = ""];

  // Whether data should be processed in the order they are read in, or
  // shuffled randomly.
  optional bool shuffle = 2 [default = true];

  // Buffer size to be used when shuffling.
  optional uint32 shuffle_buffer_size = 11 [default = 2048];

  // Buffer size to be used when shuffling file names.
  optional uint32 filenames_shuffle_buffer_size = 12 [default = 100];

  // The number of times a data source is read. If set to zero, the data source
  // will be reused indefinitely.
  optional uint32 num_epochs = 5 [default = 0];

  // Integer representing how often an example should be sampled. To feed
  // only 1/3 of your data into your model, set `sample_1_of_n_examples` to 3.
  // This is particularly useful for evaluation, where you might not prefer to
  // evaluate all of your samples.
  optional uint32 sample_1_of_n_examples = 22 [default = 1];

  // Number of file shards to read in parallel.
  //
  // When sample_from_datasets_weights are configured, num_readers is applied
  // for each dataset.
  optional uint32 num_readers = 6 [default = 64];

  // Number of batches to produce in parallel. If this is run on a 2x2 TPU set
  // this to 8.
  optional uint32 num_parallel_batches = 19 [default = 8];

  // Number of batches to prefetch. Prefetch decouples input pipeline and
  // model so they can be pipelined resulting in higher throughput. Set this
  // to a small constant and increment linearly until the improvements become
  // marginal or you exceed your cpu memory budget. Setting this to -1
  // automatically tunes this value for you.
  optional int32 num_prefetch_batches = 20 [default = 2];

  // Deprecated. Maximum number of records to keep in reader queue.
  optional uint32 queue_capacity = 3 [default = 2000, deprecated = true];

  // Deprecated. Minimum number of records to keep in reader queue. A large
  // value is needed to generate a good random shuffle.
  optional uint32 min_after_dequeue = 4 [default = 1000, deprecated = true];

  // Number of records to read from each reader at once.
  optional uint32 read_block_length = 15 [default = 32];

  // Deprecated. Number of decoded records to prefetch before batching.
  optional uint32 prefetch_size = 13 [default = 512, deprecated = true];

  // Deprecated. Number of parallel decode ops to apply.
  optional uint32 num_parallel_map_calls = 14 [default = 64, deprecated = true];

  // Drop remainder when batch size does not divide dataset size.
  optional bool drop_remainder = 35 [default = true];

  // If positive, TfExampleDecoder will try to decode rasters of additional
  // channels from tf.Examples.
  optional int32 num_additional_channels = 18 [default = 0];

  // Number of groundtruth keypoints per object.
  optional uint32 num_keypoints = 16 [default = 0];

  // Keypoint weights. These weights can be used to apply per-keypoint loss
  // multipliers. The size of this field should agree with `num_keypoints`.
  repeated float keypoint_type_weight = 26;

  // Maximum number of boxes to pad to during training / evaluation.
  // Set this to at least the maximum amount of boxes in the input data,
  // otherwise some groundtruth boxes may be clipped.
  optional int32 max_number_of_boxes = 21 [default = 100];

  // Whether to load multiclass scores from the dataset.
  optional bool load_multiclass_scores = 24 [default = false];

  // Whether to load context features from the dataset.
  optional bool load_context_features = 25 [default = false];

  // Whether to load context image ids from the dataset.
  optional bool load_context_image_ids = 36 [default = false];

  // Whether to load groundtruth instance masks.
  optional bool load_instance_masks = 7 [default = false];

  // Type of instance mask. See InstanceMaskType for the available formats.
  optional InstanceMaskType mask_type = 10 [default = NUMERICAL_MASKS];

  // Whether to load DensePose data. If set, must also set load_instance_masks
  // to true.
  optional bool load_dense_pose = 31 [default = false];

  // Whether to load track information.
  optional bool load_track_id = 33 [default = false];

  // Whether to load keypoint depth features.
  optional bool load_keypoint_depth_features = 37 [default = false];

  // Whether to use the display name when decoding examples. This is only used
  // when mapping class text strings to integers.
  optional bool use_display_name = 17 [default = false];

  // Whether to include the source_id string in the input features.
  optional bool include_source_id = 27 [default = false];

  // Whether input data type is tf.Examples or tf.SequenceExamples.
  optional InputType input_type = 30 [default = TF_EXAMPLE];

  // Which frame to choose from the input if Sequence Example. -1 indicates
  // random choice.
  optional int32 frame_index = 32 [default = -1];

  // Whether to use the label map and the keypoint text feature to construct the
  // keypoint coordinates/visibilities groundtruth tensors. Usually used when
  // training with multiple datasets that contain different subsets of
  // keypoints.
  optional bool use_keypoint_label_map = 38 [default = false];

  // The data source to read from. At most one of the members can be set.
  oneof input_reader {
    TFRecordInputReader tf_record_input_reader = 8;
    ExternalInputReader external_input_reader = 9;
  }

  // When multiple input files are configured, we can sample across them based
  // on weights.
  //
  // The number of weights must match the number of input files configured.
  //
  // The number of input readers per dataset is num_readers, scaled relative to
  // the dataset weight.
  //
  // When set, the shuffling and shuffle buffer size settings are applied
  // individually to each dataset.
  //
  // Implementation follows tf.data.experimental.sample_from_datasets sampling
  // strategy. Weights may take any value - only relative weights matter.
  //
  // Zero weights will result in a dataset not being sampled and no input
  // readers spawned.
  //
  // Examples, assuming two input files configured:
  //
  // Equal weighting:
  // sample_from_datasets_weights: 0.5
  // sample_from_datasets_weights: 0.5
  //
  // 2:1 weighting:
  // sample_from_datasets_weights: 2
  // sample_from_datasets_weights: 1
  //
  // Exclude the second dataset:
  // sample_from_datasets_weights: 1
  // sample_from_datasets_weights: 0
  repeated float sample_from_datasets_weights = 34;

  // Expand labels to ancestors or descendants in the hierarchy for
  // positive and negative labels, respectively.
  optional bool expand_labels_hierarchy = 29 [default = false];
}

// An input reader that reads TF Example or TF Sequence Example protos from
// local TFRecord files. Selected by setting the `tf_record_input_reader`
// member of the `input_reader` oneof in InputReader.
message TFRecordInputReader {
  // Path(s) to `TFRecordFile`s. The field is repeated, so more than one path
  // may be listed.
  repeated string input_path = 1;
}

// An externally defined input reader. Users may define an extension to this
// proto to interface their own input readers. The message declares no fields
// of its own; it only sets aside a range of field numbers for extensions.
message ExternalInputReader {
  // Field numbers 1 through 999 are available to extensions of this message.
  extensions 1 to 999;
}