File size: 13,800 Bytes
e36aeda
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build goexperiment.jsonv2

package jsontext

import (
	"bytes"
	"errors"
	"io"
	"slices"
	"sync"

	"encoding/json/internal/jsonflags"
	"encoding/json/internal/jsonwire"
)

// NOTE: Value is analogous to v1 json.RawMessage.

// AppendFormat formats the JSON value in src and appends it to dst
// according to the specified options.
// See [Value.Format] for more details about the formatting behavior.
//
// The dst and src may overlap.
// If an error is reported, then the entirety of src is appended to dst.
func AppendFormat(dst, src []byte, opts ...Options) ([]byte, error) {
	e := getBufferedEncoder(opts...)
	defer putBufferedEncoder(e)
	e.s.Flags.Set(jsonflags.OmitTopLevelNewline | 1)
	if err := e.s.WriteValue(src); err != nil {
		return append(dst, src...), err
	}
	return append(dst, e.s.Buf...), nil
}

// Value represents a single raw JSON value, which may be one of the following:
//   - a JSON literal (i.e., null, true, or false)
//   - a JSON string (e.g., "hello, world!")
//   - a JSON number (e.g., 123.456)
//   - an entire JSON object (e.g., {"fizz":"buzz"} )
//   - an entire JSON array (e.g., [1,2,3] )
//
// Value can represent entire array or object values, while [Token] cannot.
// Value may contain leading and/or trailing whitespace.
type Value []byte

// Clone returns a copy of v.
func (v Value) Clone() Value {
	return bytes.Clone(v)
}

// String returns the string formatting of v.
func (v Value) String() string {
	if v == nil {
		return "null"
	}
	return string(v)
}

// IsValid reports whether the raw JSON value is syntactically valid
// according to the specified options.
//
// By default (if no options are specified), it validates according to RFC 7493.
// It verifies whether the input is properly encoded as UTF-8,
// that escape sequences within strings decode to valid Unicode codepoints, and
// that all names in each object are unique.
// It does not verify whether numbers are representable within the limits
// of any common numeric type (e.g., float64, int64, or uint64).
//
// Relevant options include:
//   - [AllowDuplicateNames]
//   - [AllowInvalidUTF8]
//
// All other options are ignored.
func (v Value) IsValid(opts ...Options) bool {
	// TODO: Document support for [WithByteLimit] and [WithDepthLimit].
	d := getBufferedDecoder(v, opts...)
	defer putBufferedDecoder(d)
	_, errVal := d.ReadValue()
	_, errEOF := d.ReadToken()
	return errVal == nil && errEOF == io.EOF
}

// Format formats the raw JSON value in place.
//
// By default (if no options are specified), it validates according to RFC 7493
// and produces the minimal JSON representation, where
// all whitespace is elided and JSON strings use the shortest encoding.
//
// Relevant options include:
//   - [AllowDuplicateNames]
//   - [AllowInvalidUTF8]
//   - [EscapeForHTML]
//   - [EscapeForJS]
//   - [PreserveRawStrings]
//   - [CanonicalizeRawInts]
//   - [CanonicalizeRawFloats]
//   - [ReorderRawObjects]
//   - [SpaceAfterColon]
//   - [SpaceAfterComma]
//   - [Multiline]
//   - [WithIndent]
//   - [WithIndentPrefix]
//
// All other options are ignored.
//
// It is guaranteed to succeed if the value is valid according to the same options.
// If the value is already formatted, then the buffer is not mutated.
func (v *Value) Format(opts ...Options) error {
	// TODO: Document support for [WithByteLimit] and [WithDepthLimit].
	return v.format(opts, nil)
}

// format accepts two []Options to avoid the allocation appending them together.
// It is equivalent to v.Format(append(opts1, opts2...)...).
func (v *Value) format(opts1, opts2 []Options) error {
	e := getBufferedEncoder(opts1...)
	defer putBufferedEncoder(e)
	e.s.Join(opts2...)
	e.s.Flags.Set(jsonflags.OmitTopLevelNewline | 1)
	if err := e.s.WriteValue(*v); err != nil {
		return err
	}
	if !bytes.Equal(*v, e.s.Buf) {
		*v = append((*v)[:0], e.s.Buf...)
	}
	return nil
}

// Compact removes all whitespace from the raw JSON value.
//
// It does not reformat JSON strings or numbers to use any other representation.
// To maximize the set of JSON values that can be formatted,
// this permits values with duplicate names and invalid UTF-8.
//
// Compact is equivalent to calling [Value.Format] with the following options:
//   - [AllowDuplicateNames](true)
//   - [AllowInvalidUTF8](true)
//   - [PreserveRawStrings](true)
//
// Any options specified by the caller are applied after the initial set
// and may deliberately override prior options.
func (v *Value) Compact(opts ...Options) error {
	return v.format([]Options{
		AllowDuplicateNames(true),
		AllowInvalidUTF8(true),
		PreserveRawStrings(true),
	}, opts)
}

// Indent reformats the whitespace in the raw JSON value so that each element
// in a JSON object or array begins on a indented line according to the nesting.
//
// It does not reformat JSON strings or numbers to use any other representation.
// To maximize the set of JSON values that can be formatted,
// this permits values with duplicate names and invalid UTF-8.
//
// Indent is equivalent to calling [Value.Format] with the following options:
//   - [AllowDuplicateNames](true)
//   - [AllowInvalidUTF8](true)
//   - [PreserveRawStrings](true)
//   - [Multiline](true)
//
// Any options specified by the caller are applied after the initial set
// and may deliberately override prior options.
func (v *Value) Indent(opts ...Options) error {
	return v.format([]Options{
		AllowDuplicateNames(true),
		AllowInvalidUTF8(true),
		PreserveRawStrings(true),
		Multiline(true),
	}, opts)
}

// Canonicalize canonicalizes the raw JSON value according to the
// JSON Canonicalization Scheme (JCS) as defined by RFC 8785
// where it produces a stable representation of a JSON value.
//
// JSON strings are formatted to use their minimal representation,
// JSON numbers are formatted as double precision numbers according
// to some stable serialization algorithm.
// JSON object members are sorted in ascending order by name.
// All whitespace is removed.
//
// The output stability is dependent on the stability of the application data
// (see RFC 8785, Appendix E). It cannot produce stable output from
// fundamentally unstable input. For example, if the JSON value
// contains ephemeral data (e.g., a frequently changing timestamp),
// then the value is still unstable regardless of whether this is called.
//
// Canonicalize is equivalent to calling [Value.Format] with the following options:
//   - [CanonicalizeRawInts](true)
//   - [CanonicalizeRawFloats](true)
//   - [ReorderRawObjects](true)
//
// Any options specified by the caller are applied after the initial set
// and may deliberately override prior options.
//
// Note that JCS treats all JSON numbers as IEEE 754 double precision numbers.
// Any numbers with precision beyond what is representable by that form
// will lose their precision when canonicalized. For example, integer values
// beyond ±2⁵³ will lose their precision. To preserve the original representation
// of JSON integers, additionally set [CanonicalizeRawInts] to false:
//
//	v.Canonicalize(jsontext.CanonicalizeRawInts(false))
func (v *Value) Canonicalize(opts ...Options) error {
	return v.format([]Options{
		CanonicalizeRawInts(true),
		CanonicalizeRawFloats(true),
		ReorderRawObjects(true),
	}, opts)
}

// MarshalJSON returns v as the JSON encoding of v.
// It returns the stored value as the raw JSON output without any validation.
// If v is nil, then this returns a JSON null.
func (v Value) MarshalJSON() ([]byte, error) {
	// NOTE: This matches the behavior of v1 json.RawMessage.MarshalJSON.
	if v == nil {
		return []byte("null"), nil
	}
	return v, nil
}

// UnmarshalJSON sets v as the JSON encoding of b.
// It stores a copy of the provided raw JSON input without any validation.
func (v *Value) UnmarshalJSON(b []byte) error {
	// NOTE: This matches the behavior of v1 json.RawMessage.UnmarshalJSON.
	if v == nil {
		return errors.New("jsontext.Value: UnmarshalJSON on nil pointer")
	}
	*v = append((*v)[:0], b...)
	return nil
}

// Kind returns the starting token kind.
// For a valid value, this will never include [KindEndObject] or [KindEndArray].
func (v Value) Kind() Kind {
	if v := v[jsonwire.ConsumeWhitespace(v):]; len(v) > 0 {
		return Kind(v[0]).normalize()
	}
	return invalidKind
}

const commaAndWhitespace = ", \n\r\t"

type objectMember struct {
	// name is the unquoted name.
	name []byte // e.g., "name"
	// buffer is the entirety of the raw JSON object member
	// starting from right after the previous member (or opening '{')
	// until right after the member value.
	buffer []byte // e.g., `, \n\r\t"name": "value"`
}

func (x objectMember) Compare(y objectMember) int {
	if c := jsonwire.CompareUTF16(x.name, y.name); c != 0 {
		return c
	}
	// With [AllowDuplicateNames] or [AllowInvalidUTF8],
	// names could be identical, so also sort using the member value.
	return jsonwire.CompareUTF16(
		bytes.TrimLeft(x.buffer, commaAndWhitespace),
		bytes.TrimLeft(y.buffer, commaAndWhitespace))
}

var objectMemberPool = sync.Pool{New: func() any { return new([]objectMember) }}

func getObjectMembers() *[]objectMember {
	ns := objectMemberPool.Get().(*[]objectMember)
	*ns = (*ns)[:0]
	return ns
}
func putObjectMembers(ns *[]objectMember) {
	if cap(*ns) < 1<<10 {
		clear(*ns) // avoid pinning name and buffer
		objectMemberPool.Put(ns)
	}
}

// mustReorderObjects reorders in-place all object members in a JSON value,
// which must be valid otherwise it panics.
func mustReorderObjects(b []byte) {
	// Obtain a buffered encoder just to use its internal buffer as
	// a scratch buffer for reordering object members.
	e2 := getBufferedEncoder()
	defer putBufferedEncoder(e2)

	// Disable unnecessary checks to syntactically parse the JSON value.
	d := getBufferedDecoder(b)
	defer putBufferedDecoder(d)
	d.s.Flags.Set(jsonflags.AllowDuplicateNames | jsonflags.AllowInvalidUTF8 | 1)
	mustReorderObjectsFromDecoder(d, &e2.s.Buf) // per RFC 8785, section 3.2.3
}

// mustReorderObjectsFromDecoder recursively reorders all object members in place
// according to the ordering specified in RFC 8785, section 3.2.3.
//
// Pre-conditions:
//   - The value is valid (i.e., no decoder errors should ever occur).
//   - Initial call is provided a Decoder reading from the start of v.
//
// Post-conditions:
//   - Exactly one JSON value is read from the Decoder.
//   - All fully-parsed JSON objects are reordered by directly moving
//     the members in the value buffer.
//
// The runtime is approximately O(n·log(n)) + O(m·log(m)),
// where n is len(v) and m is the total number of object members.
func mustReorderObjectsFromDecoder(d *Decoder, scratch *[]byte) {
	switch tok, err := d.ReadToken(); tok.Kind() {
	case '{':
		// Iterate and collect the name and offsets for every object member.
		members := getObjectMembers()
		defer putObjectMembers(members)
		var prevMember objectMember
		isSorted := true

		beforeBody := d.InputOffset() // offset after '{'
		for d.PeekKind() != '}' {
			beforeName := d.InputOffset()
			var flags jsonwire.ValueFlags
			name, _ := d.s.ReadValue(&flags)
			name = jsonwire.UnquoteMayCopy(name, flags.IsVerbatim())
			mustReorderObjectsFromDecoder(d, scratch)
			afterValue := d.InputOffset()

			currMember := objectMember{name, d.s.buf[beforeName:afterValue]}
			if isSorted && len(*members) > 0 {
				isSorted = objectMember.Compare(prevMember, currMember) < 0
			}
			*members = append(*members, currMember)
			prevMember = currMember
		}
		afterBody := d.InputOffset() // offset before '}'
		d.ReadToken()

		// Sort the members; return early if it's already sorted.
		if isSorted {
			return
		}
		firstBufferBeforeSorting := (*members)[0].buffer
		slices.SortFunc(*members, objectMember.Compare)
		firstBufferAfterSorting := (*members)[0].buffer

		// Append the reordered members to a new buffer,
		// then copy the reordered members back over the original members.
		// Avoid swapping in place since each member may be a different size
		// where moving a member over a smaller member may corrupt the data
		// for subsequent members before they have been moved.
		//
		// The following invariant must hold:
		//	sum([m.after-m.before for m in members]) == afterBody-beforeBody
		commaAndWhitespacePrefix := func(b []byte) []byte {
			return b[:len(b)-len(bytes.TrimLeft(b, commaAndWhitespace))]
		}
		sorted := (*scratch)[:0]
		for i, member := range *members {
			switch {
			case i == 0 && &member.buffer[0] != &firstBufferBeforeSorting[0]:
				// First member after sorting is not the first member before sorting,
				// so use the prefix of the first member before sorting.
				sorted = append(sorted, commaAndWhitespacePrefix(firstBufferBeforeSorting)...)
				sorted = append(sorted, bytes.TrimLeft(member.buffer, commaAndWhitespace)...)
			case i != 0 && &member.buffer[0] == &firstBufferBeforeSorting[0]:
				// Later member after sorting is the first member before sorting,
				// so use the prefix of the first member after sorting.
				sorted = append(sorted, commaAndWhitespacePrefix(firstBufferAfterSorting)...)
				sorted = append(sorted, bytes.TrimLeft(member.buffer, commaAndWhitespace)...)
			default:
				sorted = append(sorted, member.buffer...)
			}
		}
		if int(afterBody-beforeBody) != len(sorted) {
			panic("BUG: length invariant violated")
		}
		copy(d.s.buf[beforeBody:afterBody], sorted)

		// Update scratch buffer to the largest amount ever used.
		if len(sorted) > len(*scratch) {
			*scratch = sorted
		}
	case '[':
		for d.PeekKind() != ']' {
			mustReorderObjectsFromDecoder(d, scratch)
		}
		d.ReadToken()
	default:
		if err != nil {
			panic("BUG: " + err.Error())
		}
	}
}