File size: 17,014 Bytes
0c51b93
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
//
// This file was copied from //depot/rs_xbox_dev_flight/xbox/drivers/Graphics/Pix3/PixEvt/inc/PIXEventsCommon.h#6
//
// To refresh: run 'UpdateFromXbox -latest'
//

/*==========================================================================;
*
*  Copyright (C) Microsoft Corporation.  All Rights Reserved.
*
*  File:       PIXEventsCommon.h
*  Content:    PIX include file
*              Don't include this file directly - use pix3.h
*
****************************************************************************/
#pragma once

#ifndef _PIXEventsCommon_H_
#define _PIXEventsCommon_H_

#if defined(_AMD64_) || defined(_X86_)
#include <emmintrin.h>
#endif // _AMD64_ || _X86_

extern "C" UINT64 WINAPI PIXEventsReplaceBlock(bool getEarliestTime);

enum PIXEventType
{
    PIXEvent_EndEvent                       = 0x000,
    PIXEvent_BeginEvent_VarArgs             = 0x001,
    PIXEvent_BeginEvent_NoArgs              = 0x002,
    PIXEvent_SetMarker_VarArgs              = 0x007,
    PIXEvent_SetMarker_NoArgs               = 0x008,

    PIXEvent_EndEvent_OnContext             = 0x010,
    PIXEvent_BeginEvent_OnContext_VarArgs   = 0x011,
    PIXEvent_BeginEvent_OnContext_NoArgs    = 0x012,
    PIXEvent_SetMarker_OnContext_VarArgs    = 0x017,
    PIXEvent_SetMarker_OnContext_NoArgs     = 0x018,
};

static const UINT64 PIXEventsReservedRecordSpaceQwords = 64;
//this is used to make sure SSE string copy always will end 16-byte write in the current block
//this way only a check if destination < limit can be performed, instead of destination < limit - 1
//since both these are UINT64* and SSE writes in 16 byte chunks, 8 bytes are kept in reserve
//so even if SSE overwrites 8 extra bytes, those will still belong to the correct block
//on next iteration check destination will be greater than limit
//this is used as well for fixed size UMD events and PIXEndEvent since these require less space
//than other variable length user events and do not need big reserved space
static const UINT64 PIXEventsReservedTailSpaceQwords = 2;
static const UINT64 PIXEventsSafeFastCopySpaceQwords = PIXEventsReservedRecordSpaceQwords - PIXEventsReservedTailSpaceQwords;
static const UINT64 PIXEventsGraphicsRecordSpaceQwords = 64;

//Bits 7-19 (13 bits)
static const UINT64 PIXEventsBlockEndMarker     = 0x00000000000FFF80;

//Bits 10-19 (10 bits)
static const UINT64 PIXEventsTypeReadMask       = 0x00000000000FFC00;
static const UINT64 PIXEventsTypeWriteMask      = 0x00000000000003FF;
static const UINT64 PIXEventsTypeBitShift       = 10;

//Bits 20-63 (44 bits)
static const UINT64 PIXEventsTimestampReadMask  = 0xFFFFFFFFFFF00000;
static const UINT64 PIXEventsTimestampWriteMask = 0x00000FFFFFFFFFFF;
static const UINT64 PIXEventsTimestampBitShift  = 20;

inline UINT64 PIXEncodeEventInfo(UINT64 timestamp, PIXEventType eventType)
{
    return ((timestamp & PIXEventsTimestampWriteMask) << PIXEventsTimestampBitShift) |
        (((UINT64)eventType & PIXEventsTypeWriteMask) << PIXEventsTypeBitShift);
}

//Bits 60-63 (4)
static const UINT64 PIXEventsStringAlignmentWriteMask     = 0x000000000000000F;
static const UINT64 PIXEventsStringAlignmentReadMask      = 0xF000000000000000;
static const UINT64 PIXEventsStringAlignmentBitShift      = 60;

//Bits 55-59 (5)
static const UINT64 PIXEventsStringCopyChunkSizeWriteMask = 0x000000000000001F;
static const UINT64 PIXEventsStringCopyChunkSizeReadMask  = 0x0F80000000000000;
static const UINT64 PIXEventsStringCopyChunkSizeBitShift  = 55;

//Bit 54
static const UINT64 PIXEventsStringIsANSIWriteMask        = 0x0000000000000001;
static const UINT64 PIXEventsStringIsANSIReadMask         = 0x0040000000000000;
static const UINT64 PIXEventsStringIsANSIBitShift         = 54;

//Bit 53
static const UINT64 PIXEventsStringIsShortcutWriteMask    = 0x0000000000000001;
static const UINT64 PIXEventsStringIsShortcutReadMask     = 0x0020000000000000;
static const UINT64 PIXEventsStringIsShortcutBitShift     = 53;

inline UINT64 PIXEncodeStringInfo(UINT64 alignment, UINT64 copyChunkSize, BOOL isANSI, BOOL isShortcut)
{
    return ((alignment & PIXEventsStringAlignmentWriteMask) << PIXEventsStringAlignmentBitShift) |
        ((copyChunkSize & PIXEventsStringCopyChunkSizeWriteMask) << PIXEventsStringCopyChunkSizeBitShift) |
        (((UINT64)isANSI & PIXEventsStringIsANSIWriteMask) << PIXEventsStringIsANSIBitShift) |
        (((UINT64)isShortcut & PIXEventsStringIsShortcutWriteMask) << PIXEventsStringIsShortcutBitShift);
}

template<UINT alignment, class T>
inline bool PIXIsPointerAligned(T* pointer)
{
    return !(((UINT64)pointer) & (alignment - 1));
}

template<class T>
inline void PIXCopyEventArgument(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, T argument)
{
    if (destination < limit)
    {
        *((T*)destination) = argument;
        ++destination;
    }
}

//floats must be cast to double during writing the data to be properly printed later when reading the data
//this is needed because when float is passed to varargs function it's cast to double
template<>
inline void PIXCopyEventArgument<float>(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, float argument)
{
    if (destination < limit)
    {
        *((double*)destination) = (double)(argument);
        ++destination;
    }
}

//char has to be cast to a longer signed integer type
//this is due to printf not ignoring correctly the upper bits of unsigned long long for a char format specifier
template<>
inline void PIXCopyEventArgument<char>(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, char argument)
{
    if (destination < limit)
    {
        *((INT64*)destination) = (INT64)(argument);
        ++destination;
    }
}

//unsigned char has to be cast to a longer unsigned integer type
//this is due to printf not ignoring correctly the upper bits of unsigned long long for a char format specifier
template<>
inline void PIXCopyEventArgument<unsigned char>(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, unsigned char argument)
{
    if (destination < limit)
    {
        *destination = (UINT64)(argument);
        ++destination;
    }
}

//bool has to be cast to an integer since it's not explicitly supported by string format routines
//there's no format specifier for bool type, but it should work with integer format specifiers
template<>
inline void PIXCopyEventArgument<bool>(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, bool argument)
{
    if (destination < limit)
    {
        *destination = (UINT64)(argument);
        ++destination;
    }
}

inline void PIXCopyEventArgumentSlowest(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, _In_ PCSTR argument)
{
    *destination++ = PIXEncodeStringInfo(0, 8, TRUE, FALSE);
    while (destination < limit)
    {
        UINT64 c = argument[0];
        if (!c)
        {
            *destination++ = 0;
            return;
        }
        UINT64 x = c;
        c = argument[1];
        if (!c)
        {
            *destination++ = x;
            return;
        }
        x |= c << 8;
        c = argument[2];
        if (!c)
        {
            *destination++ = x;
            return;
        }
        x |= c << 16;
        c = argument[3];
        if (!c)
        {
            *destination++ = x;
            return;
        }
        x |= c << 24;
        c = argument[4];
        if (!c)
        {
            *destination++ = x;
            return;
        }
        x |= c << 32;
        c = argument[5];
        if (!c)
        {
            *destination++ = x;
            return;
        }
        x |= c << 40;
        c = argument[6];
        if (!c)
        {
            *destination++ = x;
            return;
        }
        x |= c << 48;
        c = argument[7];
        if (!c)
        {
            *destination++ = x;
            return;
        }
        x |= c << 56;
        *destination++ = x;
        argument += 8;
    }
}

inline void PIXCopyEventArgumentSlow(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, _In_ PCSTR argument)
{
    if (PIXIsPointerAligned<8>(argument))
    {
        *destination++ = PIXEncodeStringInfo(0, 8, TRUE, FALSE);
        UINT64* source = (UINT64*)argument;
        while (destination < limit)
        {
            UINT64 qword = *source++;
            *destination++ = qword;
            //check if any of the characters is a terminating zero
            if (!((qword & 0xFF00000000000000) &&
                (qword & 0xFF000000000000) &&
                (qword & 0xFF0000000000) &&
                (qword & 0xFF00000000) &&
                (qword & 0xFF000000) &&
                (qword & 0xFF0000) &&
                (qword & 0xFF00) &&
                (qword & 0xFF)))
            {
                break;
            }
        }
    }
    else
    {
        PIXCopyEventArgumentSlowest(destination, limit, argument);
    }
}

template<>
inline void PIXCopyEventArgument<PCSTR>(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, _In_ PCSTR argument)
{
    if (destination < limit)
    {
        if (argument != nullptr)
        {
#if defined(_AMD64_) || defined(_X86_)
            if (PIXIsPointerAligned<16>(argument))
            {
                *destination++ = PIXEncodeStringInfo(0, 16, TRUE, FALSE);
                __m128i zero = _mm_setzero_si128();
                if (PIXIsPointerAligned<16>(destination))
                {
                    while (destination < limit)
                    {
                        __m128i mem = _mm_load_si128((__m128i*)argument);
                        _mm_store_si128((__m128i*)destination, mem);
                        //check if any of the characters is a terminating zero
                        __m128i res = _mm_cmpeq_epi8(mem, zero);
                        destination += 2;
                        if (_mm_movemask_epi8(res))
                            break;
                        argument += 16;
                    }
                }
                else
                {
                    while (destination < limit)
                    {
                        __m128i mem = _mm_load_si128((__m128i*)argument);
                        _mm_storeu_si128((__m128i*)destination, mem);
                        //check if any of the characters is a terminating zero
                        __m128i res = _mm_cmpeq_epi8(mem, zero);
                        destination += 2;
                        if (_mm_movemask_epi8(res))
                            break;
                        argument += 16;
                    }
                }
            }
            else
#endif // _AMD64_ || _X86_
            {
                PIXCopyEventArgumentSlow(destination, limit, argument);
            }
        }
        else
        {
            *destination++ = 0ull;
        }
    }
}

template<>
inline void PIXCopyEventArgument<PSTR>(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, _In_ PSTR argument)
{
    PIXCopyEventArgument(destination, limit, (PCSTR)argument);
}

inline void PIXCopyEventArgumentSlowest(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, _In_ PCWSTR argument)
{
    *destination++ = PIXEncodeStringInfo(0, 8, FALSE, FALSE);
    while (destination < limit)
    {
        UINT64 c = argument[0];
        if (!c)
        {
            *destination++ = 0;
            return;
        }
        UINT64 x = c;
        c = argument[1];
        if (!c)
        {
            *destination++ = x;
            return;
        }
        x |= c << 16;
        c = argument[2];
        if (!c)
        {
            *destination++ = x;
            return;
        }
        x |= c << 32;
        c = argument[3];
        if (!c)
        {
            *destination++ = x;
            return;
        }
        x |= c << 48;
        *destination++ = x;
        argument += 4;
    }
}

inline void PIXCopyEventArgumentSlow(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, _In_ PCWSTR argument)
{
    if (PIXIsPointerAligned<8>(argument))
    {
        *destination++ = PIXEncodeStringInfo(0, 8, FALSE, FALSE);
        UINT64* source = (UINT64*)argument;
        while (destination < limit)
        {
            UINT64 qword = *source++;
            *destination++ = qword;
            //check if any of the characters is a terminating zero
            //TODO: check if reversed condition is faster
            if (!((qword & 0xFFFF000000000000) &&
                (qword & 0xFFFF00000000) &&
                (qword & 0xFFFF0000) &&
                (qword & 0xFFFF)))
            {
                break;
            }
        }
    }
    else
    {
        PIXCopyEventArgumentSlowest(destination, limit, argument);
    }
}

template<>
inline void PIXCopyEventArgument<PCWSTR>(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, _In_ PCWSTR argument)
{
    if (destination < limit)
    {
        if (argument != nullptr)
        {
#if defined(_AMD64_) || defined(_X86_)
            if (PIXIsPointerAligned<16>(argument))
            {
                *destination++ = PIXEncodeStringInfo(0, 16, FALSE, FALSE);
                __m128i zero = _mm_setzero_si128();
                if (PIXIsPointerAligned<16>(destination))
                {
                    while (destination < limit)
                    {
                        __m128i mem = _mm_load_si128((__m128i*)argument);
                        _mm_store_si128((__m128i*)destination, mem);
                        //check if any of the characters is a terminating zero
                        __m128i res = _mm_cmpeq_epi16(mem, zero);
                        destination += 2;
                        if (_mm_movemask_epi8(res))
                            break;
                        argument += 8;
                    }
                }
                else
                {
                    while (destination < limit)
                    {
                        __m128i mem = _mm_load_si128((__m128i*)argument);
                        _mm_storeu_si128((__m128i*)destination, mem);
                        //check if any of the characters is a terminating zero
                        __m128i res = _mm_cmpeq_epi16(mem, zero);
                        destination += 2;
                        if (_mm_movemask_epi8(res))
                            break;
                        argument += 8;
                    }
                }
            }
            else
#endif // _AMD64_ || _X86_
            {
                PIXCopyEventArgumentSlow(destination, limit, argument);
            }
        }
        else
        {
            *destination++ = 0ull;
        }
    }
}

template<>
inline void PIXCopyEventArgument<PWSTR>(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, _In_ PWSTR argument)
{
    PIXCopyEventArgument(destination, limit, (PCWSTR)argument);
};

#if defined(__d3d12_x_h__) || defined(__d3d12_h__)

inline void PIXSetMarkerOnContext(_In_ ID3D12GraphicsCommandList* commandList, _In_reads_bytes_(size) void* data, UINT size)
{
    commandList->SetMarker(D3D12_EVENT_METADATA, data, size);
}

inline void PIXSetMarkerOnContext(_In_ ID3D12CommandQueue* commandQueue, _In_reads_bytes_(size) void* data, UINT size)
{
    commandQueue->SetMarker(D3D12_EVENT_METADATA, data, size);
}

inline void PIXBeginEventOnContext(_In_ ID3D12GraphicsCommandList* commandList, _In_reads_bytes_(size) void* data, UINT size)
{
    commandList->BeginEvent(D3D12_EVENT_METADATA, data, size);
}

inline void PIXBeginEventOnContext(_In_ ID3D12CommandQueue* commandQueue, _In_reads_bytes_(size) void* data, UINT size)
{
    commandQueue->BeginEvent(D3D12_EVENT_METADATA, data, size);
}
inline void PIXEndEventOnContext(_In_ ID3D12GraphicsCommandList* commandList)
{
    commandList->EndEvent();
}

inline void PIXEndEventOnContext(_In_ ID3D12CommandQueue* commandQueue)
{
    commandQueue->EndEvent();
}

#endif //__d3d12_x_h__

template<class T> struct PIXInferScopedEventType { typedef T Type; };
template<class T> struct PIXInferScopedEventType<const T> { typedef T Type; };
template<class T> struct PIXInferScopedEventType<T*> { typedef T Type; };
template<class T> struct PIXInferScopedEventType<T* const> { typedef T Type; };
template<> struct PIXInferScopedEventType<UINT64> { typedef void Type; };
template<> struct PIXInferScopedEventType<const UINT64> { typedef void Type; };
template<> struct PIXInferScopedEventType<INT64> { typedef void Type; };
template<> struct PIXInferScopedEventType<const INT64> { typedef void Type; };
template<> struct PIXInferScopedEventType<UINT> { typedef void Type; };
template<> struct PIXInferScopedEventType<const UINT> { typedef void Type; };
template<> struct PIXInferScopedEventType<INT> { typedef void Type; };
template<> struct PIXInferScopedEventType<const INT> { typedef void Type; };
#endif //_PIXEventsCommon_H_