1 //          Copyright Ferdinand Majerech 2014.
2 // Distributed under the Boost Software License, Version 1.0.
3 //    (See accompanying file LICENSE_1_0.txt or copy at
4 //          http://www.boost.org/LICENSE_1_0.txt)
5 
6 
7 /** Functionality needed to record profiling data.
8  *
9  * Profiler keeps track of profiling events.
10  *
11  * Zone is an RAII struct that records precise time at its construction and destruction.
12  *
13  * Realtime:
14  *
15  * Recording is very lightweight, without any complicated logic or heap allocations.
16  * This allows to use Tharsis-prof as a 'short-term _profiler' that can record profiling
17  * data over the duration of a few frames and be detect bottlenecks in real time. Game
18  * code can then react, for example by disabling resource intensive nonessential features
19  * (e.g. particle systems).
20  *
21  * Precision:
22  *
23  * Tharsis.prof measures time in D 'hectonanoseconds', or tenths of a microsecond. This is
24  * precise enough to detect irregular overhead during frames but not to profile individual
25  * instructions in a linear algebra function. Such use cases are better covered by
26  * a Callgrind or a sampling _profiler such as perf or CodeAnalyst. Also note that some
27  * platforms may not provide access to high-precision timers and the results may be even
28  * less precise. At least Linux, Windows and OSX should be alright, though.
29  */
30 module tharsis.prof.profiler;
31 
32 
33 import std.algorithm;
34 import std.datetime;
35 import std.exception;
36 
37 import tharsis.prof.event;
38 
39 // We're measuring in hectonanoseconds.
40 //
41 // Second is 10M hnsecs.
42 // Minute is 600M hnsecs.
43 // Hour is 36G hnsecs.
44 
45 
46 // TODO: Time slicing is unrealiable due to multiple events having the same time.
47 //       ChunkyEventList has the SliceExtents as a hack to avoid this issue, but it's not
48 //       a general solution. To get precise slices e.g. for Zones, we need to
49 //       differentiate Events with the same time. On way to do this is to encode an extra
50 //       number before each event: 0 by default (and ensure it takes 0 bytes in that
51 //       case), 1 for second event with the same time, 2 for third, etc.
52 //       We can use variable-length encoding just like when encoding time gaps.
53 //       TODO IMPROVEMENT: We probably don't even need to store this number in profile
54 //       data; just let EventRange generate it as a part of the event (eventIndex?)
55 //       - This should work even with wrapped/filtered/slice EventRanges as there is
56 //       always a full EventRange below (or for slices that aren't the full EventRange,
57 //       when we add explicit slicing, keep the index of the first Event in the slice.).
58 //       We can probably use this to replace SliceExtents (although ChunkyEventRange will
59 //       need custom code to generate event IDs as it contatenates multiple EventRanges)
60 //       2014-11-28
61 // TODO: Use core.time.Duration once its API is @nogc and update the examples to use it.
62 // TODO: Frame-based analysis. E.g. find the longest frame and create a range over its
63 //       zones (really just a time slice of a ZoneRange). 2014-08-31
64 // TODO: Slicing of EventRange, ZoneRange based on a time interval. 2014-08-31
65 // TODO: Once time slicing works, this example:
66 //       Get the longest, shortest (in duration) top-level zone, and all zones within it.
67 // TODO: ZoneTree - with a single parent node for all nodes with no parents.
68 //       It shouldn't use traditional separately allocated nodes connected by pointers,
69 //       and should instead use a big array of nodes and possibly auxiliary array/s to
70 //       translate parent IDs to node indices.
71 //       What ZoneTree should allow:
72 //         * Filtering nodes by percentage of parent duration (i.e. ignore anything that's
73 //           e.g. < 5% of parent)
74 //         * Tree nodes should have methods to return ForwardRanges of ZoneData,
75 //           and the nodes themselves should be ForwardRanges of nodes. This would allow
76 //           using std.algorithm.filter to filter nodes and still have access to their
77 //           children.
78 //       2014-08-31
79 // TODO: (maybe): Compress chunks of profileData by zlib or something similar. E.g.
80 //       compress every 64kiB. profileData can then be read through a range wrapper
81 //       that has a fixed-size decompression buffer. Compressed data in profileData would
82 //       be represented by a 'Compressed' EventID followed by length of compressed data.
83 // TODO: Compress info strings with a short string compressor like shoco or smaz (both at
84 //       github) (of course, will need to convert whatever we use to D).
85 
86 // TODO: Allow specifying zone info as a compile-time param:
87 //       If specified at compile time, instead of ZoneEvent we use a deduplicatedZoneEvent
88 //       or something with a similar name, which should just store a hash or maybe
89 //       a 2-byte index to a global immutable array of strings. That array would be
90 //       built in static this() emitted from all Zone() ctors with a compile-time info
91 //       string. (Since we know it at compile-time, we can do any deduplication at
92 //       compile-time or static ctor-time - and the restulting structure can be immutable
93 //       - usable from multiple threads)
94 //       To make this work with the profiling data stream, the first time a compile-time
95 //       string is used in a zone, add an event describing that string. Following events
96 //       will just contain an index.
97 
98 
99 /** _Zone of profiled code.
100  *
101  * Emits a zone start event (recording start time) at construction and a zone end event
102  * (recording end time) at destruction.
103  *
104  * Examples:
105  * --------------------
106  * // Zones can be nested:
107  * while(!done)
108  * {
109  *     auto frameZone = Zone(profiler, "frame");
110  *     {
111  *         auto renderingZone = Zone(profiler, "rendering");
112  *
113  *         // do rendering here
114  *     }
115  *     {
116  *         auto physicsZone = Zone(profiler, "physics");
117  *
118  *         // do physics here
119  *     }
120  * }
121  * --------------------
122  *
123  * --------------------
124  * // A nested zone must be fully contained in its parent zone, e.g. this won't work:
125  * auto zone1 = Zone(profiler, "zone1");
126  * while(!done)
127  * {
128  *     auto zone2 = Zone(profiler, "zone1");
129  *     // WRONG: zone1 destroyed manually before zone2
130  *     destroy(zone1);
131  *
132  *     // zone2 implicitly destroyed at the end of scope
133  * }
134  * --------------------
135  */
136 struct Zone
137 {
138 private:
139     // Nesting level of this Zone. Top-level zones have nestLevel_ of 1, their children 2, etc.
140     uint nestLevel_;
141 
142     // Reference to the Profiler so we can emit the zone end event from the dtor.
143     Profiler profiler_;
144 
145     // Copying a Zone would completely break it.
146     @disable this(this);
147     @disable bool opAssign(ref Zone);
148     @disable this();
149 
150 public:
151     /** Construct a zone to record with specified _profiler.
152      *
153      * Emits the zone start event.
154      *
155      * Params:
156      *
157      * profiler = Profiler to record into. If $(D null), the zone is ignored. This enables
158      *            'optional profiling', where instrumenting code (zones) is always present
159      *            in the code but only activated when a Profiler exists.
160      * info     = Zone information string. Used to recognize zones when parsing and
161      *            accumulating profile data. Can be the 'name' of the zone, possibly with
162      *            some extra _info (e.g. "frame": entire frame or "batch 5": fifth draw
163      *            batch). $(B Must not) be empty or longer than 255 characters and
164      *            $(B must not) contain zero ($(D '\0')) characters.
165      */
166     this(Profiler profiler, string info) @trusted nothrow
167     {
168         if(profiler !is null)
169         {
170             nestLevel_ = profiler.zoneStartEvent(info);
171         }
172         profiler_ = profiler;
173     }
174 
175     /** A shortcut to call variableEvent() of the zone's profiler, if any.
176      *
177      * If a null Profiler has been passed to the Zone's constructor, this call is ignored.
178      *
179      * See_Also: Profiler.variableEvent().
180      */
181     void variableEvent(string name, V)(const V value) @safe nothrow
182     {
183         if(profiler_ !is null)
184         {
185             profiler_.variableEvent!(name, V)(value);
186         }
187     }
188 
189     /** Destructor. Emits the zone end event with the Profiler.
190      *
191      * Can be called early (using destroy()) to end the zone before end of the scope.
192      */
193     ~this() @trusted nothrow
194     {
195         if(profiler_ !is null)
196         {
197             profiler_.zoneEndEvent(nestLevel_);
198         }
199         // Ensures that if we call the destructor early, we won't emit the end event twice.
200         profiler_ = null;
201     }
202 }
203 unittest
204 {
205     // Test 'null profiler' (Zone is automatically ignored if a null profiler is passed.)
206     foreach(i; 0 .. 10)
207     {
208         auto frameZone = Zone(null, "frame");
209         {
210             auto renderingZone = Zone(null, "rendering");
211         }
212         {
213             auto physicsZone = Zone(null, "physics");
214         }
215     }
216 }
217 
218 static assert(!allEventIDs.canFind!(e => e > eventIDMask),
219               "Too high EventID value; last 3 bits are reserved for byte count");
220 
221 /* All EventID values must be 5-bit integers, with 0 reserved for 'Checkpoint' EventID
222  * which is the only zero byte value that may appear in profiling data (this allows
223  * 'going backwards' in profile data).
224  *
225  * In profile data, an EventID is packed into a byte with a 3-bit 'byteCount' value,
226  * which specifies the number of bytes immediately after the EventID/byteCount byte
227  * used to encode time elapsed since the last event. Each of these bytes consists of
228  * 7 bits of the 'time elapsed' value (first byte - lowest 7 bits, second byte - higher
229  * 7 bits, etc. with as many bytes as there are (at most 7)). The highest bit is always
230  * set to 1 (to ensure the byte zero).
231  */
232 
233 // Number of bits in event ID bytes storing the event ID itself. Other bytes store 'byteCount'.
234 package enum eventIDBits = 5;
235 // Mask of the bits in an event ID byte that store the event ID itself.
236 package enum eventIDMask = 0b00011111;
237 
238 // Number of bits in a time byte used to store the actual time value.
239 package enum timeByteBits = 7;
240 // Mask of the bits in a time byte used to store the actual time value.
241 package enum timeByteMask = 0b01111111;
242 // Mask to set the highest bit of time bytes and other 7-bit encodings to 1.
243 package enum lastBit = 0b10000000;
244 
245 // Number of bytes storing absolute time in a checkpoint event (same format as time bytes).
246 package enum checkpointByteCount = 8;
247 
248 
249 /** Records profiling events into user-specified buffer.
250  *
251  * Used together with Zone to record data and with EventRange/ZoneRange/etc. for analysis.
252  *
253  * Profiler writes profiling data into a byte buffer passed to Profiler constructor by the
254  * user. Once there is not enough space to write any more profiling events, the profiler
255  * quietly ignores any events (this can be checked by outOfSpace()). Profiler $(B never
256  * allocates heap memory) by itself.
257  *
258  * Recorded data can be accessed at any time through profileData() and analyzed with help
259  * of EventRange, ZoneRange and other tharsis.prof utilities. reset() can be used to
260  * clear recorded data and start recording from scratch.
261  *
262  * Note:
263  *
264  * Profiler is $(B not) designed to be used from multiple threads. If you need to profile
265  * multiple threads, create a separate Profiler for each thread and either analyze the
266  * results through separate EventRange/ZoneRange instances, or merge them through
267  * accumulatedZoneRange.
268  *
269  * Note:
270  *
271  * Accessing profile data from an out-of-space profiler or in the middle of a zone will
272  * result in an EventRange that's missing some zone end events. Incomplete raw profiling
273  * results or EventRanges should never be concatenated. ZoneRange will automatically end
274  * the unfinished zones.
275  *
276  * Memory consumption:
277  *
278  * Depending on the worload and number of zones, Profiler can eat through assigned memory
279  * rather quickly.  With 10000 zones at 120 FPS the overhead is going to be around 14 MiB
280  * $(B per second).
281  */
282 final class Profiler
283 {
284     /// Diagnostics used to profile the profiler.
285     struct Diagnostics
286     {
287         size_t zoneStartCount;
288         size_t zoneEndCount;
289         size_t infoCount;
290         size_t frameCount;
291         size_t variableCount;
292     }
293 
294     /// Maximum size of any single event in bytes. Used to quickly check if we're out of space.
295     enum maxEventBytes = 512;
296 
297 private:
298     // Buffer we're recording profiling data to. Passed to constructor and never reallocated.
299     ubyte[] profileData_;
300 
301     // Size recorded data in profileData_.
302     //
303     // When this reaches profileData_.length, we're out of space (actually, a bit sooner,
304     // see outOfSpace()).
305     size_t profileDataUsed_;
306 
307     // Nesting level of the current zone (used to check that child zones are fully
308     // contained by their parents)
309     uint zoneNestLevel_;
310 
311     // Start time of the last event (as returned by std.datetime.Clock.currStdTime()).
312     ulong lastTime_;
313 
314     // Time when the Profiler was constructed (as returned by std.datetime.Clock.currStdTime()).
315     ulong startTime_;
316 
317     // Diagnostics about the profiler, such as which evenets are the most common.
318     Diagnostics diagnostics_;
319 
320 public:
321     /** Construct a Profiler writing profiling data to specified buffer.
322      *
323      * Profiler doesn't allocate heap memory. It will write profiling data into given
324      * buffer until it runs out of space, at which point it will silently stop profiling
325      * (this can be detected by outOfSpace() ).
326      *
327      * Params:
328      *
329      * profileBuffer = Buffer to write profile data to. Must be at least
330      * Profiler.maxEventBytes long.
331      */
332     this(ubyte[] profileBuffer) @safe nothrow
333     {
334         assert(profileBuffer.length >= maxEventBytes,
335                "Buffer passed to Profiler must be at least Profiler.maxEventBytes long");
336         profileData_     = profileBuffer;
337         profileDataUsed_ = 0;
338         startTime_ = lastTime_ = Clock.currStdTime().assumeWontThrow;
339     }
340 
341     /** Is the profiler out of space?
342      *
343      * When the profiler uses up all memory passed to the constructor, it quietly stops
344      * profiling. This can be used to determine if that has happened.
345      */
346     bool outOfSpace() @safe pure nothrow const @nogc
347     {
348         // maxEventBytes ensures there's enough space for any single event (the longest
349         // event, Info, can be at most 258 bytes long.
350         return profileDataUsed_ + maxEventBytes >= profileData_.length;
351     }
352 
353     /** Get diagnostics about the profiler, such as which events are the most common.
354      *
355      * Useful for profiling the profiler.
356      */
357     Diagnostics diagnostics() @safe pure nothrow const @nogc
358     {
359         return diagnostics_;
360     }
361 
362     /** Emit a checkpoint event.
363      *
364      * A checkpoint event inserts the absolute time of the last event before the
365      * checkpoint. This allows reconstrucing event times from the checkpoint instead of
366      * from the start. It also inserts a zero byte into profiling data, which no other
367      * event can do. This allows to rewind profiler data without having to go all the way
368      * to the start.
369      *
370      * Note: neither of these options is actually exploited at the moment; but they should
371      *       be useful for profiling visualizers and when processing massive profiling
372      *       outputs.
373      */
374     void checkpointEvent() @safe pure nothrow @nogc
375     {
376         if(outOfSpace) { return; }
377         eventWithTime(EventID.Checkpoint, 0);
378 
379         auto time = lastTime_ - startTime_;
380         // Add 8 time bytes (can represent 128 ** 8 hnsecs)
381         foreach(b; 0 .. checkpointByteCount) { time = addTimeByte(time); }
382     }
383 
384     /** Emit a variable event.
385      *
386      * Used to keep track of various variables that change over time, e.g. FPS, network
387      * traffic, entity count...; Despiker supports viewing the value of a variable once
388      * per frame.
389      *
390      * Params:
391      *
392      * name  = Variable name (specified at compile-time). The variable event itself does
393      *         not contain the name; the name is stored in an info event immediately
394      *         after the variable event.
395      * V     = Variable type. Only int, uint and float are supported at the moment.
396      * value = Value of the variable
397      *
398      * Example:
399      * --------------------
400      * // Profiler profiler
401      * profiler.variableEvent!"FPS"(4.5f);
402      * --------------------
403      */
404     void variableEvent(string name, V)(const V value) @trusted nothrow
405     {
406         static assert(name.length <= ubyte.max,
407                       "variableEvent name strings must be no longer than 255 bytes");
408 
409         if(outOfSpace) { return; }
410         ++diagnostics_.variableCount;
411 
412         const time = Clock.currStdTime.assumeWontThrow;
413         const timeGap = time - lastTime_;
414         lastTime_ = time;
415 
416         eventWithTime(EventID.Variable, timeGap);
417         enum varType = variableType!V;
418         profileData_[profileDataUsed_++] = varType;
419 
420 
421         V[1] valueCopy;
422         valueCopy[0] = value;
423         import std.system;
424 
425         import tinyendian;
426         // Variable bytes are always in big endian order.
427         if(std.system.endian != Endian.bigEndian)
428         {
429             swapByteOrder(valueCopy[]);
430         }
431 
432         static assert(V.sizeof == 4,
433                       "variableEvent() does not support types with sizeof other than 4 yet");
434 
435         const encoded = encode7Bit(cast(ubyte[4])valueCopy, profileData_[profileDataUsed_ .. $]);
436         profileDataUsed_ += encoded.length;
437         infoEvent(name);
438     }
439 
440     /** Emit a zone start event, when code enters a zone.
441      *
442      * Params:
443      *
444      * info = Information about the zone (e.g. its name). Will be added as an info event
445      *        following the zone start event. $(B Must not) be empty or longer than 255
446      *        characters and $(B must not) contain zero ($(D '\0')) characters.
447      *
448      * Returns: Nesting level of the newly started zone. Must be passed when corresponding
449      *          zoneEndEvent() is called. Used to ensure child events end before their
450      *          parent events.
451      *
452      * Note: zoneStartEvent should only be aleed directly where the Zone struct is not
453      *       sufficient (i.e. where a zone cannot be wrapped in a scope).
454      */
455     uint zoneStartEvent(const string info) @system nothrow
456     {
457         assert(info.length <= ubyte.max, "Zone info strings can be at most 255 bytes long");
458         assert(info.length > 0, "Zone info strings must not be empty");
459         assert(!(cast(ubyte[])info).canFind(0), "Zone info strings must not contain '\\0'");
460 
461         const time = Clock.currStdTime.assumeWontThrow;
462 
463         const timeGap = time - lastTime_;
464         lastTime_ = time;
465 
466         if(outOfSpace) { return ++zoneNestLevel_; }
467         ++diagnostics_.zoneStartCount;
468 
469         eventWithTime(EventID.ZoneStart, timeGap);
470         if(outOfSpace) { return ++zoneNestLevel_; }
471         infoEvent(info);
472 
473         return ++zoneNestLevel_;
474     }
475 
476     /** Emit a zone end event, when code exits a zone.
477      *
478      * Params:
479      *
480      * nestLevel = Nesting level of the zone. Used to check that zones are exited in the
481      *             correct (hierarchical) order, i.e. a child zone must be ended before
482      *             its parent zone.
483      *
484      * Note: zoneEndEvent should only be called directly where the Zone struct is not
485      *       sufficient (i.e. where a zone cannot be wrapped in a scope).
486      */
487     void zoneEndEvent(const uint nestLevel) @system nothrow
488     {
489         assert(nestLevel == zoneNestLevel_,
490                "Zones must be hierarchical; detected a zone that ends after its parent");
491         --zoneNestLevel_;
492         const time = Clock.currStdTime.assumeWontThrow;
493 
494         const timeGap = time - lastTime_;
495         lastTime_ = time;
496         if(outOfSpace) { return; }
497         ++diagnostics_.zoneEndCount;
498 
499         eventWithTime(EventID.ZoneEnd, timeGap);
500     }
501 
502 
503     /** Reset the profiler.
504      *
505      * Clears all profiled data. Reuses the buffer passed by the constructor to start
506      * profiling from scratch.
507      *
508      * Can only be called outside of any Zone.
509      */
510     void reset() @safe nothrow
511     {
512         // Doing the reset inside a zone would result in a zone end without zone start,
513         // which we can't handle (at least for now).
514         assert(zoneNestLevel_ == 0, "Profiler can only be reset() while not in a Zone");
515         profileData_[]   = 0;
516         profileDataUsed_ = 0;
517         startTime_ = lastTime_ = Clock.currStdTime().assumeWontThrow;
518     }
519 
520     /** Get the raw data recorded by the profiler.
521      *
522      * This is a slice to the buffer passed to Profiler's constructor.
523      */
524     const(ubyte)[] profileData() @safe pure nothrow const @nogc
525     {
526         return profileData_[0 .. profileDataUsed_];
527     }
528 
529     /** Get the nest level of the current zone, if any.
530      *
531      * 0 means the profiler is not in any zone.
532      */
533     uint zoneNestLevel() @safe pure nothrow const @nogc
534     {
535         return zoneNestLevel_;
536     }
537 
538 private:
539     /* Add one byte of a 'time byte' sequence that encodes a time span.
540      *
541      * Takes the time we're encoding and returns remainder of that time that is not yet
542      * encoded into time btyes (time / 128).
543      */
544     ulong addTimeByte(ulong time) @safe pure nothrow @nogc
545     {
546         // The last bit ensures the resulting byte is never 0
547         profileData_[profileDataUsed_++] = lastBit | cast(ubyte)(time & timeByteMask);
548         return time >> timeByteBits;
549     }
550 
551     /* Write an event ID and bytes specifying the time left (gap) since the last event.
552      *
553      * Event ID is packed *time gap byte count* into a byte (low 5 bits for event ID, high
554      * 3 bits for byte count). This byte is followed by time gap bytes, each of which
555      * stores 7 time gap bits (topmost bit is always 1 to ensure the bytes are never 0, to
556      * avoid confusion with checkpoint bytes, which are 0). The first byte stores the
557      * lowest 7 bits of the time gap, second stores the next 7 bits, etc.
558      */
559     void eventWithTime(EventID id, ulong timeLeft) @trusted pure nothrow @nogc
560     {
561         const idIndex = profileDataUsed_++;
562         size_t byteCount = 0;
563 
564         // Add as many time bytes as needed (1 byte can represent 128 hnsecs, 2 bytes can
565         // represent 128 ** 2, etc.)
566         while(timeLeft > 0)
567         {
568             timeLeft = addTimeByte(timeLeft);
569             ++byteCount;
570             if(byteCount >= 8)
571             {
572                 assert(false, "Tharsis.prof does not support time gaps over 228 years");
573             }
574         }
575         const ubyte idByte = cast(ubyte)(id | (byteCount << eventIDBits));
576         profileData_[idIndex] = idByte;
577         assert((idByte & eventIDMask) == id,  "EventID saved incorrectly");
578         assert((idByte >> eventIDBits) == byteCount, "byte count saved incorrectly");
579     }
580     unittest
581     {
582         import std.array;
583         import std.stdio;
584 
585         writeln("time gap recording unittest");
586 
587         ubyte[4096] storage;
588         auto profiler = new Profiler(storage[]);
589         const ulong time = 33457812484;
590         profiler.eventWithTime(EventID.ZoneStart, time);
591 
592         auto data = profiler.profileData;
593         ulong recordedTime = 0;
594         const timeBytes = data.front >> eventIDBits;
595 
596         data.popFront();
597         foreach(b; 0 .. timeBytes)
598         {
599             recordedTime += cast(ulong)(data.front() & timeByteMask) << (b * timeByteBits);
600             data.popFront();
601         }
602 
603         assert(recordedTime == time);
604     }
605 
606     /* Emit an info event.
607      *
608      * This event stores character data (at most 255 bytes). Currently info events are
609      * only used to 'describe' immediately previous zone start events.
610      */
611     void infoEvent(const string info) @trusted pure nothrow @nogc
612     {
613         assert(info.length <= ubyte.max, "Zone info strings can be at most 255 bytes long");
614         if(outOfSpace) { return; }
615         ++diagnostics_.infoCount;
616         eventWithTime(EventID.Info, 0);
617         profileData_[profileDataUsed_++] = cast(ubyte)(info.length);
618         profileData_[profileDataUsed_ .. profileDataUsed_ + info.length] = cast(ubyte[])info[];
619         profileDataUsed_ += info.length;
620     }
621 }
622 ///
623 unittest
624 {
625     ubyte[] storage = new ubyte[Profiler.maxEventBytes + 2048];
626     auto profiler = new Profiler(storage);
627 
628     // Simulate 16 'frames'
629     foreach(frame; 0 .. 16)
630     {
631         Zone topLevel = Zone(profiler, "frame");
632         // Record a variable event (useful for tracking FPS, entity count, network
633         // traffic, etc.). Only uint, int and float supported at the moment.
634         profiler.variableEvent!"frame" = cast(uint)frame;
635 
636         // Simulate frame overhead. Replace this with your frame code.
637         {
638             Zone nested1 = Zone(profiler, "frameStart");
639             foreach(i; 0 .. 1000) { continue; }
640         }
641         {
642             Zone nested2 = Zone(profiler, "frameCore");
643             foreach(i; 0 .. 10000) { continue; }
644         }
645     }
646 
647     // see tharsis.profiler.ranges for how to process recorded data
648 }
649 ///
650 unittest
651 {
652     // This example uses C malloc/free and std.typecons.scoped to show how to use Profiler
653     // without GC allocations.
654 
655     const storageLength = Profiler.maxEventBytes + 2048;
656 
657     import core.stdc.stdlib;
658     // A simple typed-slice malloc wrapper function would avoid the ugly cast/slicing.
659     ubyte[] storage  = (cast(ubyte*)malloc(storageLength))[0 .. storageLength];
660     scope(exit) { free(storage.ptr); }
661 
662     import std.typecons;
663     auto profiler = scoped!Profiler(storage);
664 
665     // std.typecons.scoped! stores the Profiler on the stack.
666     // Simulate 16 'frames'
667     foreach(frame; 0 .. 16)
668     {
669         Zone topLevel = Zone(profiler, "frame");
670 
671         // Simulate frame overhead. Replace this with your frame code.
672         {
673             Zone nested1 = Zone(profiler, "frameStart");
674             foreach(i; 0 .. 1000) { continue; }
675         }
676         {
677             Zone nested2 = Zone(profiler, "frameCore");
678             foreach(i; 0 .. 10000) { continue; }
679         }
680     }
681 
682     // see tharsis.profiler.ranges for how to process recorded data
683 }
684 unittest
685 {
686     import std.range;
687 
688     import tharsis.prof.ranges;
689 
690     Profiler profiler;
691 
692     void addZones()
693     {
694         foreach(i; 0 .. 3)
695         {
696             auto startTime = Clock.currStdTime().assumeWontThrow;
697             // Wait long enough to store the time gap in >2 bytes.
698             while(Clock.currStdTime().assumeWontThrow - startTime <= 65536)
699             {
700                 continue;
701             }
702             auto zone1 = Zone(profiler, "zone1");
703             {
704                 auto zone11 = Zone(profiler, "zone11");
705             }
706             startTime = Clock.currStdTime().assumeWontThrow;
707             // Wait long enough to store the time gap in >1 bytes.
708             while(Clock.currStdTime().assumeWontThrow - startTime <= 256)
709             {
710                 continue;
711             }
712             {
713                 auto zone12 = Zone(profiler, "zone12");
714             }
715             profiler.variableEvent!"var_i"  = cast(float)i;
716             profiler.variableEvent!"var_pi" = 3.14f;
717             profiler.checkpointEvent();
718         }
719     }
720 
721     {
722         auto storage = new ubyte[Profiler.maxEventBytes + 256];
723         profiler = new Profiler(storage);
724         addZones();
725 
726         // Check if the events generated are what we expect.
727         auto evts = profiler.profileData.eventRange;
728         foreach(i; 0 .. 3) with(EventID)
729         {
730             assert(evts.front.id == ZoneStart);                           evts.popFront();
731             assert(evts.front.id == Info && evts.front.info == "zone1");  evts.popFront();
732 
733             assert(evts.front.id == ZoneStart);                           evts.popFront();
734             assert(evts.front.id == Info && evts.front.info == "zone11"); evts.popFront();
735             assert(evts.front.id == ZoneEnd);                             evts.popFront();
736 
737             assert(evts.front.id == ZoneStart);                           evts.popFront();
738             assert(evts.front.id == Info && evts.front.info == "zone12"); evts.popFront();
739             assert(evts.front.id == ZoneEnd);                             evts.popFront();
740 
741             assert(evts.front.id == Variable &&
742                    evts.front.var.type == VariableType.Float &&
743                    evts.front.var.varFloat == cast(float)i);              evts.popFront();
744             assert(evts.front.id == Info && evts.front.info == "var_i");  evts.popFront();
745             assert(evts.front.id == Variable &&
746                    evts.front.var.type == VariableType.Float &&
747                    evts.front.var.varFloat == 3.14f);                     evts.popFront();
748             assert(evts.front.id == Info && evts.front.info == "var_pi"); evts.popFront();
749             const time = evts.front.time;
750 
751             // Checkpoint start time must match the previous event.
752             assert(evts.front.time == time);
753             assert(evts.front.id == Checkpoint);                          evts.popFront();
754             assert(evts.front.id == ZoneEnd);                             evts.popFront();
755         }
756 
757         auto zones = profiler.profileData.zoneRange;
758         assert(zones.walkLength == 9, "Unexpected number of zones");
759 
760         // Check that zones have expected nesting, infos and time ordering.
761         foreach(i; 0 .. 3)
762         {
763             const z11 = zones.front; zones.popFront();
764             const z12 = zones.front; zones.popFront();
765             const z1  = zones.front; zones.popFront();
766             assert(z11.parentID == z1.id && z12.parentID == z1.id);
767             assert(z11.nestLevel == 2 && z12.nestLevel == 2 && z1.nestLevel == 1);
768             assert(z11.info == "zone11" && z12.info == "zone12" && z1.info == "zone1");
769 
770             assert(z11.startTime >= z1.startTime &&
771                    z12.startTime >= z11.startTime + z11.duration &&
772                    z1.startTime + z1.duration >= z12.startTime + z12.duration);
773         }
774     }
775 
776     // Test what happens when Profiler runs out of space.
777     {
778         auto storage  = new ubyte[Profiler.maxEventBytes + 50];
779         profiler = new Profiler(storage);
780         addZones();
781 
782         ulong lastTime = 0;
783         foreach(zone; profiler.profileData.zoneRange)
784         {
785             assert(zone.endTime >= lastTime,
786                    "Incomplete profile data resulted in wrong order of zone end times");
787             lastTime = zone.endTime;
788         }
789     }
790 
791     {
792         auto storage  = new ubyte[Profiler.maxEventBytes + 128];
793         profiler = new Profiler(storage);
794         addZones();
795 
796         auto zones = profiler.profileData.zoneRange;
797 
798 
799         // Just count the number of instances of each zone.
800         size_t accum(size_t* aPtr, ref const ZoneData z) pure nothrow @nogc
801         {
802             return aPtr is null ? 1 : *aPtr + 1;
803         }
804         auto accumStorage = new AccumulatedZoneData!accum[zones.walkLength];
805         auto accumulated = accumulatedZoneRange!accum(accumStorage, zones.save);
806     }
807 }
808 
809 
810 package:
811 
812 /* Encode 4 bytes of data into 5 7-bit encoded bytes in target.
813  *
814  * The last bit of each byte in the 7-bit encoding is set to 1. This is used to avoid
815  * zero bytes in e.g. variable events (zero bytes can *only* be used by the
816  * CheckpointEvent, to allow searching for it backwards).
817  *
818  * Params:
819  *
820  * data   = Data to encode.
821  * target = First 5 bytes of target will store the encoded data. Must be at least 5 bytes.
822  *
823  * Returns: A 5-byte slice of target with the encoded data.
824  */
825 static ubyte[] encode7Bit(ubyte[4] data, ubyte[] target)
826     @safe pure nothrow @nogc
827 {
828     assert(target.length >= 5,
829             "Not enough space to encode a 4 bytes chunk into 7-bit encoding");
830 
831     // first 7 bits from first byte
832     ubyte b = lastBit | (data[0] & (0xFF >> 1));
833     target[0] = b;
834     // last bit from first byte (moved 7 to right),
835     // first 6 bits from second byte (moved by 1 to left)
836     b = lastBit | ((data[0] & (0xFF << 7)) >> 7) | ((data[1] & (0xFF >> 2)) << 1);
837     target[1] = b;
838     // last 2 bits from second byte (moved 6 to right),
839     // first 5 bits from third byte (moved 2 to left)
840     b = lastBit | ((data[1] & (0xFF << 6)) >> 6) | ((data[2] & (0xFF >> 3)) << 2);
841     target[2] = b;
842     // last 3 bits from third byte (moved 5 to right),
843     // first 4 bits from fourth byte (moved 3 to left)
844     b = lastBit | ((data[2] & (0xFF << 5)) >> 5) | ((data[3] & (0xFF >> 4)) << 3);
845     target[3] = b;
846     // last 4 bits from fourth byte (moved 4 to right)
847     b = lastBit | ((data[3] & (0xFF << 4)) >> 4);
848     target[4] = b;
849 
850     return target[0 .. 5];
851 }
852 
853 /* Decoded 4 bytes of data stored in 5 bytes of 7-bit encoded data.
854  *
855  * Params:
856  *
857  * data   = Data to decode.
858  * target = First 4 bytes of target will store the decoded data. Must be at least 4 bytes.
859  *
860  * Returns: A 4-byte slice of target with the encoded data.
861  *
862  * See_Also: encode7Bit
863  */
864 static ubyte[] decode7Bit(ubyte[5] encoded, ubyte[] target)
865     @safe pure nothrow @nogc
866 {
867     assert(target.length >= 4,
868             "Not enough space to decode a 5-byte chunk encoded in 7-bit encoding");
869 
870     target[0] = (encoded[0] & 0b01111111)        | ((encoded[1] & 0b00000001) << 7);
871     target[1] = ((encoded[1] & 0b01111110) >> 1) | ((encoded[2] & 0b00000011) << 6);
872     target[2] = ((encoded[2] & 0b01111100) >> 2) | ((encoded[3] & 0b00000111) << 5);
873     target[3] = ((encoded[3] & 0b01111000) >> 3) | ((encoded[4] & 0b00001111) << 4);
874 
875     return target[0 .. 4];
876 }
877 
878 unittest
879 {
880     import std.random;
881     foreach(attempt; 0 .. 1024)
882     {
883         uint[1] u;
884         float[1] f;
885         u[0] = uniform(uint.min, uint.max);
886         f[0] = uniform(-100000.0f, 100000.0f) + uniform(-1.0f, 1.0f);
887 
888         ubyte[5] encodeBuf;
889         ubyte[4] decodeBuf;
890         encode7Bit(cast(ubyte[4])u, encodeBuf[]);
891         decode7Bit(encodeBuf, decodeBuf[]);
892 
893         assert(*(cast(uint*)decodeBuf.ptr) == u[0],
894                 "encoded/decoded uint data does not match original");
895 
896         encode7Bit(cast(ubyte[4])f, encodeBuf[]);
897         decode7Bit(encodeBuf, decodeBuf[]);
898 
899         assert(*(cast(float*)decodeBuf.ptr) == f[0],
900                 "encoded/decoded float data does not match original");
901     }
902 }