1 //          Copyright Ferdinand Majerech 2014.
2 // Distributed under the Boost Software License, Version 1.0.
3 //    (See accompanying file LICENSE_1_0.txt or copy at
4 //          http://www.boost.org/LICENSE_1_0.txt)
5 
6 
7 /// Range that accumulates (merges) matching zones from one or more zone ranges.
8 module tharsis.prof.accumulatedzonerange;
9 
10 
11 import std.range;
12 import std.traits;
13 
14 import tharsis.prof.ranges;
15 import tharsis.prof.profiler;
16 
17 
18 
19 /** Data accumulated from multiple matching zones, generated by $(D
20  *  accumulatedZoneRange()).
21  *
22  * Extends $(D ZoneData) (derived using alias this) with a value returned by the $(D
23  * accumulate) function parameter of $(D accumulatedZoneRange()).
24  *
25  * Durations and start times of accumulated zones are summed into $(D zoneData.duration)
26  * and $(D zoneData.startTime). $(D id), $(D parentID) and $(D nestLevel) are updated so
27  * the elements of $(D accumulatedZoneRange) can still form trees just like elements of
28  * the $(D ZoneRange) that was accumulated.
29  */
30 struct AccumulatedZoneData(alias accumulate)
31 {
32     /// The 'base' ZoneData; startTime and duration are sums of accumulated ZoneData values.
33     ZoneData zoneData;
34     alias zoneData this;
35 
36     /// The value accumulated by the accumulate function.
37     ReturnType!accumulate accumulated;
38 }
39 
40 
41 /// Default match function for accumulatedZoneRange(). Compares ZoneData infos for equality.
42 bool defaultMatch(const(char)[] info1, const(char)[] info2) @safe pure nothrow @nogc
43 {
44     return info1 == info2;
45 }
46 
47 /** Returns a range that accumulates (merges) matching zones from one or more zone ranges.
48  *
49  * On each nesting level from top to bottom, finds zones that $(B match) based on given
50  * match function and merges them into one zone, $(B accumulating) data from merged zone
51  * using the accumulate function. Merged zones contain summed durations and start times.
52  * The default match function compares info strings of two zones for equality.
53  *
54  *
55  * Can be used e.g. to get a 'total' of all recorded frames. If each frame has one
56  * top-level zone with matching info strings, the top-level zones are merged, then
57  * matching children of these zones, and so on. The result is a zone range representing
58  * a single tree. The accumulate function can be used, for example, to calculate max
59  * duration of matching zones, getting a 'worst case frame scenario', to calculate the
60  * number of times each zone was entered, or even multiple things at the same time.
61  *
62  * Params:
63  *
64  * accumulate = A function alias that takes a pointer to the value accumulated so far, and
65  *              the next ZoneData to accumulate. It returns the resulting accumulated
66  *              value. The first parameter will be null on the first call.
67  *
68  *              Must be $(D pure nothrow @nogc).
69  *
70  * match      = A function alias that takes two const(char) arrays and returns a bool.
71  *              If true is returned, two zones with whose info strings were passed to
72  *              match() are considered the same zone and will be merged and accumulated.
73  *
74  *              Must be $(D pure nothrow @nogc).
75  *
76  *              An example use-case for a custom match() function is to accumulate related
77  *              zones with slightly different names (e.g. numbered draw batches), or
78  *              conversely, to prevent merging zones with identical names (e.g. to see
79  *              each individual draw as a separate zone).
80  *
81  * storage    = Array to use for temporary storage during accumulation $(B as well as)
82  *              storage in the returned range. Must be long enough to hold zones from all
83  *              passed zone ranges, i.e. the sum of their walkLengths. To determine this
84  *              length, use $(D import std.range; zoneRange.walkLength;).
85  * zones      = One or more zone ranges to accumulate.
86  *
87  * Returns: A ForwardRange of AccumulatedZoneData. Each element contails ZoneData plus the
88  *          return value of the accumulate function.
89  *
90  * Note: The current implementation is likely to be slow for large inputs. It's probably
91  *       too slow for real-time usage except if the inputs are very small.
92  *
93  * Example of an $(D accumulate) function:
94  * --------------------
95  * // Increments the accumulated value when called. Useful to determine the
96  * // number of times a Zone was entered.
97  * size_t accum(size_t* aPtr, ref const ZoneData z) pure nothrow @nogc
98  * {
99  *     return aPtr is null ? 1 : *aPtr + 1;
100  * }
101  * --------------------
102  */
103 auto accumulatedZoneRange(alias accumulate, alias match = defaultMatch, ZRange)
104                          (AccumulatedZoneData!accumulate[] storage, ZRange[] zones...)
105     @trusted pure nothrow @nogc
106 {
107     static assert(isForwardRange!ZRange && is(Unqual!(ElementType!ZRange) == ZoneData),
108                   "ZRange parameter of accumulatedZoneRange must be a forward range of "
109                   "ZoneData, e.g. ZoneRange");
110     debug
111     {
112         size_t zoneCount;
113         foreach(ref zoneRange; zones) { zoneCount += zoneRange.save.walkLength; }
114 
115         assert(storage.length >= zoneCount,
116                "storage param of accumulatedZoneRange must be long enough to hold zones "
117                "from all passed zone ranges");
118     }
119     alias AccumulatedZone = AccumulatedZoneData!accumulate;
120 
121     // Range returned by this function.
122     // Just a restricted array at the moment, but we can change it if we need to.
123     struct Range
124     {
125     private:
126         // Array storing the accumulated results. Slice of $(D storage) passed to
127         // accumulatedZoneRange
128         const(AccumulatedZone)[] array;
129 
130         static assert(isForwardRange!Range, "accumulated zone range must be a forward range");
131         static assert(is(Unqual!(ElementType!Range) == AccumulatedZone),
132                       "accumulated zone range must be a range of AccumulatedZoneData");
133 
134     public:
135         @safe pure nothrow @nogc:
136         // ForwardRange primitives.
137         AccumulatedZone front() const { return array.front;  }
138         void popFront()               { array.popFront;      }
139         bool empty()            const { return array.empty;  }
140         @property Range save()  const { return Range(array); }
141         // Number of zones in the range.
142         size_t length()         const { return array.length; }
143     }
144 
145     // Copy all zones into storage.
146     size_t i = 0;
147     foreach(ref zoneRange; zones) foreach(zone; zoneRange.save)
148     {
149         storage[i++] = AccumulatedZone(zone, accumulate(null, zone));
150     }
151     storage = storage[0 .. i];
152 
153     // Complexity of this is O(log(N) * N^2 + 2N^2) == O(log(N) * N^2).
154     // TODO: We could probably speed this up greatly by sorting storage by level, or even
155     //       by level first and parent ID second. That would make finding matching nodes
156     //       much faster. 2014-08-31
157 
158     // Start with merging topmost zones with no parents, then merge their children, etc.
159     // All zones in a single level that match are accumulated into one element. parentID
160     // of the children of merged zones are updated to point to the resulting zone.
161     for(size_t level = 1; ; ++level)
162     {
163         // We're not done as long as there's at least 1 elem at this nesting level.
164         bool notDone = false;
165         for(size_t e1Idx; e1Idx < storage.length; ++e1Idx)
166         {
167             auto e1 = &storage[e1Idx];
168             if(e1.nestLevel != level) { continue; }
169 
170             notDone = true;
171 
172             // Any elems until e1Idx that need to be merged are already merged, so start
173             // looking at e2Idx.
174             for(size_t e2Idx = e1Idx + 1; e2Idx < storage.length; ++e2Idx)
175             {
176                 auto e2 = &storage[e2Idx];
177                 if(e1.nestLevel != level) { continue; }
178 
179                 // Skip if the zones don't match.
180                 if(e1.parentID != e2.parentID || !match(e1.info, e2.info)) { continue; }
181 
182                 // Below code runs at most once per zone (a zone can be removed at most once).
183 
184                 e1.accumulated  = accumulate(&(e1.accumulated), e2.zoneData);
185                 e1.duration    += e2.duration;
186                 e1.startTime   += e2.startTime;
187 
188                 const idToRemove = e2.id;
189                 const idToReplaceWith = e1.id;
190 
191                 // Same as `storage = storage.remove(e2Idx);` but @nogc nothrow.
192                 foreach(offset, ref moveInto; storage[e2Idx .. $ - 1])
193                 {
194                     moveInto = storage[e2Idx + offset + 1];
195                 }
196                 storage.popBack();
197                 // This removes the element at e2Idx (which is greater than e1Idx), so we
198                 // must go 1 index back, Elements at after e2Idx may also be removed, but
199                 // that won't break our loop.
200                 --e2Idx;
201                 foreach(ref elem; storage) if(elem.parentID == idToRemove)
202                 {
203                     elem.parentID = idToReplaceWith;
204                 }
205             }
206         }
207 
208         if(!notDone) { break; }
209     }
210 
211     // Elements that weren't merged were removed from storage in preceding loops, so
212     // storage is likely a lot smaller at this point.
213     return Range(storage);
214 }
215 ///
216 unittest
217 {
218     // Count the number of times each zone was entered.
219 
220     import tharsis.prof;
221 
222     auto storage  = new ubyte[Profiler.maxEventBytes + 128];
223     auto profiler = new Profiler(storage);
224 
225     foreach(i; 0 .. 3)
226     {
227         import std.datetime;
228         auto startTime = Clock.currStdTime();
229         // Wait long enough so the time gap is represented by >2 bytes.
230         while(Clock.currStdTime() - startTime <= 65536) { continue; }
231         auto zone1 = Zone(profiler, "zone1");
232         {
233             auto zone11 = Zone(profiler, "zone11");
234         }
235         startTime = Clock.currStdTime();
236         // Wait long enough so the time gap is represented by >1 bytes.
237         while(Clock.currStdTime() - startTime <= 255) { continue; }
238         {
239             auto zone12 = Zone(profiler, "zone12");
240         }
241     }
242 
243 
244     // Count the number of instances of each zone.
245     size_t accum(size_t* aPtr, ref const ZoneData z) pure nothrow @nogc
246     {
247         return aPtr is null ? 1 : *aPtr + 1;
248     }
249 
250     auto zones        = profiler.profileData.zoneRange;
251     auto accumStorage = new AccumulatedZoneData!accum[zones.walkLength];
252     auto accumulated  = accumulatedZoneRange!accum(accumStorage, zones.save);
253 
254     assert(accumulated.walkLength == 3);
255 
256     import std.stdio;
257     foreach(zone; accumulated)
258     {
259         writeln(zone);
260     }
261 }
262 ///
263 unittest
264 {
265     // Accumulate minimum, maximum, average duration and more simultaneously.
266 
267     // This example also uses C malloc/free, std.typecons.scoped and std.container.Array
268     // to show how to do this without using the GC.
269 
270     import tharsis.prof;
271 
272     const storageLength = Profiler.maxEventBytes + 2048;
273 
274     import core.stdc.stdlib;
275     // A simple typed-slice malloc wrapper function would avoid the ugly cast/slicing.
276     ubyte[] storage  = (cast(ubyte*)malloc(storageLength))[0 .. storageLength];
277     scope(exit) { free(storage.ptr); }
278 
279     import std.typecons;
280     // std.typecons.scoped! stores the Profiler on the stack.
281     auto profiler = scoped!Profiler(storage);
282 
283     // Simulate 16 'frames'
284     foreach(frame; 0 .. 16)
285     {
286         Zone topLevel = Zone(profiler, "frame");
287 
288         // Simulate frame overhead. Replace this with your frame code.
289         {
290             Zone nested1 = Zone(profiler, "frameStart");
291             foreach(i; 0 .. 1000) { continue; }
292         }
293         {
294             Zone nested2 = Zone(profiler, "frameCore");
295             foreach(i; 0 .. 10000) { continue; }
296         }
297     }
298 
299     // Accumulate data into this struct.
300     struct ZoneStats
301     {
302         ulong minDuration;
303         ulong maxDuration;
304         // Needed to calculate average duration.
305         size_t instanceCount;
306 
307         // We also need the total duration to calculate average, but that is accumulated
308         // by default in AccumulatedZoneData.
309     }
310 
311     // Gets min, max, total duration as well as the number of times the zone was entered.
312     ZoneStats accum(ZoneStats* aPtr, ref const ZoneData z) pure nothrow @nogc
313     {
314         if(aPtr is null) { return ZoneStats(z.duration, z.duration, 1); }
315 
316         import std.algorithm: min, max;
317         return ZoneStats(min(aPtr.minDuration, z.duration),
318                         max(aPtr.maxDuration, z.duration),
319                         aPtr.instanceCount + 1);
320     }
321 
322     auto zones      = profiler.profileData.zoneRange;
323     // Allocate storage to accumulate in with malloc.
324     const zoneCount = zones.walkLength;
325     alias Data = AccumulatedZoneData!accum;
326     auto accumStorage = (cast(Data*)malloc(zoneCount * Data.sizeof))[0 .. zoneCount];
327     scope(exit) { free(accumStorage.ptr); }
328 
329     auto accumulated = accumulatedZoneRange!accum(accumStorage, zones.save);
330 
331     // Write out the results.
332     foreach(zone; accumulated) with(zone.accumulated)
333     {
334         import std.stdio;
335         writefln("id: %s, min: %s, max: %s, avg: %s, total: %s, count: %s",
336                  zone.id, minDuration, maxDuration,
337                  zone.duration / cast(double)instanceCount, zone.duration, instanceCount);
338     }
339 }
340 ///
341 unittest
342 {
343     // Get the average duration of a top-level zone. This is a good way to determine
344     // average frame duration as the top-level zone often encapsulates a frame.
345 
346     // This example also uses C malloc/free, std.typecons.scoped and std.container.Array
347     // to show how to do this without using the GC.
348 
349     import tharsis.prof;
350 
351     const storageLength = Profiler.maxEventBytes + 2048;
352 
353     import core.stdc.stdlib;
354     // A simple typed-slice malloc wrapper function would avoid the ugly cast/slicing.
355     ubyte[] storage  = (cast(ubyte*)malloc(storageLength))[0 .. storageLength];
356     scope(exit) { free(storage.ptr); }
357 
358     import std.typecons;
359     // std.typecons.scoped! stores the Profiler on the stack.
360     auto profiler = scoped!Profiler(storage);
361 
362     // Simulate 16 'frames'
363     foreach(frame; 0 .. 16)
364     {
365         Zone topLevel = Zone(profiler, "frame");
366 
367         // Simulate frame overhead. Replace this with your frame code.
368         {
369             Zone nested1 = Zone(profiler, "frameStart");
370             foreach(i; 0 .. 1000) { continue; }
371         }
372         {
373             Zone nested2 = Zone(profiler, "frameCore");
374             foreach(i; 0 .. 10000) { continue; }
375         }
376     }
377 
378     // Count the number of instances of each zone.
379     size_t accum(size_t* aPtr, ref const ZoneData z) pure nothrow @nogc
380     {
381         return aPtr is null ? 1 : *aPtr + 1;
382     }
383 
384     import std.algorithm;
385     // Top-level zones are level 1.
386     //
387     // Filtering zones before accumulating allows us to decrease memory space needed for
388     // accumulation, as well as speed up the accumulation, which is relatively expensive.
389     auto zones = profiler.profileData.zoneRange.filter!(z => z.nestLevel == 1);
390     // Allocate storage to accumulate in with malloc.
391     const zoneCount = zones.walkLength;
392     alias Data = AccumulatedZoneData!accum;
393     auto accumStorage = (cast(Data*)malloc(zoneCount * Data.sizeof))[0 .. zoneCount];
394     scope(exit) { free(accumStorage.ptr); }
395 
396     auto accumulated = accumulatedZoneRange!accum(accumStorage, zones.save);
397 
398     // If there is just one top-level zone, and it always has the same info ("frame" in
399     // this case), accumulatedZoneRange with the default match function will have exactly
400     // 1 element; with the accumulated result for all instances of the zone. Also here,
401     // we use $(D duration), which is accumulated by default.
402     import std.stdio;
403     writeln(accumulated.front.duration / cast(real)accumulated.front.accumulated);
404 }