1 // Copyright Ferdinand Majerech 2014. 2 // Distributed under the Boost Software License, Version 1.0. 3 // (See accompanying file LICENSE_1_0.txt or copy at 4 // http://www.boost.org/LICENSE_1_0.txt) 5 6 7 /// Range that accumulates (merges) matching zones from one or more zone ranges. 8 module tharsis.prof.accumulatedzonerange; 9 10 11 import std.range; 12 import std.traits; 13 14 import tharsis.prof.ranges; 15 import tharsis.prof.profiler; 16 17 18 19 /** Data accumulated from multiple matching zones, generated by $(D 20 * accumulatedZoneRange()). 21 * 22 * Extends $(D ZoneData) (derived using alias this) with a value returned by the $(D 23 * accumulate) function parameter of $(D accumulatedZoneRange()). 24 * 25 * Durations and start times of accumulated zones are summed into $(D zoneData.duration) 26 * and $(D zoneData.startTime). $(D id), $(D parentID) and $(D nestLevel) are updated so 27 * the elements of $(D accumulatedZoneRange) can still form trees just like elements of 28 * the $(D ZoneRange) that was accumulated. 29 */ 30 struct AccumulatedZoneData(alias accumulate) 31 { 32 /// The 'base' ZoneData; startTime and duration are sums of accumulated ZoneData values. 33 ZoneData zoneData; 34 alias zoneData this; 35 36 /// The value accumulated by the accumulate function. 37 ReturnType!accumulate accumulated; 38 } 39 40 41 /// Default match function for accumulatedZoneRange(). Compares ZoneData infos for equality. 42 bool defaultMatch(const(char)[] info1, const(char)[] info2) @safe pure nothrow @nogc 43 { 44 return info1 == info2; 45 } 46 47 /** Returns a range that accumulates (merges) matching zones from one or more zone ranges. 48 * 49 * On each nesting level from top to bottom, finds zones that $(B match) based on given 50 * match function and merges them into one zone, $(B accumulating) data from merged zone 51 * using the accumulate function. Merged zones contain summed durations and start times. 52 * The default match function compares info strings of two zones for equality. 53 * 54 * 55 * Can be used e.g. to get a 'total' of all recorded frames. If each frame has one 56 * top-level zone with matching info strings, the top-level zones are merged, then 57 * matching children of these zones, and so on. The result is a zone range representing 58 * a single tree. The accumulate function can be used, for example, to calculate max 59 * duration of matching zones, getting a 'worst case frame scenario', to calculate the 60 * number of times each zone was entered, or even multiple things at the same time. 61 * 62 * Params: 63 * 64 * accumulate = A function alias that takes a pointer to the value accumulated so far, and 65 * the next ZoneData to accumulate. It returns the resulting accumulated 66 * value. The first parameter will be null on the first call. 67 * 68 * Must be $(D pure nothrow @nogc). 69 * 70 * match = A function alias that takes two const(char) arrays and returns a bool. 71 * If true is returned, two zones with whose info strings were passed to 72 * match() are considered the same zone and will be merged and accumulated. 73 * 74 * Must be $(D pure nothrow @nogc). 75 * 76 * An example use-case for a custom match() function is to accumulate related 77 * zones with slightly different names (e.g. numbered draw batches), or 78 * conversely, to prevent merging zones with identical names (e.g. to see 79 * each individual draw as a separate zone). 80 * 81 * storage = Array to use for temporary storage during accumulation $(B as well as) 82 * storage in the returned range. Must be long enough to hold zones from all 83 * passed zone ranges, i.e. the sum of their walkLengths. To determine this 84 * length, use $(D import std.range; zoneRange.walkLength;). 85 * zones = One or more zone ranges to accumulate. 86 * 87 * Returns: A ForwardRange of AccumulatedZoneData. Each element contails ZoneData plus the 88 * return value of the accumulate function. 89 * 90 * Note: The current implementation is likely to be slow for large inputs. It's probably 91 * too slow for real-time usage except if the inputs are very small. 92 * 93 * Example of an $(D accumulate) function: 94 * -------------------- 95 * // Increments the accumulated value when called. Useful to determine the 96 * // number of times a Zone was entered. 97 * size_t accum(size_t* aPtr, ref const ZoneData z) pure nothrow @nogc 98 * { 99 * return aPtr is null ? 1 : *aPtr + 1; 100 * } 101 * -------------------- 102 */ 103 auto accumulatedZoneRange(alias accumulate, alias match = defaultMatch, ZRange) 104 (AccumulatedZoneData!accumulate[] storage, ZRange[] zones...) 105 @trusted pure nothrow @nogc 106 { 107 static assert(isForwardRange!ZRange && is(Unqual!(ElementType!ZRange) == ZoneData), 108 "ZRange parameter of accumulatedZoneRange must be a forward range of " 109 "ZoneData, e.g. ZoneRange"); 110 debug 111 { 112 size_t zoneCount; 113 foreach(ref zoneRange; zones) { zoneCount += zoneRange.save.walkLength; } 114 115 assert(storage.length >= zoneCount, 116 "storage param of accumulatedZoneRange must be long enough to hold zones " 117 "from all passed zone ranges"); 118 } 119 alias AccumulatedZone = AccumulatedZoneData!accumulate; 120 121 // Range returned by this function. 122 // Just a restricted array at the moment, but we can change it if we need to. 123 struct Range 124 { 125 private: 126 // Array storing the accumulated results. Slice of $(D storage) passed to 127 // accumulatedZoneRange 128 const(AccumulatedZone)[] array; 129 130 static assert(isForwardRange!Range, "accumulated zone range must be a forward range"); 131 static assert(is(Unqual!(ElementType!Range) == AccumulatedZone), 132 "accumulated zone range must be a range of AccumulatedZoneData"); 133 134 public: 135 @safe pure nothrow @nogc: 136 // ForwardRange primitives. 137 AccumulatedZone front() const { return array.front; } 138 void popFront() { array.popFront; } 139 bool empty() const { return array.empty; } 140 @property Range save() const { return Range(array); } 141 // Number of zones in the range. 142 size_t length() const { return array.length; } 143 } 144 145 // Copy all zones into storage. 146 size_t i = 0; 147 foreach(ref zoneRange; zones) foreach(zone; zoneRange.save) 148 { 149 storage[i++] = AccumulatedZone(zone, accumulate(null, zone)); 150 } 151 storage = storage[0 .. i]; 152 153 // Complexity of this is O(log(N) * N^2 + 2N^2) == O(log(N) * N^2). 154 // TODO: We could probably speed this up greatly by sorting storage by level, or even 155 // by level first and parent ID second. That would make finding matching nodes 156 // much faster. 2014-08-31 157 158 // Start with merging topmost zones with no parents, then merge their children, etc. 159 // All zones in a single level that match are accumulated into one element. parentID 160 // of the children of merged zones are updated to point to the resulting zone. 161 for(size_t level = 1; ; ++level) 162 { 163 // We're not done as long as there's at least 1 elem at this nesting level. 164 bool notDone = false; 165 for(size_t e1Idx; e1Idx < storage.length; ++e1Idx) 166 { 167 auto e1 = &storage[e1Idx]; 168 if(e1.nestLevel != level) { continue; } 169 170 notDone = true; 171 172 // Any elems until e1Idx that need to be merged are already merged, so start 173 // looking at e2Idx. 174 for(size_t e2Idx = e1Idx + 1; e2Idx < storage.length; ++e2Idx) 175 { 176 auto e2 = &storage[e2Idx]; 177 if(e1.nestLevel != level) { continue; } 178 179 // Skip if the zones don't match. 180 if(e1.parentID != e2.parentID || !match(e1.info, e2.info)) { continue; } 181 182 // Below code runs at most once per zone (a zone can be removed at most once). 183 184 e1.accumulated = accumulate(&(e1.accumulated), e2.zoneData); 185 e1.duration += e2.duration; 186 e1.startTime += e2.startTime; 187 188 const idToRemove = e2.id; 189 const idToReplaceWith = e1.id; 190 191 // Same as `storage = storage.remove(e2Idx);` but @nogc nothrow. 192 foreach(offset, ref moveInto; storage[e2Idx .. $ - 1]) 193 { 194 moveInto = storage[e2Idx + offset + 1]; 195 } 196 storage.popBack(); 197 // This removes the element at e2Idx (which is greater than e1Idx), so we 198 // must go 1 index back, Elements at after e2Idx may also be removed, but 199 // that won't break our loop. 200 --e2Idx; 201 foreach(ref elem; storage) if(elem.parentID == idToRemove) 202 { 203 elem.parentID = idToReplaceWith; 204 } 205 } 206 } 207 208 if(!notDone) { break; } 209 } 210 211 // Elements that weren't merged were removed from storage in preceding loops, so 212 // storage is likely a lot smaller at this point. 213 return Range(storage); 214 } 215 /// 216 unittest 217 { 218 // Count the number of times each zone was entered. 219 220 import tharsis.prof; 221 222 auto storage = new ubyte[Profiler.maxEventBytes + 128]; 223 auto profiler = new Profiler(storage); 224 225 foreach(i; 0 .. 3) 226 { 227 import std.datetime; 228 auto startTime = Clock.currStdTime(); 229 // Wait long enough so the time gap is represented by >2 bytes. 230 while(Clock.currStdTime() - startTime <= 65536) { continue; } 231 auto zone1 = Zone(profiler, "zone1"); 232 { 233 auto zone11 = Zone(profiler, "zone11"); 234 } 235 startTime = Clock.currStdTime(); 236 // Wait long enough so the time gap is represented by >1 bytes. 237 while(Clock.currStdTime() - startTime <= 255) { continue; } 238 { 239 auto zone12 = Zone(profiler, "zone12"); 240 } 241 } 242 243 244 // Count the number of instances of each zone. 245 size_t accum(size_t* aPtr, ref const ZoneData z) pure nothrow @nogc 246 { 247 return aPtr is null ? 1 : *aPtr + 1; 248 } 249 250 auto zones = profiler.profileData.zoneRange; 251 auto accumStorage = new AccumulatedZoneData!accum[zones.walkLength]; 252 auto accumulated = accumulatedZoneRange!accum(accumStorage, zones.save); 253 254 assert(accumulated.walkLength == 3); 255 256 import std.stdio; 257 foreach(zone; accumulated) 258 { 259 writeln(zone); 260 } 261 } 262 /// 263 unittest 264 { 265 // Accumulate minimum, maximum, average duration and more simultaneously. 266 267 // This example also uses C malloc/free, std.typecons.scoped and std.container.Array 268 // to show how to do this without using the GC. 269 270 import tharsis.prof; 271 272 const storageLength = Profiler.maxEventBytes + 2048; 273 274 import core.stdc.stdlib; 275 // A simple typed-slice malloc wrapper function would avoid the ugly cast/slicing. 276 ubyte[] storage = (cast(ubyte*)malloc(storageLength))[0 .. storageLength]; 277 scope(exit) { free(storage.ptr); } 278 279 import std.typecons; 280 // std.typecons.scoped! stores the Profiler on the stack. 281 auto profiler = scoped!Profiler(storage); 282 283 // Simulate 16 'frames' 284 foreach(frame; 0 .. 16) 285 { 286 Zone topLevel = Zone(profiler, "frame"); 287 288 // Simulate frame overhead. Replace this with your frame code. 289 { 290 Zone nested1 = Zone(profiler, "frameStart"); 291 foreach(i; 0 .. 1000) { continue; } 292 } 293 { 294 Zone nested2 = Zone(profiler, "frameCore"); 295 foreach(i; 0 .. 10000) { continue; } 296 } 297 } 298 299 // Accumulate data into this struct. 300 struct ZoneStats 301 { 302 ulong minDuration; 303 ulong maxDuration; 304 // Needed to calculate average duration. 305 size_t instanceCount; 306 307 // We also need the total duration to calculate average, but that is accumulated 308 // by default in AccumulatedZoneData. 309 } 310 311 // Gets min, max, total duration as well as the number of times the zone was entered. 312 ZoneStats accum(ZoneStats* aPtr, ref const ZoneData z) pure nothrow @nogc 313 { 314 if(aPtr is null) { return ZoneStats(z.duration, z.duration, 1); } 315 316 import std.algorithm: min, max; 317 return ZoneStats(min(aPtr.minDuration, z.duration), 318 max(aPtr.maxDuration, z.duration), 319 aPtr.instanceCount + 1); 320 } 321 322 auto zones = profiler.profileData.zoneRange; 323 // Allocate storage to accumulate in with malloc. 324 const zoneCount = zones.walkLength; 325 alias Data = AccumulatedZoneData!accum; 326 auto accumStorage = (cast(Data*)malloc(zoneCount * Data.sizeof))[0 .. zoneCount]; 327 scope(exit) { free(accumStorage.ptr); } 328 329 auto accumulated = accumulatedZoneRange!accum(accumStorage, zones.save); 330 331 // Write out the results. 332 foreach(zone; accumulated) with(zone.accumulated) 333 { 334 import std.stdio; 335 writefln("id: %s, min: %s, max: %s, avg: %s, total: %s, count: %s", 336 zone.id, minDuration, maxDuration, 337 zone.duration / cast(double)instanceCount, zone.duration, instanceCount); 338 } 339 } 340 /// 341 unittest 342 { 343 // Get the average duration of a top-level zone. This is a good way to determine 344 // average frame duration as the top-level zone often encapsulates a frame. 345 346 // This example also uses C malloc/free, std.typecons.scoped and std.container.Array 347 // to show how to do this without using the GC. 348 349 import tharsis.prof; 350 351 const storageLength = Profiler.maxEventBytes + 2048; 352 353 import core.stdc.stdlib; 354 // A simple typed-slice malloc wrapper function would avoid the ugly cast/slicing. 355 ubyte[] storage = (cast(ubyte*)malloc(storageLength))[0 .. storageLength]; 356 scope(exit) { free(storage.ptr); } 357 358 import std.typecons; 359 // std.typecons.scoped! stores the Profiler on the stack. 360 auto profiler = scoped!Profiler(storage); 361 362 // Simulate 16 'frames' 363 foreach(frame; 0 .. 16) 364 { 365 Zone topLevel = Zone(profiler, "frame"); 366 367 // Simulate frame overhead. Replace this with your frame code. 368 { 369 Zone nested1 = Zone(profiler, "frameStart"); 370 foreach(i; 0 .. 1000) { continue; } 371 } 372 { 373 Zone nested2 = Zone(profiler, "frameCore"); 374 foreach(i; 0 .. 10000) { continue; } 375 } 376 } 377 378 // Count the number of instances of each zone. 379 size_t accum(size_t* aPtr, ref const ZoneData z) pure nothrow @nogc 380 { 381 return aPtr is null ? 1 : *aPtr + 1; 382 } 383 384 import std.algorithm; 385 // Top-level zones are level 1. 386 // 387 // Filtering zones before accumulating allows us to decrease memory space needed for 388 // accumulation, as well as speed up the accumulation, which is relatively expensive. 389 auto zones = profiler.profileData.zoneRange.filter!(z => z.nestLevel == 1); 390 // Allocate storage to accumulate in with malloc. 391 const zoneCount = zones.walkLength; 392 alias Data = AccumulatedZoneData!accum; 393 auto accumStorage = (cast(Data*)malloc(zoneCount * Data.sizeof))[0 .. zoneCount]; 394 scope(exit) { free(accumStorage.ptr); } 395 396 auto accumulated = accumulatedZoneRange!accum(accumStorage, zones.save); 397 398 // If there is just one top-level zone, and it always has the same info ("frame" in 399 // this case), accumulatedZoneRange with the default match function will have exactly 400 // 1 element; with the accumulated result for all instances of the zone. Also here, 401 // we use $(D duration), which is accumulated by default. 402 import std.stdio; 403 writeln(accumulated.front.duration / cast(real)accumulated.front.accumulated); 404 }