1 // Copyright Ferdinand Majerech 2014. 2 // Distributed under the Boost Software License, Version 1.0. 3 // (See accompanying file LICENSE_1_0.txt or copy at 4 // http://www.boost.org/LICENSE_1_0.txt) 5 6 7 /** Functionality needed to record profiling data. 8 * 9 * Profiler keeps track of profiling events. 10 * 11 * Zone is an RAII struct that records precise time at its construction and destruction. 12 * 13 * Realtime: 14 * 15 * Recording is very lightweight, without any complicated logic or heap allocations. 16 * This allows to use Tharsis-prof as a 'short-term _profiler' that can record profiling 17 * data over the duration of a few frames and be detect bottlenecks in real time. Game 18 * code can then react, for example by disabling resource intensive nonessential features 19 * (e.g. particle systems). 20 * 21 * Precision: 22 * 23 * Tharsis.prof measures time in D 'hectonanoseconds', or tenths of a microsecond. This is 24 * precise enough to detect irregular overhead during frames but not to profile individual 25 * instructions in a linear algebra function. Such use cases are better covered by 26 * a Callgrind or a sampling _profiler such as perf or CodeAnalyst. Also note that some 27 * platforms may not provide access to high-precision timers and the results may be even 28 * less precise. At least Linux, Windows and OSX should be alright, though. 29 */ 30 module tharsis.prof.profiler; 31 32 33 import std.algorithm; 34 import std.datetime; 35 import std.exception; 36 37 import tharsis.prof.event; 38 39 // We're measuring in hectonanoseconds. 40 // 41 // Second is 10M hnsecs. 42 // Minute is 600M hnsecs. 43 // Hour is 36G hnsecs. 44 45 46 // TODO: Time slicing is unrealiable due to multiple events having the same time. 47 // ChunkyEventList has the SliceExtents as a hack to avoid this issue, but it's not 48 // a general solution. To get precise slices e.g. for Zones, we need to 49 // differentiate Events with the same time. On way to do this is to encode an extra 50 // number before each event: 0 by default (and ensure it takes 0 bytes in that 51 // case), 1 for second event with the same time, 2 for third, etc. 52 // We can use variable-length encoding just like when encoding time gaps. 53 // TODO IMPROVEMENT: We probably don't even need to store this number in profile 54 // data; just let EventRange generate it as a part of the event (eventIndex?) 55 // - This should work even with wrapped/filtered/slice EventRanges as there is 56 // always a full EventRange below (or for slices that aren't the full EventRange, 57 // when we add explicit slicing, keep the index of the first Event in the slice.). 58 // We can probably use this to replace SliceExtents (although ChunkyEventRange will 59 // need custom code to generate event IDs as it contatenates multiple EventRanges) 60 // 2014-11-28 61 // TODO: Use core.time.Duration once its API is @nogc and update the examples to use it. 62 // TODO: Frame-based analysis. E.g. find the longest frame and create a range over its 63 // zones (really just a time slice of a ZoneRange). 2014-08-31 64 // TODO: Slicing of EventRange, ZoneRange based on a time interval. 2014-08-31 65 // TODO: Once time slicing works, this example: 66 // Get the longest, shortest (in duration) top-level zone, and all zones within it. 67 // TODO: ZoneTree - with a single parent node for all nodes with no parents. 68 // It shouldn't use traditional separately allocated nodes connected by pointers, 69 // and should instead use a big array of nodes and possibly auxiliary array/s to 70 // translate parent IDs to node indices. 71 // What ZoneTree should allow: 72 // * Filtering nodes by percentage of parent duration (i.e. ignore anything that's 73 // e.g. < 5% of parent) 74 // * Tree nodes should have methods to return ForwardRanges of ZoneData, 75 // and the nodes themselves should be ForwardRanges of nodes. This would allow 76 // using std.algorithm.filter to filter nodes and still have access to their 77 // children. 78 // 2014-08-31 79 // TODO: (maybe): Compress chunks of profileData by zlib or something similar. E.g. 80 // compress every 64kiB. profileData can then be read through a range wrapper 81 // that has a fixed-size decompression buffer. Compressed data in profileData would 82 // be represented by a 'Compressed' EventID followed by length of compressed data. 83 // TODO: Compress info strings with a short string compressor like shoco or smaz (both at 84 // github) (of course, will need to convert whatever we use to D). 85 86 // TODO: Allow specifying zone info as a compile-time param: 87 // If specified at compile time, instead of ZoneEvent we use a deduplicatedZoneEvent 88 // or something with a similar name, which should just store a hash or maybe 89 // a 2-byte index to a global immutable array of strings. That array would be 90 // built in static this() emitted from all Zone() ctors with a compile-time info 91 // string. (Since we know it at compile-time, we can do any deduplication at 92 // compile-time or static ctor-time - and the restulting structure can be immutable 93 // - usable from multiple threads) 94 // To make this work with the profiling data stream, the first time a compile-time 95 // string is used in a zone, add an event describing that string. Following events 96 // will just contain an index. 97 98 99 /** _Zone of profiled code. 100 * 101 * Emits a zone start event (recording start time) at construction and a zone end event 102 * (recording end time) at destruction. 103 * 104 * Examples: 105 * -------------------- 106 * // Zones can be nested: 107 * while(!done) 108 * { 109 * auto frameZone = Zone(profiler, "frame"); 110 * { 111 * auto renderingZone = Zone(profiler, "rendering"); 112 * 113 * // do rendering here 114 * } 115 * { 116 * auto physicsZone = Zone(profiler, "physics"); 117 * 118 * // do physics here 119 * } 120 * } 121 * -------------------- 122 * 123 * -------------------- 124 * // A nested zone must be fully contained in its parent zone, e.g. this won't work: 125 * auto zone1 = Zone(profiler, "zone1"); 126 * while(!done) 127 * { 128 * auto zone2 = Zone(profiler, "zone1"); 129 * // WRONG: zone1 destroyed manually before zone2 130 * destroy(zone1); 131 * 132 * // zone2 implicitly destroyed at the end of scope 133 * } 134 * -------------------- 135 */ 136 struct Zone 137 { 138 private: 139 // Nesting level of this Zone. Top-level zones have nestLevel_ of 1, their children 2, etc. 140 uint nestLevel_; 141 142 // Reference to the Profiler so we can emit the zone end event from the dtor. 143 Profiler profiler_; 144 145 // Copying a Zone would completely break it. 146 @disable this(this); 147 @disable bool opAssign(ref Zone); 148 @disable this(); 149 150 public: 151 /** Construct a zone to record with specified _profiler. 152 * 153 * Emits the zone start event. 154 * 155 * Params: 156 * 157 * profiler = Profiler to record into. If $(D null), the zone is ignored. This enables 158 * 'optional profiling', where instrumenting code (zones) is always present 159 * in the code but only activated when a Profiler exists. 160 * info = Zone information string. Used to recognize zones when parsing and 161 * accumulating profile data. Can be the 'name' of the zone, possibly with 162 * some extra _info (e.g. "frame": entire frame or "batch 5": fifth draw 163 * batch). $(B Must not) be empty or longer than 255 characters and 164 * $(B must not) contain zero ($(D '\0')) characters. 165 */ 166 this(Profiler profiler, string info) @trusted nothrow 167 { 168 if(profiler !is null) 169 { 170 nestLevel_ = profiler.zoneStartEvent(info); 171 } 172 profiler_ = profiler; 173 } 174 175 /** A shortcut to call variableEvent() of the zone's profiler, if any. 176 * 177 * If a null Profiler has been passed to the Zone's constructor, this call is ignored. 178 * 179 * See_Also: Profiler.variableEvent(). 180 */ 181 void variableEvent(string name, V)(const V value) @safe nothrow 182 { 183 if(profiler_ !is null) 184 { 185 profiler_.variableEvent!(name, V)(value); 186 } 187 } 188 189 /** Destructor. Emits the zone end event with the Profiler. 190 * 191 * Can be called early (using destroy()) to end the zone before end of the scope. 192 */ 193 ~this() @trusted nothrow 194 { 195 if(profiler_ !is null) 196 { 197 profiler_.zoneEndEvent(nestLevel_); 198 } 199 // Ensures that if we call the destructor early, we won't emit the end event twice. 200 profiler_ = null; 201 } 202 } 203 unittest 204 { 205 // Test 'null profiler' (Zone is automatically ignored if a null profiler is passed.) 206 foreach(i; 0 .. 10) 207 { 208 auto frameZone = Zone(null, "frame"); 209 { 210 auto renderingZone = Zone(null, "rendering"); 211 } 212 { 213 auto physicsZone = Zone(null, "physics"); 214 } 215 } 216 } 217 218 static assert(!allEventIDs.canFind!(e => e > eventIDMask), 219 "Too high EventID value; last 3 bits are reserved for byte count"); 220 221 /* All EventID values must be 5-bit integers, with 0 reserved for 'Checkpoint' EventID 222 * which is the only zero byte value that may appear in profiling data (this allows 223 * 'going backwards' in profile data). 224 * 225 * In profile data, an EventID is packed into a byte with a 3-bit 'byteCount' value, 226 * which specifies the number of bytes immediately after the EventID/byteCount byte 227 * used to encode time elapsed since the last event. Each of these bytes consists of 228 * 7 bits of the 'time elapsed' value (first byte - lowest 7 bits, second byte - higher 229 * 7 bits, etc. with as many bytes as there are (at most 7)). The highest bit is always 230 * set to 1 (to ensure the byte zero). 231 */ 232 233 // Number of bits in event ID bytes storing the event ID itself. Other bytes store 'byteCount'. 234 package enum eventIDBits = 5; 235 // Mask of the bits in an event ID byte that store the event ID itself. 236 package enum eventIDMask = 0b00011111; 237 238 // Number of bits in a time byte used to store the actual time value. 239 package enum timeByteBits = 7; 240 // Mask of the bits in a time byte used to store the actual time value. 241 package enum timeByteMask = 0b01111111; 242 // Mask to set the highest bit of time bytes and other 7-bit encodings to 1. 243 package enum lastBit = 0b10000000; 244 245 // Number of bytes storing absolute time in a checkpoint event (same format as time bytes). 246 package enum checkpointByteCount = 8; 247 248 249 /** Records profiling events into user-specified buffer. 250 * 251 * Used together with Zone to record data and with EventRange/ZoneRange/etc. for analysis. 252 * 253 * Profiler writes profiling data into a byte buffer passed to Profiler constructor by the 254 * user. Once there is not enough space to write any more profiling events, the profiler 255 * quietly ignores any events (this can be checked by outOfSpace()). Profiler $(B never 256 * allocates heap memory) by itself. 257 * 258 * Recorded data can be accessed at any time through profileData() and analyzed with help 259 * of EventRange, ZoneRange and other tharsis.prof utilities. reset() can be used to 260 * clear recorded data and start recording from scratch. 261 * 262 * Note: 263 * 264 * Profiler is $(B not) designed to be used from multiple threads. If you need to profile 265 * multiple threads, create a separate Profiler for each thread and either analyze the 266 * results through separate EventRange/ZoneRange instances, or merge them through 267 * accumulatedZoneRange. 268 * 269 * Note: 270 * 271 * Accessing profile data from an out-of-space profiler or in the middle of a zone will 272 * result in an EventRange that's missing some zone end events. Incomplete raw profiling 273 * results or EventRanges should never be concatenated. ZoneRange will automatically end 274 * the unfinished zones. 275 * 276 * Memory consumption: 277 * 278 * Depending on the worload and number of zones, Profiler can eat through assigned memory 279 * rather quickly. With 10000 zones at 120 FPS the overhead is going to be around 14 MiB 280 * $(B per second). 281 */ 282 final class Profiler 283 { 284 /// Diagnostics used to profile the profiler. 285 struct Diagnostics 286 { 287 size_t zoneStartCount; 288 size_t zoneEndCount; 289 size_t infoCount; 290 size_t frameCount; 291 size_t variableCount; 292 } 293 294 /// Maximum size of any single event in bytes. Used to quickly check if we're out of space. 295 enum maxEventBytes = 512; 296 297 private: 298 // Buffer we're recording profiling data to. Passed to constructor and never reallocated. 299 ubyte[] profileData_; 300 301 // Size recorded data in profileData_. 302 // 303 // When this reaches profileData_.length, we're out of space (actually, a bit sooner, 304 // see outOfSpace()). 305 size_t profileDataUsed_; 306 307 // Nesting level of the current zone (used to check that child zones are fully 308 // contained by their parents) 309 uint zoneNestLevel_; 310 311 // Start time of the last event (as returned by std.datetime.Clock.currStdTime()). 312 ulong lastTime_; 313 314 // Time when the Profiler was constructed (as returned by std.datetime.Clock.currStdTime()). 315 ulong startTime_; 316 317 // Diagnostics about the profiler, such as which evenets are the most common. 318 Diagnostics diagnostics_; 319 320 public: 321 /** Construct a Profiler writing profiling data to specified buffer. 322 * 323 * Profiler doesn't allocate heap memory. It will write profiling data into given 324 * buffer until it runs out of space, at which point it will silently stop profiling 325 * (this can be detected by outOfSpace() ). 326 * 327 * Params: 328 * 329 * profileBuffer = Buffer to write profile data to. Must be at least 330 * Profiler.maxEventBytes long. 331 */ 332 this(ubyte[] profileBuffer) @safe nothrow 333 { 334 assert(profileBuffer.length >= maxEventBytes, 335 "Buffer passed to Profiler must be at least Profiler.maxEventBytes long"); 336 profileData_ = profileBuffer; 337 profileDataUsed_ = 0; 338 startTime_ = lastTime_ = Clock.currStdTime().assumeWontThrow; 339 } 340 341 /** Is the profiler out of space? 342 * 343 * When the profiler uses up all memory passed to the constructor, it quietly stops 344 * profiling. This can be used to determine if that has happened. 345 */ 346 bool outOfSpace() @safe pure nothrow const @nogc 347 { 348 // maxEventBytes ensures there's enough space for any single event (the longest 349 // event, Info, can be at most 258 bytes long. 350 return profileDataUsed_ + maxEventBytes >= profileData_.length; 351 } 352 353 /** Get diagnostics about the profiler, such as which events are the most common. 354 * 355 * Useful for profiling the profiler. 356 */ 357 Diagnostics diagnostics() @safe pure nothrow const @nogc 358 { 359 return diagnostics_; 360 } 361 362 /** Emit a checkpoint event. 363 * 364 * A checkpoint event inserts the absolute time of the last event before the 365 * checkpoint. This allows reconstrucing event times from the checkpoint instead of 366 * from the start. It also inserts a zero byte into profiling data, which no other 367 * event can do. This allows to rewind profiler data without having to go all the way 368 * to the start. 369 * 370 * Note: neither of these options is actually exploited at the moment; but they should 371 * be useful for profiling visualizers and when processing massive profiling 372 * outputs. 373 */ 374 void checkpointEvent() @safe pure nothrow @nogc 375 { 376 if(outOfSpace) { return; } 377 eventWithTime(EventID.Checkpoint, 0); 378 379 auto time = lastTime_ - startTime_; 380 // Add 8 time bytes (can represent 128 ** 8 hnsecs) 381 foreach(b; 0 .. checkpointByteCount) { time = addTimeByte(time); } 382 } 383 384 /** Emit a variable event. 385 * 386 * Used to keep track of various variables that change over time, e.g. FPS, network 387 * traffic, entity count...; Despiker supports viewing the value of a variable once 388 * per frame. 389 * 390 * Params: 391 * 392 * name = Variable name (specified at compile-time). The variable event itself does 393 * not contain the name; the name is stored in an info event immediately 394 * after the variable event. 395 * V = Variable type. Only int, uint and float are supported at the moment. 396 * value = Value of the variable 397 * 398 * Example: 399 * -------------------- 400 * // Profiler profiler 401 * profiler.variableEvent!"FPS"(4.5f); 402 * -------------------- 403 */ 404 void variableEvent(string name, V)(const V value) @trusted nothrow 405 { 406 static assert(name.length <= ubyte.max, 407 "variableEvent name strings must be no longer than 255 bytes"); 408 409 if(outOfSpace) { return; } 410 ++diagnostics_.variableCount; 411 412 const time = Clock.currStdTime.assumeWontThrow; 413 const timeGap = time - lastTime_; 414 lastTime_ = time; 415 416 eventWithTime(EventID.Variable, timeGap); 417 enum varType = variableType!V; 418 profileData_[profileDataUsed_++] = varType; 419 420 421 V[1] valueCopy; 422 valueCopy[0] = value; 423 import std.system; 424 425 import tinyendian; 426 // Variable bytes are always in big endian order. 427 if(std.system.endian != Endian.bigEndian) 428 { 429 swapByteOrder(valueCopy[]); 430 } 431 432 static assert(V.sizeof == 4, 433 "variableEvent() does not support types with sizeof other than 4 yet"); 434 435 const encoded = encode7Bit(cast(ubyte[4])valueCopy, profileData_[profileDataUsed_ .. $]); 436 profileDataUsed_ += encoded.length; 437 infoEvent(name); 438 } 439 440 /** Emit a zone start event, when code enters a zone. 441 * 442 * Params: 443 * 444 * info = Information about the zone (e.g. its name). Will be added as an info event 445 * following the zone start event. $(B Must not) be empty or longer than 255 446 * characters and $(B must not) contain zero ($(D '\0')) characters. 447 * 448 * Returns: Nesting level of the newly started zone. Must be passed when corresponding 449 * zoneEndEvent() is called. Used to ensure child events end before their 450 * parent events. 451 * 452 * Note: zoneStartEvent should only be aleed directly where the Zone struct is not 453 * sufficient (i.e. where a zone cannot be wrapped in a scope). 454 */ 455 uint zoneStartEvent(const string info) @system nothrow 456 { 457 assert(info.length <= ubyte.max, "Zone info strings can be at most 255 bytes long"); 458 assert(info.length > 0, "Zone info strings must not be empty"); 459 assert(!(cast(ubyte[])info).canFind(0), "Zone info strings must not contain '\\0'"); 460 461 const time = Clock.currStdTime.assumeWontThrow; 462 463 const timeGap = time - lastTime_; 464 lastTime_ = time; 465 466 if(outOfSpace) { return ++zoneNestLevel_; } 467 ++diagnostics_.zoneStartCount; 468 469 eventWithTime(EventID.ZoneStart, timeGap); 470 if(outOfSpace) { return ++zoneNestLevel_; } 471 infoEvent(info); 472 473 return ++zoneNestLevel_; 474 } 475 476 /** Emit a zone end event, when code exits a zone. 477 * 478 * Params: 479 * 480 * nestLevel = Nesting level of the zone. Used to check that zones are exited in the 481 * correct (hierarchical) order, i.e. a child zone must be ended before 482 * its parent zone. 483 * 484 * Note: zoneEndEvent should only be called directly where the Zone struct is not 485 * sufficient (i.e. where a zone cannot be wrapped in a scope). 486 */ 487 void zoneEndEvent(const uint nestLevel) @system nothrow 488 { 489 assert(nestLevel == zoneNestLevel_, 490 "Zones must be hierarchical; detected a zone that ends after its parent"); 491 --zoneNestLevel_; 492 const time = Clock.currStdTime.assumeWontThrow; 493 494 const timeGap = time - lastTime_; 495 lastTime_ = time; 496 if(outOfSpace) { return; } 497 ++diagnostics_.zoneEndCount; 498 499 eventWithTime(EventID.ZoneEnd, timeGap); 500 } 501 502 503 /** Reset the profiler. 504 * 505 * Clears all profiled data. Reuses the buffer passed by the constructor to start 506 * profiling from scratch. 507 * 508 * Can only be called outside of any Zone. 509 */ 510 void reset() @safe nothrow 511 { 512 // Doing the reset inside a zone would result in a zone end without zone start, 513 // which we can't handle (at least for now). 514 assert(zoneNestLevel_ == 0, "Profiler can only be reset() while not in a Zone"); 515 profileData_[] = 0; 516 profileDataUsed_ = 0; 517 startTime_ = lastTime_ = Clock.currStdTime().assumeWontThrow; 518 } 519 520 /** Get the raw data recorded by the profiler. 521 * 522 * This is a slice to the buffer passed to Profiler's constructor. 523 */ 524 const(ubyte)[] profileData() @safe pure nothrow const @nogc 525 { 526 return profileData_[0 .. profileDataUsed_]; 527 } 528 529 /** Get the nest level of the current zone, if any. 530 * 531 * 0 means the profiler is not in any zone. 532 */ 533 uint zoneNestLevel() @safe pure nothrow const @nogc 534 { 535 return zoneNestLevel_; 536 } 537 538 private: 539 /* Add one byte of a 'time byte' sequence that encodes a time span. 540 * 541 * Takes the time we're encoding and returns remainder of that time that is not yet 542 * encoded into time btyes (time / 128). 543 */ 544 ulong addTimeByte(ulong time) @safe pure nothrow @nogc 545 { 546 // The last bit ensures the resulting byte is never 0 547 profileData_[profileDataUsed_++] = lastBit | cast(ubyte)(time & timeByteMask); 548 return time >> timeByteBits; 549 } 550 551 /* Write an event ID and bytes specifying the time left (gap) since the last event. 552 * 553 * Event ID is packed *time gap byte count* into a byte (low 5 bits for event ID, high 554 * 3 bits for byte count). This byte is followed by time gap bytes, each of which 555 * stores 7 time gap bits (topmost bit is always 1 to ensure the bytes are never 0, to 556 * avoid confusion with checkpoint bytes, which are 0). The first byte stores the 557 * lowest 7 bits of the time gap, second stores the next 7 bits, etc. 558 */ 559 void eventWithTime(EventID id, ulong timeLeft) @trusted pure nothrow @nogc 560 { 561 const idIndex = profileDataUsed_++; 562 size_t byteCount = 0; 563 564 // Add as many time bytes as needed (1 byte can represent 128 hnsecs, 2 bytes can 565 // represent 128 ** 2, etc.) 566 while(timeLeft > 0) 567 { 568 timeLeft = addTimeByte(timeLeft); 569 ++byteCount; 570 if(byteCount >= 8) 571 { 572 assert(false, "Tharsis.prof does not support time gaps over 228 years"); 573 } 574 } 575 const ubyte idByte = cast(ubyte)(id | (byteCount << eventIDBits)); 576 profileData_[idIndex] = idByte; 577 assert((idByte & eventIDMask) == id, "EventID saved incorrectly"); 578 assert((idByte >> eventIDBits) == byteCount, "byte count saved incorrectly"); 579 } 580 unittest 581 { 582 import std.array; 583 import std.stdio; 584 585 writeln("time gap recording unittest"); 586 587 ubyte[4096] storage; 588 auto profiler = new Profiler(storage[]); 589 const ulong time = 33457812484; 590 profiler.eventWithTime(EventID.ZoneStart, time); 591 592 auto data = profiler.profileData; 593 ulong recordedTime = 0; 594 const timeBytes = data.front >> eventIDBits; 595 596 data.popFront(); 597 foreach(b; 0 .. timeBytes) 598 { 599 recordedTime += cast(ulong)(data.front() & timeByteMask) << (b * timeByteBits); 600 data.popFront(); 601 } 602 603 assert(recordedTime == time); 604 } 605 606 /* Emit an info event. 607 * 608 * This event stores character data (at most 255 bytes). Currently info events are 609 * only used to 'describe' immediately previous zone start events. 610 */ 611 void infoEvent(const string info) @trusted pure nothrow @nogc 612 { 613 assert(info.length <= ubyte.max, "Zone info strings can be at most 255 bytes long"); 614 if(outOfSpace) { return; } 615 ++diagnostics_.infoCount; 616 eventWithTime(EventID.Info, 0); 617 profileData_[profileDataUsed_++] = cast(ubyte)(info.length); 618 profileData_[profileDataUsed_ .. profileDataUsed_ + info.length] = cast(ubyte[])info[]; 619 profileDataUsed_ += info.length; 620 } 621 } 622 /// 623 unittest 624 { 625 ubyte[] storage = new ubyte[Profiler.maxEventBytes + 2048]; 626 auto profiler = new Profiler(storage); 627 628 // Simulate 16 'frames' 629 foreach(frame; 0 .. 16) 630 { 631 Zone topLevel = Zone(profiler, "frame"); 632 // Record a variable event (useful for tracking FPS, entity count, network 633 // traffic, etc.). Only uint, int and float supported at the moment. 634 profiler.variableEvent!"frame" = cast(uint)frame; 635 636 // Simulate frame overhead. Replace this with your frame code. 637 { 638 Zone nested1 = Zone(profiler, "frameStart"); 639 foreach(i; 0 .. 1000) { continue; } 640 } 641 { 642 Zone nested2 = Zone(profiler, "frameCore"); 643 foreach(i; 0 .. 10000) { continue; } 644 } 645 } 646 647 // see tharsis.profiler.ranges for how to process recorded data 648 } 649 /// 650 unittest 651 { 652 // This example uses C malloc/free and std.typecons.scoped to show how to use Profiler 653 // without GC allocations. 654 655 const storageLength = Profiler.maxEventBytes + 2048; 656 657 import core.stdc.stdlib; 658 // A simple typed-slice malloc wrapper function would avoid the ugly cast/slicing. 659 ubyte[] storage = (cast(ubyte*)malloc(storageLength))[0 .. storageLength]; 660 scope(exit) { free(storage.ptr); } 661 662 import std.typecons; 663 auto profiler = scoped!Profiler(storage); 664 665 // std.typecons.scoped! stores the Profiler on the stack. 666 // Simulate 16 'frames' 667 foreach(frame; 0 .. 16) 668 { 669 Zone topLevel = Zone(profiler, "frame"); 670 671 // Simulate frame overhead. Replace this with your frame code. 672 { 673 Zone nested1 = Zone(profiler, "frameStart"); 674 foreach(i; 0 .. 1000) { continue; } 675 } 676 { 677 Zone nested2 = Zone(profiler, "frameCore"); 678 foreach(i; 0 .. 10000) { continue; } 679 } 680 } 681 682 // see tharsis.profiler.ranges for how to process recorded data 683 } 684 unittest 685 { 686 import std.range; 687 688 import tharsis.prof.ranges; 689 690 Profiler profiler; 691 692 void addZones() 693 { 694 foreach(i; 0 .. 3) 695 { 696 auto startTime = Clock.currStdTime().assumeWontThrow; 697 // Wait long enough to store the time gap in >2 bytes. 698 while(Clock.currStdTime().assumeWontThrow - startTime <= 65536) 699 { 700 continue; 701 } 702 auto zone1 = Zone(profiler, "zone1"); 703 { 704 auto zone11 = Zone(profiler, "zone11"); 705 } 706 startTime = Clock.currStdTime().assumeWontThrow; 707 // Wait long enough to store the time gap in >1 bytes. 708 while(Clock.currStdTime().assumeWontThrow - startTime <= 256) 709 { 710 continue; 711 } 712 { 713 auto zone12 = Zone(profiler, "zone12"); 714 } 715 profiler.variableEvent!"var_i" = cast(float)i; 716 profiler.variableEvent!"var_pi" = 3.14f; 717 profiler.checkpointEvent(); 718 } 719 } 720 721 { 722 auto storage = new ubyte[Profiler.maxEventBytes + 256]; 723 profiler = new Profiler(storage); 724 addZones(); 725 726 // Check if the events generated are what we expect. 727 auto evts = profiler.profileData.eventRange; 728 foreach(i; 0 .. 3) with(EventID) 729 { 730 assert(evts.front.id == ZoneStart); evts.popFront(); 731 assert(evts.front.id == Info && evts.front.info == "zone1"); evts.popFront(); 732 733 assert(evts.front.id == ZoneStart); evts.popFront(); 734 assert(evts.front.id == Info && evts.front.info == "zone11"); evts.popFront(); 735 assert(evts.front.id == ZoneEnd); evts.popFront(); 736 737 assert(evts.front.id == ZoneStart); evts.popFront(); 738 assert(evts.front.id == Info && evts.front.info == "zone12"); evts.popFront(); 739 assert(evts.front.id == ZoneEnd); evts.popFront(); 740 741 assert(evts.front.id == Variable && 742 evts.front.var.type == VariableType.Float && 743 evts.front.var.varFloat == cast(float)i); evts.popFront(); 744 assert(evts.front.id == Info && evts.front.info == "var_i"); evts.popFront(); 745 assert(evts.front.id == Variable && 746 evts.front.var.type == VariableType.Float && 747 evts.front.var.varFloat == 3.14f); evts.popFront(); 748 assert(evts.front.id == Info && evts.front.info == "var_pi"); evts.popFront(); 749 const time = evts.front.time; 750 751 // Checkpoint start time must match the previous event. 752 assert(evts.front.time == time); 753 assert(evts.front.id == Checkpoint); evts.popFront(); 754 assert(evts.front.id == ZoneEnd); evts.popFront(); 755 } 756 757 auto zones = profiler.profileData.zoneRange; 758 assert(zones.walkLength == 9, "Unexpected number of zones"); 759 760 // Check that zones have expected nesting, infos and time ordering. 761 foreach(i; 0 .. 3) 762 { 763 const z11 = zones.front; zones.popFront(); 764 const z12 = zones.front; zones.popFront(); 765 const z1 = zones.front; zones.popFront(); 766 assert(z11.parentID == z1.id && z12.parentID == z1.id); 767 assert(z11.nestLevel == 2 && z12.nestLevel == 2 && z1.nestLevel == 1); 768 assert(z11.info == "zone11" && z12.info == "zone12" && z1.info == "zone1"); 769 770 assert(z11.startTime >= z1.startTime && 771 z12.startTime >= z11.startTime + z11.duration && 772 z1.startTime + z1.duration >= z12.startTime + z12.duration); 773 } 774 } 775 776 // Test what happens when Profiler runs out of space. 777 { 778 auto storage = new ubyte[Profiler.maxEventBytes + 50]; 779 profiler = new Profiler(storage); 780 addZones(); 781 782 ulong lastTime = 0; 783 foreach(zone; profiler.profileData.zoneRange) 784 { 785 assert(zone.endTime >= lastTime, 786 "Incomplete profile data resulted in wrong order of zone end times"); 787 lastTime = zone.endTime; 788 } 789 } 790 791 { 792 auto storage = new ubyte[Profiler.maxEventBytes + 128]; 793 profiler = new Profiler(storage); 794 addZones(); 795 796 auto zones = profiler.profileData.zoneRange; 797 798 799 // Just count the number of instances of each zone. 800 size_t accum(size_t* aPtr, ref const ZoneData z) pure nothrow @nogc 801 { 802 return aPtr is null ? 1 : *aPtr + 1; 803 } 804 auto accumStorage = new AccumulatedZoneData!accum[zones.walkLength]; 805 auto accumulated = accumulatedZoneRange!accum(accumStorage, zones.save); 806 } 807 } 808 809 810 package: 811 812 /* Encode 4 bytes of data into 5 7-bit encoded bytes in target. 813 * 814 * The last bit of each byte in the 7-bit encoding is set to 1. This is used to avoid 815 * zero bytes in e.g. variable events (zero bytes can *only* be used by the 816 * CheckpointEvent, to allow searching for it backwards). 817 * 818 * Params: 819 * 820 * data = Data to encode. 821 * target = First 5 bytes of target will store the encoded data. Must be at least 5 bytes. 822 * 823 * Returns: A 5-byte slice of target with the encoded data. 824 */ 825 static ubyte[] encode7Bit(ubyte[4] data, ubyte[] target) 826 @safe pure nothrow @nogc 827 { 828 assert(target.length >= 5, 829 "Not enough space to encode a 4 bytes chunk into 7-bit encoding"); 830 831 // first 7 bits from first byte 832 ubyte b = lastBit | (data[0] & (0xFF >> 1)); 833 target[0] = b; 834 // last bit from first byte (moved 7 to right), 835 // first 6 bits from second byte (moved by 1 to left) 836 b = lastBit | ((data[0] & (0xFF << 7)) >> 7) | ((data[1] & (0xFF >> 2)) << 1); 837 target[1] = b; 838 // last 2 bits from second byte (moved 6 to right), 839 // first 5 bits from third byte (moved 2 to left) 840 b = lastBit | ((data[1] & (0xFF << 6)) >> 6) | ((data[2] & (0xFF >> 3)) << 2); 841 target[2] = b; 842 // last 3 bits from third byte (moved 5 to right), 843 // first 4 bits from fourth byte (moved 3 to left) 844 b = lastBit | ((data[2] & (0xFF << 5)) >> 5) | ((data[3] & (0xFF >> 4)) << 3); 845 target[3] = b; 846 // last 4 bits from fourth byte (moved 4 to right) 847 b = lastBit | ((data[3] & (0xFF << 4)) >> 4); 848 target[4] = b; 849 850 return target[0 .. 5]; 851 } 852 853 /* Decoded 4 bytes of data stored in 5 bytes of 7-bit encoded data. 854 * 855 * Params: 856 * 857 * data = Data to decode. 858 * target = First 4 bytes of target will store the decoded data. Must be at least 4 bytes. 859 * 860 * Returns: A 4-byte slice of target with the encoded data. 861 * 862 * See_Also: encode7Bit 863 */ 864 static ubyte[] decode7Bit(ubyte[5] encoded, ubyte[] target) 865 @safe pure nothrow @nogc 866 { 867 assert(target.length >= 4, 868 "Not enough space to decode a 5-byte chunk encoded in 7-bit encoding"); 869 870 target[0] = (encoded[0] & 0b01111111) | ((encoded[1] & 0b00000001) << 7); 871 target[1] = ((encoded[1] & 0b01111110) >> 1) | ((encoded[2] & 0b00000011) << 6); 872 target[2] = ((encoded[2] & 0b01111100) >> 2) | ((encoded[3] & 0b00000111) << 5); 873 target[3] = ((encoded[3] & 0b01111000) >> 3) | ((encoded[4] & 0b00001111) << 4); 874 875 return target[0 .. 4]; 876 } 877 878 unittest 879 { 880 import std.random; 881 foreach(attempt; 0 .. 1024) 882 { 883 uint[1] u; 884 float[1] f; 885 u[0] = uniform(uint.min, uint.max); 886 f[0] = uniform(-100000.0f, 100000.0f) + uniform(-1.0f, 1.0f); 887 888 ubyte[5] encodeBuf; 889 ubyte[4] decodeBuf; 890 encode7Bit(cast(ubyte[4])u, encodeBuf[]); 891 decode7Bit(encodeBuf, decodeBuf[]); 892 893 assert(*(cast(uint*)decodeBuf.ptr) == u[0], 894 "encoded/decoded uint data does not match original"); 895 896 encode7Bit(cast(ubyte[4])f, encodeBuf[]); 897 decode7Bit(encodeBuf, decodeBuf[]); 898 899 assert(*(cast(float*)decodeBuf.ptr) == f[0], 900 "encoded/decoded float data does not match original"); 901 } 902 }