1
0
pl-libs/pl_profile.h
Jonathan Hoffstadt 95454b54f7
All checks were successful
Tests / Ubuntu (push) Successful in 6s
initial commit
2024-08-26 20:35:15 -05:00

512 lines
18 KiB
C

/*
pl_profile
Do this:
#define PL_PROFILE_IMPLEMENTATION
before you include this file in *one* C or C++ file to create the implementation.
// i.e. it should look like this:
#include ...
#include ...
#include ...
#define PL_PROFILE_IMPLEMENTATION
#include "pl_profile.h"
*/
// library version
// human-readable version string
#define PL_PROFILE_VERSION "0.2.0"
// numeric version for integer/preprocessor comparisons.
// NOTE(review): the leading zeros make this an octal literal in C, so its
// value is 128 rather than decimal 200 -- confirm this is intended before
// comparing against it.
#define PL_PROFILE_VERSION_NUM 00200
/*
Index of this file:
// [SECTION] documentation
// [SECTION] header mess
// [SECTION] includes
// [SECTION] forward declarations & basic types
// [SECTION] public api
// [SECTION] structs
// [SECTION] internal api
// [SECTION] c file start
*/
//-----------------------------------------------------------------------------
// [SECTION] documentation
//-----------------------------------------------------------------------------
/*
SETUP
pl_create_profile_context:
plProfileContext* pl_create_profile_context();
Creates the global context used by the profiling system. Store the
pointer returned if you want to use the profiler across DLL boundaries.
See "pl_set_profile_context".
pl_cleanup_profile_context:
void pl_cleanup_profile_context();
Frees memory associated with the profiling system. Do not call functions
after this.
pl_set_profile_context:
void pl_set_profile_context(plProfileContext*);
Sets the current profile context. Mostly used to allow profiling across
DLL boundaries.
pl_get_profile_context:
plProfileContext* pl_get_profile_context();
Returns the current profile context.
SAMPLING
pl_begin_profile_frame:
void pl_begin_profile_frame();
Begins a CPU profiling frame. Samples can now be taken.
pl_end_profile_frame:
void pl_end_profile_frame();
Ends a CPU profiling frame.
pl_begin_profile_sample:
void pl_begin_profile_sample(pcName);
Begins a CPU sample. Must have begun a profiling frame.
pl_end_profile_sample:
void pl_end_profile_sample();
Ends a CPU sample.
RETRIEVING RESULTS
pl_get_last_frame_samples:
plProfileSample* pl_get_last_frame_samples(uint32_t* puSize);
Returns samples from last frame. Call after "pl_end_profile_frame".
COMPILE TIME OPTIONS
* Turn profiling on by defining PL_PROFILE_ON
* Change allocators by defining both:
PL_PROFILE_ALLOC(x)
PL_PROFILE_FREE(x)
*/
//-----------------------------------------------------------------------------
// [SECTION] header mess
//-----------------------------------------------------------------------------
#ifndef PL_PROFILE_H
#define PL_PROFILE_H
//-----------------------------------------------------------------------------
// [SECTION] includes
//-----------------------------------------------------------------------------
#include <stdint.h>
//-----------------------------------------------------------------------------
// [SECTION] forward declarations & basic types
//-----------------------------------------------------------------------------
// forward declarations
typedef struct _plProfileSample plProfileSample;
typedef struct _plProfileContext plProfileContext;
//-----------------------------------------------------------------------------
// [SECTION] public api
//-----------------------------------------------------------------------------
#ifdef PL_PROFILE_ON
// Profiling enabled: each public macro forwards to the pl__* function of the
// same name declared in the internal api section.
// setup/shutdown
// NOTE: pl_create_profile_context takes a ptContext parameter but discards it;
// nothing is forwarded to pl__create_profile_context.
#define pl_create_profile_context(ptContext) pl__create_profile_context()
#define pl_cleanup_profile_context() pl__cleanup_profile_context()
#define pl_set_profile_context(ptContext) pl__set_profile_context((ptContext))
#define pl_get_profile_context() pl__get_profile_context()
// frames
#define pl_begin_profile_frame() pl__begin_profile_frame()
#define pl_end_profile_frame() pl__end_profile_frame()
// samples (arguments are wrapped in parentheses before being forwarded)
#define pl_begin_profile_sample(pcName) pl__begin_profile_sample((pcName))
#define pl_end_profile_sample() pl__end_profile_sample()
#define pl_get_last_frame_samples(puSize) pl__get_last_frame_samples((puSize))
#endif // PL_PROFILE_ON
//-----------------------------------------------------------------------------
// [SECTION] structs
//-----------------------------------------------------------------------------
// One timed region recorded between pl__begin_profile_sample and
// pl__end_profile_sample.
typedef struct _plProfileSample
{
// wall-clock time when the sample began; pl__end_profile_sample rewrites it
// to be relative to the owning frame's start time
double dStartTime;
// elapsed wall-clock time, filled in by pl__end_profile_sample (0.0 until then)
double dDuration;
// name pointer passed to pl__begin_profile_sample; stored as-is, not copied
const char* pcName;
// number of samples that were already open when this one began
uint32_t uDepth;
} plProfileSample;
//-----------------------------------------------------------------------------
// [SECTION] internal api
//-----------------------------------------------------------------------------
// setup/shutdown
// Allocates a context, makes it the global context and returns it.
plProfileContext* pl__create_profile_context (void);
// Frees any heap overflow buffers plus the global context, then clears the global pointer.
void pl__cleanup_profile_context(void);
// Replaces the global context pointer (asserts that ptContext is non-NULL).
void pl__set_profile_context (plProfileContext* ptContext);
// Returns the global context pointer (asserts that one is set).
plProfileContext* pl__get_profile_context (void);
// frames
// Advances the frame counter, selects the frame buffer for it and resets its timing/sample count.
void pl__begin_profile_frame(void);
// Records the current frame's duration and marks it as the last completed frame.
void pl__end_profile_frame (void);
// samples
// Opens a sample named pcName in the current frame (the pointer is stored, not copied).
void pl__begin_profile_sample(const char* pcName);
// Closes the most recently opened sample and records its duration.
void pl__end_profile_sample (void);
// Returns the last completed frame's sample array; writes its count to *puSize when puSize is non-NULL.
plProfileSample* pl__get_last_frame_samples(uint32_t* puSize);
#ifndef PL_PROFILE_ON
// Profiling disabled: the public macros expand to nothing (or to a null
// pointer where a value is expected) so call sites need no #ifdefs.

// Stand-in for pl__get_last_frame_samples while profiling is compiled out.
// Unlike a bare NULL expansion it still writes a count of 0 through puSize
// (when puSize is non-NULL), so callers that loop over the returned count
// never read an uninitialized variable. Always returns a null pointer.
static inline struct _plProfileSample*
pl__get_last_frame_samples_disabled(uint32_t* puSize)
{
    if(puSize)
        *puSize = 0;
    return (struct _plProfileSample*)0;
}

#define pl_create_profile_context(ptContext) NULL
#define pl_cleanup_profile_context() //
#define pl_set_profile_context(ptContext) //
#define pl_get_profile_context() NULL
#define pl_begin_profile_frame(ulFrame) //
#define pl_end_profile_frame() //
#define pl_begin_profile_sample(pcName) //
#define pl_end_profile_sample() //
#define pl_get_last_frame_samples(puSize) pl__get_last_frame_samples_disabled((puSize))
#endif
#endif // PL_PROFILE_H
//-----------------------------------------------------------------------------
// [SECTION] c file start
//-----------------------------------------------------------------------------
/*
Index of this file:
// [SECTION] header mess
// [SECTION] includes
// [SECTION] global context
// [SECTION] internal structs
// [SECTION] internal api
// [SECTION] public api implementations
// [SECTION] internal api implementations
*/
//-----------------------------------------------------------------------------
// [SECTION] header mess
//-----------------------------------------------------------------------------
#ifdef PL_PROFILE_IMPLEMENTATION
// Assertion hook: define PL_ASSERT before including this file to override it;
// otherwise the standard assert() is used.
#ifndef PL_ASSERT
#include <assert.h>
#define PL_ASSERT(x) assert((x))
#endif
// Allocator hooks: default to malloc/free. Only PL_PROFILE_ALLOC is tested
// here, so an override must define PL_PROFILE_FREE as well.
#ifndef PL_PROFILE_ALLOC
#include <stdlib.h>
#define PL_PROFILE_ALLOC(x) malloc((x))
#define PL_PROFILE_FREE(x) free((x))
#endif
//-----------------------------------------------------------------------------
// [SECTION] includes
//-----------------------------------------------------------------------------
#include <stdbool.h> // bool, true, false
#include <string.h>  // memset, memcpy
#ifdef _WIN32
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#elif defined(__APPLE__)
#include <time.h> // clock_gettime_nsec_np
#else // linux
#include <time.h> // clock_gettime, clock_getres
#endif
//-----------------------------------------------------------------------------
// [SECTION] global context
//-----------------------------------------------------------------------------
// Global profile context used by every pl__* function in this file.
// Assigned by pl__create_profile_context and pl__set_profile_context; reset to
// NULL by pl__cleanup_profile_context.
plProfileContext* gTPProfileContext = NULL;
//-----------------------------------------------------------------------------
// [SECTION] internal structs
//-----------------------------------------------------------------------------
// Per-frame profiling state. Samples and the stack of open samples both start
// in fixed inline arrays and move to heap buffers once those fill up.
typedef struct _plProfileFrame
{
// NOTE(review): not written by any code visible in this file
uint64_t ulFrame;
double dStartTime; // beginning of frame time
double dDuration; // total duration
double dInternalDuration; // profiler overhead
// true once the sample stack has been moved to a heap buffer
bool bSampleStackOverflowInUse;
// number of entries currently on the sample stack (i.e. open samples)
uint32_t uTotalSampleStackSize;
// active stack storage: auSampleStack or puOverflowSampleStack
uint32_t* puSampleStack;
// inline stack storage; entries are indices into ptSamples
uint32_t auSampleStack[256];
// capacity of the inline stack (set to 256 at context creation)
uint32_t uSampleStackCapacity;
// heap stack storage, allocated when the inline stack fills up
uint32_t* puOverflowSampleStack;
// capacity of puOverflowSampleStack, in entries
uint32_t uOverflowSampleStackCapacity;
// number of samples recorded in this frame so far
uint32_t uTotalSampleSize;
// active sample storage: atSamples or a heap buffer
plProfileSample* ptSamples;
// true once ptSamples points at a heap buffer
bool bOverflowInUse;
// inline sample storage
plProfileSample atSamples[256];
// capacity of the inline sample storage (set to 256 at context creation)
uint32_t uSampleCapacity;
// capacity of the heap sample buffer, in samples
uint32_t uOverflowSampleCapacity;
} plProfileFrame;
// Top-level profiler state; one instance is reachable through gTPProfileContext.
typedef struct _plProfileContext
{
// wall-clock time captured when the context was created
double dStartTime;
// frame counter, incremented by pl__begin_profile_frame
uint64_t ulFrame;
// two frame buffers, selected alternately via ulFrame % 2
plProfileFrame atFrames[2];
// frame currently being recorded
plProfileFrame* ptCurrentFrame;
// frame most recently finished by pl__end_profile_frame
plProfileFrame* ptLastFrame;
// platform clock data set at creation: points to the performance-counter
// frequency (INT64) on Windows and to a double on Linux; left NULL on Apple
void* pInternal;
} plProfileContext;
//-----------------------------------------------------------------------------
// [SECTION] internal api
//-----------------------------------------------------------------------------
// Pushes a sample index onto the frame's stack of open samples, growing the stack if it is full.
static void pl__push_sample_stack(plProfileFrame* ptFrame, uint32_t uSample);
// Returns the next free sample slot of the frame, growing the sample storage if it is full.
static plProfileSample* pl__get_sample(plProfileFrame* ptFrame);
// Removes the top entry of the frame's sample stack and returns it (the index
// of the most recently opened sample).
// NOTE: the size counter is decremented unconditionally; emptiness is not
// checked here.
static inline uint32_t
pl__pop_sample_stack(plProfileFrame* ptFrame)
{
    const uint32_t uTop = --ptFrame->uTotalSampleStackSize;
    return ptFrame->puSampleStack[uTop];
}
// Returns a timestamp in seconds from the platform's monotonic clock.
//   * Windows: QueryPerformanceCounter divided by the frequency that
//     pl__create_profile_context stored behind pInternal.
//   * Apple:   clock_gettime_nsec_np(CLOCK_UPTIME_RAW) converted from ns.
//   * other:   clock_gettime(CLOCK_MONOTONIC).
static inline double
pl__get_wall_clock(void)
{
    double dResult = 0;
#ifdef _WIN32
    const INT64 slPerfFrequency = *(INT64*)gTPProfileContext->pInternal;
    INT64 slPerfCounter = 0;
    QueryPerformanceCounter((LARGE_INTEGER*)&slPerfCounter);
    dResult = (double)slPerfCounter / (double)slPerfFrequency;
#elif defined(__APPLE__)
    dResult = ((double)(clock_gettime_nsec_np(CLOCK_UPTIME_RAW)) / 1e9);
#else // linux
    struct timespec ts = {0};
    clock_gettime(CLOCK_MONOTONIC, &ts);
    // clock_gettime always reports seconds + nanoseconds, whatever the clock's
    // resolution is, so convert directly instead of scaling by a frequency
    // derived from clock_getres (that is only right for a 1 ns resolution).
    dResult = (double)ts.tv_sec + (double)ts.tv_nsec / 1e9;
#endif
    return dResult;
}
//-----------------------------------------------------------------------------
// [SECTION] public api implementations
//-----------------------------------------------------------------------------
plProfileContext*
pl__create_profile_context(void)
{
// allocate context
plProfileContext* ptContext = (plProfileContext*)PL_PROFILE_ALLOC(sizeof(plProfileContext));
memset(ptContext, 0, sizeof(plProfileContext));
gTPProfileContext = ptContext;
// clock setup
#ifdef _WIN32
static INT64 slPerfFrequency = 0;
PL_ASSERT(QueryPerformanceFrequency((LARGE_INTEGER*)&slPerfFrequency));
ptContext->pInternal = &slPerfFrequency;
#elif defined(__APPLE__)
// no setup required
#else // linux
static struct timespec ts;
if (clock_getres(CLOCK_MONOTONIC, &ts) != 0)
{
PL_ASSERT(false && "clock_getres() failed");
}
static double dPerFrequency = 0.0;
dPerFrequency = 1e9/((double)ts.tv_nsec + (double)ts.tv_sec * (double)1e9);
ptContext->pInternal = &dPerFrequency;
#endif
ptContext->dStartTime = pl__get_wall_clock();
ptContext->ptCurrentFrame = &ptContext->atFrames[0];
ptContext->atFrames[0].uSampleCapacity = 256;
ptContext->atFrames[0].uSampleStackCapacity = 256;
ptContext->atFrames[1].uSampleCapacity = 256;
ptContext->atFrames[1].uSampleStackCapacity = 256;
ptContext->atFrames[0].ptSamples = ptContext->atFrames[0].atSamples;
ptContext->atFrames[1].ptSamples = ptContext->atFrames[1].atSamples;
ptContext->atFrames[0].puSampleStack = ptContext->atFrames[0].auSampleStack;
ptContext->atFrames[1].puSampleStack = ptContext->atFrames[1].auSampleStack;
ptContext->ptLastFrame = &ptContext->atFrames[0];
return ptContext;
}
void
pl__cleanup_profile_context(void)
{
for(uint32_t i = 0; i < 2; i++)
{
if(gTPProfileContext->atFrames[i].bOverflowInUse)
PL_PROFILE_FREE(gTPProfileContext->atFrames[i].ptSamples);
if(gTPProfileContext->atFrames[i].bSampleStackOverflowInUse)
PL_PROFILE_FREE(gTPProfileContext->atFrames[i].puSampleStack);
}
PL_PROFILE_FREE(gTPProfileContext);
gTPProfileContext = NULL;
}
// Installs ptContext as the global profile context used by all pl__* calls.
// ptContext must be non-NULL (checked with PL_ASSERT). The previous global
// pointer is simply overwritten.
void
pl__set_profile_context(plProfileContext* ptContext)
{
PL_ASSERT(ptContext && "profile context is NULL");
gTPProfileContext = ptContext;
}
// Returns the global profile context. Asserts that one has been created or
// set; when asserts are compiled out the (possibly NULL) pointer is returned
// as-is.
plProfileContext*
pl__get_profile_context(void)
{
    PL_ASSERT(gTPProfileContext && "no global profile context set");
    return gTPProfileContext;
}
void
pl__begin_profile_frame(void)
{
gTPProfileContext->ulFrame++;
gTPProfileContext->ptCurrentFrame = &gTPProfileContext->atFrames[gTPProfileContext->ulFrame % 2];
gTPProfileContext->ptCurrentFrame->dDuration = 0.0;
gTPProfileContext->ptCurrentFrame->dInternalDuration = 0.0;
gTPProfileContext->ptCurrentFrame->dStartTime = pl__get_wall_clock();
gTPProfileContext->ptCurrentFrame->uTotalSampleSize = 0;
}
void
pl__end_profile_frame(void)
{
gTPProfileContext->ptCurrentFrame->dDuration = pl__get_wall_clock() - gTPProfileContext->ptCurrentFrame->dStartTime;
gTPProfileContext->ptLastFrame = gTPProfileContext->ptCurrentFrame;
}
// Opens a new sample named pcName in the current frame. The sample records its
// absolute start time and its nesting depth (how many samples were already
// open), and its slot index is pushed onto the frame's sample stack. Time
// spent inside this function is added to the frame's profiler overhead.
void
pl__begin_profile_sample(const char* pcName)
{
    const double dOverheadStart = pl__get_wall_clock();

    plProfileFrame* ptFrame = gTPProfileContext->ptCurrentFrame;

    // capture the slot index before pl__get_sample advances the sample count
    const uint32_t uSlot = ptFrame->uTotalSampleSize;
    plProfileSample* ptNewSample = pl__get_sample(ptFrame);

    ptNewSample->dDuration  = 0.0;
    ptNewSample->dStartTime = pl__get_wall_clock();
    ptNewSample->pcName     = pcName;
    ptNewSample->uDepth     = ptFrame->uTotalSampleStackSize;

    pl__push_sample_stack(ptFrame, uSlot);

    ptFrame->dInternalDuration += pl__get_wall_clock() - dOverheadStart;
}
void
pl__end_profile_sample(void)
{
const double dCurrentInternalTime = pl__get_wall_clock();
plProfileFrame* ptCurrentFrame = gTPProfileContext->ptCurrentFrame;
plProfileSample* ptLastSample = &ptCurrentFrame->ptSamples[pl__pop_sample_stack(ptCurrentFrame)];
PL_ASSERT(ptLastSample && "Begin/end profile sample mismatch");
ptLastSample->dDuration = pl__get_wall_clock() - ptLastSample->dStartTime;
ptLastSample->dStartTime -= ptCurrentFrame->dStartTime;
ptCurrentFrame->dInternalDuration += pl__get_wall_clock() - dCurrentInternalTime;
}
// Returns the sample array of the last completed frame. When puSize is
// non-NULL the number of samples in that frame is written through it.
plProfileSample*
pl__get_last_frame_samples(uint32_t* puSize)
{
    const plProfileFrame* ptLast = gTPProfileContext->ptLastFrame;

    if(puSize)
    {
        *puSize = ptLast->uTotalSampleSize;
    }
    return ptLast->ptSamples;
}
//-----------------------------------------------------------------------------
// [SECTION] internal api implementations
//-----------------------------------------------------------------------------
// Pushes uSample (an index into the frame's sample array) onto the frame's
// stack of open samples. When the inline stack is full the entries move to a
// heap buffer of twice the capacity; when that heap buffer is full it is
// replaced by one twice as large.
// NOTE: the result of PL_PROFILE_ALLOC is not checked.
static void
pl__push_sample_stack(plProfileFrame* ptFrame, uint32_t uSample)
{
    const uint32_t uDepth = ptFrame->uTotalSampleStackSize;

    if(ptFrame->bSampleStackOverflowInUse)
    {
        // already on the heap: double the buffer once it is full
        if(uDepth == ptFrame->uOverflowSampleStackCapacity)
        {
            const uint32_t uOldCapacity = ptFrame->uOverflowSampleStackCapacity;
            uint32_t* puOld = ptFrame->puOverflowSampleStack;
            uint32_t* puNew = (uint32_t*)PL_PROFILE_ALLOC(sizeof(uint32_t) * uOldCapacity * 2);

            memset(puNew, 0, sizeof(uint32_t) * uOldCapacity * 2);
            memcpy(puNew, puOld, sizeof(uint32_t) * uOldCapacity);

            ptFrame->puOverflowSampleStack = puNew;
            ptFrame->uOverflowSampleStackCapacity = uOldCapacity * 2;
            PL_PROFILE_FREE(puOld);
            ptFrame->puSampleStack = puNew;
        }
    }
    else if(uDepth == ptFrame->uSampleStackCapacity)
    {
        // first overflow: migrate the inline stack to a heap buffer
        const uint32_t uInlineCapacity = ptFrame->uSampleStackCapacity;
        uint32_t* puNew = (uint32_t*)PL_PROFILE_ALLOC(sizeof(uint32_t) * uInlineCapacity * 2);

        memset(puNew, 0, sizeof(uint32_t) * uInlineCapacity * 2);
        memcpy(puNew, ptFrame->auSampleStack, sizeof(uint32_t) * uInlineCapacity);

        ptFrame->puOverflowSampleStack = puNew;
        ptFrame->uOverflowSampleStackCapacity = uInlineCapacity * 2;
        ptFrame->bSampleStackOverflowInUse = true;
        ptFrame->puSampleStack = puNew;
    }

    ptFrame->puSampleStack[uDepth] = uSample;
    ptFrame->uTotalSampleStackSize = uDepth + 1;
}
// Reserves the next sample slot of the frame and returns a pointer to it,
// incrementing the frame's sample count. When the inline array is full the
// samples move to a heap buffer of twice the capacity; when that heap buffer
// is full it is replaced by one twice as large.
// NOTE: the result of PL_PROFILE_ALLOC is not checked.
static plProfileSample*
pl__get_sample(plProfileFrame* ptFrame)
{
    const uint32_t uCount = ptFrame->uTotalSampleSize;

    if(ptFrame->bOverflowInUse)
    {
        // already on the heap: double the buffer once it is full
        if(uCount == ptFrame->uOverflowSampleCapacity)
        {
            const uint32_t uOldCapacity = ptFrame->uOverflowSampleCapacity;
            plProfileSample* ptOld = ptFrame->ptSamples;
            plProfileSample* ptNew = (plProfileSample*)PL_PROFILE_ALLOC(sizeof(plProfileSample) * uOldCapacity * 2);

            memset(ptNew, 0, sizeof(plProfileSample) * uOldCapacity * 2);
            memcpy(ptNew, ptOld, sizeof(plProfileSample) * uOldCapacity);

            ptFrame->ptSamples = ptNew;
            ptFrame->uOverflowSampleCapacity = uOldCapacity * 2;
            PL_PROFILE_FREE(ptOld);
        }
    }
    else if(uCount == ptFrame->uSampleCapacity)
    {
        // first overflow: migrate the inline samples to a heap buffer
        const uint32_t uInlineCapacity = ptFrame->uSampleCapacity;
        plProfileSample* ptNew = (plProfileSample*)PL_PROFILE_ALLOC(sizeof(plProfileSample) * uInlineCapacity * 2);

        memset(ptNew, 0, sizeof(plProfileSample) * uInlineCapacity * 2);
        memcpy(ptNew, ptFrame->atSamples, sizeof(plProfileSample) * uInlineCapacity);

        ptFrame->ptSamples = ptNew;
        ptFrame->uOverflowSampleCapacity = uInlineCapacity * 2;
        ptFrame->bOverflowInUse = true;
    }

    ptFrame->uTotalSampleSize = uCount + 1;
    return &ptFrame->ptSamples[uCount];
}
#endif // PL_PROFILE_IMPLEMENTATION