For a 4-bit group n, the value can be computed in 3 instructions using a 32-bit unsigned value as a lookup table with 16 entries of two bits each:
(0xffffaa50 >> (n << 1)) & 3
This can be used to shorten the tail end of the divide-and-conquer methods: once the value has been narrowed down to 4 bits, a single table lookup finishes the job.
/*
 * Integer base-2 logarithm of a 32-bit value: the zero-based index of the
 * highest set bit, i.e. floor(log2(v)) for v > 0.
 *
 * v == 0 has no logarithm; this routine returns 0 for it (same as for v == 1).
 */
size_t blog32(uint32_t v)
{
    /* 16 entries of 2 bits each: entry n holds floor(log2(n)) for n in 1..15
     * (entry 0 is 0). */
    const uint32_t nibble_log2 = 0xffffaa50;
    size_t result = 0;
    unsigned width;

    /* Divide and conquer: whenever the upper part of the remaining window
     * is non-zero, discard the lower part and record how far we shifted. */
    for (width = 16; width >= 4; width >>= 1) {
        if ((v >> width) != 0) {
            v >>= width;
            result += width;
        }
    }

    /* v now fits in 4 bits; look up its log2 in the packed table. */
    return result + ((nibble_log2 >> (v << 1)) & 3);
}
For 64-bit values, add one more level:
/*
 * Integer base-2 logarithm of a 64-bit value: the zero-based index of the
 * highest set bit, i.e. floor(log2(v)) for v > 0.
 *
 * v == 0 has no logarithm; this routine returns 0 for it (same as for v == 1).
 */
size_t blog64(uint64_t v)
{
    /* 16 entries of 2 bits each: entry n holds floor(log2(n)) for n in 1..15
     * (entry 0 is 0). */
    const uint32_t nibble_log2 = 0xffffaa50;
    size_t result = 0;
    unsigned width;

    /* Divide and conquer: whenever the upper part of the remaining window
     * is non-zero, discard the lower part and record how far we shifted.
     * Compared with the 32-bit variant there is one extra, 32-bit wide step. */
    for (width = 32; width >= 4; width >>= 1) {
        if ((v >> width) != 0) {
            v >>= width;
            result += width;
        }
    }

    /* v now fits in 4 bits; look up its log2 in the packed table. */
    return result + ((nibble_log2 >> (v << 1)) & 3);
}