From 037c0aa88cb8eb74f21ce24c80467b909028d2f6 Mon Sep 17 00:00:00 2001
From: Neil Roberts
Date: Tue, 1 Nov 2011 13:10:59 +0000
Subject: [PATCH] Move POPCOUNTL to cogl-util

This moves the POPCOUNTL macro from cogl-winsys-glx to cogl-util and
renames it to _cogl_util_popcountl so that it can be used in more
places.

The fallback function for when the GCC builtin is not available has
been replaced with an 8-bit lookup table because the HAKMEM
implementation doesn't look like it would work when longs are 64-bit
so it's not suitable for a general purpose function on 64-bit
architectures. Some of the pages regarding population counts seem to
suggest that using a lookup table is the fastest method anyway.

Reviewed-by: Robert Bragg
---
 cogl/cogl-util.c              | 20 ++++++++++++++++++++
 cogl/cogl-util.h              | 34 +++++++++++++++++++++++++++++++---
 cogl/winsys/cogl-winsys-glx.c | 21 ++++-----------------
 3 files changed, 55 insertions(+), 20 deletions(-)

diff --git a/cogl/cogl-util.c b/cogl/cogl-util.c
index 1cb38143c..487a762c1 100644
--- a/cogl/cogl-util.c
+++ b/cogl/cogl-util.c
@@ -100,3 +100,23 @@ _cogl_util_ffsl_wrapper (long int num)
 }
 
 #endif /* COGL_UTIL_HAVE_BUILTIN_FFSL */
+
+#ifndef COGL_UTIL_HAVE_BUILTIN_POPCOUNTL
+
+const unsigned char
+_cogl_util_popcount_table[256] = /* [n] = number of 1-bits in byte n */
+  {
+    0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4,
+    2, 3, 3, 4, 3, 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4,
+    2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6,
+    4, 5, 5, 6, 5, 6, 6, 7, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5,
+    3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6,
+    4, 5, 5, 6, 5, 6, 6, 7, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+    4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
+  };
+
+#endif /* COGL_UTIL_HAVE_BUILTIN_POPCOUNTL */
diff --git a/cogl/cogl-util.h b/cogl/cogl-util.h
index d38167c6e..256ecaea5 100644
--- a/cogl/cogl-util.h
+++ b/cogl/cogl-util.h
@@ -100,6 +100,12 @@ _cogl_util_one_at_a_time_hash (unsigned int hash,
 unsigned int
 _cogl_util_one_at_a_time_mix (unsigned int hash);
 
+/* __builtin_ffsl and __builtin_popcountl are available since GCC 3.4 */
+#if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)
+#define COGL_UTIL_HAVE_BUILTIN_FFSL
+#define COGL_UTIL_HAVE_BUILTIN_POPCOUNTL
+#endif
+
 /* The 'ffs' function is part of C99 so it isn't always available */
 #ifdef HAVE_FFS
 #define _cogl_util_ffs ffs
@@ -110,9 +116,8 @@ _cogl_util_ffs (int num);
 
 /* The 'ffsl' function is non-standard but GCC has a builtin for it
    since 3.4 which we can use */
-#if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)
+#ifdef COGL_UTIL_HAVE_BUILTIN_FFSL
 #define _cogl_util_ffsl __builtin_ffsl
-#define COGL_UTIL_HAVE_BUILTIN_FFSL
 #else
 /* If ints and longs are the same size we can just use ffs. Hopefully
    the compiler will optimise away this conditional */
@@ -121,7 +126,30 @@ _cogl_util_ffs (int num);
    _cogl_util_ffsl_wrapper (x))
 int
 _cogl_util_ffsl_wrapper (long int num);
-#endif
+#endif /* COGL_UTIL_HAVE_BUILTIN_FFSL */
+
+#ifdef COGL_UTIL_HAVE_BUILTIN_POPCOUNTL
+#define _cogl_util_popcountl __builtin_popcountl
+#else
+extern const unsigned char _cogl_util_popcount_table[256];
+
+/* Fallback used without the builtin: sums the set-bit count of each
+   8-bit chunk of num via _cogl_util_popcount_table. Iterating over
+   sizeof (num) chunks keeps it independent of the size of long. */
+static inline int
+_cogl_util_popcountl (unsigned long num)
+{
+  int i;
+  int sum = 0;
+
+  /* One table lookup per 8-bit chunk; hopefully GCC unrolls this loop */
+  for (i = 0; i < sizeof (num); i++)
+    sum += _cogl_util_popcount_table[(num >> (i * 8)) & 0xff];
+
+  return sum;
+}
+
+#endif /* COGL_UTIL_HAVE_BUILTIN_POPCOUNTL */
 
 #ifdef COGL_HAS_GLIB_SUPPORT
 #define _COGL_RETURN_IF_FAIL(EXPR) g_return_if_fail(EXPR)
diff --git a/cogl/winsys/cogl-winsys-glx.c b/cogl/winsys/cogl-winsys-glx.c
index 05e2dac14..0dce47413 100644
--- a/cogl/winsys/cogl-winsys-glx.c
+++ b/cogl/winsys/cogl-winsys-glx.c
@@ -49,6 +49,7 @@
 #include "cogl-onscreen-private.h"
 #include "cogl-swap-chain-private.h"
 #include "cogl-xlib-renderer.h"
+#include "cogl-util.h"
 
 #include
 #include
@@ -1697,22 +1698,6 @@ should_use_rectangle (CoglContext *context)
   return context->rectangle_state == COGL_WINSYS_RECTANGLE_STATE_ENABLE;
 }
 
-/* GCC's population count builtin is available since version 3.4 */
-#if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)
-#define POPCOUNTL(n) __builtin_popcountl(n)
-#else
-/* HAKMEM 169 */
-static int
-hakmem_popcountl (unsigned long n)
-{
-  unsigned long tmp;
-
-  tmp = n - ((n >> 1) & 033333333333) - ((n >> 2) & 011111111111);
-  return ((tmp + (tmp >> 3)) & 030707070707) % 63;
-}
-#define POPCOUNTL(n) hakmem_popcountl(n)
-#endif
-
 static gboolean
 try_create_glx_pixmap (CoglContext *context,
                        CoglTexturePixmapX11 *tex_pixmap,
@@ -1765,7 +1750,9 @@ try_create_glx_pixmap (CoglContext *context,
    * number of 1-bits in color masks against the color depth requested
    * by the client.
    */
-  if (POPCOUNTL(visual->red_mask|visual->green_mask|visual->blue_mask) == depth)
+  if (_cogl_util_popcountl (visual->red_mask |
+                            visual->green_mask |
+                            visual->blue_mask) == depth)
     attribs[i++] = GLX_TEXTURE_FORMAT_RGB_EXT;
   else
     attribs[i++] = GLX_TEXTURE_FORMAT_RGBA_EXT;