Move POPCOUNTL to cogl-util

This moves the POPCOUNTL macro from cogl-winsys-glx to cogl-util and
renames it to _cogl_util_popcountl so that it can be used in more
places. The fallback function for when the GCC builtin is not
available has been replaced with an 8-bit lookup table because the
HAKMEM implementation doesn't look like it would work when longs are
64-bit so it's not suitable for a general purpose function on 64-bit
architectures. Some of the pages regarding population counts seem to
suggest that using a lookup table is the fastest method anyway.

Reviewed-by: Robert Bragg <robert@linux.intel.com>
This commit is contained in:
Neil Roberts 2011-11-01 13:10:59 +00:00
parent 436a7a45da
commit 037c0aa88c
3 changed files with 55 additions and 20 deletions

View File

@ -100,3 +100,23 @@ _cogl_util_ffsl_wrapper (long int num)
} }
#endif /* COGL_UTIL_HAVE_BUILTIN_FFSL */ #endif /* COGL_UTIL_HAVE_BUILTIN_FFSL */
#ifndef COGL_UTIL_HAVE_BUILTIN_POPCOUNTL
/* Lookup table giving the number of set bits in every possible 8-bit
   value. Rather than spelling out all 256 entries by hand, the table
   is built up recursively: each helper macro expands to the counts
   for a group of values that share the same upper bits, where 'n' is
   the number of bits already set in those upper bits. */
#define COGL_UTIL_POPCOUNT_2(n) (n), (n) + 1, (n) + 1, (n) + 2
#define COGL_UTIL_POPCOUNT_4(n) \
  COGL_UTIL_POPCOUNT_2 (n), COGL_UTIL_POPCOUNT_2 ((n) + 1), \
  COGL_UTIL_POPCOUNT_2 ((n) + 1), COGL_UTIL_POPCOUNT_2 ((n) + 2)
#define COGL_UTIL_POPCOUNT_6(n) \
  COGL_UTIL_POPCOUNT_4 (n), COGL_UTIL_POPCOUNT_4 ((n) + 1), \
  COGL_UTIL_POPCOUNT_4 ((n) + 1), COGL_UTIL_POPCOUNT_4 ((n) + 2)

const unsigned char
_cogl_util_popcount_table[256] =
  {
    COGL_UTIL_POPCOUNT_6 (0), /* values 0x00 - 0x3f */
    COGL_UTIL_POPCOUNT_6 (1), /* values 0x40 - 0x7f */
    COGL_UTIL_POPCOUNT_6 (1), /* values 0x80 - 0xbf */
    COGL_UTIL_POPCOUNT_6 (2)  /* values 0xc0 - 0xff */
  };

#undef COGL_UTIL_POPCOUNT_6
#undef COGL_UTIL_POPCOUNT_4
#undef COGL_UTIL_POPCOUNT_2
#endif /* COGL_UTIL_HAVE_BUILTIN_POPCOUNTL */

View File

@ -100,6 +100,12 @@ _cogl_util_one_at_a_time_hash (unsigned int hash,
unsigned int unsigned int
_cogl_util_one_at_a_time_mix (unsigned int hash); _cogl_util_one_at_a_time_mix (unsigned int hash);
/* The __builtin_ffsl and __builtin_popcountl builtins are both
   available since GCC 3.4. When the compiler is new enough these
   defines are set so that the code below can pick the builtin
   instead of a hand-written fallback. */
#if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)
#define COGL_UTIL_HAVE_BUILTIN_FFSL
#define COGL_UTIL_HAVE_BUILTIN_POPCOUNTL
#endif
/* The 'ffs' function is part of C99 so it isn't always available */ /* The 'ffs' function is part of C99 so it isn't always available */
#ifdef HAVE_FFS #ifdef HAVE_FFS
#define _cogl_util_ffs ffs #define _cogl_util_ffs ffs
@ -110,9 +116,8 @@ _cogl_util_ffs (int num);
/* The 'ffsl' function is non-standard but GCC has a builtin for it /* The 'ffsl' function is non-standard but GCC has a builtin for it
since 3.4 which we can use */ since 3.4 which we can use */
#if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) #ifdef COGL_UTIL_HAVE_BUILTIN_FFSL
#define _cogl_util_ffsl __builtin_ffsl #define _cogl_util_ffsl __builtin_ffsl
#define COGL_UTIL_HAVE_BUILTIN_FFSL
#else #else
/* If ints and longs are the same size we can just use ffs. Hopefully /* If ints and longs are the same size we can just use ffs. Hopefully
the compiler will optimise away this conditional */ the compiler will optimise away this conditional */
@ -121,7 +126,30 @@ _cogl_util_ffs (int num);
_cogl_util_ffsl_wrapper (x)) _cogl_util_ffsl_wrapper (x))
int int
_cogl_util_ffsl_wrapper (long int num); _cogl_util_ffsl_wrapper (long int num);
#endif #endif /* COGL_UTIL_HAVE_BUILTIN_FFSL */
#ifdef COGL_UTIL_HAVE_BUILTIN_POPCOUNTL
#define _cogl_util_popcountl __builtin_popcountl
#else
extern const unsigned char _cogl_util_popcount_table[256];
/* Fallback population count used when no compiler builtin is
   available. Of the many ways to count bits, a per-byte table lookup
   was chosen because it works whatever the size of a long happens to
   be, and it is reportedly among the fastest approaches anyway.

   Returns the number of bits that are set in 'num'. */
static inline int
_cogl_util_popcountl (unsigned long num)
{
  unsigned int shift;
  int n_bits = 0;

  /* Visit every byte of the value, lowest byte first, and add up the
     precomputed bit counts. The trip count is a compile-time constant
     so hopefully the compiler will unroll the loop. */
  for (shift = 0; shift < sizeof (num) * 8; shift += 8)
    n_bits += _cogl_util_popcount_table[(num >> shift) & 0xff];

  return n_bits;
}
#endif /* COGL_UTIL_HAVE_BUILTIN_POPCOUNTL */
#ifdef COGL_HAS_GLIB_SUPPORT #ifdef COGL_HAS_GLIB_SUPPORT
#define _COGL_RETURN_IF_FAIL(EXPR) g_return_if_fail(EXPR) #define _COGL_RETURN_IF_FAIL(EXPR) g_return_if_fail(EXPR)

View File

@ -49,6 +49,7 @@
#include "cogl-onscreen-private.h" #include "cogl-onscreen-private.h"
#include "cogl-swap-chain-private.h" #include "cogl-swap-chain-private.h"
#include "cogl-xlib-renderer.h" #include "cogl-xlib-renderer.h"
#include "cogl-util.h"
#include <stdlib.h> #include <stdlib.h>
#include <sys/types.h> #include <sys/types.h>
@ -1697,22 +1698,6 @@ should_use_rectangle (CoglContext *context)
return context->rectangle_state == COGL_WINSYS_RECTANGLE_STATE_ENABLE; return context->rectangle_state == COGL_WINSYS_RECTANGLE_STATE_ENABLE;
} }
/* GCC's population count builtin is available since version 3.4 */
#if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)
#define POPCOUNTL(n) __builtin_popcountl(n)
#else
/* Population count based on HAKMEM item 169.
 *
 * The HAKMEM trick sums the bits of a 32-bit word using octal masks,
 * so on its own it silently ignores any bits above bit 31. To give
 * the right answer when longs are wider than 32 bits, the value is
 * processed in 32-bit chunks and the per-chunk counts are added up.
 *
 * Returns the number of bits that are set in 'n'.
 */
static int
hakmem_popcountl (unsigned long n)
{
  int count = 0;

  do
    {
      /* Only the low 32 bits are valid input for the masks below */
      unsigned long chunk = n & 0xffffffffUL;
      unsigned long tmp;

      /* Each 3-bit group of tmp ends up holding the number of bits
         that were set in the matching group of chunk */
      tmp = (chunk
             - ((chunk >> 1) & 033333333333)
             - ((chunk >> 2) & 011111111111));
      /* Merge neighbouring groups into 6-bit fields; taking the
         remainder modulo 63 then adds all of the fields together */
      count += (int) (((tmp + (tmp >> 3)) & 030707070707) % 63);

      /* Move on to the next chunk. The shift is done in two steps
         because shifting by 32 would be undefined when long is only
         32 bits wide. */
      n = (n >> 16) >> 16;
    }
  while (n);

  return count;
}
#define POPCOUNTL(n) hakmem_popcountl(n)
#endif
static gboolean static gboolean
try_create_glx_pixmap (CoglContext *context, try_create_glx_pixmap (CoglContext *context,
CoglTexturePixmapX11 *tex_pixmap, CoglTexturePixmapX11 *tex_pixmap,
@ -1765,7 +1750,9 @@ try_create_glx_pixmap (CoglContext *context,
* number of 1-bits in color masks against the color depth requested * number of 1-bits in color masks against the color depth requested
* by the client. * by the client.
*/ */
if (POPCOUNTL(visual->red_mask|visual->green_mask|visual->blue_mask) == depth) if (_cogl_util_popcountl (visual->red_mask |
visual->green_mask |
visual->blue_mask) == depth)
attribs[i++] = GLX_TEXTURE_FORMAT_RGB_EXT; attribs[i++] = GLX_TEXTURE_FORMAT_RGB_EXT;
else else
attribs[i++] = GLX_TEXTURE_FORMAT_RGBA_EXT; attribs[i++] = GLX_TEXTURE_FORMAT_RGBA_EXT;