Add a workaround for slow read pixels on Mesa

Mesa before version 8.0.2 has a slow read pixels path that gets used
with the Intel driver where it converts all of the pixels into a
floating point representation and back even if the data is being read
into exactly the same format. There is however a faster path using the
blitter when reading into a PBO with BGRA format. It works out faster
to read into a PBO and then memcpy back out into the application's
buffer even though it adds an extra memcpy. This patch adds a
workaround in cogl_framebuffer_read_pixels_into_bitmap when it detects
this situation. In that case it will create a temporary CoglBitmap
using cogl_bitmap_new_with_size, read into it and then memcpy the data
back out.

The main impetus for this patch is that Gnome Shell has implemented
this workaround directly using GL calls but it seems like the kind of
thing that would sit better at the Cogl layer.

Reviewed-by: Robert Bragg <robert@linux.intel.com>
This commit is contained in:
Neil Roberts 2012-03-27 17:24:50 +01:00
parent 2037e0f4f1
commit cb146dc515
4 changed files with 150 additions and 6 deletions

View File

@ -93,6 +93,16 @@ typedef enum _CoglFramebufferState
#define COGL_FRAMEBUFFER_STATE_ALL ((1<<COGL_FRAMEBUFFER_STATE_INDEX_MAX) - 1) #define COGL_FRAMEBUFFER_STATE_ALL ((1<<COGL_FRAMEBUFFER_STATE_INDEX_MAX) - 1)
/* Private flags that can internally be added to CoglReadPixelsFlags */
typedef enum
{
/* If this is set then the data will not be flipped to compensate
for GL's upside-down coordinate system but instead will be left
in whatever order GL gives us (which will depend on whether the
framebuffer is offscreen or not) */
COGL_READ_PIXELS_NO_FLIP = 1L << 30
} CoglPrivateReadPixelsFlags;
struct _CoglFramebuffer struct _CoglFramebuffer
{ {
CoglObject _parent; CoglObject _parent;

View File

@ -1942,6 +1942,97 @@ _cogl_framebuffer_try_fast_read_pixel (CoglFramebuffer *framebuffer,
return FALSE; return FALSE;
} }
static gboolean
_cogl_framebuffer_slow_read_pixels_workaround (CoglFramebuffer *framebuffer,
int x,
int y,
CoglReadPixelsFlags source,
CoglBitmap *bitmap)
{
CoglContext *ctx;
CoglPixelFormat format;
CoglBitmap *pbo;
int width;
int height;
gboolean res;
_COGL_RETURN_VAL_IF_FAIL (source & COGL_READ_PIXELS_COLOR_BUFFER, FALSE);
_COGL_RETURN_VAL_IF_FAIL (cogl_is_framebuffer (framebuffer), FALSE);
ctx = cogl_framebuffer_get_context (framebuffer);
width = cogl_bitmap_get_width (bitmap);
height = cogl_bitmap_get_height (bitmap);
format = cogl_bitmap_get_format (bitmap);
pbo = cogl_bitmap_new_with_size (ctx, width, height, format);
if (pbo == NULL)
return FALSE;
/* Read into the pbo. We need to disable the flipping because the
blit fast path in the driver does not work with
GL_PACK_INVERT_MESA is set */
res = cogl_framebuffer_read_pixels_into_bitmap (framebuffer,
x, y,
source |
COGL_READ_PIXELS_NO_FLIP,
pbo);
if (res)
{
guint8 *dst;
/* Copy the pixels back into application's buffer */
dst = _cogl_bitmap_map (bitmap,
COGL_BUFFER_ACCESS_WRITE,
COGL_BUFFER_MAP_HINT_DISCARD);
if (dst == NULL)
res = FALSE;
else
{
const guint8 *src;
src = _cogl_bitmap_map (pbo,
COGL_BUFFER_ACCESS_READ,
0 /* hints */);
if (src == NULL)
res = FALSE;
else
{
int src_rowstride = cogl_bitmap_get_rowstride (pbo);
int dst_rowstride = cogl_bitmap_get_rowstride (bitmap);
int to_copy =
_cogl_pixel_format_get_bytes_per_pixel (format) * width;
int y;
/* If the framebuffer is onscreen we need to flip the
data while copying */
if (!cogl_is_offscreen (framebuffer))
{
src += src_rowstride * (height - 1);
src_rowstride = -src_rowstride;
}
for (y = 0; y < height; y++)
{
memcpy (dst, src, to_copy);
dst += dst_rowstride;
src += src_rowstride;
}
_cogl_bitmap_unmap (pbo);
}
_cogl_bitmap_unmap (bitmap);
}
}
cogl_object_unref (pbo);
return res;
}
gboolean gboolean
cogl_framebuffer_read_pixels_into_bitmap (CoglFramebuffer *framebuffer, cogl_framebuffer_read_pixels_into_bitmap (CoglFramebuffer *framebuffer,
int x, int x,
@ -1960,7 +2051,7 @@ cogl_framebuffer_read_pixels_into_bitmap (CoglFramebuffer *framebuffer,
int width; int width;
int height; int height;
_COGL_RETURN_VAL_IF_FAIL (source == COGL_READ_PIXELS_COLOR_BUFFER, FALSE); _COGL_RETURN_VAL_IF_FAIL (source & COGL_READ_PIXELS_COLOR_BUFFER, FALSE);
_COGL_RETURN_VAL_IF_FAIL (cogl_is_framebuffer (framebuffer), FALSE); _COGL_RETURN_VAL_IF_FAIL (cogl_is_framebuffer (framebuffer), FALSE);
if (!cogl_framebuffer_allocate (framebuffer, NULL)) if (!cogl_framebuffer_allocate (framebuffer, NULL))
@ -1970,6 +2061,7 @@ cogl_framebuffer_read_pixels_into_bitmap (CoglFramebuffer *framebuffer,
width = cogl_bitmap_get_width (bitmap); width = cogl_bitmap_get_width (bitmap);
height = cogl_bitmap_get_height (bitmap); height = cogl_bitmap_get_height (bitmap);
format = cogl_bitmap_get_format (bitmap);
if (width == 1 && height == 1 && !framebuffer->clear_clip_dirty) if (width == 1 && height == 1 && !framebuffer->clear_clip_dirty)
{ {
@ -1986,6 +2078,32 @@ cogl_framebuffer_read_pixels_into_bitmap (CoglFramebuffer *framebuffer,
return TRUE; return TRUE;
} }
/* Workaround for cases where its faster to read into a temporary
* PBO. This is only worth doing if:
*
* The GPU is an Intel GPU. In that case there is a known
* fast-path when reading into a PBO that will use the blitter
* instead of the Mesa fallback code. The driver bug will only be
* set if this is the case.
* We're not already reading into a PBO.
* The target format is BGRA. The fast-path blit does not get hit
* otherwise.
* The size of the data is not trivially small. This isn't a
* requirement to hit the fast-path blit but intuitively it feels
* like if the amount of data is too small then the cost of
* allocating a PBO will outweigh the cost of temporarily
* converting the data to floats.
*/
if ((ctx->gpu.driver_bugs &
COGL_GPU_INFO_DRIVER_BUG_MESA_46631_SLOW_READ_PIXELS) &&
(width > 8 || height > 8) &&
(format & ~COGL_PREMULT_BIT) == COGL_PIXEL_FORMAT_BGRA_8888 &&
cogl_bitmap_get_buffer (bitmap) == NULL)
return _cogl_framebuffer_slow_read_pixels_workaround (framebuffer,
x, y,
source,
bitmap);
/* make sure any batched primitives get emitted to the GL driver /* make sure any batched primitives get emitted to the GL driver
* before issuing our read pixels... * before issuing our read pixels...
*/ */
@ -2006,8 +2124,6 @@ cogl_framebuffer_read_pixels_into_bitmap (CoglFramebuffer *framebuffer,
if (!cogl_is_offscreen (framebuffer)) if (!cogl_is_offscreen (framebuffer))
y = framebuffer_height - y - height; y = framebuffer_height - y - height;
format = cogl_bitmap_get_format (bitmap);
required_format = ctx->driver_vtable->pixel_format_to_gl (ctx, required_format = ctx->driver_vtable->pixel_format_to_gl (ctx,
format, format,
&gl_intformat, &gl_intformat,
@ -2017,6 +2133,7 @@ cogl_framebuffer_read_pixels_into_bitmap (CoglFramebuffer *framebuffer,
/* NB: All offscreen rendering is done upside down so there is no need /* NB: All offscreen rendering is done upside down so there is no need
* to flip in this case... */ * to flip in this case... */
if ((ctx->private_feature_flags & COGL_PRIVATE_FEATURE_MESA_PACK_INVERT) && if ((ctx->private_feature_flags & COGL_PRIVATE_FEATURE_MESA_PACK_INVERT) &&
(source & COGL_READ_PIXELS_NO_FLIP) == 0 &&
!cogl_is_offscreen (framebuffer)) !cogl_is_offscreen (framebuffer))
{ {
GE (ctx, glPixelStorei (GL_PACK_INVERT_MESA, TRUE)); GE (ctx, glPixelStorei (GL_PACK_INVERT_MESA, TRUE));
@ -2152,7 +2269,9 @@ cogl_framebuffer_read_pixels_into_bitmap (CoglFramebuffer *framebuffer,
/* NB: All offscreen rendering is done upside down so there is no need /* NB: All offscreen rendering is done upside down so there is no need
* to flip in this case... */ * to flip in this case... */
if (!cogl_is_offscreen (framebuffer) && !pack_invert_set) if (!cogl_is_offscreen (framebuffer) &&
(source & COGL_READ_PIXELS_NO_FLIP) == 0 &&
!pack_invert_set)
{ {
guint8 *temprow; guint8 *temprow;
int rowstride; int rowstride;

View File

@ -40,7 +40,12 @@ typedef enum
typedef enum typedef enum
{ {
COGL_GPU_INFO_DRIVER_STUB /* If this bug is present then it is faster to read pixels into a
* PBO and then memcpy out of the PBO into system memory rather than
* directly read into system memory.
* https://bugs.freedesktop.org/show_bug.cgi?id=46631
*/
COGL_GPU_INFO_DRIVER_BUG_MESA_46631_SLOW_READ_PIXELS = 1 << 0
} CoglGpuInfoDriverBug; } CoglGpuInfoDriverBug;
typedef struct _CoglGpuInfoVersion CoglGpuInfoVersion; typedef struct _CoglGpuInfoVersion CoglGpuInfoVersion;

View File

@ -250,5 +250,15 @@ _cogl_gpu_info_init (CoglContext *ctx,
} }
/* Determine the driver bugs */ /* Determine the driver bugs */
gpu->driver_bugs = 0;
/* In Mesa < 8.0.2 the glReadPixels implementation is really slow
because it converts each pixel to a floating point representation
and back even if the data could just be memcpy'd. The Intel
driver has a fast blit path when reading into a PBO. Reading into
a temporary PBO and then memcpying back out to the application's
memory is faster than a regular glReadPixels in this case */
if (gpu->vendor == COGL_GPU_INFO_VENDOR_INTEL &&
gpu->driver_package == COGL_GPU_INFO_DRIVER_PACKAGE_MESA &&
gpu->driver_package_version < COGL_VERSION_ENCODE (8, 0, 2))
gpu->driver_bugs |= COGL_GPU_INFO_DRIVER_BUG_MESA_46631_SLOW_READ_PIXELS;
} }