Add a workaround for slow read pixels on Mesa
Mesa before version 8.0.2 has a slow read pixels path that gets used with the Intel driver where it converts all of the pixels into a floating point representation and back even if the data is being read into exactly the same format. There is however a faster path using the blitter when reading into a PBO with BGRA format. It works out faster to read into a PBO and then memcpy back out into the application's buffer even though it adds an extra memcpy. This patch adds a workaround in cogl_framebuffer_read_pixels_into_bitmap when it detects this situation. In that case it will create a temporary CoglBitmap using cogl_bitmap_new_with_size, read into it and then memcpy the data back out. The main impetus for this patch is that Gnome Shell has implemented this workaround directly using GL calls but it seems like the kind of thing that would sit better at the Cogl layer. Reviewed-by: Robert Bragg <robert@linux.intel.com>
This commit is contained in:
parent
2037e0f4f1
commit
cb146dc515
@ -93,6 +93,16 @@ typedef enum _CoglFramebufferState
|
||||
|
||||
#define COGL_FRAMEBUFFER_STATE_ALL ((1<<COGL_FRAMEBUFFER_STATE_INDEX_MAX) - 1)
|
||||
|
||||
/* Private flags that can internally be added to CoglReadPixelsFlags */
|
||||
typedef enum
|
||||
{
|
||||
/* If this is set then the data will not be flipped to compensate
|
||||
for GL's upside-down coordinate system but instead will be left
|
||||
in whatever order GL gives us (which will depend on whether the
|
||||
framebuffer is offscreen or not) */
|
||||
COGL_READ_PIXELS_NO_FLIP = 1L << 30
|
||||
} CoglPrivateReadPixelsFlags;
|
||||
|
||||
struct _CoglFramebuffer
|
||||
{
|
||||
CoglObject _parent;
|
||||
|
@ -1942,6 +1942,97 @@ _cogl_framebuffer_try_fast_read_pixel (CoglFramebuffer *framebuffer,
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
static gboolean
|
||||
_cogl_framebuffer_slow_read_pixels_workaround (CoglFramebuffer *framebuffer,
|
||||
int x,
|
||||
int y,
|
||||
CoglReadPixelsFlags source,
|
||||
CoglBitmap *bitmap)
|
||||
{
|
||||
CoglContext *ctx;
|
||||
CoglPixelFormat format;
|
||||
CoglBitmap *pbo;
|
||||
int width;
|
||||
int height;
|
||||
gboolean res;
|
||||
|
||||
_COGL_RETURN_VAL_IF_FAIL (source & COGL_READ_PIXELS_COLOR_BUFFER, FALSE);
|
||||
_COGL_RETURN_VAL_IF_FAIL (cogl_is_framebuffer (framebuffer), FALSE);
|
||||
|
||||
ctx = cogl_framebuffer_get_context (framebuffer);
|
||||
|
||||
width = cogl_bitmap_get_width (bitmap);
|
||||
height = cogl_bitmap_get_height (bitmap);
|
||||
format = cogl_bitmap_get_format (bitmap);
|
||||
|
||||
pbo = cogl_bitmap_new_with_size (ctx, width, height, format);
|
||||
if (pbo == NULL)
|
||||
return FALSE;
|
||||
|
||||
/* Read into the pbo. We need to disable the flipping because the
|
||||
blit fast path in the driver does not work with
|
||||
GL_PACK_INVERT_MESA is set */
|
||||
res = cogl_framebuffer_read_pixels_into_bitmap (framebuffer,
|
||||
x, y,
|
||||
source |
|
||||
COGL_READ_PIXELS_NO_FLIP,
|
||||
pbo);
|
||||
|
||||
if (res)
|
||||
{
|
||||
guint8 *dst;
|
||||
|
||||
/* Copy the pixels back into application's buffer */
|
||||
dst = _cogl_bitmap_map (bitmap,
|
||||
COGL_BUFFER_ACCESS_WRITE,
|
||||
COGL_BUFFER_MAP_HINT_DISCARD);
|
||||
|
||||
if (dst == NULL)
|
||||
res = FALSE;
|
||||
else
|
||||
{
|
||||
const guint8 *src;
|
||||
|
||||
src = _cogl_bitmap_map (pbo,
|
||||
COGL_BUFFER_ACCESS_READ,
|
||||
0 /* hints */);
|
||||
if (src == NULL)
|
||||
res = FALSE;
|
||||
else
|
||||
{
|
||||
int src_rowstride = cogl_bitmap_get_rowstride (pbo);
|
||||
int dst_rowstride = cogl_bitmap_get_rowstride (bitmap);
|
||||
int to_copy =
|
||||
_cogl_pixel_format_get_bytes_per_pixel (format) * width;
|
||||
int y;
|
||||
|
||||
/* If the framebuffer is onscreen we need to flip the
|
||||
data while copying */
|
||||
if (!cogl_is_offscreen (framebuffer))
|
||||
{
|
||||
src += src_rowstride * (height - 1);
|
||||
src_rowstride = -src_rowstride;
|
||||
}
|
||||
|
||||
for (y = 0; y < height; y++)
|
||||
{
|
||||
memcpy (dst, src, to_copy);
|
||||
dst += dst_rowstride;
|
||||
src += src_rowstride;
|
||||
}
|
||||
|
||||
_cogl_bitmap_unmap (pbo);
|
||||
}
|
||||
|
||||
_cogl_bitmap_unmap (bitmap);
|
||||
}
|
||||
}
|
||||
|
||||
cogl_object_unref (pbo);
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
gboolean
|
||||
cogl_framebuffer_read_pixels_into_bitmap (CoglFramebuffer *framebuffer,
|
||||
int x,
|
||||
@ -1960,7 +2051,7 @@ cogl_framebuffer_read_pixels_into_bitmap (CoglFramebuffer *framebuffer,
|
||||
int width;
|
||||
int height;
|
||||
|
||||
_COGL_RETURN_VAL_IF_FAIL (source == COGL_READ_PIXELS_COLOR_BUFFER, FALSE);
|
||||
_COGL_RETURN_VAL_IF_FAIL (source & COGL_READ_PIXELS_COLOR_BUFFER, FALSE);
|
||||
_COGL_RETURN_VAL_IF_FAIL (cogl_is_framebuffer (framebuffer), FALSE);
|
||||
|
||||
if (!cogl_framebuffer_allocate (framebuffer, NULL))
|
||||
@ -1970,6 +2061,7 @@ cogl_framebuffer_read_pixels_into_bitmap (CoglFramebuffer *framebuffer,
|
||||
|
||||
width = cogl_bitmap_get_width (bitmap);
|
||||
height = cogl_bitmap_get_height (bitmap);
|
||||
format = cogl_bitmap_get_format (bitmap);
|
||||
|
||||
if (width == 1 && height == 1 && !framebuffer->clear_clip_dirty)
|
||||
{
|
||||
@ -1986,6 +2078,32 @@ cogl_framebuffer_read_pixels_into_bitmap (CoglFramebuffer *framebuffer,
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
/* Workaround for cases where its faster to read into a temporary
|
||||
* PBO. This is only worth doing if:
|
||||
*
|
||||
* • The GPU is an Intel GPU. In that case there is a known
|
||||
* fast-path when reading into a PBO that will use the blitter
|
||||
* instead of the Mesa fallback code. The driver bug will only be
|
||||
* set if this is the case.
|
||||
* • We're not already reading into a PBO.
|
||||
* • The target format is BGRA. The fast-path blit does not get hit
|
||||
* otherwise.
|
||||
* • The size of the data is not trivially small. This isn't a
|
||||
* requirement to hit the fast-path blit but intuitively it feels
|
||||
* like if the amount of data is too small then the cost of
|
||||
* allocating a PBO will outweigh the cost of temporarily
|
||||
* converting the data to floats.
|
||||
*/
|
||||
if ((ctx->gpu.driver_bugs &
|
||||
COGL_GPU_INFO_DRIVER_BUG_MESA_46631_SLOW_READ_PIXELS) &&
|
||||
(width > 8 || height > 8) &&
|
||||
(format & ~COGL_PREMULT_BIT) == COGL_PIXEL_FORMAT_BGRA_8888 &&
|
||||
cogl_bitmap_get_buffer (bitmap) == NULL)
|
||||
return _cogl_framebuffer_slow_read_pixels_workaround (framebuffer,
|
||||
x, y,
|
||||
source,
|
||||
bitmap);
|
||||
|
||||
/* make sure any batched primitives get emitted to the GL driver
|
||||
* before issuing our read pixels...
|
||||
*/
|
||||
@ -2006,8 +2124,6 @@ cogl_framebuffer_read_pixels_into_bitmap (CoglFramebuffer *framebuffer,
|
||||
if (!cogl_is_offscreen (framebuffer))
|
||||
y = framebuffer_height - y - height;
|
||||
|
||||
format = cogl_bitmap_get_format (bitmap);
|
||||
|
||||
required_format = ctx->driver_vtable->pixel_format_to_gl (ctx,
|
||||
format,
|
||||
&gl_intformat,
|
||||
@ -2017,6 +2133,7 @@ cogl_framebuffer_read_pixels_into_bitmap (CoglFramebuffer *framebuffer,
|
||||
/* NB: All offscreen rendering is done upside down so there is no need
|
||||
* to flip in this case... */
|
||||
if ((ctx->private_feature_flags & COGL_PRIVATE_FEATURE_MESA_PACK_INVERT) &&
|
||||
(source & COGL_READ_PIXELS_NO_FLIP) == 0 &&
|
||||
!cogl_is_offscreen (framebuffer))
|
||||
{
|
||||
GE (ctx, glPixelStorei (GL_PACK_INVERT_MESA, TRUE));
|
||||
@ -2152,7 +2269,9 @@ cogl_framebuffer_read_pixels_into_bitmap (CoglFramebuffer *framebuffer,
|
||||
|
||||
/* NB: All offscreen rendering is done upside down so there is no need
|
||||
* to flip in this case... */
|
||||
if (!cogl_is_offscreen (framebuffer) && !pack_invert_set)
|
||||
if (!cogl_is_offscreen (framebuffer) &&
|
||||
(source & COGL_READ_PIXELS_NO_FLIP) == 0 &&
|
||||
!pack_invert_set)
|
||||
{
|
||||
guint8 *temprow;
|
||||
int rowstride;
|
||||
|
@ -40,7 +40,12 @@ typedef enum
|
||||
|
||||
typedef enum
|
||||
{
|
||||
COGL_GPU_INFO_DRIVER_STUB
|
||||
/* If this bug is present then it is faster to read pixels into a
|
||||
* PBO and then memcpy out of the PBO into system memory rather than
|
||||
* directly read into system memory.
|
||||
* https://bugs.freedesktop.org/show_bug.cgi?id=46631
|
||||
*/
|
||||
COGL_GPU_INFO_DRIVER_BUG_MESA_46631_SLOW_READ_PIXELS = 1 << 0
|
||||
} CoglGpuInfoDriverBug;
|
||||
|
||||
typedef struct _CoglGpuInfoVersion CoglGpuInfoVersion;
|
||||
|
@ -250,5 +250,15 @@ _cogl_gpu_info_init (CoglContext *ctx,
|
||||
}
|
||||
|
||||
/* Determine the driver bugs */
|
||||
gpu->driver_bugs = 0;
|
||||
|
||||
/* In Mesa < 8.0.2 the glReadPixels implementation is really slow
|
||||
because it converts each pixel to a floating point representation
|
||||
and back even if the data could just be memcpy'd. The Intel
|
||||
driver has a fast blit path when reading into a PBO. Reading into
|
||||
a temporary PBO and then memcpying back out to the application's
|
||||
memory is faster than a regular glReadPixels in this case */
|
||||
if (gpu->vendor == COGL_GPU_INFO_VENDOR_INTEL &&
|
||||
gpu->driver_package == COGL_GPU_INFO_DRIVER_PACKAGE_MESA &&
|
||||
gpu->driver_package_version < COGL_VERSION_ENCODE (8, 0, 2))
|
||||
gpu->driver_bugs |= COGL_GPU_INFO_DRIVER_BUG_MESA_46631_SLOW_READ_PIXELS;
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user