From c8112a1dbcd5f224b96a148593c7705273261fde Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Tue, 6 Nov 2018 14:31:53 +0200 Subject: [PATCH] cogl: remove mesa_46631_slow_read_pixels_workaround This function gets hit even today on relatively modern Intel systems (I have a Haswell Desktop with Mesa 18.2.4) if the pixel format is right. Presumably it makes things slower for no longer a reason. According to cb146dc515bf484192a8edfab716550a996b05df, this functionality was refactored into a workaround path in 2012. The commit message mentions the problem existing before Mesa 8.0.2. The number refers to https://bugs.freedesktop.org/show_bug.cgi?id=46631 . The use case where I hit this is when improving support for DisplayLink video outputs. These are used through a "secondary GPU", and since DisplayLink does not have a GPU, Mutter uses the CPU copy path with Cogl read-pixels[1]. If the DisplayLink framebuffer was allocated as DRM_FORMAT_XRGB8888 (the only format it currently handles correctly), mesa_46631_slow_read_pixels_workaround would get hit. The render buffer is the same format as the framebuffer, yet doing the copy XRGB -> XRGB ends up being slower than XRGB -> XBGR which makes no sense. This patch is not sufficient to fix the XRGB -> XRGB copy performance, but it is required. This patch reverts CoglGpuInfoDriverBug into what it was before cb146dc515bf484192a8edfab716550a996b05df. [1] This is not actually true until https://gitlab.gnome.org/GNOME/mutter/merge_requests/278 is merged. --- cogl/cogl/cogl-gpu-info-private.h | 7 +- cogl/cogl/cogl-gpu-info.c | 10 +- cogl/cogl/driver/gl/cogl-framebuffer-gl.c | 124 ---------------------- 3 files changed, 2 insertions(+), 139 deletions(-) diff --git a/cogl/cogl/cogl-gpu-info-private.h b/cogl/cogl/cogl-gpu-info-private.h index e532cdd5c..7ab4950f8 100644 --- a/cogl/cogl/cogl-gpu-info-private.h +++ b/cogl/cogl/cogl-gpu-info-private.h @@ -74,12 +74,7 @@ typedef enum typedef enum { - /* If this bug is present then it is faster to read pixels into a - * PBO and then memcpy out of the PBO into system memory rather than - * directly read into system memory. - * https://bugs.freedesktop.org/show_bug.cgi?id=46631 - */ - COGL_GPU_INFO_DRIVER_BUG_MESA_46631_SLOW_READ_PIXELS = 1 << 0 + COGL_GPU_INFO_DRIVER_STUB } CoglGpuInfoDriverBug; typedef struct _CoglGpuInfoVersion CoglGpuInfoVersion; diff --git a/cogl/cogl/cogl-gpu-info.c b/cogl/cogl/cogl-gpu-info.c index a7897b236..d303da197 100644 --- a/cogl/cogl/cogl-gpu-info.c +++ b/cogl/cogl/cogl-gpu-info.c @@ -568,13 +568,5 @@ probed: gpu->architecture_name); /* Determine the driver bugs */ - - /* In Mesa the glReadPixels implementation is really slow - when using the Intel driver. The Intel - driver has a fast blit path when reading into a PBO. Reading into - a temporary PBO and then memcpying back out to the application's - memory is faster than a regular glReadPixels in this case */ - if (gpu->vendor == COGL_GPU_INFO_VENDOR_INTEL && - gpu->driver_package == COGL_GPU_INFO_DRIVER_PACKAGE_MESA) - gpu->driver_bugs |= COGL_GPU_INFO_DRIVER_BUG_MESA_46631_SLOW_READ_PIXELS; + gpu->driver_bugs = 0; } diff --git a/cogl/cogl/driver/gl/cogl-framebuffer-gl.c b/cogl/cogl/driver/gl/cogl-framebuffer-gl.c index e6979a608..70cd4304b 100644 --- a/cogl/cogl/driver/gl/cogl-framebuffer-gl.c +++ b/cogl/cogl/driver/gl/cogl-framebuffer-gl.c @@ -1240,96 +1240,6 @@ _cogl_framebuffer_gl_draw_indexed_attributes (CoglFramebuffer *framebuffer, _cogl_buffer_gl_unbind (buffer); } -static CoglBool -mesa_46631_slow_read_pixels_workaround (CoglFramebuffer *framebuffer, - int x, - int y, - CoglReadPixelsFlags source, - CoglBitmap *bitmap, - CoglError **error) -{ - CoglContext *ctx; - CoglPixelFormat format; - CoglBitmap *pbo; - int width; - int height; - CoglBool res; - uint8_t *dst; - const uint8_t *src; - - ctx = cogl_framebuffer_get_context (framebuffer); - - width = cogl_bitmap_get_width (bitmap); - height = cogl_bitmap_get_height (bitmap); - format = cogl_bitmap_get_format (bitmap); - - pbo = cogl_bitmap_new_with_size (ctx, width, height, format); - - /* Read into the pbo. We need to disable the flipping because the - blit fast path in the driver does not work with - GL_PACK_INVERT_MESA is set */ - res = _cogl_framebuffer_read_pixels_into_bitmap (framebuffer, - x, y, - source | - COGL_READ_PIXELS_NO_FLIP, - pbo, - error); - if (!res) - { - cogl_object_unref (pbo); - return FALSE; - } - - /* Copy the pixels back into application's buffer */ - dst = _cogl_bitmap_map (bitmap, - COGL_BUFFER_ACCESS_WRITE, - COGL_BUFFER_MAP_HINT_DISCARD, - error); - if (!dst) - { - cogl_object_unref (pbo); - return FALSE; - } - - src = _cogl_bitmap_map (pbo, - COGL_BUFFER_ACCESS_READ, - 0, /* hints */ - error); - if (src) - { - int src_rowstride = cogl_bitmap_get_rowstride (pbo); - int dst_rowstride = cogl_bitmap_get_rowstride (bitmap); - int to_copy = - _cogl_pixel_format_get_bytes_per_pixel (format) * width; - int y; - - /* If the framebuffer is onscreen we need to flip the - data while copying */ - if (!cogl_is_offscreen (framebuffer)) - { - src += src_rowstride * (height - 1); - src_rowstride = -src_rowstride; - } - - for (y = 0; y < height; y++) - { - memcpy (dst, src, to_copy); - dst += dst_rowstride; - src += src_rowstride; - } - - _cogl_bitmap_unmap (pbo); - } - else - res = FALSE; - - _cogl_bitmap_unmap (bitmap); - - cogl_object_unref (pbo); - - return res; -} - CoglBool _cogl_framebuffer_gl_read_pixels_into_bitmap (CoglFramebuffer *framebuffer, int x, @@ -1350,40 +1260,6 @@ _cogl_framebuffer_gl_read_pixels_into_bitmap (CoglFramebuffer *framebuffer, CoglBool pack_invert_set; int status = FALSE; - /* Workaround for cases where its faster to read into a temporary - * PBO. This is only worth doing if: - * - * • The GPU is an Intel GPU. In that case there is a known - * fast-path when reading into a PBO that will use the blitter - * instead of the Mesa fallback code. The driver bug will only be - * set if this is the case. - * • We're not already reading into a PBO. - * • The target format is BGRA. The fast-path blit does not get hit - * otherwise. - * • The size of the data is not trivially small. This isn't a - * requirement to hit the fast-path blit but intuitively it feels - * like if the amount of data is too small then the cost of - * allocating a PBO will outweigh the cost of temporarily - * converting the data to floats. - */ - if ((ctx->gpu.driver_bugs & - COGL_GPU_INFO_DRIVER_BUG_MESA_46631_SLOW_READ_PIXELS) && - (width > 8 || height > 8) && - (format & ~COGL_PREMULT_BIT) == COGL_PIXEL_FORMAT_BGRA_8888 && - cogl_bitmap_get_buffer (bitmap) == NULL) - { - CoglError *ignore_error = NULL; - - if (mesa_46631_slow_read_pixels_workaround (framebuffer, - x, y, - source, - bitmap, - &ignore_error)) - return TRUE; - else - cogl_error_free (ignore_error); - } - _cogl_framebuffer_flush_state (framebuffer, framebuffer, COGL_FRAMEBUFFER_STATE_BIND);