renderer/native: use cogl for CPU copy path

Use cogl_framebuffer_read_pixels_into_bitmap () instead of
glReadPixels () for the CPU copy path in multi-GPU support.

The cogl function employs several tricks to make the read-pixels as fast
as possible and does the y-flip as necessary. This should make the copy
more performant over all kinds of hardware.

This is expected to be used on virtual outputs (e.g. DisplayLink USB
docks and monitors) foremost, where the dumb buffer memory is just
regular system memory. If the dumb buffer memory is somehow slow, like
residing in discrete VRAM or having an unexpected caching mode, it may
be possible for the cogl function perform worse because it might do the
y-flip in-place in the dumb buffer. Hopefully that does not happen in
any practical scenario.

Calling meta_renderer_native_gles3_read_pixels () here was conceptually
wrong to begin with because it was done with the Cogl GL context of the
primary GPU, not on the GL ES 3 context of a secondary GPU. However,
due eglBindAPI being a no-op in Mesa and the glReadPixels () arguments
being compatible, it worked.

This patch adds a pixel format conversion table between DRM and Cogl
formats. It contains more formats than absolutely necessary and the
texture components field which is currently unused for completeness. See
Mutter issue #323. Making the table more complete documents better how
the pixel formats actually map so that posterity should be less likely
to be confused. This table could be shared with
shm_buffer_get_cogl_pixel_format () as well, but not with
meta_wayland_dma_buf_buffer_attach ().

On HP ProBook 4520s laptop (Mesa DRI Intel(R) Ironlake Mobile, Mesa
18.0.5), without this patch copy_shared_framebuffer_cpu () for a
DisplayLink output takes 5 seconds with a 1080p frame.  Obviously that
makes Mutter and gnome-shell completely unusable.  With this patch, that
function takes 13-18 ms which makes it usable if not fluent.

On Intel i7-4790 (Mesa DRI Intel(R) Haswell Desktop) machine, this patch
makes no significant difference (the copy takes 4-5 ms).
This commit is contained in:
Pekka Paalanen 2018-09-27 16:42:19 +03:00 committed by Ray Strode
parent 8c4743ee2a
commit d4bea60e1a

View File

@ -1776,22 +1776,82 @@ copy_shared_framebuffer_gpu (CoglOnscreen *onscreen,
&secondary_gpu_state->gbm.next_fb_id);
}
typedef struct _PixelFormatMap {
uint32_t drm_format;
CoglPixelFormat cogl_format;
CoglTextureComponents cogl_components;
} PixelFormatMap;
static const PixelFormatMap pixel_format_map[] = {
/* DRM formats are defined as little-endian, not machine endian. */
#if G_BYTE_ORDER == G_LITTLE_ENDIAN
{ DRM_FORMAT_RGB565, COGL_PIXEL_FORMAT_RGB_565, COGL_TEXTURE_COMPONENTS_RGB },
{ DRM_FORMAT_ABGR8888, COGL_PIXEL_FORMAT_RGBA_8888_PRE, COGL_TEXTURE_COMPONENTS_RGBA },
{ DRM_FORMAT_XBGR8888, COGL_PIXEL_FORMAT_RGBA_8888_PRE, COGL_TEXTURE_COMPONENTS_RGB },
{ DRM_FORMAT_ARGB8888, COGL_PIXEL_FORMAT_BGRA_8888_PRE, COGL_TEXTURE_COMPONENTS_RGBA },
{ DRM_FORMAT_XRGB8888, COGL_PIXEL_FORMAT_BGRA_8888_PRE, COGL_TEXTURE_COMPONENTS_RGB },
{ DRM_FORMAT_BGRA8888, COGL_PIXEL_FORMAT_ARGB_8888_PRE, COGL_TEXTURE_COMPONENTS_RGBA },
{ DRM_FORMAT_BGRX8888, COGL_PIXEL_FORMAT_ARGB_8888_PRE, COGL_TEXTURE_COMPONENTS_RGB },
{ DRM_FORMAT_RGBA8888, COGL_PIXEL_FORMAT_ABGR_8888_PRE, COGL_TEXTURE_COMPONENTS_RGBA },
{ DRM_FORMAT_RGBX8888, COGL_PIXEL_FORMAT_ABGR_8888_PRE, COGL_TEXTURE_COMPONENTS_RGB },
#elif G_BYTE_ORDER == G_BIG_ENDIAN
/* DRM_FORMAT_RGB565 cannot be expressed. */
{ DRM_FORMAT_ABGR8888, COGL_PIXEL_FORMAT_ABGR_8888_PRE, COGL_TEXTURE_COMPONENTS_RGBA },
{ DRM_FORMAT_XBGR8888, COGL_PIXEL_FORMAT_ABGR_8888_PRE, COGL_TEXTURE_COMPONENTS_RGB },
{ DRM_FORMAT_ARGB8888, COGL_PIXEL_FORMAT_ARGB_8888_PRE, COGL_TEXTURE_COMPONENTS_RGBA },
{ DRM_FORMAT_XRGB8888, COGL_PIXEL_FORMAT_ARGB_8888_PRE, COGL_TEXTURE_COMPONENTS_RGB },
{ DRM_FORMAT_BGRA8888, COGL_PIXEL_FORMAT_BGRA_8888_PRE, COGL_TEXTURE_COMPONENTS_RGBA },
{ DRM_FORMAT_BGRX8888, COGL_PIXEL_FORMAT_BGRA_8888_PRE, COGL_TEXTURE_COMPONENTS_RGB },
{ DRM_FORMAT_RGBA8888, COGL_PIXEL_FORMAT_RGBA_8888_PRE, COGL_TEXTURE_COMPONENTS_RGBA },
{ DRM_FORMAT_RGBX8888, COGL_PIXEL_FORMAT_RGBA_8888_PRE, COGL_TEXTURE_COMPONENTS_RGB },
#else
#error "unexpected G_BYTE_ORDER"
#endif
};
static gboolean
cogl_pixel_format_from_drm_format (uint32_t drm_format,
CoglPixelFormat *out_format,
CoglTextureComponents *out_components)
{
const size_t n = G_N_ELEMENTS (pixel_format_map);
size_t i;
for (i = 0; i < n; i++)
{
if (pixel_format_map[i].drm_format == drm_format)
break;
}
if (i == n)
return FALSE;
if (out_format)
*out_format = pixel_format_map[i].cogl_format;
if (out_components)
*out_components = pixel_format_map[i].cogl_components;
return TRUE;
}
static void
copy_shared_framebuffer_cpu (CoglOnscreen *onscreen,
MetaOnscreenNativeSecondaryGpuState *secondary_gpu_state,
MetaRendererNativeGpuData *renderer_gpu_data)
{
CoglFramebuffer *framebuffer = COGL_FRAMEBUFFER (onscreen);
CoglOnscreenEGL *onscreen_egl = onscreen->winsys;
MetaOnscreenNative *onscreen_native = onscreen_egl->platform;
MetaRendererNative *renderer_native = onscreen_native->renderer_native;
MetaEgl *egl = meta_renderer_native_get_egl (renderer_native);
CoglContext *cogl_context = framebuffer->context;
int width, height;
uint8_t *target_data;
int target_stride_bytes;
uint32_t target_fb_id;
uint32_t target_drm_format;
MetaDumbBuffer *next_dumb_fb;
MetaDumbBuffer *current_dumb_fb;
CoglBitmap *dumb_bitmap;
CoglPixelFormat cogl_format;
gboolean ret;
width = cogl_framebuffer_get_width (framebuffer);
height = cogl_framebuffer_get_height (framebuffer);
@ -1809,12 +1869,28 @@ copy_shared_framebuffer_cpu (CoglOnscreen *onscreen,
target_data = secondary_gpu_state->cpu.dumb_fb->map;
target_stride_bytes = secondary_gpu_state->cpu.dumb_fb->stride_bytes;
target_fb_id = secondary_gpu_state->cpu.dumb_fb->fb_id;
target_drm_format = secondary_gpu_state->cpu.dumb_fb->drm_format;
meta_renderer_native_gles3_read_pixels (egl,
renderer_native->gles3,
width, height,
target_data,
target_stride_bytes);
ret = cogl_pixel_format_from_drm_format (target_drm_format,
&cogl_format,
NULL);
g_assert (ret);
dumb_bitmap = cogl_bitmap_new_for_data (cogl_context,
width,
height,
cogl_format,
target_stride_bytes,
target_data);
if (!cogl_framebuffer_read_pixels_into_bitmap (framebuffer,
0 /* x */,
0 /* y */,
COGL_READ_PIXELS_COLOR_BUFFER,
dumb_bitmap))
g_warning ("Failed to CPU-copy to a secondary GPU output");
cogl_object_unref (dumb_bitmap);
secondary_gpu_state->gbm.next_fb_id = target_fb_id;
}