cogl: Remove GLX "threaded swap wait" used on Nvidia

Threaded swap wait was added for using together with the Nvidia GLX driver due to the lack of anything equivalent to the INTEL_swap_event GLX extension. The purpose was to avoid inhibiting the invocation of idle callbacks when constantly rendering, as the combination of throttling on swap-interval 1 and glxSwapBuffers() and the frame clock source having higher priority than the default idle callback sources meant they would never be invoked. This was solved in gbz#779039 by introducing a thread that took care of the vsync waiting, pushing frame completion events to the main thread meaning the main thread could go idle while waiting to draw the next frame instead of blocking on glxSwapBuffers(). As of https://gitlab.gnome.org/GNOME/mutter/merge_requests/363, the main thread will instead use prediction to estimate when the next frame should be drawn. A side effect of this is that even without INTEL_swap_event, we would not block as much, or at all, on glxSwapBuffers(), as at the time it is called, we have likely already hit the vblank, or will hit it soon. After having introduced the swap waiting thread, it was observed that the Nvidia driver used a considerable amount of CPU waiting for the vblank, effectively wasting CPU time. The need to call glFinish() was also problematic as it would wait for the frame to finish, before continuing. Due to this, remove the threaded swap wait, and rely only on the frame clock not scheduling frames too early. Fixes: https://bugzilla.gnome.org/show_bug.cgi?id=781835 Related: https://gitlab.gnome.org/GNOME/mutter/issues/700 [jadahl: Rewrote commit message] https://gitlab.gnome.org/GNOME/mutter/merge_requests/602
2019-05-31 14:59:22 +08:00 · 2019-05-31 14:59:22 +08:00 · 6ed5d2e2b4
commit 6ed5d2e2b4
parent d4eb222644
7 changed files with 2 additions and 329 deletions
--- a/cogl/cogl/cogl-private.h
+++ b/cogl/cogl/cogl-private.h
@ -76,9 +76,6 @@ typedef enum
  COGL_PRIVATE_FEATURE_GL_PROGRAMMABLE,
  COGL_PRIVATE_FEATURE_GL_EMBEDDED,
  COGL_PRIVATE_FEATURE_GL_WEB,
  /* This is currently only implemented for GLX, but isn't actually
   * that winsys dependent */
  COGL_PRIVATE_FEATURE_THREADED_SWAP_WAIT,
  COGL_N_PRIVATE_FEATURES
 } CoglPrivateFeature;
--- a/cogl/cogl/cogl-renderer-private.h
+++ b/cogl/cogl/cogl-renderer-private.h
@ -71,7 +71,6 @@ struct _CoglRenderer
  Display *foreign_xdpy;
  gboolean xlib_enable_event_retrieval;
  gboolean xlib_want_reset_on_video_memory_purge;
  gboolean xlib_enable_threaded_swap_wait;
 #endif
  CoglDriver driver;
--- a/cogl/cogl/cogl-renderer.c
+++ b/cogl/cogl/cogl-renderer.c
@ -262,17 +262,6 @@ cogl_xlib_renderer_request_reset_on_video_memory_purge (CoglRenderer *renderer,
  renderer->xlib_want_reset_on_video_memory_purge = enable;
 }
 void
 cogl_xlib_renderer_set_threaded_swap_wait_enabled (CoglRenderer *renderer,
 						   gboolean enable)
 {
  g_return_if_fail (cogl_is_renderer (renderer));
  /* NB: Renderers are considered immutable once connected */
  g_return_if_fail (!renderer->connected);
  renderer->xlib_enable_threaded_swap_wait = enable;
 }
 #endif /* COGL_HAS_XLIB_SUPPORT */
 gboolean
--- a/cogl/cogl/cogl-xlib-renderer.h
+++ b/cogl/cogl/cogl-xlib-renderer.h
@ -167,36 +167,6 @@ void
 cogl_xlib_renderer_set_event_retrieval_enabled (CoglRenderer *renderer,
                                                gboolean enable);
 /**
 * cogl_xlib_renderer_set_threaded_swap_wait_enabled: (skip)
 * @renderer: a #CoglRenderer
 * @enable: The new value
 *
 * Sets whether Cogl is allowed to use a separate threaded to wait for the
 * completion of glXSwapBuffers() and call the frame callback for the
 * corresponding #CoglOnscreen. This is a way of emulating the
 * INTEL_swap_event extension, and will only ever be used if
 * INTEL_swap_event is not present; it will also only be used for
 * specific white-listed drivers that are known to work correctly with
 * multiple contexts sharing state between threads.
 *
 * The advantage of enabling this is that it will allow your main loop
 * to do other work while waiting for the system to be ready to draw
 * the next frame, instead of blocking in glXSwapBuffers(). A disadvantage
 * is that the driver will be prevented from buffering up multiple frames
 * even if it thinks that it would be advantageous. In general, this
 * will work best for something like a system compositor that is doing
 * simple drawing but handling lots of other complex tasks.
 * 
 * If you enable this, you must call XInitThreads() before any other
 * X11 calls in your program. (See the documentation for XInitThreads())
 *
 * Stability: unstable
 */
 void
 cogl_xlib_renderer_set_threaded_swap_wait_enabled (CoglRenderer *renderer,
 						   gboolean enable);
 /**
 * cogl_xlib_renderer_get_display: (skip)
 */
--- a/cogl/cogl/winsys/cogl-winsys-glx.c
+++ b/cogl/cogl/winsys/cogl-winsys-glx.c
@ -100,14 +100,6 @@ typedef struct _CoglOnscreenGLX
  uint32_t pending_sync_notify;
  uint32_t pending_complete_notify;
  uint32_t pending_resize_notify;
  GThread *swap_wait_thread;
  GQueue *swap_wait_queue;
  GCond swap_wait_cond;
  GMutex swap_wait_mutex;
  int swap_wait_pipe[2];
  GLXContext swap_wait_context;
  gboolean closing_down;
 } CoglOnscreenGLX;
 typedef struct _CoglPixmapTextureEyeGLX
@ -893,29 +885,6 @@ update_winsys_features (CoglContext *context, GError **error)
                      COGL_FEATURE_ID_PRESENTATION_TIME,
                      TRUE);
    }
  else
    {
      CoglGpuInfo *info = &context->gpu;
      if (glx_display->have_vblank_counter &&
 	  context->display->renderer->xlib_enable_threaded_swap_wait &&
 	  info->vendor == COGL_GPU_INFO_VENDOR_NVIDIA)
        {
          COGL_FLAGS_SET (context->winsys_features,
                          COGL_WINSYS_FEATURE_SYNC_AND_COMPLETE_EVENT, TRUE);
          COGL_FLAGS_SET (context->winsys_features,
                          COGL_WINSYS_FEATURE_SWAP_BUFFERS_EVENT, TRUE);
          /* TODO: remove this deprecated feature */
          COGL_FLAGS_SET (context->features,
                          COGL_FEATURE_ID_SWAP_BUFFERS_EVENT,
                          TRUE);
          COGL_FLAGS_SET (context->features,
                          COGL_FEATURE_ID_PRESENTATION_TIME,
                          TRUE);
          COGL_FLAGS_SET (context->private_features,
                          COGL_PRIVATE_FEATURE_THREADED_SWAP_WAIT,
                          TRUE);
        }
    }
  /* We'll manually handle queueing dirty events in response to
   * Expose events from X */
@ -1512,8 +1481,7 @@ _cogl_winsys_onscreen_init (CoglOnscreen *onscreen,
    }
 #ifdef GLX_INTEL_swap_event
-  if (_cogl_winsys_has_feature (COGL_WINSYS_FEATURE_SYNC_AND_COMPLETE_EVENT) &&
+  if (_cogl_winsys_has_feature (COGL_WINSYS_FEATURE_SYNC_AND_COMPLETE_EVENT))
      !_cogl_has_private_feature (context, COGL_PRIVATE_FEATURE_THREADED_SWAP_WAIT))
    {
      GLXDrawable drawable =
        glx_onscreen->glxwin ? glx_onscreen->glxwin : xlib_onscreen->xwin;
@ -1556,31 +1524,6 @@ _cogl_winsys_onscreen_deinit (CoglOnscreen *onscreen)
      xlib_onscreen->output = NULL;
    }
  if (glx_onscreen->swap_wait_thread)
    {
      g_mutex_lock (&glx_onscreen->swap_wait_mutex);
      glx_onscreen->closing_down = TRUE;
      g_cond_signal (&glx_onscreen->swap_wait_cond);
      g_mutex_unlock (&glx_onscreen->swap_wait_mutex);
      g_thread_join (glx_onscreen->swap_wait_thread);
      glx_onscreen->swap_wait_thread = NULL;
      g_cond_clear (&glx_onscreen->swap_wait_cond);
      g_mutex_clear (&glx_onscreen->swap_wait_mutex);
      g_queue_free (glx_onscreen->swap_wait_queue);
      glx_onscreen->swap_wait_queue = NULL;
      _cogl_poll_renderer_remove_fd (context->display->renderer,
                                     glx_onscreen->swap_wait_pipe[0]);
      close (glx_onscreen->swap_wait_pipe[0]);
      close (glx_onscreen->swap_wait_pipe[1]);
      glx_renderer->glXDestroyContext (xlib_renderer->xdpy,
                                       glx_onscreen->swap_wait_context);
    }
  _cogl_xlib_renderer_trap_errors (context->display->renderer, &old_state);
  drawable =
@ -1807,199 +1750,6 @@ set_frame_info_output (CoglOnscreen *onscreen,
    }
 }
 static gpointer
 threaded_swap_wait (gpointer data)
 {
  CoglOnscreen *onscreen = data;
  CoglOnscreenGLX *glx_onscreen = onscreen->winsys;
  CoglFramebuffer *framebuffer = COGL_FRAMEBUFFER (onscreen);
  CoglContext *context = framebuffer->context;
  CoglDisplay *display = context->display;
  CoglXlibRenderer *xlib_renderer = _cogl_xlib_renderer_get_data (display->renderer);
  CoglGLXDisplay *glx_display = display->winsys;
  CoglGLXRenderer *glx_renderer = display->renderer->winsys;
  GLXDrawable dummy_drawable;
  if (glx_display->dummy_glxwin)
    dummy_drawable = glx_display->dummy_glxwin;
  else
    dummy_drawable = glx_display->dummy_xwin;
  glx_renderer->glXMakeContextCurrent (xlib_renderer->xdpy,
                                       dummy_drawable,
                                       dummy_drawable,
                                       glx_onscreen->swap_wait_context);
  g_mutex_lock (&glx_onscreen->swap_wait_mutex);
  while (TRUE)
    {
      gpointer queue_element;
      uint32_t vblank_counter;
      while (!glx_onscreen->closing_down && glx_onscreen->swap_wait_queue->length == 0)
         g_cond_wait (&glx_onscreen->swap_wait_cond, &glx_onscreen->swap_wait_mutex);
      if (glx_onscreen->closing_down)
         break;
      queue_element = g_queue_pop_tail (glx_onscreen->swap_wait_queue);
      vblank_counter = GPOINTER_TO_UINT(queue_element);
      g_mutex_unlock (&glx_onscreen->swap_wait_mutex);
      glx_renderer->glXWaitVideoSync (2,
                                      (vblank_counter + 1) % 2,
                                      &vblank_counter);
      g_mutex_lock (&glx_onscreen->swap_wait_mutex);
      if (!glx_onscreen->closing_down)
         {
           int bytes_written = 0;
           union {
             char bytes[8];
             int64_t presentation_time;
           } u;
           u.presentation_time = get_monotonic_time_ns ();
           while (bytes_written < 8)
             {
               int res = write (glx_onscreen->swap_wait_pipe[1], u.bytes + bytes_written, 8 - bytes_written);
               if (res == -1)
                 {
                   if (errno != EINTR)
                     g_error ("Error writing to swap notification pipe: %s\n",
                              g_strerror (errno));
                 }
               else
                 {
                   bytes_written += res;
                 }
             }
         }
    }
  g_mutex_unlock (&glx_onscreen->swap_wait_mutex);
  glx_renderer->glXMakeContextCurrent (xlib_renderer->xdpy,
                                       None,
                                       None,
                                       NULL);
  return NULL;
 }
 static int64_t
 threaded_swap_wait_pipe_prepare (void *user_data)
 {
  return -1;
 }
 static void
 threaded_swap_wait_pipe_dispatch (void *user_data, int revents)
 {
  CoglOnscreen *onscreen = user_data;
  CoglOnscreenGLX *glx_onscreen = onscreen->winsys;
  CoglFrameInfo *info;
  if ((revents & COGL_POLL_FD_EVENT_IN))
    {
      int bytes_read = 0;
      union {
         char bytes[8];
         int64_t presentation_time;
      } u;
      while (bytes_read < 8)
         {
           int res = read (glx_onscreen->swap_wait_pipe[0], u.bytes + bytes_read, 8 - bytes_read);
           if (res == -1)
             {
               if (errno != EINTR)
                 g_error ("Error reading from swap notification pipe: %s\n",
                          g_strerror (errno));
             }
           else
             {
               bytes_read += res;
             }
         }
      set_sync_pending (onscreen);
      set_complete_pending (onscreen);
      info = g_queue_peek_head (&onscreen->pending_frame_infos);
      info->presentation_time = u.presentation_time;
    }
 }
 static void
 start_threaded_swap_wait (CoglOnscreen *onscreen,
                           uint32_t      vblank_counter)
 {
  CoglOnscreenGLX *glx_onscreen = onscreen->winsys;
  CoglFramebuffer *framebuffer = COGL_FRAMEBUFFER (onscreen);
  CoglContext *context = framebuffer->context;
  if (glx_onscreen->swap_wait_thread == NULL)
    {
      CoglDisplay *display = context->display;
      CoglGLXRenderer *glx_renderer = display->renderer->winsys;
      CoglGLXDisplay *glx_display = display->winsys;
      CoglOnscreenXlib *xlib_onscreen = onscreen->winsys;
      CoglXlibRenderer *xlib_renderer =
        _cogl_xlib_renderer_get_data (display->renderer);
      GLXDrawable drawable =
        glx_onscreen->glxwin ? glx_onscreen->glxwin : xlib_onscreen->xwin;
      int i;
      ensure_ust_type (display->renderer, drawable);
      if ((pipe (glx_onscreen->swap_wait_pipe) == -1))
        g_error ("Couldn't create pipe for swap notification: %s\n",
                 g_strerror (errno));
      for (i = 0; i < 2; i++)
 	{
 	  if (fcntl(glx_onscreen->swap_wait_pipe[i], F_SETFD,
 		    fcntl(glx_onscreen->swap_wait_pipe[i], F_GETFD, 0) | FD_CLOEXEC) == -1)
 	    g_error ("Couldn't set swap notification pipe CLOEXEC: %s\n",
 		     g_strerror (errno));
 	}
      _cogl_poll_renderer_add_fd (display->renderer,
                                  glx_onscreen->swap_wait_pipe[0],
                                  COGL_POLL_FD_EVENT_IN,
                                  threaded_swap_wait_pipe_prepare,
                                  threaded_swap_wait_pipe_dispatch,
                                  onscreen);
      glx_onscreen->swap_wait_queue = g_queue_new ();
      g_mutex_init (&glx_onscreen->swap_wait_mutex);
      g_cond_init (&glx_onscreen->swap_wait_cond);
      glx_onscreen->swap_wait_context =
         glx_renderer->glXCreateNewContext (xlib_renderer->xdpy,
                                            glx_display->fbconfig,
                                            GLX_RGBA_TYPE,
                                            glx_display->glx_context,
                                            True);
      glx_onscreen->swap_wait_thread = g_thread_new ("cogl_glx_swap_wait",
                                                     threaded_swap_wait,
                                                     onscreen);
    }
  g_mutex_lock (&glx_onscreen->swap_wait_mutex);
  g_queue_push_head (glx_onscreen->swap_wait_queue, GUINT_TO_POINTER(vblank_counter));
  g_cond_signal (&glx_onscreen->swap_wait_cond);
  g_mutex_unlock (&glx_onscreen->swap_wait_mutex);
 }
 static void
 _cogl_winsys_onscreen_swap_region (CoglOnscreen *onscreen,
                                   const int *user_rectangles,
@ -2235,26 +1985,7 @@ _cogl_winsys_onscreen_swap_buffers_with_damage (CoglOnscreen *onscreen,
  have_counter = glx_display->have_vblank_counter;
-  if (glx_renderer->glXSwapInterval)
+  if (!glx_renderer->glXSwapInterval)
    {
      if (_cogl_has_private_feature (context, COGL_PRIVATE_FEATURE_THREADED_SWAP_WAIT))
        {
          /* If we didn't wait for the GPU here, then it's easy to get the case
           * where there is a VBlank between the point where we get the vsync counter
           * and the point where the GPU is ready to actually perform the glXSwapBuffers(),
           * and the swap wait terminates at the first VBlank rather than the one
           * where the swap buffers happens. Calling glFinish() here makes this a
           * rare race since the GPU is already ready to swap when we call glXSwapBuffers().
           * The glFinish() also prevents any serious damage if the rare race happens,
           * since it will wait for the preceding glXSwapBuffers() and prevent us from
           * getting premanently ahead. (For NVIDIA drivers, glFinish() after glXSwapBuffers()
           * waits for the buffer swap to happen.)
           */
          _cogl_winsys_wait_for_gpu (onscreen);
          start_threaded_swap_wait (onscreen, _cogl_winsys_get_vsync_counter (context));
        }
    }
  else
    {
      gboolean can_wait = have_counter || glx_display->can_vblank_wait;
--- a/src/backends/x11/meta-backend-x11.c
+++ b/src/backends/x11/meta-backend-x11.c
@ -790,11 +790,6 @@ meta_backend_x11_class_init (MetaBackendX11Class *klass)
 static void
 meta_backend_x11_init (MetaBackendX11 *x11)
 {
  /* XInitThreads() is needed to use the "threaded swap wait" functionality
   * in Cogl - see meta_renderer_x11_create_cogl_renderer(). We call it here
   * to hopefully call it before any other use of XLib.
   */
  XInitThreads();
 }
 Display *
--- a/src/backends/x11/meta-renderer-x11.c
+++ b/src/backends/x11/meta-renderer-x11.c
@ -85,14 +85,6 @@ meta_renderer_x11_create_cogl_renderer (MetaRenderer *renderer)
  cogl_xlib_renderer_set_foreign_display (cogl_renderer, xdisplay);
  cogl_xlib_renderer_request_reset_on_video_memory_purge (cogl_renderer, TRUE);
  /* Set up things so that if the INTEL_swap_event extension is not present,
   * but the driver is known to have good thread support, we use an extra
   * thread and call glXWaitVideoSync() in the thread. This allows idles
   * to work properly, even when Mutter is constantly redrawing new frames;
   * otherwise, without INTEL_swap_event, we'll just block in glXSwapBuffers().
   */
  cogl_xlib_renderer_set_threaded_swap_wait_enabled (cogl_renderer, TRUE);
  return cogl_renderer;
 }