cogl: Implements a software only read-pixel fast-path

This adds a transparent optimization to cogl_read_pixels for when a
single pixel is being read back and it happens that all the geometry of
the current frame is still available in the framebuffer's associated
journal.

The intention is to indirectly optimize Clutter's render based picking
mechanism in such a way that the 99% of cases where scenes are comprised
of trivial quad primitives that can easily be intersected we can avoid
the latency of kicking a GPU render and blocking for the result when we
know we can calculate the result manually on the CPU probably faster
than we could even kick a render.

A nice property of this solution is that it maintains all the
flexibility of the render based picking provided by Clutter and it can
gracefully fall back to GPU rendering if actors are drawn using anything
more complex than a quad for their geometry.

It seems worth noting that there is a limitation to the extensibility of
this approach in that it can only optimize picking against geometry
that passes through Cogl's journal which isn't something Clutter
directly controls.  For now though this really doesn't matter since
basically all apps should end up hitting this fast-path. The current
idea to address this longer term would be a pick2 vfunc for ClutterActor
that can support geometry and render based input regions of actors and
move this optimization up into Clutter instead.

Note: currently we don't have a primitive count threshold to consider
that there could be scenes with enough geometry for us to compensate for
the cost of kicking a render and determine a result more efficiently by
utilizing the GPU. We don't currently expect this to be common though.

Note: in the future it could still be interesting to revive something
like the wip/async-pbo-picking branch to provide an asynchronous
read-pixels based optimization for Clutter picking in cases where more
complex input regions that necessitate rendering are in use or if we do
add a threshold for rendering as mentioned above.
This commit is contained in:
Robert Bragg 2011-01-12 22:12:41 +00:00
parent e289e85397
commit 1bdb0e6e98
7 changed files with 800 additions and 183 deletions

View File

@ -659,9 +659,6 @@ _clutter_do_pick (ClutterStage *stage,
else
_clutter_stage_set_pick_buffer_valid (stage, TRUE, mode);
/* Make sure Cogl flushes any batched geometry to the GPU driver */
cogl_flush ();
/* Read the color of the screen co-ords pixel. RGBA_8888_PRE is used
even though we don't care about the alpha component because under
GLES this is the only format that is guaranteed to work so Cogl

View File

@ -67,6 +67,22 @@ struct _CoglFramebuffer
/* The scene of a given framebuffer may depend on images in other
* framebuffers... */
GList *deps;
/* As part of an optimization for reading-back single pixels from a
* framebuffer in some simple cases where the geometry is still
* available in the journal we need to track the bounds of the last
* region cleared, its color and we need to track when something
* does in fact draw to that region so it is no longer clear.
*/
float clear_color_red;
float clear_color_green;
float clear_color_blue;
float clear_color_alpha;
int clear_clip_x0;
int clear_clip_y0;
int clear_clip_x1;
int clear_clip_y1;
gboolean clear_clip_dirty;
};
#define COGL_FRAMEBUFFER(X) ((CoglFramebuffer *)(X))
@ -117,6 +133,9 @@ _cogl_framebuffer_clear4f (CoglFramebuffer *framebuffer,
float blue,
float alpha);
void
_cogl_framebuffer_dirty (CoglFramebuffer *framebuffer);
int
_cogl_framebuffer_get_width (CoglFramebuffer *framebuffer);
@ -196,6 +215,14 @@ _cogl_framebuffer_flush_dependency_journals (CoglFramebuffer *framebuffer);
void
_cogl_framebuffer_swap_notify (CoglFramebuffer *framebuffer);
gboolean
_cogl_framebuffer_try_fast_read_pixel (CoglFramebuffer *framebuffer,
int x,
int y,
CoglReadPixelsFlags source,
CoglPixelFormat format,
guint8 *pixel);
typedef enum _CoglFramebufferFlushFlags
{
/* XXX: When using this, that implies you are going to manually load the

View File

@ -160,6 +160,13 @@ _cogl_framebuffer_init (CoglFramebuffer *framebuffer,
framebuffer->journal = _cogl_journal_new ();
/* Ensure we know the framebuffer->clear_color* members can't be
* referenced for our fast-path read-pixel optimization (see
* _cogl_journal_try_read_pixel()) until some region of the
* framebuffer is initialized.
*/
framebuffer->clear_clip_dirty = TRUE;
/* XXX: We have to maintain a central list of all framebuffers
* because at times we need to be able to flush all known journals.
*
@ -248,6 +255,12 @@ _cogl_clear4f (unsigned long buffers,
GE (glClear (gl_buffers));
}
/* Flags the framebuffer's tracked clear region as dirty.
 *
 * While clear_clip_dirty is set, the clear color recorded by the last
 * clear is not used to answer a fast-path single pixel read, and a
 * matching clear request is not skipped. */
void
_cogl_framebuffer_dirty (CoglFramebuffer *framebuffer)
{
framebuffer->clear_clip_dirty = TRUE;
}
void
_cogl_framebuffer_clear4f (CoglFramebuffer *framebuffer,
unsigned long buffers,
@ -256,11 +269,96 @@ _cogl_framebuffer_clear4f (CoglFramebuffer *framebuffer,
float blue,
float alpha)
{
CoglClipStack *clip_stack = _cogl_framebuffer_get_clip_stack (framebuffer);
int scissor_x0;
int scissor_y0;
int scissor_x1;
int scissor_y1;
_cogl_clip_stack_get_bounds (clip_stack,
&scissor_x0, &scissor_y0,
&scissor_x1, &scissor_y1);
/* NB: the previous clear could have had an arbitrary clip.
* NB: everything for the last frame might still be in the journal
* but we can't assume anything about how each entry was
* clipped.
* NB: Clutter will scissor its pick renders which would mean all
* journal entries have a common ClipStack entry, but without
* a layering violation Cogl has to explicitly walk the journal
* entries to determine if this is the case.
* NB: We have a software only read-pixel optimization in the
* journal that determines the color at a given framebuffer
* coordinate for simple scenes without rendering with the GPU.
* When Clutter is hitting this fast-path we can expect to
* receive calls to clear the framebuffer with an un-flushed
* journal.
* NB: To fully support software based picking for Clutter we
* need to be able to reliably detect when the contents of a
* journal can be discarded and when we can skip the call to
* glClear because it matches the previous clear request.
*/
/* Note: we don't check for the stencil buffer being cleared here
* since there isn't any public cogl api to manipulate the stencil
* buffer.
*
* Note: we check for an exact clip match here because
* 1) a smaller clip could mean existing journal entries may
* need to contribute to regions outside the new clear-clip
* 2) a larger clip would mean we need to issue a real
* glClear and we only care about cases avoiding a
* glClear.
*
* Note: Comparing without an epsilon is considered
* appropriate here.
*/
if (buffers & COGL_BUFFER_BIT_COLOR &&
buffers & COGL_BUFFER_BIT_DEPTH &&
!framebuffer->clear_clip_dirty &&
framebuffer->clear_color_red == red &&
framebuffer->clear_color_green == green &&
framebuffer->clear_color_blue == blue &&
framebuffer->clear_color_alpha == alpha &&
scissor_x0 == framebuffer->clear_clip_x0 &&
scissor_y0 == framebuffer->clear_clip_y0 &&
scissor_x1 == framebuffer->clear_clip_x1 &&
scissor_y1 == framebuffer->clear_clip_y1)
{
/* NB: We only have to consider the clip state of journal
* entries if the current clear is clipped since otherwise we
* know every pixel of the framebuffer is affected by the clear
* and so all journal entries become redundant and can simply be
* discarded.
*/
if (clip_stack)
{
/*
* Note: the function for checking the journal entries is
* quite strict. It avoids detailed checking of all entry
* clip_stacks by only checking the details of the first
* entry and then it only verifies that the remaining
* entries share the same clip_stack ancestry. This means
* it's possible for some false negatives here but that will
* just result in us falling back to a real clear.
*/
if (_cogl_journal_all_entries_within_bounds (framebuffer->journal,
scissor_x0, scissor_y0,
scissor_x1, scissor_y1))
{
_cogl_journal_discard (framebuffer->journal);
goto cleared;
}
}
else
{
_cogl_journal_discard (framebuffer->journal);
goto cleared;
}
}
COGL_NOTE (DRAW, "Clear begin");
/* XXX: in the case where it's the color buffer being cleared and
* the current clip-stack is empty we could instead discard the
* journal here instead of flushing it. */
_cogl_framebuffer_flush_journal (framebuffer);
/* NB: _cogl_framebuffer_flush_state may disrupt various state (such
@ -282,6 +380,38 @@ _cogl_framebuffer_clear4f (CoglFramebuffer *framebuffer,
}
COGL_NOTE (DRAW, "Clear end");
cleared:
if (buffers & COGL_BUFFER_BIT_COLOR && buffers & COGL_BUFFER_BIT_DEPTH)
{
/* For our fast-path for reading back a single pixel of simple
* scenes where the whole frame is in the journal we need to
* track the cleared color of the framebuffer in case the point
* read doesn't intersect any of the journal rectangles. */
framebuffer->clear_clip_dirty = FALSE;
framebuffer->clear_color_red = red;
framebuffer->clear_color_green = green;
framebuffer->clear_color_blue = blue;
framebuffer->clear_color_alpha = alpha;
/* NB: A clear may be scissored so we need to track the extents
* that the clear is applicable to... */
if (clip_stack)
{
_cogl_clip_stack_get_bounds (clip_stack,
&framebuffer->clear_clip_x0,
&framebuffer->clear_clip_y0,
&framebuffer->clear_clip_x1,
&framebuffer->clear_clip_y1);
}
else
{
/* FIXME: set degenerate clip */
}
}
else
_cogl_framebuffer_dirty (framebuffer);
}
/* XXX: We'll need to consider if this API is a good approach for the
@ -1097,3 +1227,63 @@ _cogl_framebuffer_get_alpha_bits (CoglFramebuffer *framebuffer)
return framebuffer->alpha_bits;
}
/* Tries to work out the color of the single pixel at (x, y) purely on
 * the CPU, from the framebuffer's journal and its last recorded clear.
 *
 * Only COGL_READ_PIXELS_COLOR_BUFFER reads in one of the RGBA_8888
 * formats are considered.
 *
 * Returns TRUE and fills the four bytes at @pixel when the color could
 * be determined. Returns FALSE when it could not, in which case the
 * caller is expected to fall back to a different way of reading the
 * pixel.
 */
gboolean
_cogl_framebuffer_try_fast_read_pixel (CoglFramebuffer *framebuffer,
                                       int x,
                                       int y,
                                       CoglReadPixelsFlags source,
                                       CoglPixelFormat format,
                                       guint8 *pixel)
{
  gboolean hit_primitive;
  gboolean inside_last_clear;

  if (source != COGL_READ_PIXELS_COLOR_BUFFER)
    return FALSE;

  if (!(format == COGL_PIXEL_FORMAT_RGBA_8888_PRE ||
        format == COGL_PIXEL_FORMAT_RGBA_8888))
    return FALSE;

  /* If the journal gives up (for example because the point hit a
   * primitive whose color can't be determined in software) then we
   * must not fall back to the clear color either. */
  if (!_cogl_journal_try_read_pixel (framebuffer->journal,
                                     x, y, format, pixel,
                                     &hit_primitive))
    return FALSE;

  /* The journal resolved the color from one of its primitives. */
  if (hit_primitive)
    return TRUE;

  /* No journal primitive covers the point, so the only remaining
   * candidate is the last recorded clear color. That is only valid
   * while nothing has been rendered to the framebuffer since it was
   * last cleared. */
  if (framebuffer->clear_clip_dirty)
    return FALSE;

  inside_last_clear = (x >= framebuffer->clear_clip_x0 &&
                       x < framebuffer->clear_clip_x1 &&
                       y >= framebuffer->clear_clip_y0 &&
                       y < framebuffer->clear_clip_y1);
  if (!inside_last_clear)
    return FALSE;

  /* We currently only care about cases where the premultiplied and
   * unpremultiplied colors are equivalent... */
  if (framebuffer->clear_color_alpha != 1.0)
    return FALSE;

  pixel[0] = framebuffer->clear_color_red * 255.0;
  pixel[1] = framebuffer->clear_color_green * 255.0;
  pixel[2] = framebuffer->clear_color_blue * 255.0;
  pixel[3] = framebuffer->clear_color_alpha * 255.0;

  return TRUE;
}

View File

@ -35,6 +35,8 @@ typedef struct _CoglJournal
GArray *vertices;
size_t needed_vbo_len;
int fast_read_pixel_count;
} CoglJournal;
/* To improve batching of geometry when submitting vertices to OpenGL we
@ -69,4 +71,22 @@ void
_cogl_journal_flush (CoglJournal *journal,
CoglFramebuffer *framebuffer);
void
_cogl_journal_discard (CoglJournal *journal);
gboolean
_cogl_journal_all_entries_within_bounds (CoglJournal *journal,
float clip_x0,
float clip_y0,
float clip_x1,
float clip_y1);
gboolean
_cogl_journal_try_read_pixel (CoglJournal *journal,
int x,
int y,
CoglPixelFormat format,
guint8 *pixel,
gboolean *found_intersection);
#endif /* __COGL_JOURNAL_PRIVATE_H */

View File

@ -37,6 +37,7 @@
#include "cogl-framebuffer-private.h"
#include "cogl-profile.h"
#include "cogl-vertex-attribute-private.h"
#include "cogl-point-in-poly-private.h"
#include <string.h>
#include <gmodule.h>
@ -820,10 +821,182 @@ typedef struct
float x_2, y_2;
} ClipBounds;
/* Decides whether @journal_entry can be clipped in software against
 * @clip_stack and, if so, reports the combined clip rectangle through
 * @clip_bounds_out.
 *
 * @prev_journal_entry may be NULL; when it is non-NULL and shares the
 * same pipeline as @journal_entry the pipeline checks are skipped.
 *
 * Every node of @clip_stack is treated as a CoglClipStackRect.
 *
 * Returns FALSE if the pipeline has a user program, if any layer has a
 * user matrix, or if calculate_translation() fails for any clip node.
 * Note that @clip_bounds_out is written to even when FALSE is
 * returned.
 */
static gboolean
can_software_clip_entry (CoglJournalEntry *journal_entry,
                         CoglJournalEntry *prev_journal_entry,
                         CoglClipStack *clip_stack,
                         ClipBounds *clip_bounds_out)
{
  CoglPipeline *pipeline = journal_entry->pipeline;
  gboolean pipeline_already_checked =
    (prev_journal_entry != NULL &&
     pipeline == prev_journal_entry->pipeline);
  CoglClipStack *node;

  /* Start from an unbounded region and narrow it per clip node */
  clip_bounds_out->x_1 = -G_MAXFLOAT;
  clip_bounds_out->y_1 = -G_MAXFLOAT;
  clip_bounds_out->x_2 = G_MAXFLOAT;
  clip_bounds_out->y_2 = G_MAXFLOAT;

  if (!pipeline_already_checked)
    {
      int layer;

      /* With a user program we can't reliably modify the texture
         coordinates */
      if (cogl_pipeline_get_user_program (pipeline))
        return FALSE;

      /* Likewise if any of the layers has a texture matrix */
      layer = cogl_pipeline_get_n_layers (pipeline);
      while (--layer >= 0)
        {
          if (_cogl_pipeline_layer_has_user_matrix (pipeline, layer))
            return FALSE;
        }
    }

  /* Each clip node's matrix must be just a translation of the journal
     entry's modelview matrix. That translation also lets us express
     the clip rectangle in the entry's modelview space. */
  for (node = clip_stack; node != NULL; node = node->parent)
    {
      CoglClipStackRect *rect = (CoglClipStackRect *) node;
      float min_x, min_y, max_x, max_y;
      float tx, ty;

      if (!calculate_translation (&rect->matrix,
                                  &journal_entry->model_view,
                                  &tx, &ty))
        return FALSE;

      /* Normalize the rectangle so that min <= max on both axes */
      min_x = (rect->x0 < rect->x1) ? rect->x0 : rect->x1;
      max_x = (rect->x0 < rect->x1) ? rect->x1 : rect->x0;
      min_y = (rect->y0 < rect->y1) ? rect->y0 : rect->y1;
      max_y = (rect->y0 < rect->y1) ? rect->y1 : rect->y0;

      /* Intersect with what we have accumulated so far */
      clip_bounds_out->x_1 = MAX (clip_bounds_out->x_1, min_x - tx);
      clip_bounds_out->y_1 = MAX (clip_bounds_out->y_1, min_y - ty);
      clip_bounds_out->x_2 = MIN (clip_bounds_out->x_2, max_x - tx);
      clip_bounds_out->y_2 = MIN (clip_bounds_out->y_2, max_y - ty);
    }

  return TRUE;
}
static void
check_software_clip_for_batch (CoglJournalEntry *batch_start,
int batch_len,
CoglJournalFlushState *state)
software_clip_entry (CoglJournalEntry *journal_entry,
float *verts,
ClipBounds *clip_bounds)
{
size_t stride =
GET_JOURNAL_ARRAY_STRIDE_FOR_N_LAYERS (journal_entry->n_layers);
float rx1, ry1, rx2, ry2;
float vx1, vy1, vx2, vy2;
int layer_num;
/* Remove the clip on the entry */
_cogl_clip_stack_unref (journal_entry->clip_stack);
journal_entry->clip_stack = NULL;
vx1 = verts[0];
vy1 = verts[1];
vx2 = verts[stride];
vy2 = verts[stride + 1];
if (vx1 < vx2)
{
rx1 = vx1;
rx2 = vx2;
}
else
{
rx1 = vx2;
rx2 = vx1;
}
if (vy1 < vy2)
{
ry1 = vy1;
ry2 = vy2;
}
else
{
ry1 = vy2;
ry2 = vy1;
}
rx1 = CLAMP (rx1, clip_bounds->x_1, clip_bounds->x_2);
ry1 = CLAMP (ry1, clip_bounds->y_1, clip_bounds->y_2);
rx2 = CLAMP (rx2, clip_bounds->x_1, clip_bounds->x_2);
ry2 = CLAMP (ry2, clip_bounds->y_1, clip_bounds->y_2);
/* Check if the rectangle intersects the clip at all */
if (rx1 == rx2 || ry1 == ry2)
/* Will set all of the vertex data to 0 in the hope that this
will create a degenerate rectangle and the GL driver will
be able to clip it quickly */
memset (verts, 0, sizeof (float) * stride * 2);
else
{
if (vx1 > vx2)
{
float t = rx1;
rx1 = rx2;
rx2 = t;
}
if (vy1 > vy2)
{
float t = ry1;
ry1 = ry2;
ry2 = t;
}
verts[0] = rx1;
verts[1] = ry1;
verts[stride] = rx2;
verts[stride + 1] = ry2;
/* Convert the rectangle coordinates to a fraction of the original
rectangle */
rx1 = (rx1 - vx1) / (vx2 - vx1);
ry1 = (ry1 - vy1) / (vy2 - vy1);
rx2 = (rx2 - vx1) / (vx2 - vx1);
ry2 = (ry2 - vy1) / (vy2 - vy1);
for (layer_num = 0; layer_num < journal_entry->n_layers; layer_num++)
{
float *t = verts + 2 + 2 * layer_num;
float tx1 = t[0], ty1 = t[1];
float tx2 = t[stride], ty2 = t[stride + 1];
t[0] = rx1 * (tx2 - tx1) + tx1;
t[1] = ry1 * (ty2 - ty1) + ty1;
t[stride] = rx2 * (tx2 - tx1) + tx1;
t[stride + 1] = ry2 * (ty2 - ty1) + ty1;
}
}
}
static void
maybe_software_clip_entries (CoglJournalEntry *batch_start,
int batch_len,
CoglJournalFlushState *state)
{
CoglJournal *journal = state->journal;
CoglClipStack *clip_stack, *clip_entry;
@ -864,77 +1037,15 @@ check_software_clip_for_batch (CoglJournalEntry *batch_start,
for (entry_num = 0; entry_num < batch_len; entry_num++)
{
CoglJournalEntry *journal_entry = batch_start + entry_num;
CoglPipeline *pipeline = journal_entry->pipeline;
CoglJournalEntry *prev_journal_entry =
entry_num ? batch_start + (entry_num - 1) : NULL;
ClipBounds *clip_bounds = &g_array_index (ctx->journal_clip_bounds,
ClipBounds, entry_num);
int layer_num;
clip_bounds->x_1 = -G_MAXFLOAT;
clip_bounds->y_1 = -G_MAXFLOAT;
clip_bounds->x_2 = G_MAXFLOAT;
clip_bounds->y_2 = G_MAXFLOAT;
/* Check the pipeline is usable. We can short-cut here for
entries using the same pipeline as the previous entry */
if (entry_num == 0 || pipeline != batch_start[entry_num - 1].pipeline)
{
/* If the pipeline has a user program then we can't reliably modify
the texture coordinates */
if (cogl_pipeline_get_user_program (pipeline))
return;
/* If any of the pipeline layers have a texture matrix then we can't
reliably modify the texture coordinates */
for (layer_num = cogl_pipeline_get_n_layers (pipeline) - 1;
layer_num >= 0;
layer_num--)
if (_cogl_pipeline_layer_has_user_matrix (pipeline, layer_num))
return;
}
/* Now we need to verify that each clip entry's matrix is just a
translation of the journal entry's modelview matrix. We can
also work out the bounds of the clip in modelview space using
this translation */
for (clip_entry = clip_stack; clip_entry; clip_entry = clip_entry->parent)
{
float rect_x1, rect_y1, rect_x2, rect_y2;
CoglClipStackRect *clip_rect;
float tx, ty;
clip_rect = (CoglClipStackRect *) clip_entry;
if (!calculate_translation (&clip_rect->matrix,
&journal_entry->model_view,
&tx, &ty))
return;
if (clip_rect->x0 < clip_rect->x1)
{
rect_x1 = clip_rect->x0;
rect_x2 = clip_rect->x1;
}
else
{
rect_x1 = clip_rect->x1;
rect_x2 = clip_rect->x0;
}
if (clip_rect->y0 < clip_rect->y1)
{
rect_y1 = clip_rect->y0;
rect_y2 = clip_rect->y1;
}
else
{
rect_y1 = clip_rect->y1;
rect_y2 = clip_rect->y0;
}
clip_bounds->x_1 = MAX (clip_bounds->x_1, rect_x1 - tx);
clip_bounds->y_1 = MAX (clip_bounds->y_1, rect_y1 - ty);
clip_bounds->x_2 = MIN (clip_bounds->x_2, rect_x2 - tx);
clip_bounds->y_2 = MIN (clip_bounds->y_2, rect_y2 - ty);
}
if (!can_software_clip_entry (journal_entry, prev_journal_entry,
clip_stack,
clip_bounds))
return;
}
/* If we make it here then we know we can software clip the entire batch */
@ -947,107 +1058,23 @@ check_software_clip_for_batch (CoglJournalEntry *batch_start,
ClipBounds *clip_bounds = &g_array_index (ctx->journal_clip_bounds,
ClipBounds, entry_num);
size_t stride =
GET_JOURNAL_ARRAY_STRIDE_FOR_N_LAYERS (journal_entry->n_layers);
float rx1, ry1, rx2, ry2;
float vx1, vy1, vx2, vy2;
int layer_num;
/* Remove the clip on the entry */
_cogl_clip_stack_unref (journal_entry->clip_stack);
journal_entry->clip_stack = NULL;
vx1 = verts[0];
vy1 = verts[1];
vx2 = verts[stride];
vy2 = verts[stride + 1];
if (vx1 < vx2)
{
rx1 = vx1;
rx2 = vx2;
}
else
{
rx1 = vx2;
rx2 = vx1;
}
if (vy1 < vy2)
{
ry1 = vy1;
ry2 = vy2;
}
else
{
ry1 = vy2;
ry2 = vy1;
}
rx1 = CLAMP (rx1, clip_bounds->x_1, clip_bounds->x_2);
ry1 = CLAMP (ry1, clip_bounds->y_1, clip_bounds->y_2);
rx2 = CLAMP (rx2, clip_bounds->x_1, clip_bounds->x_2);
ry2 = CLAMP (ry2, clip_bounds->y_1, clip_bounds->y_2);
/* Check if the rectangle intersects the clip at all */
if (rx1 == rx2 || ry1 == ry2)
/* Will set all of the vertex data to 0 in the hope that this
will create a degenerate rectangle and the GL driver will
be able to clip it quickly */
memset (verts, 0, sizeof (float) * stride * 2);
else
{
if (vx1 > vx2)
{
float t = rx1;
rx1 = rx2;
rx2 = t;
}
if (vy1 > vy2)
{
float t = ry1;
ry1 = ry2;
ry2 = t;
}
verts[0] = rx1;
verts[1] = ry1;
verts[stride] = rx2;
verts[stride + 1] = ry2;
/* Convert the rectangle coordinates to a fraction of the original
rectangle */
rx1 = (rx1 - vx1) / (vx2 - vx1);
ry1 = (ry1 - vy1) / (vy2 - vy1);
rx2 = (rx2 - vx1) / (vx2 - vx1);
ry2 = (ry2 - vy1) / (vy2 - vy1);
for (layer_num = 0; layer_num < journal_entry->n_layers; layer_num++)
{
float *t = verts + 2 + 2 * layer_num;
float tx1 = t[0], ty1 = t[1];
float tx2 = t[stride], ty2 = t[stride + 1];
t[0] = rx1 * (tx2 - tx1) + tx1;
t[1] = ry1 * (ty2 - ty1) + ty1;
t[stride] = rx2 * (tx2 - tx1) + tx1;
t[stride + 1] = ry2 * (ty2 - ty1) + ty1;
}
}
software_clip_entry (journal_entry, verts, clip_bounds);
}
return;
}
static void
_cogl_journal_check_software_clip (CoglJournalEntry *batch_start,
int batch_len,
void *data)
_cogl_journal_maybe_software_clip_entries (CoglJournalEntry *batch_start,
int batch_len,
void *data)
{
CoglJournalFlushState *state = data;
COGL_STATIC_TIMER (time_check_software_clip,
"Journal Flush", /* parent */
"flush: check software clip",
"Time spent checking for software clip",
"flush: software clipping",
"Time spent software clipping",
0 /* no application private data */);
_COGL_GET_CONTEXT (ctx, NO_RETVAL);
@ -1055,7 +1082,7 @@ _cogl_journal_check_software_clip (CoglJournalEntry *batch_start,
COGL_TIMER_START (_cogl_uprof_context,
time_check_software_clip);
check_software_clip_for_batch (batch_start, batch_len, state);
maybe_software_clip_entries (batch_start, batch_len, state);
COGL_TIMER_STOP (_cogl_uprof_context,
time_check_software_clip);
@ -1160,6 +1187,93 @@ upload_vertices (const CoglJournalEntry *entries,
return array;
}
/* Throws away everything currently logged in @journal without
 * rendering it: the pipeline and clip stack references held by each
 * entry are dropped, both arrays are emptied and the per-journal
 * counters are reset. */
void
_cogl_journal_discard (CoglJournal *journal)
{
  GArray *entries = journal->entries;
  guint n = 0;

  while (n < entries->len)
    {
      CoglJournalEntry *logged =
        &g_array_index (entries, CoglJournalEntry, n);

      _cogl_pipeline_journal_unref (logged->pipeline);
      _cogl_clip_stack_unref (logged->clip_stack);
      n++;
    }

  g_array_set_size (entries, 0);
  g_array_set_size (journal->vertices, 0);

  journal->needed_vbo_len = 0;
  journal->fast_read_pixel_count = 0;
}
/* Conservatively checks whether every entry in @journal is clipped to
 * lie within the rectangle (clip_x0, clip_y0) - (clip_x1, clip_y1).
 *
 * Only the clip stack of the first entry is inspected in detail; the
 * remaining entries merely have to share a suitable clip stack node
 * with it.
 *
 * Note: TRUE means 'yes', but FALSE only means 'unknown', not 'no'.
 * An empty journal trivially yields TRUE.
 */
gboolean
_cogl_journal_all_entries_within_bounds (CoglJournal *journal,
                                         float clip_x0,
                                         float clip_y0,
                                         float clip_x1,
                                         float clip_y1)
{
  CoglJournalEntry *first = (CoglJournalEntry *)journal->entries->data;
  CoglClipStack *shared_ancestor = NULL;
  CoglClipStack *node;
  int i;

  if (journal->entries->len == 0)
    return TRUE;

  /* Walk up the first entry's clip stack for as long as the bounds
   * stay inside the requested rectangle, remembering the furthest
   * ancestor that still qualifies. */
  for (node = first->clip_stack; node; node = node->parent)
    {
      int x0, y0, x1, y1;

      _cogl_clip_stack_get_bounds (node, &x0, &y0, &x1, &y1);

      if (!(x0 >= clip_x0 && y0 >= clip_y0 &&
            x1 <= clip_x1 && y1 <= clip_y1))
        break;

      shared_ancestor = node;
    }

  if (shared_ancestor == NULL)
    return FALSE;

  /* Any other entry that has 'shared_ancestor' somewhere in its clip
   * stack ancestry must also be within the required bounds, so that
   * is all we verify for the remaining entries. */
  for (i = 1; i < journal->entries->len; i++)
    {
      node = g_array_index (journal->entries, CoglJournalEntry, i).clip_stack;

      while (node && node != shared_ancestor)
        node = node->parent;

      /* Ran off the top of the stack without meeting the ancestor */
      if (!node)
        return FALSE;
    }

  return TRUE;
}
/* XXX NB: When _cogl_journal_flush() returns all state relating
* to pipelines, all glEnable flags and current matrix state
* is undefined.
@ -1219,7 +1333,7 @@ _cogl_journal_flush (CoglJournal *journal,
batch_and_call ((CoglJournalEntry *)journal->entries->data, /* first entry */
journal->entries->len, /* max number of entries to consider */
compare_entry_clip_stacks,
_cogl_journal_check_software_clip, /* callback */
_cogl_journal_maybe_software_clip_entries, /* callback */
&state); /* data */
}
@ -1268,17 +1382,7 @@ _cogl_journal_flush (CoglJournal *journal,
cogl_object_unref (state.vertex_array);
for (i = 0; i < journal->entries->len; i++)
{
CoglJournalEntry *entry =
&g_array_index (journal->entries, CoglJournalEntry, i);
_cogl_pipeline_journal_unref (entry->pipeline);
_cogl_clip_stack_unref (entry->clip_stack);
}
g_array_set_size (journal->entries, 0);
g_array_set_size (journal->vertices, 0);
journal->needed_vbo_len = 0;
_cogl_journal_discard (journal);
cogl_pop_framebuffer ();
@ -1438,3 +1542,261 @@ _cogl_journal_log_quad (CoglJournal *journal,
COGL_TIMER_STOP (_cogl_uprof_context, log_timer);
}
/* Builds the four corners of the quad logged for @entry and transforms
 * them into framebuffer coordinates.
 *
 * @vertices points at the entry's two logged positions (the second one
 * lives one journal array stride after the first). @poly receives 4
 * points of 4 floats each; after this call the first two floats of
 * each point hold the framebuffer x and y of that corner.
 *
 * The projection matrix and viewport are taken from the current
 * framebuffer (_cogl_get_framebuffer ()).
 */
static void
entry_to_screen_polygon (const CoglJournalEntry *entry,
                         float *vertices,
                         float *poly)
{
  size_t array_stride =
    GET_JOURNAL_ARRAY_STRIDE_FOR_N_LAYERS (entry->n_layers);
  const float x0 = vertices[0];
  const float y0 = vertices[1];
  const float x1 = vertices[array_stride];
  const float y1 = vertices[array_stride + 1];
  CoglMatrixStack *projection_stack;
  CoglMatrix projection;
  int viewport[4];
  int n;

  /* Corner order: (x0,y0), (x0,y1), (x1,y1), (x1,y0), each with
   * z = 0 and w = 1 */
  for (n = 0; n < 4; n++)
    {
      float *corner = poly + 4 * n;

      corner[0] = (n < 2) ? x0 : x1;
      corner[1] = (n == 0 || n == 3) ? y0 : y1;
      corner[2] = 0;
      corner[3] = 1;
    }

  /* TODO: perhaps split the following out into a more generalized
   * _cogl_transform_points utility...
   */

  cogl_matrix_transform_points (&entry->model_view,
                                2, /* n_components */
                                sizeof (float) * 4, /* stride_in */
                                poly, /* points_in */
                                sizeof (float) * 4, /* stride_out */
                                poly, /* points_out */
                                4 /* n_points */);

  projection_stack =
    _cogl_framebuffer_get_projection_stack (_cogl_get_framebuffer ());
  _cogl_matrix_stack_get (projection_stack, &projection);

  cogl_matrix_project_points (&projection,
                              3, /* n_components */
                              sizeof (float) * 4, /* stride_in */
                              poly, /* points_in */
                              sizeof (float) * 4, /* stride_out */
                              poly, /* points_out */
                              4 /* n_points */);

  _cogl_framebuffer_get_viewport4fv (_cogl_get_framebuffer (),
                                     viewport);

  /* Go from clip coordinates to Cogl window/framebuffer coordinates
   * (ranging from 0 to buffer-size) with (0,0) being top left:
   * divide by w to reach normalized device coordinates (in range
   * [-1,1]), then scale and offset by the viewport. Y is flipped
   * around the X axis while still in normalized device
   * coordinates. */
  for (n = 0; n < 4; n++)
    {
      float *corner = poly + 4 * n;
      float w = corner[3];

      /* Perform perspective division */
      corner[0] /= w;
      corner[1] /= w;

      /* Apply viewport transform */
      corner[0] = ( ( (corner[0] + 1.0) * (viewport[2] / 2.0) )
                    + viewport[0] );
      corner[1] = ( ( ((-corner[1]) + 1.0) * (viewport[3] / 2.0) )
                    + viewport[1] );
    }
}
/* Works out whether the framebuffer point (x, y) is still covered by
 * @entry once the entry's clip stack is taken into account.
 *
 * @vertices must point at the entry's logged positions in the journal.
 *
 * Returns TRUE if the answer could be determined, in which case *hit
 * says whether the point survives clipping. Returns FALSE if the clip
 * stack is too complex to evaluate in software; *hit is then
 * meaningless.
 *
 * Note: when a software clip is required this calls
 * software_clip_entry(), which drops the entry's clip stack and
 * rewrites @vertices with the clipped rectangle.
 */
static gboolean
try_checking_point_hits_entry_after_clipping (CoglJournalEntry *entry,
                                              float *vertices,
                                              float x,
                                              float y,
                                              gboolean *hit)
{
  gboolean can_software_clip = TRUE;
  gboolean needs_software_clip = FALSE;
  CoglClipStack *clip_entry;

  *hit = TRUE;

  /* Verify that all of the clip stack entries are simple rectangle
   * clips */
  for (clip_entry = entry->clip_stack;
       clip_entry;
       clip_entry = clip_entry->parent)
    {
      /* A point outside the bounds of any clip node can't be hit,
       * whatever the type of that node is. */
      if (x < clip_entry->bounds_x0 ||
          x >= clip_entry->bounds_x1 ||
          y < clip_entry->bounds_y0 ||
          y >= clip_entry->bounds_y1)
        {
          *hit = FALSE;
          return TRUE;
        }

      if (clip_entry->type == COGL_CLIP_STACK_WINDOW_RECT)
        {
          /* XXX: technically we could still run the software clip in
           * this case because for our purposes we know this clip
           * can be ignored now, but [can_]software_clip_entry() doesn't
           * know this: it treats every clip node as a
           * CoglClipStackRect, so it must not be handed a stack
           * containing a window rect. */
          can_software_clip = FALSE;
        }
      else if (clip_entry->type == COGL_CLIP_STACK_RECT)
        {
          /* NB: this must inspect the clip node being visited, not the
           * journal entry itself. */
          CoglClipStackRect *rect_entry = (CoglClipStackRect *)clip_entry;

          if (rect_entry->can_be_scissor == FALSE)
            needs_software_clip = TRUE;
          /* If can_be_scissor is TRUE then we know it's screen
           * aligned and the hit test we did above has determined
           * that we are inside this clip. */
        }
      else
        return FALSE;
    }

  if (needs_software_clip)
    {
      ClipBounds clip_bounds;
      float poly[16];

      /* The stack contains a node that the software clipper can't
       * interpret, so report that the hit couldn't be determined. */
      if (!can_software_clip)
        return FALSE;

      if (!can_software_clip_entry (entry, NULL,
                                    entry->clip_stack, &clip_bounds))
        return FALSE;

      software_clip_entry (entry, vertices, &clip_bounds);
      entry_to_screen_polygon (entry, vertices, poly);

      *hit = _cogl_util_point_in_poly (x, y, poly, sizeof (float) * 4, 4);
      return TRUE;
    }

  return TRUE;
}
/* Tries to determine, purely in software, which journal entry (if any)
 * covers the framebuffer point (x, y) and what color it has.
 *
 * Returns FALSE when the result can't be determined this way; the
 * value of *found_intersection must not be relied upon in that case
 * (it is not even written if the function bails out before walking
 * the journal).
 *
 * Returns TRUE otherwise. *found_intersection then says whether an
 * entry was hit: if TRUE, the entry's color has been written to
 * @pixel; if FALSE, no entry covers the point and @pixel is left
 * untouched.
 */
gboolean
_cogl_journal_try_read_pixel (CoglJournal *journal,
                              int x,
                              int y,
                              CoglPixelFormat format,
                              guint8 *pixel,
                              gboolean *found_intersection)
{
  int idx;

  _COGL_GET_CONTEXT (ctx, FALSE);

  /* XXX: this number has been plucked out of thin air, but the idea
   * is that if so many pixels are being read from the same un-changed
   * journal then we expect that it will be more efficient to fail
   * here so we end up flushing and rendering the journal so that
   * further reads can directly read from the framebuffer. There will
   * be a bit more lag to flush the render but if there are going to
   * continue being lots of arbitrary single pixel reads they will end
   * up faster in the end. */
  if (journal->fast_read_pixel_count > 50)
    return FALSE;

  if (!(format == COGL_PIXEL_FORMAT_RGBA_8888_PRE ||
        format == COGL_PIXEL_FORMAT_RGBA_8888))
    return FALSE;

  *found_intersection = FALSE;

  /* NB: The most recently added journal entry is the last entry, and
   * assuming this is a simple scene only comprised of opaque coloured
   * rectangles with no special pipelines involved (e.g. enabling
   * depth testing) then we can assume painter's algorithm for the
   * entries and so our fast read-pixel just needs to walk backwards
   * through the journal entries trying to intersect each entry with
   * the given point of interest. */
  for (idx = journal->entries->len - 1; idx >= 0; idx--)
    {
      CoglJournalEntry *candidate =
        &g_array_index (journal->entries, CoglJournalEntry, idx);
      /* The entry's data starts with its color, occupying the first
       * float-sized slot, followed by the vertex data. */
      guint8 *color = (guint8 *)&g_array_index (journal->vertices, float,
                                                candidate->array_offset);
      float *vertices = (float *)color + 1;
      float poly[16];
      int channel;

      entry_to_screen_polygon (candidate, vertices, poly);

      if (!_cogl_util_point_in_poly (x, y, poly, sizeof (float) * 4, 4))
        continue;

      /* FIXME: the journal should have a back pointer to the
       * associated framebuffer, because it should be possible to read
       * a pixel from arbitrary framebuffers without needing to
       * internally call _cogl_push/pop_framebuffer.
       */
      if (candidate->clip_stack)
        {
          gboolean hit;

          if (!try_checking_point_hits_entry_after_clipping (candidate,
                                                             vertices,
                                                             x, y, &hit))
            return FALSE; /* hit couldn't be determined */

          if (!hit)
            continue;
        }

      *found_intersection = TRUE;

      /* If we find that the rectangle the point of interest
       * intersects has any state more complex than a constant opaque
       * color then we bail out. */
      if (!_cogl_pipeline_equal (ctx->opaque_color_pipeline,
                                 candidate->pipeline,
                                 (COGL_PIPELINE_STATE_ALL &
                                  ~COGL_PIPELINE_STATE_COLOR),
                                 COGL_PIPELINE_LAYER_STATE_ALL,
                                 0))
        return FALSE;

      /* we currently only care about cases where the premultiplied and
       * unpremultiplied colors are equivalent... */
      if (color[3] != 0xff)
        return FALSE;

      for (channel = 0; channel < 4; channel++)
        pixel[channel] = color[channel];

      /* The top-most entry under the point has been resolved */
      break;
    }

  journal->fast_read_pixel_count++;
  return TRUE;
}

View File

@ -480,6 +480,7 @@ enable_gl_state (CoglDrawFlags flags,
CoglVertexAttribute **attributes,
ValidateLayerState *state)
{
CoglFramebuffer *framebuffer = _cogl_get_framebuffer ();
int i;
#ifdef MAY_HAVE_PROGRAMABLE_GL
GLuint generic_index = 0;
@ -492,6 +493,12 @@ enable_gl_state (CoglDrawFlags flags,
_COGL_GET_CONTEXT (ctx, COGL_INVALID_HANDLE);
/* In cogl_read_pixels we have a fast-path when reading a single
* pixel and the scene is just comprised of simple rectangles still
* in the journal. For this optimization to work we need to track
* when the framebuffer really does get drawn to. */
_cogl_framebuffer_dirty (framebuffer);
source = cogl_get_source ();
/* Iterate the attributes to work out whether blending needs to be

View File

@ -497,7 +497,7 @@ _cogl_read_pixels_with_rowstride (int x,
guint8 *pixels,
int rowstride)
{
CoglFramebuffer *framebuffer;
CoglFramebuffer *framebuffer = _cogl_get_framebuffer ();
int framebuffer_height;
int bpp;
CoglBitmap *bmp;
@ -510,6 +510,22 @@ _cogl_read_pixels_with_rowstride (int x,
g_return_if_fail (source == COGL_READ_PIXELS_COLOR_BUFFER);
if (width == 1 && height == 1 && !framebuffer->clear_clip_dirty)
{
/* If everything drawn so far for this frame is still in the
* Journal then if all of the rectangles only have a flat
* opaque color we have a fast-path for reading a single pixel
* that avoids the relatively high cost of flushing primitives
* to be drawn on the GPU (considering how simple the geometry
* is in this case) and then blocking on the long GPU pipelines
* for the result.
*/
if (_cogl_framebuffer_try_fast_read_pixel (framebuffer,
x, y, source, format,
pixels))
return;
}
/* make sure any batched primitives get emitted to the GL driver
* before issuing our read pixels...
*
@ -521,8 +537,6 @@ _cogl_read_pixels_with_rowstride (int x,
*/
cogl_flush ();
framebuffer = _cogl_get_framebuffer ();
_cogl_framebuffer_flush_state (framebuffer, 0);
framebuffer_height = _cogl_framebuffer_get_height (framebuffer);