diff --git a/cogl/cogl-framebuffer-private.h b/cogl/cogl-framebuffer-private.h index 3d9cba364..6f69d0699 100644 --- a/cogl/cogl-framebuffer-private.h +++ b/cogl/cogl-framebuffer-private.h @@ -67,6 +67,22 @@ struct _CoglFramebuffer /* The scene of a given framebuffer may depend on images in other * framebuffers... */ GList *deps; + + /* As part of an optimization for reading-back single pixels from a + * framebuffer in some simple cases where the geometry is still + * available in the journal we need to track the bounds of the last + * region cleared, its color and we need to track when something + * does in fact draw to that region so it is no longer clear. + */ + float clear_color_red; + float clear_color_green; + float clear_color_blue; + float clear_color_alpha; + int clear_clip_x0; + int clear_clip_y0; + int clear_clip_x1; + int clear_clip_y1; + gboolean clear_clip_dirty; }; #define COGL_FRAMEBUFFER(X) ((CoglFramebuffer *)(X)) @@ -117,6 +133,9 @@ _cogl_framebuffer_clear4f (CoglFramebuffer *framebuffer, float blue, float alpha); +void +_cogl_framebuffer_dirty (CoglFramebuffer *framebuffer); + int _cogl_framebuffer_get_width (CoglFramebuffer *framebuffer); @@ -196,6 +215,14 @@ _cogl_framebuffer_flush_dependency_journals (CoglFramebuffer *framebuffer); void _cogl_framebuffer_swap_notify (CoglFramebuffer *framebuffer); +gboolean +_cogl_framebuffer_try_fast_read_pixel (CoglFramebuffer *framebuffer, + int x, + int y, + CoglReadPixelsFlags source, + CoglPixelFormat format, + guint8 *pixel); + typedef enum _CoglFramebufferFlushFlags { /* XXX: When using this, that imples you are going to manually load the diff --git a/cogl/cogl-framebuffer.c b/cogl/cogl-framebuffer.c index a0c5b944d..1519d58f3 100644 --- a/cogl/cogl-framebuffer.c +++ b/cogl/cogl-framebuffer.c @@ -160,6 +160,13 @@ _cogl_framebuffer_init (CoglFramebuffer *framebuffer, framebuffer->journal = _cogl_journal_new (); + /* Ensure we know the framebuffer->clear_color* members can't be + * referenced for our fast-path read-pixel optimization (see + * _cogl_journal_try_read_pixel()) until some region of the + * framebuffer is initialized. + */ + framebuffer->clear_clip_dirty = TRUE; + /* XXX: We have to maintain a central list of all framebuffers * because at times we need to be able to flush all known journals. * @@ -248,6 +255,12 @@ _cogl_clear4f (unsigned long buffers, GE (glClear (gl_buffers)); } +void +_cogl_framebuffer_dirty (CoglFramebuffer *framebuffer) +{ + framebuffer->clear_clip_dirty = TRUE; +} + void _cogl_framebuffer_clear4f (CoglFramebuffer *framebuffer, unsigned long buffers, @@ -256,11 +269,96 @@ _cogl_framebuffer_clear4f (CoglFramebuffer *framebuffer, float blue, float alpha) { + CoglClipStack *clip_stack = _cogl_framebuffer_get_clip_stack (framebuffer); + int scissor_x0; + int scissor_y0; + int scissor_x1; + int scissor_y1; + + _cogl_clip_stack_get_bounds (clip_stack, + &scissor_x0, &scissor_y0, + &scissor_x1, &scissor_y1); + + /* NB: the previous clear could have had an arbitrary clip. + * NB: everything for the last frame might still be in the journal + * but we can't assume anything about how each entry was + * clipped. + * NB: Clutter will scissor its pick renders which would mean all + * journal entries have a common ClipStack entry, but without + * a layering violation Cogl has to explicitly walk the journal + * entries to determine if this is the case. + * NB: We have a software only read-pixel optimization in the + * journal that determines the color at a given framebuffer + * coordinate for simple scenes without rendering with the GPU. + * When Clutter is hitting this fast-path we can expect to + * receive calls to clear the framebuffer with an un-flushed + * journal. + * NB: To fully support software based picking for Clutter we + * need to be able to reliably detect when the contents of a + * journal can be discarded and when we can skip the call to + * glClear because it matches the previous clear request. + */ + + /* Note: we don't check for the stencil buffer being cleared here + * since there isn't any public cogl api to manipulate the stencil + * buffer. + * + * Note: we check for an exact clip match here because + * 1) a smaller clip could mean existing journal entries may + * need to contribute to regions outside the new clear-clip + * 2) a larger clip would mean we need to issue a real + * glClear and we only care about cases avoiding a + * glClear. + * + * Note: Comparing without an epsilon is considered + * appropriate here. + */ + if (buffers & COGL_BUFFER_BIT_COLOR && + buffers & COGL_BUFFER_BIT_DEPTH && + !framebuffer->clear_clip_dirty && + framebuffer->clear_color_red == red && + framebuffer->clear_color_green == green && + framebuffer->clear_color_blue == blue && + framebuffer->clear_color_alpha == alpha && + scissor_x0 == framebuffer->clear_clip_x0 && + scissor_y0 == framebuffer->clear_clip_y0 && + scissor_x1 == framebuffer->clear_clip_x1 && + scissor_y1 == framebuffer->clear_clip_y1) + { + /* NB: We only have to consider the clip state of journal + * entries if the current clear is clipped since otherwise we + * know every pixel of the framebuffer is affected by the clear + * and so all journal entries become redundant and can simply be + * discarded. + */ + if (clip_stack) + { + /* + * Note: the function for checking the journal entries is + * quite strict. It avoids detailed checking of all entry + * clip_stacks by only checking the details of the first + * entry and then it only verifies that the remaining + * entries share the same clip_stack ancestry. This means + * it's possible for some false negatives here but that will + * just result in us falling back to a real clear. + */ + if (_cogl_journal_all_entries_within_bounds (framebuffer->journal, + scissor_x0, scissor_y0, + scissor_x1, scissor_y1)) + { + _cogl_journal_discard (framebuffer->journal); + goto cleared; + } + } + else + { + _cogl_journal_discard (framebuffer->journal); + goto cleared; + } + } + COGL_NOTE (DRAW, "Clear begin"); - /* XXX: in the case where it's the color buffer being cleared and - * the current clip-stack is empty we could instead discard the - * journal here instead of flushing it. */ _cogl_framebuffer_flush_journal (framebuffer); /* NB: _cogl_framebuffer_flush_state may disrupt various state (such @@ -282,6 +380,38 @@ _cogl_framebuffer_clear4f (CoglFramebuffer *framebuffer, } COGL_NOTE (DRAW, "Clear end"); + +cleared: + + if (buffers & COGL_BUFFER_BIT_COLOR && buffers & COGL_BUFFER_BIT_DEPTH) + { + /* For our fast-path for reading back a single pixel of simple + * scenes where the whole frame is in the journal we need to + * track the cleared color of the framebuffer in case the point + * read doesn't intersect any of the journal rectangles. */ + framebuffer->clear_clip_dirty = FALSE; + framebuffer->clear_color_red = red; + framebuffer->clear_color_green = green; + framebuffer->clear_color_blue = blue; + framebuffer->clear_color_alpha = alpha; + + /* NB: A clear may be scissored so we need to track the extents + * that the clear is applicable too... */ + if (clip_stack) + { + _cogl_clip_stack_get_bounds (clip_stack, + &framebuffer->clear_clip_x0, + &framebuffer->clear_clip_y0, + &framebuffer->clear_clip_x1, + &framebuffer->clear_clip_y1); + } + else + { + /* FIXME: set degenerate clip */ + } + } + else + _cogl_framebuffer_dirty (framebuffer); } /* XXX: We'll need to consider if this API is a good approach for the @@ -1097,3 +1227,63 @@ _cogl_framebuffer_get_alpha_bits (CoglFramebuffer *framebuffer) return framebuffer->alpha_bits; } +gboolean +_cogl_framebuffer_try_fast_read_pixel (CoglFramebuffer *framebuffer, + int x, + int y, + CoglReadPixelsFlags source, + CoglPixelFormat format, + guint8 *pixel) +{ + gboolean found_intersection; + + if (source != COGL_READ_PIXELS_COLOR_BUFFER) + return FALSE; + + if (format != COGL_PIXEL_FORMAT_RGBA_8888_PRE && + format != COGL_PIXEL_FORMAT_RGBA_8888) + return FALSE; + + if (!_cogl_journal_try_read_pixel (framebuffer->journal, + x, y, format, pixel, + &found_intersection)) + return FALSE; + + /* If we can't determine the color from the primitives in the + * journal then see if we can use the last recorded clear color + */ + + /* If _cogl_journal_try_read_pixel() failed even though there was an + * intersection of the given point with a primitive in the journal + * then we can't fallback to the framebuffer's last clear color... + * */ + if (found_intersection) + return TRUE; + + /* If the framebuffer has been rendered too since it was last + * cleared then we can't return the last known clear color. */ + if (framebuffer->clear_clip_dirty) + return FALSE; + + if (x >= framebuffer->clear_clip_x0 && + x < framebuffer->clear_clip_x1 && + y >= framebuffer->clear_clip_y0 && + y < framebuffer->clear_clip_y1) + { + + /* we currently only care about cases where the premultiplied or + * unpremultipled colors are equivalent... */ + if (framebuffer->clear_color_alpha != 1.0) + return FALSE; + + pixel[0] = framebuffer->clear_color_red * 255.0; + pixel[1] = framebuffer->clear_color_green * 255.0; + pixel[2] = framebuffer->clear_color_blue * 255.0; + pixel[3] = framebuffer->clear_color_alpha * 255.0; + + return TRUE; + } + + return FALSE; +} + diff --git a/cogl/cogl-journal-private.h b/cogl/cogl-journal-private.h index 754a2ecd7..44a4af855 100644 --- a/cogl/cogl-journal-private.h +++ b/cogl/cogl-journal-private.h @@ -35,6 +35,8 @@ typedef struct _CoglJournal GArray *vertices; size_t needed_vbo_len; + int fast_read_pixel_count; + } CoglJournal; /* To improve batching of geometry when submitting vertices to OpenGL we @@ -69,4 +71,22 @@ void _cogl_journal_flush (CoglJournal *journal, CoglFramebuffer *framebuffer); +void +_cogl_journal_discard (CoglJournal *journal); + +gboolean +_cogl_journal_all_entries_within_bounds (CoglJournal *journal, + float clip_x0, + float clip_y0, + float clip_x1, + float clip_y1); + +gboolean +_cogl_journal_try_read_pixel (CoglJournal *journal, + int x, + int y, + CoglPixelFormat format, + guint8 *pixel, + gboolean *found_intersection); + #endif /* __COGL_JOURNAL_PRIVATE_H */ diff --git a/cogl/cogl-journal.c b/cogl/cogl-journal.c index dc81e588a..f0e8f3896 100644 --- a/cogl/cogl-journal.c +++ b/cogl/cogl-journal.c @@ -37,6 +37,7 @@ #include "cogl-framebuffer-private.h" #include "cogl-profile.h" #include "cogl-vertex-attribute-private.h" +#include "cogl-point-in-poly-private.h" #include #include @@ -820,10 +821,182 @@ typedef struct float x_2, y_2; } ClipBounds; +static gboolean +can_software_clip_entry (CoglJournalEntry *journal_entry, + CoglJournalEntry *prev_journal_entry, + CoglClipStack *clip_stack, + ClipBounds *clip_bounds_out) +{ + CoglPipeline *pipeline = journal_entry->pipeline; + CoglClipStack *clip_entry; + int layer_num; + + clip_bounds_out->x_1 = -G_MAXFLOAT; + clip_bounds_out->y_1 = -G_MAXFLOAT; + clip_bounds_out->x_2 = G_MAXFLOAT; + clip_bounds_out->y_2 = G_MAXFLOAT; + + /* Check the pipeline is usable. We can short-cut here for + entries using the same pipeline as the previous entry */ + if (prev_journal_entry == NULL || pipeline != prev_journal_entry->pipeline) + { + /* If the pipeline has a user program then we can't reliably modify + the texture coordinates */ + if (cogl_pipeline_get_user_program (pipeline)) + return FALSE; + + /* If any of the pipeline layers have a texture matrix then we can't + reliably modify the texture coordinates */ + for (layer_num = cogl_pipeline_get_n_layers (pipeline) - 1; + layer_num >= 0; + layer_num--) + if (_cogl_pipeline_layer_has_user_matrix (pipeline, layer_num)) + return FALSE; + } + + /* Now we need to verify that each clip entry's matrix is just a + translation of the journal entry's modelview matrix. We can + also work out the bounds of the clip in modelview space using + this translation */ + for (clip_entry = clip_stack; clip_entry; clip_entry = clip_entry->parent) + { + float rect_x1, rect_y1, rect_x2, rect_y2; + CoglClipStackRect *clip_rect; + float tx, ty; + + clip_rect = (CoglClipStackRect *) clip_entry; + + if (!calculate_translation (&clip_rect->matrix, + &journal_entry->model_view, + &tx, &ty)) + return FALSE; + + if (clip_rect->x0 < clip_rect->x1) + { + rect_x1 = clip_rect->x0; + rect_x2 = clip_rect->x1; + } + else + { + rect_x1 = clip_rect->x1; + rect_x2 = clip_rect->x0; + } + if (clip_rect->y0 < clip_rect->y1) + { + rect_y1 = clip_rect->y0; + rect_y2 = clip_rect->y1; + } + else + { + rect_y1 = clip_rect->y1; + rect_y2 = clip_rect->y0; + } + + clip_bounds_out->x_1 = MAX (clip_bounds_out->x_1, rect_x1 - tx); + clip_bounds_out->y_1 = MAX (clip_bounds_out->y_1, rect_y1 - ty); + clip_bounds_out->x_2 = MIN (clip_bounds_out->x_2, rect_x2 - tx); + clip_bounds_out->y_2 = MIN (clip_bounds_out->y_2, rect_y2 - ty); + } + + return TRUE; +} + static void -check_software_clip_for_batch (CoglJournalEntry *batch_start, - int batch_len, - CoglJournalFlushState *state) +software_clip_entry (CoglJournalEntry *journal_entry, + float *verts, + ClipBounds *clip_bounds) +{ + size_t stride = + GET_JOURNAL_ARRAY_STRIDE_FOR_N_LAYERS (journal_entry->n_layers); + float rx1, ry1, rx2, ry2; + float vx1, vy1, vx2, vy2; + int layer_num; + + /* Remove the clip on the entry */ + _cogl_clip_stack_unref (journal_entry->clip_stack); + journal_entry->clip_stack = NULL; + + vx1 = verts[0]; + vy1 = verts[1]; + vx2 = verts[stride]; + vy2 = verts[stride + 1]; + + if (vx1 < vx2) + { + rx1 = vx1; + rx2 = vx2; + } + else + { + rx1 = vx2; + rx2 = vx1; + } + if (vy1 < vy2) + { + ry1 = vy1; + ry2 = vy2; + } + else + { + ry1 = vy2; + ry2 = vy1; + } + + rx1 = CLAMP (rx1, clip_bounds->x_1, clip_bounds->x_2); + ry1 = CLAMP (ry1, clip_bounds->y_1, clip_bounds->y_2); + rx2 = CLAMP (rx2, clip_bounds->x_1, clip_bounds->x_2); + ry2 = CLAMP (ry2, clip_bounds->y_1, clip_bounds->y_2); + + /* Check if the rectangle intersects the clip at all */ + if (rx1 == rx2 || ry1 == ry2) + /* Will set all of the vertex data to 0 in the hope that this + will create a degenerate rectangle and the GL driver will + be able to clip it quickly */ + memset (verts, 0, sizeof (float) * stride * 2); + else + { + if (vx1 > vx2) + { + float t = rx1; + rx1 = rx2; + rx2 = t; + } + if (vy1 > vy2) + { + float t = ry1; + ry1 = ry2; + ry2 = t; + } + + verts[0] = rx1; + verts[1] = ry1; + verts[stride] = rx2; + verts[stride + 1] = ry2; + + /* Convert the rectangle coordinates to a fraction of the original + rectangle */ + rx1 = (rx1 - vx1) / (vx2 - vx1); + ry1 = (ry1 - vy1) / (vy2 - vy1); + rx2 = (rx2 - vx1) / (vx2 - vx1); + ry2 = (ry2 - vy1) / (vy2 - vy1); + + for (layer_num = 0; layer_num < journal_entry->n_layers; layer_num++) + { + float *t = verts + 2 + 2 * layer_num; + float tx1 = t[0], ty1 = t[1]; + float tx2 = t[stride], ty2 = t[stride + 1]; + t[0] = rx1 * (tx2 - tx1) + tx1; + t[1] = ry1 * (ty2 - ty1) + ty1; + t[stride] = rx2 * (tx2 - tx1) + tx1; + t[stride + 1] = ry2 * (ty2 - ty1) + ty1; + } + } +} + +static void +maybe_software_clip_entries (CoglJournalEntry *batch_start, + int batch_len, + CoglJournalFlushState *state) { CoglJournal *journal = state->journal; CoglClipStack *clip_stack, *clip_entry; @@ -864,77 +1037,15 @@ check_software_clip_for_batch (CoglJournalEntry *batch_start, for (entry_num = 0; entry_num < batch_len; entry_num++) { CoglJournalEntry *journal_entry = batch_start + entry_num; - CoglPipeline *pipeline = journal_entry->pipeline; + CoglJournalEntry *prev_journal_entry = + entry_num ? batch_start + (entry_num - 1) : NULL; ClipBounds *clip_bounds = &g_array_index (ctx->journal_clip_bounds, ClipBounds, entry_num); - int layer_num; - clip_bounds->x_1 = -G_MAXFLOAT; - clip_bounds->y_1 = -G_MAXFLOAT; - clip_bounds->x_2 = G_MAXFLOAT; - clip_bounds->y_2 = G_MAXFLOAT; - - /* Check the pipeline is usable. We can short-cut here for - entries using the same pipeline as the previous entry */ - if (entry_num == 0 || pipeline != batch_start[entry_num - 1].pipeline) - { - /* If the pipeline has a user program then we can't reliably modify - the texture coordinates */ - if (cogl_pipeline_get_user_program (pipeline)) - return; - - /* If any of the pipeline layers have a texture matrix then we can't - reliably modify the texture coordinates */ - for (layer_num = cogl_pipeline_get_n_layers (pipeline) - 1; - layer_num >= 0; - layer_num--) - if (_cogl_pipeline_layer_has_user_matrix (pipeline, layer_num)) - return; - } - - /* Now we need to verify that each clip entry's matrix is just a - translation of the journal entry's modelview matrix. We can - also work out the bounds of the clip in modelview space using - this translation */ - for (clip_entry = clip_stack; clip_entry; clip_entry = clip_entry->parent) - { - float rect_x1, rect_y1, rect_x2, rect_y2; - CoglClipStackRect *clip_rect; - float tx, ty; - - clip_rect = (CoglClipStackRect *) clip_entry; - - if (!calculate_translation (&clip_rect->matrix, - &journal_entry->model_view, - &tx, &ty)) - return; - - if (clip_rect->x0 < clip_rect->x1) - { - rect_x1 = clip_rect->x0; - rect_x2 = clip_rect->x1; - } - else - { - rect_x1 = clip_rect->x1; - rect_x2 = clip_rect->x0; - } - if (clip_rect->y0 < clip_rect->y1) - { - rect_y1 = clip_rect->y0; - rect_y2 = clip_rect->y1; - } - else - { - rect_y1 = clip_rect->y1; - rect_y2 = clip_rect->y0; - } - - clip_bounds->x_1 = MAX (clip_bounds->x_1, rect_x1 - tx); - clip_bounds->y_1 = MAX (clip_bounds->y_1, rect_y1 - ty); - clip_bounds->x_2 = MIN (clip_bounds->x_2, rect_x2 - tx); - clip_bounds->y_2 = MIN (clip_bounds->y_2, rect_y2 - ty); - } + if (!can_software_clip_entry (journal_entry, prev_journal_entry, + clip_stack, + clip_bounds)) + return; } /* If we make it here then we know we can software clip the entire batch */ @@ -947,107 +1058,23 @@ check_software_clip_for_batch (CoglJournalEntry *batch_start, ClipBounds *clip_bounds = &g_array_index (ctx->journal_clip_bounds, ClipBounds, entry_num); - size_t stride = - GET_JOURNAL_ARRAY_STRIDE_FOR_N_LAYERS (journal_entry->n_layers); - float rx1, ry1, rx2, ry2; - float vx1, vy1, vx2, vy2; - int layer_num; - - /* Remove the clip on the entry */ - _cogl_clip_stack_unref (journal_entry->clip_stack); - journal_entry->clip_stack = NULL; - - vx1 = verts[0]; - vy1 = verts[1]; - vx2 = verts[stride]; - vy2 = verts[stride + 1]; - - if (vx1 < vx2) - { - rx1 = vx1; - rx2 = vx2; - } - else - { - rx1 = vx2; - rx2 = vx1; - } - if (vy1 < vy2) - { - ry1 = vy1; - ry2 = vy2; - } - else - { - ry1 = vy2; - ry2 = vy1; - } - - rx1 = CLAMP (rx1, clip_bounds->x_1, clip_bounds->x_2); - ry1 = CLAMP (ry1, clip_bounds->y_1, clip_bounds->y_2); - rx2 = CLAMP (rx2, clip_bounds->x_1, clip_bounds->x_2); - ry2 = CLAMP (ry2, clip_bounds->y_1, clip_bounds->y_2); - - /* Check if the rectangle intersects the clip at all */ - if (rx1 == rx2 || ry1 == ry2) - /* Will set all of the vertex data to 0 in the hope that this - will create a degenerate rectangle and the GL driver will - be able to clip it quickly */ - memset (verts, 0, sizeof (float) * stride * 2); - else - { - if (vx1 > vx2) - { - float t = rx1; - rx1 = rx2; - rx2 = t; - } - if (vy1 > vy2) - { - float t = ry1; - ry1 = ry2; - ry2 = t; - } - - verts[0] = rx1; - verts[1] = ry1; - verts[stride] = rx2; - verts[stride + 1] = ry2; - - /* Convert the rectangle coordinates to a fraction of the original - rectangle */ - rx1 = (rx1 - vx1) / (vx2 - vx1); - ry1 = (ry1 - vy1) / (vy2 - vy1); - rx2 = (rx2 - vx1) / (vx2 - vx1); - ry2 = (ry2 - vy1) / (vy2 - vy1); - - for (layer_num = 0; layer_num < journal_entry->n_layers; layer_num++) - { - float *t = verts + 2 + 2 * layer_num; - float tx1 = t[0], ty1 = t[1]; - float tx2 = t[stride], ty2 = t[stride + 1]; - t[0] = rx1 * (tx2 - tx1) + tx1; - t[1] = ry1 * (ty2 - ty1) + ty1; - t[stride] = rx2 * (tx2 - tx1) + tx1; - t[stride + 1] = ry2 * (ty2 - ty1) + ty1; - } - } + software_clip_entry (journal_entry, verts, clip_bounds); } return; } static void -_cogl_journal_check_software_clip (CoglJournalEntry *batch_start, - int batch_len, - void *data) +_cogl_journal_maybe_software_clip_entries (CoglJournalEntry *batch_start, + int batch_len, + void *data) { CoglJournalFlushState *state = data; COGL_STATIC_TIMER (time_check_software_clip, "Journal Flush", /* parent */ - "flush: check software clip", - "Time spent checking for software clip", + "flush: software clipping", + "Time spent software clipping", 0 /* no application private data */); _COGL_GET_CONTEXT (ctx, NO_RETVAL); @@ -1055,7 +1082,7 @@ _cogl_journal_check_software_clip (CoglJournalEntry *batch_start, COGL_TIMER_START (_cogl_uprof_context, time_check_software_clip); - check_software_clip_for_batch (batch_start, batch_len, state); + maybe_software_clip_entries (batch_start, batch_len, state); COGL_TIMER_STOP (_cogl_uprof_context, time_check_software_clip); @@ -1160,6 +1187,93 @@ upload_vertices (const CoglJournalEntry *entries, return array; } +void +_cogl_journal_discard (CoglJournal *journal) +{ + int i; + + for (i = 0; i < journal->entries->len; i++) + { + CoglJournalEntry *entry = + &g_array_index (journal->entries, CoglJournalEntry, i); + _cogl_pipeline_journal_unref (entry->pipeline); + _cogl_clip_stack_unref (entry->clip_stack); + } + + g_array_set_size (journal->entries, 0); + g_array_set_size (journal->vertices, 0); + journal->needed_vbo_len = 0; + journal->fast_read_pixel_count = 0; +} + +/* Note: A return value of FALSE doesn't mean 'no' it means + * 'unknown' */ +gboolean +_cogl_journal_all_entries_within_bounds (CoglJournal *journal, + float clip_x0, + float clip_y0, + float clip_x1, + float clip_y1) +{ + CoglJournalEntry *entry = (CoglJournalEntry *)journal->entries->data; + CoglClipStack *clip_entry; + CoglClipStack *reference = NULL; + int bounds_x0; + int bounds_y0; + int bounds_x1; + int bounds_y1; + int i; + + if (journal->entries->len == 0) + return TRUE; + + /* Find the shortest clip_stack ancestry that leaves us in the + * required bounds */ + for (clip_entry = entry->clip_stack; + clip_entry; + clip_entry = clip_entry->parent) + { + _cogl_clip_stack_get_bounds (clip_entry, + &bounds_x0, &bounds_y0, + &bounds_x1, &bounds_y1); + + if (bounds_x0 >= clip_x0 && bounds_y0 >= clip_y0 && + bounds_x1 <= clip_x1 && bounds_y1 <= clip_y1) + reference = clip_entry; + else + break; + } + + if (!reference) + return FALSE; + + /* For the remaining journal entries we will only verify they share + * 'reference' as an ancestor in their clip stack since that's + * enough to know that they would be within the required bounds. + */ + for (i = 1; i < journal->entries->len; i++) + { + gboolean found_reference = FALSE; + entry = &g_array_index (journal->entries, CoglJournalEntry, i); + + for (clip_entry = entry->clip_stack; + clip_entry; + clip_entry = clip_entry->parent) + { + if (clip_entry == reference) + { + found_reference = TRUE; + break; + } + } + + if (!found_reference) + return FALSE; + } + + return TRUE; +} + /* XXX NB: When _cogl_journal_flush() returns all state relating * to pipelines, all glEnable flags and current matrix state * is undefined. @@ -1219,7 +1333,7 @@ _cogl_journal_flush (CoglJournal *journal, batch_and_call ((CoglJournalEntry *)journal->entries->data, /* first entry */ journal->entries->len, /* max number of entries to consider */ compare_entry_clip_stacks, - _cogl_journal_check_software_clip, /* callback */ + _cogl_journal_maybe_software_clip_entries, /* callback */ &state); /* data */ } @@ -1268,17 +1382,7 @@ _cogl_journal_flush (CoglJournal *journal, cogl_object_unref (state.vertex_array); - for (i = 0; i < journal->entries->len; i++) - { - CoglJournalEntry *entry = - &g_array_index (journal->entries, CoglJournalEntry, i); - _cogl_pipeline_journal_unref (entry->pipeline); - _cogl_clip_stack_unref (entry->clip_stack); - } - - g_array_set_size (journal->entries, 0); - g_array_set_size (journal->vertices, 0); - journal->needed_vbo_len = 0; + _cogl_journal_discard (journal); cogl_pop_framebuffer (); @@ -1438,3 +1542,261 @@ _cogl_journal_log_quad (CoglJournal *journal, COGL_TIMER_STOP (_cogl_uprof_context, log_timer); } +static void +entry_to_screen_polygon (const CoglJournalEntry *entry, + float *vertices, + float *poly) +{ + size_t array_stride = + GET_JOURNAL_ARRAY_STRIDE_FOR_N_LAYERS (entry->n_layers); + CoglMatrixStack *projection_stack; + CoglMatrix projection; + int i; + int viewport[4]; + + poly[0] = vertices[0]; + poly[1] = vertices[1]; + poly[2] = 0; + poly[3] = 1; + + poly[4] = vertices[0]; + poly[5] = vertices[array_stride + 1]; + poly[6] = 0; + poly[7] = 1; + + poly[8] = vertices[array_stride]; + poly[9] = vertices[array_stride + 1]; + poly[10] = 0; + poly[11] = 1; + + poly[12] = vertices[array_stride]; + poly[13] = vertices[1]; + poly[14] = 0; + poly[15] = 1; + + /* TODO: perhaps split the following out into a more generalized + * _cogl_transform_points utility... + */ + + cogl_matrix_transform_points (&entry->model_view, + 2, /* n_components */ + sizeof (float) * 4, /* stride_in */ + poly, /* points_in */ + /* strideout */ + sizeof (float) * 4, + poly, /* points_out */ + 4 /* n_points */); + + projection_stack = + _cogl_framebuffer_get_projection_stack (_cogl_get_framebuffer ()); + _cogl_matrix_stack_get (projection_stack, &projection); + + cogl_matrix_project_points (&projection, + 3, /* n_components */ + sizeof (float) * 4, /* stride_in */ + poly, /* points_in */ + /* strideout */ + sizeof (float) * 4, + poly, /* points_out */ + 4 /* n_points */); + + _cogl_framebuffer_get_viewport4fv (_cogl_get_framebuffer (), + viewport); + +/* Scale from OpenGL normalized device coordinates (ranging from -1 to 1) + * to Cogl window/framebuffer coordinates (ranging from 0 to buffer-size) with + * (0,0) being top left. */ +#define VIEWPORT_TRANSFORM_X(x, vp_origin_x, vp_width) \ + ( ( ((x) + 1.0) * ((vp_width) / 2.0) ) + (vp_origin_x) ) +/* Note: for Y we first flip all coordinates around the X axis while in + * normalized device coodinates */ +#define VIEWPORT_TRANSFORM_Y(y, vp_origin_y, vp_height) \ + ( ( ((-(y)) + 1.0) * ((vp_height) / 2.0) ) + (vp_origin_y) ) + + /* Scale from normalized device coordinates (in range [-1,1]) to + * window coordinates ranging [0,window-size] ... */ + for (i = 0; i < 4; i++) + { + float w = poly[4 * i + 3]; + + /* Perform perspective division */ + poly[4 * i] /= w; + poly[4 * i + 1] /= w; + + /* Apply viewport transform */ + poly[4 * i] = VIEWPORT_TRANSFORM_X (poly[4 * i], + viewport[0], viewport[2]); + poly[4 * i + 1] = VIEWPORT_TRANSFORM_Y (poly[4 * i + 1], + viewport[1], viewport[3]); + } + +#undef VIEWPORT_TRANSFORM_X +#undef VIEWPORT_TRANSFORM_Y +} + +static gboolean +try_checking_point_hits_entry_after_clipping (CoglJournalEntry *entry, + float *vertices, + float x, + float y, + gboolean *hit) +{ + gboolean can_software_clip = TRUE; + gboolean needs_software_clip = FALSE; + CoglClipStack *clip_entry; + + *hit = TRUE; + + /* Verify that all of the clip stack entries are simple rectangle + * clips */ + for (clip_entry = entry->clip_stack; + clip_entry; + clip_entry = clip_entry->parent) + { + if (x < clip_entry->bounds_x0 || + x >= clip_entry->bounds_x1 || + y < clip_entry->bounds_y0 || + y >= clip_entry->bounds_y1) + { + *hit = FALSE; + return TRUE; + } + + if (clip_entry->type == COGL_CLIP_STACK_WINDOW_RECT) + { + /* XXX: technically we could still run the software clip in + * this case because for our purposes we know this clip + * can be ignored now, but [can_]sofware_clip_entry() doesn't + * know this and will bail out. */ + can_software_clip = FALSE; + } + else if (clip_entry->type == COGL_CLIP_STACK_RECT) + { + CoglClipStackRect *rect_entry = (CoglClipStackRect *)entry; + + if (rect_entry->can_be_scissor == FALSE) + needs_software_clip = TRUE; + /* If can_be_scissor is TRUE then we know it's screen + * aligned and the hit test we did above has determined + * that we are inside this clip. */ + } + else + return FALSE; + } + + if (needs_software_clip) + { + ClipBounds clip_bounds; + float poly[16]; + + if (!can_software_clip_entry (entry, NULL, + entry->clip_stack, &clip_bounds)) + return FALSE; + + software_clip_entry (entry, vertices, &clip_bounds); + entry_to_screen_polygon (entry, vertices, poly); + + *hit = _cogl_util_point_in_poly (x, y, poly, sizeof (float) * 4, 4); + return TRUE; + } + + return TRUE; +} + +gboolean +_cogl_journal_try_read_pixel (CoglJournal *journal, + int x, + int y, + CoglPixelFormat format, + guint8 *pixel, + gboolean *found_intersection) +{ + int i; + + _COGL_GET_CONTEXT (ctx, FALSE); + + /* XXX: this number has been plucked out of thin air, but the idea + * is that if so many pixels are being read from the same un-changed + * journal than we expect that it will be more efficient to fail + * here so we end up flushing and rendering the journal so that + * further reads can directly read from the framebuffer. There will + * be a bit more lag to flush the render but if there are going to + * continue being lots of arbitrary single pixel reads they will end + * up faster in the end. */ + if (journal->fast_read_pixel_count > 50) + return FALSE; + + if (format != COGL_PIXEL_FORMAT_RGBA_8888_PRE && + format != COGL_PIXEL_FORMAT_RGBA_8888) + return FALSE; + + *found_intersection = FALSE; + + /* NB: The most recently added journal entry is the last entry, and + * assuming this is a simple scene only comprised of opaque coloured + * rectangles with no special pipelines involved (e.g. enabling + * depth testing) then we can assume painter's algorithm for the + * entries and so our fast read-pixel just needs to walk backwards + * through the journal entries trying to intersect each entry with + * the given point of interest. */ + for (i = journal->entries->len - 1; i >= 0; i--) + { + CoglJournalEntry *entry = + &g_array_index (journal->entries, CoglJournalEntry, i); + guint8 *color = (guint8 *)&g_array_index (journal->vertices, float, + entry->array_offset); + float *vertices = (float *)color + 1; + float poly[16]; + + entry_to_screen_polygon (entry, vertices, poly); + + if (!_cogl_util_point_in_poly (x, y, poly, sizeof (float) * 4, 4)) + continue; + + /* FIXME: the journal should have a back pointer to the + * associated framebuffer, because it should be possible to read + * a pixel from arbitrary framebuffers without needing to + * internally call _cogl_push/pop_framebuffer. + */ + if (entry->clip_stack) + { + gboolean hit; + + if (!try_checking_point_hits_entry_after_clipping (entry, vertices, + x, y, &hit)) + return FALSE; /* hit couldn't be determined */ + + if (!hit) + continue; + } + + *found_intersection = TRUE; + + /* If we find that the rectangle the point of interest + * intersects has any state more complex than a constant opaque + * color then we bail out. */ + if (!_cogl_pipeline_equal (ctx->opaque_color_pipeline, entry->pipeline, + (COGL_PIPELINE_STATE_ALL & + ~COGL_PIPELINE_STATE_COLOR), + COGL_PIPELINE_LAYER_STATE_ALL, + 0)) + return FALSE; + + + /* we currently only care about cases where the premultiplied or + * unpremultipled colors are equivalent... */ + if (color[3] != 0xff) + return FALSE; + + pixel[0] = color[0]; + pixel[1] = color[1]; + pixel[2] = color[2]; + pixel[3] = color[3]; + + goto success; + } + +success: + journal->fast_read_pixel_count++; + return TRUE; +} diff --git a/cogl/cogl-vertex-attribute.c b/cogl/cogl-vertex-attribute.c index d7821a13d..37ff0a1f9 100644 --- a/cogl/cogl-vertex-attribute.c +++ b/cogl/cogl-vertex-attribute.c @@ -480,6 +480,7 @@ enable_gl_state (CoglDrawFlags flags, CoglVertexAttribute **attributes, ValidateLayerState *state) { + CoglFramebuffer *framebuffer = _cogl_get_framebuffer (); int i; #ifdef MAY_HAVE_PROGRAMABLE_GL GLuint generic_index = 0; @@ -492,6 +493,12 @@ enable_gl_state (CoglDrawFlags flags, _COGL_GET_CONTEXT (ctx, COGL_INVALID_HANDLE); + /* In cogl_read_pixels we have a fast-path when reading a single + * pixel and the scene is just comprised of simple rectangles still + * in the journal. For this optimization to work we need to track + * when the framebuffer really does get drawn to. */ + _cogl_framebuffer_dirty (framebuffer); + source = cogl_get_source (); /* Iterate the attributes to work out whether blending needs to be diff --git a/cogl/cogl.c b/cogl/cogl.c index b6c3fa535..d8b0827c9 100644 --- a/cogl/cogl.c +++ b/cogl/cogl.c @@ -497,7 +497,7 @@ _cogl_read_pixels_with_rowstride (int x, guint8 *pixels, int rowstride) { - CoglFramebuffer *framebuffer; + CoglFramebuffer *framebuffer = _cogl_get_framebuffer (); int framebuffer_height; int bpp; CoglBitmap *bmp; @@ -510,6 +510,22 @@ _cogl_read_pixels_with_rowstride (int x, g_return_if_fail (source == COGL_READ_PIXELS_COLOR_BUFFER); + if (width == 1 && height == 1 && !framebuffer->clear_clip_dirty) + { + /* If everything drawn so far for this frame is still in the + * Journal then if all of the rectangles only have a flat + * opaque color we have a fast-path for reading a single pixel + * that avoids the relatively high cost of flushing primitives + * to be drawn on the GPU (considering how simple the geometry + * is in this case) and then blocking on the long GPU pipelines + * for the result. + */ + if (_cogl_framebuffer_try_fast_read_pixel (framebuffer, + x, y, source, format, + pixels)) + return; + } + /* make sure any batched primitives get emitted to the GL driver * before issuing our read pixels... * @@ -521,8 +537,6 @@ _cogl_read_pixels_with_rowstride (int x, */ cogl_flush (); - framebuffer = _cogl_get_framebuffer (); - _cogl_framebuffer_flush_state (framebuffer, 0); framebuffer_height = _cogl_framebuffer_get_height (framebuffer);