diff --git a/cogl/Makefile.am b/cogl/Makefile.am
index 06c493cad..84fdb097c 100644
--- a/cogl/Makefile.am
+++ b/cogl/Makefile.am
@@ -120,6 +120,8 @@ libclutter_cogl_la_SOURCES = \
 	cogl-texture-2d-sliced.c	\
 	cogl-spans.h			\
 	cogl-spans.c			\
+	cogl-journal-private.h		\
+	cogl-journal.c			\
 	$(NULL)
 
 EXTRA_DIST += stb_image.c cogl-enum-types.h.in cogl-enum-types.c.in
diff --git a/cogl/cogl-context.c b/cogl/cogl-context.c
index 6812e888d..e93b5dac0 100644
--- a/cogl/cogl-context.c
+++ b/cogl/cogl-context.c
@@ -29,6 +29,7 @@
 #include "cogl-internal.h"
 #include "cogl-util.h"
 #include "cogl-context.h"
+#include "cogl-journal-private.h"
 #include "cogl-texture-private.h"
 #include "cogl-material-private.h"
 
diff --git a/cogl/cogl-journal-private.h b/cogl/cogl-journal-private.h
new file mode 100644
index 000000000..dc008f67a
--- /dev/null
+++ b/cogl/cogl-journal-private.h
@@ -0,0 +1,57 @@
+/*
+ * Cogl
+ *
+ * An object oriented GL/GLES Abstraction/Utility Layer
+ *
+ * Copyright (C) 2007,2008,2009 Intel Corporation.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __COGL_JOURNAL_PRIVATE_H
+#define __COGL_JOURNAL_PRIVATE_H
+
+#include "cogl-handle.h"
+
+/* To improve batching of geometry when submitting vertices to OpenGL we
+ * log the texture rectangles we want to draw to a journal, so when we
+ * later flush the journal we aim to batch data, and gl draw calls. */
+typedef struct _CoglJournalEntry
+{
+  CoglHandle               material;
+  int                      n_layers;
+  CoglMaterialFlushOptions flush_options;
+  CoglMatrix               model_view;
+  /* XXX: These entries are pretty big now considering the padding in
+   * CoglMaterialFlushOptions and CoglMatrix, so we might need to optimize this
+   * later. */
+} CoglJournalEntry;
+
+void _cogl_journal_flush (void);
+
+void
+_cogl_journal_log_quad (float         x_1,
+                        float         y_1,
+                        float         x_2,
+                        float         y_2,
+                        CoglHandle    material,
+                        int           n_layers,
+                        guint32       fallback_layers,
+                        GLuint        layer0_override_texture,
+                        float        *tex_coords,
+                        unsigned int  tex_coords_len);
+
+#endif /* __COGL_JOURNAL_PRIVATE_H */
diff --git a/cogl/cogl-journal.c b/cogl/cogl-journal.c
new file mode 100644
index 000000000..e9a7b61dc
--- /dev/null
+++ b/cogl/cogl-journal.c
@@ -0,0 +1,744 @@
+/*
+ * Cogl
+ *
+ * An object oriented GL/GLES Abstraction/Utility Layer
+ *
+ * Copyright (C) 2007,2008,2009 Intel Corporation.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "cogl.h"
+#include "cogl-internal.h"
+#include "cogl-context.h"
+#include "cogl-journal-private.h"
+#include "cogl-texture-private.h"
+#include "cogl-material-private.h"
+#include "cogl-vertex-buffer-private.h"
+
+#include <string.h>
+#include <gmodule.h>
+#include <math.h>
+
+#define _COGL_MAX_BEZ_RECURSE_DEPTH 16
+
+#ifdef HAVE_COGL_GL
+
+#define glGenBuffers ctx->drv.pf_glGenBuffersARB
+#define glBindBuffer ctx->drv.pf_glBindBufferARB
+#define glBufferData ctx->drv.pf_glBufferDataARB
+#define glBufferSubData ctx->drv.pf_glBufferSubDataARB
+#define glDeleteBuffers ctx->drv.pf_glDeleteBuffersARB
+#define glClientActiveTexture ctx->drv.pf_glClientActiveTexture
+
+#elif defined (HAVE_COGL_GLES2)
+
+#include "../gles/cogl-gles2-wrapper.h"
+
+#endif
+
+/* XXX NB:
+ * Our journal's vertex data is arranged as follows:
+ * 4 vertices per quad:
+ *    2 or 3 GLfloats per position (3 when doing software transforms)
+ *    4 RGBA GLubytes,
+ *    2 GLfloats per tex coord * n_layers
+ *
+ * Where n_layers corresponds to the number of material layers enabled
+ *
+ * To avoid frequent changes in the stride of our vertex data we always pad
+ * n_layers to be >= 2
+ *
+ * When we are transforming quads in software we need to also track the z
+ * coordinate of transformed vertices.
+ *
+ * So for a given number of layers this gets the stride in 32bit words:
+ */
+#define SW_TRANSFORM      (!(cogl_debug_flags & \
+                             COGL_DEBUG_DISABLE_SOFTWARE_TRANSFORM))
+#define POS_STRIDE        (SW_TRANSFORM ? 3 : 2) /* number of 32bit words */
+#define N_POS_COMPONENTS  POS_STRIDE
+#define COLOR_STRIDE      1 /* number of 32bit words */
+#define TEX_STRIDE        2 /* number of 32bit words */
+#define MIN_LAYER_PADING  2
+#define GET_JOURNAL_VB_STRIDE_FOR_N_LAYERS(N_LAYERS) \
+  (POS_STRIDE + COLOR_STRIDE + \
+   TEX_STRIDE * (N_LAYERS < MIN_LAYER_PADING ? MIN_LAYER_PADING : N_LAYERS))
+
+typedef CoglVertexBufferIndices  CoglJournalIndices;
+
+typedef struct _CoglJournalFlushState
+{
+  size_t              stride;
+  /* Note: this is a pointer to handle fallbacks. It normally holds a VBO
+   * offset, but when the driver doesn't support VBOs then this points into
+   * our GArray of logged vertices. */
+  char *              vbo_offset;
+  GLuint              vertex_offset;
+#ifndef HAVE_COGL_GL
+  CoglJournalIndices *indices;
+  size_t              indices_type_size;
+#endif
+} CoglJournalFlushState;
+
+typedef void (*CoglJournalBatchCallback) (CoglJournalEntry *start,
+                                          int n_entries,
+                                          void *data);
+typedef gboolean (*CoglJournalBatchTest) (CoglJournalEntry *entry0,
+                                          CoglJournalEntry *entry1);
+
+void
+_cogl_journal_dump_quad_vertices (guint8 *data, int n_layers)
+{
+  size_t stride = GET_JOURNAL_VB_STRIDE_FOR_N_LAYERS (n_layers);
+  int i;
+
+  _COGL_GET_CONTEXT (ctx, NO_RETVAL);
+
+  g_print ("n_layers = %d; stride = %d; pos stride = %d; color stride = %d; "
+           "tex stride = %d; stride in bytes = %d\n",
+           n_layers, (int)stride, POS_STRIDE, COLOR_STRIDE,
+           TEX_STRIDE, (int)stride * 4);
+
+  for (i = 0; i < 4; i++)
+    {
+      float *v = (float *)data + (i * stride);
+      guint8 *c = data + (POS_STRIDE * 4) + (i * stride * 4);
+      int j;
+
+      if (cogl_debug_flags & COGL_DEBUG_DISABLE_SOFTWARE_TRANSFORM)
+        g_print ("v%d: x = %f, y = %f, rgba=0x%02X%02X%02X%02X",
+                 i, v[0], v[1], c[0], c[1], c[2], c[3]);
+      else
+        g_print ("v%d: x = %f, y = %f, z = %f, rgba=0x%02X%02X%02X%02X",
+                 i, v[0], v[1], v[2], c[0], c[1], c[2], c[3]);
+      for (j = 0; j < n_layers; j++)
+        {
+          float *t = v + POS_STRIDE + COLOR_STRIDE + TEX_STRIDE * j;
+          g_print (", tx%d = %f, ty%d = %f", j, t[0], j, t[1]);
+        }
+      g_print ("\n");
+    }
+}
+
+void
+_cogl_journal_dump_quad_batch (guint8 *data, int n_layers, int n_quads)
+{
+  size_t byte_stride = GET_JOURNAL_VB_STRIDE_FOR_N_LAYERS (n_layers) * 4;
+  int i;
+
+  g_print ("_cogl_journal_dump_quad_batch: n_layers = %d, n_quads = %d\n",
+           n_layers, n_quads);
+  for (i = 0; i < n_quads; i++)
+    _cogl_journal_dump_quad_vertices (data + byte_stride * 4 * i, n_layers);
+}
+
+static void
+batch_and_call (CoglJournalEntry *entries,
+                int n_entries,
+                CoglJournalBatchTest can_batch_callback,
+                CoglJournalBatchCallback batch_callback,
+                void *data)
+{
+  int i;
+  int batch_len = 1;
+  CoglJournalEntry *batch_start = entries;
+
+  for (i = 1; i < n_entries; i++)
+    {
+      CoglJournalEntry *entry0 = &entries[i - 1];
+      CoglJournalEntry *entry1 = entry0 + 1;
+
+      if (can_batch_callback (entry0, entry1))
+        {
+          batch_len++;
+          continue;
+        }
+
+      batch_callback (batch_start, batch_len, data);
+
+      batch_start = entry1;
+      batch_len = 1;
+    }
+
+  /* The last batch... */
+  batch_callback (batch_start, batch_len, data);
+}
+
+static void
+_cogl_journal_flush_modelview_and_entries (CoglJournalEntry *batch_start,
+                                           int               batch_len,
+                                           void             *data)
+{
+  CoglJournalFlushState *state = data;
+
+  if (G_UNLIKELY (cogl_debug_flags & COGL_DEBUG_BATCHING))
+    g_print ("BATCHING:    modelview batch len = %d\n", batch_len);
+
+  if (G_UNLIKELY (cogl_debug_flags & COGL_DEBUG_DISABLE_SOFTWARE_TRANSFORM))
+    GE (glLoadMatrixf ((GLfloat *)&batch_start->model_view));
+
+#ifdef HAVE_COGL_GL
+
+  GE (glDrawArrays (GL_QUADS, state->vertex_offset, batch_len * 4));
+
+#else /* HAVE_COGL_GL */
+
+  if (batch_len > 1)
+    {
+      int indices_offset = (state->vertex_offset / 4) * 6;
+      GE (glDrawElements (GL_TRIANGLES,
+                          6 * batch_len,
+                          state->indices->type,
+                          (GLvoid*)(indices_offset * state->indices_type_size)));
+    }
+  else
+    {
+      GE (glDrawArrays (GL_TRIANGLE_FAN,
+                        state->vertex_offset, /* first */
+                        4)); /* n vertices */
+    }
+#endif
+
+  /* DEBUGGING CODE XXX:
+   * This path will cause all rectangles to be drawn with a red, green
+   * or blue outline with no blending. This may e.g. help with debugging
+   * texture slicing issues or blending issues, plus it looks quite cool.
+   */
+  if (cogl_debug_flags & COGL_DEBUG_RECTANGLES)
+    {
+      static CoglHandle outline = COGL_INVALID_HANDLE;
+      static int color = 0;
+      int i;
+      if (outline == COGL_INVALID_HANDLE)
+        outline = cogl_material_new ();
+
+      cogl_enable (COGL_ENABLE_VERTEX_ARRAY);
+      for (i = 0; i < batch_len; i++, color = (color + 1) % 3)
+        {
+          cogl_material_set_color4ub (outline,
+                                      color == 0 ? 0xff : 0x00,
+                                      color == 1 ? 0xff : 0x00,
+                                      color == 2 ? 0xff : 0x00,
+                                      0xff);
+          _cogl_material_flush_gl_state (outline, NULL);
+          GE( glDrawArrays (GL_LINE_LOOP, 4 * i, 4) );
+        }
+    }
+
+  state->vertex_offset += (4 * batch_len);
+}
+
+static gboolean
+compare_entry_modelviews (CoglJournalEntry *entry0,
+                          CoglJournalEntry *entry1)
+{
+  /* Batch together quads with the same model view matrix */
+
+  /* FIXME: this is nasty, there are much nicer ways to track this
+   * (at the add_quad_vertices level) without resorting to a memcmp!
+   *
+   * E.g. If the cogl-current-matrix code maintained an "age" for
+   * the modelview matrix we could simply check in add_quad_vertices
+   * if the age has increased, and if so record the change as a
+   * boolean in the journal.
+   */
+
+  if (memcmp (&entry0->model_view, &entry1->model_view,
+              sizeof (GLfloat) * 16) == 0)
+    return TRUE;
+  else
+    return FALSE;
+}
+
+/* At this point we have a run of quads that we know have compatible
+ * materials, but they may not all have the same modelview matrix */
+static void
+_cogl_journal_flush_material_and_entries (CoglJournalEntry *batch_start,
+                                          gint              batch_len,
+                                          void             *data)
+{
+  gulong                 enable_flags = 0;
+
+  _COGL_GET_CONTEXT (ctx, NO_RETVAL);
+
+  if (G_UNLIKELY (cogl_debug_flags & COGL_DEBUG_BATCHING))
+    g_print ("BATCHING:   material batch len = %d\n", batch_len);
+
+  _cogl_material_flush_gl_state (batch_start->material,
+                                 &batch_start->flush_options);
+
+  /* FIXME: This api is a bit yukky, ideally it will be removed if we
+   * re-work the cogl_enable mechanism */
+  enable_flags |= _cogl_material_get_cogl_enable_flags (batch_start->material);
+
+  if (ctx->enable_backface_culling)
+    enable_flags |= COGL_ENABLE_BACKFACE_CULLING;
+
+  enable_flags |= COGL_ENABLE_VERTEX_ARRAY;
+  enable_flags |= COGL_ENABLE_COLOR_ARRAY;
+  cogl_enable (enable_flags);
+
+  /* If we haven't transformed the quads in software then we need to also break
+   * up batches according to changes in the modelview matrix... */
+  if (cogl_debug_flags & COGL_DEBUG_DISABLE_SOFTWARE_TRANSFORM)
+    {
+      batch_and_call (batch_start,
+                      batch_len,
+                      compare_entry_modelviews,
+                      _cogl_journal_flush_modelview_and_entries,
+                      data);
+    }
+  else
+    _cogl_journal_flush_modelview_and_entries (batch_start, batch_len, data);
+}
+
+static gboolean
+compare_entry_materials (CoglJournalEntry *entry0, CoglJournalEntry *entry1)
+{
+  /* batch rectangles using compatible materials */
+
+  /* XXX: _cogl_material_equal may give false negatives since it avoids
+   * deep comparisons as an optimization. It aims to compare enough so
+   * that we that we are able to batch the 90% common cases, but may not
+   * look at less common differences. */
+  if (_cogl_material_equal (entry0->material,
+                            &entry0->flush_options,
+                            entry1->material,
+                            &entry1->flush_options))
+    return TRUE;
+  else
+    return FALSE;
+}
+
+/* Since the stride may not reflect the number of texture layers in use
+ * (due to padding) we deal with texture coordinate offsets separately
+ * from vertex and color offsets... */
+static void
+_cogl_journal_flush_texcoord_vbo_offsets_and_entries (
+                                          CoglJournalEntry *batch_start,
+                                          gint              batch_len,
+                                          void             *data)
+{
+  CoglJournalFlushState *state = data;
+  int                    prev_n_texcoord_arrays_enabled;
+  int                    i;
+
+  _COGL_GET_CONTEXT (ctx, NO_RETVAL);
+
+  for (i = 0; i < batch_start->n_layers; i++)
+    {
+      GE (glClientActiveTexture (GL_TEXTURE0 + i));
+      GE (glEnableClientState (GL_TEXTURE_COORD_ARRAY));
+      /* XXX NB:
+       * Our journal's vertex data is arranged as follows:
+       * 4 vertices per quad:
+       *    2 or 3 GLfloats per position (3 when doing software transforms)
+       *    4 RGBA GLubytes,
+       *    2 GLfloats per tex coord * n_layers
+       * (though n_layers may be padded; see definition of
+       *  GET_JOURNAL_VB_STRIDE_FOR_N_LAYERS for details)
+       */
+      GE (glTexCoordPointer (2, GL_FLOAT, state->stride,
+                             (void *)(state->vbo_offset +
+                                      (POS_STRIDE + COLOR_STRIDE) * 4 +
+                                      TEX_STRIDE * 4 * i)));
+    }
+  prev_n_texcoord_arrays_enabled =
+    ctx->n_texcoord_arrays_enabled;
+  ctx->n_texcoord_arrays_enabled = batch_start->n_layers;
+  for (; i < prev_n_texcoord_arrays_enabled; i++)
+    {
+      GE (glClientActiveTexture (GL_TEXTURE0 + i));
+      GE (glDisableClientState (GL_TEXTURE_COORD_ARRAY));
+    }
+
+  batch_and_call (batch_start,
+                  batch_len,
+                  compare_entry_materials,
+                  _cogl_journal_flush_material_and_entries,
+                  data);
+}
+
+static gboolean
+compare_entry_n_layers (CoglJournalEntry *entry0, CoglJournalEntry *entry1)
+{
+  if (entry0->n_layers == entry1->n_layers)
+    return TRUE;
+  else
+    return FALSE;
+}
+
+/* At this point we know the stride has changed from the previous batch
+ * of journal entries */
+static void
+_cogl_journal_flush_vbo_offsets_and_entries (CoglJournalEntry *batch_start,
+                                             gint              batch_len,
+                                             void             *data)
+{
+  CoglJournalFlushState   *state = data;
+  size_t                   stride;
+#ifndef HAVE_COGL_GL
+  int                      needed_indices = batch_len * 6;
+  CoglHandle               indices_handle;
+  CoglVertexBufferIndices *indices;
+#endif
+
+  _COGL_GET_CONTEXT (ctx, NO_RETVAL);
+
+  if (G_UNLIKELY (cogl_debug_flags & COGL_DEBUG_BATCHING))
+    g_print ("BATCHING:  vbo offset batch len = %d\n", batch_len);
+
+  /* XXX NB:
+   * Our journal's vertex data is arranged as follows:
+   * 4 vertices per quad:
+   *    2 or 3 GLfloats per position (3 when doing software transforms)
+   *    4 RGBA GLubytes,
+   *    2 GLfloats per tex coord * n_layers
+   * (though n_layers may be padded; see definition of
+   *  GET_JOURNAL_VB_STRIDE_FOR_N_LAYERS for details)
+   */
+  stride = GET_JOURNAL_VB_STRIDE_FOR_N_LAYERS (batch_start->n_layers);
+  stride *= sizeof (GLfloat);
+  state->stride = stride;
+
+  GE (glVertexPointer (N_POS_COMPONENTS, GL_FLOAT, stride,
+                       (void *)state->vbo_offset));
+  GE (glColorPointer (4, GL_UNSIGNED_BYTE, stride,
+                      (void *)(state->vbo_offset + (POS_STRIDE * 4))));
+
+#ifndef HAVE_COGL_GL
+  indices_handle = cogl_vertex_buffer_indices_get_for_quads (needed_indices);
+  indices = _cogl_vertex_buffer_indices_pointer_from_handle (indices_handle);
+  state->indices = indices;
+
+  if (indices->type == GL_UNSIGNED_BYTE)
+    state->indices_type_size = 1;
+  else if (indices->type == GL_UNSIGNED_SHORT)
+    state->indices_type_size = 2;
+  else
+    g_critical ("unknown indices type %d", indices->type);
+
+  GE (glBindBuffer (GL_ELEMENT_ARRAY_BUFFER,
+                    GPOINTER_TO_UINT (indices->vbo_name)));
+#endif
+
+  /* We only call gl{Vertex,Color,Texture}Pointer when the stride within
+   * the VBO changes. (due to a change in the number of material layers)
+   * While the stride remains constant we walk forward through the above
+   * VBO using a vertex offset passed to glDraw{Arrays,Elements} */
+  state->vertex_offset = 0;
+
+  if (cogl_debug_flags & COGL_DEBUG_JOURNAL)
+    {
+      guint8 *verts;
+
+      if (cogl_get_features () & COGL_FEATURE_VBOS)
+        verts = ((guint8 *)ctx->logged_vertices->data) +
+          (size_t)state->vbo_offset;
+      else
+        verts = (guint8 *)state->vbo_offset;
+      _cogl_journal_dump_quad_batch (verts,
+                                     batch_start->n_layers,
+                                     batch_len);
+    }
+
+  batch_and_call (batch_start,
+                  batch_len,
+                  compare_entry_n_layers,
+                  _cogl_journal_flush_texcoord_vbo_offsets_and_entries,
+                  data);
+
+  /* progress forward through the VBO containing all our vertices */
+  state->vbo_offset += (stride * 4 * batch_len);
+  if (G_UNLIKELY (cogl_debug_flags & COGL_DEBUG_JOURNAL))
+    g_print ("new vbo offset = %lu\n", (gulong)state->vbo_offset);
+}
+
+static gboolean
+compare_entry_strides (CoglJournalEntry *entry0, CoglJournalEntry *entry1)
+{
+  /* Currently the only thing that affects the stride for our vertex arrays
+   * is the number of material layers. We need to update our VBO offsets
+   * whenever the stride changes. */
+  /* TODO: We should be padding the n_layers == 1 case as if it were
+   * n_layers == 2 so we can reduce the need to split batches. */
+  if (entry0->n_layers == entry1->n_layers ||
+      (entry0->n_layers <= MIN_LAYER_PADING &&
+       entry1->n_layers <= MIN_LAYER_PADING))
+    return TRUE;
+  else
+    return FALSE;
+}
+
+static GLuint
+upload_vertices_to_vbo (GArray *vertices, CoglJournalFlushState *state)
+{
+  size_t needed_vbo_len;
+  GLuint journal_vbo;
+
+  _COGL_GET_CONTEXT (ctx, 0);
+
+  needed_vbo_len = vertices->len * sizeof (GLfloat);
+
+  g_assert (needed_vbo_len);
+  GE (glGenBuffers (1, &journal_vbo));
+  GE (glBindBuffer (GL_ARRAY_BUFFER, journal_vbo));
+  GE (glBufferData (GL_ARRAY_BUFFER,
+                    needed_vbo_len,
+                    vertices->data,
+                    GL_STATIC_DRAW));
+
+  /* As we flush the journal entries in batches we walk forward through the
+   * above VBO starting at offset 0... */
+  state->vbo_offset = 0;
+
+  return journal_vbo;
+}
+
+/* XXX NB: When _cogl_journal_flush() returns all state relating
+ * to materials, all glEnable flags and current matrix state
+ * is undefined.
+ */
+void
+_cogl_journal_flush (void)
+{
+  CoglJournalFlushState state;
+  int                   i;
+  GLuint                journal_vbo;
+  gboolean              vbo_fallback =
+    (cogl_get_features () & COGL_FEATURE_VBOS) ? FALSE : TRUE;
+
+  _COGL_GET_CONTEXT (ctx, NO_RETVAL);
+
+  if (ctx->journal->len == 0)
+    return;
+
+  if (G_UNLIKELY (cogl_debug_flags & COGL_DEBUG_BATCHING))
+    g_print ("BATCHING: journal len = %d\n", ctx->journal->len);
+
+  /* Load all the vertex data we have accumulated so far into a single VBO
+   * to minimize memory management costs within the GL driver. */
+  if (!vbo_fallback)
+    journal_vbo = upload_vertices_to_vbo (ctx->logged_vertices, &state);
+  else
+    state.vbo_offset = (char *)ctx->logged_vertices->data;
+
+  /* Since the journal deals with emitting the modelview matrices manually
+   * we need to dirty our client side modelview matrix stack cache... */
+  _cogl_current_matrix_state_dirty ();
+
+  /* And explicitly flush other matrix stacks... */
+  _cogl_set_current_matrix (COGL_MATRIX_PROJECTION);
+  _cogl_current_matrix_state_flush ();
+
+  /* If we have transformed all our quads at log time then the whole journal
+   * then we ensure no further model transform is applied by loading the
+   * identity matrix here...*/
+  if (!(cogl_debug_flags & COGL_DEBUG_DISABLE_SOFTWARE_TRANSFORM))
+    {
+      GE (glMatrixMode (GL_MODELVIEW));
+      glLoadIdentity ();
+    }
+
+  /* batch_and_call() batches a list of journal entries according to some
+   * given criteria and calls a callback once for each determined batch.
+   *
+   * The process of flushing the journal is staggered to reduce the amount
+   * of driver/GPU state changes necessary:
+   * 1) We split the entries according to the stride of the vertices:
+   *      Each time the stride of our vertex data changes we need to call
+   *      gl{Vertex,Color}Pointer to inform GL of new VBO offsets.
+   *      Currently the only thing that affects the stride of our vertex data
+   *      is the number of material layers.
+   * 2) We split the entries explicitly by the number of material layers:
+   *      We pad our vertex data when the number of layers is < 2 so that we
+   *      can minimize changes in stride. Each time the number of layers
+   *      changes we need to call glTexCoordPointer to inform GL of new VBO
+   *      offsets.
+   * 3) We then split according to compatible Cogl materials:
+   *      This is where we flush material state
+   * 4) Finally we split according to modelview matrix changes:
+   *      This is when we finally tell GL to draw something.
+   *      Note: Splitting by modelview changes is skipped when are doing the
+   *      vertex transformation in software at log time.
+   */
+  batch_and_call ((CoglJournalEntry *)ctx->journal->data, /* first entry */
+                  ctx->journal->len, /* max number of entries to consider */
+                  compare_entry_strides,
+                  _cogl_journal_flush_vbo_offsets_and_entries, /* callback */
+                  &state); /* data */
+
+  for (i = 0; i < ctx->journal->len; i++)
+    {
+      CoglJournalEntry *entry =
+        &g_array_index (ctx->journal, CoglJournalEntry, i);
+      _cogl_material_journal_unref (entry->material);
+    }
+
+  if (!vbo_fallback)
+    GE (glDeleteBuffers (1, &journal_vbo));
+
+  g_array_set_size (ctx->journal, 0);
+  g_array_set_size (ctx->logged_vertices, 0);
+}
+
+void
+_cogl_journal_log_quad (float         x_1,
+                        float         y_1,
+                        float         x_2,
+                        float         y_2,
+                        CoglHandle    material,
+                        int           n_layers,
+                        guint32       fallback_layers,
+                        GLuint        layer0_override_texture,
+                        float        *tex_coords,
+                        unsigned int  tex_coords_len)
+{
+  size_t            stride;
+  size_t            byte_stride;
+  int               next_vert;
+  GLfloat          *v;
+  GLubyte          *c;
+  GLubyte          *src_c;
+  int               i;
+  int               next_entry;
+  guint32           disable_layers;
+  CoglJournalEntry *entry;
+
+  _COGL_GET_CONTEXT (ctx, NO_RETVAL);
+
+  /* The vertex data is logged into a separate array in a layout that can be
+   * directly passed to OpenGL
+   */
+
+  /* XXX: See definition of GET_JOURNAL_VB_STRIDE_FOR_N_LAYERS for details
+   * about how we pack our vertex data */
+  stride = GET_JOURNAL_VB_STRIDE_FOR_N_LAYERS (n_layers);
+  /* NB: stride is in 32bit words */
+  byte_stride = stride * 4;
+
+  next_vert = ctx->logged_vertices->len;
+  g_array_set_size (ctx->logged_vertices, next_vert + 4 * stride);
+  v = &g_array_index (ctx->logged_vertices, GLfloat, next_vert);
+  c = (GLubyte *)(v + POS_STRIDE);
+
+  /* XXX: All the jumping around to fill in this strided buffer doesn't
+   * seem ideal. */
+
+  /* XXX: we could defer expanding the vertex data for GL until we come
+   * to flushing the journal. */
+
+  /* FIXME: This is a hacky optimization, since it will break if we
+   * change the definition of CoglColor: */
+  _cogl_material_get_colorubv (material, c);
+  src_c = c;
+  for (i = 0; i < 3; i++)
+    {
+      c += byte_stride;
+      memcpy (c, src_c, 4);
+    }
+
+  if (G_UNLIKELY (cogl_debug_flags & COGL_DEBUG_DISABLE_SOFTWARE_TRANSFORM))
+    {
+      v[0] = x_1; v[1] = y_1;
+      v += stride;
+      v[0] = x_1; v[1] = y_2;
+      v += stride;
+      v[0] = x_2; v[1] = y_2;
+      v += stride;
+      v[0] = x_2; v[1] = y_1;
+    }
+  else
+    {
+      CoglMatrix  mv;
+      float       x, y, z, w;
+
+      cogl_get_modelview_matrix (&mv);
+
+      x = x_1, y = y_1, z = 0; w = 1;
+      cogl_matrix_transform_point (&mv, &x, &y, &z, &w);
+      v[0] = x; v[1] = y; v[2] = z;
+      v += stride;
+      x = x_1, y = y_2, z = 0; w = 1;
+      cogl_matrix_transform_point (&mv, &x, &y, &z, &w);
+      v[0] = x; v[1] = y; v[2] = z;
+      v += stride;
+      x = x_2, y = y_2, z = 0; w = 1;
+      cogl_matrix_transform_point (&mv, &x, &y, &z, &w);
+      v[0] = x; v[1] = y; v[2] = z;
+      v += stride;
+      x = x_2, y = y_1, z = 0; w = 1;
+      cogl_matrix_transform_point (&mv, &x, &y, &z, &w);
+      v[0] = x; v[1] = y; v[2] = z;
+    }
+
+  for (i = 0; i < n_layers; i++)
+    {
+      /* XXX: See definition of GET_JOURNAL_VB_STRIDE_FOR_N_LAYERS for details
+       * about how we pack our vertex data */
+      GLfloat *t = &g_array_index (ctx->logged_vertices, GLfloat,
+                                   next_vert +  POS_STRIDE +
+                                   COLOR_STRIDE + TEX_STRIDE * i);
+
+      t[0] = tex_coords[0]; t[1] = tex_coords[1];
+      t += stride;
+      t[0] = tex_coords[0]; t[1] = tex_coords[3];
+      t += stride;
+      t[0] = tex_coords[2]; t[1] = tex_coords[3];
+      t += stride;
+      t[0] = tex_coords[2]; t[1] = tex_coords[1];
+    }
+
+  if (G_UNLIKELY (cogl_debug_flags & COGL_DEBUG_JOURNAL))
+    {
+      g_print ("Logged new quad:\n");
+      v = &g_array_index (ctx->logged_vertices, GLfloat, next_vert);
+      _cogl_journal_dump_quad_vertices ((guint8 *)v, n_layers);
+    }
+
+  next_entry = ctx->journal->len;
+  g_array_set_size (ctx->journal, next_entry + 1);
+  entry = &g_array_index (ctx->journal, CoglJournalEntry, next_entry);
+
+  disable_layers = (1 << n_layers) - 1;
+  disable_layers = ~disable_layers;
+
+  entry->material = _cogl_material_journal_ref (material);
+  entry->n_layers = n_layers;
+  entry->flush_options.flags =
+    COGL_MATERIAL_FLUSH_FALLBACK_MASK |
+    COGL_MATERIAL_FLUSH_DISABLE_MASK |
+    COGL_MATERIAL_FLUSH_SKIP_GL_COLOR;
+  entry->flush_options.fallback_layers = fallback_layers;
+  entry->flush_options.disable_layers = disable_layers;
+  if (layer0_override_texture)
+    {
+      entry->flush_options.flags |= COGL_MATERIAL_FLUSH_LAYER0_OVERRIDE;
+      entry->flush_options.layer0_override_texture = layer0_override_texture;
+    }
+  if (G_UNLIKELY (cogl_debug_flags & COGL_DEBUG_DISABLE_SOFTWARE_TRANSFORM))
+    cogl_get_modelview_matrix (&entry->model_view);
+
+  if (G_UNLIKELY (cogl_debug_flags & COGL_DEBUG_DISABLE_BATCHING
+                  || cogl_debug_flags & COGL_DEBUG_RECTANGLES))
+    _cogl_journal_flush ();
+}
+
diff --git a/cogl/cogl-primitives.c b/cogl/cogl-primitives.c
index ca51f8a1d..76df31f79 100644
--- a/cogl/cogl-primitives.c
+++ b/cogl/cogl-primitives.c
@@ -28,81 +28,16 @@
 #include "cogl.h"
 #include "cogl-internal.h"
 #include "cogl-context.h"
+#include "cogl-journal-private.h"
 #include "cogl-texture-private.h"
 #include "cogl-material-private.h"
 #include "cogl-vertex-buffer-private.h"
 
 #include <string.h>
-#include <gmodule.h>
 #include <math.h>
 
 #define _COGL_MAX_BEZ_RECURSE_DEPTH 16
 
-#ifdef HAVE_COGL_GL
-
-#define glGenBuffers ctx->drv.pf_glGenBuffersARB
-#define glBindBuffer ctx->drv.pf_glBindBufferARB
-#define glBufferData ctx->drv.pf_glBufferDataARB
-#define glBufferSubData ctx->drv.pf_glBufferSubDataARB
-#define glDeleteBuffers ctx->drv.pf_glDeleteBuffersARB
-#define glClientActiveTexture ctx->drv.pf_glClientActiveTexture
-
-#elif defined (HAVE_COGL_GLES2)
-
-#include "../gles/cogl-gles2-wrapper.h"
-
-#endif
-
-
-/* XXX NB:
- * Our journal's vertex data is arranged as follows:
- * 4 vertices per quad:
- *    2 or 3 GLfloats per position (3 when doing software transforms)
- *    4 RGBA GLubytes,
- *    2 GLfloats per tex coord * n_layers
- *
- * Where n_layers corresponds to the number of material layers enabled
- *
- * To avoid frequent changes in the stride of our vertex data we always pad
- * n_layers to be >= 2
- *
- * When we are transforming quads in software we need to also track the z
- * coordinate of transformed vertices.
- *
- * So for a given number of layers this gets the stride in 32bit words:
- */
-#define SW_TRANSFORM      (!(cogl_debug_flags & \
-                             COGL_DEBUG_DISABLE_SOFTWARE_TRANSFORM))
-#define POS_STRIDE        (SW_TRANSFORM ? 3 : 2) /* number of 32bit words */
-#define N_POS_COMPONENTS  POS_STRIDE
-#define COLOR_STRIDE      1 /* number of 32bit words */
-#define TEX_STRIDE        2 /* number of 32bit words */
-#define MIN_LAYER_PADING  2
-#define GET_JOURNAL_VB_STRIDE_FOR_N_LAYERS(N_LAYERS) \
-  (POS_STRIDE + COLOR_STRIDE + \
-   TEX_STRIDE * (N_LAYERS < MIN_LAYER_PADING ? MIN_LAYER_PADING : N_LAYERS))
-
-
-typedef void (*CoglJournalBatchCallback) (CoglJournalEntry *start,
-                                          int n_entries,
-                                          void *data);
-typedef gboolean (*CoglJournalBatchTest) (CoglJournalEntry *entry0,
-                                          CoglJournalEntry *entry1);
-typedef CoglVertexBufferIndices  CoglJournalIndices;
-
-typedef struct _CoglJournalFlushState
-{
-  size_t              stride;
-  /* Note: this is a pointer to handle fallbacks. It normally holds a VBO
-   * offset, but when the driver doesn't support VBOs then this points into
-   * our GArray of logged vertices. */
-  char *                   vbo_offset;
-  GLuint                   vertex_offset;
-#ifndef HAVE_COGL_GL
-  CoglJournalIndices *indices;
-  size_t              indices_type_size;
-#endif
-} CoglJournalFlushState;
 
 typedef struct _TextureSlicedQuadState
 {
@@ -133,644 +68,6 @@ void _cogl_path_add_node    (gboolean new_sub_path,
 void _cogl_path_fill_nodes    ();
 void _cogl_path_stroke_nodes  ();
 
-void
-_cogl_journal_dump_quad_vertices (guint8 *data, int n_layers)
-{
-  size_t stride = GET_JOURNAL_VB_STRIDE_FOR_N_LAYERS (n_layers);
-  int i;
-
-  _COGL_GET_CONTEXT (ctx, NO_RETVAL);
-
-  g_print ("n_layers = %d; stride = %d; pos stride = %d; color stride = %d; "
-           "tex stride = %d; stride in bytes = %d\n",
-           n_layers, (int)stride, POS_STRIDE, COLOR_STRIDE,
-           TEX_STRIDE, (int)stride * 4);
-
-  for (i = 0; i < 4; i++)
-    {
-      float *v = (float *)data + (i * stride);
-      guint8 *c = data + (POS_STRIDE * 4) + (i * stride * 4);
-      int j;
-
-      if (cogl_debug_flags & COGL_DEBUG_DISABLE_SOFTWARE_TRANSFORM)
-        g_print ("v%d: x = %f, y = %f, rgba=0x%02X%02X%02X%02X",
-                 i, v[0], v[1], c[0], c[1], c[2], c[3]);
-      else
-        g_print ("v%d: x = %f, y = %f, z = %f, rgba=0x%02X%02X%02X%02X",
-                 i, v[0], v[1], v[2], c[0], c[1], c[2], c[3]);
-      for (j = 0; j < n_layers; j++)
-        {
-          float *t = v + POS_STRIDE + COLOR_STRIDE + TEX_STRIDE * j;
-          g_print (", tx%d = %f, ty%d = %f", j, t[0], j, t[1]);
-        }
-      g_print ("\n");
-    }
-}
-
-void
-_cogl_journal_dump_quad_batch (guint8 *data, int n_layers, int n_quads)
-{
-  size_t byte_stride = GET_JOURNAL_VB_STRIDE_FOR_N_LAYERS (n_layers) * 4;
-  int i;
-
-  g_print ("_cogl_journal_dump_quad_batch: n_layers = %d, n_quads = %d\n",
-           n_layers, n_quads);
-  for (i = 0; i < n_quads; i++)
-    _cogl_journal_dump_quad_vertices (data + byte_stride * 4 * i, n_layers);
-}
-
-static void
-batch_and_call (CoglJournalEntry *entries,
-                int n_entries,
-                CoglJournalBatchTest can_batch_callback,
-                CoglJournalBatchCallback batch_callback,
-                void *data)
-{
-  int i;
-  int batch_len = 1;
-  CoglJournalEntry *batch_start = entries;
-
-  for (i = 1; i < n_entries; i++)
-    {
-      CoglJournalEntry *entry0 = &entries[i - 1];
-      CoglJournalEntry *entry1 = entry0 + 1;
-
-      if (can_batch_callback (entry0, entry1))
-        {
-          batch_len++;
-          continue;
-        }
-
-      batch_callback (batch_start, batch_len, data);
-
-      batch_start = entry1;
-      batch_len = 1;
-    }
-
-  /* The last batch... */
-  batch_callback (batch_start, batch_len, data);
-}
-
-static void
-_cogl_journal_flush_modelview_and_entries (CoglJournalEntry *batch_start,
-                                           int               batch_len,
-                                           void             *data)
-{
-  CoglJournalFlushState *state = data;
-
-  if (G_UNLIKELY (cogl_debug_flags & COGL_DEBUG_BATCHING))
-    g_print ("BATCHING:    modelview batch len = %d\n", batch_len);
-
-  if (G_UNLIKELY (cogl_debug_flags & COGL_DEBUG_DISABLE_SOFTWARE_TRANSFORM))
-    GE (glLoadMatrixf ((GLfloat *)&batch_start->model_view));
-
-#ifdef HAVE_COGL_GL
-
-  GE (glDrawArrays (GL_QUADS, state->vertex_offset, batch_len * 4));
-
-#else /* HAVE_COGL_GL */
-
-  if (batch_len > 1)
-    {
-      int indices_offset = (state->vertex_offset / 4) * 6;
-      GE (glDrawElements (GL_TRIANGLES,
-                          6 * batch_len,
-                          state->indices->type,
-                          (GLvoid*)(indices_offset * state->indices_type_size)));
-    }
-  else
-    {
-      GE (glDrawArrays (GL_TRIANGLE_FAN,
-                        state->vertex_offset, /* first */
-                        4)); /* n vertices */
-    }
-#endif
-
-  /* DEBUGGING CODE XXX:
-   * This path will cause all rectangles to be drawn with a red, green
-   * or blue outline with no blending. This may e.g. help with debugging
-   * texture slicing issues or blending issues, plus it looks quite cool.
-   */
-  if (cogl_debug_flags & COGL_DEBUG_RECTANGLES)
-    {
-      static CoglHandle outline = COGL_INVALID_HANDLE;
-      static int color = 0;
-      int i;
-      if (outline == COGL_INVALID_HANDLE)
-        outline = cogl_material_new ();
-
-      cogl_enable (COGL_ENABLE_VERTEX_ARRAY);
-      for (i = 0; i < batch_len; i++, color = (color + 1) % 3)
-        {
-          cogl_material_set_color4ub (outline,
-                                      color == 0 ? 0xff : 0x00,
-                                      color == 1 ? 0xff : 0x00,
-                                      color == 2 ? 0xff : 0x00,
-                                      0xff);
-          _cogl_material_flush_gl_state (outline, NULL);
-          GE( glDrawArrays (GL_LINE_LOOP, 4 * i, 4) );
-        }
-    }
-
-  state->vertex_offset += (4 * batch_len);
-}
-
-static gboolean
-compare_entry_modelviews (CoglJournalEntry *entry0,
-                          CoglJournalEntry *entry1)
-{
-  /* Batch together quads with the same model view matrix */
-
-  /* FIXME: this is nasty, there are much nicer ways to track this
-   * (at the add_quad_vertices level) without resorting to a memcmp!
-   *
-   * E.g. If the cogl-current-matrix code maintained an "age" for
-   * the modelview matrix we could simply check in add_quad_vertices
-   * if the age has increased, and if so record the change as a
-   * boolean in the journal.
-   */
-
-  if (memcmp (&entry0->model_view, &entry1->model_view,
-              sizeof (GLfloat) * 16) == 0)
-    return TRUE;
-  else
-    return FALSE;
-}
-
-/* At this point we have a run of quads that we know have compatible
- * materials, but they may not all have the same modelview matrix */
-static void
-_cogl_journal_flush_material_and_entries (CoglJournalEntry *batch_start,
-                                          gint              batch_len,
-                                          void             *data)
-{
-  gulong                 enable_flags = 0;
-
-  _COGL_GET_CONTEXT (ctx, NO_RETVAL);
-
-  if (G_UNLIKELY (cogl_debug_flags & COGL_DEBUG_BATCHING))
-    g_print ("BATCHING:   material batch len = %d\n", batch_len);
-
-  _cogl_material_flush_gl_state (batch_start->material,
-                                 &batch_start->flush_options);
-
-  /* FIXME: This api is a bit yukky, ideally it will be removed if we
-   * re-work the cogl_enable mechanism */
-  enable_flags |= _cogl_material_get_cogl_enable_flags (batch_start->material);
-
-  if (ctx->enable_backface_culling)
-    enable_flags |= COGL_ENABLE_BACKFACE_CULLING;
-
-  enable_flags |= COGL_ENABLE_VERTEX_ARRAY;
-  enable_flags |= COGL_ENABLE_COLOR_ARRAY;
-  cogl_enable (enable_flags);
-
-  /* If we haven't transformed the quads in software then we need to also break
-   * up batches according to changes in the modelview matrix... */
-  if (cogl_debug_flags & COGL_DEBUG_DISABLE_SOFTWARE_TRANSFORM)
-    {
-      batch_and_call (batch_start,
-                      batch_len,
-                      compare_entry_modelviews,
-                      _cogl_journal_flush_modelview_and_entries,
-                      data);
-    }
-  else
-    _cogl_journal_flush_modelview_and_entries (batch_start, batch_len, data);
-}
-
-static gboolean
-compare_entry_materials (CoglJournalEntry *entry0, CoglJournalEntry *entry1)
-{
-  /* batch rectangles using compatible materials */
-
-  /* XXX: _cogl_material_equal may give false negatives since it avoids
-   * deep comparisons as an optimization. It aims to compare enough so
-   * that we that we are able to batch the 90% common cases, but may not
-   * look at less common differences. */
-  if (_cogl_material_equal (entry0->material,
-                            &entry0->flush_options,
-                            entry1->material,
-                            &entry1->flush_options))
-    return TRUE;
-  else
-    return FALSE;
-}
-
-/* Since the stride may not reflect the number of texture layers in use
- * (due to padding) we deal with texture coordinate offsets separately
- * from vertex and color offsets... */
-static void
-_cogl_journal_flush_texcoord_vbo_offsets_and_entries (
-                                          CoglJournalEntry *batch_start,
-                                          gint              batch_len,
-                                          void             *data)
-{
-  CoglJournalFlushState *state = data;
-  int                    prev_n_texcoord_arrays_enabled;
-  int                    i;
-
-  _COGL_GET_CONTEXT (ctx, NO_RETVAL);
-
-  for (i = 0; i < batch_start->n_layers; i++)
-    {
-      GE (glClientActiveTexture (GL_TEXTURE0 + i));
-      GE (glEnableClientState (GL_TEXTURE_COORD_ARRAY));
-      /* XXX NB:
-       * Our journal's vertex data is arranged as follows:
-       * 4 vertices per quad:
-       *    2 or 3 GLfloats per position (3 when doing software transforms)
-       *    4 RGBA GLubytes,
-       *    2 GLfloats per tex coord * n_layers
-       * (though n_layers may be padded; see definition of
-       *  GET_JOURNAL_VB_STRIDE_FOR_N_LAYERS for details)
-       */
-      GE (glTexCoordPointer (2, GL_FLOAT, state->stride,
-                             (void *)(state->vbo_offset +
-                                      (POS_STRIDE + COLOR_STRIDE) * 4 +
-                                      TEX_STRIDE * 4 * i)));
-    }
-  prev_n_texcoord_arrays_enabled =
-    ctx->n_texcoord_arrays_enabled;
-  ctx->n_texcoord_arrays_enabled = batch_start->n_layers;
-  for (; i < prev_n_texcoord_arrays_enabled; i++)
-    {
-      GE (glClientActiveTexture (GL_TEXTURE0 + i));
-      GE (glDisableClientState (GL_TEXTURE_COORD_ARRAY));
-    }
-
-  batch_and_call (batch_start,
-                  batch_len,
-                  compare_entry_materials,
-                  _cogl_journal_flush_material_and_entries,
-                  data);
-}
-
-static gboolean
-compare_entry_n_layers (CoglJournalEntry *entry0, CoglJournalEntry *entry1)
-{
-  if (entry0->n_layers == entry1->n_layers)
-    return TRUE;
-  else
-    return FALSE;
-}
-
-/* At this point we know the stride has changed from the previous batch
- * of journal entries */
-static void
-_cogl_journal_flush_vbo_offsets_and_entries (CoglJournalEntry *batch_start,
-                                             gint              batch_len,
-                                             void             *data)
-{
-  CoglJournalFlushState   *state = data;
-  size_t                   stride;
-#ifndef HAVE_COGL_GL
-  int                      needed_indices = batch_len * 6;
-  CoglHandle               indices_handle;
-  CoglVertexBufferIndices *indices;
-#endif
-
-  _COGL_GET_CONTEXT (ctx, NO_RETVAL);
-
-  if (G_UNLIKELY (cogl_debug_flags & COGL_DEBUG_BATCHING))
-    g_print ("BATCHING:  vbo offset batch len = %d\n", batch_len);
-
-  /* XXX NB:
-   * Our journal's vertex data is arranged as follows:
-   * 4 vertices per quad:
-   *    2 or 3 GLfloats per position (3 when doing software transforms)
-   *    4 RGBA GLubytes,
-   *    2 GLfloats per tex coord * n_layers
-   * (though n_layers may be padded; see definition of
-   *  GET_JOURNAL_VB_STRIDE_FOR_N_LAYERS for details)
-   */
-  stride = GET_JOURNAL_VB_STRIDE_FOR_N_LAYERS (batch_start->n_layers);
-  stride *= sizeof (GLfloat);
-  state->stride = stride;
-
-  GE (glVertexPointer (N_POS_COMPONENTS, GL_FLOAT, stride,
-                       (void *)state->vbo_offset));
-  GE (glColorPointer (4, GL_UNSIGNED_BYTE, stride,
-                      (void *)(state->vbo_offset + (POS_STRIDE * 4))));
-
-#ifndef HAVE_COGL_GL
-  indices_handle = cogl_vertex_buffer_indices_get_for_quads (needed_indices);
-  indices = _cogl_vertex_buffer_indices_pointer_from_handle (indices_handle);
-  state->indices = indices;
-
-  if (indices->type == GL_UNSIGNED_BYTE)
-    state->indices_type_size = 1;
-  else if (indices->type == GL_UNSIGNED_SHORT)
-    state->indices_type_size = 2;
-  else
-    g_critical ("unknown indices type %d", indices->type);
-
-  GE (glBindBuffer (GL_ELEMENT_ARRAY_BUFFER,
-                    GPOINTER_TO_UINT (indices->vbo_name)));
-#endif
-
-  /* We only call gl{Vertex,Color,Texture}Pointer when the stride within
-   * the VBO changes. (due to a change in the number of material layers)
-   * While the stride remains constant we walk forward through the above
-   * VBO using a vertex offset passed to glDraw{Arrays,Elements} */
-  state->vertex_offset = 0;
-
-  if (cogl_debug_flags & COGL_DEBUG_JOURNAL)
-    {
-      guint8 *verts;
-
-      if (cogl_get_features () & COGL_FEATURE_VBOS)
-        verts = ((guint8 *)ctx->logged_vertices->data) +
-          (size_t)state->vbo_offset;
-      else
-        verts = (guint8 *)state->vbo_offset;
-      _cogl_journal_dump_quad_batch (verts,
-                                     batch_start->n_layers,
-                                     batch_len);
-    }
-
-  batch_and_call (batch_start,
-                  batch_len,
-                  compare_entry_n_layers,
-                  _cogl_journal_flush_texcoord_vbo_offsets_and_entries,
-                  data);
-
-  /* progress forward through the VBO containing all our vertices */
-  state->vbo_offset += (stride * 4 * batch_len);
-  if (G_UNLIKELY (cogl_debug_flags & COGL_DEBUG_JOURNAL))
-    g_print ("new vbo offset = %lu\n", (gulong)state->vbo_offset);
-}
-
-static gboolean
-compare_entry_strides (CoglJournalEntry *entry0, CoglJournalEntry *entry1)
-{
-  /* Currently the only thing that affects the stride for our vertex arrays
-   * is the number of material layers. We need to update our VBO offsets
-   * whenever the stride changes. */
-  /* TODO: We should be padding the n_layers == 1 case as if it were
-   * n_layers == 2 so we can reduce the need to split batches. */
-  if (entry0->n_layers == entry1->n_layers ||
-      (entry0->n_layers <= MIN_LAYER_PADING &&
-       entry1->n_layers <= MIN_LAYER_PADING))
-    return TRUE;
-  else
-    return FALSE;
-}
-
-static GLuint
-upload_vertices_to_vbo (GArray *vertices, CoglJournalFlushState *state)
-{
-  size_t needed_vbo_len;
-  GLuint journal_vbo;
-
-  _COGL_GET_CONTEXT (ctx, 0);
-
-  needed_vbo_len = vertices->len * sizeof (GLfloat);
-
-  g_assert (needed_vbo_len);
-  GE (glGenBuffers (1, &journal_vbo));
-  GE (glBindBuffer (GL_ARRAY_BUFFER, journal_vbo));
-  GE (glBufferData (GL_ARRAY_BUFFER,
-                    needed_vbo_len,
-                    vertices->data,
-                    GL_STATIC_DRAW));
-
-  /* As we flush the journal entries in batches we walk forward through the
-   * above VBO starting at offset 0... */
-  state->vbo_offset = 0;
-
-  return journal_vbo;
-}
-
-/* XXX NB: When _cogl_journal_flush() returns all state relating
- * to materials, all glEnable flags and current matrix state
- * is undefined.
- */
-void
-_cogl_journal_flush (void)
-{
-  CoglJournalFlushState state;
-  int                   i;
-  GLuint                journal_vbo;
-  gboolean              vbo_fallback =
-    (cogl_get_features () & COGL_FEATURE_VBOS) ? FALSE : TRUE;
-
-  _COGL_GET_CONTEXT (ctx, NO_RETVAL);
-
-  if (ctx->journal->len == 0)
-    return;
-
-  if (G_UNLIKELY (cogl_debug_flags & COGL_DEBUG_BATCHING))
-    g_print ("BATCHING: journal len = %d\n", ctx->journal->len);
-
-  /* Load all the vertex data we have accumulated so far into a single VBO
-   * to minimize memory management costs within the GL driver. */
-  if (!vbo_fallback)
-    journal_vbo = upload_vertices_to_vbo (ctx->logged_vertices, &state);
-  else
-    state.vbo_offset = (char *)ctx->logged_vertices->data;
-
-  /* Since the journal deals with emitting the modelview matrices manually
-   * we need to dirty our client side modelview matrix stack cache... */
-  _cogl_current_matrix_state_dirty ();
-
-  /* And explicitly flush other matrix stacks... */
-  _cogl_set_current_matrix (COGL_MATRIX_PROJECTION);
-  _cogl_current_matrix_state_flush ();
-
-  /* If we have transformed all our quads at log time then the whole journal
-   * then we ensure no further model transform is applied by loading the
-   * identity matrix here...*/
-  if (!(cogl_debug_flags & COGL_DEBUG_DISABLE_SOFTWARE_TRANSFORM))
-    {
-      GE (glMatrixMode (GL_MODELVIEW));
-      glLoadIdentity ();
-    }
-
-  /* batch_and_call() batches a list of journal entries according to some
-   * given criteria and calls a callback once for each determined batch.
-   *
-   * The process of flushing the journal is staggered to reduce the amount
-   * of driver/GPU state changes necessary:
-   * 1) We split the entries according to the stride of the vertices:
-   *      Each time the stride of our vertex data changes we need to call
-   *      gl{Vertex,Color}Pointer to inform GL of new VBO offsets.
-   *      Currently the only thing that affects the stride of our vertex data
-   *      is the number of material layers.
-   * 2) We split the entries explicitly by the number of material layers:
-   *      We pad our vertex data when the number of layers is < 2 so that we
-   *      can minimize changes in stride. Each time the number of layers
-   *      changes we need to call glTexCoordPointer to inform GL of new VBO
-   *      offsets.
-   * 3) We then split according to compatible Cogl materials:
-   *      This is where we flush material state
-   * 4) Finally we split according to modelview matrix changes:
-   *      This is when we finally tell GL to draw something.
-   *      Note: Splitting by modelview changes is skipped when are doing the
-   *      vertex transformation in software at log time.
-   */
-  batch_and_call ((CoglJournalEntry *)ctx->journal->data, /* first entry */
-                  ctx->journal->len, /* max number of entries to consider */
-                  compare_entry_strides,
-                  _cogl_journal_flush_vbo_offsets_and_entries, /* callback */
-                  &state); /* data */
-
-  for (i = 0; i < ctx->journal->len; i++)
-    {
-      CoglJournalEntry *entry =
-        &g_array_index (ctx->journal, CoglJournalEntry, i);
-      _cogl_material_journal_unref (entry->material);
-    }
-
-  if (!vbo_fallback)
-    GE (glDeleteBuffers (1, &journal_vbo));
-
-  g_array_set_size (ctx->journal, 0);
-  g_array_set_size (ctx->logged_vertices, 0);
-}
-
-static void
-_cogl_journal_log_quad (float       x_1,
-                        float       y_1,
-                        float       x_2,
-                        float       y_2,
-                        CoglHandle  material,
-                        int         n_layers,
-                        guint32     fallback_layers,
-                        GLuint      layer0_override_texture,
-                        float      *tex_coords,
-                        guint       tex_coords_len)
-{
-  size_t            stride;
-  size_t            byte_stride;
-  int               next_vert;
-  GLfloat          *v;
-  GLubyte          *c;
-  GLubyte          *src_c;
-  int               i;
-  int               next_entry;
-  guint32           disable_layers;
-  CoglJournalEntry *entry;
-
-  _COGL_GET_CONTEXT (ctx, NO_RETVAL);
-
-  /* The vertex data is logged into a separate array in a layout that can be
-   * directly passed to OpenGL
-   */
-
-  /* XXX: See definition of GET_JOURNAL_VB_STRIDE_FOR_N_LAYERS for details
-   * about how we pack our vertex data */
-  stride = GET_JOURNAL_VB_STRIDE_FOR_N_LAYERS (n_layers);
-  /* NB: stride is in 32bit words */
-  byte_stride = stride * 4;
-
-  next_vert = ctx->logged_vertices->len;
-  g_array_set_size (ctx->logged_vertices, next_vert + 4 * stride);
-  v = &g_array_index (ctx->logged_vertices, GLfloat, next_vert);
-  c = (GLubyte *)(v + POS_STRIDE);
-
-  /* XXX: All the jumping around to fill in this strided buffer doesn't
-   * seem ideal. */
-
-  /* XXX: we could defer expanding the vertex data for GL until we come
-   * to flushing the journal. */
-
-  /* FIXME: This is a hacky optimization, since it will break if we
-   * change the definition of CoglColor: */
-  _cogl_material_get_colorubv (material, c);
-  src_c = c;
-  for (i = 0; i < 3; i++)
-    {
-      c += byte_stride;
-      memcpy (c, src_c, 4);
-    }
-
-  if (G_UNLIKELY (cogl_debug_flags & COGL_DEBUG_DISABLE_SOFTWARE_TRANSFORM))
-    {
-      v[0] = x_1; v[1] = y_1;
-      v += stride;
-      v[0] = x_1; v[1] = y_2;
-      v += stride;
-      v[0] = x_2; v[1] = y_2;
-      v += stride;
-      v[0] = x_2; v[1] = y_1;
-    }
-  else
-    {
-      CoglMatrix  mv;
-      float       x, y, z, w;
-
-      cogl_get_modelview_matrix (&mv);
-
-      x = x_1, y = y_1, z = 0; w = 1;
-      cogl_matrix_transform_point (&mv, &x, &y, &z, &w);
-      v[0] = x; v[1] = y; v[2] = z;
-      v += stride;
-      x = x_1, y = y_2, z = 0; w = 1;
-      cogl_matrix_transform_point (&mv, &x, &y, &z, &w);
-      v[0] = x; v[1] = y; v[2] = z;
-      v += stride;
-      x = x_2, y = y_2, z = 0; w = 1;
-      cogl_matrix_transform_point (&mv, &x, &y, &z, &w);
-      v[0] = x; v[1] = y; v[2] = z;
-      v += stride;
-      x = x_2, y = y_1, z = 0; w = 1;
-      cogl_matrix_transform_point (&mv, &x, &y, &z, &w);
-      v[0] = x; v[1] = y; v[2] = z;
-    }
-
-  for (i = 0; i < n_layers; i++)
-    {
-      /* XXX: See definition of GET_JOURNAL_VB_STRIDE_FOR_N_LAYERS for details
-       * about how we pack our vertex data */
-      GLfloat *t = &g_array_index (ctx->logged_vertices, GLfloat,
-                                   next_vert +  POS_STRIDE +
-                                   COLOR_STRIDE + TEX_STRIDE * i);
-
-      t[0] = tex_coords[0]; t[1] = tex_coords[1];
-      t += stride;
-      t[0] = tex_coords[0]; t[1] = tex_coords[3];
-      t += stride;
-      t[0] = tex_coords[2]; t[1] = tex_coords[3];
-      t += stride;
-      t[0] = tex_coords[2]; t[1] = tex_coords[1];
-    }
-
-  if (G_UNLIKELY (cogl_debug_flags & COGL_DEBUG_JOURNAL))
-    {
-      g_print ("Logged new quad:\n");
-      v = &g_array_index (ctx->logged_vertices, GLfloat, next_vert);
-      _cogl_journal_dump_quad_vertices ((guint8 *)v, n_layers);
-    }
-
-  next_entry = ctx->journal->len;
-  g_array_set_size (ctx->journal, next_entry + 1);
-  entry = &g_array_index (ctx->journal, CoglJournalEntry, next_entry);
-
-  disable_layers = (1 << n_layers) - 1;
-  disable_layers = ~disable_layers;
-
-  entry->material = _cogl_material_journal_ref (material);
-  entry->n_layers = n_layers;
-  entry->flush_options.flags =
-    COGL_MATERIAL_FLUSH_FALLBACK_MASK |
-    COGL_MATERIAL_FLUSH_DISABLE_MASK |
-    COGL_MATERIAL_FLUSH_SKIP_GL_COLOR;
-  entry->flush_options.fallback_layers = fallback_layers;
-  entry->flush_options.disable_layers = disable_layers;
-  if (layer0_override_texture)
-    {
-      entry->flush_options.flags |= COGL_MATERIAL_FLUSH_LAYER0_OVERRIDE;
-      entry->flush_options.layer0_override_texture = layer0_override_texture;
-    }
-  if (G_UNLIKELY (cogl_debug_flags & COGL_DEBUG_DISABLE_SOFTWARE_TRANSFORM))
-    cogl_get_modelview_matrix (&entry->model_view);
-
-  if (G_UNLIKELY (cogl_debug_flags & COGL_DEBUG_DISABLE_BATCHING
-                  || cogl_debug_flags & COGL_DEBUG_RECTANGLES))
-    _cogl_journal_flush ();
-}
-
 static void
 log_quad_sub_textures_cb (CoglHandle texture_handle,
                           GLuint gl_handle,
diff --git a/cogl/cogl-texture-private.h b/cogl/cogl-texture-private.h
index 454cf6b90..1b7184778 100644
--- a/cogl/cogl-texture-private.h
+++ b/cogl/cogl-texture-private.h
@@ -55,20 +55,6 @@ struct _CoglTexture
   gboolean           mipmaps_dirty;
 };
 
-/* To improve batching of geometry when submitting vertices to OpenGL we
- * log the texture rectangles we want to draw to a journal, so when we
- * later flush the journal we aim to batch data, and gl draw calls. */
-typedef struct _CoglJournalEntry
-{
-  CoglHandle               material;
-  int                      n_layers;
-  CoglMaterialFlushOptions flush_options;
-  CoglMatrix               model_view;
-  /* XXX: These entries are pretty big now considering the padding in
-   * CoglMaterialFlushOptions and CoglMatrix, so we might need to optimize this
-   * later. */
-} CoglJournalEntry;
-
 typedef void (*CoglTextureSliceCallback) (CoglHandle handle,
                                           GLuint gl_handle,
                                           GLenum gl_target,