cogl-matrix: Get rid of the *_packed variants

cogl_matrix_project_points and cogl_matrix_transform_points had an
optimization for the common case where the stride parameters exactly
match the size of the corresponding structures. The code for both
versions, when generated by gcc with -O2 on x86-64, uses two registers
to hold the addresses of the input and output arrays. In the strided
version these pointers are incremented by adding the value of a
register, and in the packed version they are incremented by adding an
immediate value. I think the difference in cost here would be
negligible, and it may even be faster to add a register.

Also, GCC appears to retain the loop counter in a register for the
strided version, but in the packed version it can optimize it out and
directly use the input pointer as the counter. I think it would be
possible to reorder the code a bit to explicitly use the input pointer
as the counter if this were a problem.

Getting rid of the packed versions tidies up the code a bit and it
could potentially be faster if the code differences are small and we
get to avoid an extra conditional in cogl_matrix_transform_points.
This commit is contained in:
Neil Roberts 2011-01-31 18:53:51 +00:00
parent edd7a71ecf
commit fadd935891

View File

@ -439,28 +439,7 @@ typedef struct _Point4f
} Point4f;
/*
 * Transforms n_points 2-component points by matrix, treating points_in
 * as a contiguous array of Point2f and points_out as a contiguous array
 * of Point3f. Only the x and y columns of the matrix are applied to the
 * input; the xw/yw/zw entries are added as constant terms.
 */
static void
_cogl_matrix_transform_points_f2_packed (const CoglMatrix *matrix,
                                         void *points_in,
                                         void *points_out,
                                         int n_points)
{
  const Point2f *src = points_in;
  Point3f *dst = points_out;
  int idx;

  for (idx = 0; idx < n_points; idx++)
    {
      /* Take a copy of the input point so it is fully read before any
       * component of the output element is written. */
      Point2f in = src[idx];
      Point3f *out = dst + idx;

      out->x = matrix->xx * in.x + matrix->xy * in.y +
        matrix->xw;
      out->y = matrix->yx * in.x + matrix->yy * in.y +
        matrix->yw;
      out->z = matrix->zx * in.x + matrix->zy * in.y +
        matrix->zw;
    }
}
static void
_cogl_matrix_transform_points_f2_strided (const CoglMatrix *matrix,
_cogl_matrix_transform_points_f2 (const CoglMatrix *matrix,
size_t stride_in,
void *points_in,
size_t stride_out,
@ -481,30 +460,7 @@ _cogl_matrix_transform_points_f2_strided (const CoglMatrix *matrix,
}
/*
 * Projects n_points 2-component points by matrix, treating points_in as
 * a contiguous array of Point2f and points_out as a contiguous array of
 * Point4f. Only the x and y columns of the matrix are applied to the
 * input; the xw/yw/zw/ww entries are added as constant terms.
 */
static void
_cogl_matrix_project_points_f2_packed (const CoglMatrix *matrix,
                                       void *points_in,
                                       void *points_out,
                                       int n_points)
{
  const Point2f *src = points_in;
  Point4f *dst = points_out;
  int idx;

  for (idx = 0; idx < n_points; idx++)
    {
      /* Take a copy of the input point so it is fully read before any
       * component of the output element is written. */
      Point2f in = src[idx];
      Point4f *out = dst + idx;

      out->x = matrix->xx * in.x + matrix->xy * in.y +
        matrix->xw;
      out->y = matrix->yx * in.x + matrix->yy * in.y +
        matrix->yw;
      out->z = matrix->zx * in.x + matrix->zy * in.y +
        matrix->zw;
      out->w = matrix->wx * in.x + matrix->wy * in.y +
        matrix->ww;
    }
}
static void
_cogl_matrix_project_points_f2_strided (const CoglMatrix *matrix,
_cogl_matrix_project_points_f2 (const CoglMatrix *matrix,
size_t stride_in,
void *points_in,
size_t stride_out,
@ -526,28 +482,7 @@ _cogl_matrix_project_points_f2_strided (const CoglMatrix *matrix,
}
/*
 * Transforms n_points 3-component points by matrix, treating both
 * points_in and points_out as contiguous arrays of Point3f. The x, y
 * and z columns of the matrix are applied to the input; the xw/yw/zw
 * entries are added as constant terms.
 */
static void
_cogl_matrix_transform_points_f3_packed (const CoglMatrix *matrix,
                                         void *points_in,
                                         void *points_out,
                                         int n_points)
{
  const Point3f *src = points_in;
  Point3f *dst = points_out;
  int idx;

  for (idx = 0; idx < n_points; idx++)
    {
      /* Take a copy of the input point so it is fully read before any
       * component of the output element is written. */
      Point3f in = src[idx];
      Point3f *out = dst + idx;

      out->x = matrix->xx * in.x + matrix->xy * in.y +
        matrix->xz * in.z + matrix->xw;
      out->y = matrix->yx * in.x + matrix->yy * in.y +
        matrix->yz * in.z + matrix->yw;
      out->z = matrix->zx * in.x + matrix->zy * in.y +
        matrix->zz * in.z + matrix->zw;
    }
}
static void
_cogl_matrix_transform_points_f3_strided (const CoglMatrix *matrix,
_cogl_matrix_transform_points_f3 (const CoglMatrix *matrix,
size_t stride_in,
void *points_in,
size_t stride_out,
@ -571,30 +506,7 @@ _cogl_matrix_transform_points_f3_strided (const CoglMatrix *matrix,
}
/*
 * Projects n_points 3-component points by matrix, treating points_in as
 * a contiguous array of Point3f and points_out as a contiguous array of
 * Point4f. The x, y and z columns of the matrix are applied to the
 * input; the xw/yw/zw/ww entries are added as constant terms.
 */
static void
_cogl_matrix_project_points_f3_packed (const CoglMatrix *matrix,
                                       void *points_in,
                                       void *points_out,
                                       int n_points)
{
  const Point3f *src = points_in;
  Point4f *dst = points_out;
  int idx;

  for (idx = 0; idx < n_points; idx++)
    {
      /* Take a copy of the input point so it is fully read before any
       * component of the output element is written. */
      Point3f in = src[idx];
      Point4f *out = dst + idx;

      out->x = matrix->xx * in.x + matrix->xy * in.y +
        matrix->xz * in.z + matrix->xw;
      out->y = matrix->yx * in.x + matrix->yy * in.y +
        matrix->yz * in.z + matrix->yw;
      out->z = matrix->zx * in.x + matrix->zy * in.y +
        matrix->zz * in.z + matrix->zw;
      out->w = matrix->wx * in.x + matrix->wy * in.y +
        matrix->wz * in.z + matrix->ww;
    }
}
static void
_cogl_matrix_project_points_f3_strided (const CoglMatrix *matrix,
_cogl_matrix_project_points_f3 (const CoglMatrix *matrix,
size_t stride_in,
void *points_in,
size_t stride_out,
@ -620,30 +532,7 @@ _cogl_matrix_project_points_f3_strided (const CoglMatrix *matrix,
}
/*
 * Projects n_points 4-component points by matrix, treating both
 * points_in and points_out as contiguous arrays of Point4f. Every
 * output component is the full 4-term product of one matrix row with
 * the input point (x, y, z, w).
 */
static void
_cogl_matrix_project_points_f4_packed (const CoglMatrix *matrix,
                                       void *points_in,
                                       void *points_out,
                                       int n_points)
{
  const Point4f *src = points_in;
  Point4f *dst = points_out;
  int idx;

  for (idx = 0; idx < n_points; idx++)
    {
      /* Take a copy of the input point so it is fully read before any
       * component of the output element is written. */
      Point4f in = src[idx];
      Point4f *out = dst + idx;

      out->x = matrix->xx * in.x + matrix->xy * in.y +
        matrix->xz * in.z + matrix->xw * in.w;
      out->y = matrix->yx * in.x + matrix->yy * in.y +
        matrix->yz * in.z + matrix->yw * in.w;
      out->z = matrix->zx * in.x + matrix->zy * in.y +
        matrix->zz * in.z + matrix->zw * in.w;
      out->w = matrix->wx * in.x + matrix->wy * in.y +
        matrix->wz * in.z + matrix->ww * in.w;
    }
}
static void
_cogl_matrix_project_points_f4_strided (const CoglMatrix *matrix,
_cogl_matrix_project_points_f4 (const CoglMatrix *matrix,
size_t stride_in,
void *points_in,
size_t stride_out,
@ -681,28 +570,15 @@ cogl_matrix_transform_points (const CoglMatrix *matrix,
g_return_if_fail (stride_out >= sizeof (Point3f));
if (n_components == 2)
{
if (stride_in == sizeof (Point2f) &&
stride_out == sizeof (Point3f))
_cogl_matrix_transform_points_f2_packed (matrix,
points_in, points_out,
n_points);
else
_cogl_matrix_transform_points_f2_strided (matrix,
_cogl_matrix_transform_points_f2 (matrix,
stride_in, points_in,
stride_out, points_out,
n_points);
}
else
{
g_return_if_fail (n_components == 3);
if (stride_in == sizeof (Point3f) && stride_out == stride_in)
_cogl_matrix_transform_points_f3_packed (matrix,
points_in, points_out,
n_points);
else
_cogl_matrix_transform_points_f3_strided (matrix,
_cogl_matrix_transform_points_f3 (matrix,
stride_in, points_in,
stride_out, points_out,
n_points);
@ -719,44 +595,22 @@ cogl_matrix_project_points (const CoglMatrix *matrix,
int n_points)
{
if (n_components == 2)
{
if (stride_in == sizeof (Point2f) &&
stride_out == sizeof (Point4f))
_cogl_matrix_project_points_f2_packed (matrix,
points_in, points_out,
n_points);
else
_cogl_matrix_project_points_f2_strided (matrix,
_cogl_matrix_project_points_f2 (matrix,
stride_in, points_in,
stride_out, points_out,
n_points);
}
else if (n_components == 3)
{
if (stride_in == sizeof (Point3f) &&
stride_out == sizeof (Point4f))
_cogl_matrix_project_points_f3_packed (matrix,
points_in, points_out,
n_points);
else
_cogl_matrix_project_points_f3_strided (matrix,
_cogl_matrix_project_points_f3 (matrix,
stride_in, points_in,
stride_out, points_out,
n_points);
}
else
{
g_return_if_fail (n_components == 4);
if (stride_in == sizeof (Point4f) && stride_out == stride_in)
_cogl_matrix_project_points_f4_packed (matrix,
points_in, points_out,
n_points);
else
_cogl_matrix_project_points_f4_strided (matrix,
_cogl_matrix_project_points_f4 (matrix,
stride_in, points_in,
stride_out, points_out,
n_points);
}
}