cogl-matrix: Get rid of the *_packed variants
cogl_matrix_project_points and cogl_matrix_transform_points had an optimization for the common case where the stride parameters exactly match the size of the corresponding structures. The code for both, when generated by GCC with -O2 on x86-64, uses two registers to hold the addresses of the input and output arrays. In the strided version these pointers are incremented by adding the value of a register, and in the packed version they are incremented by adding an immediate value. I think the difference in cost here would be negligible, and it may even be faster to add a register. Also, GCC appears to retain the loop counter in a register for the strided version, but in the packed version it can optimize it out and directly use the input pointer as the counter. I think it would be possible to reorder the code a bit to explicitly use the input pointer as the counter if this were a problem. Getting rid of the packed versions tidies up the code a bit, and it could potentially be faster if the code differences are small and we get to avoid an extra conditional in cogl_matrix_transform_points.
This commit is contained in:
parent
c4d4a5469a
commit
2ded18933e
@ -439,33 +439,12 @@ typedef struct _Point4f
|
|||||||
} Point4f;
|
} Point4f;
|
||||||
|
|
||||||
static void
|
static void
|
||||||
_cogl_matrix_transform_points_f2_packed (const CoglMatrix *matrix,
|
_cogl_matrix_transform_points_f2 (const CoglMatrix *matrix,
|
||||||
void *points_in,
|
size_t stride_in,
|
||||||
void *points_out,
|
void *points_in,
|
||||||
int n_points)
|
size_t stride_out,
|
||||||
{
|
void *points_out,
|
||||||
Point3f *o = points_out;
|
int n_points)
|
||||||
int i;
|
|
||||||
|
|
||||||
for (i = 0; i < n_points; i++)
|
|
||||||
{
|
|
||||||
Point2f p = ((Point2f *)points_in)[i];
|
|
||||||
o[i].x = matrix->xx * p.x + matrix->xy * p.y +
|
|
||||||
matrix->xw;
|
|
||||||
o[i].y = matrix->yx * p.x + matrix->yy * p.y +
|
|
||||||
matrix->yw;
|
|
||||||
o[i].z = matrix->zx * p.x + matrix->zy * p.y +
|
|
||||||
matrix->zw;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static void
|
|
||||||
_cogl_matrix_transform_points_f2_strided (const CoglMatrix *matrix,
|
|
||||||
size_t stride_in,
|
|
||||||
void *points_in,
|
|
||||||
size_t stride_out,
|
|
||||||
void *points_out,
|
|
||||||
int n_points)
|
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
@ -481,35 +460,12 @@ _cogl_matrix_transform_points_f2_strided (const CoglMatrix *matrix,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
_cogl_matrix_project_points_f2_packed (const CoglMatrix *matrix,
|
_cogl_matrix_project_points_f2 (const CoglMatrix *matrix,
|
||||||
void *points_in,
|
size_t stride_in,
|
||||||
void *points_out,
|
void *points_in,
|
||||||
int n_points)
|
size_t stride_out,
|
||||||
{
|
void *points_out,
|
||||||
Point4f *o = points_out;
|
int n_points)
|
||||||
int i;
|
|
||||||
|
|
||||||
for (i = 0; i < n_points; i++)
|
|
||||||
{
|
|
||||||
Point2f p = ((Point2f *)points_in)[i];
|
|
||||||
o[i].x = matrix->xx * p.x + matrix->xy * p.y +
|
|
||||||
matrix->xw;
|
|
||||||
o[i].y = matrix->yx * p.x + matrix->yy * p.y +
|
|
||||||
matrix->yw;
|
|
||||||
o[i].z = matrix->zx * p.x + matrix->zy * p.y +
|
|
||||||
matrix->zw;
|
|
||||||
o[i].w = matrix->wx * p.x + matrix->wy * p.y +
|
|
||||||
matrix->ww;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static void
|
|
||||||
_cogl_matrix_project_points_f2_strided (const CoglMatrix *matrix,
|
|
||||||
size_t stride_in,
|
|
||||||
void *points_in,
|
|
||||||
size_t stride_out,
|
|
||||||
void *points_out,
|
|
||||||
int n_points)
|
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
@ -526,33 +482,12 @@ _cogl_matrix_project_points_f2_strided (const CoglMatrix *matrix,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
_cogl_matrix_transform_points_f3_packed (const CoglMatrix *matrix,
|
_cogl_matrix_transform_points_f3 (const CoglMatrix *matrix,
|
||||||
void *points_in,
|
size_t stride_in,
|
||||||
void *points_out,
|
void *points_in,
|
||||||
int n_points)
|
size_t stride_out,
|
||||||
{
|
void *points_out,
|
||||||
Point3f *o = points_out;
|
int n_points)
|
||||||
int i;
|
|
||||||
|
|
||||||
for (i = 0; i < n_points; i++)
|
|
||||||
{
|
|
||||||
Point3f p = ((Point3f *)points_in)[i];
|
|
||||||
o[i].x = matrix->xx * p.x + matrix->xy * p.y +
|
|
||||||
matrix->xz * p.z + matrix->xw;
|
|
||||||
o[i].y = matrix->yx * p.x + matrix->yy * p.y +
|
|
||||||
matrix->yz * p.z + matrix->yw;
|
|
||||||
o[i].z = matrix->zx * p.x + matrix->zy * p.y +
|
|
||||||
matrix->zz * p.z + matrix->zw;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static void
|
|
||||||
_cogl_matrix_transform_points_f3_strided (const CoglMatrix *matrix,
|
|
||||||
size_t stride_in,
|
|
||||||
void *points_in,
|
|
||||||
size_t stride_out,
|
|
||||||
void *points_out,
|
|
||||||
int n_points)
|
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
@ -571,35 +506,12 @@ _cogl_matrix_transform_points_f3_strided (const CoglMatrix *matrix,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
_cogl_matrix_project_points_f3_packed (const CoglMatrix *matrix,
|
_cogl_matrix_project_points_f3 (const CoglMatrix *matrix,
|
||||||
void *points_in,
|
size_t stride_in,
|
||||||
void *points_out,
|
void *points_in,
|
||||||
int n_points)
|
size_t stride_out,
|
||||||
{
|
void *points_out,
|
||||||
Point4f *o = points_out;
|
int n_points)
|
||||||
int i;
|
|
||||||
|
|
||||||
for (i = 0; i < n_points; i++)
|
|
||||||
{
|
|
||||||
Point3f p = ((Point3f *)points_in)[i];
|
|
||||||
o[i].x = matrix->xx * p.x + matrix->xy * p.y +
|
|
||||||
matrix->xz * p.z + matrix->xw;
|
|
||||||
o[i].y = matrix->yx * p.x + matrix->yy * p.y +
|
|
||||||
matrix->yz * p.z + matrix->yw;
|
|
||||||
o[i].z = matrix->zx * p.x + matrix->zy * p.y +
|
|
||||||
matrix->zz * p.z + matrix->zw;
|
|
||||||
o[i].w = matrix->wx * p.x + matrix->wy * p.y +
|
|
||||||
matrix->wz * p.z + matrix->ww;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static void
|
|
||||||
_cogl_matrix_project_points_f3_strided (const CoglMatrix *matrix,
|
|
||||||
size_t stride_in,
|
|
||||||
void *points_in,
|
|
||||||
size_t stride_out,
|
|
||||||
void *points_out,
|
|
||||||
int n_points)
|
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
@ -620,35 +532,12 @@ _cogl_matrix_project_points_f3_strided (const CoglMatrix *matrix,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
_cogl_matrix_project_points_f4_packed (const CoglMatrix *matrix,
|
_cogl_matrix_project_points_f4 (const CoglMatrix *matrix,
|
||||||
void *points_in,
|
size_t stride_in,
|
||||||
void *points_out,
|
void *points_in,
|
||||||
int n_points)
|
size_t stride_out,
|
||||||
{
|
void *points_out,
|
||||||
Point4f *o = points_out;
|
int n_points)
|
||||||
int i;
|
|
||||||
|
|
||||||
for (i = 0; i < n_points; i++)
|
|
||||||
{
|
|
||||||
Point4f p = ((Point4f *)points_in)[i];
|
|
||||||
o[i].x = matrix->xx * p.x + matrix->xy * p.y +
|
|
||||||
matrix->xz * p.z + matrix->xw * p.w;
|
|
||||||
o[i].y = matrix->yx * p.x + matrix->yy * p.y +
|
|
||||||
matrix->yz * p.z + matrix->yw * p.w;
|
|
||||||
o[i].z = matrix->zx * p.x + matrix->zy * p.y +
|
|
||||||
matrix->zz * p.z + matrix->zw * p.w;
|
|
||||||
o[i].w = matrix->wx * p.x + matrix->wy * p.y +
|
|
||||||
matrix->wz * p.z + matrix->ww * p.w;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static void
|
|
||||||
_cogl_matrix_project_points_f4_strided (const CoglMatrix *matrix,
|
|
||||||
size_t stride_in,
|
|
||||||
void *points_in,
|
|
||||||
size_t stride_out,
|
|
||||||
void *points_out,
|
|
||||||
int n_points)
|
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
@ -681,31 +570,18 @@ cogl_matrix_transform_points (const CoglMatrix *matrix,
|
|||||||
g_return_if_fail (stride_out >= sizeof (Point3f));
|
g_return_if_fail (stride_out >= sizeof (Point3f));
|
||||||
|
|
||||||
if (n_components == 2)
|
if (n_components == 2)
|
||||||
{
|
_cogl_matrix_transform_points_f2 (matrix,
|
||||||
if (stride_in == sizeof (Point2f) &&
|
stride_in, points_in,
|
||||||
stride_out == sizeof (Point3f))
|
stride_out, points_out,
|
||||||
_cogl_matrix_transform_points_f2_packed (matrix,
|
n_points);
|
||||||
points_in, points_out,
|
|
||||||
n_points);
|
|
||||||
else
|
|
||||||
_cogl_matrix_transform_points_f2_strided (matrix,
|
|
||||||
stride_in, points_in,
|
|
||||||
stride_out, points_out,
|
|
||||||
n_points);
|
|
||||||
}
|
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
g_return_if_fail (n_components == 3);
|
g_return_if_fail (n_components == 3);
|
||||||
|
|
||||||
if (stride_in == sizeof (Point3f) && stride_out == stride_in)
|
_cogl_matrix_transform_points_f3 (matrix,
|
||||||
_cogl_matrix_transform_points_f3_packed (matrix,
|
stride_in, points_in,
|
||||||
points_in, points_out,
|
stride_out, points_out,
|
||||||
n_points);
|
n_points);
|
||||||
else
|
|
||||||
_cogl_matrix_transform_points_f3_strided (matrix,
|
|
||||||
stride_in, points_in,
|
|
||||||
stride_out, points_out,
|
|
||||||
n_points);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -719,44 +595,22 @@ cogl_matrix_project_points (const CoglMatrix *matrix,
|
|||||||
int n_points)
|
int n_points)
|
||||||
{
|
{
|
||||||
if (n_components == 2)
|
if (n_components == 2)
|
||||||
{
|
_cogl_matrix_project_points_f2 (matrix,
|
||||||
if (stride_in == sizeof (Point2f) &&
|
stride_in, points_in,
|
||||||
stride_out == sizeof (Point4f))
|
stride_out, points_out,
|
||||||
_cogl_matrix_project_points_f2_packed (matrix,
|
n_points);
|
||||||
points_in, points_out,
|
|
||||||
n_points);
|
|
||||||
else
|
|
||||||
_cogl_matrix_project_points_f2_strided (matrix,
|
|
||||||
stride_in, points_in,
|
|
||||||
stride_out, points_out,
|
|
||||||
n_points);
|
|
||||||
}
|
|
||||||
else if (n_components == 3)
|
else if (n_components == 3)
|
||||||
{
|
_cogl_matrix_project_points_f3 (matrix,
|
||||||
if (stride_in == sizeof (Point3f) &&
|
stride_in, points_in,
|
||||||
stride_out == sizeof (Point4f))
|
stride_out, points_out,
|
||||||
_cogl_matrix_project_points_f3_packed (matrix,
|
n_points);
|
||||||
points_in, points_out,
|
|
||||||
n_points);
|
|
||||||
else
|
|
||||||
_cogl_matrix_project_points_f3_strided (matrix,
|
|
||||||
stride_in, points_in,
|
|
||||||
stride_out, points_out,
|
|
||||||
n_points);
|
|
||||||
}
|
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
g_return_if_fail (n_components == 4);
|
g_return_if_fail (n_components == 4);
|
||||||
|
|
||||||
if (stride_in == sizeof (Point4f) && stride_out == stride_in)
|
_cogl_matrix_project_points_f4 (matrix,
|
||||||
_cogl_matrix_project_points_f4_packed (matrix,
|
stride_in, points_in,
|
||||||
points_in, points_out,
|
stride_out, points_out,
|
||||||
n_points);
|
n_points);
|
||||||
else
|
|
||||||
_cogl_matrix_project_points_f4_strided (matrix,
|
|
||||||
stride_in, points_in,
|
|
||||||
stride_out, points_out,
|
|
||||||
n_points);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user