mirror of
https://github.com/brl/mutter.git
synced 2024-12-22 19:12:04 +00:00
more fixed point work
This commit is contained in:
parent
f51d4659b8
commit
f924e2bbf7
16
ChangeLog
16
ChangeLog
@ -1,3 +1,19 @@
|
|||||||
|
2007-01-19 Tomas Frydrych <tf@openedhand.com>
|
||||||
|
|
||||||
|
* clutter/clutter-fixed.h:
|
||||||
|
* clutter/clutter-fixed.c:
|
||||||
|
Added fast double to int and double to fixed point conversion
|
||||||
|
routines; changed CLUTTER_FLOAT_TO_FIXED to use it.
|
||||||
|
Replaced clutter_sqrti with fixed point implementation of the QIII
|
||||||
|
algorithm.
|
||||||
|
|
||||||
|
|
||||||
|
* clutter/clutter-behavior-path.c: use clutter_sqrti always
|
||||||
|
|
||||||
|
* clutter/clutter-alpha.c:
|
||||||
|
(sinc_func): replaced double -> int cast with CLUTTER_FLOAT_TO_INT
|
||||||
|
|
||||||
|
|
||||||
2007-01-18 Emmanuele Bassi <ebassi@openedhand.com>
|
2007-01-18 Emmanuele Bassi <ebassi@openedhand.com>
|
||||||
|
|
||||||
* configure.ac: Post release bump to 0.3.0.
|
* configure.ac: Post release bump to 0.3.0.
|
||||||
|
@ -519,7 +519,6 @@ sincx1024_func (ClutterAlpha *alpha,
|
|||||||
|
|
||||||
return CLUTTER_FIXED_INT (sine * CLUTTER_ALPHA_MAX_ALPHA);
|
return CLUTTER_FIXED_INT (sine * CLUTTER_ALPHA_MAX_ALPHA);
|
||||||
}
|
}
|
||||||
|
|
||||||
#if 0
|
#if 0
|
||||||
/*
|
/*
|
||||||
* The following two functions are left in place for reference
|
* The following two functions are left in place for reference
|
||||||
@ -570,9 +569,10 @@ sinc_func (ClutterAlpha *alpha,
|
|||||||
|
|
||||||
CLUTTER_NOTE (ALPHA, "sine: %2f\n", sine);
|
CLUTTER_NOTE (ALPHA, "sine: %2f\n", sine);
|
||||||
|
|
||||||
return (guint32) (sine * (gdouble) CLUTTER_ALPHA_MAX_ALPHA);
|
return CLUTTER_FLOAT_TO_INT ((sine * (gdouble) CLUTTER_ALPHA_MAX_ALPHA));
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* clutter_sine_func:
|
* clutter_sine_func:
|
||||||
* @alpha: a #ClutterAlpha
|
* @alpha: a #ClutterAlpha
|
||||||
|
@ -196,12 +196,12 @@ node_distance (const ClutterKnot *begin,
|
|||||||
if (clutter_knot_equal (begin, end))
|
if (clutter_knot_equal (begin, end))
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
#ifdef CFX_NO_FPU
|
#if 1
|
||||||
return clutter_sqrti ((end->x - begin->x) * (end->x - begin->x) +
|
return clutter_sqrti ((end->x - begin->x) * (end->x - begin->x) +
|
||||||
(end->y - begin->y) * (end->y - begin->y));
|
(end->y - begin->y) * (end->y - begin->y));
|
||||||
#else
|
#else
|
||||||
return (gint) sqrt ((end->x - begin->x) * (end->x - begin->x) +
|
return CLUTTER_FLOAT_TO_INT(sqrt((end->x - begin->x) * (end->x - begin->x) +
|
||||||
(end->y - begin->y) * (end->y - begin->y));
|
(end->y - begin->y) * (end->y - begin->y)));
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -353,14 +353,6 @@ clutter_sqrtx (ClutterFixed x)
|
|||||||
* on ARM this function is about 5 times faster than c-lib sqrt, whilst
|
* on ARM this function is about 5 times faster than c-lib sqrt, whilst
|
||||||
* producing errors < 1%.
|
* producing errors < 1%.
|
||||||
*
|
*
|
||||||
* (There are faster algorithm's available; the Carmack 'magic'
|
|
||||||
* algorithm, http://www.codemaestro.com/reviews/review00000105.html,
|
|
||||||
* is about five times faster than this one when implemented
|
|
||||||
* as fixed point, but it's error is much greater and grows with the
|
|
||||||
* size of the argument (reaches about 10% around x == 800).
|
|
||||||
*
|
|
||||||
* Note: on systems with FPU, the clib sqrt can be noticeably faster
|
|
||||||
* than this function.
|
|
||||||
*/
|
*/
|
||||||
int t = 0;
|
int t = 0;
|
||||||
int sh = 0;
|
int sh = 0;
|
||||||
@ -448,68 +440,121 @@ clutter_sqrtx (ClutterFixed x)
|
|||||||
* clutter_sqrti:
|
* clutter_sqrti:
|
||||||
* @x: integer value
|
* @x: integer value
|
||||||
*
|
*
|
||||||
* A fixed point implementation of square root for integers
|
* Very fast fixed point implementation of square root for integers.
|
||||||
|
*
|
||||||
|
* This function is about 10x faster than clib sqrt() on x86, and (this is
|
||||||
|
* not a typo!) more than 800x faster on ARM without FPU. Its error is < 5%
|
||||||
|
* for arguments < 132 and < 10% for arguments < 5591.
|
||||||
*
|
*
|
||||||
* Return value: integer square root (truncated).
|
* Return value: integer square root.
|
||||||
*
|
*
|
||||||
*
|
*
|
||||||
* Since: 0.2
|
* Since: 0.2
|
||||||
*/
|
*/
|
||||||
gint
|
gint
|
||||||
clutter_sqrti (gint x)
|
clutter_sqrti (gint number)
|
||||||
{
|
{
|
||||||
int t = 0;
|
/* This is a fixed point implementation of the Quake III sqrt algorithm,
|
||||||
int sh = 0;
|
* described, for example, at
|
||||||
unsigned int mask = 0x40000000;
|
* http://www.codemaestro.com/reviews/review00000105.html
|
||||||
|
*
|
||||||
|
* While the original QIII is extremely fast, the use of floating division
|
||||||
|
* and multiplication makes it perform very poorly on ARM processors without FPU.
|
||||||
|
*
|
||||||
|
* The key to successfully replacing the floating point operations with
|
||||||
|
* fixed point is in the choice of the fixed point format. The QIII
|
||||||
|
* algorithm does not calculate the square root, but its reciprocal ('y'
|
||||||
|
* below), which is only at the end turned to the inverse value. In order
|
||||||
|
* for the algorithm to produce satisfactory results, the reciprocal value
|
||||||
|
* must be represented with sufficient precision; the 16.16 we use
|
||||||
|
* elsewhere in clutter is not good enough, and 10.22 is used instead.
|
||||||
|
*/
|
||||||
|
ClutterFixed x;
|
||||||
|
unsigned long y, y1; /* 10.22 fixed point */
|
||||||
|
unsigned long f = 0x600000; /* '1.5' as 10.22 fixed */
|
||||||
|
float flt = number;
|
||||||
|
float flt2;
|
||||||
|
|
||||||
if (x <= 0)
|
x = CLUTTER_INT_TO_FIXED (number) / 2;
|
||||||
return 0;
|
|
||||||
|
|
||||||
if (x > (sizeof (sqrt_tbl)/sizeof(ClutterFixed) - 1))
|
/* The QIII initial estimate */
|
||||||
{
|
y = * ( unsigned long * ) &flt;
|
||||||
/*
|
y = 0x5f3759df - ( y >> 1 );
|
||||||
* Find the highest bit set
|
flt = * ( float * ) &y;
|
||||||
*/
|
|
||||||
#if __arm__
|
|
||||||
/* This actually requires at least arm v5, but gcc does not seem
|
|
||||||
* to set the architecture defines correctly, and it is probably
|
|
||||||
* very unlikely that anyone will want to use clutter on anything
|
|
||||||
* less than v5.
|
|
||||||
*/
|
|
||||||
int bit;
|
|
||||||
__asm__ ("clz %0, %1\n"
|
|
||||||
"rsb %0, %0, #31\n"
|
|
||||||
:"=r"(bit)
|
|
||||||
:"r" (x));
|
|
||||||
|
|
||||||
/* make even (2n) */
|
/* Now, we convert the float to 10.22 fixed. We exploit the mechanism
|
||||||
bit &= 0xfffffffe;
|
* described at http://www.d6.com/users/checker/pdfs/gdmfp.pdf.
|
||||||
#else
|
*
|
||||||
/* TODO -- add i386 branch using bshr */
|
* We want 22 bit fraction; a single precision float uses 23 bit
|
||||||
int bit = 30;
|
* mantissa, so we only need to add 2^(23-22) (no need for the 1.5
|
||||||
while (bit >= 0)
|
* multiplier as we are only dealing with positive numbers).
|
||||||
{
|
*
|
||||||
if (x & mask)
|
* Note: we have to use two separate variables here -- for some reason,
|
||||||
break;
|
* if we try to use just the flt variable, gcc on ARM optimises the whole
|
||||||
|
* addition out, and it all goes pear shape, since without it, the bits
|
||||||
|
* in the float will not be correctly aligned.
|
||||||
|
*/
|
||||||
|
flt2 = flt + 2.0;
|
||||||
|
y = * ( long * ) &flt2;
|
||||||
|
y &= 0x7FFFFF;
|
||||||
|
|
||||||
mask = (mask >> 1 | mask >> 2);
|
/* Now we correct the estimate; only a single iteration is needed */
|
||||||
bit -= 2;
|
y1 = (y >> 11) * (y >> 11);
|
||||||
}
|
y1 = (y1 >> 8) * (x >> 8);
|
||||||
#endif
|
|
||||||
sh = ((bit - 6) >> 1);
|
|
||||||
t = (x >> (bit - 6));
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
return (sqrt_tbl[x] >> CFX_Q);
|
|
||||||
}
|
|
||||||
|
|
||||||
x = sqrt_tbl[t];
|
y1 = f - y1;
|
||||||
|
y = (y >> 11) * (y1 >> 11);
|
||||||
|
|
||||||
if (sh > 0)
|
/* Invert, round and convert from 10.22 to an integer
|
||||||
x = x << sh;
|
* 0x1e3c68 is a magical rounding constant that produces slightly
|
||||||
else if (sh < 0)
|
* better results than 0x200000.
|
||||||
x = (x >> (1 + ~sh));
|
*/
|
||||||
|
return (number * y + 0x1e3c68) >> 22;
|
||||||
return (x >> CFX_Q);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* <private> */
|
||||||
|
const double _magic = 68719476736.0*1.5;
|
||||||
|
|
||||||
|
/* Where in the 64 bits of double is the mantissa */
|
||||||
|
#ifdef LITTLE_ENDIAN
|
||||||
|
#define _CFX_MAN 0
|
||||||
|
#else
|
||||||
|
#define _CFX_MAN 1
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*
|
||||||
|
* _clutter_double_to_fixed:
|
||||||
|
* @value: value to be converted
|
||||||
|
*
|
||||||
|
* A fast conversion from double precision floating point to fixed point
|
||||||
|
*
|
||||||
|
* Return value: Fixed point representation of the value
|
||||||
|
*
|
||||||
|
* Since: 0.2
|
||||||
|
*/
|
||||||
|
ClutterFixed
|
||||||
|
_clutter_double_to_fixed (double val)
|
||||||
|
{
|
||||||
|
val = val + _magic;
|
||||||
|
return ((gint32*)&val)[_CFX_MAN];
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* _clutter_double_to_int:
|
||||||
|
* @value: value to be converted
|
||||||
|
*
|
||||||
|
* A fast conversion from double precision floating point to int;
|
||||||
|
* use this instead of casting double/float to int.
|
||||||
|
*
|
||||||
|
* Return value: Integer part of the double
|
||||||
|
*
|
||||||
|
* Since: 0.2
|
||||||
|
*/
|
||||||
|
ClutterFixed
|
||||||
|
_clutter_double_to_int (double val)
|
||||||
|
{
|
||||||
|
val = val + _magic;
|
||||||
|
return ((gint32*)&val)[_CFX_MAN] >> 16;
|
||||||
|
}
|
||||||
|
|
||||||
|
#undef _CFX_MAN
|
||||||
|
@ -118,10 +118,9 @@ typedef gint32 ClutterAngle; /* angle such that 1024 == 2*PI */
|
|||||||
#define CLUTTER_FIXED_TO_FLOAT(x) ((float) ((int)(x) / 65536.0))
|
#define CLUTTER_FIXED_TO_FLOAT(x) ((float) ((int)(x) / 65536.0))
|
||||||
#define CLUTTER_FIXED_TO_DOUBLE(x) ((double) ((int)(x) / 65536.0))
|
#define CLUTTER_FIXED_TO_DOUBLE(x) ((double) ((int)(x) / 65536.0))
|
||||||
|
|
||||||
#define CLUTTER_FLOAT_TO_FIXED(x) \
|
#define CLUTTER_FLOAT_TO_FIXED(x) _clutter_double_to_fixed((x))
|
||||||
( (ABS(x) > 32767.0) ? (((x) / (x)) * 0x7fffffff) \
|
#define CLUTTER_FLOAT_TO_INT(x) _clutter_double_to_int((x))
|
||||||
: ((long)((x) * 65536.0 + ((x) < 0 ? -0.5 \
|
|
||||||
: 0.5))) )
|
|
||||||
#define CLUTTER_INT_TO_FIXED(x) ((x) << CFX_Q)
|
#define CLUTTER_INT_TO_FIXED(x) ((x) << CFX_Q)
|
||||||
|
|
||||||
#define CLUTTER_FIXED_INT(x) ((x) >> CFX_Q)
|
#define CLUTTER_FIXED_INT(x) ((x) >> CFX_Q)
|
||||||
@ -180,6 +179,14 @@ ClutterFixed clutter_sini (ClutterAngle angle);
|
|||||||
ClutterFixed clutter_sqrtx (ClutterFixed x);
|
ClutterFixed clutter_sqrtx (ClutterFixed x);
|
||||||
gint clutter_sqrti (gint x);
|
gint clutter_sqrti (gint x);
|
||||||
|
|
||||||
|
|
||||||
|
/* <private> */
|
||||||
|
extern inline
|
||||||
|
ClutterFixed _clutter_double_to_fixed (double value);
|
||||||
|
|
||||||
|
extern inline
|
||||||
|
ClutterFixed _clutter_double_to_int (double value);
|
||||||
|
|
||||||
G_END_DECLS
|
G_END_DECLS
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
Loading…
Reference in New Issue
Block a user