2007-12-06 Tomas Frydrych <tf@openedhand.com>

* clutter/clutter-fixed.c: * clutter/clutter-fixed.h: (clutter_sqrti): Added extra iteration to the Newton-Raphson algorithm for arguments less than 342 to improve precision.
2007-12-06 16:21:48 +00:00
parent dad823f123
commit 58142d7ee7
3 changed files with 25 additions and 4 deletions
--- a/8
+++ b/8
@@ -1,3 +1,11 @@
 2007-12-06  Tomas Frydrych  <tf@openedhand.com>
 	* clutter/clutter-fixed.c:
 	* clutter/clutter-fixed.h:
 	(clutter_sqrti):
 	Added extra iteration to the Newton-Raphson algorithm for arguments
 	less than 342 to improve precision.
 2007-12-06  Emmanuele Bassi  <ebassi@openedhand.com>
 	* clutter/clutter-main.c:
--- a/clutter/clutter-fixed.c
+++ b/clutter/clutter-fixed.c
@@ -606,8 +606,8 @@ clutter_sqrtx (ClutterFixed x)
 *
 * Very fast fixed point implementation of square root for integers.
 *
- * This function is about 10x faster than clib sqrt() on x86, and (this is
+ * This function is at least 6x faster than clib sqrt() on x86, and (this is
- * not a typo!) more than 800x faster on ARM without FPU. It's error is < 5%
+ * not a typo!) about 500x faster on ARM without FPU. Its error is < 5%
 * for arguments < #CLUTTER_SQRTI_ARG_5_PERCENT and < 10% for arguments <
 * #CLUTTER_SQRTI_ARG_10_PERCENT. The maximum argument that can be passed to
 * this function is CLUTTER_SQRTI_ARG_MAX.
@@ -673,13 +673,26 @@ clutter_sqrti (gint number)
    flt2.f = flt.f + 2.0;
    flt2.i &= 0x7FFFFF;
-    /* Now we correct the estimate, only single iterration is needed */
+    /* Now we correct the estimate */
    y_1 = (flt2.i >> 11) * (flt2.i >> 11);
    y_1 = (y_1 >> 8) * (x >> 8);
    y_1 = f - y_1;
    flt2.i = (flt2.i >> 11) * (y_1 >> 11);
    /* If the original argument is less than 342, we do another
     * iteration to improve precision (for arguments >= 342, the single
     * iteration produces generally better results).
     */
    if (x < 171)
      {
 	y_1 = (flt2.i >> 11) * (flt2.i >> 11);
 	y_1 = (y_1 >> 8) * (x >> 8);
 	y_1 = f - y_1;
 	flt2.i = (flt2.i >> 11) * (y_1 >> 11);
      }
    /* Invert, round and convert from 10.22 to an integer
     * 0x1e3c68 is a magical rounding constant that produces slightly
     * better results than 0x200000.
--- a/clutter/clutter-fixed.h
+++ b/clutter/clutter-fixed.h
@@ -338,7 +338,7 @@ ClutterFixed clutter_tani (ClutterAngle angle);
 * Since: 0.6
 */
 #ifndef __SSE2__
-#define CLUTTER_SQRTI_ARG_5_PERCENT  131
+#define CLUTTER_SQRTI_ARG_5_PERCENT 210
 #else
 #define CLUTTER_SQRTI_ARG_5_PERCENT INT_MAX
 #endif