30位有效数字的浮点数结构解决double数据类型多次累加后的明显的误差
标准的c或者c++的double数据类型只有15位有效数字(好像有这么回事,参看IEEE 754 ),
因此产生了大的数字多次累加后的明显的误差,在财务计算中,这种误差是不能接受的。
参看http://blog.csdn.net/l1t/archive/2004/10/09/129110.aspx
来自Lcc-win32 的src\doubledouble\doubledouble.c
(作者主页:http://members.lycos.co.uk/keithmbriggs/doubledouble.html
上还有一个c++封装的类,有兴趣的人可以去看。)
利用2个double变量构造出一个doubledouble结构,解决了这个问题。
测试办法:把main()函数的内容替换成下面内容。
int main(void)
{
doubledouble a = new_doubledouble(5.0,0.0);
doubledouble b = new_doubledouble(4.0,0.0);
doubledouble c = a*b;
double x1,x2;
char buffer[256];
int i;
a=a-a+99999999.99;
c=c-c+0.0;
for(i=0;i<81920;i++)
{
c=c+a;
}
doubledoubletoa(c,buffer);
printf("sum() =%s\n",buffer);
c=a*81920.0;
doubledoubletoa(c,buffer);
printf("mult()=%s\n",buffer);
x1=c.hi+c.lo;
printf("mult()=%18.2lf\n",x1);
return 0;
}
运行结果:
sum() =8.191999999180799560546875000000000e12
mult()=8.191999999180799560546875000000000e12
mult()= 8191999999180.80
在Lcc-win32 中,可以不加修改地通过编译。
如果在gcc下编译,需要定义宏#define GCC
在gcc和vs.net 2003 中,函数
doubledouble ddexp(const doubledouble& x)
{
...
sum1=y*((((ysq+3960.)*ysq+2162160.)*ysq+302702400.)*ysq+8821612800.);
sum2=(((90.*ysq+110880.)*ysq+30270240.)*ysq+2075673600.)*ysq+17643225600.;
...
}
上述2行不能通过编译,如果不需要这个函数,可以删除。
/* doubledouble.c */
#include <string.h>
#include <stdio.h>
#include <math.h>
#include <stdlib.h>
/*
This software was written by the mathematician Keith Martin Briggs. It implements
an extended precision data type "doubledouble", that is made up of two floating
point numbers. I have rewritten it for lcc-win32 and extensiveley used this code
to test the compiler's operator subclassing module.
The code should compile without problems in any C++ compiler.
---------------------------------------------------------------jacob
Copyright (C) 1997 Keith Martin Briggs
Except where otherwise indicated,
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with This program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
// 97 Aug 04 KMB added ldexp
// 97 Jul 11 Moved This stuff out of quad.h, created inline.h so it can
// be #included even if we're not inlining, by just "#define inline"
// 97 Jul 12 Added all combos of doubledouble/double/int +-*/. Only a couple actually
// written; most just call the others by swapping arguments. Generates
// equivalent code with a good inlining compiler (tested with g++ 2.7.2).
// - e.g., all subtraction is now done by addition of unary minus.
// - removed if's from doubledouble*int. Zero-branch code is 2.5 faster (tested)
// - generally cleaned up and re-organized the order of the functions,
// now they're grouped nicely by function.
// - tested Newton's division. Works but is terribly slow, because
// it needs to do several doubledouble + and * operations, and doubledouble /
// without it is about the same speed at doubledouble * anyway, so no win.
// - moved recip here as an inline.
// - checked and tested doubledouble/double (BUG?), seems fine.
typedef struct doubledouble {
double hi;
double lo;
} doubledouble;
#ifdef __LCC__
#define x86_FIX unsigned short __old_cw, __new_cw; _asm("\tfnstcw\t%__old_cw"); __new_cw = (__old_cw & ~0x300) | 0x200; _asm ("\tfldcw\t%__new_cw")
#define END_x86_FIX _asm ("\tfldcw\t%__old_cw")
#else
#define x86_FIX unsigned short __old_cw,__new_cw; _asm {fnstcw __old_cw} __new_cw = (__old_cw & ~0x300) | 0x200; _asm { fldcw __new_cw};
#define END_x86_FIX _asm { fldcw __new_cw}
#endif
#ifdef GCC
#undef x86_FIX
#undef END_x86_FIX
#define x86_FIX
#define END_x86_FIX
#endif
static double Split = 134217729.0;
doubledouble new_doubledouble(double x,double y);
doubledouble operator-(const doubledouble &x)
{
doubledouble z;
z.hi = -x.hi;
z.lo = -x.lo;
return z;
}
// Absolute value
doubledouble ddfabs(const doubledouble& x)
{
if (x.hi>=0.0) return x;
else {
doubledouble result;
result.hi = -x.hi;
result.lo = -x.lo;
return result;
}
}
// Addition
/* (C) W. Kahan 1989
* NOTICE:
* Copyrighted programs may not be translated, used, nor
* reproduced without the author's permission. Normally that
* permission is granted freely for academic and scientific
* purposes subject to the following three requirements:
* 1. This NOTICE and the copyright notices must remain attached
* to the programs and their translations.
* 2. Users of such a program should regard themselves as voluntary
* participants in the author's researches and so are obliged
* to report their experience with the program back to the author.
* 3. Neither the author nor the institution that employs him will
* be held responsible for the consequences of using a program
* for which neither has received payment.
* Would-be commercial users of these programs must correspond
* with the author to arrange terms of payment and warranty.
*/
///////////////////////////////////////////////////////////////
/////////////////// Addition and Subtraction /////////////////
///////////////////////////////////////////////////////////////
// Binary addition
doubledouble operator +(const doubledouble& x,const doubledouble& y)
{
double H, h, T, t, S, s, e, f;
doubledouble z;
x86_FIX;
S = x.hi+y.hi;
T = x.lo+y.lo;
e = S-x.hi;
f = T-x.lo;
// s = S-e;
t = T-f;
s = (y.hi-e)+(x.hi-(S-e));
t = (y.lo-f)+(x.lo-(T-f));
e = s+T;
H = S+e;
h = e+(S-H);
e = t+h;
z.hi = H+e;
z.lo = e+(double)(H-z.hi);
END_x86_FIX;
return z;
}
doubledouble operator +(const double& x,const doubledouble& y)
{
double H, h, S, s, e;
doubledouble z;
x86_FIX;
S = x+y.hi;
e = S-x;
s = S-e;
s = (y.hi-e)+(x-s);
H = S+(s+y.lo);
h = (s+y.lo)+(S-H);
z.hi = H+h;
z.lo = h+(double)(H-z.hi);
END_x86_FIX;
return z;
}
doubledouble operator +(const doubledouble& x,const double& y )
{
return y+x;
}
doubledouble operator +(int x,const doubledouble& y)
{
return (double)(x)+y;
}
doubledouble operator +(doubledouble& x, int y) {
return y+x;
}
// Subtraction
doubledouble operator -(const doubledouble& x,const doubledouble& y)
{
return x+(-y);
}
doubledouble operator -(const double& x, const doubledouble& y)
{
return x+(-y);
}
doubledouble operator -(const int x, const doubledouble& y)
{
return x+(-y);
}
doubledouble operator -(const doubledouble& x, const double& y)
{
return x+(-y);
}
doubledouble operator -(const doubledouble& x,const int y)
{
return x+(-y);
}
////////////////////////// Self-Addition ///////////////////////
doubledouble& operator +=(doubledouble &This,const doubledouble& y)
{
double H, h, T, t, S, s, e, f;
x86_FIX;
S =This.hi+y.hi;
T = This.lo+y.lo;
e = S-This.hi;
f = T-This.lo;
s = S-e;
t = T-f;
s = (y.hi-e)+(This.hi-s);
t = (y.lo-f)+(This.lo-t);
f = s+T;
H = S+f;
h = f+(S-H);
This.hi = H+(t+h);
This.lo = (t+h)+(double)(H-This.hi);
END_x86_FIX;
return This;
}
doubledouble& operator +=(doubledouble &This,const double& y)
{
double H, h, S, s, e, f;
x86_FIX;
S =This.hi+y;
e = S-This.hi;
s = S-e;
s = (y-e)+(This.hi-s);
f = s+This.lo;
H = S+f;
h = f+(S-H);
This.hi = H+h;
This.lo = h+(double)(H-This.hi);
END_x86_FIX;
return This;
}
doubledouble& operator +=(doubledouble &This,const int y)
{
return This += (double)(y);
}
// Self-Subtraction
doubledouble& operator -=(doubledouble &This,const doubledouble& y)
{
return This += -y;
}
doubledouble& operator -=(doubledouble &This,const double& y)
{
return This += -y;
}
doubledouble& operator -=(doubledouble &This,const int y)
{
return This += -y;
}
/////////////////////////////////////////////////////////////
//////////////////// Multiplication ///////////////////////
/////////////////////////////////////////////////////////////
// Binary Multiplication
doubledouble operator *(const doubledouble& x,const doubledouble& y)
{
double hx, tx, hy, ty, C, c;
doubledouble z;
x86_FIX;
C = Split*x.hi;
hx = C-x.hi;
c = Split*y.hi;
hx = C-hx;
tx = x.hi-hx;
hy = c-y.hi;
C = x.hi*y.hi;
hy = c-hy;
ty = y.hi-hy;
c = ((((hx*hy-C)+hx*ty)+tx*hy)+tx*ty)+(x.hi*y.lo+x.lo*y.hi);
z.hi = C+c;
hx=C-z.hi;
z.lo = c+hx;
END_x86_FIX;
return z;
}
// double*doubledouble
doubledouble operator *(const double& x, const doubledouble& y)
{
double hx, tx, hy, ty, C, c;
doubledouble z;
x86_FIX;
C = Split*x;
hx = C-x;
c = Split*y.hi;
hx = C-hx ;
tx = x-hx;
hy = c-y.hi;
C = x*y.hi;
hy = c-hy;
ty = y.hi - hy;
c = ((((hx*hy-C)+hx*ty)+tx*hy)+tx*ty)+x*y.lo;
z.hi = C+c;
z.lo = c+(double)(C-z.hi);
END_x86_FIX;
return z;
}
doubledouble operator *(int x, const doubledouble& y )
{
return ((const double)(x))*y;
}
doubledouble operator *(const doubledouble& x,const double& y )
{
return y*x;
}
doubledouble operator *(const doubledouble& x,const int y )
{
return y*x;
}
// Self-multiplication
doubledouble& operator *=(doubledouble &This,const doubledouble& y )
{
double hx, tx, hy, ty, C, c;
x86_FIX;
C = Split*This.hi;
hx = C-This.hi;
c = Split*y.hi;
hx = C-hx;
tx =This.hi-hx;
hy = c-y.hi;
C =This.hi*y.hi;
hy = c-hy;
ty = y.hi-hy;
c = ((((hx*hy-C)+hx*ty)+tx*hy)+tx*ty)+(This.hi*y.lo+This.lo*y.hi);
hx = C+c;
This.hi = hx;
This.lo = c+(double)(C-hx);
END_x86_FIX;
return This;
}
doubledouble& operator *=(doubledouble &This,const double& y )
{
double hx, tx, hy, ty, C, c;
x86_FIX;
C = Split*This.hi;
hx = C-This.hi;
c = Split*y;
hx = C-hx;
tx =This.hi-hx;
hy = c-y;
C =This.hi*y;
hy = c-hy;
ty = y-hy;
c = ((((hx*hy-C)+hx*ty)+tx*hy)+tx*ty)+(This.lo*y);
hx = C+c;
This.hi = hx;
This.lo = c+(double)(C-hx);
END_x86_FIX;
return This;
}
doubledouble& operator *=(doubledouble &This,const int y )
{
return This *= (const double)(y);
}
////////////////////////////////////////////////////////////////
////////////////////////// Division //////////////////////////
////////////////////////////////////////////////////////////////
// Reciprocal
doubledouble recip(const doubledouble& y) {
double hc, tc, hy, ty, C, c, U, u;
doubledouble z;
x86_FIX;
C = 1.0/y.hi;
c = Split*C;
hc =c-C;
u = Split*y.hi;
hc = c-hc;
tc = C-hc;
hy = u-y.hi;
U = C*y.hi;
hy = u-hy;
ty = y.hi-hy;
u = (((hc*hy-U)+hc*ty)+tc*hy)+tc*ty;
c = ((((1.0-U)-u))-C*y.lo)/y.hi;
z.hi = C+c;
z.lo = (double)(C-z.hi)+c;
END_x86_FIX;
return z;
}
doubledouble operator /(const doubledouble& x,const doubledouble& y )
{
double hc, tc, hy, ty, C, c, U, u;
doubledouble z;
x86_FIX;
C = x.hi/y.hi;
c =Split*C;
hc =c-C;
u = Split*y.hi;
hc = c-hc;
tc = C-hc;
hy = u-y.hi;
U = C * y.hi;
hy = u-hy;
ty = y.hi-hy;
u = (((hc*hy-U)+hc*ty)+tc*hy)+tc*ty;
c = ((((x.hi-U)-u)+x.lo)-C*y.lo)/y.hi;
z.hi = C+c;
z.lo = (double)(C-z.hi)+c;
END_x86_FIX;
return z;
}
// double/doubledouble:
doubledouble operator /(const double& x,const doubledouble& y )
{
double hc, tc, hy, ty, C, c, U, u;
doubledouble z;
x86_FIX;
C = x/y.hi;
c = Split*C;
hc =c-C;
u = Split*y.hi;
hc = c-hc;
tc = C-hc;
hy = u-y.hi;
U = C*y.hi;
hy = u-hy;
ty = y.hi-hy;
u = (((hc*hy-U)+hc*ty)+tc*hy)+tc*ty;
c = ((((x-U)-u))-C*y.lo)/y.hi;
z.hi = C+c;
z.lo = (double)(C-z.hi)+c;
END_x86_FIX;
return z;
}
doubledouble operator /(const doubledouble& x,const double& y )
{
double hc, tc, hy, ty, C, c, U, u;
doubledouble z;
x86_FIX;
C = x.hi/y;
c = Split*C;
hc = c-C;
u = Split*y;
hc = c-hc;
tc = C-hc;
hy = u-y;
U = C*y;
hy = u-hy;
ty = y-hy;
u = (((hc*hy-U)+hc*ty)+tc*hy)+tc*ty;
c = (((x.hi-U)-u)+x.lo)/y;
z.hi = C+c;
z.lo = (double)(C-z.hi)+c;
END_x86_FIX;
return z;
}
// doubledouble/int
doubledouble operator /(const doubledouble& x,const int y)
{
return x/(const double)(y);
}
doubledouble operator /(const int x,const doubledouble& y)
{
return (const double)(x)/y;
}
// Self-division.
doubledouble& operator /=(doubledouble &This,const doubledouble& y)
{
double hc, tc, hy, ty, C, c, U, u;
x86_FIX;
C =This.hi/y.hi;
c = Split*C;
hc =c-C;
u = Split*y.hi;
hc = c-hc;
tc = C-hc;
hy = u-y.hi;
U = C * y.hi;
hy = u-hy;
ty = y.hi-hy;
u = (((hc*hy-U)+hc*ty)+tc*hy)+tc*ty;
c = ((((This.hi-U)-u)+This.lo)-C*y.lo)/y.hi;
u = C+c;
This.hi = u;
This.lo = (double)(C-u)+c;
END_x86_FIX;
return This;
}
doubledouble& operator /=(doubledouble &This,const double& y)
{
double hc, tc, hy, ty, C, c, U, u;
x86_FIX;
C =This.hi/y;
c = Split*C;
hc =c-C;
u = Split*y;
hc = c-hc;
tc = C-hc;
hy = u-y;
U = C * y;
hy = u-hy;
ty = y-hy;
u = (((hc*hy-U)+hc*ty)+tc*hy)+tc*ty;
c = (((This.hi-U)-u)+This.lo)/y;
u = C+c;
This.hi = u;
This.lo = (double)(C-u)+c;
END_x86_FIX;
return This;
}
doubledouble& operator /=(doubledouble &This,const int y)
{
return (This) /=(const double)(y);
}
doubledouble Qcopysign(const doubledouble& x,const double y)
{
if (y>=0.0) return ddfabs(x);
else return -ddfabs(x);
}
doubledouble atodd(const char *s)
{
doubledouble result = {
0.0,0.0 };
int n, sign, ex = 0;
/* eat whitespace */
while (*s == ' ' || *s == '\t' || *s == '\n') s++;
switch (*s) { // get sign of mantissa
case '-':
{
sign = -1;
s++;
break;
}
case '+':
s++; // no break
default:
sign = 1;
}
/* get digits before decimal point */
while (n=(*s++)-'0', n>=0 && n<10) result = 10.0*result+n;
s--;
if (*s == '.') /* get digits after decimal point */ {
s++;
while (n=(*s++)-'0', n>=0 && n<10) {
result = 10.0*result+n;
--ex;
}
s--;
}
if (*s == 'e' || *s == 'E') /* get exponent */ ex+=atoi(++s);
/* exponent adjustment */
while (ex-- > 0) result *= 10.0;
while (++ex < 0) result /= 10.0;
return (sign>=0) ? result : -result;
}
// Square (faster than x*x)
doubledouble sqr(const doubledouble& x)
{
double hx,tx,C,c;
doubledouble r;
x86_FIX;
C=Split*x.hi;
hx=C-x.hi;
hx=C-hx;
tx=x.hi-hx;
C=x.hi*x.hi;
c=((((hx*hx-C)+2.0*hx*tx))+tx*tx)+2.0*x.hi*x.lo;
hx=C+c;
r.hi=hx;
r.lo = c+(C-hx);
END_x86_FIX;
return r;
}
extern double pow(double,double);
// doubledouble^int
doubledouble powint(const doubledouble& u, int c)
{
doubledouble result;
if (c<0) return recip(powint(u,-c));
switch (c) {
case 0:
result.hi = result.lo = 0.0;
if (u.hi == 0.0)
result.lo = pow(0.0,0.0);
else
result.lo = 1.0;
return result;
// let math.h handle 0^0.
case 1:
return u;
case 2:
return sqr(u);
case 3:
return sqr(u)*u;
default:
{ // binary method
int n=c, m;
doubledouble y={
1.0,0.0 }
, z=u;
if (n<0) n=-n;
while (1) {
m=n;
n/=2;
if (n+n!=m) { // if m odd
y*=z;
if (0==n) return y;
}
z=sqr(z);
}
}
}
/* not reached */
return u;
}
doubledouble new_doubledouble(double x,double y)
{
doubledouble result;
x86_FIX;
result.hi = x+y;
result.lo = y+(x-result.hi);
END_x86_FIX;
return result;
}
// Floor. See Graham, Knuth and Patashnik `Concrete Mathematics' page 70.
// Greatest integer <= x
// maybe floor_s is better?
doubledouble ddfloor(const doubledouble& x)
{
double fh=floor(x.hi), fl=floor(x.lo);
double t1=x.hi-fh;
double t2=x.lo-fl;
double t3=t1+t2;
int t;
t3 = x.hi-fh+x.lo-fl;
t=(int)(floor(t3));
switch (t) {
case 0:
return new_doubledouble(fh,0.0)+new_doubledouble(fl,0.0);
case 1:
return new_doubledouble(fh,0.0)+new_doubledouble(fl+1,0.0);
// case 2 can only arise when fp({hi})<=1, fp({lo})<=1, but
fp({hi}+{lo})=2
case 2:
return new_doubledouble(fh,0.0)+new_doubledouble(fl+1.0,0.0);
}
// never get here
return new_doubledouble(0.0,0.0);
}
// Signum
int sign(doubledouble& x)
{
if (x.hi>0.0) return 1;
else if (x.hi<0.0) return -1;
else return 0;
}
char *doubledoubletoa(doubledouble &x,char *s)
{
int Digits;
doubledouble ten = new_doubledouble(10.0,0.0),l,y,z;
double q;
int m,n,d,i;
if (x.hi==0.0) {
strcpy(s, "0.0 ");
return s;
}
if (x.hi!=x.hi) {
strcpy(s , "NaN ");
return s;
}
Digits=34;
y=ddfabs(x);
q=log10(y.hi);
n=(int)(floor(q));
if (n<0) n++;
if (n == 0)
l = new_doubledouble(1.0,0.0);
else
l = powint(ten,n);
y = y/l;
if (sign(x)<0) *s++ = '-'; //else s<<" ";
d = Digits>34 ? 34 : Digits;
d = d<3 ? 3 : d;
for (i=1; i<=d; i++) {
if (2==i) *s++ = '.';
m = (int)(ddfloor(y).hi);
if (m<0 || m>9) {
// fprintf(stderr,
// "Internal error in doubledouble.cc: digit=%d",m);
}
sprintf(s,"%d",m);
s += strlen(s);
// z.lo = 0.0;
// z.hi = (double)m;
z = new_doubledouble((double)m,0.0);
y = (y-z)*ten;
if (y.hi<=0.0) break; // x must be an integer
//if (y.hi==0.0) break; // ???????????
}
if (n!=0) {
*s++ = 'e';
sprintf(s,"%d",n);
s += strlen(s);
}
else *s++ = 0;
return s;
}
int operator> (const doubledouble& x, const doubledouble& y)
{
return (x.hi> y.hi) || (x.hi==y.hi && x.lo> y.lo);
}
int operator>=(const doubledouble& x, const doubledouble& y)
{
return (x.hi>y.hi) || (x.hi==y.hi && x.lo>=y.lo);
}
int operator< (const doubledouble& x, const doubledouble& y)
{
return (x.hi< y.hi) || (x.hi==y.hi && x.lo< y.lo);
}
int operator<=(const doubledouble& x, const doubledouble& y)
{
return (x.hi<y.hi) || (x.hi==y.hi && x.lo<=y.lo);
}
int operator==(const doubledouble& x, const doubledouble& y)
{
return x.hi==y.hi && x.lo==y.lo;
}
int operator!=(const doubledouble& x, const doubledouble& y)
{
return x.hi!=y.hi || x.lo!=y.lo;
}
struct ddConstants {
doubledouble Log2;
doubledouble Log10;
doubledouble Pi;
doubledouble TwoPi;
doubledouble Pion2;
doubledouble Pion4;
doubledouble _Pi;
} ddconst;
void InitConstants(void)
{
ddconst.Log2 =atodd("0.6931471805599453094172321214581765680755");
ddconst.Log10=atodd("2.302585092994045684017991454684364207601");
ddconst.Pi =atodd("3.1415926535897932384626433832795028841972");
ddconst.TwoPi=atodd("6.2831853071795864769252867665590057683943");
ddconst.Pion2=atodd("1.5707963267948966192313216916397514420985");
ddconst.Pion4=atodd("0.7853981633974483096156608458198757210493");
ddconst._Pi =atodd("0.3183098861837906715377675267450287240689");
}
doubledouble ddldexp(const doubledouble x, const int exp)
{ // x*2^exp
return new_doubledouble(ldexp(x.hi,exp),ldexp(x.lo,exp));
}
// Another floor. V. Shoup 97 Sep 23...
doubledouble ddfloor_s(const doubledouble& x)
{
double fhi=floor(x.hi);
if (fhi!=x.hi)
return new_doubledouble(fhi,0.0);
else
return new_doubledouble(fhi,floor(x.lo));
}
// rint (round to nearest int)
doubledouble ddrint(const doubledouble& x)
{
const doubledouble &rx = x+new_doubledouble(0.5,0.0);
return ddfloor(rx);
}
doubledouble ddexp(const doubledouble& x)
{
/* Uses some ideas by Alan Miller
Method:
x x.log2(e) nint[x.log2(e)] + frac[x.log2(e)]
e = 2 = 2
iy fy
= 2 . 2
Then
fy y.loge(2)
2 = e
Now y.loge(2) will be less than 0.3466 in absolute value.
This is halved and a Pade' approximant is used to approximate e^x over
the region (-0.1733, +0.1733). This approximation is then squared.
*/
int iy;
doubledouble y,temp,ysq,sum1,sum2;
if (x.hi<-744.4400719213812) return new_doubledouble(0.0,0.0); //
exp(x)<1e-300
y=x/ddconst.Log2;
iy = (int)(ddrint(y)).hi;
temp=new_doubledouble((double)iy,0.0);
y=(y-temp)*ddconst.Log2;
y=ddldexp(y,-1);
ysq=sqr(y);
sum1=y*((((ysq+3960.)*ysq+2162160.)*ysq+302702400.)*ysq+8821612800.);
sum2=(((90.*ysq+110880.)*ysq+30270240.)*ysq+2075673600.)*ysq+17643225600.;
/*
sum2 + sum1 2.sum1
Now approximation = ----------- = 1 + ----------- = 1 + 2.temp
sum2 - sum1 sum2 - sum1
Then (1 + 2.temp)^2 = 4.temp.(1 + temp) + 1
*/
temp=sum1/(sum2-sum1);
y=temp*(temp+1);
y=ddldexp(y,2);
return ddldexp(y+1,iy);
}
// Natural logarithm
doubledouble ddlog(const doubledouble& x)
{ // Newton method
doubledouble s,e;
if (x.hi<0.0) {
fprintf(stderr,"\ndoubledouble: attempt to take log of negative
argument!\n");
return new_doubledouble(0.0,0.0);
}
if (x.hi==0.0) {
fprintf(stderr,"\ndoubledouble: attempt to take log(0)!\n");
return new_doubledouble(0.0,0.0);
}
s=new_doubledouble(log(x.hi),0.0), e=ddexp(s); // s=double approximation to
result
return s+(x-e)/e; // Newton correction, good enough
//doubledouble d=(x-e)/e; return s+d-0.5*sqr(d); // or 2nd order correction
}
void base_and_prec(void) { // Linnainmaa ACM TOMS 7, 272 Thm 3
int p;
x86_FIX;
printf("Base and precision determination by Linnainmaa's method:\n");
{
double U,R,u,r,beta;
U=4.0/3.0;
U-=1;
U*=3;
U-=1;
U=fabs(U);
R=U/2+1;
R-=1;
if (R!=0.0) U=R;
u=2;
u/=3;
u-=0.5;
u*=3;
u-=0.5;
u=fabs(u);
r=u/2+0.5;
r-=0.5;
if (r!=0.0) u=r;
beta=U/u;
p=(int)(-log(u)/log(beta)+0.5);
printf("Type double: base is %g, precision %d\n",beta,p);
}
{
doubledouble ddU,ddR,u,r,beta;
ddU=new_doubledouble(4.0,0.0);
ddU/=new_doubledouble(3.0,0.0);
ddU-=1;
ddU*=3;
ddU-=1;
ddU=ddfabs(ddU);
ddR=ddU/2+1;
ddR-=1;
if (ddR.hi!=0.0) ddU=ddR;
u=new_doubledouble(2.0,0.0);
u/=3.0;
u-=0.5;
u*=3;
u-=0.5;
u=ddfabs(u);
r=u/2+0.5;
r-=0.5;
if (r.hi!=0.0) u=r;
beta=ddU/u;
p=(int)(-ddlog(u)/ddlog(beta)+0.5).hi;
printf("Type doubledouble: base is %g, precision %d\n",beta.hi,p);
}
END_x86_FIX;
}
// Assumption: if it works, don't even bother doing output
int Verify(char msg[], doubledouble correct, doubledouble computed)
{
// default fractional error allowed.
doubledouble delta = new_doubledouble(1.1e-31,0.0);
if(correct == computed)
return 1;
else if(correct.hi == 0.0) // can't do a relative test
{
error:
printf("%s", msg);
printf(" corret.hi %g correct.lo %g", correct.hi,correct.lo);
printf("\ngot hi %g, lo %g\n",computed.hi,computed.lo);
return 0;
}
else if(ddfabs((correct-computed)/correct) <= delta)
return 1;
else goto error;
}
int main(void)
{
doubledouble a = new_doubledouble(5.0,0.0);
doubledouble b = new_doubledouble(4.0,0.0);
doubledouble c = a*b;
double x1,x2;
char buffer[256];
int i;
InitConstants();
base_and_prec();
doubledoubletoa(ddconst.Log2,buffer);
printf("%s\n",buffer);
x1=(pow(2,53)-2)/pow(2,53);
x2=(3.0)/pow(2,55);
Verify("Test -2: should be 0: ",
new_doubledouble(0.0,0.0),
ddfloor(new_doubledouble(x1,0.0)+new_doubledouble(x2,0.0)));
Verify("Test -1: should be 0: ", new_doubledouble(0.0,0.0),
ddfloor(atodd("0.9999999999999997779553950749686919152737")
+
atodd("0.00000000000000008326672684688674053177237510681152343750")));
for (i=0; i<5;i++) {
c = c*c;
doubledoubletoa(c,buffer);
printf("%s\n",buffer);
a = c - 1.0;
b = new_doubledouble(-1.0,0.0);
a = c + b;
doubledoubletoa(a,buffer);
printf("%s\n",buffer);
}
return 0;
}