/* LICENSE AGREEMENT Copyright (C) <2002> Eli Bendersky This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program, in file license.txt; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ // // Basic fixed point arithmetic // // Using 32bit words - 1 bit for sign, // 17 bits for the integral part, 14 bits for // the fraction (the 17/14 balance can be // modified by setting FB accordingly) // // All arithmetic operations were designed // to fit into 32 bits. // // Representability (approximately): // // -(2^17) <= num <= 2^16 // // // Precision: // // 14 bits in the fractional part allow 2^14, // i.e. 16384 states, so the precision is // 1/16384 =~ 0.00006 // Note: Some precision is lost in multiplication // and division // // Note: It would be nice to turn this into // a class, and make a fixed point object that // behaves like the built in types (with overloaded // operators) // // Eli Bendersky, 29.08.2002 // #include #include #include using namespace std; // Fixed nums are stored in an internal representation // which isn't easy to decipher by hand, use double2fixed // and fixed2double instead. All operations are done with // fixed. // typedef int fixed; // Bits for the fractional part (so there are 31 - FB // bits for the integral part) const int FB = 14; // Max for fractional part const int FMAX = (1 << FB); // Bits for half of the fractional part const int HFB = FB / 2; // Max for half of the fractional part const int HFMAX = (1 << HFB); // Use this to construct fixed numbers from // doubles // fixed double2fixed(double num) { return (fixed) (num * FMAX); } // Use this to get doubles back from fixed numbers // double fixed2double(fixed num) { double fraction = (num & (FMAX - 1)) / (double) FMAX; int whole = num >> FB; return ((double) whole) + fraction; } // 2's complement flip // inline fixed flip(fixed num) { return ~num + 1; } // Addition is a piece of cake :-) // inline fixed add_fixed(fixed num1, fixed num2) { return num1 + num2; } // We use 2's complement numbers, // so subtraction should be performed by adding // the negated num2 // inline fixed sub_fixed(fixed num1, fixed num2) { return num1 + flip(num2); } // The whole operation fits into 32 bit integers. // Some precision is lost... // fixed mul_fixed(fixed num1, fixed num2) { // remainders int r1 = num1 & (HFMAX - 1); int r2 = num2 & (HFMAX - 1); // what's left after the remainder is removed int n1 = num1 - r1; int n2 = num2 - r2; // Partitioned multiplication to achieve maximal accuracy, // basically using the fact that num1*num2/FMAX is actually // (n1+r1)*(n2+r2)/FMAX // fixed res = (n1 / HFMAX) * (n2 / HFMAX); res += ((r1 * n2 / HFMAX) / HFMAX) + ((r2 * n1 / HFMAX) / HFMAX); res += r1 * r2 / FMAX; return res; } // Division is a bit problematic... When you try to divide // by numbers smaller than 1, you will get a division by // zero error. I'm working on a way to solve this problem. // In the meanwhile, you can use this hack: // // x*0.52 = x*52/100 // fixed div_fixed(fixed num, fixed denom) { if (abs(denom) < FMAX + 1) { cerr << "Error: division by 0 will occur\n" << endl; exit(0); } return (fixed) num / (denom >> FB); } // Sample "driver" // int main() { // Create a double and convert it to fixed point double d = 15.68; cout << "d = " << d << endl; fixed fx = double2fixed(d); // Now, some mathematical operations fixed negated = flip(fx); cout << "negated = " << fixed2double(negated) << endl; fixed fx2 = double2fixed(4.21); fixed m = mul_fixed(fx, fx2); cout << "multiplied by 4.21 = " << fixed2double(m) << endl; return 0; }