#define VECTREX 1 #include "muldiv.h" // vectrex random: unsigned int16_t _x=12, _a=34, _b=56, _c=78; unsigned int16_t vrandom(void) { _x++; _a = (_a^_c^_x); _b = (_b+_a); return _c = ((_c+(_b>>1))^_a); } unsigned int32_t random32(void) { return (((unsigned int32_t)vrandom() << 24) ^ ((unsigned int32_t)vrandom() << 16) ^ ((unsigned int32_t)vrandom() << 8) ^ (unsigned int32_t)vrandom()); } // ===================================================================================== // DIVIDING // ===================================================================================== /* function to divide arbitrary unsigned 8 bit numerator by unsigned 8 bit divisor */ unsigned int8_t udiv8_by_8(unsigned int8_t num, unsigned int8_t denom) { unsigned int16_t temp; unsigned int8_t rem; static const void *lab[256] = { &&d0, &&d1, &&d2, &&d3, &&d4, &&d5, &&d6, &&d7, &&d8, &&d9, &&d10, &&d11, &&d12, &&d13, &&d14, &&d15, &&d16, &&d17, &&d18, &&d19, &&d20, &&d21, &&d22, &&d23, &&d24, &&d25, &&d26, &&d27, &&d28, &&d29, &&d30, &&d31, &&d32, &&d33, &&d34, &&d35, &&d36, &&d37, &&d38, &&d39, &&d40, &&d41, &&d42, &&d43, &&d44, &&d45, &&d46, &&d47, &&d48, &&d49, &&d50, &&d51, &&d52, &&d53, &&d54, &&d55, &&d56, &&d57, &&d58, &&d59, &&d60, &&d61, &&d62, &&d63, &&d64, &&d65, &&d66, &&d67, &&d68, &&d69, &&d70, &&d71, &&d72, &&d73, &&d74, &&d75, &&d76, &&d77, &&d78, &&d79, &&d80, &&d81, &&d82, &&d83, &&d84, &&d85, &&dd, &&dd, &&dd, &&dd, &&dd, &&dd, &&dd, &&dd, &&dd, &&dd, &&dd, &&dd, &&dd, &&dd, &&dd, &&dd, &&dd, &&dd, &&dd, &&dd, &&dd, &&dd, &&dd, &&dd, &&dd, &&dd, &&dd, &&dd, &&dd, &&dd, &&dd, &&dd, &&dd, &&dd, &&dd, &&dd, &&dd, &&dd, &&dd, &&dd, &&dd, &&dd, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, }; goto *lab[denom]; d: return num >= denom ? 1U : 0U; dd: return num < denom ? 0U : (num < (denom<<1) ? 1U : 2U); d0: return 255U; // User needs to supply some sort of error handler. d1: return num; d2: return div2(num); d3: return div3(num); d4: return div4(num); d5: return div5(num); d6: return div6(num); d7: return div7(num); d8: return div8(num); d9: return div9(num); d10: return div10(num); d11: return div11(num); d12: return div12(num); d13: return div13(num); d14: return div14(num); d15: return div15(num); d16: return div16(num); d17: return div17(num); d18: return div18(num); d19: return div19(num); d20: return div20(num); d21: return div21(num); d22: return div22(num); d23: return div23(num); d24: return div24(num); d25: return div25(num); d26: return div26(num); d27: return div27(num); d28: return div28(num); d29: return div29(num); d30: return div30(num); d31: return div31(num); d32: return div32(num); d33: return div33(num); d34: return div34(num); d35: return div35(num); d36: return div36(num); d37: return div37(num); d38: return div38(num); d39: return div39(num); d40: return div40(num); d41: return div41(num); d42: return div42(num); d43: return div43(num); d44: return div44(num); d45: return div45(num); d46: return div46(num); d47: return div47(num); d48: return div48(num); d49: return div49(num); d50: return div50(num); d51: return div51(num); d52: return div52(num); d53: return div53(num); d54: return div54(num); d55: return div55(num); d56: return div56(num); d57: return div57(num); d58: return div58(num); d59: return div59(num); d60: return div60(num); d61: return div61(num); d62: return div62(num); d63: return div63(num); d64: return div64(num); d65: return div65(num); d66: return div66(num); d67: return div67(num); d68: return div68(num); d69: return div69(num); d70: return div70(num); d71: return div71(num); d72: return div72(num); d73: return div73(num); d74: return div74(num); d75: return div75(num); d76: return div76(num); d77: return div77(num); d78: return div78(num); d79: return div79(num); d80: return div80(num); d81: return div81(num); d82: return div82(num); d83: return div83(num); d84: return div84(num); d85: return div85(num); } // this division code started off as a simple long division shift-and-subtract taken // from the "C" section of: https://gtoal.com/vectrex/vectrex-muldiv.c.html // Speeded up by using a binary chop to detect where to subtract from. unsigned int32_t div_core32(unsigned int32_t numerator, unsigned int32_t denominator, int8_t max_shift) { unsigned int32_t quotient, shifted; int8_t shift, min_shift; #define DIV_BODY \ quotient = 0LL; \ for (;;) { \ if (numerator < denominator) break; /* < 1.0 (ie 0) */ \ min_shift = 0; \ for (;;) { \ shift = (min_shift+max_shift)/2; \ shifted = denominator<<(int32_t)shift; \ /* The above and below is why the data type used *must* match the 'max_shift' given width! */\ if ((shifted>>(int32_t)shift) != denominator) { /* shift causes overflow? */ \ max_shift = shift; /* (exclusive upper bound, remember?) */ \ } else { \ if (numerator < shifted) max_shift = shift; else min_shift = shift; \ if (min_shift+1==max_shift) break; \ } \ } \ shift = max_shift = min_shift; /* max shift is remembered for next upper bound */ \ if (numerator < (denominator<<shift)) { \ quotient += 1UL<<(int16_t)shift; /* remainder is screwed up but we don't care about that for now */ \ break; /* would go negative? */ \ } \ numerator -= denominator<<shift; /* can't go negative */ \ quotient += 1UL<<(int16_t)shift; \ } \ return quotient; /* remainder = -numerator; */ DIV_BODY } // Not efficient to use 32 bit variables for 16-bit calculations on the 8-bit Vectrex... so: unsigned int16_t div_core16(unsigned int16_t numerator, unsigned int16_t denominator, int8_t max_shift) { unsigned int16_t quotient, shifted; int8_t shift, min_shift; DIV_BODY // C++ users, consider this a poor man's template. } /* In header as static inlines: unsigned int32_t udiv32_by_32(unsigned int32_t numerator, unsigned int32_t denominator); unsigned int32_t udiv32_by_16(unsigned int32_t numerator, unsigned int16_t denominator); unsigned int32_t udiv32_by_8(unsigned int32_t numerator, unsigned int denominator); unsigned int16_t udiv16_by_16(unsigned int16_t numerator, unsigned int16_t denominator); unsigned int16_t udiv16_by_8(unsigned int16_t numerator, unsigned int denominator); unsigned int udiv8_by_8(unsigned int numerator, unsigned int denominator); */ int32_t sdiv32_by_32(int32_t numerator, int32_t denominator) { int sign = 1; if (denominator == 0) return(int32_t)( numerator < 0 ? 0x80000000LL /*MIN_SIGNED(int32_t, 32)*/ : 0x7FFFFFFFLL /*MAX_SIGNED(int32_t, 32)*/); // remainder = 0; if ((numerator == (int32_t)0x80000000LL /*MIN_SIGNED(int32_t, 32)*/) && (denominator == -1)) return 0x7FFFFFFFLL /*MAX_SIGNED(int32_t, 32)*/; // remainder = 0; if (numerator < 0) { sign = -1; numerator = -numerator;} if (denominator < 0) { sign = -sign; denominator = -denominator;} return (int32_t)div_core32((unsigned int32_t)numerator,(unsigned int32_t)denominator,32) * (int32_t)sign; } int32_t sdiv32_by_16(int32_t numerator, int16_t denominator) { int sign = 1; if (denominator == 0) return (int32_t)( numerator < 0 ? 0x80000000LL /*MIN_SIGNED(int32_t, 32)*/ : 0x7FFFFFFFLL /*MAX_SIGNED(int32_t, 32)*/); // remainder = 0; if ((numerator == (int32_t)0x80000000LL /*MIN_SIGNED(int32_t, 32)*/) && (denominator == -1)) return 0x7FFFFFFFLL /*MAX_SIGNED(int32_t, 32)*/; if (numerator < 0) { sign = -1; numerator = -numerator;} if (denominator < 0) { sign = -sign; denominator = -denominator;} return (int32_t)div_core32((unsigned int32_t)numerator,(unsigned int32_t)denominator,32) * (int32_t)sign; } int32_t sdiv32_by_8(int32_t numerator, int8_t denominator) { int sign = 1; if (denominator == 0) return (int32_t)(numerator < 0 ? 0x80000000LL /*MIN_SIGNED(int32_t, 32)*/ : 0x7FFFFFFFLL /*MAX_SIGNED(int32_t, 32)*/); // remainder = 0; if ((numerator == (int32_t)0x80000000LL /*MIN_SIGNED(int32_t, 32)*/) && (denominator == -1)) return 0x7FFFFFFFLL/*MAX_SIGNED(int32_t, 32)*/; // remainder = 0; if (numerator < 0) { sign = -1; numerator = -numerator;} if (denominator < 0) { sign = -sign; denominator = -denominator;} return (int32_t)div_core32((unsigned int32_t)numerator,(unsigned int32_t)denominator,32) * (int32_t)sign; } int16_t sdiv16_by_16(int16_t numerator, int16_t denominator) { int sign = 1; if (denominator == 0) return (int16_t)(numerator < 0 ? 0x8000 /*MIN_SIGNED(int16_t, 16)*/ : 0x7FFF /*MAX_SIGNED(int16_t, 16)*/); // remainder = 0; if ((numerator == (int16_t)0x8000 /*MIN_SIGNED(int16_t, 16)*/) && (denominator == -1)) return 0x7FFF /*MAX_SIGNED(int16_t, 16)*/; // remainder = 0; if (numerator < 0) { sign = -1; numerator = -numerator;} if (denominator < 0) { sign = -sign; denominator = -denominator;} return (int16_t)div_core16((unsigned int16_t)numerator,(unsigned int16_t)denominator,16) * (int16_t)sign; } int16_t sdiv16_by_8(int16_t numerator, int8_t denominator) { int sign = 1; if (denominator == 0) return (int16_t)(numerator < 0 ? 0x8000 /*MIN_SIGNED(int16_t, 16)*/ : 0x7FFF /*MAX_SIGNED(int16_t, 16)*/); // remainder = 0; if ((numerator == (int16_t)0x8000 /*MIN_SIGNED(int16_t, 16)*/) && (denominator == -1)) return 0x7FFF /*MAX_SIGNED(int16_t, 16)*/; // remainder = 0; if (numerator < 0) { sign = -1; numerator = -numerator;} if (denominator < 0) { sign = -sign; denominator = -denominator;} return (int16_t)div_core16((unsigned int16_t)numerator,(unsigned int16_t)denominator,16) * (int16_t)sign; } int8_t sdiv8_by_8(int8_t numerator, int8_t denominator) { int sign = 1; if (denominator == 0) return (int8_t)(numerator < 0 ? 0x80 /*MIN_SIGNED(int, 8)*/ : 0x7F /*MAX_SIGNED(int, 8)*/); // remainder = 0; if ((numerator == (int8_t)0x80 /*MIN_SIGNED(int, 8)*/) && (denominator == -1)) return 0x7F /*MAX_SIGNED(int, 8)*/; // remainder = 0; if (numerator < 0) { sign = -1; numerator = -numerator;} if (denominator < 0) { sign = -sign; denominator = -denominator;} return (int8_t)udiv8_by_8((unsigned int8_t)numerator,(unsigned int8_t)denominator)*sign; } #ifdef DBMAIN #ifdef VECTREX void crash(int reason) { for (;;) { } } #define TESTS 1000L #else #include <time.h> #define TESTS 10000000L #endif int main(int argc, char **argv) { #ifndef VECTREX clock_t t; double base_time, time_taken, original_time, my_time; #endif int32_t i; int32_t dividend, divisor, result; int32_t cksum; // default range is everything. for (i=0; i < TESTS; i++) { // should force more randomicity in the number of bits used. Half will be 31 bits long. Decreasingly few shorter ones. dividend = random32()&MAX_SIGNED(int32_t, 32); do {divisor = random32()&MAX_SIGNED(int32_t, 32); } while (divisor==0); result = sdiv32_by_32(dividend, divisor); if (dividend/divisor != result) { #ifndef VECTREX fprintf(stdout, "sdiv32_by_32(%ld,%ld) = %ld but %ld/%ld = %ld ?\n", dividend, divisor, result, dividend, divisor, dividend/divisor); exit(1); #else crash(1); #endif } } #ifndef VECTREX fprintf(stdout, "sdiv32_by_32: %ld randomised signed 32-bit by 32-bit tests passed!\n", TESTS); #endif for (i=0; i < TESTS; i++) { dividend = random32()&MAX_UNSIGNED(unsigned int32_t, 32); do { divisor = random32()&MAX_UNSIGNED(unsigned int32_t, 32); } while (divisor==0); result = udiv32_by_32(dividend, divisor); if ((unsigned int32_t)dividend/(unsigned int32_t)divisor != (unsigned int32_t)result) { #ifndef VECTREX fprintf(stdout, "udiv32_by_32(%lu,%lu) = %lu but %lu/%lu = %lu ?\n", (unsigned int32_t)dividend, (unsigned int32_t)divisor, (unsigned int32_t)result, (unsigned int32_t)dividend, (unsigned int32_t)divisor, (unsigned int32_t)dividend/(unsigned int32_t)divisor); exit(1); #else crash(2); #endif } } #ifndef VECTREX fprintf(stdout, "udiv32_by_32: %ld randomised unsigned 32-bit by 32-bit tests passed!\n", TESTS); #endif for (i=0; i < TESTS; i++) { dividend = random32()&MAX_SIGNED(int32_t, 32); do { divisor = random32()&MAX_SIGNED(int32_t, 16); } while (divisor==0); result = sdiv32_by_16(dividend, divisor); if (dividend/divisor != result) { #ifndef VECTREX fprintf(stdout, "sdiv32_by_16(%ld,%ld) = %ld but %ld/%ld = %ld ?\n", dividend, divisor, result, dividend, divisor, dividend/divisor); exit(1); #else crash(3); #endif } } #ifndef VECTREX fprintf(stdout, "sdiv32_by_16: %ld randomised signed 32-bit by 16-bit tests passed!\n", TESTS); #endif for (i=0; i < TESTS; i++) { dividend = random32()&MAX_UNSIGNED(unsigned int32_t, 32); do { divisor = random32()&MAX_UNSIGNED(unsigned int16_t, 16); } while (divisor==0); result = udiv32_by_16(dividend, divisor); if ((unsigned int32_t)dividend/(unsigned int16_t)divisor != (unsigned int32_t)result) { #ifndef VECTREX fprintf(stdout, "udiv32_by_16(%lu,%lu) = %lu but %lu/%lu = %lu ?\n", dividend, divisor, result, dividend, divisor, dividend/divisor); exit(1); #else crash(4); #endif } } #ifndef VECTREX fprintf(stdout, "udiv32_by_16: %ld randomised unsigned 32-bit by 8-bit tests passed!\n", TESTS); #endif for (i=0; i < TESTS; i++) { dividend = random32()&MAX_SIGNED(int32_t, 32); do { divisor = random32()&MAX_SIGNED(int8_t, 8); } while (divisor==0); result = sdiv32_by_8(dividend, divisor); if ((int32_t)dividend/(int8_t)divisor != (int32_t)result) { #ifndef VECTREX fprintf(stdout, "sdiv32_by_16(%ld,%ld) = %ld but %ld/%ld = %ld ?\n", dividend, divisor, result, dividend, divisor, dividend/divisor); exit(1); #else crash(5); #endif } } #ifndef VECTREX fprintf(stdout, "sdiv32_by_8: %ld randomised signed 32-bit by 8-bit tests passed!\n", TESTS); #endif for (i=0; i < TESTS; i++) { dividend = random32()&MAX_UNSIGNED(unsigned int32_t, 32); do { divisor = random32()&MAX_UNSIGNED(unsigned int8_t, 8); } while (divisor==0); result = udiv32_by_8(dividend, divisor); if ((unsigned int32_t)dividend/(unsigned int8_t)divisor != (unsigned int32_t)result) { #ifndef VECTREX fprintf(stdout, "udiv32_by_16(%lu,%lu) = %lu but %lu/%lu = %lu ?\n", dividend, divisor, result, dividend, divisor, dividend/divisor); exit(1); #else crash(6); #endif } } #ifndef VECTREX fprintf(stdout, "udiv32_by_8: %ld randomised unsigned 32-bit by 16-bit tests passed!\n", TESTS); #endif for (i=0; i < TESTS; i++) { dividend = random32()&MAX_SIGNED(int32_t, 16); do { divisor = random32()&MAX_SIGNED(int32_t, 16); } while (divisor==0); result = sdiv16_by_16(dividend, divisor); if (dividend/divisor != result) { #ifndef VECTREX fprintf(stdout, "sdiv16_by_16(%ld,%ld) = %ld but %ld/%ld = %ld ?\n", dividend, divisor, result, dividend, divisor, dividend/divisor); exit(1); #else crash(7); #endif } } #ifndef VECTREX fprintf(stdout, "sdiv16_by_16: %ld randomised signed 16-bit by 16-bit tests passed!\n", TESTS); #endif for (i=0; i < TESTS; i++) { dividend = random32()&MAX_UNSIGNED(unsigned int32_t, 16); do { divisor = random32()&MAX_UNSIGNED(unsigned int32_t, 16); } while (divisor==0); result = udiv16_by_16(dividend, divisor); if (dividend/divisor != result) { #ifndef VECTREX fprintf(stdout, "udiv16_by_16(%lu,%lu) = %lu but %lu/%lu = %lu ?\n", dividend, divisor, result, dividend, divisor, dividend/divisor); exit(1); #else crash(8); #endif } } #ifndef VECTREX fprintf(stdout, "udiv16_by_16: %ld randomised unsigned 16-bit by 16-bit tests passed!\n", TESTS); #endif for (i=0; i < TESTS; i++) { dividend = random32()&MAX_SIGNED(int32_t, 16); do { divisor = random32()&MAX_SIGNED(int32_t, 8); } while (divisor==0); result = sdiv16_by_8(dividend, divisor); if (dividend/divisor != result) { #ifndef VECTREX fprintf(stdout, "sdiv16_by_8(%ld,%ld) = %ld but %ld/%ld = %ld ?\n", dividend, divisor, result, dividend, divisor, dividend/divisor); exit(1); #else crash(9); #endif } } #ifndef VECTREX fprintf(stdout, "sdiv16_by_8: %ld randomised signed 16-bit by 8-bit tests passed!\n", TESTS); #endif for (i=0; i < TESTS; i++) { dividend = random32()&MAX_UNSIGNED(unsigned int32_t, 16); do { divisor = random32()&MAX_UNSIGNED(unsigned int32_t, 8); } while (divisor==0); result = udiv16_by_8(dividend, divisor); if (dividend/divisor != result) { #ifndef VECTREX fprintf(stdout, "udiv16_by_8(%lu,%lu) = %lu but %lu/%lu = %lu ?\n", dividend, divisor, result, dividend, divisor, dividend/divisor); exit(1); #else crash(10); #endif } } #ifndef VECTREX fprintf(stdout, "udiv16_by_8: %ld randomised unsigned 16-bit by 8-bit tests passed!\n", TESTS); #endif for (i=0; i < TESTS; i++) { dividend = random32()&MAX_SIGNED(int32_t, 8); do { divisor = random32()&MAX_SIGNED(int32_t, 8); } while (divisor==0); result = sdiv8_by_8(dividend, divisor); if (dividend/divisor != result) { #ifndef VECTREX fprintf(stdout, "sdiv8_by_8(%ld,%ld) = %ld but %ld/%ld = %ld ?\n", dividend, divisor, result, dividend, divisor, dividend/divisor); exit(1); #else crash(11); #endif } } #ifndef VECTREX fprintf(stdout, "sdiv8_by_8: %ld randomised signed 8-bit by 8-bit tests passed!\n", TESTS); #endif for (i=0; i < TESTS; i++) { dividend = random32()&MAX_UNSIGNED(unsigned char, 8); do { divisor = random32()&MAX_UNSIGNED(unsigned char, 8); } while (divisor==0); result = udiv8_by_8(dividend, divisor); if (dividend/divisor != result) { #ifndef VECTREX fprintf(stdout, "udiv8_by_8(%lu,%lu) = %lu but %lu/%lu = %lu ?\n", dividend, divisor, result, dividend, divisor, dividend/divisor); exit(1); #else crash(12); #endif } } #ifndef VECTREX fprintf(stdout, "udiv8_by_8: %ld randomised unsigned 8-bit by 8-bit tests passed!\n", TESTS); #endif unsigned char num, den; num = 0; for (;;) { den = 1; for (;;) { if (num/den != udiv8_by_8(num,den)) { #ifndef VECTREX fprintf(stdout, "udiv8_by_8(%d,%d) = %d but %d/%d = %d ?\n", num, den, udiv8_by_8(num,den), num, den, num/den); exit(1); #else crash(13); #endif } den += 1; if (den == 0) break; } num += 1; if (num == 0) break; } #ifndef VECTREX fprintf(stdout, "udiv8_by_8: Complete 256x256 unsigned 8-bit tests passed!\n"); #endif return 0; } #endif #define umul8(x,y) ((x)*(y)) // ===================================================================================== // MULTIPLYING // ===================================================================================== // This first implementation is based on: // https://stackoverflow.com/questions/22845801/32-bit-signed-integer-multiplication-without-using-64-bit-data-type/22847373#22847373 // with some minimal tweaking to supply two different word sizes... /* compute the full 32-bit product of two signed 16-bit integers */ unsigned int32_t umultiply16 (unsigned int16_t a, unsigned int16_t b) { /* split operands into halves */ unsigned int8_t al = (unsigned int8_t)a; unsigned int8_t ah = (unsigned int8_t)(a >> 8); unsigned int8_t bl = (unsigned int8_t)b; unsigned int8_t bh = (unsigned int8_t)(b >> 8); unsigned int16_t p0 = umul8(al, bl); /* compute partial products using native 8x8 -> 16 MUL */ unsigned int16_t p1 = umul8(al, bh); unsigned int16_t p2 = umul8(ah, bl); unsigned int16_t p3 = umul8(ah, bh); unsigned int16_t cy = ((p0 >> 8) + (unsigned int8_t)p1 + (unsigned int8_t)p2) >> 8; /* sum partial products for high */ unsigned int16_t umul16hi = p3 + (p2 >> 8) + (p1 >> 8) + cy; /* compute the upper 16 bits of the product of two unsigned 16-bit integers */ //unsigned int16_t umul16lo = p0 + (p2 << 8) + (p1 << 8); unsigned int16_t umul16lo = ((p2 + p1) << 8) + p0; return (unsigned int32_t)(((unsigned int32_t)umul16hi<<(unsigned int32_t)16) | (unsigned int32_t)umul16lo); /* bits <31:16> of the product a * b */ } int32_t multiply16 (int16_t a, int16_t b) { /* split operands into halves */ unsigned int8_t al = (unsigned int8_t)a; unsigned int8_t ah = (unsigned int8_t)(a >> 8); unsigned int8_t bl = (unsigned int8_t)b; unsigned int8_t bh = (unsigned int8_t)(b >> 8); unsigned int16_t p0 = umul8(al, bl); /* compute partial products using native 8x8 -> 16 MUL */ unsigned int16_t p1 = umul8(al, bh); unsigned int16_t p2 = umul8(ah, bl); unsigned int16_t p3 = umul8(ah, bh); unsigned int16_t cy = ((p0 >> 8) + (unsigned int8_t)p1 + (unsigned int8_t)p2) >> 8; /* sum partial products for high */ unsigned int16_t umul16hi = (unsigned int16_t)(p3 + (p2 >> 8) + (p1 >> 8) + cy); /* compute the upper 16 bits of the product of two unsigned 16-bit integers */ int16_t mul16hi = (int16_t)(umul16hi - (unsigned int16_t)(((a < 0) ? b : 0) - ((b < 0) ? a : 0))); /* compute the upper 16 bits of the product of two signed 16-bit integers */ /* sum partial products for low */ //unsigned int16_t umul16lo = p0 + (p2 << 8) + (p1 << 8); unsigned int16_t umul16lo = ((p2 + p1) << 8) + p0; return (int32_t)(((int32_t)mul16hi<<(int32_t)16) | (int32_t)((unsigned int32_t)umul16lo)); /* bits <31:16> of the product a * b */ } // ----------- #ifndef VECTREX /* compute the full 64-bit product of two signed 32-bit integers */ int64_t multiply32(int32_t a, int32_t b) { /* split operands into halves */ unsigned int16_t al = (unsigned int16_t)a; unsigned int16_t ah = a >> 16; unsigned int16_t bl = (unsigned int16_t)b; unsigned int16_t bh = b >> 16; /* compute partial products */ unsigned int32_t p0 = umultiply16(al, bl); unsigned int32_t p1 = umultiply16(al, bh); unsigned int32_t p2 = umultiply16(ah, bl); unsigned int32_t p3 = umultiply16(ah, bh); /* sum partial products */ unsigned int32_t cy = ((p0 >> 16) + (unsigned int16_t)p1 + (unsigned int16_t)p2) >> 16; unsigned int32_t umul32hi = p3 + (p2 >> 16) + (p1 >> 16) + cy; /* compute the upper 32 bits of the product of two unsigned 32-bit integers */ int32_t mul32hi = umul32hi - ((a < 0) ? b : 0) - ((b < 0) ? a : 0); /* compute the upper 32 bits of the product of two signed 32-bit integers */ /* sum partial products for low */ //unsigned int32_t umul32lo = p0 + (p2 << 16) + (p1 << 16); unsigned int32_t umul32lo = ((p2 + p1) << 16) + p0; return (int64_t)mul32hi<<32LL | (int64_t)((unsigned int64_t)umul32lo); // requires native 32x32 -> lower-32 multiply. (16x16 does not work) } #endif // ===================================================================================== // The next implementation is based on: // https://www.techiedelight.com/multiply-16-bit-integers-using-8-bit-multiplier/ // although it required a lot more hacking to fit our use-case... unsigned int32_t umultiply16bit(unsigned int16_t m, unsigned int16_t n) { unsigned int8_t mLow = (unsigned int8_t)m; // stores first 8-bits of m unsigned int8_t mHigh = (unsigned int8_t)(m>>8); // stores last 8-bits of m unsigned int8_t nLow = (unsigned int8_t)n; // stores first 8-bits of n unsigned int8_t nHigh = (unsigned int8_t)(n>>8); // stores last 8-bits of n unsigned int16_t mLow_nLow = (unsigned int16_t)umul8(mLow, nLow); // native 8x8 -> 16 MULs unsigned int16_t mHigh_nLow = (unsigned int16_t)umul8(mHigh, nLow); unsigned int16_t mLow_nHigh = (unsigned int16_t)umul8(mLow, nHigh); unsigned int16_t mHigh_nHigh = (unsigned int16_t)umul8(mHigh, nHigh); // return mLow_nLow + ((mHigh_nLow + mLow_nHigh) << 8L) + (mHigh_nHigh << 16L); // -> return (mHigh_nHigh << 16L) + ((mHigh_nLow + mLow_nHigh) << 8L) + mLow_nLow; return (((mHigh_nHigh << 8L) + (mHigh_nLow + mLow_nHigh)) << 8L) + mLow_nLow; } int32_t multiply16bit(int16_t m, int16_t n) { int sign = 1; if (m < 0) {m = -m; sign = -sign;} if (n < 0) {n = -n; sign = -sign;} if (sign < 0) { return -(int32_t)umultiply16bit((unsigned int16_t)m,(unsigned int16_t)n); } else { return (int32_t)umultiply16bit((unsigned int16_t)m,(unsigned int16_t)n); } } // ----------- #ifndef VECTREX static unsigned int64_t umultiply32bit(unsigned int32_t m, unsigned int32_t n) { // could use these 4 as parameters instead of 2 int32_t's: unsigned int16_t mLow = m; // stores first 8-bits of m unsigned int16_t mHigh = m >> 16; // stores last 8-bits of m unsigned int16_t nLow = n; // stores first 8-bits of n unsigned int16_t nHigh = n >> 16; // stores last 8-bits of n unsigned int32_t mLow_nLow = umultiply16bit(mLow, nLow); unsigned int32_t mHigh_nLow = umultiply16bit(mHigh, nLow); unsigned int32_t mLow_nHigh = umultiply16bit(mLow, nHigh); unsigned int32_t mHigh_nHigh = umultiply16bit(mHigh, nHigh); unsigned int32_t LOW, LOW_LOW, SUM, CARRY; SUM = mHigh_nLow + mLow_nHigh; LOW_LOW = SUM << 16L; CARRY = 0; LOW = mLow_nLow+LOW_LOW; if (LOW < mLow_nLow || LOW < LOW_LOW) CARRY = 1; return ((unsigned int64_t) ((SUM >> 16L)+mHigh_nHigh+CARRY) << 32LL) | LOW; } static int64_t multiply32bit(int32_t m, int32_t n) { // for Vectrex, will need to pass in parameters as multiple 16-bit ints (Q16.16 format) int sign = 1; // will fail if both params are largest negative integer. I don't care. if (m < 0) {m = -m; sign = -sign;} if (n < 0) {n = -n; sign = -sign;} if (sign < 0) { return -umultiply32bit(m,n); } else { return umultiply32bit(m,n); } } #endif