#define VECTREX 1
#include "muldiv.h"
// vectrex random:
unsigned int16_t _x=12, _a=34, _b=56, _c=78;
unsigned int16_t vrandom(void) {
_x++; _a = (_a^_c^_x); _b = (_b+_a);
return _c = ((_c+(_b>>1))^_a);
}
unsigned int32_t random32(void) {
return (((unsigned int32_t)vrandom() << 24) ^ ((unsigned int32_t)vrandom() << 16) ^ ((unsigned int32_t)vrandom() << 8) ^ (unsigned int32_t)vrandom());
}
// =====================================================================================
// DIVIDING
// =====================================================================================
/* function to divide arbitrary unsigned 8 bit numerator by unsigned 8 bit divisor */
unsigned int8_t udiv8_by_8(unsigned int8_t num, unsigned int8_t denom) {
unsigned int16_t temp;
unsigned int8_t rem;
static const void *lab[256] = {
&&d0, &&d1, &&d2, &&d3, &&d4, &&d5, &&d6, &&d7,
&&d8, &&d9, &&d10, &&d11, &&d12, &&d13, &&d14, &&d15,
&&d16, &&d17, &&d18, &&d19, &&d20, &&d21, &&d22, &&d23,
&&d24, &&d25, &&d26, &&d27, &&d28, &&d29, &&d30, &&d31,
&&d32, &&d33, &&d34, &&d35, &&d36, &&d37, &&d38, &&d39,
&&d40, &&d41, &&d42, &&d43, &&d44, &&d45, &&d46, &&d47,
&&d48, &&d49, &&d50, &&d51, &&d52, &&d53, &&d54, &&d55,
&&d56, &&d57, &&d58, &&d59, &&d60, &&d61, &&d62, &&d63,
&&d64, &&d65, &&d66, &&d67, &&d68, &&d69, &&d70, &&d71,
&&d72, &&d73, &&d74, &&d75, &&d76, &&d77, &&d78, &&d79,
&&d80, &&d81, &&d82, &&d83, &&d84, &&d85, &&dd, &&dd,
&&dd, &&dd, &&dd, &&dd, &&dd, &&dd, &&dd, &&dd,
&&dd, &&dd, &&dd, &&dd, &&dd, &&dd, &&dd, &&dd,
&&dd, &&dd, &&dd, &&dd, &&dd, &&dd, &&dd, &&dd,
&&dd, &&dd, &&dd, &&dd, &&dd, &&dd, &&dd, &&dd,
&&dd, &&dd, &&dd, &&dd, &&dd, &&dd, &&dd, &&dd,
&&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d,
&&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d,
&&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d,
&&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d,
&&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d,
&&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d,
&&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d,
&&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d, &&d,
};
goto *lab[denom];
d: return num >= denom ? 1U : 0U;
dd: return num < denom ? 0U : (num < (denom<<1) ? 1U : 2U);
d0: return 255U; // User needs to supply some sort of error handler.
d1: return num;
d2: return div2(num); d3: return div3(num); d4: return div4(num); d5: return div5(num);
d6: return div6(num); d7: return div7(num); d8: return div8(num); d9: return div9(num);
d10: return div10(num); d11: return div11(num); d12: return div12(num); d13: return div13(num);
d14: return div14(num); d15: return div15(num); d16: return div16(num); d17: return div17(num);
d18: return div18(num); d19: return div19(num); d20: return div20(num); d21: return div21(num);
d22: return div22(num); d23: return div23(num); d24: return div24(num); d25: return div25(num);
d26: return div26(num); d27: return div27(num); d28: return div28(num); d29: return div29(num);
d30: return div30(num); d31: return div31(num); d32: return div32(num); d33: return div33(num);
d34: return div34(num); d35: return div35(num); d36: return div36(num); d37: return div37(num);
d38: return div38(num); d39: return div39(num); d40: return div40(num); d41: return div41(num);
d42: return div42(num); d43: return div43(num); d44: return div44(num); d45: return div45(num);
d46: return div46(num); d47: return div47(num); d48: return div48(num); d49: return div49(num);
d50: return div50(num); d51: return div51(num); d52: return div52(num); d53: return div53(num);
d54: return div54(num); d55: return div55(num); d56: return div56(num); d57: return div57(num);
d58: return div58(num); d59: return div59(num); d60: return div60(num); d61: return div61(num);
d62: return div62(num); d63: return div63(num); d64: return div64(num); d65: return div65(num);
d66: return div66(num); d67: return div67(num); d68: return div68(num); d69: return div69(num);
d70: return div70(num); d71: return div71(num); d72: return div72(num); d73: return div73(num);
d74: return div74(num); d75: return div75(num); d76: return div76(num); d77: return div77(num);
d78: return div78(num); d79: return div79(num); d80: return div80(num); d81: return div81(num);
d82: return div82(num); d83: return div83(num); d84: return div84(num); d85: return div85(num);
}
// this division code started off as a simple long division shift-and-subtract taken
// from the "C" section of: https://gtoal.com/vectrex/vectrex-muldiv.c.html
// Speeded up by using a binary chop to detect where to subtract from.
unsigned int32_t div_core32(unsigned int32_t numerator, unsigned int32_t denominator, int8_t max_shift) {
unsigned int32_t quotient, shifted;
int8_t shift, min_shift;
#define DIV_BODY \
quotient = 0LL; \
for (;;) { \
if (numerator < denominator) break; /* < 1.0 (ie 0) */ \
min_shift = 0; \
for (;;) { \
shift = (min_shift+max_shift)/2; \
shifted = denominator<<(int32_t)shift; \
/* The above and below is why the data type used *must* match the 'max_shift' given width! */\
if ((shifted>>(int32_t)shift) != denominator) { /* shift causes overflow? */ \
max_shift = shift; /* (exclusive upper bound, remember?) */ \
} else { \
if (numerator < shifted) max_shift = shift; else min_shift = shift; \
if (min_shift+1==max_shift) break; \
} \
} \
shift = max_shift = min_shift; /* max shift is remembered for next upper bound */ \
if (numerator < (denominator<<shift)) { \
quotient += 1UL<<(int16_t)shift; /* remainder is screwed up but we don't care about that for now */ \
break; /* would go negative? */ \
} \
numerator -= denominator<<shift; /* can't go negative */ \
quotient += 1UL<<(int16_t)shift; \
} \
return quotient; /* remainder = -numerator; */
DIV_BODY
}
// Not efficient to use 32 bit variables for 16-bit calculations on the 8-bit Vectrex... so:
unsigned int16_t div_core16(unsigned int16_t numerator, unsigned int16_t denominator, int8_t max_shift) {
unsigned int16_t quotient, shifted;
int8_t shift, min_shift;
DIV_BODY // C++ users, consider this a poor man's template.
}
/* In header as static inlines:
unsigned int32_t udiv32_by_32(unsigned int32_t numerator, unsigned int32_t denominator);
unsigned int32_t udiv32_by_16(unsigned int32_t numerator, unsigned int16_t denominator);
unsigned int32_t udiv32_by_8(unsigned int32_t numerator, unsigned int denominator);
unsigned int16_t udiv16_by_16(unsigned int16_t numerator, unsigned int16_t denominator);
unsigned int16_t udiv16_by_8(unsigned int16_t numerator, unsigned int denominator);
unsigned int udiv8_by_8(unsigned int numerator, unsigned int denominator);
*/
int32_t sdiv32_by_32(int32_t numerator, int32_t denominator) {
int sign = 1;
if (denominator == 0) return(int32_t)( numerator < 0 ? 0x80000000LL /*MIN_SIGNED(int32_t, 32)*/ : 0x7FFFFFFFLL /*MAX_SIGNED(int32_t, 32)*/); // remainder = 0;
if ((numerator == (int32_t)0x80000000LL /*MIN_SIGNED(int32_t, 32)*/) && (denominator == -1)) return 0x7FFFFFFFLL /*MAX_SIGNED(int32_t, 32)*/; // remainder = 0;
if (numerator < 0) { sign = -1; numerator = -numerator;}
if (denominator < 0) { sign = -sign; denominator = -denominator;}
return (int32_t)div_core32((unsigned int32_t)numerator,(unsigned int32_t)denominator,32) * (int32_t)sign;
}
int32_t sdiv32_by_16(int32_t numerator, int16_t denominator) {
int sign = 1;
if (denominator == 0) return (int32_t)( numerator < 0 ? 0x80000000LL /*MIN_SIGNED(int32_t, 32)*/
: 0x7FFFFFFFLL /*MAX_SIGNED(int32_t, 32)*/); // remainder = 0;
if ((numerator == (int32_t)0x80000000LL /*MIN_SIGNED(int32_t, 32)*/) && (denominator == -1)) return 0x7FFFFFFFLL /*MAX_SIGNED(int32_t, 32)*/;
if (numerator < 0) { sign = -1; numerator = -numerator;}
if (denominator < 0) { sign = -sign; denominator = -denominator;}
return (int32_t)div_core32((unsigned int32_t)numerator,(unsigned int32_t)denominator,32) * (int32_t)sign;
}
int32_t sdiv32_by_8(int32_t numerator, int8_t denominator) {
int sign = 1;
if (denominator == 0) return (int32_t)(numerator < 0 ? 0x80000000LL /*MIN_SIGNED(int32_t, 32)*/ : 0x7FFFFFFFLL /*MAX_SIGNED(int32_t, 32)*/); // remainder = 0;
if ((numerator == (int32_t)0x80000000LL /*MIN_SIGNED(int32_t, 32)*/) && (denominator == -1)) return 0x7FFFFFFFLL/*MAX_SIGNED(int32_t, 32)*/; // remainder = 0;
if (numerator < 0) { sign = -1; numerator = -numerator;}
if (denominator < 0) { sign = -sign; denominator = -denominator;}
return (int32_t)div_core32((unsigned int32_t)numerator,(unsigned int32_t)denominator,32) * (int32_t)sign;
}
int16_t sdiv16_by_16(int16_t numerator, int16_t denominator) {
int sign = 1;
if (denominator == 0) return (int16_t)(numerator < 0 ? 0x8000 /*MIN_SIGNED(int16_t, 16)*/ : 0x7FFF /*MAX_SIGNED(int16_t, 16)*/); // remainder = 0;
if ((numerator == (int16_t)0x8000 /*MIN_SIGNED(int16_t, 16)*/) && (denominator == -1)) return 0x7FFF /*MAX_SIGNED(int16_t, 16)*/; // remainder = 0;
if (numerator < 0) { sign = -1; numerator = -numerator;}
if (denominator < 0) { sign = -sign; denominator = -denominator;}
return (int16_t)div_core16((unsigned int16_t)numerator,(unsigned int16_t)denominator,16) * (int16_t)sign;
}
int16_t sdiv16_by_8(int16_t numerator, int8_t denominator) {
int sign = 1;
if (denominator == 0) return (int16_t)(numerator < 0 ? 0x8000 /*MIN_SIGNED(int16_t, 16)*/ : 0x7FFF /*MAX_SIGNED(int16_t, 16)*/); // remainder = 0;
if ((numerator == (int16_t)0x8000 /*MIN_SIGNED(int16_t, 16)*/) && (denominator == -1)) return 0x7FFF /*MAX_SIGNED(int16_t, 16)*/; // remainder = 0;
if (numerator < 0) { sign = -1; numerator = -numerator;}
if (denominator < 0) { sign = -sign; denominator = -denominator;}
return (int16_t)div_core16((unsigned int16_t)numerator,(unsigned int16_t)denominator,16) * (int16_t)sign;
}
int8_t sdiv8_by_8(int8_t numerator, int8_t denominator) {
int sign = 1;
if (denominator == 0) return (int8_t)(numerator < 0 ? 0x80 /*MIN_SIGNED(int, 8)*/ : 0x7F /*MAX_SIGNED(int, 8)*/); // remainder = 0;
if ((numerator == (int8_t)0x80 /*MIN_SIGNED(int, 8)*/) && (denominator == -1)) return 0x7F /*MAX_SIGNED(int, 8)*/; // remainder = 0;
if (numerator < 0) { sign = -1; numerator = -numerator;}
if (denominator < 0) { sign = -sign; denominator = -denominator;}
return (int8_t)udiv8_by_8((unsigned int8_t)numerator,(unsigned int8_t)denominator)*sign;
}
#ifdef DBMAIN
#ifdef VECTREX
void crash(int reason) {
for (;;) {
}
}
#define TESTS 1000L
#else
#include <time.h>
#define TESTS 10000000L
#endif
int main(int argc, char **argv) {
#ifndef VECTREX
clock_t t;
double base_time, time_taken, original_time, my_time;
#endif
int32_t i;
int32_t dividend, divisor, result;
int32_t cksum;
// default range is everything.
for (i=0; i < TESTS; i++) {
// should force more randomicity in the number of bits used. Half will be 31 bits long. Decreasingly few shorter ones.
dividend = random32()&MAX_SIGNED(int32_t, 32);
do {divisor = random32()&MAX_SIGNED(int32_t, 32); } while (divisor==0);
result = sdiv32_by_32(dividend, divisor);
if (dividend/divisor != result) {
#ifndef VECTREX
fprintf(stdout, "sdiv32_by_32(%ld,%ld) = %ld but %ld/%ld = %ld ?\n", dividend, divisor, result, dividend, divisor, dividend/divisor);
exit(1);
#else
crash(1);
#endif
}
}
#ifndef VECTREX
fprintf(stdout, "sdiv32_by_32: %ld randomised signed 32-bit by 32-bit tests passed!\n", TESTS);
#endif
for (i=0; i < TESTS; i++) {
dividend = random32()&MAX_UNSIGNED(unsigned int32_t, 32);
do { divisor = random32()&MAX_UNSIGNED(unsigned int32_t, 32); } while (divisor==0);
result = udiv32_by_32(dividend, divisor);
if ((unsigned int32_t)dividend/(unsigned int32_t)divisor != (unsigned int32_t)result) {
#ifndef VECTREX
fprintf(stdout, "udiv32_by_32(%lu,%lu) = %lu but %lu/%lu = %lu ?\n", (unsigned int32_t)dividend, (unsigned int32_t)divisor, (unsigned int32_t)result, (unsigned int32_t)dividend, (unsigned int32_t)divisor, (unsigned int32_t)dividend/(unsigned int32_t)divisor);
exit(1);
#else
crash(2);
#endif
}
}
#ifndef VECTREX
fprintf(stdout, "udiv32_by_32: %ld randomised unsigned 32-bit by 32-bit tests passed!\n", TESTS);
#endif
for (i=0; i < TESTS; i++) {
dividend = random32()&MAX_SIGNED(int32_t, 32);
do { divisor = random32()&MAX_SIGNED(int32_t, 16); } while (divisor==0);
result = sdiv32_by_16(dividend, divisor);
if (dividend/divisor != result) {
#ifndef VECTREX
fprintf(stdout, "sdiv32_by_16(%ld,%ld) = %ld but %ld/%ld = %ld ?\n", dividend, divisor, result, dividend, divisor, dividend/divisor);
exit(1);
#else
crash(3);
#endif
}
}
#ifndef VECTREX
fprintf(stdout, "sdiv32_by_16: %ld randomised signed 32-bit by 16-bit tests passed!\n", TESTS);
#endif
for (i=0; i < TESTS; i++) {
dividend = random32()&MAX_UNSIGNED(unsigned int32_t, 32);
do { divisor = random32()&MAX_UNSIGNED(unsigned int16_t, 16); } while (divisor==0);
result = udiv32_by_16(dividend, divisor);
if ((unsigned int32_t)dividend/(unsigned int16_t)divisor != (unsigned int32_t)result) {
#ifndef VECTREX
fprintf(stdout, "udiv32_by_16(%lu,%lu) = %lu but %lu/%lu = %lu ?\n", dividend, divisor, result, dividend, divisor, dividend/divisor);
exit(1);
#else
crash(4);
#endif
}
}
#ifndef VECTREX
fprintf(stdout, "udiv32_by_16: %ld randomised unsigned 32-bit by 8-bit tests passed!\n", TESTS);
#endif
for (i=0; i < TESTS; i++) {
dividend = random32()&MAX_SIGNED(int32_t, 32);
do { divisor = random32()&MAX_SIGNED(int8_t, 8); } while (divisor==0);
result = sdiv32_by_8(dividend, divisor);
if ((int32_t)dividend/(int8_t)divisor != (int32_t)result) {
#ifndef VECTREX
fprintf(stdout, "sdiv32_by_16(%ld,%ld) = %ld but %ld/%ld = %ld ?\n", dividend, divisor, result, dividend, divisor, dividend/divisor);
exit(1);
#else
crash(5);
#endif
}
}
#ifndef VECTREX
fprintf(stdout, "sdiv32_by_8: %ld randomised signed 32-bit by 8-bit tests passed!\n", TESTS);
#endif
for (i=0; i < TESTS; i++) {
dividend = random32()&MAX_UNSIGNED(unsigned int32_t, 32);
do { divisor = random32()&MAX_UNSIGNED(unsigned int8_t, 8); } while (divisor==0);
result = udiv32_by_8(dividend, divisor);
if ((unsigned int32_t)dividend/(unsigned int8_t)divisor != (unsigned int32_t)result) {
#ifndef VECTREX
fprintf(stdout, "udiv32_by_16(%lu,%lu) = %lu but %lu/%lu = %lu ?\n", dividend, divisor, result, dividend, divisor, dividend/divisor);
exit(1);
#else
crash(6);
#endif
}
}
#ifndef VECTREX
fprintf(stdout, "udiv32_by_8: %ld randomised unsigned 32-bit by 16-bit tests passed!\n", TESTS);
#endif
for (i=0; i < TESTS; i++) {
dividend = random32()&MAX_SIGNED(int32_t, 16);
do { divisor = random32()&MAX_SIGNED(int32_t, 16); } while (divisor==0);
result = sdiv16_by_16(dividend, divisor);
if (dividend/divisor != result) {
#ifndef VECTREX
fprintf(stdout, "sdiv16_by_16(%ld,%ld) = %ld but %ld/%ld = %ld ?\n", dividend, divisor, result, dividend, divisor, dividend/divisor);
exit(1);
#else
crash(7);
#endif
}
}
#ifndef VECTREX
fprintf(stdout, "sdiv16_by_16: %ld randomised signed 16-bit by 16-bit tests passed!\n", TESTS);
#endif
for (i=0; i < TESTS; i++) {
dividend = random32()&MAX_UNSIGNED(unsigned int32_t, 16);
do { divisor = random32()&MAX_UNSIGNED(unsigned int32_t, 16); } while (divisor==0);
result = udiv16_by_16(dividend, divisor);
if (dividend/divisor != result) {
#ifndef VECTREX
fprintf(stdout, "udiv16_by_16(%lu,%lu) = %lu but %lu/%lu = %lu ?\n", dividend, divisor, result, dividend, divisor, dividend/divisor);
exit(1);
#else
crash(8);
#endif
}
}
#ifndef VECTREX
fprintf(stdout, "udiv16_by_16: %ld randomised unsigned 16-bit by 16-bit tests passed!\n", TESTS);
#endif
for (i=0; i < TESTS; i++) {
dividend = random32()&MAX_SIGNED(int32_t, 16);
do { divisor = random32()&MAX_SIGNED(int32_t, 8); } while (divisor==0);
result = sdiv16_by_8(dividend, divisor);
if (dividend/divisor != result) {
#ifndef VECTREX
fprintf(stdout, "sdiv16_by_8(%ld,%ld) = %ld but %ld/%ld = %ld ?\n", dividend, divisor, result, dividend, divisor, dividend/divisor);
exit(1);
#else
crash(9);
#endif
}
}
#ifndef VECTREX
fprintf(stdout, "sdiv16_by_8: %ld randomised signed 16-bit by 8-bit tests passed!\n", TESTS);
#endif
for (i=0; i < TESTS; i++) {
dividend = random32()&MAX_UNSIGNED(unsigned int32_t, 16);
do { divisor = random32()&MAX_UNSIGNED(unsigned int32_t, 8); } while (divisor==0);
result = udiv16_by_8(dividend, divisor);
if (dividend/divisor != result) {
#ifndef VECTREX
fprintf(stdout, "udiv16_by_8(%lu,%lu) = %lu but %lu/%lu = %lu ?\n", dividend, divisor, result, dividend, divisor, dividend/divisor);
exit(1);
#else
crash(10);
#endif
}
}
#ifndef VECTREX
fprintf(stdout, "udiv16_by_8: %ld randomised unsigned 16-bit by 8-bit tests passed!\n", TESTS);
#endif
for (i=0; i < TESTS; i++) {
dividend = random32()&MAX_SIGNED(int32_t, 8);
do { divisor = random32()&MAX_SIGNED(int32_t, 8); } while (divisor==0);
result = sdiv8_by_8(dividend, divisor);
if (dividend/divisor != result) {
#ifndef VECTREX
fprintf(stdout, "sdiv8_by_8(%ld,%ld) = %ld but %ld/%ld = %ld ?\n", dividend, divisor, result, dividend, divisor, dividend/divisor);
exit(1);
#else
crash(11);
#endif
}
}
#ifndef VECTREX
fprintf(stdout, "sdiv8_by_8: %ld randomised signed 8-bit by 8-bit tests passed!\n", TESTS);
#endif
for (i=0; i < TESTS; i++) {
dividend = random32()&MAX_UNSIGNED(unsigned char, 8);
do { divisor = random32()&MAX_UNSIGNED(unsigned char, 8); } while (divisor==0);
result = udiv8_by_8(dividend, divisor);
if (dividend/divisor != result) {
#ifndef VECTREX
fprintf(stdout, "udiv8_by_8(%lu,%lu) = %lu but %lu/%lu = %lu ?\n", dividend, divisor, result, dividend, divisor, dividend/divisor);
exit(1);
#else
crash(12);
#endif
}
}
#ifndef VECTREX
fprintf(stdout, "udiv8_by_8: %ld randomised unsigned 8-bit by 8-bit tests passed!\n", TESTS);
#endif
unsigned char num, den;
num = 0;
for (;;) {
den = 1;
for (;;) {
if (num/den != udiv8_by_8(num,den)) {
#ifndef VECTREX
fprintf(stdout, "udiv8_by_8(%d,%d) = %d but %d/%d = %d ?\n", num, den, udiv8_by_8(num,den), num, den, num/den);
exit(1);
#else
crash(13);
#endif
}
den += 1;
if (den == 0) break;
}
num += 1;
if (num == 0) break;
}
#ifndef VECTREX
fprintf(stdout, "udiv8_by_8: Complete 256x256 unsigned 8-bit tests passed!\n");
#endif
return 0;
}
#endif
#define umul8(x,y) ((x)*(y))
// =====================================================================================
// MULTIPLYING
// =====================================================================================
// This first implementation is based on:
// https://stackoverflow.com/questions/22845801/32-bit-signed-integer-multiplication-without-using-64-bit-data-type/22847373#22847373
// with some minimal tweaking to supply two different word sizes...
/* compute the full 32-bit product of two signed 16-bit integers */
unsigned int32_t umultiply16 (unsigned int16_t a, unsigned int16_t b) {
/* split operands into halves */
unsigned int8_t al = (unsigned int8_t)a;
unsigned int8_t ah = (unsigned int8_t)(a >> 8);
unsigned int8_t bl = (unsigned int8_t)b;
unsigned int8_t bh = (unsigned int8_t)(b >> 8);
unsigned int16_t p0 = umul8(al, bl); /* compute partial products using native 8x8 -> 16 MUL */
unsigned int16_t p1 = umul8(al, bh);
unsigned int16_t p2 = umul8(ah, bl);
unsigned int16_t p3 = umul8(ah, bh);
unsigned int16_t cy = ((p0 >> 8) + (unsigned int8_t)p1 + (unsigned int8_t)p2) >> 8;
/* sum partial products for high */
unsigned int16_t umul16hi = p3 + (p2 >> 8) + (p1 >> 8) + cy; /* compute the upper 16 bits of the product of two unsigned 16-bit integers */
//unsigned int16_t umul16lo = p0 + (p2 << 8) + (p1 << 8);
unsigned int16_t umul16lo = ((p2 + p1) << 8) + p0;
return (unsigned int32_t)(((unsigned int32_t)umul16hi<<(unsigned int32_t)16) | (unsigned int32_t)umul16lo); /* bits <31:16> of the product a * b */
}
int32_t multiply16 (int16_t a, int16_t b) {
/* split operands into halves */
unsigned int8_t al = (unsigned int8_t)a;
unsigned int8_t ah = (unsigned int8_t)(a >> 8);
unsigned int8_t bl = (unsigned int8_t)b;
unsigned int8_t bh = (unsigned int8_t)(b >> 8);
unsigned int16_t p0 = umul8(al, bl); /* compute partial products using native 8x8 -> 16 MUL */
unsigned int16_t p1 = umul8(al, bh);
unsigned int16_t p2 = umul8(ah, bl);
unsigned int16_t p3 = umul8(ah, bh);
unsigned int16_t cy = ((p0 >> 8) + (unsigned int8_t)p1 + (unsigned int8_t)p2) >> 8;
/* sum partial products for high */
unsigned int16_t umul16hi = (unsigned int16_t)(p3 + (p2 >> 8) + (p1 >> 8) + cy); /* compute the upper 16 bits of the product of two unsigned 16-bit integers */
int16_t mul16hi = (int16_t)(umul16hi - (unsigned int16_t)(((a < 0) ? b : 0) - ((b < 0) ? a : 0))); /* compute the upper 16 bits of the product of two signed 16-bit integers */
/* sum partial products for low */
//unsigned int16_t umul16lo = p0 + (p2 << 8) + (p1 << 8);
unsigned int16_t umul16lo = ((p2 + p1) << 8) + p0;
return (int32_t)(((int32_t)mul16hi<<(int32_t)16) | (int32_t)((unsigned int32_t)umul16lo)); /* bits <31:16> of the product a * b */
}
// -----------
#ifndef VECTREX
/* compute the full 64-bit product of two signed 32-bit integers */
int64_t multiply32(int32_t a, int32_t b) {
/* split operands into halves */
unsigned int16_t al = (unsigned int16_t)a;
unsigned int16_t ah = a >> 16;
unsigned int16_t bl = (unsigned int16_t)b;
unsigned int16_t bh = b >> 16;
/* compute partial products */
unsigned int32_t p0 = umultiply16(al, bl);
unsigned int32_t p1 = umultiply16(al, bh);
unsigned int32_t p2 = umultiply16(ah, bl);
unsigned int32_t p3 = umultiply16(ah, bh);
/* sum partial products */
unsigned int32_t cy = ((p0 >> 16) + (unsigned int16_t)p1 + (unsigned int16_t)p2) >> 16;
unsigned int32_t umul32hi = p3 + (p2 >> 16) + (p1 >> 16) + cy; /* compute the upper 32 bits of the product of two unsigned 32-bit integers */
int32_t mul32hi = umul32hi - ((a < 0) ? b : 0) - ((b < 0) ? a : 0); /* compute the upper 32 bits of the product of two signed 32-bit integers */
/* sum partial products for low */
//unsigned int32_t umul32lo = p0 + (p2 << 16) + (p1 << 16);
unsigned int32_t umul32lo = ((p2 + p1) << 16) + p0;
return (int64_t)mul32hi<<32LL | (int64_t)((unsigned int64_t)umul32lo); // requires native 32x32 -> lower-32 multiply. (16x16 does not work)
}
#endif
// =====================================================================================
// The next implementation is based on:
// https://www.techiedelight.com/multiply-16-bit-integers-using-8-bit-multiplier/
// although it required a lot more hacking to fit our use-case...
unsigned int32_t umultiply16bit(unsigned int16_t m, unsigned int16_t n) {
unsigned int8_t mLow = (unsigned int8_t)m; // stores first 8-bits of m
unsigned int8_t mHigh = (unsigned int8_t)(m>>8); // stores last 8-bits of m
unsigned int8_t nLow = (unsigned int8_t)n; // stores first 8-bits of n
unsigned int8_t nHigh = (unsigned int8_t)(n>>8); // stores last 8-bits of n
unsigned int16_t mLow_nLow = (unsigned int16_t)umul8(mLow, nLow); // native 8x8 -> 16 MULs
unsigned int16_t mHigh_nLow = (unsigned int16_t)umul8(mHigh, nLow);
unsigned int16_t mLow_nHigh = (unsigned int16_t)umul8(mLow, nHigh);
unsigned int16_t mHigh_nHigh = (unsigned int16_t)umul8(mHigh, nHigh);
// return mLow_nLow + ((mHigh_nLow + mLow_nHigh) << 8L) + (mHigh_nHigh << 16L);
// -> return (mHigh_nHigh << 16L) + ((mHigh_nLow + mLow_nHigh) << 8L) + mLow_nLow;
return (((mHigh_nHigh << 8L) + (mHigh_nLow + mLow_nHigh)) << 8L) + mLow_nLow;
}
int32_t multiply16bit(int16_t m, int16_t n) {
int sign = 1;
if (m < 0) {m = -m; sign = -sign;}
if (n < 0) {n = -n; sign = -sign;}
if (sign < 0) {
return -(int32_t)umultiply16bit((unsigned int16_t)m,(unsigned int16_t)n);
} else {
return (int32_t)umultiply16bit((unsigned int16_t)m,(unsigned int16_t)n);
}
}
// -----------
#ifndef VECTREX
static unsigned int64_t umultiply32bit(unsigned int32_t m, unsigned int32_t n) {
// could use these 4 as parameters instead of 2 int32_t's:
unsigned int16_t mLow = m; // stores first 8-bits of m
unsigned int16_t mHigh = m >> 16; // stores last 8-bits of m
unsigned int16_t nLow = n; // stores first 8-bits of n
unsigned int16_t nHigh = n >> 16; // stores last 8-bits of n
unsigned int32_t mLow_nLow = umultiply16bit(mLow, nLow);
unsigned int32_t mHigh_nLow = umultiply16bit(mHigh, nLow);
unsigned int32_t mLow_nHigh = umultiply16bit(mLow, nHigh);
unsigned int32_t mHigh_nHigh = umultiply16bit(mHigh, nHigh);
unsigned int32_t LOW, LOW_LOW, SUM, CARRY;
SUM = mHigh_nLow + mLow_nHigh;
LOW_LOW = SUM << 16L;
CARRY = 0; LOW = mLow_nLow+LOW_LOW; if (LOW < mLow_nLow || LOW < LOW_LOW) CARRY = 1;
return ((unsigned int64_t) ((SUM >> 16L)+mHigh_nHigh+CARRY) << 32LL) | LOW;
}
static int64_t multiply32bit(int32_t m, int32_t n) { // for Vectrex, will need to pass in parameters as multiple 16-bit ints (Q16.16 format)
int sign = 1;
// will fail if both params are largest negative integer. I don't care.
if (m < 0) {m = -m; sign = -sign;}
if (n < 0) {n = -n; sign = -sign;}
if (sign < 0) {
return -umultiply32bit(m,n);
} else {
return umultiply32bit(m,n);
}
}
#endif