#define VECTREX 1 // Although I have another file with generic divide code, this is specific to dividing by 10 // which is frequently needed when displaying numbers on screen, and worth optimizing. // That said, I have not yet put this code on the clock to confirm timings. // This file contains a 16-bit signed divide by 10, // and 8-bit signed and unsigned divides by 10 (with two different // implementations of each - time them and use the faster ones) #ifdef VECTREX // high is 0 on intel, 1 on 6809 #define HIGH 0 #define LOW 1 #define int8_t int #define uint8_t unsigned int #define int16_t long #define uint16_t unsigned long #define USE_MUL 1 #define EXIT_SUCCESS 0 #define EXIT_FAILURE 1 #else #include <stdio.h> #include <stdlib.h> // high is 0 on intel, 1 on 6809 #define HIGH 0 #define LOW 1 #define int8_t signed char #define uint8_t unsigned char #define int16_t signed short #define uint16_t unsigned short #define USE_MUL 1 #define TEST #endif // An accurate signed 16 bit division by 10 // Taken from http://homepage.divms.uiowa.edu/~jones/bcd/divide.html static inline int16_t sdiv10_16(int16_t A) { int16_t Q; /* the quotient */ int16_t R; /* the remainder */ // Avoid using divide: Q = ((A >> 1) + A) >> 1; /* Q = A*0.11 */ Q = ((Q >> 4) + Q) ; /* Q = A*0.110011 */ Q = ((Q >> 8) + Q) >> 3; /* Q = A*0.00011001100110011 */ /* either Q = A/10 or Q+1 = A/10 for -87,381 < A < 534,890 */ #ifdef USE_MUL // Need to time both cases on 6809 to see which is faster. R = Q * 10; #else R = ((Q << 2) + Q) << 1; #endif R = A - R; /* R = A - 10*Q */ if (R >= 10) { R -= 10; Q += 1; } /* Q = A/10 for -87,381 < A < 534,890 */ if ((Q < 0) && (R != 0)) Q += 1; #ifdef TEST if (Q != A/10) { fprintf(stderr, "A: %d A/10: %d Q: %d\n", A, A/10, Q); exit(EXIT_FAILURE); } #endif return Q; } // ditto with remainder exported to user static inline int16_t sdiv10_16_with_remainder(int16_t A, int16_t *Remainder) { int16_t Q; /* the quotient */ int16_t R; /* the remainder */ // Avoid using divide: Q = ((A >> 1) + A) >> 1; /* Q = A*0.11 */ Q = ((Q >> 4) + Q) ; /* Q = A*0.110011 */ Q = ((Q >> 8) + Q) >> 3; /* Q = A*0.00011001100110011 */ /* either Q = A/10 or Q+1 = A/10 for -87,381 < A < 534,890 */ #ifdef USE_MUL // Need to time both cases on 6809 to see which is faster. R = Q * 10; #else R = ((Q << 2) + Q) << 1; #endif R = -R + A; /* R = A - 10*Q */ if (R >= 10) { R -= 10; Q += 1; } /* Q = A/10 for -87,381 < A < 534,890 */ if ((Q < 0) && (R != 0)) { R -= 10; Q += 1; } *Remainder = R; #ifdef TEST if ((Q != A/10) || (R != A%10)) { fprintf(stderr, "A: %d A/10: %d Q: %d R: %d A%%10: %d\n", A, A/10, Q, R, A%10); exit(EXIT_FAILURE); } #endif return Q; } // divide signed 8-bit quantity by 10 using 16 bit intermediate at most static inline int8_t s8divided_by_10(int8_t x) { int8_t rem; int8_t result; result = (int8_t)(((int16_t)x * 25L) >> 8L); // A*B should be the only 16 bit quantity #ifdef USE_MUL rem = -(result*10)+x; // yet to check if *10 faster than shift+add. Probably is. #else rem = -(((result << 2) + result) << 1) + x; #endif if (rem > 0) { if ((x < 0) || (rem >= 10)) result++; } #ifdef TEST if (result != x/10) { printf("s: %d %d %d rem %d\n", x, x/10, result, rem); exit(EXIT_FAILURE); } #endif return result; } // divide unsigned 8-bit quantity by 10 using 16 bit intermediate at most static inline uint8_t u8divided_by_10(uint8_t x) { uint8_t rem; uint8_t result; result = (uint8_t)(((uint16_t)x * 25UL) >> 8UL); // Again, A*B should be the only 16 bit quantity rem = x - result*10; // yet to check if *10 faster than shift+add. Probably is. if (rem >= 10) result++; #ifdef TEST if (result != x/10U) { printf("u: %u %u %u rem %u\n", x, x/10U,result,rem); exit(EXIT_FAILURE); } #endif return result; } static inline uint8_t divu10_8(uint8_t i) { uint8_t result, remainder; union twobytes { // an experiment to see if the 6809 code generator can be forced to avoid using 8 double-byte LSR instructions uint16_t sixteenbit; // Despite being lengthier source code this should be tighter machine code. uint8_t eightbit[2]; } temp; temp.sixteenbit = (i+1)*51; result = temp.eightbit[LOW]>>1; remainder = i - result*10; #ifdef TEST if (result != i/10) { printf("U: %u/10 = %u? %u rem %u\n", i, i/10, result, i - result*10); exit(EXIT_FAILURE); } #endif return result; } static inline int8_t divs10_8(int8_t i) { int8_t result; union twobytes { // an experiment to see if the 6809 code generator can be forced to avoid using 8 double-byte ASR instructions int16_t sixteenbit; int8_t eightbit[2]; } temp; temp.sixteenbit = (i+1)*51; result = temp.eightbit[LOW]>>1; if ((i < 0) && (i > result*10)) result += 1; // there's probably a bit-twiddling expression to avoid two comparisons... // (but just casting i to uint8_t won't work) #ifdef TEST if (result != i/10) { printf("S: %d/10 = %d? %d rem %d\n", i, i/10, result, i - result*10); exit(EXIT_FAILURE); } #endif return result; } int main(void) { { // signed 16-bit divide int16_t Q,R,A = 0; do Q = sdiv10_16_with_remainder(A++,&R); while (A); do Q = sdiv10_16(A++); while (A); } { // unsigned 8-bit divide uint8_t i, r2, r5; i = 0; for (;;) { if (i) { r2 = u8divided_by_10(i); r5 = divu10_8(i); } i++; if (i == 0) break; } } { // signed 8-bit divide int8_t i, r1, r2, r5; i = -128; for (;;) { if (i) { r1 = i/10; r2 = s8divided_by_10(i); r5 = divs10_8(i); } i++; if (i == -128) break; } } #ifdef TEST printf("All test passed\n"); exit(EXIT_SUCCESS); #endif return (EXIT_FAILURE); }