```#define VECTREX 1
// Although I have another file with generic divide code, this is specific to dividing by 10
// which is frequently needed when displaying numbers on screen, and worth optimizing.
// That said, I have not yet put this code on the clock to confirm timings.

// This file contains a 16-bit signed divide by 10,
// and 8-bit signed and unsigned divides by 10 (with two different
// implementations of each - time them and use the faster ones)

#ifdef VECTREX
// high is 0 on intel, 1 on 6809
#define HIGH 0
#define LOW  1
#define int8_t int
#define uint8_t unsigned int
#define int16_t long
#define uint16_t unsigned long
#define USE_MUL 1
#define EXIT_SUCCESS 0
#define EXIT_FAILURE 1
#else
#include <stdio.h>
#include <stdlib.h>
// high is 0 on intel, 1 on 6809
#define HIGH 0
#define LOW  1
#define int8_t signed char
#define uint8_t unsigned char
#define int16_t signed short
#define uint16_t unsigned short
#define USE_MUL 1
#define TEST
#endif

// An accurate signed 16 bit division by 10
// Taken from http://homepage.divms.uiowa.edu/~jones/bcd/divide.html

static inline int16_t sdiv10_16(int16_t A) {
int16_t Q; /* the quotient */
int16_t R; /* the remainder */

// Avoid using divide:
Q = ((A >> 1) + A) >> 1; /* Q = A*0.11 */
Q = ((Q >> 4) + Q)     ; /* Q = A*0.110011 */
Q = ((Q >> 8) + Q) >> 3; /* Q = A*0.00011001100110011 */
/* either Q = A/10 or Q+1 = A/10 for -87,381 < A < 534,890 */

#ifdef USE_MUL  // Need to time both cases on 6809 to see which is faster.
R = Q * 10;
#else
R = ((Q << 2) + Q) << 1;
#endif
R = A - R; /* R = A - 10*Q */
if (R >= 10) { R -= 10; Q += 1; }   /* Q = A/10 for -87,381 < A < 534,890 */
if ((Q < 0) && (R != 0)) Q += 1;
#ifdef TEST
if (Q != A/10) {
fprintf(stderr, "A: %d A/10: %d Q: %d\n", A, A/10, Q);
exit(EXIT_FAILURE);
}
#endif
return Q;
}

// ditto with remainder exported to user
static inline int16_t sdiv10_16_with_remainder(int16_t A, int16_t *Remainder) {
int16_t Q; /* the quotient */
int16_t R; /* the remainder */

// Avoid using divide:
Q = ((A >> 1) + A) >> 1; /* Q = A*0.11 */
Q = ((Q >> 4) + Q)     ; /* Q = A*0.110011 */
Q = ((Q >> 8) + Q) >> 3; /* Q = A*0.00011001100110011 */
/* either Q = A/10 or Q+1 = A/10 for -87,381 < A < 534,890 */

#ifdef USE_MUL  // Need to time both cases on 6809 to see which is faster.
R = Q * 10;
#else
R = ((Q << 2) + Q) << 1;
#endif
R = -R + A; /* R = A - 10*Q */
if (R >= 10) { R -= 10; Q += 1; }   /* Q = A/10 for -87,381 < A < 534,890 */
if ((Q < 0) && (R != 0)) { R -= 10; Q += 1; }
*Remainder = R;
#ifdef TEST
if ((Q != A/10) || (R != A%10)) {
fprintf(stderr, "A: %d A/10: %d Q: %d  R: %d  A%%10: %d\n", A, A/10, Q, R, A%10);
exit(EXIT_FAILURE);
}
#endif
return Q;
}

// divide signed 8-bit quantity by 10 using 16 bit intermediate at most
static inline int8_t s8divided_by_10(int8_t x) {
int8_t rem;
int8_t result;
result = (int8_t)(((int16_t)x * 25L) >> 8L); // A*B should be the only 16 bit quantity
#ifdef USE_MUL
rem = -(result*10)+x; // yet to check if *10 faster than shift+add. Probably is.
#else
rem = -(((result << 2) + result) << 1) + x;
#endif
if (rem > 0) {
if ((x < 0) || (rem >= 10)) result++;
}
#ifdef TEST
if (result != x/10) {
printf("s: %d %d %d rem %d\n", x, x/10, result, rem);
exit(EXIT_FAILURE);
}
#endif
return result;
}

// divide unsigned 8-bit quantity by 10 using 16 bit intermediate at most
static inline uint8_t u8divided_by_10(uint8_t x) {
uint8_t rem;
uint8_t result;
result = (uint8_t)(((uint16_t)x * 25UL) >> 8UL); // Again, A*B should be the only 16 bit quantity
rem = x - result*10; // yet to check if *10 faster than shift+add. Probably is.
if (rem >= 10) result++;
#ifdef TEST
if (result != x/10U) {
printf("u: %u %u %u rem %u\n", x, x/10U,result,rem);
exit(EXIT_FAILURE);
}
#endif
return result;
}

static inline uint8_t divu10_8(uint8_t i) {
uint8_t result, remainder;
union twobytes {         // an experiment to see if the 6809 code generator can be forced to avoid using 8 double-byte LSR instructions
uint16_t sixteenbit;   // Despite being lengthier source code this should be tighter machine code.
uint8_t eightbit[2];
} temp;
temp.sixteenbit = (i+1)*51;
result = temp.eightbit[LOW]>>1;
remainder = i - result*10;
#ifdef TEST
if (result != i/10) {
printf("U: %u/10 = %u? %u rem %u\n", i, i/10, result, i - result*10);
exit(EXIT_FAILURE);
}
#endif
return result;
}

static inline int8_t divs10_8(int8_t i) {
int8_t result;
union twobytes {         // an experiment to see if the 6809 code generator can be forced to avoid using 8 double-byte ASR instructions
int16_t sixteenbit;
int8_t eightbit[2];
} temp;
temp.sixteenbit = (i+1)*51;
result = temp.eightbit[LOW]>>1;
if ((i < 0) && (i > result*10)) result += 1; // there's probably a bit-twiddling expression to avoid two comparisons...
// (but just casting i to uint8_t won't work)
#ifdef TEST
if (result != i/10) {
printf("S: %d/10 = %d? %d rem %d\n", i, i/10, result, i - result*10);
exit(EXIT_FAILURE);
}
#endif
return result;
}

int main(void) {
{ // signed 16-bit divide
int16_t Q,R,A = 0;
do Q = sdiv10_16_with_remainder(A++,&R); while (A);
do Q = sdiv10_16(A++); while (A);
}
{ // unsigned 8-bit divide
uint8_t i, r2, r5;
i = 0;
for (;;) {
if (i) {
r2 = u8divided_by_10(i);
r5 = divu10_8(i);
}
i++;
if (i == 0) break;
}
}
{ // signed 8-bit divide
int8_t i, r1, r2, r5;
i = -128;
for (;;) {
if (i) {
r1 = i/10;
r2 = s8divided_by_10(i);
r5 = divs10_8(i);
}
i++;
if (i == -128) break;
}
}
#ifdef TEST
printf("All test passed\n");
exit(EXIT_SUCCESS);
#endif
return (EXIT_FAILURE);
}
```