// radix sort - gt style. I assume someone has come up with this before me,
// but I don't remember ever coming across it. Doing a google search just
// now I found the related 'bradsort' but it is significantly different.
// I came up with this a *long long* time ago, but never coded it as I had
// no actual need for the code. But watching Jon Bentley talk about 'the most
// beautiful code I never wrote' made me think about the neatest algorithm
// I never wrote too. By coincidence, also a sort. So here's the first
// rough draft, after taking what I thought would be short and elegant, and
// adding enough real-life code to it to make it work, but before putting the
// effort into simplifying it and making it both robust and efficient.
// linear time (in array size; plus log_2 factor in data width), linear space
// asymptotically better than quicksort et al, as long as we have sufficient
// ram available and don't start swapping. memory demands are higher than
// quicksort but linear. Question: on modern computers with huge rams, at what
// point does the more expensive code in this sort start to pay out over quicksort,
// and is that point reached before we hit the point where we start paging?
// code assumes gcc extensions (because it's programmed in my head in Algol
// and only realised in C since that's the compiler that's available to me)
#include <stdio.h>
#include <stdlib.h>
#ifndef BITSPERBYTE
const int BITSPERBYTE = 8; // should calculate dynamically. Wish C had a BITSPERINT etc.
#endif
// the interface and demo is a simple sort of an array of ints.
// can be trivially generalised to an array of any fixed-size object
// slightly more effort for variable-sized objects such as strings
// left as an exercise for the reader how to handle the case where
// the data is a key and includes a separate pointer to other data
// can also be coded to save space, but the reason I wrote this now
// (and not 30 years ago when I first thought of it) is that big
// rams are the norm so why not use them?
void sort(int array[], int items) // exclusive upper bound
{
// using gcc extension allowing algol-style nested procedures
static int nextfreeslot;
void add(int trie[], int start, int val, int bitsleft) // add an int to a bit-wise trie
{
int ptr;
int bit;
while (bitsleft > 0) {
bit = val < 0 ? 1 : 0; // this is unfortunate, but we have to look at the high bit, otherwise
// the data we pull out after sorting is bit-reversed. (i.e. if we say bit = val&1)
// (could pick the top bit out with a shift but am trying for more portable coding style)
ptr = trie[start+bit];
if (ptr == 0) { // unallocated
ptr = trie[start+bit] = nextfreeslot;
trie[nextfreeslot++] = 0; trie[nextfreeslot++] = 0; // reserve 2 cells for 0 or 1 links (or count + spare)
}
// add(trie, ptr, val<<1, bitsleft-1); // Tail recursion, now transformed into a loop
start = ptr; val <<= 1; bitsleft -= 1;
}
// final slot holds count of values
trie[start] += 1; // Final trie[start+1] is currently unused. We *could* use it as a flag that this
// is a final node in the trie, and thereby not require 32 cell pairs for every word.
}
void walk(int trie[], int start, int array[], int bitsleft, int data)
{
static int outputpos;
if (start == 0) outputpos = 0; // somewhat hacky
if (bitsleft == 0) {
while (trie[start]-- > 0) array[outputpos++] = data; // count of items which were equal to this value
return;
}
if (trie[start] != 0) { // a word was present in which this bit was 0
walk(trie, trie[start], array, bitsleft-1, data<<1);
// this one cannot be optimised away. We could use an explicit stack though. No more that BITSPERWORD deep
}
if (trie[start+1] != 0) { // a word was present in which this bit was 1
walk(trie, trie[start+1], array, bitsleft-1, (data<<1)|1); // first bit is highest bit. adds a minor complication
// again, tail-recursion could be manually optimised away.
}
}
/* Uses gcc extension allowing algol-style dynamic bounds on stack array. Should use heap for portability */
int i, size, trie[size = (sizeof(int)*BITSPERBYTE+2)*2 * items]; // I think this is an OK upper bound
nextfreeslot = 2; trie[0] = 0; trie[1] = 0; // inelegant, but this is only the first hack.
// first add all the input data to a bit-wise trie
for (i = 0; i < items; i++) add(trie, 0, array[i], sizeof(int)*BITSPERBYTE);
// then pre-order traverse the trie to extract the data back into the original input array
// (we're careful about duplicates)
// do this for unsigned ints:
// walk(trie, 0, array, sizeof(unsigned int)*BITSPERBYTE, 0); // reassign sorted values to array
// do this for signed ints:
walk(trie, trie[1], array, sizeof(int)*BITSPERBYTE-1, 1); // reassign sorted values to array
walk(trie, trie[0], array, sizeof(int)*BITSPERBYTE-1, 0); // hack to make negative numbers come first
// as by default, alg treats numbers as unsigned
}
int main(int argc, char **argv)
{
int i, data[] = {42, 0x400000, 3, 66, 21, -42, -1, 0};
fprintf(stdout, "Input:"); for (i = 0; i < sizeof(data)/sizeof(*data); i++) fprintf(stdout, " %d", data[i]); fprintf(stdout, "\n");
sort(data, 8);
fprintf(stdout, "Output:"); for (i = 0; i < sizeof(data)/sizeof(*data); i++) fprintf(stdout, " %d", data[i]); fprintf(stdout, "\n");
exit(0);
}