// This is a skeleton of a C application that reads a JSON data structure // containing a hierarchical tree of data, and walks that tree to process // the data in whatever way is appropriate. // This is effectively an extension of the jsmn package (json parser) // which makes it easier to navigate the json hierarchy by building it // all into an in-memory data structure rather than acting on it // sequentially with the standard pre-order traverse. #include <stdio.h> #include <stdlib.h> #include <string.h> #include <errno.h> #include <sys/types.h> #include <sys/stat.h> #include <fcntl.h> #include <sys/mman.h> #include <unistd.h> #ifndef JSMN_PARENT_LINKS #define JSMN_PARENT_LINKS #endif #define JSMN_STRICT #include "jsmn.c" #define JSMN_CHILD 255 // I originally added JSMN_CHILD to the enum, but I reverted jsmn to its original code // and added that element here as a #define. Being careful that it is higher than // the elements in the enum in case the author adds any new elements of his own. #define tokstrcmp(js, t, s) ((strncmp((js)+(t).start, (s), (t).end - (t).start) == 0) && (strlen(s) == (t).end - (t).start)) #ifndef TRUE #define TRUE (0==0) #define FALSE (0!=0) #endif void showtok (char *js, jsmntok_t * token, int tok) { int i; if ((token[tok].type != JSMN_OBJECT) && (token[tok].type != JSMN_ARRAY)) { // to do - handle escapes as necessary if (token[tok].type == JSMN_STRING) putchar ('"'); for (i = token[tok].start; i < token[tok].end; i++) putchar (js[i]); if (token[tok].type == JSMN_STRING) putchar ('"'); } } void indent (int n) { while (n-- > 0) fprintf (stdout, " "); // better than \t } void reorder (char *js, jsmntok_t * token, jsmntok_t * tree, int tree_max, int root, int depth, int display) { // This makes up for a deficit in the lightweight json parser. The jsmn // code as supplied requires you to walk *all* leaves of the tree in left // to right order whenever you access the json structure. This is not // useful when you want to rearrange the order of blocks etc // so this code takes the serialised tree and inserts explicit branch // nodes, so that you may follow any path down the tree using your // preferred traverse order and omitting any parts that you are not // interested in. Unfortunately I don't see how to add this to the original // code as it builds the tree, so I have retrofitted it by having it copy // from the original tree to a new copy. Somewhat wasteful of space, but // not really an issue considering the size of memory nowadays. static int tokenptr = 0, treeptr = 0; int base, children, i; tree[treeptr].type = token[root].type; tree[treeptr].start = token[root].start; tree[treeptr].end = token[root].end; tree[treeptr].size = token[root].size; #ifdef JSMN_PARENT_LINKS tree[treeptr].parent = token[root].parent; #endif if (treeptr + 1 >= tree_max) { // we could actually calculate the output size exactly with a dummy // pass through the data, allocate enough space off the heap, and then // copy the result back on top of the original array, followed by // freeing the heap space. Checking of course that the original array // is large enough before mallocing the temp workspace... next version // perhaps... fprintf (stderr, "reorder: the output array is not large enough - %d limit reached\n", tree_max); exit (1); } tokenptr += 1; treeptr += 1; base = treeptr; if (token[root].type == JSMN_OBJECT || token[root].type == JSMN_ARRAY) { children = token[root].size; if (treeptr + 1 + children >= tree_max) { fprintf (stderr, "reorder: the output array is not large enough - %d limit exceeded\n", tree_max); exit (1); } treeptr += children; // reserve extra space for the children for (i = 0; i < children; i++) { tree[base + i].type = JSMN_CHILD; tree[base + i].size = treeptr; reorder (js, token, tree, tree_max, tokenptr, depth + 3, display); } } } char *typename[4] = { "JSMN_PRIMITIVE", "JSMN_OBJECT", "JSMN_ARRAY", "JSMN_STRING" }; void tree_walk (char *js, jsmntok_t * token, int root, int depth, int display) { int keyword; // token index int children, i; // I expect to write several procedures for walking parts of the json tree // - this one just displays the whole tree in the original format (with // some contents elided for brevity since this is for doing a visual check) switch (token[root].type) { case JSMN_OBJECT: children = token[root].size; for (i = 0; i < children; i++) { if (/* some terminal */1) { // now that we've added explicit child links using the 'reorder' // procedure, we can skip the subtree entirely, without requiring // to traverse it all in order to step over data and reach the // following sibling... if (tokstrcmp (js, token[keyword], "...whatever...")) { tree_walk (js, token, token[root + 1 + i].size, 0, TRUE); } else { if (display) fprintf (stdout, " << skipped >> "); } } else { tree_walk (js, token, token[root + 1 + i].size, depth + 1, display); // these are all JSMN_CHILD } } if (display) putchar ('\n'); if (display) indent (depth); break; case JSMN_ARRAY: // if (display) printf ("[DEPTH%0d ", depth); children = token[root].size; // handle appropriately per application here // if (display) putchar (']'); break; default: if (display) { showtok (js, token, root); } } } // if you declare tokens[] on the stack inside main, (as was originally done following the // example in the jsmn package) you'll get a run time error if BIGPROG is much over 400000 // - it's better to be static or allocated off the heap. #define BIGPROG 1000000 static jsmntok_t tokens[BIGPROG]; static jsmntok_t tree[BIGPROG]; int main (int argc, char **argv) { char *js; int r; jsmn_parser p; int json_fd; off_t flen; if (argc != 2) { fprintf (stderr, "syntax: %s file.json\n", argv[0]); exit (1); } json_fd = open (argv[1], O_RDONLY); if (json_fd == -1) { fprintf (stderr, "%s - %s\n", argv[0], strerror (errno)); exit (2); } flen = lseek (json_fd, (off_t) 0L, SEEK_END); js = mmap (NULL, flen, PROT_READ, MAP_SHARED, json_fd, (off_t) 0L); jsmn_init (&p); r = jsmn_parse (&p, js, strlen (js), tokens, BIGPROG); if (r <= 0) { fprintf (stderr, "syntax: %s - parse failed\n", argv[1]); } reorder (js, tokens, tree, BIGPROG, 0, 0, FALSE); tree_walk (js, tree, 0, 0, FALSE); exit (0); return 0; }