// FOR DOCUMENTATION, SEE MY BLOG POST:
//   http://techennui.blogspot.com/2007/11/quick-hack-17-in-series-of-42-inlining.html
//
// Expands LaTeX \newcommand macros to allow submission of documents
// to print services which do not allow user-defined macros.
//
// Valid input formats are:
//   \newcommand{\whatever}{Replacement text}
//   \newcommand{\whatever}[2]{Expand #1 and #2 but not \#1 or even $\#1$}
// - anything else ought to be passed through verbatim; if an insurmountable
// error is detected, the program exits with a non-0 return code.
//
// The purpose of this utility is similar to:
//   http://winedt.org/Macros/LaTeX/uncommand.php
// which I wasn't aware of when I wrote it.  Though I would like to see how
// well that program handles the test input file, to see if it does the
// right thing with some of the more complex definitions :-)
//
// See also http://texcatalogue.sarovar.org/entries/de-macro.html
// and http://www.mackichan.com/index.html?techtalk/685.htm~mainFrame

#include <stdio.h>
#include <stdlib.h> /* exit() */
#include <string.h> /* strcpy(), strcmp(), strlen() */
#include <ctype.h>

#define TRUE (0==0)
#define FALSE (0!=0)

// Sorry, only 8-bit char sets supported.

#define NEWCOMMANDLEN 14 /* strlen("\\renewcommand")+1 */

/* Will later make these dynamic - quick hack for now */
#define MAXMACRONAMELEN 80
#define MAXMACROBODYLEN 1024
#define MAXARGLEN 1024
#define MAXCOMMANDS 1024
#define MAXARGS 10
#define MAX_MACRO_EXPANSION (32*1024)

#define _PROTECTED_ 256
#define _PARAMETER_ 512   /* flag bit marking a body element as "#n"; low bits hold n-1 */

// Macro table.  Slot i holds the name, body and argument count of the i'th
// definition read so far; slots 0..NEXTFREEMACRO-1 are in use.
// NOTE: the first index is the macro slot (or argument number), the second
// is the position within that name/body/argument.
static int NEXTFREEMACRO = 0;
static int THIS_COMMAND = MAXCOMMANDS-1;
static char macro[MAXCOMMANDS][MAXMACRONAMELEN];
static int body[MAXCOMMANDS][MAXMACROBODYLEN];
static int args[MAXCOMMANDS];
char actual[MAXARGS][MAXARGLEN]; // used at point of macro call.
char curcommand[NEWCOMMANDLEN] = { '\0' };

static int in_comment = FALSE;

// Copy a 0-terminated int string (including the terminator), like strcpy().
void intcpy(int *dest, int *source)
{
  while ((*dest++ = *source++) != 0) ;
}

#ifndef BITS
#define BITS 15
#endif
#define BUFFERSIZE (1<<BITS)
#define CIRCULAR (BUFFERSIZE-1)
static int buffer[BUFFERSIZE]; // deliberately int, not char, for _protected_
static int get_index = 0, put_index = 0;

// nasty pushback buffer because we always insert text *before* the current 'get' pointer
// and step the get pointer backwards.  This is OK if we insert a whole string at a time
// but if we push back two strings in a row, they can be inserted out of order unless
// we're very careful!  I.e. not as simple as the usual put/get from a cyclic buffer :-(

// Return the next input character: pushed-back text first, then stdin.
// Returns EOF at end of input.
int get_next_char(void)
{
  int c;
  if (get_index == put_index) return(fgetc(stdin));
  c = buffer[get_index];
  get_index = (get_index+1)&CIRCULAR;
  return c;
}

// Like get_next_char() but treats end of input as a fatal error.
// 'context' names what was being read, for the error message.
static int get_char_or_die(const char *context)
{
  int c = get_next_char();
  if (c == EOF) {
    fprintf(stderr, "Unexpected end of input while reading %s\n", context);
    exit(1);
  }
  return c;
}

// Report that some fixed-size table is too small for the input, and exit.
static void too_long(const char *what, const char *name, int limit)
{
  fprintf(stderr, "Sorry, %s \\%s is too long for me (limit is %d characters)\n", what, name, limit);
  exit(1);
}

// Exit with an error if the macro table has no free slot left.
static void require_free_macro_slot(void)
{
  if (NEXTFREEMACRO >= MAXCOMMANDS) {
    fprintf(stderr, "Sorry, too many macro definitions (limit is %d)\n", MAXCOMMANDS);
    exit(1);
  }
}

// Look up a macro by name (without the leading backslash).
// Returns its slot number, or NEXTFREEMACRO if the name is not defined.
// The search runs from the newest definition to the oldest so that a
// \renewcommand (which is stored as an additional entry) takes precedence
// over the definition it replaces.
int locate_macro_name(char *def)
{
  int i;
  for (i = NEXTFREEMACRO-1; i >= 0; i--) {
    if (strcmp(def, macro[i]) == 0) return i;
  }
  return NEXTFREEMACRO;
}

// Append a character at the *tail* of the re-read buffer, i.e. it will be
// read after everything already pushed back.
void reinsert_char(int c)
{
  buffer[put_index] = c;
  put_index = (put_index+1)&CIRCULAR;
  if (put_index == get_index) {
    fprintf(stderr, "Sorry, a large expansion ran me out of space. Please recompile with -DBITS=%d\n", BITS+1);
    exit(1);
  }
}

void unread_char(int c) // PUT AT *HEAD* OF RE-READ BUFFER.  JUST LIKE ungetc(stdin, c)
{
  get_index = (get_index-1)&CIRCULAR;
  buffer[get_index] = c;
  if (put_index == get_index) {
    fprintf(stderr, "Sorry, a large expansion ran me out of space. Please recompile with -DBITS=%d\n", BITS+1);
    exit(1);
  }
}

// Push a whole string back so that it is the next thing read, in order.
// Characters are pushed last-to-first because each push goes in front of
// the previous one.  An empty string pushes nothing.
void unread_string(char *s)
{
  char *end = s + strlen(s);
  while (end != s) {
    end -= 1;
    // Convert via unsigned char so 8-bit characters come back as 0..255,
    // exactly as fgetc() would have delivered them (never negative/EOF).
    unread_char((unsigned char)*end);
  }
}

// Read the rest of an alphabetic TeX command word whose first letter 'c' has
// already been consumed.  The terminating non-letter is pushed back.
// Returns a pointer to a static buffer, overwritten by the next call.
char *get_command(int c)
{
  static char w[MAXMACRONAMELEN];
  char *wp = w;
  for (;;) {
    if (wp - w >= MAXMACRONAMELEN-1) {
      *wp = '\0';
      too_long("the command name starting", w, MAXMACRONAMELEN-1);
    }
    *wp++ = (char)c;
    c = get_next_char();
    if (!isalpha(c)) break;
  }
  unread_char(c);
  *wp = '\0';
  return w;
}

// Return the next character that is not part of a % comment.  A comment runs
// from '%' up to and including the end of the line.  Returns EOF at end of
// input, including when the input ends inside a comment.
int next_non_comment_char(void)
{
  int c = get_next_char();
  while (c == '%') {
    do {
      c = get_next_char();
    } while ((c != '\n') && (c != EOF));
    if (c == EOF) return EOF;
    c = get_next_char(); // try again
  }
  return c;
}

// As next_non_comment_char(), additionally skipping white space.
int next_non_comment_non_space_char(void)
{
  int c;
  for (;;) {
    c = next_non_comment_char();
    if (!isspace(c)) return(c);
  }
}

// Store one element of a macro body at position n, checking for overflow
// (one slot is always kept free for the terminating 0).  Returns n+1.
static int body_put(int *expansion, int n, int c)
{
  if (n >= MAXMACROBODYLEN-1) too_long("the body of", macro[NEXTFREEMACRO], MAXMACROBODYLEN-1);
  expansion[n] = c;
  return n+1;
}

// Read the {body} of the macro currently being defined (its name and argument
// count must already be in slot NEXTFREEMACRO), store it, and commit the
// definition by advancing NEXTFREEMACRO.  #1..#9 are stored as internal
// parameter codes; \# and anything inside a % comment are copied untouched.
void learn_body(void)
{
  static int expansion[MAXMACROBODYLEN];
  static const char context[] = "a macro body";
  int n = 0;
  int c, depth = 0;

  require_free_macro_slot();
  c = next_non_comment_non_space_char();
  if (c != '{') {
    // single token.  Not yet handled.
    return;
  }

  // READ BODY UP TO AND INCLUDING FINAL '}' BUT NOT BEYOND
  for (;;) {
    c = get_char_or_die(context);
    // We'll include comments in the macro expansion *but* must be careful not to count braces within comments
    if (c == '\\') {
      // escaped character: keep the pair together so \{ \} \# \% are not interpreted
      n = body_put(expansion, n, c);
      c = get_char_or_die(context);
      n = body_put(expansion, n, c);
    } else if (c == '%') {
      // Copy rest of comment
      for (;;) {
        n = body_put(expansion, n, c);
        if (c == '\n') break;
        c = get_char_or_die(context);
      }
    } else {
      // regular character - process it normally:
      if (c == '{') {
        depth += 1;
      } else if (c == '}') {
        if (depth == 0) break;
        depth -= 1;
      }
      if (c == '#') {
        c = get_char_or_die(context); // '1' .. '9'
        if ((c < '1') || (c > '9') || (c - '1' >= args[NEXTFREEMACRO])) {
          fprintf(stderr, "Problem in definition of \\%s: '#%c' is not a valid parameter (macro takes %d)\n",
                  macro[NEXTFREEMACRO], c, args[NEXTFREEMACRO]);
          exit(1);
        }
        n = body_put(expansion, n, c - '1' + _PARAMETER_); // INTERNAL CODE FOR #1, #2, ... #9
      } else {
        n = body_put(expansion, n, c);
      }
    }
  }
  expansion[n] = '\0';
  intcpy(body[NEXTFREEMACRO], expansion);
  NEXTFREEMACRO = NEXTFREEMACRO + 1; // We now have all the pieces.
}

// Read the "n]" part of an "[n]" argument count (the '[' has already been
// consumed) and return n.  Exits with an error if n is not a single digit
// or the closing ']' is missing.
int learn_argcount(void)
{
  int digit, closer;

  digit = get_next_char(); // eg "[3]" -> 3
  if (!isdigit(digit)) {
    fprintf(stderr, "Problem at \"\\%s{\\%s}[%c\" <-- last char should be a digit (was ascii %d)\n",
            curcommand, macro[NEXTFREEMACRO], digit, digit);
    exit(1);
  }
  closer = get_next_char(); // ']'
  if (closer != ']') {
    fprintf(stderr, "Problem at \"\\%s{\\%s}[%c%c\" <-- last char should be a ']' (was ascii %d)\n",
            curcommand, macro[NEXTFREEMACRO], digit, closer, closer);
    exit(1);
  }
  return digit - '0';
}

// Read "\name}" (the opening '{' has already been consumed), then the
// optional [argcount] and the body, and record the new macro.
void learn_keyword(void)
{
  static char name[MAXMACRONAMELEN];
  char *cp = name;
  int c, argcount;

  require_free_macro_slot();

  c = next_non_comment_non_space_char();
  if (c != '\\') {
    fprintf(stderr, "Problem at \"\\%s{%c\" <-- last char should be a '\\' (was ascii %d)\n", curcommand, c, c);
    exit(1);
  }
  for (;;) {
    c = next_non_comment_non_space_char();
    if (!isalpha(c)) break; // or isalnum?  Are numbers allowed in TeX words?  Probably not.
    if (cp - name >= MAXMACRONAMELEN-1) {
      *cp = '\0';
      too_long("the macro name", name, MAXMACRONAMELEN-1);
    }
    *cp++ = (char)c;
  }
  *cp = '\0';
  if (c != '}') {
    fprintf(stderr, "Problem at \"\\%s{\\%s%c\" <-- last char should be a '}' (was ascii %d)\n", curcommand, name, c, c);
    exit(1);
  }
  strcpy(macro[NEXTFREEMACRO], name);

  // NOW READ ARG COUNT IF PRESENT FOLLOWED BY BODY
  c = next_non_comment_non_space_char();
  if (c == '[') {
    argcount = learn_argcount(); // reads n and the final ']'
  } else {
    // Must go back at the *head* of the re-read buffer: if pushed-back text
    // is pending (definition inside an expansion) the tail is the wrong place.
    unread_char(c);
    argcount = 0;
  }
  args[NEXTFREEMACRO] = argcount;
  learn_body();
}

// Called after \newcommand or \renewcommand has been read: learn the
// definition that follows, or pass the command through if it is not
// followed by '{'.
void learn_macro(void)
{
  int c;
  c = next_non_comment_non_space_char();
  if (c == '{') {
    learn_keyword(); // reads \word and the final '}'
  } else {
    // badly formatted definition
    fprintf(stdout, "\\%s", curcommand);
    unread_char(c);
  }
}

// Store one character of actual argument 'argno' at position n, checking
// for overflow (one byte is kept free for the terminator).  Returns n+1.
static int arg_put(int argno, int n, int c)
{
  if (n >= MAXARGLEN-1) too_long("an argument to", macro[THIS_COMMAND], MAXARGLEN-1);
  actual[argno][n] = (char)c;
  return n+1;
}

// Read one {argument} (the opening '{' has already been consumed) into
// actual[argno].  Nested {...} groups, backslash-escaped characters and
// % comments are copied verbatim and do not end the argument early.
static void read_actual(int argno)
{
  static const char context[] = "a macro argument";
  int c, n = 0, depth = 0;

  for (;;) {
    c = get_char_or_die(context);
    if (c == '\\') {
      n = arg_put(argno, n, c);
      c = get_char_or_die(context);
    } else if (c == '%') {
      // copy the comment up to (not including) the newline; the newline
      // itself is stored by the arg_put() at the bottom of the loop
      while (c != '\n') {
        n = arg_put(argno, n, c);
        c = get_char_or_die(context);
      }
    } else if (c == '{') {
      depth += 1;
    } else if (c == '}') {
      if (depth == 0) break;
      depth -= 1;
    }
    n = arg_put(argno, n, c);
  }
  actual[argno][n] = '\0';
}

// Expand a call of macro THIS_COMMAND whose name has just been read:
// read its {arguments}, substitute them into the stored body, and push the
// result back onto the input so that it is re-scanned (it may contain
// further macro calls).
void expand_macro(void)
{
  // READ ARGS IF NEEDED, THEN EXPAND.
  static char temp_buffer[MAX_MACRO_EXPANSION];
  char *pp;                     // put pointer
  char *limit = temp_buffer + MAX_MACRO_EXPANSION-1; // last usable slot is reserved for '\0'
  int *fp;                      // fetch pointer
  char *ap;                     // arg pointer
  int c, param, i, argcount = args[THIS_COMMAND];

  // following text should be args between {}s...  (or nothing, if argcount is 0)
  for (i = 0; i < argcount; i++) {
    c = get_next_char();
    if (c == '{') {
      read_actual(i);
    } else {
      // parameter is a single atom - not handled!
      fprintf(stderr, "Sorry - I expected a {} parameter (#%d) to \\%s but found '%c'\n", i+1, macro[THIS_COMMAND], c);
      fprintf(stderr, "This is either a program bug or you need to edit the source text to add {}'s\n");
      exit(1);
    }
  }

  // NOW EXPAND THE BODY, SUBSTITUTING ARGS 1..n AS NECESSARY
  // THIS IS WHERE WE NEED TO BE EXTRA CAREFUL ABOUT PUSHBACK ORDER!!!!
  // THE EXPANDED BODY MAY CONTAIN MORE TEXT TO BE EXPANDED.
  fp = body[THIS_COMMAND];
  pp = temp_buffer;
  for (;;) {
    c = *fp++;
    if (c == '\0') break;
    if ((c&_PARAMETER_) != 0) {
      param = (c&255); // TeX counts from 1 up.  Pre-processed at defn time to 0..n-1
      if (param >= argcount) continue; // cannot happen for bodies stored by learn_body()
      for (ap = actual[param]; *ap != '\0'; ap++) {
        if (pp >= limit) too_long("the expansion of", macro[THIS_COMMAND], MAX_MACRO_EXPANSION-1);
        *pp++ = *ap;
      }
    } else {
      if (pp >= limit) too_long("the expansion of", macro[THIS_COMMAND], MAX_MACRO_EXPANSION-1);
      *pp++ = (char)c;
    }
  }
  *pp = '\0';
  unread_string(temp_buffer);
  // there is a pending char (whatever followed the \word) at getptr.
  // we have to put our expansion *before* getptr
}

// Dispatch on a command word (without its backslash): learn a definition,
// expand a known macro, or echo an unknown command unchanged.
void handle_word(char *s)
{
  if ((strcmp(s, "newcommand") == 0) || (strcmp(s, "renewcommand") == 0)) {
    strcpy(curcommand, s);
    learn_macro();
  } else if ((THIS_COMMAND = locate_macro_name(s)) < NEXTFREEMACRO) {
    expand_macro();
  } else {
    // IGNORE UNKNOWN
    fprintf(stdout, "\\%s", s);
  }
}

// Filter stdin to stdout, removing \newcommand/\renewcommand definitions and
// replacing calls of the defined macros by their expansions.  % comments in
// the main text are copied through untouched.
int main(int argc, char **argv)
{
  char *command;
  int c;

  (void)argc;
  (void)argv;
  // The macro table has static storage and so starts out zeroed (all names empty).

  for (;;) {
    c = get_next_char();
    if (c == EOF) break;
    if (in_comment) {
      fputc(c, stdout);
      if (c == '\n') {
        in_comment = FALSE;
      }
    } else if (c == '\\') {
      c = get_next_char();
      if (c == EOF) {
        // input ends with a lone backslash: echo it and stop
        fputc('\\', stdout);
        break;
      }
      if (isalpha(c)) {
        // Handle TeX word
        command = get_command(c);
        handle_word(command);
      } else {
        fprintf(stdout, "\\%c", c);
      }
    } else if (c == '%') {
      fputc(c, stdout);
      in_comment = TRUE;
    } else {
      fputc(c, stdout);
    }
  }

  return 0;
}