/* RTF2HTML.c, Chuck Shotton - 6/21/93 */
/************************************************************************
 * This program takes a stab at converting RTF (Rich Text Format) files
 * into HTML. There are some limitations that keep RTF from being able to
 * easily represent things like in-line images and anchors as styles. In
 * particular, RTF styles apply to entire "paragraphs", so anchors or
 * images in the middle of a text stream can't easily be represented by
 * styles. The intent is to ultimately use something like embedded text
 * color changes to represent these constructs.
 *
 * In the meantime, you can take existing Word documents, apply the
 * correct style sheet, and convert them to HTML with this tool.
 *
 * AUTHOR: Chuck Shotton, UT-Houston Academic Computing,
 *         cshotton@oac.hsc.uth.tmc.edu
 *
 * USAGE: rtf2html [rtf_filename]
 *
 * BEHAVIOR:
 *      rtf2html will open the specified RTF input file or read from
 *      standard input, writing converted HTML to standard output.
 *
 * NOTES:
 *      The RTF document must be formatted with a style sheet that has
 *      style numberings that conform to the style_mappings table
 *      defined in this source file.
 *
 * MODIFICATIONS:
 *      6/21/93 : Chuck Shotton - created version 1.0.
 *
 ************************************************************************/

/* Note, the source is formatted with 4 character tabs */

/* NOTE(review): this copy of the file appears to have lost every span of text
 * that looked like an HTML tag. The three include directives below carry no
 * header name. The code in this file uses FILE, fgetc, fputs, fputc and stdout
 * as well as strncat and strcmp, so the first two are presumably stdio.h and
 * string.h; the THINK_C one cannot be determined from here. TODO confirm
 * against a clean copy. */
#include
#include
#ifdef THINK_C
#include
#endif

/* Boolean constants, defined only when TRUE is not already defined.
 * Note that TRUE is -1 here, not 1. */
#ifndef TRUE
#define TRUE -1
#define FALSE 0
#endif

#define MAX_LEVELS 20 /*defines the # of nested in-line styles (pairs of {})*/
/* Row count of the styles table; equals the number of StyleState values. */
#define MAX_STYLES 12
#define MAX_INLINE_STYLES 4 /*defines # of in-line styles, bold, italic, etc.*/

/* Style identifiers. The first MAX_INLINE_STYLES values are the in-line
 * styles; s_para and the s_h0..s_h6 values follow them. */
typedef enum {s_plain, s_bold, s_italic, s_underline, /*in-line styles*/
    s_para, /*pseudo style*/
    s_h0, s_h1, s_h2, s_h3, s_h4, s_h5, s_h6 /*heading styles*/
} StyleState;

/* Twelve rows of two strings each: column 0 is the start text and column 1 the
 * end text for a style. The row order presumably follows StyleState.
 * NOTE(review): as found, the rows hold only empty strings or string literals
 * that contain raw line breaks, which is not valid C. Presumably the tag text
 * was stripped from this copy; TODO restore the literals from a clean source
 * rather than guessing them. The literals are left exactly as found. */
char *styles[MAX_STYLES][2] = { /*HTML Start and end tags for styles*/
    {"", ""},
    {"", ""},
    {"", ""},
    {"", ""},
    {"

", ""},
    {"", ""},
    {"

", "

"},
    {"

", "

"},
    {"

", "

"},
    {"

", "

"},
    {"
", "
"},
    {"
", "
"}
};

/* style_mappings maps the style numbers in a RTF style sheet into one of the*/
/* (currently) six paragraph-oriented HTML styles (i.e. heading 1 through 6.)*/
/* Additional styles for lists, etc. should be added here. Style info */
/* ultimately should be read from some sort of config file into these tables.*/
/* Entry 0 is the empty string; entries 1 through 6 are the style numbers, as */
/* text, that RTF_MapStyle compares against. */
char *style_mappings[7] = { "", "255", "254", "253", "252", "251", "250" };

/* RTF tokens that mean something to the parser. All others are ignored. */
/* t_start and t_end bracket the real tokens. */
typedef enum {t_start,t_fonttbl, t_colortbl, t_stylesheet, t_info, t_s, t_b,
    t_u, t_i, t_plain, t_par, t_end} TokenIndex;

/* Control-word spellings, twelve entries listed in the same order as the
 * TokenIndex values; the first and last entries are the placeholder ###. */
char *tokens[] = {
    "###",
    "fonttbl",
    "colortbl",
    "stylesheet",
    "info",
    "s",
    "b",
    "ul",
    "i",
    "plain",
    "par",
    "###"
};

/* Parser state shared by the functions in this file. */
char style_state[MAX_LEVELS][MAX_INLINE_STYLES], curr_style[MAX_INLINE_STYLES];
short curr_heading;
short level, /*current {} nesting level*/
    skip_to_level,/*{} level to which parsing should skip (used to skip */
                  /* font tables, style sheets, color tables, etc.) */
    gobble, /*Flag set to indicate all input should be discarded */
    ignore_styles;/*Set to ignore inline style expansions after style use*/

/**************************************/
/* Read one character from f with fgetc and return it.
 * The int result of fgetc is narrowed to char by the return type, so an EOF
 * result is not reliably distinguishable from a data byte in the caller. */
char RTF_GetChar(f)
FILE *f;
{
    return fgetc(f);
}

/**************************************/
/* Write the string s to stdout, unless gobble is set, in which case nothing
 * is written. */
void RTF_PutStr(s)
char *s;
{
    if (gobble) return;
    fputs(s, stdout);
}

/**************************************/
/* Write the character ch to stdout, unless gobble is set. The three
 * characters that are special in HTML are routed through RTF_PutStr; every
 * other character goes straight to fputc.
 * NOTE(review): as found, each special case writes back the very character it
 * matched, so no escaping takes place. Presumably the literals were the
 * entity forms (lt, gt and amp) and were decoded in this copy; TODO confirm
 * against a clean source. */
void RTF_PutChar(ch)
char ch;
{
    if (gobble) return;

    switch (ch) {
        case '<':
            RTF_PutStr("<");
            break;
        case '>':
            RTF_PutStr(">");
            break;
        case '&':
            RTF_PutStr("&");
            break;
        default:
            fputc(ch, stdout);
    }
}

/**************************************/
/* NOTE(review): the line below is left exactly as found. RTF_PlainStyle is cut
 * off in the middle of its for-loop condition and resumes with the tail of a
 * format string, which leaves an unmatched double quote; an unknown amount of
 * code (likely several functions) is missing at that point and must be
 * restored from a clean copy.
 *
 * The two functions after it are intact:
 *   RTF_BuildToken appends the single character ch to the string token using
 *   strncat with a count of 1. No check of the capacity of token is visible
 *   here, so the caller has to guarantee room for it.
 *   RTF_MapStyle compares s against the seven style_mappings entries in order
 *   and returns the index of the first equal entry; it returns 0 when none
 *   matches, which is also the result for the empty string at entry 0. */
void RTF_PlainStyle (s) char *s; { int j; for (j=0;j%s", s); } /**************************************/ void RTF_BuildToken (token, ch) char *token; char ch; { strncat (token, &ch, 1); } /**************************************/ /* Map a style number into a HTML heading */ short RTF_MapStyle(s) char *s; { int i; for (i=0;i<7;i++) if (!strcmp(style_mappings[i], s)) return (i); return (0); } 
/**************************************/
/* Perform actions for RTF control words */
/* Takes the control word text in control and its argument text in arg, and
 * returns immediately while gobble is set.
 * NOTE(review): the body is truncated in this copy. The for-loop header stops
 * after its initializer (the condition reads i1 and there is no increment),
 * and the statements that follow use argv and RTF_Parse and return a value
 * from this void function. Presumably they are the tail of a later function
 * such as main, with everything in between lost. The local style is declared
 * but never used in what remains. TODO restore the missing text from a clean
 * copy; the code below is left exactly as found. */
void RTF_DoControl (control, arg)
char *control, *arg;
{
    TokenIndex i;
    short style;

    if (gobble) return;
    for (i=t_start; i1) return (RTF_Parse(argv[1]));
    else return (RTF_Parse(NULL));
}