% Modified 16 Jan 2002 to agree with COMMON version 3.64 \def\9#1{} % this hack is explained in CWEB manual Appendix F11 @* Introduction. This file contains the program \.{wmerge}, which takes two or more files and merges them according to the conventions of \.{CWEB}. Namely, it takes an ordinary \.{.w} file and and optional \.{.ch} file and sends the corresponding \.{.w}-style file to standard output (or to a named file), expanding all ``includes'' that might be specified by \.{@@i} in the original \.{.w} file. (A more precise description appears in the section on ``command line arguments'' below.) @c #include #include /* declaration of |getenv| */ #include /* definition of |isalpha|, |isdigit| and so on */ @@; @@; @@; main (ac,av) int ac; char **av; { argc=ac; argv=av; @; @; reset_input(); while (get_line()) put_line(); fflush(out_file); check_complete(); fflush(out_file); return wrap_up(); } @ @= typedef short boolean; typedef unsigned char eight_bits; typedef char ASCII; /* type of characters inside \.{WEB} */ @ We predeclare some standard string-handling functions here instead of including their system header files, because the names of the header files are not as standard as the names of the functions. (There's confusion between \.{} and \.{}.) @= extern size_t strlen(); /* length of string */ extern char* strcpy(); /* copy one string to another */ extern int strncmp(); /* compare up to $n$ string characters */ extern char* strncpy(); /* copy up to $n$ string characters */ @ @= @ The lowest level of input to the \.{WEB} programs is performed by |input_ln|, which must be told which file to read from. The return value of |input_ln| is 1 if the read is successful and 0 if not (generally this means the file has ended). The characters of the next line of the file are copied into the |buffer| array, and the global variable |limit| is set to the first unoccupied position. Trailing blanks are ignored. 
The value of |limit| must be strictly less than |buf_size|,
so that |buffer[buf_size-1]| is never filled. Some of the routines use
the fact that it is safe to refer to |*(limit+2)| without overstepping
the bounds of the array.

@d buf_size 4096

@=
ASCII buffer[buf_size]; /* where each line of input goes */
ASCII *buffer_end=buffer+buf_size-2; /* last slot of |buffer| that |input_ln| may fill */
ASCII *limit; /* points just past the last nonblank character of the line in |buffer| */
ASCII *loc; /* points to the next character to be read from the buffer */

@ In the unlikely event that your standard I/O library does not
support |feof|, |getc| and |ungetc|, you may have to change things here.
@^system dependencies@>

Incidentally, here's a curious fact about \.{CWEB} for those of you
who are reading this file as an example of \.{CWEB} programming.
The file \.{stdio.h} includes a typedef for
the identifier |FILE|, which is not, strictly speaking, part of \CEE/.
It turns out \.{CWEAVE} knows that |FILE| is a reserved word (after all,
|FILE| is almost as common as |int|); indeed, \.{CWEAVE} knows all
the types of the ISO standard \CEE/ library. But
if you're using other types like {\bf caddr\_t},
@:caddr_t}{\bf caddr\_t@>
which is defined in \.{/usr/include/sys/types.h}, you should let
\.{WEAVE} know that this is a type, either by including the \.{.h} file
at \.{WEB} time (saying \.{@@i /usr/include/sys/types.h}), or by
using \.{WEB}'s format command (saying \.{@@f caddr\_t int}). Either of
these will make {\bf caddr\_t} be treated in the same way as |int|. 
@= input_ln(fp) /* copies a line into |buffer| or returns 0 */ FILE *fp; /* what file to read from */ { register int c=EOF; /* character read; initialized so some compilers won't complain */ register char *k; /* where next character goes */ if (feof(fp)) return(0); /* we have hit end-of-file */ limit = k = buffer; /* beginning of buffer */ while (k<=buffer_end && (c=getc(fp)) != EOF && c!='\n') if ((*(k++) = c) != ' ') limit = k; if (k>buffer_end) if ((c=getc(fp))!=EOF && c!='\n') { ungetc(c,fp); loc=buffer; err_print("! Input line too long"); @.Input line too long@> } if (c==EOF && limit==buffer) return(0); /* there was nothing after the last newline */ return(1); } @ Now comes the problem of deciding which file to read from next. Recall that the actual text that \.{CWEB} should process comes from two streams: a |web_file|, which can contain possibly nested include commands \.{@@i}, and a |change_file|, which might also contain includes. The |web_file| together with the currently open include files form a stack |file|, whose names are stored in a parallel stack |file_name|. The boolean |changing| tells whether or not we're reading from the |change_file|. The line number of each open file is also kept for error reporting. 
@f line x /* make |line| an unreserved word */
@d max_include_depth 10 /* maximum number of source files open
  simultaneously, not counting the change file */
@d max_file_name_length 60 /* capacity of each file-name array declared below */
@d cur_file file[include_depth] /* current file */
@d cur_file_name file_name[include_depth] /* current file name */
@d cur_line line[include_depth] /* number of current line in current file */
@d web_file file[0] /* main source file */
@d web_file_name file_name[0] /* main source file name */

@=
int include_depth; /* current level of nesting */
FILE *file[max_include_depth]; /* stack of non-change files */
FILE *change_file; /* change file */
char file_name[max_include_depth][max_file_name_length];
  /* stack of non-change file names */
char change_file_name[max_file_name_length]; /* name of change file */
char alt_web_file_name[max_file_name_length]; /* alternate name to try */
int line[max_include_depth]; /* number of current line in the stacked files */
int change_line; /* number of current line in change file */
int change_depth; /* where \.{@@y} originated during a change */
boolean input_has_ended; /* if there is no more input */
boolean changing; /* if the current line is from |change_file| */
boolean web_file_open=0; /* if the web file is being read */

@ When |changing=0|, the next line of |change_file| is kept in
|change_buffer|, for purposes of comparison with the next
line of |cur_file|. After the change file has been completely input, we
set |change_limit=change_buffer|,
so that no further matches will be made.

Here's a shorthand expression for inequality between the two lines:

@d lines_dont_match (change_limit-change_buffer != limit-buffer ||
  strncmp(buffer, change_buffer, limit-buffer))

@=
char change_buffer[buf_size]; /* next line of |change_file| */
char *change_limit; /* points just past the last character kept in |change_buffer| */

@ Procedure |prime_the_change_buffer| sets |change_buffer| in
preparation for the next matching operation. 
Since blank lines in the change file are not used for matching, we have
|(change_limit==change_buffer && !changing)| if and only if
the change file is exhausted. This procedure is called only
when |changing| is 1; hence error messages will be reported correctly.

@=
void prime_the_change_buffer()
{
  change_limit=change_buffer; /* this value is used if the change file ends */
  @;
  @;
  @;
}

@ While looking for a line that begins with \.{@@x} in the change file, we
allow lines that begin with \.{@@}, as long as they don't begin with \.{@@y},
\.{@@z} or \.{@@i} (which would probably mean that the change file is fouled up).

@=
while(1) {
  change_line++;
  if (!input_ln(change_file)) return;
  if (limit } }

@ Here we are looking at lines following the \.{@@x}.

@=
do {
  change_line++;
  if (!input_ln(change_file)) {
    err_print("! Change file ended after @@x");
@.Change file ended...@>
    return;
  }
} while (limit==buffer); /* repeat while the line just read is blank */

@ @=
{
  change_limit=change_buffer+(limit-buffer);
  strncpy(change_buffer,buffer,limit-buffer+1); /* the count takes in the byte at |*limit| too */
}

@ The following procedure is used to see if the next change entry should
go into effect; it is called only when |changing| is 0.
The idea is to test whether or not the current
contents of |buffer| matches the current contents of |change_buffer|.
If not, there's nothing more to do; but if so, a change is called for:
All of the text down to the \.{@@y} is supposed to match. An error
message is issued if any discrepancy is found. Then the procedure
prepares to read the next line from |change_file|.

This procedure is called only when |buffer=
void check_change() /* switches to |change_file| if the buffers match */
{
  int n=0; /* the number of discrepancies found */
  if (lines_dont_match) return;
  while (1) {
    changing=1; change_line++;
    if (!input_ln(change_file)) {
      err_print("! Change file ended before @@y");
@.Change file ended...@>
      change_limit=change_buffer; changing=0; /* an empty |change_buffer| marks the change file as exhausted */
      return;
    }
    if (limit>buffer+1 && buffer[0]=='@@') {
      char xyz_code=isupper(buffer[1])? 
tolower(buffer[1]): buffer[1]; @; } @; changing=0; cur_line++; while (!input_ln(cur_file)) { /* pop the stack or quit */ if (include_depth==0) { err_print("! CWEB file ended during a change"); @.CWEB file ended...@> input_has_ended=1; return; } include_depth--; cur_line++; } if (lines_dont_match) n++; } } @ @= if (xyz_code=='x' || xyz_code=='z') { loc=buffer+2; err_print("! Where is the matching @@y?"); @.Where is the match...@> } else if (xyz_code=='y') { if (n>0) { loc=buffer+2; fprintf(stderr,"\n! Hmm... %d ",n); err_print("of the preceding lines failed to match"); @.Hmm... n of the preceding...@> } change_depth=include_depth; return; } @ The |reset_input| procedure gets the program ready to read the user's \.{WEB} input. @= void reset_input() { limit=buffer; loc=buffer+1; buffer[0]=' '; @; include_depth=0; cur_line=0; change_line=0; change_depth=include_depth; changing=1; prime_the_change_buffer(); changing=!changing; limit=buffer; loc=buffer+1; buffer[0]=' '; input_has_ended=0; } @ The following code opens the input files. @^system dependencies@> @= if ((web_file=fopen(web_file_name,"r"))==NULL) { strcpy(web_file_name,alt_web_file_name); if ((web_file=fopen(web_file_name,"r"))==NULL) fatal("! Cannot open input file ", web_file_name); } @.Cannot open input file@> @.Cannot open change file@> web_file_open=1; if ((change_file=fopen(change_file_name,"r"))==NULL) fatal("! Cannot open change file ", change_file_name); @ The |get_line| procedure is called when |loc>limit|; it puts the next line of merged input into the buffer and updates the other variables appropriately. A space is placed at the right end of the line. This procedure returns |!input_has_ended| because we often want to check the value of that variable after calling the procedure. @= int get_line() /* inputs the next line */ { restart: if (changing && include_depth==change_depth) @; if (! 
changing || include_depth>change_depth) {
    @;
    if (changing && include_depth==change_depth) goto restart;
  }
  if (input_has_ended) return 0;
  loc=buffer; *limit=' ';
  if (buffer[0]=='@@' && (buffer[1]=='i' || buffer[1]=='I')) {
    loc=buffer+2; *limit='"'; /* a quote at |*limit| stops the file-name scans below */
    while (*loc==' '||*loc=='\t')
      loc++;
    if (loc>=limit) {
      err_print("! Include file name not given");
@.Include file name ...@>
      goto restart;
    }
    if (include_depth>=max_include_depth-1) {
      err_print("! Too many nested includes");
@.Too many nested includes@>
      goto restart;
    }
    include_depth++; /* push input stack */
    @;
  }
  return 1;
}

void put_line()
{
  char *ptr=buffer;
  while (ptr= {
  char temp_file_name[max_file_name_length];
  char *cur_file_name_end=cur_file_name+max_file_name_length-1;
  char *k=cur_file_name, *kk;
  int l; /* length of file name */

  if (*loc=='"') {
    loc++;
    while (*loc!='"' && k<=cur_file_name_end) *k++=*loc++;
    if (loc==limit) k=cur_file_name_end+1; /* unmatched quote is `too long' */
  } else
    while (*loc!=' '&&*loc!='\t'&&*loc!='"'&&k<=cur_file_name_end) *k++=*loc++;
  if (k>cur_file_name_end) too_long();
@.Include file name ...@>
  *k='\0';
  if ((cur_file=fopen(cur_file_name,"r"))!=NULL) { /* first try the name exactly as written */
    cur_line=0;
    goto restart; /* success */
  }
  kk=getenv("CWEBINPUTS"); /* the environment variable takes precedence over the macro */
  if (kk!=NULL) {
    if ((l=strlen(kk))>max_file_name_length-2) too_long();
    strcpy(temp_file_name,kk);
  }
  else {
#ifdef CWEBINPUTS
    if ((l=strlen(CWEBINPUTS))>max_file_name_length-2) too_long();
    strcpy(temp_file_name,CWEBINPUTS);
#else
    l=0;
#endif /* |CWEBINPUTS| */
  }
  if (l>0) {
    if (k+l+2>=cur_file_name_end) too_long();
@.Include file name ...@>
    for (; k>= cur_file_name; k--) *(k+l+1)=*k; /* shift the name, null included, right by |l+1| */
    strcpy(cur_file_name,temp_file_name);
    cur_file_name[l]='/'; /* \UNIX/ pathname separator */
    if ((cur_file=fopen(cur_file_name,"r"))!=NULL) {
      cur_line=0;
      goto restart; /* success */
    }
  }
  include_depth--; err_print("! 
Cannot open include file"); goto restart; } @ @= { cur_line++; while (!input_ln(cur_file)) { /* pop the stack or quit */ if (include_depth==0) {input_has_ended=1; break;} else { fclose(cur_file); include_depth--; if (changing && include_depth==change_depth) break; cur_line++; } } if (!changing && !input_has_ended) if (limit-buffer==change_limit-change_buffer) if (buffer[0]==change_buffer[0]) if (change_limit>change_buffer) check_change(); } @ @= { change_line++; if (!input_ln(change_file)) { err_print("! Change file ended without @@z"); @.Change file ended...@> buffer[0]='@@'; buffer[1]='z'; limit=buffer+2; } if (limit>buffer) { /* check if the change has ended */ *limit=' '; if (buffer[0]=='@@') { if (isupper(buffer[1])) buffer[1]=tolower(buffer[1]); if (buffer[1]=='x' || buffer[1]=='y') { loc=buffer+2; err_print("! Where is the matching @@z?"); @.Where is the match...@> } else if (buffer[1]=='z') { prime_the_change_buffer(); changing=!changing; } } } } @ At the end of the program, we will tell the user if the change file had a line that didn't match any relevant line in |web_file|. @= void check_complete(){ if (change_limit!=change_buffer) { /* |changing| is 0 */ strncpy(buffer,change_buffer,change_limit-change_buffer+1); limit=buffer+(int)(change_limit-change_buffer); changing=1; change_depth=include_depth; loc=buffer; err_print("! Change file entry did not match"); @.Change file entry did not match@> } } @* Reporting errors to the user. A global variable called |history| will contain one of four values at the end of every run: |spotless| means that no unusual messages were printed; |harmless_message| means that a message of possible interest was printed but no serious errors were detected; |error_message| means that at least one error was found; |fatal_message| means that the program terminated abnormally. 
The value of |history| does not influence the
behavior of the program; it is simply computed for the convenience
of systems that might want to use such information.

@d spotless 0 /* |history| value for normal jobs */
@d harmless_message 1 /* |history| value when non-serious info was printed */
@d error_message 2 /* |history| value when an error was noted */
@d fatal_message 3 /* |history| value when we had to stop prematurely */
@d mark_harmless {if (history==spotless) history=harmless_message;}
@d mark_error history=error_message

@=
int history=spotless; /* indicates how bad this run was */

@ The command `|err_print("! Error message")|' will report a syntax error to
the user, by printing the error message at the beginning of a new line and
then giving an indication of where the error was spotted in the source file.
Note that no period follows the error message, since the error routine
will automatically supply a period. A newline is automatically supplied
if the string begins with |"!"|.

The actual error indications are provided by a procedure called |error|.

@=
void err_print();

@
@=
void err_print(s) /* prints `\..' and location of error message */
char *s;
{
  char *k,*l; /* pointers into |buffer| */
  fprintf(stderr,*s=='!'? "\n%s" : "%s",s); /* a leading |'!'| starts the message on a new line */
  if(web_file_open) @@;
  else putc('\n',stderr); /* with no web file open, only a newline follows the message */
  update_terminal; mark_error; /* |mark_error| assigns |error_message| unconditionally */
}

@ The error locations can be indicated by using the global variables
|loc|, |cur_line|, |cur_file_name| and |changing|,
which tell respectively the first
unlooked-at position in |buffer|, the current line number, the current
file, and whether the current line is from |change_file| or |cur_file|.
This routine should be modified on systems whose standard text editor
has special line-numbering conventions.
@^system dependencies@>

@=
{if (changing && include_depth==change_depth)
  fprintf(stderr,". (l. %d of change file)\n", change_line);
else if (include_depth==0) fprintf(stderr,". (l. %d)\n", cur_line);
else fprintf(stderr,". (l. 
%d of include file %s)\n", cur_line, cur_file_name); l= (loc>=limit? limit: loc); if (l>buffer) { for (k=buffer; k @= wrap_up() { @; if (history > harmless_message) return(1); else return(0); } @ @= switch (history) { case spotless: if (show_happiness) fprintf(stderr,"(No errors were found.)\n"); break; case harmless_message: fprintf(stderr,"(Did you see the warning message above?)\n"); break; case error_message: fprintf(stderr,"(Pardon me, but I think I spotted something wrong.)\n"); break; case fatal_message: fprintf(stderr,"(That was a fatal error, my friend.)\n"); } /* there are no other cases */ @* Command line arguments. The user calls \.{wmerge} with arguments on the command line. These are either file names or flags to be turned off (beginning with |"-"|) or flags to be turned on (beginning with |"+"|. The following globals are for communicating the user's desires to the rest of the program. The various file name variables contain strings with the names of those files. Most of the 128 flags are undefined but available for future extensions. @d show_banner flags['b'] /* should the banner line be printed? */ @d show_happiness flags['h'] /* should lack of errors be announced? */ @= int argc; /* copy of |ac| parameter to |main| */ char **argv; /* copy of |av| parameter to |main| */ char out_file_name[max_file_name_length]; /* name of |out_file| */ boolean flags[128]; /* an option for each 7-bit code */ @ The |flags| will be initially 1. @= show_banner=show_happiness=1; @ We now must look at the command line arguments and set the file names accordingly. At least one file name must be present: the \.{WEB} file. It may have an extension, or it may omit it to get |'.w'| added. If there is another file name present among the arguments, it is the change file, again either with an extension or without one to get |'.ch'| An omitted change file argument means that |'/dev/null'| should be used, when no changes are desired. 
@^system dependencies@> If there's a third file name, it will be the output file. @= void scan_args(); @ @= void scan_args() { char *dot_pos; /* position of |'.'| in the argument */ register char *s; /* register for scanning strings */ boolean found_web=0,found_change=0,found_out=0; /* have these names have been seen? */ boolean flag_change; while (--argc > 0) { if (**(++argv)=='-' || **argv=='+') @@; else { s=*argv;@+dot_pos=NULL; while (*s) { if (*s=='.') dot_pos=s++; else if (*s=='/') dot_pos=NULL,++s; else s++; } if (!found_web) @@; else if (!found_change) @@; else if (!found_out) @@; else @; } } if (!found_web) @; if (!found_change) strcpy(change_file_name,"/dev/null"); } @ We use all of |*argv| for the |web_file_name| if there is a |'.'| in it, otherwise we add |".w"|. If this file can't be opened, we prepare an |alt_web_file_name| by adding |"web"| after the dot. The other file names come from adding other things after the dot. We must check that there is enough room in |web_file_name| and the other arrays for the argument. @= { if (s-*argv > max_file_name_length-5) @; if (dot_pos==NULL) sprintf(web_file_name,"%s.w",*argv); else { strcpy(web_file_name,*argv); *dot_pos=0; /* string now ends where the dot was */ } sprintf(alt_web_file_name,"%s.web",*argv); *out_file_name='\0'; /* this will print to stdout */ found_web=1; } @ @= { if (s-*argv > max_file_name_length-4) @; if (dot_pos==NULL) sprintf(change_file_name,"%s.ch",*argv); else strcpy(change_file_name,*argv); found_change=1; } @ @= { if (s-*argv > max_file_name_length-5) @; if (dot_pos==NULL) sprintf(out_file_name,"%s.out",*argv); else strcpy(out_file_name,*argv); found_out=1; } @ @= { if (**argv=='-') flag_change=0; else flag_change=1; for(dot_pos=*argv+1;*dot_pos>'\0';dot_pos++) flags[*dot_pos]=flag_change; } @ @= { fatal("! Usage: wmerge webfile[.w] [changefile[.ch] [outfile[.out]]]\n","")@; } @ @= fatal("! Filename too long\n", *argv); @* Output. 
Here is the code that opens the output file:
@^system dependencies@>

@=
FILE *out_file; /* where output goes */

@ @=
scan_args();
if (out_file_name[0]=='\0') out_file=stdout; /* an empty |out_file_name| selects standard output */
else if ((out_file=fopen(out_file_name,"w"))==NULL)
    fatal("! Cannot open output file ", out_file_name);
@.Cannot open output file@>

@ The |update_terminal| procedure is called when we want
to make sure that everything we have output to the terminal so far has
actually left the computer's internal buffers and been sent.
@^system dependencies@>

@d update_terminal fflush(stderr) /* empty the terminal output buffer */

@* Index.