% Copyright 1989 by Norman Ramsey, Odyssey Research Associates % Not to be sold, but may be used freely for any purpose % For more information, see file COPYRIGHT in the parent directory % This file is part of Spidery WEB % This program by Norman Ramsey is based on programs Silvio Levy % and D. E. Knuth. Silvio Levy wrote most of the code. % It is distributed WITHOUT ANY WARRANTY, express or implied. % Dec 1987 % Here is TeX material that gets inserted after \input webmac \message{OK, entering \string\batchmode...} \batchmode \def\hang{\hangindent 3em\indent\ignorespaces} \font\ninerm=cmr9 \let\mc=\ninerm % medium caps \def\cee{C} \def\pb{$\.|\ldots\.|$} % C brackets (|...|) \def\v{\char'174} % vertical (|) in typewriter font \def\ceeref{{\it The C Reference Manual}} \mathchardef\RA="3221 % right arrow \mathchardef\BA="3224 % double arrow \def\title{Spidery COMMON} \def\contentspagenumber{1} % should be odd \def\topofcontents{\null\vfill \titlefalse % include headline on the contents page \def\rheader{\hfil} \centerline{\titlefont Common Code in Spidery {\ttitlefont WEB}} \vfill } @* Introduction. This file contains code common to both \.{TANGLE} and \.{WEAVE}, that roughly concerns the following problems: character uniformity, input routines, error handling and parsing of command line. We have tried to concentrate in this file all the system dependencies, so as to maximize portability. In the texts below we will sometimes use \.{WEB} to refer to either of the two component programs, if no confusion can arise. Here is the overall appearance of this file: @u@1 #include #ifdef MSDOS #define index strchr #endif @@; @@; @@; @@; @; @ In certain cases \.{TANGLE} and \.{WEAVE} should do almost, but not quite, the same thing. In these case we've written common code for both, differentiating between the two by means of the global variable |program|. @d tangle = 0 @d weave = 1 @ Give us an at sign that isn't always `@@': @d at_sign = the_at_sign @= extern char the_at_sign; @ @= typedef short boolean; typedef char unsigned eight_bits; boolean program; /* \.{WEAVE} or \.{TANGLE}? */ @ \.{CWEAVE} operates in three phases: first it inputs the source file and stores cross-reference data, then it inputs the source once again and produces the \TeX\ output file, and finally it sorts and outputs the index. Similarly, \.{CTANGLE} operates in two phases. The global variable |phase| tells which phase we are in. @= int phase; /* which phase are we in? */ @* The character set. One of the main goals in the design of \.{WEB} has been to make it readily portable between a wide variety of computers. Yet \.{WEB} by its very nature must use a greater variety of characters than most computer programs deal with, and character encoding is one of the areas in which existing machines differ most widely from each other. To resolve this problem, all input to \.{WEAVE} and \.{TANGLE} is converted to an internal seven-bit code that is essentially standard ASCII, the ``American Standard Code for Information Interchange.'' The conversion is done immediately when each character is read in. Conversely, characters are converted from ASCII to the user's external representation just before they are output. Such an internal code can be accessed by users of \.{WEB} by means of constructions like \.{@@`A'}, which should be distinguished from \.{'A'}. The former is transformed by \.{TANGLE} into an integer that is the internal code of \.A, but the latter, a |char| constant, is not touched by \.{WEB}, and will be interpreted by the \cee\ complier according to the machine's character set. (Actually, of course, it gets translated into \.{WEB}'s internal code just like any other character in the input file, but then it gets translated back at output time.) @^ASCII code@> Here is a table of the standard visible ASCII codes (\.{ } stands for a blank space): $$\def\:{\char\count255\global\advance\count255 by 1} \count255='40 \vbox{ \hbox{\hbox to 40pt{\it\hfill0\/\hfill}% \hbox to 40pt{\it\hfill1\/\hfill}% \hbox to 40pt{\it\hfill2\/\hfill}% \hbox to 40pt{\it\hfill3\/\hfill}% \hbox to 40pt{\it\hfill4\/\hfill}% \hbox to 40pt{\it\hfill5\/\hfill}% \hbox to 40pt{\it\hfill6\/\hfill}% \hbox to 40pt{\it\hfill7\/\hfill}} \vskip 4pt \hrule \def\^{\vrule height 10.5pt depth 4.5pt} \halign{\hbox to 0pt{\hskip -24pt\O{#0}\hfill}&\^ \hbox to 40pt{\tt\hfill#\hfill\^}& &\hbox to 40pt{\tt\hfill#\hfill\^}\cr 04&\:&\:&\:&\:&\:&\:&\:&\:\cr\noalign{\hrule} 05&\:&\:&\:&\:&\:&\:&\:&\:\cr\noalign{\hrule} 06&\:&\:&\:&\:&\:&\:&\:&\:\cr\noalign{\hrule} 07&\:&\:&\:&\:&\:&\:&\:&\:\cr\noalign{\hrule} 10&\:&\:&\:&\:&\:&\:&\:&\:\cr\noalign{\hrule} 11&\:&\:&\:&\:&\:&\:&\:&\:\cr\noalign{\hrule} 12&\:&\:&\:&\:&\:&\:&\:&\:\cr\noalign{\hrule} 13&\:&\:&\:&\:&\:&\:&\:&\:\cr\noalign{\hrule} 14&\:&\:&\:&\:&\:&\:&\:&\:\cr\noalign{\hrule} 15&\:&\:&\:&\:&\:&\:&\:&\:\cr\noalign{\hrule} 16&\:&\:&\:&\:&\:&\:&\:&\:\cr\noalign{\hrule} 17&\:&\:&\:&\:&\:&\:&\:\cr} \hrule width 280pt}$$ We introduce new types to distinguish between the transliterated characters and the characters in the outside world. Let all ``interesting'' values that a |char| variable may take lie between |first_text_char| and |last_text_char|; for the ASCII code we can take |first_text_char=0| and |last_text_char=@'177|. We will tell \.{WEB} to convert all input characters in this range to its own code, and balk at characters outside the range. We make two assumptions: |first_text_char>=0| and |char| has room for at least eight bits. @^system dependencies@> @d first_text_char = 0 /* lowest interesting value of a |char| */ @d last_text_char = @'177 /* highest interesting value of a |char| */ @= typedef char ASCII; /* type of characters inside \.{WEB} */ typedef char outer_char; /* type of characters outside \.{WEB} */ @ The \.{WEAVE} and \.{TANGLE} processors convert between ASCII code and the user's external character set by means of arrays |xord| and |xchr| that are analogous to PASCAL's |ord| and |chr| functions. @= ASCII xord[last_text_char+1]; /* specifies conversion of input characters */ outer_char xchr[@'200]; /* specifies conversion of output characters */ @ Every system supporting \cee\ must be able to read and write the 95 visible characters of standard ASCII above (although not necessarily using the ASCII codes to represent them). Conversely, these characters, plus the newline, are sufficient to write any \cee\ program. Other characters are desirable mainly in strings, and they can be referred to by means of escape sequences like \.{'\t'}. The basic implementation of \.{WEB}, then, only has to assign an |xord| to these 95 characters (newlines are swallowed by the reading routines). The easiest way to do this is to assign the characters to their positions in |xchr| and then invert the correspondence: @= common_init() { strncpy(xchr," !\"#$%&'()*+,-./0123456789\ :;<=>?@@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~ ",@'200); @; @; @; setting_up=1; @; setting_up=0; } @ We use this variable to tell us what to do in |err_print|. @=char setting_up; @ The following system-independent code makes the |xord| array contain a suitable inverse to the information in |xchr|. @= { int i; /* to invert the correspondence */ for (i=first_text_char; i<=last_text_char; i++) xord[i]=@'40; /* ASCII code for space */ for (i=1; i<@'177; i++) xord[xchr[i]]=i; } @ Some \cee\ compilers, accept an extended character set, so that one can type things like \.^^Z\ instead of \.{!=}. If that's the case in your system, you should change this module, assigning positions |@'1| to |@'37| in the most convenient way; for example, at MIT you can just say $$\hbox{|for (i=1; i<=@'37; i++) xchr[i]=i;|}$$ since \.{WEB}'s character set is essentially identical to MIT's, even with respect to characters less than |@'40| (see the definitions below). If, however, the changes do not conform with these definitions you should change the definitions as well. @^system dependencies@> @^notes to myself@> @d and_and = @'4 /* `\.{\&\&}' */ @d tab_mark = @'11 /* ASCII code used as tab-skip */ @d line_feed = @'12 /* ASCII code thrown away at end of line */ @d form_feed = @'14 /* ASCII code used at end of page */ @d carriage_return = @'15 /* ASCII code used at end of line */ @d gt_gt = @'20 /* `\.{>>}'; this doesn't exist in MIT */ @d lt_lt = @'22 /* `\.{<<}'; this doesn't exist in MIT */ @d plus_plus = @'13 /* `\.{++}'; this corresponds to MIT's up-arrow */ @d minus_minus = @'1 /* `\.{--}'; this corresponds to MIT's down-arrow */ @d minus_gt = @'31 /* `\.{->}' */ @d not_eq = @'32 /* `\.{!=}' */ @d lt_eq = @'34 /* `\.{<=}' */ @d gt_eq = @'35 /* `\.{>=}' */ @d eq_eq = @'36 /* `\.{==}' */ @d or_or = @'37 /* `\.{\vert\vert}' */ @= /* nothing needs to be done */ @* Input routines. The lowest level of input to the \.{WEB} programs is performed by |input_ln|, which must be told which file to read from. The return value of |input_ln| is 1 if the read is successful and 0 if not (generally this means the file has ended). The conventions of \TeX\ are followed; i.e., the characters of the next line of the file are translated to |ASCII| code and copied into the |buffer| array, and the global variable |limit| is set to the first unoccupied position. Trailing blanks are ignored. The value of |limit| must be strictly less than |buf_size|, so that |buffer[buf_size-1]| is never filled. We assume that none of the |ASCII| values of |*j| for |buffer<=j= ASCII buffer[long_buf_size]; /* where each line of input goes */ ASCII *buffer_end=buffer+buf_size-2; /* end of |buffer| */ ASCII *limit; /* points to the last character in the buffer */ ASCII *loc; /* points to the next character to be read from the buffer */ @ In the unlikely event that you standard I/O library does not support |feof|, |getc| and |ungetc| you may have to change things here. @^system dependencies@> @= #include input_ln(fp) /* copies a line into |buffer| or returns 0 */ FILE *fp; /* what file to read from */ { register int c; /* the character read */ register ASCII *k; /* where next character goes */ if (feof(fp)) return(0); /* we have hit end-of-file */ limit = k = buffer; /* beginning of buffer */ while (k<=buffer_end && (c=getc(fp)) != EOF && c!='\n') if ((*(k++) = xord[c]) != @` ') limit = k; if (k>buffer_end) if ((c=getc(fp))!=EOF && c!='\n') { ungetc(c,fp); loc=buffer; err_print("\n! Input line too long"); @.Input line too long@> } if (c==EOF && limit==buffer) return(0); /* there was nothing after the last newline */ return(1); } @ Now comes the problem of deciding which file to read from next. Recall that the actual text that \.{WEB} should process comes from two streams: a |web_file|, which can contain possibly nested include commands `|@i|', and a |change_file|, which should not contain includes. The |web_file| together with the currently open include files form a stack |file|, whose names are stored in a parallel stack |file_name|. The boolean |changing| tells whether or not we're reading form the |change_file|. The line number of each open file is also kept for error reporting and for the benefit of \.{TANGLE}. @f line x /* make |line| an unreserved word */ @d max_include_depth = 10 /* maximum number of source files open simultaneously, not counting the change file */ @d max_file_name_length = 60 @d cur_file = file[include_depth] /* current file */ @d cur_file_name = file_name[include_depth] /* current file name */ @d cur_line = line[include_depth] /* number of current line in current file */ @d web_file = file[0] /* main source file */ @d web_file_name = file_name[0] /* main source file name */ @= int include_depth; /* current level of nesting */ FILE *file[max_include_depth]; /* stack of non-change files */ FILE *change_file; /* change file */ char file_name[max_include_depth][max_file_name_length]; /* stack of non-change file names */ char change_file_name[max_file_name_length]; /* name of change file */ int line[max_include_depth]; /* number of current line in the stacked files */ int change_line; /* number of current line in change file */ boolean input_has_ended; /* if there is no more input */ boolean changing; /* if the current line is from |change_file| */ @ When |changing=0|, the next line of |change_file| is kept in |change_buffer|, for purposes of comparison with the next line of |cur_file|. After the change file has been completely input, we set |change_limit=change_buffer|, so that no further matches will be made. Here's a shorthand expression for inequality between the two lines: @d lines_dont_match = (change_limit-change_buffer != limit-buffer || strncmp(buffer, change_buffer, limit-buffer)) @= ASCII change_buffer[buf_size]; /* where next line of |change_file| is kept */ ASCII *change_limit; /* points to the last character in |change_buffer| */ @ Procedure |prime_the_change_buffer| sets |change_buffer| in preparation for the next matching operation. Since blank lines in the change file are not used for matching, we have |(change_limit==0 && !changing)| if and only if the change file is exhausted. This procedure is called only when |changing| is 1; hence error messages will be reported correctly. @= prime_the_change_buffer() { change_limit=0; /* this value will be used if the change file ends */ @; @; @; } @ While looking for a line that begins with \.{@@x} in the change file, we allow lines that begin with \.{@@}, as long as they don't begin with \.{@@y} or \.{@@z} (which would probably indicate that the change file is fouled up). @= while(1) { change_line++; if (!input_ln(change_file)) return; if (limit; @; if (buffer[1]==@`x') break; if (buffer[1]==@`y' || buffer[1]==@`z') { loc=buffer+2; err_print("! Where is the matching @@x?"); @.Where is the match...@> } } @ This line of code makes |"@@X"| equivalent to |"@@x"| and so on. @= if (buffer[1]>=@`X' && buffer[1]<=@`Z' || buffer[1]==@`I') buffer[1]+=@`z'-@`Z'; @ We do not allow includes in a change file, so as to avoid confusion. @= { if (buffer[1]==@`i') { loc=buffer+2; err_print("! No includes allowed in change file"); @.No includes allowed...@> } } @ Here we are looking at lines following the \.{@@x}. @= do { change_line++; if (!input_ln(change_file)) { err_print("! Change file ended after @@x"); @.Change file ended...@> return; } } while (limit==buffer); @ @= { change_limit=change_buffer-buffer+limit; strncpy(change_buffer,buffer,limit-buffer+1); } @ The following procedure is used to see if the next change entry should go into effect; it is called only when |changing| is 0. The idea is to test whether or not the current contents of |buffer| matches the current contents of |change_buffer|. If not, there's nothing more to do; but if so, a change is called for: All of the text down to the \.{@@y} is supposed to match. An error message is issued if any discrepancy is found. Then the procedure prepares to read the next line from |change_file|. @= check_change() /* switches to |change_file| if the buffers match */ { int n=0; /* the number of discrepancies found */ if (lines_dont_match) return; while (1) { changing=1; print_where=1; change_line++; if (!input_ln(change_file)) { err_print("! Change file ended before @@y"); @.Change file ended...@> change_limit=0; changing=0; print_where=1; return; } if (limit>buffer+1 && buffer[0]==the_at_sign) @; @;@/ @;@/ changing=0; print_where=1; cur_line++; while (!input_ln(cur_file)) { /* pop the stack or quit */ if (include_depth==0) { err_print("! WEB file ended during a change"); @.WEB file ended...@> input_has_ended=1; return; } include_depth--; print_where=1; cur_line++; } if (lines_dont_match) n++; } } @ @= if (limit>buffer+1 && buffer[0]==the_at_sign) { @; if (buffer[1]==@`x' || buffer[1]==@`z') { loc=buffer+2; err_print("! Where is the matching @@y?"); @.Where is the match...@> } else if (buffer[1]==@`y') { if (n>0) { loc=buffer+2; err_print("! Hmm... some of the preceding lines failed to match"); @.Hmm... some of the preceding...@> } return; } } @ The |reset_input| procedure, which gets \.{CWEAVE} ready to read the user's \.{WEB} input, is used at the beginning of phases one and two. @= reset_input() { limit=buffer; loc=buffer+1; buffer[0]=@` '; @; cur_line=0; change_line=0; include_depth=0; changing=1; prime_the_change_buffer(); changing=!changing; limit=buffer; loc=buffer+1; buffer[0]=@` '; input_has_ended=0; } @ The following code opens the input files. @^system dependencies@> For the input files, we {\em don't} look on the search path. @= if ((web_file=fopen(web_file_name,"r"))==NULL) fatal("! Cannot open input file ", web_file_name); if ((change_file=fopen(change_file_name,"r"))==NULL) fatal("! Cannot open change file ", change_file_name); @ The |get_line| procedure is called when |loc>limit|; it puts the next line of merged input into the buffer and updates the other variables appropriately. A space is placed at the right end of the line. This procedure returns |!input_has_ended| because we often want to check the value of that variable after calling the procedure. If we've just changed from the |cur_file| to the |change_file|, or if the |cur_file| has changed, we tell \.{TANGLE} to print this information in the \cee\ file by means of the |print_where| flag. @d max_modules = 2000 /* number of identifiers, strings, module names; must be less than 10240 */ @= typedef unsigned short sixteen_bits; sixteen_bits module_count; /* the current module number */ boolean changed_module[max_modules]; /* is the module changed? */ boolean print_where=0; /* tells \.{TANGLE} to print line and file info */ @ @= get_line() /* inputs the next line */ { restart: if (changing) changed_module[module_count]=1; else @; if (changing) { @; if (! changing) { changed_module[module_count]=1; goto restart; } } loc=buffer; *limit=@` '; if (*buffer==the_at_sign && (*(buffer+1)==@`i' || *(buffer+1)==@`I')) @; return (!input_has_ended); } @ When a \.{@@i} line is found in the |cur_file|, we must temporarily stop reading it and start reading from the named include file. The \.{@@i} line should give a complete file name with or without \.{"..."}; \.{WEB} will look for include files in standard directories using the |pathopen| module. That is set up at the beginning using the {\tt -I} arguments and the {\tt WEBPATH} environment variable. @= pathaddpath(getenv("WEBPATH"),':'); @ @= pathaddname(*argv+2); @ @= @i pathopen.h @ We need to define an |overflow| {\em function} for |pathopen| to call. @ @= void @= overflow @>(s) char *s; { overflow(s);} @ The included file name should only contain visible ASCII characters, since the characters are translated into ASCII and back again. @= { ASCII *k, *j; loc=buffer+2; while (loc<=limit && (*loc==@` '||*loc==@`\t'||*loc==@`"')) loc++; if (loc>=limit) err_print("! Include file name not given"); @.Include file name not given@> else { if (++include_depth } else {cur_line=0; print_where=1;} } else { include_depth--; err_print("! Too many nested includes"); @.Too many nested includes@> } } goto restart; } @ @= { cur_line++; while (!input_ln(cur_file)) { /* pop the stack or quit */ print_where=1; if (include_depth==0) {input_has_ended=1; break;} else {fclose(cur_file); include_depth--; cur_line++;} } if (!input_has_ended) if (limit==change_limit-change_buffer+buffer) if (buffer[0]==change_buffer[0]) if (change_limit>change_buffer) check_change(); } @ @= { change_line++; if (!input_ln(change_file)) { err_print("! Change file ended without @@z"); @.Change file ended...@> buffer[0]=the_at_sign; buffer[1]=@`z'; limit=buffer+2; } if (limit>buffer+1) /* check if the change has ended */ if (buffer[0]==the_at_sign) { @; @; if (buffer[1]==@`x' || buffer[1]==@`y') { loc=buffer+2; err_print("! Where is the matching @@z?"); @.Where is the match...@> } else if (buffer[1]==@`z') { prime_the_change_buffer(); changing=!changing; print_where=1; } } } @ At the end of the program, we will tell the user if the change file had a line that didn't match any relevant line in |web_file|. @= check_complete(){ if (change_limit!=0) { /* |changing| is 0 */ strncpy(buffer,change_buffer,change_limit-change_buffer+1); limit=change_limit-change_buffer+buffer; changing=1; loc=change_limit; err_print("! Change file entry did not match"); @.Change file entry did not match@> } } @* Storage of identifiers and module names. Both \.{WEAVE} and \.{TANGLE} store identifiers, module names and other strings in a large array of |ASCII|s, called |byte_mem|. Information about the names is kept in the array |name_dir|, whose elements are structures of type \&{name\_info}, containing a pointer into the |byte_mem| array (the address where the name begins) and other data. A \&{name\_pointer} variable is a pointer into |name_dir|. This module has to go after the module that defines the types of the other elements, so it has a special name. @d max_bytes = 90000 /* the number of bytes in identifiers, index entries, and module names */ @d max_names = 4000 /* number of identifiers, strings, module names; must be less than 10240 */ @= typedef struct name_info { ASCII *byte_start; /* beginning of the name in |byte_mem| */ ===> @@; } name_info; /* contains information about an identifier or module name */ typedef name_info *name_pointer; /* pointer into array of |name_info|s */ ASCII byte_mem[max_bytes]; /* characters of names */ ASCII *byte_mem_end = byte_mem+max_bytes-1; /* end of |byte_mem| */ name_info name_dir[max_names]; /* information about names */ name_pointer name_dir_end = name_dir+max_names-1; /* end of |name_dir| */ @ The actual sequence of characters in the name pointed to by a |name_pointer| |p| appears in positions |p->byte_start| to |(p+1)->byte_start|, inclusive. The |print_id| macro prints this text on the user's terminal. @d length(c) = (c+1)->byte_start-(c)->byte_start /* the length of a name */ @d print_id(c) = ASCII_write((c)->byte_start,length((c))) /* print identifier or module name */ @ The first unused position in |byte_mem| and |name_dir| is kept in |byte_ptr| and |name_ptr|, respectively. Thus we usually have |name_ptr->byte_start=byte_ptr|, and certainly we want to keep |name_ptr<=name_dir_end| and |byte_ptr<=byte_mem_end|. @= name_pointer name_ptr; /* first unused position in |byte_start| */ ASCII *byte_ptr; /* first unused position in |byte_mem| */ @ @= name_dir->byte_start=byte_ptr=byte_mem; /* position zero in both arrays */ name_ptr=name_dir+1; /* |name_dir[0]| will not be used */ name_ptr->byte_start=byte_mem; /* this makes name 0 of length zero */ @ The names of identifiers are found by computing a hash address |h| and then looking at strings of bytes signified by the |name_pointer|s |hash[h]|, |hash[h]->link|, |hash[h]->link->link|, \dots, until either finding the desired name or encountering the null pointer. @= struct name_info *link; @ The hash table itself consists of |hash_size| entries of type |name_pointer|, and is updated by the |id_lookup| procedure, which finds a given identifier and returns the appropriate |name_pointer|. The matching is done by the function |names_match|, which is slightly different in \.{WEAVE} and \.{TANGLE}. If there is no match for the identifier, it is inserted into the table. @d hash_size = 353 /* should be prime */ @= typedef name_pointer *hash_pointer; name_pointer hash[hash_size]; /* heads of hash lists */ hash_pointer hash_end = hash+hash_size-1; /* end of |hash| */ hash_pointer h; /* index into hash-head array */ @ Initially all the hash lists are empty. @= for (h=hash; h<=hash_end; *h++=NULL) ; @ Here is the main procedure for finding identifiers: @u name_pointer id_lookup(first,last,t) /* looks up a string in the identifier table */ ASCII *first; /* first character of string */ ASCII *last; /* last character of string plus one */ eight_bits t; /* the |ilk|; used by \.{WEAVE} and by \.{TANGLE} for macros */ { ASCII *i=first; /* position in |buffer| */ int h; /* hash code */ int l; /* length of the given identifier */ name_pointer p; /* where the identifier is being sought */ if (last==NULL) for (last=first; *last!='\0'; last++); l=last-first; /* compute the length */ @; @; if (p==name_ptr) @; return(p); } @ A simple hash code is used: If the sequence of ASCII codes is $c_1c_2\ldots c_n$, its hash value will be $$(2^{n-1}c_1+2^{n-2}c_2+\cdots+c_n)\,\bmod\,|hash_size|.$$ @= h=*i; while (++i= p=hash[h]; while (p && !names_match(p,first,l,t)) p=p->link; if (p==NULL) { p=name_ptr; /* the current identifier is new */ p->link=hash[h]; hash[h]=p; /* insert |p| at beginning of hash list */ } @ The information associated with a new identifier must be initialized in a slightly different way in \.{WEAVE} than in \.{TANGLE}; hence the |init_p| procedure. @= { if (byte_ptr+l>byte_mem_end) overflow("byte memory"); if (name_ptr>=name_dir_end) overflow("name"); strncpy(byte_ptr,first,l); (++name_ptr)->byte_start=byte_ptr+=l; init_p(p,t); /* set ilk */ } @ The names of modules are stored in |byte_mem| together with the identifier names, but a hash table is not used for them because \.{TANGLE} needs to be able to recognize a module name when given a prefix of that name. A conventional binary seach tree is used to retrieve module names, with fields called |llink| and |rlink| (where |llink| takes the place of |link|). The root of this tree is stored in |name_dir->rlink|; this will be the only information in |name_dir[0]|. Since the space used by |rlink| has a different function for identifiers than for module names, we declare it as a |union|. @d llink = link /* left link in binary search tree for module names */ @d rlink = dummy.Rlink /* right link in binary search tree for module names */ @d root = name_dir->rlink /* the root of the binary search tree for module names */ @= union { struct name_info *Rlink; /* right link in binary search tree for module names */ eight_bits Ilk; /* used by identifiers in \.{WEAVE} only */ } dummy; @ @= root=NULL; /* the binary search tree starts out with nothing in it */ @ The |mod_lookup| procedure finds a module name in the search tree, after inserting it if necessary, and returns a pointer to where it was found. According to the rules of \.{WEB}, no module name should be a proper prefix of another, so a ``clean'' comparison should occur between any two names. The result of |mod_lookup| is |NULL| if this prefix condition is violated. An error message is printed when such violations are detected. @d less = 0 /* the first name is lexicographically less than the second */ @d equal = 1 /* the first name is equal to the second */ @d greater = 2 /* the first name is lexicographically greater than the second */ @d prefix = 3 /* the first name is a proper prefix of the second */ @d extension = 4 /* the first name is a proper extension of the second */ @= name_pointer install_node(); @ @u name_pointer mod_lookup(k,l) /* finds module name */ ASCII *k; /* first character of name */ ASCII *l; /* last character of name */ { short c = greater; /* comparison between two names */ name_pointer p = root; /* current node of the search tree */ name_pointer q = name_dir; /* father of node |p| */ while (p) { c=web_strcmp(k,l+1,p->byte_start,(p+1)->byte_start); q=p; switch(c) { case less: p=p->llink; continue; case greater: p=p->rlink; continue; case equal: return p; default: err_print("! Incompatible section names"); return NULL; @.Incompatible section names@> } } return(install_node(q,c,k,l-k+1)); } @ This function is like |strcmp|, but it does not assume the strings are null-terminated. @u web_strcmp(j,j1,k,k1) /* fuller comparison than |strcmp| */ ASCII *j; /* beginning of first string */ ASCII *j1; /* end of first string plus one */ ASCII *k; /* beginning of second string */ ASCII *k1; /* end of second string plus one */ { while (krlink| point to the root of the tree when |q=name_dir|, that is, the first time it is called. The information associated with a new node must be initialized in a slightly different way in \.{WEAVE} than in \.{TANGLE}; hence the |init_node| procedure. @u name_pointer install_node(parent,c,j,name_len) /* install a new node in the tree */ name_pointer parent; /* parent of new node */ int c; /* right or left? */ ASCII *j; /* where replacement text starts */ int name_len; /* length of replacement text */ { name_pointer node=name_ptr; /* new node */ if (byte_ptr+name_len>byte_mem_end) overflow("byte memory"); if (name_ptr==name_dir_end) overflow("name"); if (c==less) parent->llink=node; else parent->rlink=node; node->llink=NULL; node->rlink=NULL; init_node(node); strncpy(byte_ptr,j,name_len); (++name_ptr)->byte_start=byte_ptr+=name_len; return(node); } @ The |prefix_lookup| procedure is supposed to find exactly one module name that has $|k|\ldots|l|$ as a prefix. Actually the algorithm silently accepts also the situation that some module name is a prefix of $|k|\ldots|l|$, because the user who painstakingly typed in more than necessary probably doesn't want to be told about the wasted effort. @u name_pointer prefix_lookup(k,l) /* finds module name given a prefix */ ASCII *k; /* first char of prefix */ ASCII *l; /* last char of prefix */ { short c = greater; /* comparison between two names */ short count = 0; /* the number of hits */ name_pointer p = root; /* current node of the search tree */ name_pointer q = NULL; /* another place to resume the search after one is done */ name_pointer r = NULL; /* extension found */ while (p) { c=web_strcmp(k,l+1,p->byte_start,(p+1)->byte_start); switch(c) { case less: p=p->llink; break; case greater: p=p->rlink; break; default: r=p; count++; q=p->rlink; p=p->llink; } if (p==NULL) { p=q; q=NULL; } } if (count==0) err_print("! Name does not match"); @.Name does not match@> if (count>1) err_print("! Ambiguous prefix"); @.Ambiguous prefix@> return(r); /* the result will be |NULL| if there was no match */ } @ The last component of |name_info| is different for \.{TANGLE} and \.{WEAVE}. In \.{TANGLE}, if |p| is a pointer to a module name, |p->equiv| is a pointer to its replacement text, an element of the array |text_info|. In \.{WEAVE}, on the other hand, if |p| points to an identifier, |p->xref| is a pointer to its list of cross-references, an element of the array |xmem|. The make-up of |text_info| and |xmem| is discussed in the \.{TANGLE} and \.{WEAVE} source files, respectively; here we just declare a common field |equiv_or_xref| as a pointer to an |ASCII|. @= ASCII *equiv_or_xref; /* info corresponding to names */ @* Reporting errors to the user. A global variable called |history| will contain one of four values at the end of every run: |spotless| means that no unusual messages were printed; |harmless_message| means that a message of possible interest was printed but no serious errors were detected; |error_message| means that at least one error was found; |fatal_message| means that the program terminated abnormally. The value of |history| does not influence the behavior of the program; it is simply computed for the convenience of systems that might want to use such information. @d spotless = 0 /* |history| value for normal jobs */ @d harmless_message = 1 /* |history| value when non-serious info was printed */ @d error_message = 2 /* |history| value when an error was noted */ @d fatal_message = 3 /* |history| value when we had to stop prematurely */ @d mark_harmless = {if (history==spotless) history=harmless_message;} @d mark_error = history=error_message @= int history=spotless; /* indicates how bad this run was */ @ The command `|err_print("! Error message")|' will report a syntax error to the user, by printing the error message at the beginning of a new line and then giving an indication of where the error was spotted in the source file. Note that no period follows the error message, since the error routine will automatically supply a period. The actual error indications are provided by a procedure called |error|. However, error messages are not actually reported during phase one, since errors detected on the first pass will be detected again during the second. @= err_print(s) /* prints `\..' and location of error message */ char *s; { ASCII *k,*l; /* pointers into |buffer| */ printf("\n%s",(s==(char *)0) ? "Bad error message!!!" : s); if (setting_up) { printf("\nError occurred while scanning arguments or opening output files. "); } else { @; } fflush(stdout); mark_error; } @ The error locations can be indicated by using the global variables |loc|, |cur_line|, |cur_file_name| and |changing|, which tell respectively the first unlooked-at position in |buffer|, the current line number, the current file, and whether the current line is from |change_file| or |cur_file|. This routine should be modified on systems whose standard text editor has special line-numbering conventions. @^system dependencies@> @= if (changing) printf(". (l. %d of change file)\n", change_line); else if (include_depth==0) printf(". (l. %d)\n", cur_line); else printf(". (l. %d of include file %s)\n", cur_line, cur_file_name); l= (loc>=limit? limit: loc); if (l>buffer) { for (k=buffer; k @ An overflow stop occurs if \.{WEB}'s tables aren't large enough. @d overflow(s) = { fatal("\n! Sorry, capacity exceeded: ",s); } @.Sorry, x capacity exceeded@> @ Some implementations may wish to pass the |history| value to the operating system so that it can be used to govern whether or not other programs are started. Here, for instance, we pass the operating system a status of 0 if and only if only harmless messages were printed. @^system dependencies@> @= wrap_up() { putchar('\n'); @; if (history > harmless_message) exit(1); else exit(0); } @ @= switch (history) { case spotless: printf("(No errors were found.)\n"); break; case harmless_message: printf("(Did you see the warning message above?)\n"); break; case error_message: printf("(Pardon me, but I think I spotted something wrong.)\n"); break; case fatal_message: printf("(That was a fatal error, my friend.)\n"); } /* there are no other cases */ @* Command line arguments. The user calls \.{CWEAVE} and \.{CTANGLE} with arguments on the command line. These are either file names or flags (beginning with |'-'|). The following globals are for communicating the user's desires to the rest of the program. The various file name variables contain strings with the names of those files. The only flag that affects \.{CWEAVE} is |"-x"|, whose status is kept in |no_xref|; it means the cross-referencing information---the index, the module name list, and the table of contents---is to be suppressed. No flags affect \.{CTANGLE}. @= int argc; /* copy of |ac| parameter to |main| */ char **argv; /* copy of |av| parameter to |main| */ char C_file_name[max_file_name_length]; /* name of |C_file| */ char tex_file_name[max_file_name_length]; /* name of |tex_file| */ int no_xref; /* should \.{WEAVE} print an index? */ @ We now must look at the command line arguments and set the file names accordingly. At least one file name must be present: the \.{WEB} file. It may have an extension, or it may omit it to get |".web"| added. The \TeX\ output file name is formed by replacing the \.{WEB} file name extension by |".tex"|, and the \cee\ file name by replacing the extension by |C_file_extension| @=extern char C_file_extension[]; @ If there is another file name present among the arguments, it is the change file, again either with an extension or without one to get |".ch"| An omitted change file argument means that |"/dev/null"| should be used, when no changes are desired. @^system dependencies@> @= scan_args() { char *dot_pos, *index(); /* position of |'.'| in the argument */ boolean found_web=0,found_change=0; /* have these names have been seen? */ no_xref=0; while (--argc > 0) { if (**(++argv)!='-') { if (!found_web) @@; else if (!found_change) @@; else @; } else @; } if (!found_web) @; if (!found_change) @; } @ We use all of |*argv| for the |web_file_name| if there is a |'.'| in it, otherwise add |".web"|. The other file names come from adding things after the dot. We must check that there is enough room in |web_file_name| and the other arrays for the argument. @= { if (strlen(*argv) > max_file_name_length-5) @; if ((dot_pos=index(*argv,'.'))==NULL) sprintf(web_file_name,"%s.web",*argv); else { sprintf(web_file_name,"%s",*argv); @ *dot_pos=0; /* string now ends where the dot was */ } sprintf(tex_file_name,"%s.tex",*argv); sprintf(C_file_name,"%s.%s",*argv,C_file_extension); found_web=1; } @ @= { char *p; p=dot_pos; while (*++p != '\0') if (*p=='.') dot_pos=p; } @ @= { if (strlen(*argv) > max_file_name_length-5) @; if ((dot_pos=index(*argv,'.'))==NULL) sprintf(change_file_name,"%s.ch",*argv); else sprintf(change_file_name,"%s",*argv); found_change=1; } @ @= #ifdef MSDOS strcpy(change_file_name,"NUL"); #else strcpy(change_file_name,"/dev/null"); #endif @ We have two possible flags: \medskip \halign{\tabskip=1em\tt#\hfil&#\hfil\cr \omit{\bf Argument}\hfil&\omit\bf Function\hfil\cr -x&Turns off \.{WEAVE} cross-reference.\cr -I&% Adds {\tt } to the directories searched for included files.\cr } @= { switch((*argv)[1]) { case 'x': no_xref=1; break; case 'I': @@; break; default: @; break; } } @ @= { if (program==tangle) fatal("! Usage: tangle webfile[.web] [changefile[.ch]] [-Ipathname ...]\n",0)@; else fatal("! Usage: weave webfile[.web] [changefile[.ch]] [-x] [-Ipathname ...]\n", 0); } @ @= fatal("! Filename %s too long\n", *argv); @ @= { printf("! Unknown option in argument %s\n", *argv); mark_harmless; } @* Output. Here is the code that opens the output file: @^system dependencies@> @= FILE *C_file; /* where output of \.{TANGLE} goes */ FILE *tex_file; /* where output of \.{WEAVE} goes */ @ @= scan_args(); @@; if (program==tangle) { if ((C_file=fopen(C_file_name,"w"))==NULL) fatal("! Cannot open output file ", C_file_name); } else { if ((tex_file=fopen(tex_file_name,"w"))==NULL) fatal("! Cannot open output file ", tex_file_name); } @ The |update_terminal| procedure is called when we want to make sure that everything we have output to the terminal so far has actually left the computer's internal buffers and been sent. @^system dependencies@> @d update_terminal = fflush(stdout) /* empty the terminal output buffer */ @ Terminal output uses |putchar| and |putc| when we have to translate from \.{WEB}'s code into the external character code, and |printf| when we just want to print strings. @^system dependencies@> @d new_line = putchar('\n') @d putxchar = putchar @d ASCII_write(a,b) = fflush(stdout), write(1,a,b) /* write on the standard output */ @d line_write(c) = write(fileno(C_file),c) /* write on the C output file */ @d C_printf(c,a) = fprintf(C_file,c,a) @d C_putc(c) = putc(c,C_file) @* Index.