@> } goto restart; @* Producing the output. The |get_output| routine above handles most of the complexity of output generation, but there is one further consideration that has a nontrivial effect on \.{TANGLE}'s algorithms. Namely, we want to make sure that the output has spaces and line breaks in the right places (e.g., not in the middle of a string or a constant or an identifier, not at a `\.{@@\&}' position where quantities are being joined together, and certainly after a \.= because the C compiler thinks \.{=-} is ambiguous). The output process can be in one of the following states: \yskip\hang |num_or_id| means that the last item in the buffer is a number or identifier, hence a blank space or line break must be inserted if the next item is also a number or identifier. \yskip\hang |unbreakable| means that the last item in the buffer was followed by the \.{@@\&} operation that inhibits spaces between it and the next item. \yskip\hang |verbatim| means we're copying only character tokens, and that they are to be output exactly as stored. This is the case during strings, verbatim constructions and numerical constants. \yskip\hang |misc| means none of the above. \yskip Furthermore, if the variable |protect| is positive, new-lines are preceded by a `\.\\'.% note this for /*spider*/ @d misc = 0 /* ``normal'' state */ @d num_or_id = 1 /* state associated with numbers and identifiers */ @d unbreakable = 3 /* state associated with \.{@@\&} */ @d verbatim = 4 /* state in the middle of a string */ @= eight_bits out_state; /* current status of partial output */ boolean protect; /* when nonzero, new-lines are preceded by a backslash */ @ Here is a routine that is invoked when we want to output the current line. During the output process, |cur_line| equals the number of the next line to be output. 
@u flush_buffer() /* writes one line to output file */ { C_putc('\n'); if (cur_line % 100 == 0) { printf("."); if (cur_line % 500 == 0) printf("%d",cur_line); update_terminal; /* progress report */ } cur_line++; } @* The big output switch. Here then is the routine that does the output. We have made some modifications to \.{TANGLE} so it will write output on multiple files. We do this very simply: if a module name is introduced by \.{@@(} instead of \.{@@<}, we treat it as the name of a file. All these special modules are saved on a stack, |output_files|. We write them out after we've done the unnamed module. @d max_files = 256 @= name_pointer output_files[max_files]; name_pointer *cur_out_file, *end_output_files, *an_output_file; char cur_module_char; /* is it |'<'| or |'('| */ char output_file_name[longest_name]; /* name of the file */ @ We make |end_output_files| point just beyond the end of |output_files|. |cur_out_file| starts out there. Every time we see a new file, we decrement |cur_out_file| and then write it in. @= cur_out_file=end_output_files=output_files+max_files; @ @= { if (cur_out_file>output_files) { for (an_output_file=cur_out_file; an_output_filetext_link==0) { if(end_output_files==cur_out_file) { printf("\n! No program text was specified."); mark_harmless; @.No program text was specified@> } } else { printf("\nWriting the output files: (%s)",C_file_name); update_terminal; @; while (stack_ptr>stack) get_output(); flush_buffer(); } if (end_output_files>cur_out_file) { if(text_info->text_link==0) { printf("\nWriting the output files: "); update_terminal; } @@; } printf("\nDone."); } @ To write the named output files, we proceed as for the unnamed module. The only subtlety is that we have to open each one. 
@= for (an_output_file=end_output_files; an_output_file>cur_out_file;) { an_output_file--; strncpy(output_file_name,(*an_output_file)->byte_start, longest_name); output_file_name[length(*an_output_file)]='\0'; fclose(C_file); C_file=fopen(output_file_name,"w"); if (C_file == NULL) { fatal("! Cannot open output file:",output_file_name)@; } else { printf(" (%s)",output_file_name); update_terminal; } stack_ptr=stack+1; cur_name= (*an_output_file); cur_repl= (text_pointer) cur_name->equiv_or_xref; cur_byte=cur_repl->tok_start; cur_end=(cur_repl+1)->tok_start; cur_mod=0; while (stack_ptr > stack) get_output(); flush_buffer(); } @ A many-way switch is used to send the output: @u out_char(cur_char) eight_bits cur_char; { ASCII *j; /* pointer into |byte_mem| */ @; switch (cur_char) { case @`\n': if (protect) C_putc(' '); /*spider*/ if (protect || out_state==verbatim) C_putc('\\'); /*spider*/ /*spider*/ /*may need to escape newlines*/ flush_buffer(); if (out_state!=verbatim) out_state=misc; break; @/@t\4@>@; @/@t\4@>@; @@; case join: out_state=unbreakable; break; case constant: if (out_state==verbatim) { out_state=num_or_id; break; } if(out_state==num_or_id) C_putc(' '); out_state=verbatim; break; case string: if (out_state==verbatim) out_state=misc; else out_state=verbatim; break; default: C_putc(cur_char); if (out_state!=verbatim) out_state=misc; break; } } @ @= #ifdef TRACE_MACROS if (tracing>2) { switch(cur_char) { case @`\n': printf(" [\\n]"); break; case string: printf( "[STRING]"); break; case join: printf( "[JOIN]"); break; case constant: printf( "[CONSTANT]"); break; case identifier: printf (" ["); print_id(cur_val+name_dir); printf("]"); break; case module_number: if (cur_val>0) { printf(" [%d:]", cur_val); } else if(cur_val<0) { printf(" [:%d]", - cur_val); } else { printf( "[LINE NUMBER]"); } break; default: if (@'37= case identifier: if (out_state==num_or_id) C_putc(' '); for (j=(cur_val+name_dir)->byte_start; j<(name_dir+cur_val+1)->byte_start; j++) 
C_putc(*j); out_state=num_or_id; break; @ @= case module_number: if (cur_val>0) { C_printf("%s",begin_comment_string); C_printf("%d:",cur_val); C_printf("%s",end_comment_string); } else if(cur_val<0) { C_printf("%s",begin_comment_string); C_printf(":%d",-cur_val); C_printf("%s",end_comment_string); } else { sixteen_bits a; a=@'400* *cur_byte++; a+=*cur_byte++; /* gets the line number */ C_printf("\n%s",sharp_line_open); C_printf(" %d \"",a); cur_val=*cur_byte++; cur_val=@'400*(cur_val-@'200)+ *cur_byte++; /* points to the file name */ for (j=(cur_val+name_dir)->byte_start; j<(name_dir+cur_val+1)->byte_start; j++) C_putc(*j); C_printf("\"%s\n",sharp_line_close); } break; @i outtoks.web @* Introduction to the input phase. We have now seen that \.{TANGLE} will be able to output the full \cee\ program, if we can only get that program into the byte memory in the proper format. The input process is something like the output process in reverse, since we compress the text as we read it in and we expand it as we write it out. There are three main input routines. The most interesting is the one that gets the next token of a \cee\ text; the other two are used to scan rapidly past \TeX\ text in the \.{WEB} source code. One of the latter routines will jump to the next token that starts with `\.{@@}', and the other skips to the end of a \cee\ comment. @ Control codes in \.{WEB} begin with `\.{@@}', and the next character identifies the code. Some of these are of interest only to \.{WEAVE}, so \.{TANGLE} ignores them; the others are converted by \.{TANGLE} into internal code numbers by the |control_code| table below. The ordering of these internal code numbers has been chosen to simplify the program logic; larger numbers are given to the control codes that denote more significant milestones. 
@d ignore = 0 /* control code of no interest to \.{TANGLE} */ @d octal = @'5 @d hex = @'6 @d trace = @'370 @d ascii_constant = @'371 /* control code for `\.{@@`}' */ @d control_text = @'372 /* control code for `\.{@@t}', `\.{@@\^}', etc. */ @d format = @'373 /* control code for `\.{@@f}' */ @d definition = @'374 /* control code for `\.{@@d}' */ @d begin_unnamed = @'375 /* control code for `\.{@@u}' */ @d module_name = @'376 /* control code for `\.{@@<}' */ @d new_module = @'377 /* control code for `\.{@@\ }' and `\.{@@*}' */ @= eight_bits ccode[128]; /* meaning of a char following \.{@@} */ @ @= { int c; /* must be |int| so the |for| loop will end */ for (c=0; c<=127; c++) ccode[c]=ignore; ccode[' ']=ccode[tab_mark]=ccode['*']=new_module; ccode ['@@'] = '@@'; ccode['=']=string; ccode['d']=ccode['D']=definition; ccode['f']=ccode['F']=format; ccode['c']=ccode['C']=begin_unnamed; ccode['u']=ccode['U']=begin_unnamed; ccode['^']=ccode[':']=ccode['.']=ccode['t']=ccode['T']=control_text; ccode['&']=join; ccode['<']=ccode['(']=module_name; ccode['`']=ascii_constant; ccode['\'']=octal; ccode['"']=hex; /*Now adjust for |at_sign|... if it is @@, we have no-op followed by quoting */ /* ... but if it is other, say \#, then \#@@ replaces @@\#, and \#\# quotes itself*/ ccode['@@']=ccode[at_sign]; ccode[at_sign]=at_sign; #ifdef DEBUG ccode['0']=ccode['1']=ccode['2']=ccode['3']=ccode['4']=trace; #endif DEBUG } @ We may want some sort of tracing facility: @=short tracing; @ @=tracing=0; @ The |skip_ahead| procedure reads through the input at fairly high speed until finding the next non-ignorable control code, which it returns. 
@u eight_bits skip_ahead() /* skip to next control code */ { eight_bits c; /* control code found */ while (1) { if (loc>limit && (get_line()==0)) return(new_module); *(limit+1)=at_sign; while (*loc!=at_sign) loc++; if (loc<=limit) { loc++; c=ccode[*loc]; #ifdef DEBUG if (c==trace) { tracing=*loc-@`0'; c=ignore; } #endif DEBUG loc++; if (c!=ignore || *(loc-1)=='>') return(c); } } } @ The |skip_comment| procedure reads through the input at somewhat high speed until finding the end-comment token \.{*/} or a new-line, in which case |skip_comment| will be called again by |get_next|, since the comment is not finished. This is done so that each newline in the C part of a module is copied to the output; otherwise the \&{\#line} commands inserted into the C file by the output routines become useless. If it comes to the end of the module it prints an error message. @= boolean comment_continues=0; /* are we scanning a comment? */ @ @u skip_comment() /* skips over comments */ { ASCII c; /* current character */ if (comments_end_with_newline) { get_line(); return (comment_continues=0); } else { while (1) {/*spider*/ /* fix this to recognize end ok */ if (loc>limit) if(get_line()) return(comment_continues=1); else{ err_print("! Input ended in mid-comment"); @.Input ended in mid-comment@> return(comment_continues=0); } c=*(loc++); @@; if (c==at_sign) { if (ccode[*loc]==new_module) { err_print("! Section name ended in mid-comment"); loc--; @.Section name ended in mid-comment@> return(comment_continues=0); } else loc++; } } } } @* Inputting the next token. @d constant = @'3 @= name_pointer cur_module; /* name of module just scanned */ @ @= #include "ctype.h" /* definition of |isalpha|, |isdigit| and so on */ @ As one might expect, |get_next| consists mostly of a big switch that branches to the various special cases that can arise. 
@u eight_bits get_next() /* produces the next input token */ { eight_bits c; /* the current character */ while (1) { if (loc>limit) { if (get_line()==0) return(new_module); else if (print_where) { print_where=0; @; } else return (@`\n'); } c=*loc; if (comment_continues) { skip_comment(); /* scan to end of comment or newline */ if (comment_continues || comments_end_with_newline) return(@`\n'); else continue; } @@; loc++; if (isdigit(c) || c=='\\' || c=='.') @@;/*spider*/ else if (isalpha(c) || c=='_' || c=='$') @@;/*spider*/ else if (c=='\'' || c=='\"') @@;/*spider*/ else if (c==at_sign) @@; else if (c==' ' || c==tab_mark) { continue; /* ignore spaces and tabs */ } mistake: @@; return(c); } } @ @= {/*spider*/ id_first=--loc; while (isalpha(*++loc) || isdigit(*loc) || *loc=='_'); if (*loc=='$') while (isdigit(*++loc)||*loc=='$'); /* make room for \$\$ and \$nnn suffixes */ id_loc=loc; return(identifier); } @ @= {/*spider*/ id_first=loc-1; if (*id_first=='.' && !isdigit(*loc)) goto mistake; /* not a constant */ if (*id_first=='\\') while (isdigit(*loc)) loc++; /* octal constant */ else { if (*id_first=='0') { if (*loc=='x' || *loc=='X') { /* hex constant */ loc++; while (isxdigit(*loc)) loc++; goto found; } } while (isdigit(*loc)) loc++; if (*loc=='.') { loc++; while (isdigit(*loc)) loc++; } if (*loc=='e' || *loc=='E') { /* float constant */ if (*++loc=='+' || *loc=='-') loc++; while (isdigit(*loc)) loc++; } } found: id_loc=loc; return(constant); } @ \cee\ strings and character constants, delimited by double and single quotes, respectively, can contain newlines or instances of their own delimiters if they are protected by a backslash. We follow this convention, but do not allow the string to be longer than |longest_name|. 
@= {/*spider*/ ASCII delim = c; /* what started the string */ @# /* if it's not a single-character literal, it's a tick mark or an |at_sign| */ if (delim=='\'' && (loc+1>=limit || (*loc != '\\' && *loc!=at_sign && loc[1]!='\'') || (*loc=='\\' && (loc+2>=limit||loc[2]!='\'')) || (*loc==at_sign && (loc+2>=limit||loc[1]!=at_sign||loc[2]!='\'')) )) goto mistake; id_first = mod_text+1; id_loc = mod_text; *++id_loc=delim; while (1) { if (loc>=limit) { if(*(limit-1)!='\\') { err_print("! String didn't end"); loc=limit; break; @.String didn't end@> } if(get_line()==0) { err_print("! Input ended in middle of string"); loc=buffer; break; @.Input ended in middle of string@> } else if (++id_loc<=mod_text_end) *id_loc=@`\n'; /* will print as \.{"\\\\\\n"} */ } if ((c=*loc++)==delim) { if (++id_loc<=mod_text_end) *id_loc=c; break; } if (c=='\\') { if (loc>=limit) continue; if (++id_loc<=mod_text_end) *id_loc = '\\'; c=*loc++; } if (++id_loc<=mod_text_end) *id_loc=c; } if (id_loc>=mod_text_end) { printf("\n! String too long: "); @.String too long@> ASCII_write(mod_text+1,25); printf("..."); mark_error; } id_loc++; return(string); } @ After an \.{@@} sign has been scanned, the next character tells us whether there is more work to do. @= { c=ccode[*loc++]; switch(c) { case ignore: continue; case control_text: while ((c=skip_ahead())==at_sign); /* only \.{@@@@} and \.{@@>} are expected */ if (*(loc-1)!='>') err_print("! Improper @@ within control text"); @.Improper {\AT!} within control text@> continue; case module_name: cur_module_char=*(loc-1); @; case string: @; #ifdef DEBUG case trace: tracing=*(loc-1)-'0'; continue; #endif DEBUG case ascii_constant: @; case octal: @; case hex: @; default: return(c); } } @ @=/*spider*/ id_first=loc; if (*loc=='\\') loc++; while (*loc!='\'') { loc++; if (loc>limit) { err_print("! 
String didn't end"); loc=limit-1; break; } } loc++; return(ascii_constant); @ @= { id_first=loc; while ('0'<=*loc && *loc<'8') loc++; id_loc=loc; return(octal); } @ @= { id_first=loc; while (isxdigit(*loc)) loc++; id_loc=loc; return(hex); } @ @= { ASCII *k; /* pointer into |mod_text| */ @; if (k-mod_text>3 && strncmp(k-2,"...",3)==0) cur_module=prefix_lookup(mod_text+1,k-3); else cur_module=mod_lookup(mod_text+1,k); if (cur_module_char=='(') { @@; } return(module_name); } @ Module names are placed into the |mod_text| array with consecutive spaces, tabs, and carriage-returns replaced by single spaces. There will be no spaces at the beginning or the end. (We set |mod_text[0]=' '| to facilitate this, since the |mod_lookup| routine uses |mod_text[1]| as the first character of the name.) @=mod_text[0]=' '; @ @= k=mod_text; while (1) { if (loc>limit && get_line()==0) { err_print("! Input ended in section name"); @.Input ended in section name@> loc=buffer+1; break; } c=*loc; @; loc++; if (k=mod_text_end) { printf("\n! Section name too long: "); @.Section name too long@> ASCII_write(mod_text+1,25); printf("..."); mark_harmless; } if (*k==' ' && k>mod_text) k--; @ @= if (c==at_sign) { c=*(loc+1); if (c=='>') { loc+=2; break; } if (ccode[c]==new_module) { err_print("! Section name didn't end"); break; @.Section name didn't end@> } *(++k)=at_sign; loc++; /* now |c==*loc| again */ } @ At the present point in the program we have |*(loc-1)=string|; we set |id_first| to the beginning of the string itself, and |id_loc| to its ending-plus-one location in the buffer. We also set |loc| to the position just after the ending delimiter. @= { id_first=loc++; *(limit+1)=at_sign; *(limit+2)='>'; while (*loc!=at_sign || *(loc+1)!='>') loc++; if (loc>=limit) err_print("! Verbatim string didn't end"); @.Verbatim string didn't end@> id_loc=loc; loc+=2; return(string); } @* Scanning a macro definition. 
The rules for generating the replacement texts corresponding to macros and \cee\ texts of a module are almost identical; the only differences are that \yskip \item{a)}Module names are not allowed in macros; in fact, the appearance of a module name terminates such macros and denotes the name of the current module. \item{b)}The symbols \.{@@d} and \.{@@f} and \.{@@u} are not allowed after module names, while they terminate macro definitions. \yskip Therefore there is a single procedure |scan_repl| whose parameter |t| specifies either |macro| or |module_name|. After |scan_repl| has acted, |cur_text| will point to the replacement text just generated, and |next_control| will contain the control code that terminated the activity. @d app_repl(c) = {if (tok_ptr==tok_mem_end) overflow("token"); *tok_ptr++=c;} @= text_pointer cur_text; /* replacement text formed by |scan_repl| */ eight_bits next_control; /* control code that terminated the latest |scan_repl| */ @ @u scan_repl(t) /* creates a replacement text */ eight_bits t; { sixteen_bits a; /* the current token */ int set_print_where; /* |print_where| as left by the latest |get_next|; assigned only when |t==macro| */ if (t==module_name) {@;} /* avoid inserting line number in macro replacement texts */ /* |print_where| is both tested and set in |get_next| */ while (1) { if (t==macro) { print_where = 0; } a=get_next(); if (t==macro) { set_print_where = print_where; } switch (a) { @@; default: app_repl(a); /* store |a| in |tok_mem| */ } } done: next_control=(eight_bits) a; if (text_ptr>text_info_end) overflow("text"); if (t==macro) { @; } cur_text=text_ptr; (++text_ptr)->tok_start=tok_ptr; if (t==macro) print_where = set_print_where; /* for a module text |set_print_where| was never assigned, so leave |print_where| alone */ } @ We don't ever want a macro replacement text to end with newline, but for readability of the {\tt WEB} source we usually want the last token in a macro definition to be a newline token. We can't just look for |@`\n'| at the end of the token list, because that might be the second half of a two-byte token, in which case it certainly {\em wouldn't} be a newline! 
So we look for a two-byte token (|*tok_ptr>=@'200|); as soon as we find one we've eliminated all trailing newlines. As long as we keep finding one-byte tokens, we drop trailing newlines as they come along. @= tok_ptr -= 2; while (*tok_ptr<@'200 && *(tok_ptr+1)==@`\n') tok_ptr--; tok_ptr += 2; @ We use macros with zero or more parameters, and we give the parameters names. In order to scan a macro definition, we need to be able to substitute special markers for the parameter names. We {\em don't} put the parameter names in the hash table, because they're strictly temporary. Instead we keep them in temporary storage: We allow macros to have up to 32 parameters using 256 text chars. @d max_param_name_texts = 256 @d max_param_names = 32 @= ASCII param_name_texts[max_param_name_texts]; ASCII *param_name_texts_end = param_name_texts+max_param_name_texts; ASCII * param_names[max_param_names]; /* pointers into |param_name_texts| */ short next_param_name; /* first free spot in |param_names| */ ASCII * next_param_name_text; /* first free spot in |param_name_texts| */ @ @= next_param_name=0; next_param_name_text=param_name_texts; param_names[next_param_name]=next_param_name_text; @ @=@; @ @= @; while (id_first= if (next_param_name==max_param_names) overflow ("parameter names"); if (id_loc - id_first > param_name_texts_end - next_param_name_text) overflow ("parameter name texts"); @ The function |parameter_number(first,loc)| returns |0| if the identifier is not a parameter, and the number of the parameter if it is a parameter. This is not the parameter number we'll store, because for storage we want the {\em last} parameter to be {\tt \#1}, the penultimate to be {\tt \#2}, and so on. This means we have to know the number of parameters only when constructing the replacement text, not when scanning the replacement text. @u int parameter_number(first, loc) ASCII *first, *loc; { ASCII *f, *p; int n; for (n=0;n= @@; @; @; if (next_control!= @`=') { err_print("! 
You must put an = sign before the macro replacement text"); @; } @; app_repl(next_param_name); /* store number of parameters in |tok_mem| */ scan_repl(macro); p->equiv=(ASCII *)cur_text; @; @; @ @= #ifdef TRACE_MACROS if (tracing>1) { printf("\nScanning definition of macro "); print_id(p); } #endif TRACE_MACROS @ @= #ifdef TRACE_MACROS if (tracing>2) { printf("\nMacro "); print_id(p); printf("'s replacement text is in text_info[%d]", cur_text-text_info); } #endif TRACE_MACROS @ @= @@; if (next_control==@`(') { do { @; @@; } while (next_control==@`,'); if (next_control != @`)') { err_print("! Macro parameter list must end with )"); @; } next_control=get_next(); /* first token following parameter list */ } @ @= if (next_control!=identifier) { err_print("! Macro name must be an identifier"); @.Macro name must be an identifier@> @; } else { p = id_lookup(id_first,id_loc,macro); if (p->ilk!=macro) { #ifdef WARN_USE_BEFORE_DEF printf("\n! Warning: macro name "); print_id(p); printf(" was used before it was defined"); mark_harmless; #endif WARN_USE_BEFORE_DEF p->ilk=macro; } else if (p+1!=name_ptr) { err_print ("! Macro name is multiply defined: "); } } @ @= @; if (next_control!=identifier) { err_print("! Macro parameter name not an identifier"); @.Macro parameter name...@> @; } else { if (parameter_number(id_first,id_loc)!=0) { err_print("! 
Duplicate parameters in macro definition"); @.Duplicate parameters...@> @; } else { @; } } @ @= while ((next_control=get_next())==@`\n'); @ We modify this from the old tangle: @= case identifier: { short n; if ((n=parameter_number(id_first,id_loc))!=0) { #ifdef TRACE_MACROS if(tracing>1) { printf("\nIdentified "); ASCII_write(id_first,id_loc-id_first); printf(" as parameter number %d (%d from back)",n,next_param_name-n+1); } #endif TRACE_MACROS app_repl(param); app_repl(next_param_name-n+1); /* reverses numbering */ } else { /* not a parameter */ a=id_lookup(id_first,id_loc,normal)-name_dir; app_repl((a / @'400)+@'200); app_repl(a % @'400); } } break; case module_name: if (t!=module_name) goto done; else { @; a=cur_module-name_dir; app_repl((a / @'400)+@'250); app_repl(a % @'400); @; break; } case constant: case string: @; case ascii_constant: @; case octal: @; break; case hex: @; break; case @`\n': #ifdef NEWLINES_IN_MACROS app_repl(a); #else if (t==macro) continue; else app_repl(a); #endif NEWLINES_IN_MACROS break; case definition: case format: case begin_unnamed: if (t!=module_name) goto done; else { err_print("! @@d, @@f and @@u are ignored in C text"); continue; @.{\AT!}d, {\AT!}f and {\AT!}u are ignored in C text@> } case new_module: goto done; @ Here is the code for the line number: first a |sixteen_bits| equal to |@'150000|; then, if we're dealing with the change file, the line number plus |@'100000|; or, if we're dealing with the web file, the line number; or, if we're dealing with an include file, the number 0, then the line number, followed by the number of characters in the file name and the file name. 
@= store_two_bytes(@'150000); if (changing) id_first=change_file_name; else id_first=cur_file_name; id_loc=id_first+strlen(id_first); if (changing) store_two_bytes((sixteen_bits)change_line); else store_two_bytes((sixteen_bits)cur_line); {int a=id_lookup(id_first,id_loc,normal)-name_dir; app_repl((a / @'400)+@'200); app_repl(a % @'400);} @ @= { ASCII *try_loc=loc; while (*try_loc==' ' && try_loc } @ @= app_repl(a); /* |string| or |constant| */ while (id_first < id_loc) { /* simplify \.{@@@@} pairs */ if (*id_first==at_sign) id_first++; app_repl(*id_first++); } app_repl(a); break; @ @= { int c=0; /* stays 0 when the escape sequence is not recognized, so |app_decimal| never sees an indeterminate value */ if (*id_first==at_sign) { c=xchr[*id_first++]; if (*id_first!=at_sign) err_print("! Double @@ within string"); } else if (*id_first=='\\') { id_first++; switch (*id_first) { case 't':c=@`\t';break; case 'n':c=@`\n';break; case 'b':c=@`\b';break; case '0':c=@`\0';break; case '\\':c=@`\\';break; default: err_print("! Unrecognized escape sequence"); } } else c=xchr[*id_first]; app_repl(constant); /* we don't want octal; we want decimal */ /* we know |c<=255| */ app_decimal((long)c); app_repl(constant); } break; @ Paranoia to work on any 32 bit integer machine... @= { long sum=0; while (id_first @"04000000) err_print("! Octal constant exceeds @@\"04000000"); } app_repl(constant); app_decimal(sum); app_repl(constant); } @ @= { long sum=0; while (id_first @"04000000) err_print("! Hex constant exceeds @@\"04000000"); } app_repl(constant); app_decimal(sum); app_repl(constant); } @ This function prints out a decimal constant using |app_repl|. @u app_decimal(c) long c; /* on entry require |c>=0| */ {long power; if (c==0) {app_repl('0'); return;} if (c<0) /* should never happen */ {app_repl('-'); c = - c;} for (power=1; c>=power; power *=10); /* now |power/10<=c=1; power /=10) { app_repl('0'+c/power); /* leading digit $>0$ */ c%=power; /* invariant: original c = this c + printed string*power */ } } @* Scanning a module. 
The |scan_module| procedure starts when `\.{@@\ }' or `\.{@@*}' has been sensed in the input, and it proceeds until the end of that module. It uses |module_count| to keep track of the current module number; with luck, \.{WEAVE} and \.{TANGLE} will both assign the same numbers to modules. @= extern sixteen_bits module_count; /* the current module number */ @ The top level of |scan_module| is trivial. @u scan_module() { name_pointer p; /* module name for the current module */ text_pointer q; /* text for the current module */ sixteen_bits a; /* token for left-hand side of definition */ module_count++; if (*(loc-1)=='*') /* starred module */ printf("*%d",module_count); fflush(stdout); @; @; } @ We define two kinds of ilks for identifiers: |normal| for ordinary identifiers, and |macro| for macros. @d normal = 0 @d macro = 1 @d simple = 2 @= next_control=0; while (1) { done_scanning: while (next_control<=format) if ((next_control=skip_ahead())==module_name) { /* scan the module name too */ loc-=2; next_control=get_next(); } if (next_control!=definition) break; @; cur_text->text_link=0; /* |text_link=0| characterizes a macro */ } @ We initialize the parameter name area at the beginning, and we re-initialize any time we have to punt a macro definition. This makes sure we have an empty parameter name area when scanning the replacement text for a module. @= @; p->ilk=normal; /* turn off macro replacement and hope for the best */ goto done_scanning; @ @= switch (next_control) { case begin_unnamed: p=name_dir; break; case module_name: p=cur_module; @; break; default: return; } @; scan_repl(module_name); /* now |cur_text| points to the replacement text */ @; @ @= while ((next_control=get_next())=='+'); /* allow optional `\.{+=}' */ if (next_control!='=') { err_print("! 
C text flushed, = sign is missing"); @.C text flushed...@> while ((next_control=skip_ahead()) != new_module); return; } @ @= store_two_bytes((sixteen_bits)(@'150000+module_count)); /* |@'150000==@'320*@'400| */ @ @= if (p==name_dir||p==0) { /* unnamed module, or bad module name */ (last_unnamed)->text_link=cur_text-text_info; last_unnamed=cur_text; } else if (p->equiv==(ASCII *)text_info) p->equiv=(ASCII *)cur_text; /* first module of this name */ else { q=(text_pointer)p->equiv; while (q->text_linktext_link+text_info; /* find end of list */ q->text_link=cur_text-text_info; } cur_text->text_link=module_flag; /* mark this replacement text as a nonmacro */ @ @u phase_one() { phase=1; module_count=0; reset_input(); while ((next_control=skip_ahead())!=new_module); while (!input_has_ended) scan_module(); check_complete(); phase=2; } @ @u print_stats() /* reports how much of each table was used */ { printf("\nMemory usage statistics:\n"); /* pointer differences are cast because \.{\%d} requires an |int| argument */ printf("%d names (out of %d)\n",(int)(name_ptr-name_dir),max_names); printf("%d replacement texts (out of %d)\n",(int)(text_ptr-text_info),max_texts); printf("%d bytes (out of %d)\n",(int)(byte_ptr-byte_mem),max_bytes); printf("%d tokens (out of %d)\n",(int)(tok_ptr-tok_mem),max_toks); } @* Index. Here is a cross-reference table for the \.{TANGLE} processor. All modules in which an identifier is used are listed with that identifier, except that reserved words are indexed only when they appear in format definitions, and the appearances of identifiers in module names are not indexed. Underlined entries correspond to where the identifier was declared. Error messages and a few other things like ``ASCII code'' are indexed here too.