/* Copyright Dave Bone 1998 - 2014 All Rights Reserved. No part of this document may be reproduced without written consent from the author. FILE: enumerate_T_alphabet.lex Dates: 9 Aug. 2005 Purpose: enumerate terminal symbols. How: Each terminal phase contains its mapped symbols and create order list. */ /@ @i "/usr/local/yacco2/copyright.w" @** |enumerate_T_alphabet| grammar.\fbreak Enumerate the Terminal symbols starting at zero and going positively outward. When the enumeration is finished, the global |START_OF_RULES_ENUM| becomes the starting enumerate of the grammar's rules. This is the cutoff boundary for shift / reduce LR1 compatibility. Why? As a state is composed of shift vectors using their enumerates, only terminals up to but not including the start rule's enumerate are needed. The exception to this is the ``eosubrule'' terminal, which represents the reducing subrule situation and is not included in the shift set. So it is easy to iterate through the state's vectors in building up the shift set for the lr1 compatibility check: as the state's vector map is sorted by enumerate value, just stop when the current vector's enumerate is part of the rule's domain. In this grammar, each rule is a logic sequencer fetching its phase's batch of symbols for the baptism. This grammar demonstrates zero token consumption.\fbreak \fbreak How:\fbreak Each terminal phase contains its mapped symbols and its create-order list. 
Global Phase table:\fbreak |O2_xxx| are the individual phases.\fbreak Grammar Phases:\fbreak \ptindent{0 - O2\_FSM\_PHASE : T\_fsm\_phrase} \ptindent{1 - O2\_PP\_PHASE : T\_parallel\_parser\_phrase} \ptindent{2 - O2\_T\_ENUM\_PHASE : T\_enum\_phrase} \ptindent{3 - O2\_ERROR\_PHASE : T\_error\_symbols\_phrase} \ptindent{4 - O2\_RC\_PHASE : T\_rc\_phrase} \ptindent{5 - O2\_LRK\_PHASE : T\_lr1\_k\_phrase} \ptindent{6 - O2\_T\_PHASE : T\_terminals\_phrase} \ptindent{7 - O2\_RULES\_PHASE : T\_rules\_phrase} Within each rule is the symbol iteration.\fbreak The enumeration starts with the |lr k| symbols followed by raw characters, Terminals, and Errors. Why is zero the start point? Glad you asked: I use modulo 32 on the terminal enumerate to arrive at its set number and bit position within the set. See the set discussion in \O2's library documentation for the reasons.\fbreak Caveat:\fbreak As I used keyword-triggered descent parse phases, I must check here that all my phrases are present before the enumeration can take place. Why here? This grammar front-ends the triggered rule phase parse because its related grammars use symbol enumeration as edit checks. So dot the Ts and Is before the ruling! 
@/ fsm (fsm-id "enumerate_T_alphabet.lex" ,fsm-filename enumerate_T_alphabet ,fsm-namespace NS_enumerate_T_alphabet ,fsm-class Cenumerate_T_alphabet{ user-prefix-declaration #include "o2_externs.h" *** user-declaration public: NS_yacco2_terminals::T_enum_phrase* enum_phrase_; *** op START_OF_RULES_ENUM = 0; CAbs_lr1_sym* gps = O2_FSM_PHASE; CAbs_lr1_sym* esym(0); CAbs_lr1_sym* ph = O2_T_ENUM_PHASE; if(ph == 0){ esym = new ERR_no_T_enum_phrase; goto error_fnd; } gps = ph; ph = O2_ERROR_PHASE; if(ph == 0){ esym = new ERR_no_errors_phrase; goto error_fnd; } gps = ph; ph = O2_RC_PHASE; if(ph == 0){ esym = new ERR_no_rc_phrase; goto error_fnd; } gps = ph; ph = O2_LRK_PHASE; if(ph == 0){ esym = new ERR_no_lrk_phrase; goto error_fnd; } gps = ph; ph = O2_T_PHASE; if(ph == 0){ esym = new ERR_no_terminals_phrase; goto error_fnd; } all_phases_ok: enum_phrase_ = O2_T_ENUM_PHASE; return; error_fnd: parser__->add_token_to_error_queue(*esym); if(gps != 0) // anchor error against previously good phase esym->set_rc(*gps,__FILE__,__LINE__); parser__->set_abort_parse(true); return; *** constructor START_OF_RULES_ENUM = 0; enum_phrase_ = 0; *** } ,fsm-version "1.0",fsm-date "9 Aug. 2005",fsm-debug "false" ,fsm-comments "Enumerate grammar's terminal symbols: \na 0 and a 1, ... 
the oracle for parsing lookups.") @"/usr/local/yacco2/compiler/grammars/yacco2_T_includes.T" rules{ Renumerate_T_alphabet (){ -> Renum_lrk Renum_rc Renum_T Renum_err{ /@ As START\_OF\_RULES\_ENUM is the next enumerate value to use and the enumeration starts from 0, it is more efficient to use this value instead of suming all the totals per terminal type: lrk, rc, err, T now if i was paranoid, maybe the comparison of the calculated against the traversal value would be asserted @/ op Cenumerate_T_alphabet* fsm = (Cenumerate_T_alphabet*)rule_info__.parser__->fsm_tbl__; fsm->enum_phrase_->total_enumerate(START_OF_RULES_ENUM); lrclog << "Total symbols: " << START_OF_RULES_ENUM << std::endl; *** } } Renum_lrk (){ -> { op Cenumerate_T_alphabet* fsm = (Cenumerate_T_alphabet*)rule_info__.parser__->fsm_tbl__; T_lr1_k_phrase* lr_ph = O2_LRK_PHASE; std::vector* order = lr_ph->crt_order(); std::vector::iterator i = order->begin(); std::vector::iterator ie = order->end(); for(fsm->enum_phrase_->start_lrk_enumerate(START_OF_RULES_ENUM); i!=ie;++i,++START_OF_RULES_ENUM){ T_terminal_def* tdef = *i; tdef->enum_id(START_OF_RULES_ENUM); } fsm->enum_phrase_->stop_lrk_enumerate(START_OF_RULES_ENUM-1); fsm->enum_phrase_-> total_lrk_enumerate (fsm->enum_phrase_->stop_lrk_enumerate() - fsm->enum_phrase_->start_lrk_enumerate() + 1); lrclog << "Total lrk symbols: " << fsm->enum_phrase_->total_lrk_enumerate() << std::endl; lrclog << "Start lrk symbol: " << fsm->enum_phrase_->start_lrk_enumerate() << " Stop lrk symbol: " << fsm->enum_phrase_->stop_lrk_enumerate() << std::endl; *** } } Renum_rc (){ -> { op Cenumerate_T_alphabet* fsm = (Cenumerate_T_alphabet*)rule_info__.parser__->fsm_tbl__; T_rc_phrase* lr_ph = O2_RC_PHASE; std::vector* order = lr_ph->crt_order(); std::vector::iterator i = order->begin(); std::vector::iterator ie = order->end(); for(fsm->enum_phrase_->start_rc_enumerate (START_OF_RULES_ENUM);i!=ie;++i,++START_OF_RULES_ENUM){ T_terminal_def* tdef = *i; 
tdef->enum_id(START_OF_RULES_ENUM); } fsm->enum_phrase_->stop_rc_enumerate(START_OF_RULES_ENUM-1); fsm->enum_phrase_-> total_rc_enumerate (fsm->enum_phrase_->stop_rc_enumerate() - fsm->enum_phrase_->start_rc_enumerate() + 1); lrclog << "Total rc symbols: " << fsm->enum_phrase_->total_rc_enumerate() << std::endl; lrclog << "Start rc symbol: " << fsm->enum_phrase_->start_rc_enumerate() << " Stop rc symbol: " << fsm->enum_phrase_->stop_rc_enumerate() << std::endl; *** } } Renum_T (){ -> { op Cenumerate_T_alphabet* fsm = (Cenumerate_T_alphabet*)rule_info__.parser__->fsm_tbl__; T_terminals_phrase* lr_ph = O2_T_PHASE; std::vector* order = lr_ph->crt_order(); std::vector::iterator i = order->begin(); std::vector::iterator ie = order->end(); for(fsm->enum_phrase_->start_T_enumerate(START_OF_RULES_ENUM); i!=ie;++i,++START_OF_RULES_ENUM){ T_terminal_def* tdef = *i; tdef->enum_id(START_OF_RULES_ENUM); } fsm->enum_phrase_->stop_T_enumerate(START_OF_RULES_ENUM-1); fsm->enum_phrase_-> total_T_enumerate (fsm->enum_phrase_->stop_T_enumerate() - fsm->enum_phrase_->start_T_enumerate() + 1); lrclog << "Total T symbols: " << fsm->enum_phrase_->total_T_enumerate() << std::endl; lrclog << "Start T symbol: " << fsm->enum_phrase_->start_T_enumerate() << " Stop T symbol: " << fsm->enum_phrase_->stop_T_enumerate() << std::endl; *** } } Renum_err (){ -> { op Cenumerate_T_alphabet* fsm = (Cenumerate_T_alphabet*)rule_info__.parser__->fsm_tbl__; T_error_symbols_phrase* lr_ph = O2_ERROR_PHASE; std::vector* order = lr_ph->crt_order(); std::vector::iterator i = order->begin(); std::vector::iterator ie = order->end(); for(fsm->enum_phrase_->start_err_enumerate(START_OF_RULES_ENUM); i!=ie;++i,++START_OF_RULES_ENUM){ T_terminal_def* tdef = *i; tdef->enum_id(START_OF_RULES_ENUM); } fsm->enum_phrase_->stop_err_enumerate(START_OF_RULES_ENUM-1); fsm->enum_phrase_-> total_err_enumerate (fsm->enum_phrase_->stop_err_enumerate() - fsm->enum_phrase_->start_err_enumerate() + 1); lrclog << "Total error symbols: 
" << fsm->enum_phrase_->total_err_enumerate() << std::endl; lrclog << "Start error symbol: " << fsm->enum_phrase_->start_err_enumerate() << " Stop error symbol: " << fsm->enum_phrase_->stop_err_enumerate() << std::endl; *** } } }// end of rules