// c.gr            see license.txt for copyright and terms of use
// grammar for C

// this is derived from the C++ grammar by commenting-out
// some rules, and has not been maintained in a while

// 1/08/03: commented-out all of the unreachable nonterminals
// because bison-1.875 spews massive warnings when it sees them

// verbatim section: the headers that declare the names used by the
// action code in this grammar, plus the D() debug-trace macro (D
// expands to nothing when NDEBUG is defined, otherwise it writes
// 'msg' to the "c" trace stream)
verbatim [

#include "strtable.h"       // StringRef (r)
#include "c_type.h"         // type identifiers like ST_CHAR (r)
#include "lexer2.h"         // lexer2's token ids for classify()
#include "cparse.h"         // ParseEnv
#include "trace.h"          // trace
#include "c.ast.gen.h"      // C abstract syntax

#ifdef NDEBUG
  #define D(msg)
#else
  #define D(msg) \
    trace("c") << msg << endl
#endif

]

// context class for the actions; it derives from both UserActions and
// ParseEnv, and its constructor forwards the StringTable and CCLang
// to ParseEnv
context_class CParse : public UserActions, public ParseEnv {
public:
  CParse(StringTable &table, CCLang &lang)
    : ParseEnv(table, lang) {}

  // when this is the last element in a parameter list, the function
  // is a vararg function
  ASTTypeId *ellipsisTypeId()
  {
    // an ST_ELLIPSIS simple type paired with a nameless declarator
    return new ASTTypeId(new TS_simple(ST_ELLIPSIS),
                         new Declarator(new D_name(SL_UNKNOWN, NULL, NULL), NULL));
  }
};

terminals {
  // grab list generated by lexer
  include("c.tok")

  // terminals that carry a semantic value, with the value's type
  token[int] L2_INT_LITERAL ;
  token[float*] L2_FLOAT_LITERAL ;
  token[char] L2_CHAR_LITERAL ;

  token[StringRef] L2_NAME {
    // every time I pull an L2_NAME from the lexer, this code is
    // run to possibly reclassify the token kind; the semantic
    // value is passed (and ParseEnv is available as context), but
    // the same semantic value will be used

    // I mark this, and all places that deal with resolving the
    // type-name vs variable-name ambiguity, with "TYPE/NAME"
    fun classify(s) [
      if (isType(s)) {
        return L2_TYPE_NAME;
      }
      else {
        return L2_VARIABLE_NAME;
      }
    ]
  }
  token[StringRef] L2_TYPE_NAME;
  token[StringRef] L2_VARIABLE_NAME;

  token[StringRef] L2_STRING_LITERAL ;

  // operator precedence and associativity, listed from the highest
  // number to the lowest; L2_PREFER_REDUCE and L2_PREFER_SHIFT are
  // the two extremes, and rules refer to them via precedence(...)
  precedence {
    // high precedence
    prec  200 L2_PREFER_REDUCE;
    prec  190 "const" "volatile" "owner_ptr_qualifier" "__attribute__" "else";
    right 120 ".*" "->*";
    left  110 "*" "/" "%";
    left  100 "+" "-";
    left   90 "<<" ">>";
    left   80 "<" ">" "<=" ">=";
    left   70 "==" "!=";
    left   60 "&";
    left   50 "^";
    left   40 "|";
    left   30 "&&";
    left   20 "||";
    right  10 "==>";     // ESC's user manual specifies a precedence but not an associativity for "==>"
    prec    1 L2_PREFER_SHIFT;
    // low precedence
  }
}


// a whole input file: one TranslationUnit parsed between an
// EnterScope/LeaveScope pair
// NOTE(review): this is the first nonterminal in the file, so it is
// presumably the grammar's start symbol -- confirm
nonterm[TranslationUnit*] File
  -> EnterScope t:TranslationUnit LeaveScope
       [ return t; ]

// scoping
// (these match no input; they exist only to run enterScope() and
// leaveScope() at a chosen point in a rule)
nonterm EnterScope
  -> empty
       [ D("entering scope"); enterScope(); ]
nonterm LeaveScope
  -> empty
       [ D("exiting scope"); leaveScope(); ]


// ------------- identifier ambiguity -------------------
// identifiers can play two primary roles, and this is the
// source of problems parsing C and C++

// name of a type; introduced by class, struct, union, enum, typedef
nonterm[StringRef] TypeName
  -> n:L2_TYPE_NAME
       [ return n; ]

// name of a variable or function; introduced by a declaration
nonterm[StringRef] VariableName
  -> n:L2_VARIABLE_NAME
       [ return n; ]

// names for situations where it does not matter what its previous
// meaning may have been
nonterm[StringRef] AnyName {
  -> n:VariableName
       [ return n; ]
  -> n:TypeName
       [ return n; ]
}


// ---------------- higher-level syntax -----------------
// the section labels that follow (like "A.3") are from the
// C++ standard document (should track down a proper reference..)
// ------ A.3 Basic Concepts ------
// the top-level forms are collected, in input order, in t->topForms;
// toplevel asm and stray semicolons are accepted but add nothing
nonterm[TranslationUnit*] TranslationUnit {
  -> empty
       [ return new TranslationUnit(NULL); ]
  -> t:TranslationUnit d:Declaration
       [ t->topForms.append(d); return t; ]

  // allow assembly at toplevel for gnu compatibility
  -> t:TranslationUnit a:GNUAsmStatement
       [ return t; ]      // drop 'asm' for now

  // gnu extension: allow random semicolons at toplevel
  -> t:TranslationUnit ";"
       [ return t; ]
}


// ------ A.4 Expressions ------
nonterm[Expression*] PrimaryExpression {
  -> e:Literal
       [ return e; ]

  //-> "this" ;

  // parenthesized expression; the parentheses leave no AST node
  -> "(" e:Expression ")"
       [ return e; ]

  // unqualified variable: E_variable with a NULL scope
  -> e:PQVarName
       [ return new E_variable(e, NULL /*scope*/); ]

  // qualified variable; a special case of what PQVarName originally covered
  -> scope:TypeName "::" name:AnyName
       [ return new E_variable(name, scope); ]

  -> e:ThmprvPredicate
       [ return e; ]

  // GNU extension
  //-> "(" s:CompoundStatement ")" ;

  // gnu extension: "constructor expression"
  // (notation for literal struct values)
  // e.g.:
  //   struct Foo { int x; int y; };
  //   struct Foo myfoo;
  //   myfoo = (struct Foo) { 4, 5 };      <-- here
  // which would assign x=4 and y=5 in 'myfoo'
  //-> "(" t:TypeId ")" i:CompoundInitializer
  //     [ return new E_structLit(t, i); ]
}

nonterm[Expression*] Literal {
  -> i:L2_INT_LITERAL
       [ return new E_intLit(i); ]

  // the token's value is a float*; the float is copied into the
  // E_floatLit and the pointer is deleted here
  -> f:L2_FLOAT_LITERAL
       [
         E_floatLit *ret = new E_floatLit(*f);
         delete f;
         return ret;
       ]

  -> s:StringLiteral
       [ return new E_stringLit(s); ]
  -> c:L2_CHAR_LITERAL
       [ return new E_charLit(c); ]

  // the boolean literals become the integer literals 1 and 0
  -> L2_TRUE
       [ return new E_intLit(1); ]
  -> L2_FALSE
       [ return new E_intLit(0); ]
}

// gnu: this is to handle gnu's f'd up pseudo-symbols, which
// have the annoying property that they have to concatenate
// with string literals (so I can't just, say, declare them
// to be char* and be done with it)
nonterm[StringRef] StringLiteral {
  -> s:L2_STRING_LITERAL
       [ return s; ]
  //-> StringLiteral L2_STRING_LITERAL ;
  //-> StringLiteral "__FUNCTION__" ;
  //-> StringLiteral "__PRETTY_FUNCTION__" ;
}


// possibly-qualified name; essentially spec's
// id-expression
// missing: template-id because I don't know what that is
// (only the plain VariableName alternative is active below)
nonterm[StringRef] PQVarName {
  -> n:VariableName
       [ return n; ]
  //-> OperatorFunctionId ;
  //-> ConversionFunctionId ;
  //-> "~" TypeName ;
  //-> q:Qualifier rhs:PQVarName ;
}

// nonterm Qualifier {
//   -> "::" ;
//   -> TypeName "::" ;
// }


nonterm[Expression*] PostfixExpression {
  -> e:PrimaryExpression
       [ return e; ]

  // array access
  // (a[e] is built as a dereference of the sum a + e)
  -> a:PostfixExpression "[" e:Expression "]"
       //[ return new E_arrayAcc(a, e); ]
       [ return new E_deref(new E_binary(a, BIN_PLUS, e)); ]

  // fn call
  -> f:PostfixExpression "(" e:ExpressionListOpt ")"
       [ return new E_funCall(f, e); ]

  // field access
  -> p:PostfixExpression "." n:PQVarName
       [ return new E_fieldAcc(p, n); ]

  // deref + field access
  -> p:PostfixExpression "->" n:PQVarName
       [ return new E_fieldAcc(new E_deref(p), n); ]

  -> p:PostfixExpression "++"
       [ return new E_effect(EFF_POSTINC, p); ]
  -> p:PostfixExpression "--"
       [ return new E_effect(EFF_POSTDEC, p); ]

  // pulled these two because other rules supersede them
  //-> PostfixExpression "." PseudoDestructorName ;     // explicit dtor call
  //-> PostfixExpression "->" PseudoDestructorName ;

  //-> "dynamic_cast" "<" TypeId ">" "(" e:Expression ")" ;      // casts
  //-> "static_cast" "<" TypeId ">" "(" e:Expression ")" ;
  //-> "reinterpret_cast" "<" TypeId ">" "(" e:Expression ")" ;
  //-> "const_cast" "<" TypeId ">" "(" e:Expression ")" ;

  // removed for now since I don't know much about them
  //-> "typeid" "(" Expression ")" ;     // RTTI
  //-> "typeid" "(" TypeId ")" ;
}

// comma-separated arguments, appended in source order
// NOTE(review): ASTList is written here without template arguments;
// if ASTList is a class template in this project, the element type
// may have been lost from this copy of the file -- confirm
nonterm[ASTList*] ExpressionList {
  -> a:AssignmentExpression
       [ return new ASTList(a); ]
  -> e:ExpressionList "," a:AssignmentExpression
       [ e->append(a); return e; ]
}

nonterm[ASTList*] ExpressionListOpt {
  -> empty
       [ return new ASTList(); ]
  -> e:ExpressionList
       [ return e; ]
}

// I am pulling these out since PQVarName can be ~class
//PseudoDestructorName -> "~" ClassName
//PseudoDestructorName -> Qualifier PseudoDestructorName

nonterm[Expression*] UnaryExpression {
  -> e:PostfixExpression
       [ return e; ]

  -> "++" e:CastExpression
       [ return new E_effect(EFF_PREINC, e); ]
  -> "--" e:CastExpression
       [ return new E_effect(EFF_PREDEC, e); ]

  // size of expression
  -> "sizeof" e:UnaryExpression
       [ return new E_sizeof(e); ]

  //-> e:DeleteExpression ;

  // dereference, addrof
  -> "*" e:CastExpression
       [ return new E_deref(e); ]
  -> "&" e:CastExpression
       [ return new E_addrOf(e); ]

  // other unary operators
  -> "+" e:CastExpression
       [ return new E_unary(UNY_PLUS, e); ]
  -> "-" e:CastExpression
       [ return new E_unary(UNY_MINUS, e); ]
  -> "!" e:CastExpression
       [ return new E_unary(UNY_NOT, e); ]
  -> "~" e:CastExpression
       [ return new E_unary(UNY_BITNOT, e); ]

  // size of type
  -> "sizeof" "(" t:TypeId ")"
       [ return new E_sizeofType(t); ]

  -> e:NewExpression
       [ return e; ]
}

// for now, no new-placement or "::" in front
// I've changed the syntax slightly to correct what I think is a mistake
// (my syntax allows array of ptr to fn, whereas std syntax doesn't -- TOVERIFY)
nonterm[E_new*] NewExpression {
  // I tried making the 't' here a TypeId, but that introduces an
  // LALR(1) conflict because then "new char * 6" could either be
  // "(new char) * 6" or "(new (char*)) 6" -- neither makes sense but
  // the parser doesn't know that

  // there's still an LALR(1) shift/reduce conflict involving __attribute__,
  // but I haven't spent enough time staring at the parser dump to figure
  // out why, so I'll rely on GLR to deal with it;
  // TODO: design a way to query the parsgen to figure out where a given
  // lookahead symbol comes from originally (tracing by hand I get myself
  // stuck in long loops)
  -> "new" t:TypeSpecifier /*NewDeclaratorOpt NewInitializerOpt*/
       [ return new E_new(new ASTTypeId(t,
           // declarator which doesn't modify type:
           new Declarator(new D_name(loc, NULL, NULL), NULL))); ]

  -> "new" "(" t:TypeId ")" /*NewDeclaratorOpt NewInitializerOpt*/
       [ return new E_new(t); ]
}

// NewDeclaratorOpt is, as a regular expression:
//   (PtrOperator)* ("[" Expression "]")? ("[" ConstExpression "]")*
// nonterm NewDeclaratorOpt {
//   -> empty ;
//   -> PtrOperator NewDeclaratorOpt ;
//   -> DirectNewDeclarator ;          // commit to at least one "[" ... "]"
// }

// nonterm DirectNewDeclarator {
//   -> "[" Expression "]" ;
//   -> DirectNewDeclarator "[" ConstantExpression "]" ;
// }

// nonterm NewInitializerOpt {
//   -> empty ;
//   -> "(" ExpressionListOpt ")" ;
// }

// // omitting possibility of initial "::"
// nonterm DeleteExpression {
//   -> "delete" e:CastExpression ;
//   -> "delete" "[" "]" e:CastExpression ;
// }

nonterm[Expression*] CastExpression {
  -> e:UnaryExpression
       [ return e; ]
  -> "(" t:TypeId ")" e:CastExpression
       [ return new E_cast(t, e); ]
}


// ++++ binary operator expression ++++
// every binary operator is an alternative of this one nonterminal, so
// the rules by themselves do not say how operators nest
// NOTE(review): that is presumably settled by the grammar's
// 'precedence' declarations -- confirm
// primary expressions
nonterm[Expression*] BinaryExpression {
  -> e:CastExpression
       [ return e; ]

  -> left:BinaryExpression "*" right:BinaryExpression
       [ return new E_binary(left, BIN_MULT, right); ]
  -> left:BinaryExpression "/" right:BinaryExpression
       [ return new E_binary(left, BIN_DIV, right); ]
  -> left:BinaryExpression "%" right:BinaryExpression
       [ return new E_binary(left, BIN_MOD, right); ]
  -> left:BinaryExpression "+" right:BinaryExpression
       [ return new E_binary(left, BIN_PLUS, right); ]
  -> left:BinaryExpression "-" right:BinaryExpression
       [ return new E_binary(left, BIN_MINUS, right); ]
  -> left:BinaryExpression "<<" right:BinaryExpression
       [ return new E_binary(left, BIN_LSHIFT, right); ]
  -> left:BinaryExpression ">>" right:BinaryExpression
       [ return new E_binary(left, BIN_RSHIFT, right); ]
  -> left:BinaryExpression "<" right:BinaryExpression
       [ return new E_binary(left, BIN_LESS, right); ]
  -> left:BinaryExpression ">" right:BinaryExpression
       [ return new E_binary(left, BIN_GREATER, right); ]
  -> left:BinaryExpression "<=" right:BinaryExpression
       [ return new E_binary(left, BIN_LESSEQ, right); ]
  -> left:BinaryExpression ">=" right:BinaryExpression
       [ return new E_binary(left, BIN_GREATEREQ, right); ]
  -> left:BinaryExpression "==" right:BinaryExpression
       [ return new E_binary(left, BIN_EQUAL, right); ]
  -> left:BinaryExpression "!=" right:BinaryExpression
       [ return new E_binary(left, BIN_NOTEQUAL, right); ]
  -> left:BinaryExpression "&" right:BinaryExpression
       [ return new E_binary(left, BIN_BITAND, right); ]
  -> left:BinaryExpression "^" right:BinaryExpression
       [ return new E_binary(left, BIN_BITXOR, right); ]
  -> left:BinaryExpression "|" right:BinaryExpression
       [ return new E_binary(left, BIN_BITOR, right); ]
  -> left:BinaryExpression "&&" right:BinaryExpression
       [ return new E_binary(left, BIN_AND, right); ]
  -> left:BinaryExpression "||" right:BinaryExpression
       [ return new E_binary(left, BIN_OR, right); ]

  // theorem prove extensions
  -> left:BinaryExpression "==>" right:BinaryExpression
       [ return new E_binary(left, BIN_IMPLIES, right); ]
}

nonterm[Expression*] ConditionalExpression {
  -> e:BinaryExpression
       [ return e; ]

  -> cond:BinaryExpression "?" th:Expression ":" el:AssignmentExpression
       [ return new E_cond(cond, th, el); ]

  // wtf?  gnu...
  // linux driver code has "expr ? : expr" ...
  // according to Marat, "e1 ? : e2" is the same as "e1 ? e1 : e2",
  // except that e1 is only evaluated once
  //-> cond:BinaryExpression "?" ":" el:AssignmentExpression
  //     [ return new E_gnuCond(cond, el); ]
}

// why is conditional not allowed on left side of = ?  can I confirm
// that in another language spec?  clearly both alternatives would have
// to be like-typed lvalues, but...
nonterm[Expression*] AssignmentExpression {
  -> e:ConditionalExpression
       [ return e; ]

  -> e1:BinaryExpression op:AssignmentOperator e2:AssignmentExpression
       [ return new E_assign(e1, op, e2); ]
}

// each compound-assignment token yields the BinaryOp of its
// underlying operator; plain "=" yields BIN_ASSIGN
nonterm[enum BinaryOp] AssignmentOperator {
  -> "*="      [ return BIN_MULT; ]
  -> "/="      [ return BIN_DIV; ]
  -> "%="      [ return BIN_MOD; ]
  -> "+="      [ return BIN_PLUS; ]
  -> "-="      [ return BIN_MINUS; ]
  -> ">>="     [ return BIN_RSHIFT; ]
  -> "<<="     [ return BIN_LSHIFT; ]
  -> "&="      [ return BIN_BITAND; ]
  -> "^="      [ return BIN_BITXOR; ]
  -> "|="      [ return BIN_BITOR; ]
  -> "="       [ return BIN_ASSIGN; ]
}

// this is the same definition as ExpressionList, and perhaps it
// makes sense to collapse them?  the meaning of ',' is quite
// different in the two cases.. does that matter?
// update: now that I'm doing translation too, the difference
// in the meanings is great enough that I think they should be
// separate, as they are
nonterm[Expression*] Expression {
  -> ae:AssignmentExpression
       [ return ae; ]
  -> e:Expression "," ae:AssignmentExpression
       [ return new E_comma(e, ae); ]
}

nonterm[Expression*] ExpressionOpt {
  // empty expression is a nop
  // (it is represented by the integer literal 1)
  -> empty
       [ return new E_intLit(1); ]
  -> e:Expression
       [ return e; ]
}

// this is an expression with the additional requirement that
// it be entirely evaluable to an int at compile time
// (the name exists simply to help document that fact; the grammar
// cannot enforce it)
nonterm[Expression*] ConstantExpression
  -> e:ConditionalExpression
       [ return e; ]


// ------ A.5 Statements ------
// labeled-statement
nonterm[Statement*] Statement {
  -> n:L2_VARIABLE_NAME ":" s:Statement
       [ return new S_label(loc, n, s); ]

  // I need a disambiguation here other than prec/assoc..
  //-> "case" e:ConstantExpression ":" s:Statement
  //     [ return new S_case(e, s); ]

  // had to add this variant because I found linux kernel
  // code that has "case:" immediately followed by "}"; I still
  // prefer the above when I can, so as to minimize the use of this
  // (as written, the case label always gets an S_skip as its statement)
  -> "case" e:ConstantExpression ":" precedence(L2_PREFER_SHIFT)
       [ return new S_case(loc, e, new S_skip(loc)); ]

  // gnu extension
  -> "case" low:ConstantExpression "..." high:ConstantExpression ":"
       [ return new S_caseRange(loc, low, high, new S_skip(loc)); ]

  -> "default" ":" s:Statement
       [ return new S_default(loc, s); ]

  // expression-statement
  -> s:ExpressionStatement
       [ return s; ]

  // compound-statement
  -> s:CompoundStatement
       [ return s; ]

  // selection-statement
  // (prefer to shift "else" over reducing by this rule)
  // (a missing else-branch is filled in with an S_skip)
  -> "if" "(" e:Condition ")" s:Statement precedence(L2_PREFER_SHIFT)
       [ return new S_if(loc, e, s, new S_skip(loc)); ]

  // if-then-else preferred over if-then when ambiguous
  -> "if" "(" e:Condition ")" s1:Statement "else" s2:Statement
       [ return new S_if(loc, e, s1, s2); ]

  -> "switch" "(" e:Condition ")" s:Statement
       [ return new S_switch(loc, e, s); ]

  -> "while" "(" e:Condition ")" s:Statement
       [ return new S_while(loc, e, s); ]

  -> "do" s:Statement "while" "(" e:Expression ")" ";"
       [ return new S_doWhile(loc, s, e); ]

  // this is a special-purpose rule intended for use with (unexpanded)
  // macros that, internally, contain a for loop; it is *not* part of
  // the C or C++ languages!
  //-> L2_NAME "(" e:ExpressionList ")" s:CompoundStatement {
  //  fun typeCheck = NULL;  // TODO: implement? eliminate?
  //}

  // I might like to rework this so both semicolons appear here instead
  // of buried in ForInitStatement.  this is also a good rule to use
  // inline alternatives
  -> "for" "(" s1:ForInitStatement c:ConditionOpt ";" e:ExpressionOpt ")" s2:Statement
       [ return new S_for(loc, s1, c, e, s2); ]

  -> "break" ";"
       [ return new S_break(loc); ]

  -> "continue" ";"
       [ return new S_continue(loc); ]

  -> "return" e:Expression ";"
       [ return new S_return(loc, e); ]

  // a bare return carries a NULL expression
  -> "return" ";"
       [ return new S_return(loc, NULL); ]

  -> "goto" n:L2_VARIABLE_NAME ";"
       [ return new S_goto(loc, n); ]

  // declaration-statement
  -> d:BlockDeclaration
       [ return new S_decl(loc, d); ]

  // try-block
  //-> s:TryBlock ;

  // GNU extensions
  // (the asm statement is replaced by an S_skip)
  -> GNUAsmStatement
       [ return new S_skip(loc); ]

  // theorem prover extensions
  -> "thmprv_assert" e:Expression ";"
       [ return new S_assert(loc, e, false /*pure*/); ]

  -> "thmprv_pure_assert" e:Expression ";"
       [ return new S_assert(loc, e, true /*pure*/); ]

  -> "thmprv_assume" e:Expression ";"
       [ return new S_assume(loc, e); ]

  -> "thmprv_invariant" e:Expression ";"
       [ return new S_invariant(loc, e); ]

  -> "thmprv_let" s:Statement
       [ return new S_thmprv(loc, s); ]
}

nonterm[Statement*] ExpressionStatement {
  // a lone semicolon is an S_skip
  -> ";"
       [ return new S_skip(loc); ]
  -> e:Expression ";"
       [ return new S_expr(loc, e); ]
}

nonterm[S_compound*] CompoundStatement {
  // I must enter scope immediately upon seeing the open-brace, so
  // that if the first token of the first statement changes or queries
  // the scope, it's in the new one
  -> EnterScope "{" seq:StatementSeqOpt LeaveScope "}"
       [ return new S_compound(loc, seq); ]
}

// zero or more statements, appended in source order
// NOTE(review): ASTList is written here without template arguments;
// if ASTList is a class template in this project, the element type
// may have been lost from this copy of the file -- confirm
nonterm[ASTList*] StatementSeqOpt {
  -> empty
       [ return new ASTList(); ]
  -> seq:StatementSeqOpt s:Statement
       [ seq->append(s); return seq; ]
}

// the guard of e.g. an 'if' statement
nonterm[Expression*] Condition {
  -> e:Expression
       [ return e; ]

  // C++ allows variable declarations in some interesting places...
  //-> spec:TypeSpecifier decl:Declarator "=" e:AssignmentExpression ;
}

nonterm[Expression*] ConditionOpt {
  // an empty condition (e.g. in a for loop) is interpreted as true
  -> empty
       [ return new E_intLit(1); ]
  -> c:Condition
       [ return c; ]
}

nonterm[Statement*] ForInitStatement {
  -> s:ExpressionStatement
       [ return s; ]
  //-> s:SimpleDeclaration ;    // C++
}


// ----- A.6 Declarations ------
//nonterm[ASTList*] DeclarationSeqOpt {
//  -> empty
//       [ return new ASTList(); ]
//  -> seq:DeclarationSeqOpt d:Declaration
//       [ seq->append(d); return seq; ]
//}

// a toplevel form: either a declaration (wrapped in TF_decl) or a
// function definition
nonterm[TopForm*] Declaration {
  -> d:BlockDeclaration
       [ return new TF_decl(loc, d); ]
  -> d:FunctionDefinition
       [ return d; ]
  //-> TemplateDeclaration ;
  //-> d:LinkageSpecification ;
}

// C++ has other alternatives..
nonterm[Declaration*] BlockDeclaration {
  -> d:SimpleDeclaration
       [ return d; ]
}

// is the DeclSpecifierSeq optional for implicit-int??
// no, it's for constructors, destructors, and conversion operators,
// all of which are C++ only, so the DeclSpecifierSeq is now mandatory

// ok, why is the InitDeclaratorList optional?
// for declaring classes and enums
nonterm[Declaration*] SimpleDeclaration {
  //e.g.: int x ;
  // (the Declaration built by DeclSpecifier takes over the declarator list)
  -> spec:DeclSpecifier list:InitDeclaratorListOpt ";"
       [
         spec->decllist.steal(list);
         return spec;
       ]

  // same, with DF_TYPEDEF added to the declaration's flags
  -> "typedef" spec:DeclSpecifier list:TypedefDeclaratorList ";"
       [
         spec->dflags = (DeclFlags)(spec->dflags | DF_TYPEDEF);
         spec->decllist.steal(list);
         return spec;
       ]

  // gnu; specifically for linux printk declaration
  //-> GNUAttribute decl:SimpleDeclaration ;
}

// old:
//DeclSpecifier -> StorageClassOpt CVQualifiersOpt TypeSpecifier
//CVQualifiersOpt -> "const" CVQualifiersOpt | "volatile" CVQualifiersOpt | empty

// now I'm folding CVQualifier into TypeSpecifier; in particular, this allows
// the TypeId in a cast expression to contain a "const"

// I return a Declaration object with an empty decllist, as a proxy
// for returning dflags and spec as a pair
nonterm[Declaration*] DeclSpecifier {
  // "inline" may come before or after the single DeclModifier
  -> "inline" m:DeclModifier s:TypeSpecifier
       [ return new Declaration((DeclFlags)(DF_INLINE | m), s, NULL); ]
  -> m:DeclModifier "inline" s:TypeSpecifier
       [ return new Declaration((DeclFlags)(DF_INLINE | m), s, NULL); ]
  -> m:DeclModifier s:TypeSpecifier
       [ return new Declaration(m, s, NULL); ]
  -> "inline" s:TypeSpecifier
       [ return new Declaration(DF_INLINE, s, NULL); ]
  -> s:TypeSpecifier
       [ return new Declaration(DF_NONE, s, NULL); ]
}

// my analysis (informal and ad-hoc) indicates that none of these can
// be used together; "inline" is pulled out because it can be used
// with "virtual", "static", "friend", and possibly "extern"; I
// consider all this preferable to just allowing a "word soup"
nonterm[enum DeclFlags] DeclModifier {
  -> "virtual"             [ return DF_VIRTUAL; ]
  -> "friend"              [ return DF_FRIEND; ]
  -> "mutable"             [ return DF_MUTABLE; ]
  -> "auto"                [ return DF_AUTO; ]
  -> "register"            [ return DF_REGISTER; ]
  -> "static"              [ return DF_STATIC; ]
  -> "extern"              [ return DF_EXTERN; ]
  -> "thmprv_predicate"    [ return DF_PREDICATE; ]
}

nonterm[TypeSpecifier*] TypeSpecifier {
  // the existence of this production is part of why parsing C is hard
  -> n:PQTypeName
       [ return new TS_name(n); ]

  -> s:SimpleTypeSpecifier             // int
       [ return new TS_simple(s); ]

  // pulled this because it causes shift/reduce conflicts; on input
  //   unsigned . const ...
  // I don't know whether to reduce the 'unsigned' as a type by itself,
  // or shift the 'const' in expectation of finding 'char' after it
  // 9/25/01 16:28: I'm putting it back in and we'll see if magic GLR will work
  // 9/26/01 00:13: pulling again until I can find a good way to suppress the conflict report
  //-> s:SimpleCVTypeSpecifier         // unsigned const char (not good style, IMO)
  //     [
  //       TS_simple *ret = new TS_simple((SimpleTypeId)(s & ST_BITMASK));
  //       ret->cv = (CVFlags)(s & CV_ALL);
  //       return ret;
  //     ]

  -> s:ElaboratedTypeSpecifier         // class foo or enum bar
       [ return s; ]

  -> s:ClassSpecifier                  // class { ... }
       [ return s; ]

  -> s:EnumSpecifier                   // enum { ... }
       [ return s; ]

  // the following 3 rules create an ambiguity because they don't
  // say how to parse things like "const int const"; so assign
  // all of them precedence higher than const/volatile/attribute,
  // so we'll just prefer reducing always
  // (a qualifier on either side is OR'ed into the specifier's cv flags)
  -> q:CVQualifier s:TypeSpecifier precedence(L2_PREFER_REDUCE)     // const int
       [ s->cv = (CVFlags)(s->cv | q); return s; ]

  -> s:TypeSpecifier q:CVQualifier precedence(L2_PREFER_REDUCE)     // int const
       [ s->cv = (CVFlags)(s->cv | q); return s; ]

  // yet another attempt to find a good place for this
  -> s:TypeSpecifier GNUAttribute precedence(L2_PREFER_REDUCE)
       [ return s; ]     // drop attr for now
}

nonterm[TS_elaborated*] ElaboratedTypeSpecifier {
  // naming a class tag also calls declareClassTag on it
  -> k:ClassKeyword n:AnyName
       [
         declareClassTag(n);
         return new TS_elaborated(k, n);
       ]
  -> "enum" n:AnyName
       [ return new TS_elaborated(TI_ENUM, n); ]
}

// this list comes from Table 7 (p.109) of the C++ standard
// NOTE: this deviates from the language spec, which allows other
// decl-specifiers to mix with the tokens here; I do not
nonterm[enum SimpleTypeId] SimpleTypeSpecifier {
  -> "char"                        [ return ST_CHAR; ]
  -> "unsigned" "char"             [ return ST_UNSIGNED_CHAR; ]
  -> "signed" "char"               [ return ST_SIGNED_CHAR; ]
  -> "bool"                        [ return ST_BOOL; ]
  -> "unsigned"                    [ return ST_UNSIGNED_INT; ]
  -> "unsigned" "int"              [ return ST_UNSIGNED_INT; ]
  -> "signed"                      [ return ST_INT; ]
  -> "signed" "int"                [ return ST_INT; ]
  -> "int"                         [ return ST_INT; ]
  -> "unsigned" "short" "int"      [ return ST_UNSIGNED_SHORT_INT; ]
  -> "unsigned" "short"            [ return ST_UNSIGNED_SHORT_INT; ]
  -> "unsigned" "long" "int"       [ return ST_UNSIGNED_LONG_INT; ]
  -> "unsigned" "long"             [ return ST_UNSIGNED_LONG_INT; ]
  -> "signed" "long" "int"         [ return ST_LONG_INT; ]
  -> "signed" "long"               [ return ST_LONG_INT; ]
  -> "long" "int"                  [ return ST_LONG_INT; ]
  -> "long"                        [ return ST_LONG_INT; ]
  -> "signed" "long" "long"        [ return ST_LONG_LONG; ]
  -> "long" "long"                 [ return ST_LONG_LONG; ]
  -> "unsigned" "long" "long"      [ return ST_UNSIGNED_LONG_LONG; ]
  -> "signed" "short" "int"        [ return ST_SHORT_INT; ]
  -> "signed" "short"              [ return ST_SHORT_INT; ]
  -> "short" "int"                 [ return ST_SHORT_INT; ]
  -> "short"                       [ return ST_SHORT_INT; ]
  -> "wchar_t"                     [ return ST_WCHAR_T; ]
  -> "float"                       [ return ST_FLOAT; ]
  -> "double"                      [ return ST_DOUBLE; ]
  -> "long" "double"               [ return ST_LONG_DOUBLE; ]
  -> "void"                        [ return ST_VOID; ]
}

// I had been separating these into typedef/enum/class names, but
// the parser can never distinguish, so the grammar shouldn't suggest
// that it can
nonterm[StringRef] PQTypeName {
  -> n:TypeName
       [ return n; ]
  //-> TemplateId ;
  //-> Qualifier n:PQTypeName ;
}

// the C++ standard allows "const" and "volatile" to be arbitrarily
// interleaved with the words of a simple-type-specifier.. so I've
// created this set of alternative type specifiers which have at
// least one CV qualifier buried in them
//
// technically, I'm still missing things like
//   unsigned const short volatile int
// but yikes, I pity the fool with such code!
//
// I really should just fold these into the above, but my dislike for
// the interleaving thing makes me try to keep the above decls more
// or less "pure"..
// but I'll probably merge them at some point
//
// I return an 'int' here because it's an OR of CVFlags and SimpleTypeId
// nonterm[int] SimpleCVTypeSpecifier {
//   -> "unsigned" q:CVQualifierSeq "char"             [ return q | ST_UNSIGNED_CHAR; ]
//   -> "signed" q:CVQualifierSeq "char"               [ return q | ST_SIGNED_CHAR; ]
//   -> "unsigned" q:CVQualifierSeq "int"              [ return q | ST_UNSIGNED_INT; ]
//   -> "signed" q:CVQualifierSeq "int"                [ return q | ST_INT; ]
//   -> "unsigned" q:CVQualifierSeq "short" "int"      [ return q | ST_UNSIGNED_SHORT_INT; ]
//   -> "unsigned" "short" q:CVQualifierSeq "int"      [ return q | ST_UNSIGNED_SHORT_INT; ]
//   -> "unsigned" q:CVQualifierSeq "short"            [ return q | ST_UNSIGNED_SHORT_INT; ]
//   -> "unsigned" q:CVQualifierSeq "long" "int"       [ return q | ST_UNSIGNED_LONG_INT; ]
//   -> "unsigned" "long" q:CVQualifierSeq "int"       [ return q | ST_UNSIGNED_LONG_INT; ]
//   -> "unsigned" q:CVQualifierSeq "long"             [ return q | ST_UNSIGNED_LONG_INT; ]
//   -> "signed" q:CVQualifierSeq "long" "int"         [ return q | ST_LONG_INT; ]
//   -> "signed" "long" q:CVQualifierSeq "int"         [ return q | ST_LONG_INT; ]
//   -> "signed" q:CVQualifierSeq "long"               [ return q | ST_LONG_INT; ]
//   -> "long" q:CVQualifierSeq "int"                  [ return q | ST_LONG_INT; ]
//   -> "signed" q:CVQualifierSeq "long" "long"        [ return q | ST_LONG_LONG; ]
//   -> "unsigned" q:CVQualifierSeq "long" "long"      [ return q | ST_UNSIGNED_LONG_LONG; ]
//   -> "signed" q:CVQualifierSeq "short" "int"        [ return q | ST_SHORT_INT; ]
//   -> "signed" "short" q:CVQualifierSeq "int"        [ return q | ST_SHORT_INT; ]
//   -> "signed" q:CVQualifierSeq "short"              [ return q | ST_SHORT_INT; ]
//   -> "short" q:CVQualifierSeq "int"                 [ return q | ST_SHORT_INT; ]
//   -> "long" q:CVQualifierSeq "double"               [ return q | ST_LONG_DOUBLE; ]
// }

// an enum definition with its enumerator list; the anonymous form
// passes NULL as the name
nonterm[TS_enumSpec*] EnumSpecifier {
  -> "enum" "{" list:EnumeratorListOpt "}"
       [ return new TS_enumSpec(NULL /*name*/, list); ]
  -> "enum" n:AnyName "{" list:EnumeratorListOpt "}"
       [ return new TS_enumSpec(n, list); ]
}

// comma-separated enumerators, appended in source order
// NOTE(review): ASTList is written here without template arguments;
// if ASTList is a class template in this project, the element type
// may have been lost from this copy of the file -- confirm
nonterm[ASTList*] EnumeratorList {
  -> def:EnumeratorDefinition
       [ return new ASTList(def); ]
  -> list:EnumeratorList "," def:EnumeratorDefinition
       [ list->append(def); return list; ]
}

nonterm[ASTList*] EnumeratorListOpt {
  -> empty
       [ return new ASTList(); ]
  -> list:EnumeratorList CommaOpt       // CommaOpt is GNU extension (??)
       [ return list; ]
}

// one enumerator; 'expr' is NULL when no "= value" is written
nonterm[Enumerator*] EnumeratorDefinition {
  -> name:AnyName
       [ return new Enumerator(loc, name, NULL /*expr*/); ]
  -> name:AnyName "=" expr:ConstantExpression
       [ return new Enumerator(loc, name, expr); ]
}

//nonterm AsmDefinition -> "asm" "(" L2_STRING_LITERAL ")" ";" ;

//nonterm LinkageSpecification {
//  -> "extern" L2_STRING_LITERAL "{" d:DeclarationSeqOpt "}" ;
//  -> "extern" L2_STRING_LITERAL d:Declaration ;
//}


// ------ A.7 Declarators ------
// -- declarator --
// a declarator is the "x" in a declaration like "int x"
nonterm[ASTList*] InitDeclaratorList {
  -> d:InitDeclarator
       [ return new ASTList(d); ]
  -> list:InitDeclaratorList "," d:InitDeclarator
       [ list->append(d); return list; ]
}

nonterm[ASTList*] InitDeclaratorListOpt {
  -> empty
       [ return new ASTList(); ]
  -> list:InitDeclaratorList
       [ return list; ]
}

// pairs the inner declarator with its initializer (NULL if absent)
nonterm[Declarator*] InitDeclarator {
  -> d:Declarator                       // (int)  x
       [ return new Declarator(d, NULL); ]

  -> d:Declarator i:Initializer         // (int)  x = 5
       [ return new Declarator(d, i); ]
}

nonterm[Initializer*] Initializer {
  -> "=" i:InitializerClause
       [ return i; ]

  // this causes an ambiguity with
  //   int fileno(FILE *f);
  // because it could be multiplication.. handling it isn't that
  // hard, but for now let's just take out this rule
  //-> "(" ExpressionList ")" ;     // ctor args
}

nonterm[Initializer*] InitializerClause {
  -> e:AssignmentExpression             // scalar
       [ return new IN_expr(e); ]

  -> c:CompoundInitializer              // array/structure initializer
       [ return new IN_compound(c); ]
}

// gnu extensions: labeled elements in initializers
// (*potentially* labeled)
// (a label, when present, is stored in the initializer's 'label' field)
nonterm[Initializer*] LabeledInitializerClause {
  // no label
  -> init:InitializerClause
       [ return init; ]

  // initialize a specific element of the array
  -> "[" index:ConstantExpression "]" "=" init:InitializerClause
       [ init->label = new IL_element(index); return init; ]

  // initialize a range (inclusive) of elements of the array
  -> "[" lo:ConstantExpression "..." hi:ConstantExpression "]" "=" init:InitializerClause
       [ init->label = new IL_range(lo, hi); return init; ]

  // initialize a named element of a structure
  -> "." field:PQVarName "=" init:InitializerClause
       [ init->label = new IL_field(field); return init; ]

  // initialize a field of a specific element of the array
  -> "[" index:ConstantExpression "]" "." field:PQVarName "=" init:InitializerClause
       [ init->label = new IL_elementField(index, field); return init; ]
}

nonterm[ASTList*] CompoundInitializer {
  // array/structure initializer
  -> "{" list:InitializerList CommaOpt "}"
       [ return list; ]

  // zero whatever it is
  -> "{" "}"
       [ return new ASTList(); ]
}

// useful syntactic quirk
nonterm CommaOpt {
  -> empty ;
  -> "," ;
}

nonterm[ASTList*] InitializerList {
  -> init:LabeledInitializerClause
       [ return new ASTList(init); ]
  -> list:InitializerList "," init:LabeledInitializerClause
       [ list->append(init); return list; ]
}


// perhaps confusing name correspondence:
//   The AST name "Declarator" corresponds to the grammar name
//   "InitDeclarator"; the AST name "IDeclarator" (inner declarator)
//   corresponds to the grammar name "Declarator"
// this name shift simply reflects the different interests of the
// parser vs.
// subsequent phases of analysis
nonterm[IDeclarator*] Declarator {
  // (PtrOperator)* DirectDeclarator
  // this rule is right-recursive, so the innermost "*" is reduced
  // first; prepending each outer one leaves 'stars' in source
  // (left-to-right) order
  -> p:PtrOperator d:Declarator
       [ d->stars.prepend(p); return d; ]

  // I'm still looking for the right place to put GNUAttribute ...
  -> d:DirectDeclarator GNUAttribute    // GNU extension
       [ return d; ]

  -> d:DirectDeclarator
       [ return d; ]
}

nonterm[IDeclarator*] DirectDeclarator {
  // it doesn't matter how this was classified before, because
  // a declarator binds a new name, so it shadows any prior definitions
  -> n:AnyName a:ThmprvAttr
       [ return new D_name(loc, n, a); ]

  // function declarator; the return type comes from the type
  // specifier that precedes this
  -> d:DirectDeclarator "(" args:ParameterDeclarationClause ")" /*q:CVQualifierSeqOpt*/
  // ^^^ name of fn ^^^     ^^^^^^^^^^ arguments ^^^^^^^^^^        ^^^^^ const? ^^^^^^
     ann:FuncAnnotationsOpt
       [ return new D_func(loc, d, args, ann); ]

  // array of specified size
  -> d:DirectDeclarator "[" sz:ConstantExpression "]"
       [ return new D_array(d, sz); ]

  // array of unspecified size
  -> d:DirectDeclarator "[" "]"
       [ return new D_array(d, NULL); ]

  // precedence grouping
  -> "(" d:Declarator ")"
       [ return d; ]
}

// one "*" together with the cv-qualifiers written after it
nonterm[PtrOperator*] PtrOperator {
  // c++ std mentions something with "::" as well, I don't know what that means
  -> "*" q:CVQualifierSeqOpt
       [ return new PtrOperator(q); ]
  //-> "&" ;
}

nonterm[enum CVFlags] CVQualifierSeqOpt {
  -> empty
       [ return CV_NONE; ]
  -> s:CVQualifierSeq
       [ return s; ]
}

// one or more qualifiers, OR'ed together
nonterm[enum CVFlags] CVQualifierSeq {
  -> q:CVQualifier
       [ return q; ]
  -> q:CVQualifier s:CVQualifierSeq
       [ return (CVFlags)(q | s); ]
}

nonterm[enum CVFlags] CVQualifier {
  -> "const"                  [ return CV_CONST; ]
  -> "volatile"               [ return CV_VOLATILE; ]
  -> "owner_ptr_qualifier"    [ return CV_OWNER; ]
}


// -- declarators in typedefs --
// these are separated out because the parser wants to track the
// introduction of type names
// NOTE(review): ASTList is written here without template arguments;
// if ASTList is a class template in this project, the element type
// may have been lost from this copy of the file -- confirm
nonterm[ASTList*] TypedefDeclaratorList {
  -> d:TypedefDeclarator
       [ return new ASTList(new Declarator(d, NULL)); ]
  -> list:TypedefDeclaratorList "," d:TypedefDeclarator
       [ list->append(new Declarator(d, NULL)); return list; ]
}

nonterm[IDeclarator*] TypedefDeclarator {
  // prepend, exactly as Declarator does for the same right-recursive
  // shape; this used to append, which gave "typedef T * const * X"
  // the opposite 'stars' order from "T * const * x"
  // NOTE(review): assumes consumers of 'stars' expect the order that
  // Declarator produces -- confirm against the type checker
  -> p:PtrOperator d:TypedefDeclarator
       [ d->stars.prepend(p); return d; ]
  -> d:DirectTypedefDeclarator
       [ return d; ]
}

nonterm[IDeclarator*] DirectTypedefDeclarator {
  // changed from VariableName because this is a binding introduction
  // so it doesn't matter what its previous association might have been
  -> n:AnyName
       [
         // TYPE/NAME
         // record the name as a type as soon as the declarator is reduced
         D("defined new typedef name " << n);
         addType(n);
         return new D_name(loc, n, NULL);
       ]

  -> d:DirectTypedefDeclarator "(" args:ParameterDeclarationClause ")" //CVQualifierSeqOpt
     ann:FuncAnnotationsOpt
       [ return new D_func(loc, d, args, ann); ]

  -> d:DirectTypedefDeclarator "[" sz:ConstantExpression "]"
       [ return new D_array(d, sz); ]

  -> d:DirectTypedefDeclarator "[" "]"
       [ return new D_array(d, NULL); ]

  -> "(" d:TypedefDeclarator ")"
       [ return d; ]
}


// -- type-id --
// a type-id is like a declaration of one thing, but without the variable name;
// it is, for example, what appears inside the parens of a typecast
nonterm[ASTTypeId*] TypeId {
  -> spec:TypeSpecifier decl:AbstractDeclaratorOpt
       [ return new ASTTypeId(spec, new Declarator(decl, NULL)); ]

  // gnu extension
  //-> "__typeof__" "(" e:Expression ")" ;
}

// when absent, the abstract declarator is a D_name with a NULL name
nonterm[IDeclarator*] AbstractDeclaratorOpt {
  -> empty
       [ return new D_name(loc, NULL, NULL); ]
  -> d:AbstractDeclarator
       [ return d; ]
}

// an abstract declarator (not opt) must have *some* ground syntax in it
nonterm[IDeclarator*] AbstractDeclarator {
  // prepend, exactly as Declarator does for the same right-recursive
  // shape; this used to append, which gave the type-id "T * const *"
  // the opposite 'stars' order from the declaration "T * const * x"
  // NOTE(review): assumes consumers of 'stars' expect the order that
  // Declarator produces -- confirm against the type checker
  -> p:PtrOperator d:AbstractDeclaratorOpt
       [ d->stars.prepend(p); return d; ]
  -> d:DirectAbstractDeclarator
       [ return d; ]
}

nonterm[IDeclarator*] DirectAbstractDeclaratorOpt {
  -> empty
       [ return new D_name(loc, NULL, NULL); ]
  -> d:DirectAbstractDeclarator
       [ return d; ]
}

// this also must have some ground syntax
nonterm[IDeclarator*] DirectAbstractDeclarator {
  // this is where abstract declarators differ from regular declarators;
  // for a declarator, this rule is "-> PQVarName"
  // update: this approach, while conceptually elegant, leads to an ambiguity
  // for the input "int()" between int and fn returning int
  //-> empty ;

  // function
  -> d:DirectAbstractDeclarator "(" args:ParameterDeclarationClause ")" //q:CVQualifierSeqOpt
     ann:FuncAnnotationsOpt
       [ return new D_func(loc, d, args, ann); ]

  // grammar hack: support missing function name part at this level, rather
  // than using DirectAbstractDeclaratorOpt, to effect one extra token
  // of lookahead
  //-> "(" args:ParameterDeclarationClause ")" q:CVQualifierSeqOpt ;
  // however, this postpones the ambiguity until it arises in
  //   typedef int x;
  //   int foo(int (x));
  // is the argument an int, or a function accepting an 'x'?
  // the rule is complicated, depending on x's previous declaration
  // status.. I'll sidestep the whole issue by accepting a smaller language

  -> d:DirectAbstractDeclaratorOpt "[" sz:ConstantExpression "]"     // array of specified size
       [ return new D_array(d, sz); ]

  -> d:DirectAbstractDeclaratorOpt "[" "]"                           // array of unspecified size
       [ return new D_array(d, NULL); ]

  -> "(" d:AbstractDeclarator ")"                                    // precedence grouping
       [ return d; ]
}

// the parameters of a function declarator; a trailing "..." is
// recorded by appending ellipsisTypeId() as the last element
nonterm[ASTList*] ParameterDeclarationClause {
  -> p:ParameterDeclarationList                 // some args
       [ return p; ]

  -> empty                                      // no args
       [ return new ASTList(); ]

  -> "..."                                      // all args are optional
       [ return new ASTList(ellipsisTypeId()); ]

  -> p:ParameterDeclarationList "..."           // args plus optionally more
       [ p->append(ellipsisTypeId()); return p; ]

  -> p:ParameterDeclarationList "," "..."       // same; alternative syntax
       [ p->append(ellipsisTypeId()); return p; ]
}

nonterm[ASTList*] ParameterDeclarationList {
  -> d:ParameterDeclaration
       [ return new ASTList(d); ]
  -> list:ParameterDeclarationList "," d:ParameterDeclaration
       [ list->append(d); return list; ]
}

// one parameter: a type specifier with a named or an abstract
// declarator; both forms build the same kind of ASTTypeId
nonterm[ASTTypeId*] ParameterDeclaration {
  -> RegisterOpt s:TypeSpecifier d:Declarator
       [ return new ASTTypeId(s, new Declarator(d, NULL)); ]

  -> RegisterOpt s:TypeSpecifier d:AbstractDeclaratorOpt
       [ return new ASTTypeId(s, new Declarator(d, NULL)); ]

  //-> s:DeclSpecifier d:Declarator "=" AssignmentExpression ;
  //-> s:DeclSpecifier d:AbstractDeclarator "=" AssignmentExpression ;
}

// old code uses the keyword "register" in the parameter declarations, but I
// will ignore it when that happens
nonterm RegisterOpt {
  -> empty;
  -> "register";
}


// -- function definition --
nonterm[TF_func*] FunctionDefinition {
  // I am wary of letting the declspecifier be optional, because it seems
  // to me that may introduce more ambiguities.. but it truly is missing
  // in ctors and dtors; so my idea now is to only permit it when the
  // declarator declares a function type (can't be more specific since there
  // aren't good syntactic clues for ctors)

  // return type      name/params   body
  -> r:DeclSpecifier d:Declarator b:FunctionBody
       [
         TF_func *ret = new TF_func(loc, r->dflags, r->spec, new Declarator(d, NULL), b);
         r->spec = NULL;     // stole it above
         delete r;           // was just a carrier of dflags/spec
         return ret;
       ]

  // return type      name/params   body
  //-> ExplicitOpt d:Declarator FunctionBody ;      // {c,d}tor

  // return type      name/params   body
  //-> ExplicitOpt d:Declarator ":" MemInitializerList FunctionBody ;     // ctor
}

nonterm[S_compound*] FunctionBody
  -> s:CompoundStatement
       [ return s; ]

// nonterm ExplicitOpt {
//   -> "explicit" ;
//   -> empty ;
// }


// ------ A.8 Classes ------
// I'm going to use the "class" terminology throughout, even though
// C only has "struct" and "union"
nonterm[TS_classSpec*] ClassSpecifier
  -> k:ClassKeyword n:ClassNameOpt
/*base:BaseClauseOpt*/ "{" memb:MemberDeclarationSeqOpt "}" [ return new TS_classSpec(k, n, memb); ] nonterm[StringRef] ClassNameOpt { -> empty [ return NULL; ] -> n:AnyName [ declareClassTag(n); return n; ] } nonterm[enum TypeIntr] ClassKeyword { -> "class" [ return TI_CLASS; ] -> "struct" [ return TI_STRUCT; ] -> "union" [ return TI_UNION; ] } nonterm[ASTList*] MemberDeclarationSeqOpt { -> empty [ return new ASTList(); ] -> list:MemberDeclarationSeqOpt decl:MemberDeclaration [ list->append(decl); return list; ] //-> AccessSpecifier ":" MemberDeclarationSeqOpt ; } // nonterm AccessSpecifier { // -> "public" ; // -> "private" ; // -> "protected" ; // } nonterm[Declaration*] MemberDeclaration { -> spec:DeclSpecifier list:MemberDeclaratorList ";" // member fn or data [ spec->decllist.steal(list); return spec; ] //-> FunctionDefinition ";" ; // inline fn //-> FunctionDefinition ; // syntactic tweak //-> ExplicitOpt VirtualOpt d:Declarator ";" ; // ctor, dtor, conv op // note above that "explicit" and "virtual" can't be mixed because the former // is for ctors only and the latter can't be used with ctors (so a later stage // of processing will filter it out) } // very similar to InitDeclaratorList nonterm[ASTList*] MemberDeclaratorList { -> d:MemberDeclarator [ return new ASTList(new Declarator(d, NULL)); ] -> list:MemberDeclaratorList "," d:MemberDeclarator [ list->append(new Declarator(d, NULL)); return list; ] } nonterm[IDeclarator*] MemberDeclarator { -> d:Declarator [ return d; ] //-> Declarator "=" ConstantExpression ; // pure; and member inits?? 
// unnamed bitfield -> ":" e:ConstantExpression [ return new D_bitfield(NULL /*name*/, e); ] -> ":" e:ConstantExpression GNUAttribute [ return new D_bitfield(NULL /*name*/, e); ] // named bitfield -> n:AnyName ":" e:ConstantExpression GNUAttribute [ return new D_bitfield(n, e); ] -> n:AnyName ":" e:ConstantExpression [ return new D_bitfield(n, e); ] } // ------ A.9 Derived classes ------ // nonterm BaseClauseOpt { // -> empty ; // -> ":" BaseSpecifierList ; // } // nonterm BaseSpecifierList { // -> BaseSpecifier ; // -> BaseSpecifierList "," BaseSpecifier ; // } // nonterm BaseSpecifier -> VirtualOpt AccessSpecifierOpt PQClassName ; // nonterm VirtualOpt { // -> empty ; // -> "virtual" ; // } // nonterm AccessSpecifierOpt { // -> empty ; // -> AccessSpecifier ; // } // nonterm PQClassName { // -> TypeName ; // -> Qualifier PQClassName ; // } // ------ A.10 Special member functions ------ // nonterm ConversionFunctionId -> "operator" ConversionTypeId ; // nonterm ConversionTypeId -> TypeSpecifier ConversionDeclaratorOpt ; // nonterm ConversionDeclaratorOpt { // -> empty ; // -> PtrOperator ConversionDeclaratorOpt ; // } // nonterm MemInitializerList { // -> MemInitializer ; // -> MemInitializer "," MemInitializerList ; // } // // std has a separate rule for base class ctor call, but that's not // // a useful parsing distinction, so it just creates gratuitous // // ambiguities, and I've eliminated it // nonterm MemInitializer -> AnyName "(" ExpressionListOpt ")" ; // member init // ------ A.11 Overloading ------ // nonterm OperatorFunctionId -> "operator" Operator ; // nonterm Operator { // -> "new" ; // -> "delete" ; // -> "new" "[" "]" ; // -> "delete" "[" "]" ; // -> "*" ; // -> "/" ; // -> "%" ; // -> "+" ; // -> "-" ; // -> "<<" ; // -> ">>" ; // -> "<" ; // -> ">" ; // -> "<=" ; // -> ">=" ; // -> "==" ; // -> "!=" ; // -> "&" ; // -> "^" ; // -> "|" ; // -> "&&" ; // -> "||" ; // -> AssignmentOperator ; // -> "!" 
; // -> "~" ; // -> "," ; // -> "++" ; // -> "--" ; // -> "->" ; // -> "(" ")" ; // -> "[" "]" ; // } // ------ A.12 Templates ------ //TemplateParameterList -> TemplateParameter //TemplateParameterList -> TemplateParameterList "," TemplateParameter //TemplateParameter -> TypeParameter | ParameterDeclaration // nonterm TemplateId { // -> TypeName "<" TemplateArgumentList ">" ; // -> TypeName "<" ">" ; // } // nonterm TemplateArgumentList { // -> TemplateArgument ; // -> TemplateArgumentList "," TemplateArgument ; // } // nonterm TemplateArgument { // // where I have Literal the std has AssignmentExpression... // // std also mentions IdExpression here, which is my PQVarName..... // -> Literal ; // -> TypeId ; // } // ------ A.13 Exception handling ------ // nonterm TryBlock -> "try" s:CompoundStatement h:HandlerSeq ; // nonterm HandlerSeq { // -> h:Handler ; // -> h:Handler seq:HandlerSeq ; // } // nonterm Handler // -> "catch" "(" d:ExceptionDeclaration ")" s:CompoundStatement ; // nonterm ExceptionDeclaration { // -> s:TypeSpecifier d:Declarator ; // named exception object // -> TypeSpecifier AbstractDeclaratorOpt ; // unnamed exception object // -> "..." ; // anything // } // nonterm ThrowExpression { // -> "throw" ; // -> "throw" e:AssignmentExpression ; // } // ------------------- GNU extensions ------------------- // just enough to get past them .. // all operators/punctuators except "(" and ")" nonterm GNUExprOp { -> "[" ; -> "]" ; -> "->" ; -> "::" ; -> "." ; -> "!" ; -> "~" ; -> "+" ; -> "-" ; -> "++" ; -> "--" ; -> "&" ; -> "*" ; -> ".*" ; -> "->*" ; -> "/" ; -> "%" ; -> "<<" ; -> ">>" ; -> "<" ; -> "<=" ; -> ">" ; -> ">=" ; -> "==" ; -> "!=" ; -> "^" ; -> "|" ; -> "&&" ; -> "||" ; -> "?" ; -> ":" ; -> "=" ; -> "*=" ; -> "/=" ; -> "%=" ; -> "+=" ; -> "-=" ; -> "&=" ; -> "^=" ; -> "|=" ; -> "<<=" ; -> ">>=" ; -> "," ; -> "..." ; -> ";" ; -> "{" ; -> "}" ; } // all of them, I guess.. 
nonterm GNUExprKeyword {
  // one production per keyword terminal; no semantic value is produced
  -> "asm" ;              -> "auto" ;             -> "break" ;
  -> "bool" ;             -> "case" ;             -> "catch" ;
  -> "cdecl" ;            -> "char" ;             -> "class" ;
  -> "const" ;            -> "const_cast" ;       -> "continue" ;
  -> "default" ;          -> "delete" ;           -> "do" ;
  -> "double" ;           -> "dynamic_cast" ;     -> "else" ;
  -> "enum" ;             -> "explicit" ;         -> "extern" ;
  -> "float" ;            -> "for" ;              -> "friend" ;
  -> "goto" ;             -> "if" ;               -> "inline" ;
  -> "int" ;              -> "long" ;             -> "mutable" ;
  -> "new" ;              -> "operator" ;         -> "pascal" ;
  -> "private" ;          -> "protected" ;        -> "public" ;
  -> "register" ;         -> "reinterpret_cast" ; -> "return" ;
  -> "short" ;            -> "signed" ;           -> "sizeof" ;
  -> "static" ;           -> "static_cast" ;      -> "struct" ;
  -> "switch" ;           -> "template" ;         -> "this" ;
  -> "throw" ;            -> "try" ;              -> "typedef" ;
  -> "typeid" ;           -> "union" ;            -> "unsigned" ;
  -> "virtual" ;          -> "void" ;             -> "volatile" ;
  -> "wchar_t" ;          -> "while" ;            -> "__typeof__" ;
}


// __attribute__ followed by a doubly parenthesized GNUExprSeq
nonterm GNUAttribute
  -> "__attribute__" "(" "(" GNUExprSeq ")" ")";


// recognize grouping only
nonterm GNUExprSeq {
  -> empty ;
  -> GNUExpr GNUExprSeq ;
}


// one element of a GNUExprSeq: a parenthesized group or a single token
nonterm GNUExpr {
  -> "(" GNUExprSeq ")" ;
  -> AnyName ;
  -> GNUExprOp ;
  -> GNUExprKeyword ;
  -> L2_INT_LITERAL ;
  -> L2_STRING_LITERAL ;
}


// asm statement, with or without "volatile"; the parenthesized contents
// are matched by GNUExprSeq
nonterm GNUAsmStatement {
  -> "asm" "(" GNUExprSeq ")" ";" ;
  -> "asm" "volatile" "(" GNUExprSeq ")" ";" ;
}


// ----------------- thmprv extensions ----------------

// zero or more function annotations, collected left to right
nonterm[ASTList*] FuncAnnotationsOpt {
  -> empty
       [ return new ASTList(); ]

  -> sofar:FuncAnnotationsOpt item:FuncAnnotation
       [
         sofar->append(item);
         return sofar;
       ]
}


nonterm[FuncAnnotation*] FuncAnnotation {
  // precondition: optional declarations, then the expression
  -> "thmprv_pre" "(" decls:FADeclListOpt cond:Expression ")"
       [ return new FA_precondition(decls, cond); ]

  // postcondition: just the expression
  -> "thmprv_post" "(" cond:Expression ")"
       [ return new FA_postcondition(cond); ]
}


nonterm[ASTList*] FADeclListOpt {
  -> empty
       [ return new ASTList(); ]

  -> decls:FADeclList
       [ return decls; ]
}


// the "thmprv_bind" syntax exists to tell a declaration (which always
// starts with a TypeName) apart from a qualified variable expression
// (which starts with a TypeName as well)
nonterm[ASTList*] FADeclList {
  // when the parser speculates that a leading TypeName belongs to a
  // qualified variable, the ambiguity shows up as a cancelled reduction
  // here; note that no sharing will have been introduced
  fun del(decls) [ delete decls; ]

  -> /*"thmprv_bind"*/ item:SimpleDeclaration
       [ return new ASTList(item); ]

  -> sofar:FADeclList /*"thmprv_bind"*/ item:SimpleDeclaration
       [
         sofar->append(item);
         return sofar;
       ]
}


nonterm[ThmprvAttr*] ThmprvAttr {
  // no attribute written
  -> empty
       [ return NULL; ]

  // the listed attributes become the arguments of one ThmprvAttr whose
  // name is strRefAttr
  -> "thmprv_attr" "(" attrs:ThmprvAttrList ")"
       [ return new ThmprvAttr(strRefAttr, attrs); ]
}


// comma-separated attributes, collected left to right
nonterm[ASTList*] ThmprvAttrList {
  -> item:OneThmprvAttr
       [ return new ASTList(item); ]

  -> sofar:ThmprvAttrList "," item:OneThmprvAttr
       [
         sofar->append(item);
         return sofar;
       ]
}


nonterm[ThmprvAttr*] OneThmprvAttr {
  // bare name
  -> attrName:AnyName
       [ return new ThmprvAttr(attrName, NULL /*args*/); ]

  // name with a parenthesized list of nested attributes
  -> attrName:AnyName "(" nested:ThmprvAttrList ")"
       [ return new ThmprvAttr(attrName, nested); ]
}


// quantified predicates; the last E_quantifier argument is true for
// "thmprv_forall" and false for "thmprv_exists"
nonterm[Expression*] ThmprvPredicate {
  -> "thmprv_forall" "(" decls:FADeclList body:Expression ")"
       [ return new E_quantifier(decls, body, true /*forall*/); ]

  -> "thmprv_exists" "(" decls:FADeclList body:Expression ")"
       [ return new E_quantifier(decls, body, false /*forall*/); ]
}