/*
 * Recognise a shell command per the POSIX Shell Command Language specification
 * and return an AST.
 *
 * Tree construction may be overridden in a subclass by overriding the node()
 * function.
 *
 * In general, AST nodes have a name and a contents array. For a binary
 * operator, the first element of the contents array will be the left
 * operand, and the second element will be the right operand. In more complex
 * cases, operands should generally be named by enclosing them within a named
 * node.
 *
 * Each element of the contents array may either be a string, a node, or an
 * array of nodes.
 */

/*
 * File initializer
 */
{
use Wikimedia\WikiPEG\InternalError;

// @phan-file-suppress PhanUnusedGotoLabel
// @phan-file-suppress PhanNoopSwitchCases
// @phan-file-suppress PhanTypeMismatchArgument
// @phan-file-suppress PhanTypeComparisonFromArray
}

/*
 * Class initializer
 */
{
	/**
	 * Overridable tree node constructor
	 *
	 * @stable to override
	 * @param string $type
	 * @param array|Node|string $contents
	 * @return Node
	 */
	protected function node( $type, $contents ) {
		return new Node( $type, $contents );
	}

	/**
	 * Combine arrays and non-array items into a single flat array.
	 *
	 * Null items are skipped, array items are concatenated, and any other
	 * item is appended as a single element.
	 *
	 * @param array|Node|string|null ...$items
	 * @return array
	 * @phan-return array
	 */
	private function merge( ...$items ) {
		$mergeArgs = [];
		foreach ( $items as $item ) {
			if ( $item === null ) {
				continue;
			}
			$mergeArgs[] = is_array( $item ) ? $item : [ $item ];
		}
		// array_merge() with no arguments is an error before PHP 7.4, so
		// handle "no items" and "only null items" explicitly.
		if ( !$mergeArgs ) {
			return [];
		}
		return array_merge( ...$mergeArgs );
	}
}

// Start rule: a whole script, or an empty/blank input.
program
  = linebreak commands:complete_commands linebreak
  {
    return $this->node( 'program', $commands );
  }
  / linebreak
  {
    return $this->node( 'program', [] );
  }

// A sequence of complete commands, each optionally followed by newlines.
complete_commands
  = (
    c:complete_command newline_list { return $c; }
    / complete_command
  )*

// One or more and_or lists joined by ";" or "&". An item terminated by "&"
// is wrapped in a "background" node.
complete_command
  = OWS
  list: (
    nodes: (
      item: and_or
      separator: $separator_op
      {
        if ( $separator && $separator[0] === '&' ) {
          return $this->node( 'background', $item );
        } else {
          return $item;
        }
      }
    )+
    last: and_or?
    {
      if ( $last ) {
        $nodes[] = $last;
      }
      if ( count( $nodes ) > 1 ) {
        return $this->node( 'list', $nodes );
      } else {
        return $nodes[0];
      }
    }
    / and_or
  )
  {
    return $this->node( 'complete_command', $list );
  }

// A pipeline followed by any number of "&&"/"||" continuations. The result
// is a flat array: the first pipeline followed by and_if/or_if nodes.
and_or
  = first: pipeline
  rest: (
    AND_IF linebreak pipeline:pipeline
    {
      return $this->node( 'and_if', $pipeline );
    }
    / OR_IF linebreak pipeline:pipeline
    {
      return $this->node( 'or_if', $pipeline );
    }
    / pipeline
  )*
  {
    return $this->merge( $first, $rest );
  }

// An optionally negated pipe sequence.
pipeline
  = bang: Bang? pipeline: pipe_sequence
  {
    if ( $bang !== null ) {
      return $this->node( 'bang', $pipeline );
    } else {
      return $pipeline;
    }
  }

// Commands joined by "|". A single command is returned unwrapped.
pipe_sequence
  = first: command
  rest: (
    PIPE linebreak command:command
    {
      return $command;
    }
  )*
  {
    if ( count( $rest ) ) {
      return $this->node( 'pipeline', $this->merge( $first, $rest ) );
    } else {
      return $first;
    }
  }

// function_definition must be tried before simple_command, since both start
// with a name-like word.
command
  = function_definition
  / simple_command
  / c:compound_command r:redirect_list?
  {
    if ( $r !== null ) {
      return $this->merge( $c, $r );
    } else {
      return $c;
    }
  }

compound_command
  = brace_group
  / subshell
  / for_clause
  / case_clause
  / if_clause
  / while_clause
  / until_clause

subshell
  = LPAREN list:compound_list RPAREN
  {
    return $this->node( 'subshell', $list );
  }

// A list of and_or terms inside a compound command. Terms terminated by "&"
// are wrapped in a "background" node.
compound_list
  = linebreak
  terms:(
    term: and_or
    separator: $separator
    {
      if ( $separator && $separator[0] === '&' ) {
        return $this->node( 'background', $term );
      } else {
        return $term;
      }
    }
  )+
  last: and_or?
  {
    if ( $last ) {
      $terms[] = $last;
    }
    return $terms;
  }
  / linebreak term: and_or
  {
    if ( $term === null ) {
      // Phan is convinced $term may be null, not sure how
      return [];
    } else {
      return $term;
    }
  }

// "for name in words; do ... done" or "for name; do ... done"
for_clause
  = (
    For name:for_name linebreak for_case_in wordlist:wordlist? sequential_sep do_group:do_group
    {
      return $this->node( 'for', [
        $name,
        $this->node( 'in', $wordlist ?: [] ),
        $do_group
      ] );
    }
  )
  / (
    For name:for_name sequential_sep? do_group:do_group
    {
      return $this->node( 'for', [ $name, $do_group ] );
    }
  )

// Spec note: Apply rule 5.
// This and other lexer rules in the spec are mostly workarounds for loose
// integration between the lexer and the parser implied by the spec. We know
// that we want a NAME here, so we can just ask for a NAME. With a separate
// lexer it would be necessary for the lexer to guess whether the parser will
// want a NAME or a WORD, and for the parser to somehow downgrade an
// inappropriate NAME token to a WORD if the guess was wrong.
for_name = name:NAME OWS { return $name; }

// Spec note: Apply rule 6
for_case_in = In OWS

wordlist = WORD+

// "case word in ... esac". The item list is the first element of the matched
// sequence; a bare "esac" yields a string, hence the is_array() check.
case_clause
  = Case word:WORD linebreak for_case_in linebreak
  list_esac:(
    case_list $Esac
    / case_list_ns $Esac
    / $Esac
  )
  {
    if ( is_array( $list_esac ) ) {
      $list = $list_esac[0];
    } else {
      $list = [];
    }
    return $this->node( 'case', [ $word, $this->node( 'in', $list ) ] );
  }

// ns means no semicolon
case_list_ns
  = list:case_list item:case_item_ns
  {
    $list[] = $item;
    return $list;
  }
  / item:case_item_ns
  {
    return [ $item ];
  }

case_list = case_item+

// ns means no semicolon
case_item_ns
  = LPAREN? pattern:pattern RPAREN list:compound_list
  {
    return $this->node( 'case_item', [
      $pattern,
      $this->node( 'case_consequent', $list )
    ] );
  }
  / LPAREN? pattern:pattern RPAREN linebreak
  {
    return $this->node( 'case_item', [ $pattern ] );
  }

case_item
  = LPAREN? pattern:pattern RPAREN list:compound_list DSEMI linebreak
  {
    return $this->node( 'case_item', [
      $pattern,
      $this->node( 'case_consequent', $list )
    ] );
  }
  / LPAREN? pattern:pattern RPAREN linebreak DSEMI linebreak
  {
    // Pass an array, matching the empty-body alternative of case_item_ns
    return $this->node( 'case_item', [ $pattern ] );
  }

// Spec note: rule 4
pattern
  = first:WORD rest:( PIPE WORD )*
  {
    $patterns = [ $first ];
    foreach ( $rest as $pattern ) {
      // Each element is a [ PIPE, WORD ] sequence; keep only the WORD
      $patterns[] = $pattern[1];
    }
    return $this->node( 'case_pattern', $patterns );
  }

if_clause
  = If condition: compound_list
  Then consequent: compound_list
  else_part: else_part?
  Fi
  {
    $contents = [
      $this->node( 'condition', $condition ),
      $this->node( 'consequent', $consequent )
    ];
    if ( $else_part !== null ) {
      $contents = $this->merge( $contents, $else_part );
    }
    return $this->node( 'if', $contents );
  }

// Returns a flat array of elif_condition/elif_consequent/else nodes, which
// if_clause appends to its own contents.
else_part
  = Elif condition: compound_list
  Then consequent: compound_list
  else_part: else_part?
  {
    $contents = [
      $this->node( 'elif_condition', $condition ),
      $this->node( 'elif_consequent', $consequent )
    ];
    if ( $else_part !== null ) {
      $contents = $this->merge( $contents, $else_part );
    }
    return $contents;
  }
  / Else alternative:compound_list
  {
    return [ $this->node( 'else', $alternative ) ];
  }

while_clause
  = While list:compound_list body:do_group
  {
    return $this->node( 'while', [
      $this->node( 'condition', $list ),
      $body
    ] );
  }

until_clause
  = Until list:compound_list body:do_group
  {
    return $this->node( 'until', [
      $this->node( 'condition', $list ),
      $body
    ] );
  }

function_definition
  = fname:fname LPAREN RPAREN linebreak body:function_body
  {
    return $this->node(
      'function_definition',
      $this->merge(
        $this->node( 'function_name', $fname ),
        $body
      )
    );
  }

// Spec note: Apply rule 9
function_body
  = c:compound_command r:redirect_list?
  {
    if ( $r !== null ) {
      return $this->merge( $c, $r );
    } else {
      return $c;
    }
  }

fname = NAME // Apply rule 8

brace_group
  = Lbrace list:compound_list Rbrace
  {
    return $this->node( 'brace_group', $list );
  }

do_group
  = Do list:compound_list Done // Apply rule 6
  {
    return $this->node( 'do', $list );
  }

// A simple command: optional assignments/redirects, a command word, and
// optional arguments/redirects.
simple_command
  = prefix:cmd_prefix word:cmd_word suffix:cmd_suffix?
  {
    $contents = [ $prefix, $word ];
    if ( $suffix !== null ) {
      $contents = array_merge( $contents, $suffix );
    }
    return $this->node( 'simple_command', $contents );
  }
  / prefix:cmd_prefix
  {
    return $this->node( 'simple_command', [ $prefix ] );
  }
  / name:cmd_name suffix:cmd_suffix?
  {
    $contents = [ $name ];
    if ( $suffix !== null ) {
      $contents = array_merge( $contents, $suffix );
    }
    return $this->node( 'simple_command', $contents );
  }

// Spec note: Apply rule 7a: Guess whether ASSIGNMENT_WORD is needed. That
// doesn't need any special handling, but we need to avoid allowing reserved
// words here with a negative assertion.
cmd_name = !reserved word:WORD { return $word; }

// Apply rule 7b
cmd_word = WORD

cmd_prefix
  = contents: ( io_redirect / ASSIGNMENT_WORD )+
  {
    return $this->node( 'cmd_prefix', $contents );
  }

cmd_suffix = ( io_redirect / WORD )+

redirect_list = io_redirect+

io_redirect
  = number:IO_NUMBER? file_or_here:(io_file / io_here)
  {
    $contents = [];
    if ( $number !== null ) {
      $contents[] = $this->node( 'io_subject', $number );
    }
    $contents[] = $file_or_here;
    return $this->node( 'io_redirect', $contents );
  }

// Options reordered to put the longer substrings first. Otherwise we would
// need assertions to protect longer operators from being partly consumed by
// smaller operators.
io_file
  = LESSAND filename:filename
  {
    return $this->node( 'duplicate_input', $filename );
  }
  / LESSGREAT filename:filename
  {
    return $this->node( 'read_and_write', $filename );
  }
  / LESS filename:filename
  {
    return $this->node( 'input', $filename );
  }
  / GREATAND filename:filename
  {
    return $this->node( 'duplicate_output', $filename );
  }
  / DGREAT filename:filename
  {
    return $this->node( 'append_output', $filename );
  }
  / CLOBBER filename:filename
  {
    return $this->node( 'clobber', $filename );
  }
  / GREAT filename:filename
  {
    return $this->node( 'output', $filename );
  }

filename = WORD // Apply rule 2

io_here
  = op: (
    DLESSDASH { return 'io_here_strip'; }
    / DLESS { return 'io_here'; }
  )
  end: here_end
  {
    // TODO: this is quite complicated to implement, especially given the way
    // the parser is structured.
    throw new UnimplementedError( 'heredoc is not implemented' );
    // For phan
    return $this->node( 'io_here', '' );
  }

here_end = $WORD // Apply rule 3

newline_list = NEWLINE+

linebreak = newline_list?

separator_op = AND / SEMI

separator = separator_op linebreak / newline_list

sequential_sep = SEMI linebreak / newline_list

// Operator tokens per spec
AND_IF = '&&' OWS
OR_IF = '||' OWS
DSEMI = ';;' OWS
DLESS = '<<' OWS
DGREAT = '>>' OWS
LESSAND = '<&' OWS
GREATAND = '>&' OWS
LESSGREAT = '<>' OWS
DLESSDASH = '<<-' OWS
CLOBBER = '>|' OWS

// Reserved word tokens per spec
If = 'if' DELIM
Then = 'then' DELIM
Else = 'else' DELIM
Elif = 'elif' DELIM
Fi = 'fi' DELIM
Do = 'do' DELIM
Done = 'done' DELIM
Case = 'case' DELIM
Esac = 'esac' DELIM
While = 'while' DELIM
Until = 'until' DELIM
For = 'for' DELIM
Lbrace = '{' DELIM
Rbrace = '}' DELIM
Bang = '!' DELIM
In = 'in' DELIM

reserved
  = If / Then / Else / Elif / Fi / Do / Done / Case / Esac / While / Until
  / For / Lbrace / Rbrace / Bang / In

// Single-character tokens with disambiguating assertions
AND = !AND_IF '&' OWS
SEMI = !DSEMI ';' OWS
LESS = !DLESS !LESSAND !LESSGREAT !DLESSDASH '<' OWS
GREAT = !DGREAT !GREATAND !CLOBBER '>' OWS
PIPE = !OR_IF '|' OWS
LPAREN = '(' OWS
RPAREN = ')' OWS

// Optional whitespace
// Consume whitespace and comments after each token
OWS = [ \t\v\r\f]* ("#" [^\n]*)?

// Delimiter assertion for reserved word termination
// A reserved word ends at blanks (optionally followed by a comment, as OWS
// allows after other tokens), at the end of input, or immediately before a
// newline or operator character, so that "fi;", "done)" and "done<file" are
// recognised. The newline/operator character itself is not consumed.
DELIM
  = [ \t\v\r\f]+ ("#" [^\n]*)?
  / !. // EOF
  / & [\n;&|()<>]

WORD
  = parts: word_part+ OWS
  {
    return $this->node( 'word', $parts );
  }

word_part
  = single_quoted_part
  / double_quoted_part
  / bare_escape_sequence
  / backquote_expansion
  / dollar_expansion
  / plain_part

single_quoted_part
  = "'" contents:$[^']* "'"
  {
    return $this->node( 'single_quote', $contents );
  }

double_quoted_part
  = '"'
  contents: (
    dquoted_escape
    / backquote_expansion
    / dollar_expansion
    / "\\"
    / $[^"`$\\]+
  )*
  '"'
  {
    return $this->node( 'double_quote', $contents );
  }

dquoted_escape
  = "\\" contents:[$`"\\\n]
  {
    return $this->node( 'dquoted_escape', $contents );
  }

bare_escape_sequence
  = "\\" contents:[^\n]
  {
    return $this->node( 'bare_escape', $contents );
  }

backquote_expansion
  = "`"
  parts: (
    backquoted_escape
    / dollar_expansion
    / double_backquote_expansion
    / "$"
    / "\\"
    / $[^`$\\]+
  )*
  "`"
  {
    return $this->node( 'backquote', $parts );
  }

backquoted_escape
  = "\\" contents:[$\\]
  {
    return $this->node( 'backquoted_escape', $contents );
  }

// A nested backquote expansion delimited by escaped backquotes. The body is
// repeated zero or more times, like the body of backquote_expansion.
double_backquote_expansion
  = "\\`"
  parts: (
    backquoted_escape
    / dollar_expansion
    / "$"
    / "\\" !"`"
    / $[^`$\\]+
  )*
  "\\`"
  {
    return $this->node( 'double_backquote', $parts );
  }

dollar_expansion
  = "$"
  contents: (
    special_parameter
    / short_positional_parameter
    / brace_expansion
    / arithmetic_expansion
    / command_expansion
    / named_parameter
  )
  {
    return $contents;
  }

special_parameter
  = contents: [@*#?\-$!0]
  {
    return $this->node( 'special_parameter', $contents );
  }

short_positional_parameter
  = contents: [1-9]
  {
    return $this->node( 'positional_parameter', $contents );
  }

brace_expansion
  = "{"
  contents: (
    binary_expansion
    / string_length
    / braced_parameter_expansion
  )
  "}"
  {
    return $contents;
  }

// ${parameter<op>word}. The word is parsed with the "brace" rule parameter
// set, so that plain_part stops at the closing "}" instead of consuming it.
binary_expansion
  = parameter:parameter
  operator:(
    $( ":" [\-=?+] )
    / $( "%%" / "##" / [%#\-=?+] )
  )
  OWS
  word:WORD<brace>?
  {
    $names = [
      ':-' => 'use_default',
      '-' => 'use_default_unset',
      ':=' => 'assign_default',
      '=' => 'assign_default_unset',
      ':?' => 'indicate_error',
      '?' => 'indicate_error_unset',
      ':+' => 'use_alternative',
      '+' => 'use_alternative_unset',
      '%' => 'remove_smallest_suffix',
      '%%' => 'remove_largest_suffix',
      '#' => 'remove_smallest_prefix',
      '##' => 'remove_largest_prefix'
    ];
    if ( !isset( $names[$operator] ) ) {
      throw new InternalError( "Unable to find operator \"$operator\"" );
    }
    return $this->node( $names[$operator], [ $parameter, $word ?? '' ] );
  }

string_length
  = "#" parameter:parameter
  {
    return $this->node( 'string_length', $parameter );
  }

arithmetic_expansion
  = "((" OWS words:WORD+ "))"
  {
    return $this->node( 'arithmetic_expansion', $words );
  }

command_expansion
  = "(" command: complete_command ")"
  {
    return $this->node( 'command_expansion', $command );
  }

braced_parameter_expansion
  = parameter:parameter
  {
    return $this->node( 'braced_parameter_expansion', $parameter );
  }

parameter
  = special_parameter
  / long_positional_parameter
  / named_parameter

long_positional_parameter
  = parameter: $[0-9]+
  {
    return $this->node( 'positional_parameter', $parameter );
  }

named_parameter
  = name:NAME
  {
    return $this->node( 'named_parameter', $name );
  }

// A run of unquoted literal characters. With the "brace" rule parameter set
// (inside a ${...} expansion), "{" and "}" are excluded as well. A bare "&"
// or "!" before the character class would be a zero-width predicate, so the
// capture would always be empty and word_part+ would never advance; the
// assertions must test the rule parameter and the class must be consumed.
plain_part
  = plain: (
    $(&<brace> [^'"\\`$ \t\v\r\f\n&|;<>(){}]+)
    / $(!<brace> [^'"\\`$ \t\v\r\f\n&|;<>()]+)
  )
  {
    return $this->node( 'unquoted_literal', $plain );
  }

ASSIGNMENT_WORD
  = name:NAME "=" word:WORD
  {
    return $this->node( 'assignment', [ $this->node( 'name', $name ), $word ] );
  }

NAME = $( [_a-zA-Z] [_a-zA-Z0-9]* )

NEWLINE = "\n" OWS

IO_NUMBER = $[0-9]+