grammar = $grammar;
        // NOTE(review): the class header and the constructor signature lie
        // before this chunk; the statements below are the constructor's tail.
        $this->dummies = $grammar->getDummyString();
        $this->terminals = $grammar->getTerminals();
        $this->rules = $grammar->getRules();
        $this->firsts = $grammar->getFirsts();
        $this->follows = $grammar->getFollows();
        $this->translator = $translator;
    }

    /**
     * Switches the trace output (echoed while parsing) on or off.
     *
     * @param mixed $debug Truthy to echo trace lines.
     * @return $this
     */
    public function setDebug ( $debug ) {
        $this->debug = $debug;
        return $this;
    }

    /**
     * Reads a file and parses its content with parseString().
     *
     * @param string $file     Path of the file to parse.
     * @param bool   $litteral Forwarded to parseString().
     * @return mixed The value returned by parseString().
     * @throws ParserException When the file is missing, unreadable, or does not parse;
     *                         a parse failure is wrapped with the file name and keeps
     *                         the original exception as "previous".
     */
    public function parseFile ( $file, $litteral = false ) {
        if ( !file_exists($file) )
            throw new ParserException(sprintf('File "%1$s" does not exist !', $file));

        // file_get_contents() returns false on failure; parsing that value
        // would silently behave like parsing an empty string.
        $content = file_get_contents($file);
        if ( $content === false )
            throw new ParserException(sprintf('File "%1$s" could not be read !', $file));

        try {
            return $this->parseString($content, $litteral);
        } catch ( ParserException $e ) {
            throw new ParserException(
                sprintf('Error parsing file "%1$s" : "%2$s"', $file, $e->getMessage()),
                0,
                $e
            );
        }
    }

    /**
     * Parses a string starting from the given symbol and returns the nodes
     * of the resulting root.
     *
     * After parsing, whatever is left of the input must consist only of
     * "dummy" matches (the pattern returned by the grammar's getDummyString()).
     *
     * @param string $string   Input to parse.
     * @param bool   $litteral When truthy, nodes carry the raw matched text.
     * @param string $symbol   Start symbol.
     * @param string $repeat   Repeat modifier applied to the start symbol.
     * @return mixed The 'nodes' entry of the root returned by parse().
     * @throws ParserException On a parse error or when unparsed input remains.
     */
    public function parseString ( $string, $litteral = false, $symbol = 'S', $repeat = '.' ) {
        $parseState = $this->getState($string, $litteral);
        $tree = $this->parse($parseState, $symbol, $repeat);

        $string = substr($string, $parseState['state']['position']);
        if ( !preg_match(sprintf('/^(%1$s)*$/', $this->dummies), $string) )
            throw new ParserException("Unexpected end of input :" . $string);

        // Empty the state explicitly (it holds a reference to this parser).
        foreach ( $parseState as $k => $v )
            unset($parseState[$k]);

        return $tree['nodes'];
    }

    /**
     * Parses from the current position of an existing parse state.
     *
     * @param array  $parseState State as built by getState(); modified in place.
     * @param string $symbol     Symbol to parse.
     * @param string $repeat     Repeat modifier applied to the symbol.
     * @return array The node array built by work().
     * @throws ParserException Carrying the line and the offset within that line of the
     *                         furthest recorded error, with that error as "previous".
     */
    public function parse ( &$parseState, $symbol = 'S', $repeat = '.' ) {
        try {
            $tree = $this->work($parseState, array(
                'type' => $symbol,
                'repeat' => $repeat,
            ));
        } catch ( ParserException $e ) {
            $error = $parseState['error'];
            // A failure raised before any production was tried never went
            // through reportError(); fall back to the exception just caught
            // and to the current position instead of reporting "line 0".
            if ( !$error['ParserException'] ) {
                $error = array(
                    'position' => $parseState['state']['position'],
                    'line' => $parseState['state']['line'],
                    'ParserException' => $e,
                );
            }
            // Length of the text between the last newline and the error position.
            preg_match('![^\n]++$!', substr($parseState['string'], 0, $error['position']), $m);
            throw new ParserException(
                sprintf(
                    'Error on line %1$u at position %2$u.',
                    $error['line'],
                    !empty($m[0]) ? strlen($m[0]) : 0
                ),
                0,
                $error['ParserException']
            );
        }
        return $tree;
    }

    /**
     * Builds a fresh parse state for the given input.
     *
     * @param string $string   Input to parse.
     * @param bool   $litteral Whether nodes should carry the raw matched text.
     * @return array State with the input, the match and memo caches, the furthest
     *               error seen so far and the backtrackable 'state' sub-array.
     */
    protected function getState ( $string, $litteral ) {
        return array(
            'parser' => $this,
            'string' => $string,
            'litteral' => $litteral,
            'matches' => array(),
            'error' => array(
                'position' => 0,
                'line' => 0,
                'ParserException' => null,
            ),
            'state' => array(
                'line' => 1,
                'position' => 0,
                'deepness' => 0,
                'operators' => array(),
                'ntoken' => null,
                'production' => null,
            ),
            'memo' => array(),
        );
    }

    /**
     * Restores the backtrackable part of the state from a snapshot.
     *
     * @param array $parseState State to modify in place.
     * @param array $oldState   Snapshot as returned by saveState().
     * @param array $no         Keys of 'state' that must NOT be restored
     *                          (only the keys matter, not their values).
     * @return array The updated parse state.
     */
    protected function setState ( &$parseState, $oldState, $no = array() ) {
        foreach ( array_diff_key($oldState['state'], $no) as $k => $v )
            $parseState['state'][$k] = $v;
        return $parseState;
    }

    /**
     * Takes a snapshot of the backtrackable part of the state.
     *
     * @param array $parseState Current parse state.
     * @return array array('state' => copy of $parseState['state']).
     */
    protected function saveState ( &$parseState ) {
        // Array assignment copies by value, so this is a real snapshot.
        return array('state' => $parseState['state']);
    }

    /**
     * Builds the cache key of a token at the current position.
     *
     * @param array $parseState Parse state (or snapshot) holding ['state']['position'].
     * @param array $token      Token with a 'type' entry.
     * @return string "<type>:<position>".
     */
    protected function getId ( $parseState, $token ) {
        return $token['type'] . ':' . $parseState['state']['position'];
    }

    /**
     * Records an error if it is the first one, or if it occurred further in
     * the input than the one recorded so far.
     *
     * @param array           $parseState State to modify in place.
     * @param array           $token      Token being parsed (not used here).
     * @param ParserException $e          The error to record.
     * @return void
     */
    protected function reportError ( &$parseState, $token, ParserException $e ) {
        if (
            !$parseState['error']['ParserException']
            || $parseState['state']['position'] > $parseState['error']['position']
        ) {
            $parseState['error']['position'] = $parseState['state']['position'];
            $parseState['error']['line'] = $parseState['state']['line'];
            $parseState['error']['ParserException'] = $e;
        }
    }

    /**
     * Builds the anchored regex matching one terminal, optionally preceded
     * by a "dummy" match.
     *
     * @param string $type Terminal name, a key of $this->terminals.
     * @return string PCRE pattern with group 1 = dummy part, group 2 = terminal.
     */
    private function terminalPattern ( $type ) {
        return sprintf(
            '/^((?:%2$s)?)(%1$s)/S',
            $this->terminals[$type]['regex'],
            $this->dummies
        );
    }

    /**
     * Tells whether a token can start at the current position, without
     * consuming anything. Results are cached per token type and position.
     *
     * A terminal is tested against its own regex; any other symbol is tested
     * against every terminal of its first set.
     *
     * @param array $parseState State; its 'matches' cache is updated in place.
     * @param array $token      Token with a 'type' entry.
     * @return int|bool preg_match() result for a terminal, true/false otherwise.
     */
    protected function match ( &$parseState, $token ) {
        $id = $this->getId($parseState, $token);
        if ( !empty($parseState['matches'][$id]) )
            return $parseState['matches'][$id]['value'];

        $rest = substr($parseState['string'], $parseState['state']['position']);

        if ( !empty($this->terminals[$token['type']]) )
            return $parseState['matches'][$id]['value'] = preg_match(
                $this->terminalPattern($token['type']),
                $rest
            );

        foreach ( $this->firsts[$token['type']] as $k => $terminal )
            if ( preg_match($this->terminalPattern($k), $rest) )
                return $parseState['matches'][$id]['value'] = true;

        return $parseState['matches'][$id]['value'] = false;
    }

    /**
     * Parses one occurrence of a token at the current position, with
     * memoisation per token type and position.
     *
     * @param array $parseState State to modify in place.
     * @param array $token      Token with a 'type' entry.
     * @return mixed The nodes (passed through the translator when one is set),
     *               or a falsy value when the token does not parse here.
     */
    protected function consume ( &$parseState, $token ) {
        if ( $this->debug )
            $indent = str_repeat("\t", $parseState['state']['deepness']);

        $id = $this->getId($parseState, $token);

        if ( !empty($parseState['memo'][$id]) ) {
            if ( $this->debug )
                echo "\n\t\t$indent memo {$token['type']} : {$parseState['state']['position']} => {$parseState['memo'][$id]['position']}";
            if ( $parseState['memo'][$id]['value'] )
                $parseState['state']['position'] = $parseState['memo'][$id]['position'];
            return $parseState['memo'][$id]['value'];
        }

        // Placeholder entry: a re-entrant call for the same token at the same
        // position (left recursion) hits the memo above and gets null.
        $parseState['memo'][$id] = array(
            'value' => null,
            'position' => null,
        );

        if ( !empty($this->terminals[$token['type']]) ) {
            $nodes = $this->eat($parseState, $token);
        } elseif ( $this->match($parseState, $token) ) {
            $nodes = $this->test($parseState, $token);
        } else
            $nodes = null;

        if ( $nodes && $this->translator )
            $nodes = call_user_func($this->translator, $parseState, $token, $nodes);

        // Results of rules flagged 'left' are not kept in the memo.
        if ( empty($this->rules[$token['type']]['left']) )
            $parseState['memo'][$id] = array(
                'value' => $nodes,
                'position' => $parseState['state']['position'],
            );
        else
            unset($parseState['memo'][$id]);

        return $nodes;
    }

    /**
     * Tries the productions of a rule in order and keeps the first that
     * parses; on failure the state is restored before the next attempt.
     *
     * Outside the growing phase, a rule flagged 'seed' only tries its 'seed'
     * productions and then hands the result to grow(). During the growing
     * phase only the non-'seed' productions are tried.
     *
     * @param array $parseState State to modify in place.
     * @param array $token      Token naming the rule ('type').
     * @param bool  $growing    True when called from grow().
     * @return mixed Nodes of the successful production, null when the last
     *               attempt failed, or an empty array when none was tried.
     */
    protected function test ( &$parseState, $token, $growing = false ) {
        $parseState['state']['deepness']++;
        $state = $this->saveState($parseState);
        $id = $this->getId($parseState, $token);
        if ( $this->debug )
            $indent = str_repeat("\t", $parseState['state']['deepness']);

        $nodes = array();
        $error = false;
        $rule = $this->rules[$token['type']];

        foreach ( $rule['productions'] as $i => $production ) {
            $eligible = $growing
                ? empty($production['seed'])
                : ( empty($rule['seed']) || !empty($production['seed']) );
            if ( !$eligible )
                continue;

            try {
                if ( $this->debug )
                    echo "\n\t$indent [ $id : {$production['production']}";
                $error = false;
                $nodes = $this->production($parseState, $token, $production);
                if ( $this->debug )
                    echo "\nOOO\t$indent ]O $id : {$parseState['state']['position']}";
                $parseState['state']['deepness']--;
                break;
            } catch ( ParserException $e ) {
                if ( $this->debug )
                    echo "\n###\t$indent ]# $id : {$parseState['state']['position']}";
                // Record the error before rewinding: the position is still
                // the one where the production failed.
                $this->reportError($parseState, $token, $e);
                $error = true;
                $nodes = null;
                $this->setState($parseState, $state);
            }
        }

        if ( !$error && $nodes && !$growing && !empty($rule['seed']) ) {
            if ( $this->debug )
                echo "\n###\t$indent try to grow : {$parseState['state']['position']}";
            $nodes = $this->grow($parseState, $token, $state, $nodes);
        } elseif ( $this->debug )
            echo "\n###\t$indent dont try to grow : {$parseState['state']['position']}";

        return $nodes;
    }

    /**
     * Decides whether a parsed seed may be extended with the given follow
     * token, based on what comes next in the input, the enclosing production
     * and operator precedence / associativity.
     *
     * @param array $parseState State (received by value).
     * @param array $token      Token of the rule being grown (reads 'idx').
     * @param array $follow     Candidate follow token (reads 'precedence', 'associative').
     * @param mixed $precedence Precedence of the innermost pending operator.
     * @return bool True when growing should be attempted.
     */
    protected function doesGrow ( $parseState, $token, $follow, $precedence ) {
        if (
            $this->match($parseState, $follow)
            && (
                !empty($parseState['state']['ntoken'])
                && $this->match($parseState, $parseState['state']['ntoken'])
                && $follow['precedence'] < $parseState['state']['ntoken']['precedence']
            )
        )
            return false;

        return $this->match($parseState, $follow) && (
            empty($parseState['state']['production']['right'])
            || !empty($parseState['state']['production']['tokens'][$token['idx'] + 1])
            || $follow['precedence'] > $precedence
            || $follow['precedence'] === $precedence && $follow['associative'] === 'right'
        );
    }

    /**
     * Tries to extend an already parsed seed of a 'seed' rule.
     *
     * The seed is stored in the memo at the rule's start position, the state
     * is rewound to that start, and the rule's non-seed productions are tried
     * again through test(); this repeats for as long as the parse gets further.
     *
     * @param array $parseState State to modify in place.
     * @param array $token      Token naming the rule.
     * @param array $oldState   Snapshot taken at the start of the rule.
     * @param mixed $nodes      Nodes parsed so far.
     * @return mixed The grown nodes, or the nodes received when nothing could be added.
     */
    protected function grow ( &$parseState, $token, $oldState, $nodes ) {
        $precedence = 0;
        if ( $c = count($parseState['state']['operators']) )
            $precedence = $parseState['state']['operators'][$c - 1]['precedence'];

        if ( $this->debug )
            $indent = str_repeat("\t", $parseState['state']['deepness']);

        $oid = $this->getId($oldState, $token);
        $id = $this->getId($parseState, $token);

        if ( $this->debug )
            echo "\n\t$indent { grow : $oid => $id";

        $match = false;
        if ( !empty($this->follows[$token['type']]) )
            foreach ( $this->follows[$token['type']] as $type => $follow ) {
                if ( $this->doesGrow($parseState, $token, $follow, $precedence) ) {
                    $match = true;
                    break;
                }
            }

        if ( !$match ) {
            if ( $this->debug )
                echo "\n\t$indent } # grow : $id cause not in followset ";
            return $nodes;
        }

        $oldNodes = $nodes;

        if ( $this->translator )
            $nodes = call_user_func($this->translator, $parseState, $token, $nodes);

        // Plant the seed: consume() at the start position now returns it.
        $parseState['memo'][$oid] = array(
            'value' => $nodes,
            'position' => $parseState['state']['position'],
        );

        $position = $parseState['state']['position'];
        $state = $this->saveState($parseState);
        $this->setState($parseState, $oldState);

        if ( $this->debug )
            echo "\nGGG\t$indent grow procedure : $id";

        $newNodes = $this->test($parseState, $token, true);

        if ( !$newNodes ) {
            // Nothing gained: go back to where the seed ended.
            if ( $position >= $parseState['state']['position'] )
                $this->setState($parseState, $state);
            return $oldNodes;
        }

        $nodes = $newNodes;

        if ( $position < $parseState['state']['position'] )
            $nodes = $this->grow($parseState, $token, $oldState, $nodes);

        if ( $this->debug )
            echo "\nGGG\t$indent grow procedure successful : {$this->getId($parseState, $token)}";

        return $nodes;
    }

    /**
     * Parses every token of one production in sequence.
     *
     * While running, the state exposes the current production, the token that
     * follows the one being parsed ('ntoken') and the stack of operator tokens
     * met so far; these are put back on success. On failure the exception is
     * re-thrown as is and the state is left for the caller to restore.
     *
     * @param array $parseState State to modify in place.
     * @param array $token      Token naming the rule (not used here).
     * @param array $production Production with a 'tokens' list.
     * @return array One node (as built by work()) per token.
     * @throws ParserException When a token of the production fails.
     */
    protected function production ( &$parseState, $token, $production ) {
        if ( $this->debug )
            $indent = str_repeat("\t", $parseState['state']['deepness']);

        try {
            $prod = $parseState['state']['production'];
            $ntoken = $parseState['state']['ntoken'];

            $parseState['state']['production'] = $production;

            $nodes = array();
            $op = 0;

            foreach ( $production['tokens'] as $j => $nextToken ) {
                if ( $this->debug )
                    echo "\n\t\t$indent { {$nextToken['type']}{$nextToken['repeat']} : {$parseState['state']['position']}";

                if ( $nextToken['precedence'] ) {
                    $op++;
                    $parseState['state']['operators'][] = $nextToken;
                }

                // After the last token, what follows is whatever followed
                // the enclosing production.
                $parseState['state']['ntoken'] = !empty($production['tokens'][$j+1])
                    ? $production['tokens'][$j+1]
                    : $ntoken;

                $nodes[] = $this->work($parseState, $nextToken);

                if ( $this->debug )
                    echo "\nO\t\t$indent }O {$nextToken['type']}{$nextToken['repeat']} : {$parseState['state']['position']}";
            }

            while ( $op-- ) {
                array_pop($parseState['state']['operators']);
            }

            $parseState['state']['production'] = $prod;
            $parseState['state']['ntoken'] = $ntoken;

            return $nodes;
        } catch ( ParserException $e ) {
            if ( $this->debug )
                echo "\n#\t\t$indent }# {$nextToken['type']}{$nextToken['repeat']} : {$parseState['state']['position']}";
            throw $e;
        }
    }

    /**
     * Consumes one terminal (and the optional dummy text before it) at the
     * current position, advancing the position and the line counter.
     *
     * @param array $parseState State to modify in place.
     * @param array $token      Terminal token ('type' is a key of $this->terminals).
     * @return array|null Description of the match ('match' is the first capture
     *                    group of the terminal's regex when it is non-empty, the
     *                    whole terminal match otherwise), or null when the
     *                    terminal does not match here.
     */
    protected function eat ( &$parseState, $token ) {
        if ( $this->debug )
            $indent = str_repeat("\t", $parseState['state']['deepness']);

        $string = substr($parseState['string'], $parseState['state']['position']);
        $exp = $this->terminalPattern($token['type']);

        if ( !preg_match($exp, $string, $m) )
            return null;

        if ( $this->debug ) {
            $c = strlen($m[0]);
            echo "\n\t\t$indent consume {$token['type']} : {$parseState['state']['position']} : {$c}";
        }

        $parseState['state']['position'] += strlen($m[0]);
        $parseState['state']['line'] += substr_count($m[0], "\n");

        if ( $this->debug )
            echo "\n\t\t$indent match {$token['type']} : {$parseState['state']['position']}";

        // Drop the full match and the dummy group: $m[0] becomes the terminal
        // itself and $m[1..] the capture groups of its regex.
        $all = array_shift($m);
        array_shift($m);
        $len = strlen($m[0]);
        $start = $parseState['state']['position'] - $len;

        return array(
            'type' => $token['type'],
            'line' => $start
                ? substr_count($parseState['string'], "\n", 0, $start) + 1
                : 1,
            'position' => $start,
            'length' => $len,
            // Explicit test instead of empty(): a captured "0" is a value.
            'match' => ( isset($m[1]) && $m[1] !== '' ) ? $m[1] : $m[0],
            'litteral' => $parseState['litteral'] ? $all : '',
            'matches' => $m,
        );
    }

    /**
     * Parses a token according to its repeat modifier and wraps the result
     * in a node describing the span it covers.
     *
     * Modifiers: '.' one, '?' at most one, '+' at least one, '*' any number,
     * '&' must match without consuming, '!' must not match.
     *
     * @param array $parseState State to modify in place.
     * @param array $token      Token with 'type' and 'repeat'.
     * @return array Node with type, repeat, line, position, length, litteral and nodes.
     * @throws ParserException When the token cannot be parsed as required.
     */
    protected function work ( &$parseState, $token ) {
        $parseState['state']['deepness']++;
        $position = $parseState['state']['position'];

        // Stays null for an unknown modifier, instead of being undefined.
        $sub = null;

        switch ( $token['repeat'] ) {
            case '.' :
                $sub = $this->one($parseState, $token);
                break;
            case '?' :
                $sub = $this->oneAtMost($parseState, $token);
                break;
            case '+' :
                $sub = $this->oneAtLeast($parseState, $token);
                break;
            case '*' :
                $sub = $this->zeroAtLeast($parseState, $token);
                break;
            case '&' :
                $sub = $this->oneMore($parseState, $token);
                break;
            case '!' :
                $sub = $this->noMore($parseState, $token);
                break;
        }

        $parseState['state']['deepness']--;

        $length = $parseState['state']['position'] - $position;

        return array(
            'type' => $token['type'],
            'repeat' => $token['repeat'],
            'line' => ( $position ? substr_count($parseState['string'], "\n", 0, $position) : 0 ) + 1,
            'position' => $position,
            'length' => $length,
            'litteral' => $parseState['litteral']
                ? substr($parseState['string'], $position, $length)
                : '',
            'nodes' => $sub,
        );
    }

    /**
     * "id" : exactly one occurrence is required.
     *
     * @param array $parseState State to modify in place.
     * @param array $token      Token to parse.
     * @return mixed The parsed element.
     * @throws ParserException When the token does not parse.
     */
    protected function one ( &$parseState, $token ) {
        if ( !$elem = $this->consume($parseState, $token) )
            throw self::except($parseState, $token);
        return $elem;
    }

    /**
     * "id?" : zero or one occurrence.
     *
     * @param array $parseState State to modify in place.
     * @param array $token      Token to parse.
     * @return mixed The parsed element, or a falsy value when absent.
     */
    protected function oneAtMost ( &$parseState, $token ) {
        return $this->consume($parseState, $token);
    }

    /**
     * "id+" : one or more occurrences.
     *
     * @param array $parseState State to modify in place.
     * @param array $token      Token to parse.
     * @return array The parsed elements, in order.
     * @throws ParserException When not even one occurrence parses.
     */
    protected function oneAtLeast ( &$parseState, $token ) {
        $before = $parseState['state']['position'];
        $nodes = array($this->one($parseState, $token));

        // An element that consumed nothing would match again forever.
        if ( $parseState['state']['position'] <= $before )
            return $nodes;

        return $this->consumeWhileAdvancing($parseState, $token, $nodes);
    }

    /**
     * "id*" : any number of occurrences.
     *
     * @param array $parseState State to modify in place.
     * @param array $token      Token to parse.
     * @return array The parsed elements, in order (possibly empty).
     */
    protected function zeroAtLeast ( &$parseState, $token ) {
        return $this->consumeWhileAdvancing($parseState, $token, array());
    }

    /**
     * Appends occurrences of a token to $nodes until one fails, or until one
     * succeeds without consuming input (which would otherwise repeat forever,
     * since the same position yields the same result).
     *
     * @param array $parseState State to modify in place.
     * @param array $token      Token to parse.
     * @param array $nodes      Elements collected so far.
     * @return array $nodes with the newly parsed elements appended.
     */
    private function consumeWhileAdvancing ( &$parseState, $token, array $nodes ) {
        do {
            $before = $parseState['state']['position'];
            if ( !$elem = $this->consume($parseState, $token) )
                break;
            $nodes[] = $elem;
        } while ( $parseState['state']['position'] > $before );

        return $nodes;
    }

    /**
     * "id&" : the token must parse here, but nothing is consumed.
     *
     * @param array $parseState State; restored to its entry value on success.
     * @param array $token      Token to look ahead for.
     * @return mixed The parsed element.
     * @throws ParserException When the token does not parse.
     */
    protected function oneMore ( &$parseState, $token ) {
        $state = $this->saveState($parseState);
        if ( !$elem = $this->consume($parseState, $token) )
            throw self::except($parseState, $token);
        $this->setState($parseState, $state);
        return $elem;
    }

    /**
     * "id!" : the token must NOT parse here.
     *
     * @param array $parseState State to modify in place.
     * @param array $token      Token that must be absent.
     * @return bool Always true when the token is absent.
     * @throws ParserException ("Unexpected token") when the token parses.
     */
    protected function noMore ( &$parseState, $token ) {
        if ( $this->consume($parseState, $token) )
            throw self::except($parseState, $token, true);
        return true;
    }

    /**
     * Builds the exception describing a token that was expected but missing,
     * or present but not allowed, at the current position.
     *
     * @param array $parseState  State; reads 'string' and ['state']['position'|'line'].
     * @param array $token       Offending token (reads 'type').
     * @param bool  $notExpected True for "Unexpected", false for "Expected".
     * @return ParserException The exception, for the caller to throw. The column
     *                         is 1-based and counted in bytes.
     */
    static protected function except ( &$parseState, $token, $notExpected = false ) {
        $position = $parseState['state']['position'];
        $length = strlen($parseState['string']);

        // Column = distance from the last newline before $position. The
        // negative offset makes strrpos() search backwards from $position - 1
        // without copying the input.
        $col = $position + 1;
        if ( $position > 0 && $position <= $length ) {
            $newline = strrpos($parseState['string'], "\n", $position - $length - 1);
            if ( $newline !== false )
                $col = $position - $newline;
        }

        return new ParserException(sprintf(
            '%5$sxpected token : "%1$s" at position %2$s on line "%3$s" and col "%4$s" !',
            $token['type'],
            $position,
            $parseState['state']['line'],
            $col,
            $notExpected ? 'Une' : 'E'
        ));
    }
}
?>