getMessage()), 0, $e ); } }

    /**
     * Builds a grammar from its textual definition.
     *
     * @param string $string Grammar source text.
     * @param string $path   Directory used as the base for #include(...) files.
     * @return self
     * @throws Exception When the grammar text is malformed.
     */
    static public function fromString ( $string, $path = '.' ) {
        return new self($string, $path);
    }

    private
        // Base directory used to resolve #include(...) files.
        $path = '',
        // Possessive alternation of every ignored terminal regex, '' when there is none.
        $dummyString = '',
        // Regexes of the terminals flagged with '!', indexed by themselves.
        $dummies = array(),
        // Terminal records (type, regex, ignore, line) indexed by terminal name.
        $terminals = array(),
        // Rule records (type, left, right, seed, productions) indexed by rule name.
        $rules = array(),
        // Per rule name: terminal tokens a production of that rule can start with.
        $firsts = array(),
        // Per rule name: tokens recorded by followSet().
        $follows = array();

    /**
     * @param string $string Grammar source text.
     * @param string $path   Directory used as the base for #include(...) files.
     * @throws Exception When the grammar text is malformed.
     */
    private function __construct ( $string, $path = '.' ) {
        // realpath() returns false for a missing directory; keep the raw path in
        // that case so includes are not silently resolved against '/'.
        $resolved = realpath($path);
        $this->path = ( $resolved !== false ? $resolved : $path );
        $this->parseGrammar($string);
    }

    /**
     * Parses the whole grammar text, then computes recursion flags and the
     * first / follow tables.
     *
     * The text is read line by line. Lines equal to '%%' separate sections:
     * section 0 holds terminals, section 1 holds productions and
     * #include(...) directives, anything after the second separator is
     * ignored. Empty lines and lines starting with '//' are skipped.
     *
     * @param string $string Grammar source text.
     * @throws Exception On a malformed line or when a production references a
     *                   symbol that is neither a terminal nor a rule.
     */
    private function parseGrammar ( $string ) {
        // Accept CRLF as well as LF so a trailing "\r" never ends up in a
        // separator, a symbol name or a terminal regex.
        $lines = preg_split('!\r?\n!', $string);
        $seen = array();
        $section = 0;

        foreach ( $lines as $index => $line ) {
            $current = $index + 1; // 1-based line number used in error messages

            if ( $line === '%%' ) {
                // section separator
                $section++;
            } elseif ( preg_match('!^(//.*)?$!', $line) ) {
                // one line comments and empty lines
            } elseif ( $section < 1 ) {
                // terminal
                if ( $dummy = $this->parseTerminal($line, $current) )
                    $this->dummies[$dummy] = $dummy;
            } elseif ( $section < 2 ) {
                // production
                if ( preg_match('!\#include\(\s*(.+)\s*\)\s*$!', $line, $m) ) {
                    // The greedy capture keeps blanks before ')'; trim them
                    // so "#include( file )" resolves to "file".
                    $seen = $this->mergeGrammar(trim($m[1]), $seen);
                } else
                    // Entries already in $seen win, so the first line on
                    // which a symbol was used is the one reported.
                    $seen = array_merge($this->parseProduction($line, $current), $seen);
            } else
                break; // eof comments
        }

        if ( $this->dummies )
            $this->dummyString = '(?:' . implode('|', $this->dummies) . ')++';

        if ( $undefined = array_diff_key($seen, $this->terminals, $this->rules) )
            throw new Exception(sprintf(
                'Malformed Grammar : undefined symbol "%1$s" on line "%2$s".',
                key($undefined),
                current($undefined)
            ));

        $this->findRecursions();
        $this->firstSet();
        $this->followSet();
    }

    /**
     * Loads another grammar file and merges its dummies, terminals and rules
     * into this grammar. Existing dummies and terminals are kept; productions
     * of a rule defined on both sides are appended and re-indexed.
     *
     * @param string $file Path of the included file, relative to $this->path.
     * @param array  $seen Symbols referenced so far (symbol => line).
     * @return array $seen with every rule of the included grammar added.
     */
    private function mergeGrammar ( $file, $seen ) {
        $g = self::fromFile($this->path . '/' . $file);

        foreach ( $g->dummies as $k => $v )
            if ( empty($this->dummies[$k]) )
                $this->dummies[$k] = $v;

        foreach ( $g->terminals as $k => $v )
            if ( empty($this->terminals[$k]) )
                $this->terminals[$k] = $v;

        foreach ( $g->rules as $k => $v ) {
            $seen[$k] = 1;
            if ( empty($this->rules[$k]) )
                $this->rules[$k] = $v;
            else {
                $this->rules[$k]['seed'] |= $v['seed'];
                $this->rules[$k]['productions'] = array_merge(
                    $this->rules[$k]['productions'],
                    $v['productions']
                );
                // Keep every production's 'idx' equal to its position in the
                // merged list.
                array_walk($this->rules[$k]['productions'], function ( &$value, $key ) {
                    $value['idx'] = $key;
                });
            }
        }

        return $seen;
    }

    /**
     * Parses one terminal definition of the form "[!|.]NAME -> REGEX" and
     * registers it in $this->terminals. Only the '!' prefix is interpreted
     * here: it marks the terminal as ignored.
     *
     * @param string $line    Raw grammar line.
     * @param int    $current 1-based line number, stored and used in errors.
     * @return string|null The terminal regex when the terminal is ignored,
     *                     null otherwise.
     * @throws Exception When the line does not match the expected form.
     */
    private function parseTerminal ( $line, $current ) {
        if ( !preg_match('!^((?:\!|\.)?)([^\s]++)\s+->\s+!', $line, $m) )
            throw new Exception(sprintf(
                'Malformed input in grammar at line "%1$s" !',
                $current
            ));

        // Split on the first arrow only: the regex itself may contain " -> ".
        $terminal = preg_split('!\s+->\s+!', $line, 2);
        $ignored = ( $m[1] === '!' );

        $this->terminals[$m[2]] = array(
            'type' => $m[2],
            'regex' => $terminal[1],
            'ignore' => $ignored,
            'line' => $current,
        );

        if ( $ignored )
            return $terminal[1];

        return null;
    }

    /**
     * Parses one production of the form "NAME -> tokens", "NAME *-> tokens"
     * or "NAME <- tokens" and appends it to the rule NAME, creating the rule
     * record on first use. Any arrow other than '->' flags the production and
     * its rule as 'seed'.
     *
     * Each token may carry a "<digits><" or "<digits>>" prefix (precedence
     * with left / right associativity) and a one character suffix among
     * + * ? . & ! ; '.' is appended when no suffix is present.
     *
     * @param string $line    Raw grammar line.
     * @param int    $current 1-based line number, stored and used in errors.
     * @return array Symbols referenced by the production (symbol => $current).
     * @throws Exception When the line does not match the expected form.
     */
    private function parseProduction ( $line, $current ) {
        if ( !preg_match('!^(\!?)([^\s]++)\s+(\*?->|<-)\s+!', $line, $m) )
            throw new Exception(sprintf(
                'Malformed input in grammar at line "%1$s" !',
                $current
            ));

        $seen = array();
        // $rule = array(name, arrow, right hand side)
        $rule = preg_split('!\s*(\*?->|<-)\s*!', $line, 2, PREG_SPLIT_DELIM_CAPTURE);
        $name = $rule[0];

        if ( empty($this->rules[$name]) )
            $this->rules[$name] = array(
                'type' => $name,
                'left' => 0,
                'right' => 0,
                'seed' => 0,
                'productions' => array(),
            );

        $this->rules[$name]['seed'] = $this->rules[$name]['seed'] || $rule[1] !== '->';

        $i = 0;
        $suffixes = array('+', '*', '?', '.', '&', '!');

        $tokens = array_map(function ( $token ) use ( &$seen, $current, &$i, $suffixes ) {
            $precedence = 0;
            $associative = 0;

            // NOTE(review): a bare '<' or '>' prefix (no digits) sets the
            // associativity but is not stripped from the token by the
            // replacement below; original behaviour kept, confirm intent.
            if ( preg_match('!^((?:[0-9]++)?)(<|>).++$!', $token, $m) ) {
                $token = preg_replace('!^([0-9]++)(<|>)!', '', $token);
                $precedence = (int) $m[1];
                $associative = ( $m[2] === '<' ? 'left' : 'right' );
            }

            // A one character token is always a bare symbol, even when that
            // character is one of the suffixes.
            if ( strlen($token) === 1 || !in_array(substr($token, -1), $suffixes, true) )
                $token .= '.';

            $type = substr($token, 0, -1);
            $seen[$type] = $current;

            return array(
                'idx' => $i++,
                'type' => $type,
                'repeat' => substr($token, -1),
                'precedence' => $precedence,
                'associative' => $associative,
            );
        }, array_filter(preg_split('!\s+!', $rule[2])));

        $this->rules[$name]['productions'][] = array(
            // Position of this production inside the rule (the list has not
            // been extended yet when count() is evaluated).
            'idx' => count($this->rules[$name]['productions']),
            'type' => $name,
            'line' => $current,
            'rule' => $rule[2],
            'left' => 0,
            'right' => 0,
            'seed' => $rule[1] !== '->',
            'production' => $line,
            'tokens' => $tokens,
        );

        return $seen;
    }

    /**
     * Flags every production, and its owning rule, with 'left' and / or
     * 'right' when recurse() reports a recursion from that side.
     */
    private function findRecursions () {
        foreach ( $this->rules as $name => $rule )
            foreach ( $rule['productions'] as $idx => $production ) {
                if ( $this->recurse($name, $production) ) {
                    $production['left'] = 1;
                    $this->rules[$name]['left'] = 1;
                }
                if ( $this->recurse($name, $production, true) ) {
                    $production['right'] = 1;
                    $this->rules[$name]['right'] = 1;
                }
                $this->rules[$name]['productions'][$idx] = $production;
            }
    }

    /**
     * Tells whether rule $rul can be reached again through the leading
     * (or, with $right, trailing) tokens of $prod.
     *
     * @param string $rul     Name of the rule being tested.
     * @param array  $prod    Production record to walk.
     * @param bool   $right   Walk the tokens from the end instead of the start.
     * @param array  $visited Rule names already entered on the current path.
     * @return bool
     */
    private function recurse ( $rul, $prod, $right = false, $visited = array() ) {
        // to support filters
        if ( !empty($visited[$prod['type']]) )
            return ( $prod['type'] === $rul );

        $visited[$prod['type']] = 1;

        $tokens = $prod['tokens'];
        if ( $right )
            $tokens = array_reverse($tokens);

        foreach ( $tokens as $token ) {
            if ( !empty($this->rules[$token['type']]) )
                foreach ( $this->rules[$token['type']]['productions'] as $production )
                    if ( $this->recurse($rul, $production, $right, $visited) )
                        return true;

            // '.' and '+' stop the walk; presumably these are the suffixes
            // that cannot match nothing.
            if ( in_array($token['repeat'], array('.', '+'), true) )
                return false;
        }

        return false;
    }

    /**
     * Fills $this->firsts with, for each rule, the merged result of first()
     * over all its productions.
     */
    private function firstSet () {
        foreach ( $this->rules as $name => $rule )
            foreach ( $rule['productions'] as $production )
                $this->firsts[$name] = array_merge(
                    !empty($this->firsts[$name]) ? $this->firsts[$name] : array(),
                    $this->first($production)
                );
    }

    /**
     * Collects the terminal tokens a production can start with.
     *
     * @param array $production Production record.
     * @param array $visited    Rule names already entered on the current path.
     * @return array Token records indexed by terminal name.
     */
    private function first ( $production, $visited = array() ) {
        if ( !empty($visited[$production['type']]) )
            return array();

        $visited[$production['type']] = 1;
        $firsts = array();

        foreach ( $production['tokens'] as $token ) {
            if ( !empty($this->terminals[$token['type']]) )
                $firsts[$token['type']] = $token;
            elseif ( !empty($this->rules[$token['type']]) )
                // Distinct loop variable: the parameter is no longer shadowed.
                foreach ( $this->rules[$token['type']]['productions'] as $alternative )
                    $firsts = array_merge($firsts, $this->first($alternative, $visited));

            // Same stop condition as recurse(): '.' and '+' end the scan.
            if ( in_array($token['repeat'], array('.', '+'), true) )
                break;
        }

        return $firsts;
    }

    /**
     * Fills $this->follows: for every production flagged 'left' whose first
     * token names a rule flagged 'left' and that has a second token, that
     * second token is appended to the list kept for the first token's rule.
     */
    private function followSet () {
        foreach ( $this->rules as $rule )
            foreach ( $rule['productions'] as $production ) {
                if ( !$production['left'] )
                    continue;

                $head = $production['tokens'][0]['type'];
                if ( empty($this->rules[$head]['left']) || empty($production['tokens'][1]) )
                    continue;

                if ( empty($this->follows[$head]) )
                    $this->follows[$head] = array();

                $this->follows[$head][] = $production['tokens'][1];
            }
    }

    /** @return string Regex matching a run of ignored terminals, '' if none. */
    public function getDummyString () {
        return $this->dummyString;
    }

    /** @return array Terminal records indexed by terminal name. */
    public function getTerminals () {
        return $this->terminals;
    }

    /** @return array Rule records indexed by rule name. */
    public function getRules () {
        return $this->rules;
    }

    /** @return array First table indexed by rule name. */
    public function getFirsts () {
        return $this->firsts;
    }

    /** @return array Follow table indexed by rule name. */
    public function getFollows () {
        return $this->follows;
    }

    /** @return string print_r() dump of the whole grammar object. */
    public function __toString() {
        return print_r($this, true);
    }
}
?>