366 lines
8.2 KiB
PHP
Executable File
366 lines
8.2 KiB
PHP
Executable File
<?php
|
|
|
|
/**
 * Parses a textual grammar definition into terminals and production rules,
 * then derives recursion flags, first-token sets and follow-token lists.
 *
 * Layout of the grammar text, as read by parseGrammar():
 *   - a line consisting of exactly "%%" separates sections;
 *   - empty lines and lines starting with "//" are skipped;
 *   - section 0 holds terminals:   `NAME -> regex` (an optional "!" or "."
 *     prefix is accepted; "!" marks the terminal as ignored);
 *   - section 1 holds productions: `Rule -> tokens`, `Rule *-> tokens`,
 *     `Rule <- tokens`, or `#include(file)` to merge another grammar file;
 *   - everything after the second "%%" is not read.
 *
 * Instances are created through Grammar::fromFile() or Grammar::fromString().
 */
class Grammar
{
    /** @var string|false Result of realpath() on the base path; used to resolve #include files. */
    private $path = '';

    /** @var string Possessive alternation of all ignored terminal regexes, or '' when there are none. */
    private $dummyString = '';

    /** @var array Regexes of ignored ("!") terminals, keyed by the regex itself. */
    private $dummies = array();

    /** @var array Terminal records (type, regex, ignore, line) keyed by terminal name. */
    private $terminals = array();

    /** @var array Rule records (type, left, right, seed, productions) keyed by rule name. */
    private $rules = array();

    /** @var array For each rule name, the tokens collected by first() keyed by terminal name. */
    private $firsts = array();

    /** @var array For each rule name, the list of tokens collected by followSet(). */
    private $follows = array();

    /**
     * Builds a grammar from a file.
     *
     * @param string      $file Path of the grammar file.
     * @param string|null $path Base directory for #include lines; defaults to the file's directory.
     *
     * @return Grammar
     *
     * @throws Exception When the file is missing, is a directory, cannot be read,
     *                   or its content is malformed (the parse error is chained as "previous").
     */
    public static function fromFile($file, $path = null)
    {
        if (!file_exists($file) || is_dir($file)) {
            throw new Exception('File not found : ' . $file);
        }

        try {
            $contents = file_get_contents($file);

            // file_get_contents() signals failure with false, which would
            // otherwise be parsed as an empty grammar.
            if ($contents === false) {
                throw new Exception('File could not be read');
            }

            return self::fromString($contents, $path ? $path : dirname($file));
        } catch (Exception $e) {
            throw new Exception(
                sprintf('Error in file "%1$s" : "%2$s"', $file, $e->getMessage()),
                0,
                $e
            );
        }
    }

    /**
     * Builds a grammar from its textual definition.
     *
     * @param string $string Grammar text.
     * @param string $path   Base directory for #include lines.
     *
     * @return Grammar
     *
     * @throws Exception When the text is malformed or references undefined symbols.
     */
    public static function fromString($string, $path = '.')
    {
        return new self($string, $path);
    }

    /**
     * @param string $string Grammar text.
     * @param string $path   Base directory for #include lines.
     *
     * @throws Exception See parseGrammar().
     */
    private function __construct($string, $path = '.')
    {
        $this->path = realpath($path);
        $this->parseGrammar($string);
    }

    /**
     * Reads the grammar text line by line, fills terminals and rules, checks
     * that every referenced symbol is defined, then computes the derived data.
     *
     * @param string $string Grammar text.
     *
     * @throws Exception On a malformed line or an undefined symbol.
     */
    private function parseGrammar($string)
    {
        // Accept Unix, Windows and old Mac line endings; a stray "\r" would
        // otherwise prevent "%%" and empty lines from being recognised.
        $lines = preg_split('!\r\n|\n|\r!', $string);
        $seen = array();     // symbol name => line where it was referenced
        $section = 0;

        foreach ($lines as $index => $line) {
            $current = $index + 1; // 1-based line number used in messages

            if ($line === '%%') { // section separator
                $section++;
                continue;
            }

            if (preg_match('!^(//.*)?$!', $line)) { // one line comments and empty lines
                continue;
            }

            if ($section < 1) { // terminal
                $dummy = $this->parseTerminal($line, $current);
                if ($dummy) {
                    $this->dummies[$dummy] = $dummy;
                }
            } elseif ($section < 2) { // production
                // The file name is matched lazily so that whitespace before
                // the closing parenthesis is not part of it.
                if (preg_match('!\#include\(\s*(.+?)\s*\)\s*$!', $line, $m)) {
                    $seen = $this->mergeGrammar($m[1], $seen);
                } else {
                    // Earlier entries win on conflict, so the first
                    // referencing line is the one kept for each symbol.
                    $seen = array_merge($this->parseProduction($line, $current), $seen);
                }
            } else {
                break; // eof comments
            }
        }

        if ($this->dummies) {
            $this->dummyString = '(?:' . implode('|', $this->dummies) . ')++';
        }

        $undefined = array_diff_key($seen, $this->terminals, $this->rules);
        if ($undefined) {
            throw new Exception(sprintf(
                'Malformed Grammar : undefined symbol "%1$s" on line "%2$s".',
                key($undefined),
                current($undefined)
            ));
        }

        $this->findRecursions();
        $this->firstSet();
        $this->followSet();
    }

    /**
     * Loads another grammar file (relative to $this->path) and merges its
     * dummies, terminals and rules into this grammar. Existing dummies and
     * terminals are kept; for a rule present on both sides the productions
     * are concatenated and re-indexed.
     *
     * @param string $file File name taken from the #include line.
     * @param array  $seen Symbols referenced so far.
     *
     * @return array $seen with every rule name of the included grammar added.
     *
     * @throws Exception When the included file cannot be loaded or parsed.
     */
    private function mergeGrammar($file, $seen)
    {
        $included = self::fromFile($this->path . '/' . $file);

        foreach ($included->dummies as $key => $regex) {
            if (empty($this->dummies[$key])) {
                $this->dummies[$key] = $regex;
            }
        }

        foreach ($included->terminals as $key => $terminal) {
            if (empty($this->terminals[$key])) {
                $this->terminals[$key] = $terminal;
            }
        }

        foreach ($included->rules as $key => $rule) {
            $seen[$key] = 1;

            if (empty($this->rules[$key])) {
                $this->rules[$key] = $rule;
                continue;
            }

            $this->rules[$key]['seed'] |= $rule['seed'];

            $productions = array_merge($this->rules[$key]['productions'], $rule['productions']);
            foreach (array_keys($productions) as $idx) {
                $productions[$idx]['idx'] = $idx;
            }
            $this->rules[$key]['productions'] = $productions;
        }

        return $seen;
    }

    /**
     * Parses one terminal line (`[!|.]NAME -> regex`) and records it.
     *
     * @param string $line    Line content.
     * @param int    $current 1-based line number.
     *
     * @return string|null The terminal's regex when it is marked ignored ("!"), null otherwise.
     *
     * @throws Exception When the line does not have the expected shape.
     */
    private function parseTerminal($line, $current)
    {
        if (!preg_match('!^((?:\!|\.)?)([^\s]++)\s+->\s+!', $line, $m)) {
            throw new Exception(sprintf(
                'Malformed input in grammar at line "%1$s" !',
                $current
            ));
        }

        // Split on the first arrow only: the regex part may itself contain " -> ".
        $terminal = preg_split('!\s+->\s+!', $line, 2);
        $ignored = $m[1] === '!';

        $this->terminals[$m[2]] = array(
            'type'   => $m[2],
            'regex'  => $terminal[1],
            'ignore' => $ignored,
            'line'   => $current,
        );

        return $ignored ? $terminal[1] : null;
    }

    /**
     * Parses one production line (`Rule -> tokens`, `Rule *-> tokens` or
     * `Rule <- tokens`) and appends it to the rule's productions.
     *
     * @param string $line    Line content.
     * @param int    $current 1-based line number.
     *
     * @return array Symbols referenced by the production, mapped to $current.
     *
     * @throws Exception When the line does not have the expected shape.
     */
    private function parseProduction($line, $current)
    {
        if (!preg_match('!^(\!?)([^\s]++)\s+(\*?->|<-)\s+!', $line)) {
            throw new Exception(sprintf(
                'Malformed input in grammar at line "%1$s" !',
                $current
            ));
        }

        // [0] rule name (including any "!" prefix), [1] arrow, [2] right-hand side.
        $rule = preg_split('!\s*(\*?->|<-)\s*!', $line, 2, PREG_SPLIT_DELIM_CAPTURE);
        $name = $rule[0];
        $seed = $rule[1] !== '->'; // any arrow other than "->" flags the production as seed

        if (empty($this->rules[$name])) {
            $this->rules[$name] = array(
                'type'        => $name,
                'left'        => 0,
                'right'       => 0,
                'seed'        => 0,
                'productions' => array(),
            );
        }

        $this->rules[$name]['seed'] = $this->rules[$name]['seed'] || $seed;

        $seen = array();
        $tokens = array();
        foreach (array_filter(preg_split('!\s+!', $rule[2])) as $raw) {
            $token = $this->parseToken($raw, count($tokens));
            $seen[$token['type']] = $current;
            $tokens[] = $token;
        }

        // Position of this production within its rule, matching the
        // re-indexing done by mergeGrammar().
        $idx = count($this->rules[$name]['productions']);

        $this->rules[$name]['productions'][] = array(
            'idx'        => $idx,
            'type'       => $name,
            'line'       => $current,
            'rule'       => $rule[2],
            'left'       => 0,
            'right'      => 0,
            'seed'       => $seed,
            'production' => $line,
            'tokens'     => $tokens,
        );

        return $seen;
    }

    /**
     * Turns one whitespace-delimited item of a production's right-hand side
     * into a token record.
     *
     * The item may start with a precedence/associativity marker (digits
     * followed by "<" or ">") and may end with one of the suffix characters
     * "+", "*", "?", ".", "&", "!"; when no suffix is present "." is used.
     *
     * @param string $token Raw item text (non-empty).
     * @param int    $idx   Position of the token inside the production.
     *
     * @return array Record with keys idx, type, repeat, precedence, associative.
     */
    private function parseToken($token, $idx)
    {
        $precedence = 0;
        $associative = 0;

        if (preg_match('!^((?:[0-9]++)?)(<|>).++$!', $token, $m)) {
            // NOTE(review): the match above accepts a bare "<" / ">" prefix,
            // but only a digit-prefixed marker is stripped here, so "<name"
            // keeps its "<" in the symbol name. Left as is because stripping
            // it would rename symbols literally called e.g. "<=" - confirm intent.
            $token = preg_replace('!^([0-9]++)(<|>)!', '', $token);
            $precedence = (int) $m[1];
            $associative = ($m[2] === '<' ? 'left' : 'right');
        }

        // A single character is always a name, never a suffix.
        $last = $token[strlen($token) - 1];
        if (strlen($token) === 1 || !in_array($last, array('+', '*', '?', '.', '&', '!'), true)) {
            $token .= '.';
        }

        return array(
            'idx'         => $idx,
            'type'        => substr($token, 0, -1),
            'repeat'      => substr($token, -1),
            'precedence'  => $precedence,
            'associative' => $associative,
        );
    }

    /**
     * Flags every production, and the rule owning it, with 'left' and/or
     * 'right' when recurse() reports that it leads back to its own rule from
     * the corresponding end.
     */
    private function findRecursions()
    {
        foreach ($this->rules as $name => $rule) {
            foreach ($rule['productions'] as $idx => $production) {
                if ($this->recurse($name, $production)) {
                    $production['left'] = 1;
                    $this->rules[$name]['left'] = 1;
                }

                if ($this->recurse($name, $production, true)) {
                    $production['right'] = 1;
                    $this->rules[$name]['right'] = 1;
                }

                $this->rules[$name]['productions'][$idx] = $production;
            }
        }
    }

    /**
     * Tells whether a production can reach rule $rul through its leading
     * tokens (or trailing tokens when $right is true).
     *
     * Tokens are followed in order; the walk stops at the first token whose
     * suffix is "." or "+", other suffixes let it continue to the next token.
     *
     * @param string $rul     Name of the rule being searched for.
     * @param array  $prod    Production record to inspect.
     * @param bool   $right   Walk the tokens from the end instead of the start.
     * @param array  $visited Rule names already entered on this path.
     *
     * @return bool
     */
    private function recurse($rul, $prod, $right = false, $visited = array())
    {
        // to support filters
        if (!empty($visited[$prod['type']])) {
            return ($prod['type'] === $rul);
        }
        $visited[$prod['type']] = 1;

        $tokens = $right ? array_reverse($prod['tokens']) : $prod['tokens'];

        foreach ($tokens as $token) {
            if (!empty($this->rules[$token['type']])) {
                foreach ($this->rules[$token['type']]['productions'] as $candidate) {
                    if ($this->recurse($rul, $candidate, $right, $visited)) {
                        return true;
                    }
                }
            }

            if (in_array($token['repeat'], array('.', '+'), true)) {
                return false;
            }
        }

        return false;
    }

    /**
     * Fills $this->firsts: for each rule, the merged result of first() over
     * all of its productions.
     */
    private function firstSet()
    {
        foreach ($this->rules as $name => $rule) {
            foreach ($rule['productions'] as $production) {
                $known = !empty($this->firsts[$name]) ? $this->firsts[$name] : array();
                $this->firsts[$name] = array_merge($known, $this->first($production));
            }
        }
    }

    /**
     * Collects the terminal tokens a production can start with.
     *
     * Terminals are recorded directly; rule references are expanded
     * recursively. The walk stops after the first token whose suffix is "."
     * or "+".
     *
     * @param array $production Production record to inspect.
     * @param array $visited    Rule names already entered on this path.
     *
     * @return array Token records keyed by terminal name.
     */
    private function first($production, $visited = array())
    {
        if (!empty($visited[$production['type']])) {
            return array();
        }
        $visited[$production['type']] = 1;

        $firsts = array();

        foreach ($production['tokens'] as $token) {
            if (!empty($this->terminals[$token['type']])) {
                $firsts[$token['type']] = $token;
            } elseif (!empty($this->rules[$token['type']])) {
                foreach ($this->rules[$token['type']]['productions'] as $candidate) {
                    $firsts = array_merge($firsts, $this->first($candidate, $visited));
                }
            }

            if (in_array($token['repeat'], array('.', '+'), true)) {
                break;
            }
        }

        return $firsts;
    }

    /**
     * Fills $this->follows: for every production flagged 'left' whose first
     * token names a rule that is itself flagged 'left' and that has a second
     * token, the second token is appended to the list kept for the first
     * token's type.
     */
    private function followSet()
    {
        foreach ($this->rules as $rule) {
            foreach ($rule['productions'] as $production) {
                if (!$production['left']) {
                    continue;
                }

                $head = $production['tokens'][0]['type'];

                if (empty($this->rules[$head]['left']) || empty($production['tokens'][1])) {
                    continue;
                }

                if (empty($this->follows[$head])) {
                    $this->follows[$head] = array();
                }

                $this->follows[$head][] = $production['tokens'][1];
            }
        }
    }

    /**
     * @return string Combined regex of the ignored terminals, '' when there are none.
     */
    public function getDummyString()
    {
        return $this->dummyString;
    }

    /**
     * @return array Terminal records keyed by terminal name.
     */
    public function getTerminals()
    {
        return $this->terminals;
    }

    /**
     * @return array Rule records keyed by rule name.
     */
    public function getRules()
    {
        return $this->rules;
    }

    /**
     * @return array First-token records per rule name.
     */
    public function getFirsts()
    {
        return $this->firsts;
    }

    /**
     * @return array Follow-token lists per rule name.
     */
    public function getFollows()
    {
        return $this->follows;
    }

    /**
     * @return string print_r() dump of the whole object.
     */
    public function __toString()
    {
        return print_r($this, true);
    }
}
|
|
|
|
?>
|