609 lines
15 KiB
PHP
Executable File
609 lines
15 KiB
PHP
Executable File
<?php
|
|
|
|
/**
 * Exception type raised by SGParser for every parsing failure.
 *
 * It adds nothing to the base Exception; it only exists so that callers can
 * catch parser errors specifically.
 */
class ParserException extends Exception
{
}
|
|
|
|
/**
 * Grammar-driven backtracking parser.
 *
 * The parser walks the rules exposed by a Grammar object, matching terminals
 * with regular expressions. Results of consumed tokens are memoised per
 * "type:position" so that backtracking does not redo work, and rules flagged
 * with a 'seed' are first parsed through their seed productions and then
 * "grown" through the remaining productions (see test() and grow()).
 *
 * All state of a single parse lives in a plain array ("parse state") that is
 * passed by reference through the protected methods; see getState() for its
 * layout.
 */
class SGParser
{
    /** @var mixed When truthy, a trace of the parse is echoed. */
    protected $debug = 0;

    /**
     * @var callable|null Optional callback called as
     *      (parseState, token, nodes) for every successfully consumed token;
     *      its return value replaces the nodes.
     */
    protected $translator = null;

    /** @var Grammar|null Grammar the tables below were read from. */
    protected $grammar = null;

    /** @var string|null Regex fragment for input that may be skipped before a terminal. */
    protected $dummies = null;

    /** @var array|null Terminals indexed by token type; each entry has a 'regex' key. */
    protected $terminals = null;

    /** @var array|null Rules indexed by token type ('productions', 'seed', 'left'). */
    protected $rules = null;

    /** @var array|null First sets: symbol => array keyed by terminal type. */
    protected $firsts = null;

    /** @var array|null Follow sets: symbol => tokens carrying 'precedence' / 'associative'. */
    protected $follows = null;

    /**
     * Builds a parser from a grammar file.
     *
     * @param string        $file       Path handed to Grammar::fromFile().
     * @param callable|null $translator Optional node translator.
     * @return static
     */
    public static function fromFile($file, $translator = null)
    {
        return new static(Grammar::fromFile($file), $translator);
    }

    /**
     * Builds a parser from a grammar given as a string.
     *
     * @param string        $string     Source handed to Grammar::fromString().
     * @param callable|null $translator Optional node translator.
     * @return static
     */
    public static function fromString($string, $translator = null)
    {
        return new static(Grammar::fromString($string), $translator);
    }

    /**
     * Caches the lookup tables of the grammar on the parser.
     *
     * @param Grammar       $grammar
     * @param callable|null $translator
     */
    protected function __construct(Grammar $grammar, $translator = null)
    {
        $this->grammar    = $grammar;
        $this->dummies    = $grammar->getDummyString();
        $this->terminals  = $grammar->getTerminals();
        $this->rules      = $grammar->getRules();
        $this->firsts     = $grammar->getFirsts();
        $this->follows    = $grammar->getFollows();
        $this->translator = $translator;
    }

    /**
     * Turns the echoed parse trace on or off.
     *
     * @param mixed $debug Truthy to enable tracing.
     * @return $this
     */
    public function setDebug($debug)
    {
        $this->debug = $debug;

        return $this;
    }

    /**
     * Parses the content of a file.
     *
     * @param string $file     Path of the file to parse.
     * @param bool   $litteral When truthy, nodes keep the raw text they cover.
     * @return mixed The nodes of the start symbol (see parseString()).
     * @throws ParserException When the file is missing, cannot be read, or
     *                         does not match the grammar (the parse error is
     *                         chained as the previous exception).
     */
    public function parseFile($file, $litteral = false)
    {
        if (!file_exists($file)) {
            throw new ParserException(sprintf('File "%1$s" does not exist !', $file));
        }

        // file_get_contents() signals failure with false; do not parse that as input.
        $contents = file_get_contents($file);
        if ($contents === false) {
            throw new ParserException(sprintf('File "%1$s" could not be read !', $file));
        }

        try {
            return $this->parseString($contents, $litteral);
        } catch (ParserException $e) {
            throw new ParserException(
                sprintf('Error parsing file "%1$s" : "%2$s"', $file, $e->getMessage()),
                0,
                $e
            );
        }
    }

    /**
     * Parses a string and requires the whole input to be consumed.
     *
     * @param string $string   Input to parse.
     * @param bool   $litteral When truthy, nodes keep the raw text they cover.
     * @param string $symbol   Start symbol.
     * @param string $repeat   Repeat operator applied to the start symbol.
     * @return mixed The 'nodes' entry of the tree returned by parse().
     * @throws ParserException When the input does not match, or when anything
     *                         other than dummy input is left after the match.
     */
    public function parseString($string, $litteral = false, $symbol = 'S', $repeat = '.')
    {
        $parseState = $this->getState($string, $litteral);
        $tree       = $this->parse($parseState, $symbol, $repeat);

        // Only skippable (dummy) input may remain after the start symbol.
        $rest = substr($string, $parseState['state']['position']);
        if (!preg_match(sprintf('/^(%1$s)*$/', $this->dummies), $rest)) {
            throw new ParserException("Unexpected end of input :" . $rest);
        }

        // Empty the state explicitly once the parse is over.
        foreach (array_keys($parseState) as $key) {
            unset($parseState[$key]);
        }

        return $tree['nodes'];
    }

    /**
     * Parses $symbol at the current position of an existing parse state.
     *
     * @param array  $parseState Parse state as built by getState().
     * @param string $symbol     Symbol to parse.
     * @param string $repeat     Repeat operator applied to the symbol.
     * @return array Tree node as returned by work().
     * @throws ParserException Located error; the underlying failure is chained
     *                         as the previous exception.
     */
    public function parse(&$parseState, $symbol = 'S', $repeat = '.')
    {
        try {
            return $this->work($parseState, array(
                'type'   => $symbol,
                'repeat' => $repeat,
            ));
        } catch (ParserException $e) {
            $error = $parseState['error'];

            // Nothing is recorded when the symbol is rejected before any
            // production was tried; fall back to the current state and to the
            // exception at hand so the cause is not lost.
            if (empty($error['ParserException'])) {
                $error = array(
                    'position'        => $parseState['state']['position'],
                    'line'            => $parseState['state']['line'],
                    'ParserException' => $e,
                );
            }

            // Text between the last line break and the error position.
            preg_match('![^\n]++$!', substr($parseState['string'], 0, $error['position']), $m);

            throw new ParserException(
                sprintf(
                    'Error on line %1$u at position %2$u.',
                    $error['line'],
                    !empty($m[0]) ? strlen($m[0]) : 0
                ),
                0,
                $error['ParserException']
            );
        }
    }

    /**
     * Creates the initial parse state for an input string.
     *
     * @param string $string   Input to parse.
     * @param bool   $litteral Whether nodes keep the raw text they cover.
     * @return array
     */
    protected function getState($string, $litteral)
    {
        return array(
            'parser'   => $this,
            'string'   => $string,
            'litteral' => $litteral,
            // Cache of match() results, keyed by getId().
            'matches'  => array(),
            // Failure kept by reportError().
            'error'    => array(
                'position'        => 0,
                'line'            => 0,
                'ParserException' => null,
            ),
            // Part of the state that is saved / restored when backtracking.
            'state'    => array(
                'line'       => 1,
                'position'   => 0,
                'deepness'   => 0,
                'operators'  => array(),
                'ntoken'     => null,
                'production' => null,
            ),
            // Cache of consume() results, keyed by getId().
            'memo'     => array(),
        );
    }

    /**
     * Restores the backtrackable part of the state from a snapshot.
     *
     * @param array $parseState Parse state to update.
     * @param array $oldState   Snapshot made by saveState().
     * @param array $no         State entries whose keys appear here are left untouched.
     * @return array The updated parse state.
     */
    protected function setState(&$parseState, $oldState, $no = array())
    {
        foreach (array_diff_key($oldState['state'], $no) as $key => $value) {
            $parseState['state'][$key] = $value;
        }

        return $parseState;
    }

    /**
     * Takes a snapshot of the backtrackable part of the state.
     *
     * @param array $parseState
     * @return array Array with a single 'state' entry.
     */
    protected function saveState(&$parseState)
    {
        // Arrays are copied by value, so this is an independent snapshot.
        return array('state' => $parseState['state']);
    }

    /**
     * Builds the cache key of a token at the current position.
     *
     * @param array $parseState Parse state, or a snapshot made by saveState().
     * @param array $token
     * @return string "type:position"
     */
    protected function getId($parseState, $token)
    {
        return $token['type'] . ':' . $parseState['state']['position'];
    }

    /**
     * Records a failure, keeping the one that happened furthest in the input.
     *
     * @param array           $parseState
     * @param array           $token      Token being parsed (not used here).
     * @param ParserException $e
     * @return void
     */
    protected function reportError(&$parseState, $token, ParserException $e)
    {
        $isFirst   = !$parseState['error']['ParserException'];
        $isFurther = $parseState['state']['position'] > $parseState['error']['position'];

        if ($isFirst || $isFurther) {
            $parseState['error']['position']        = $parseState['state']['position'];
            $parseState['error']['line']            = $parseState['state']['line'];
            $parseState['error']['ParserException'] = $e;
        }
    }

    /**
     * Builds the regex that matches a terminal at the start of the remaining
     * input: group 1 is the optional dummy prefix, group 2 the terminal.
     *
     * @param string $type Terminal type.
     * @return string
     */
    protected function terminalRegex($type)
    {
        return sprintf(
            '/^((?:%2$s)?)(%1$s)/S',
            $this->terminals[$type]['regex'],
            $this->dummies
        );
    }

    /**
     * Tells whether a token can start at the current position, without
     * consuming anything. A terminal is tested directly, any other symbol
     * through the terminals of its first set. Results are cached.
     *
     * @param array $parseState
     * @param array $token
     * @return mixed Truthy when the token can start here.
     */
    protected function match(&$parseState, $token)
    {
        $id = $this->getId($parseState, $token);
        if (!empty($parseState['matches'][$id])) {
            return $parseState['matches'][$id]['value'];
        }

        $type  = $token['type'];
        $input = substr($parseState['string'], $parseState['state']['position']);

        if (!empty($this->terminals[$type])) {
            return $parseState['matches'][$id]['value'] = preg_match($this->terminalRegex($type), $input);
        }

        $found = false;
        if (!empty($this->firsts[$type])) {
            foreach ($this->firsts[$type] as $terminalType => $terminal) {
                if (preg_match($this->terminalRegex($terminalType), $input)) {
                    $found = true;
                    break;
                }
            }
        }

        return $parseState['matches'][$id]['value'] = $found;
    }

    /**
     * Consumes one occurrence of a token at the current position.
     *
     * The result is memoised per token type and position, except for rules
     * flagged 'left'. While a token is being evaluated its memo entry holds a
     * null value, so re-entering the same token at the same position fails
     * instead of recursing.
     *
     * @param array $parseState
     * @param array $token
     * @return mixed The (possibly translated) nodes, or a falsy value on failure.
     */
    protected function consume(&$parseState, $token)
    {
        $indent = $this->debug ? str_repeat("\t", $parseState['state']['deepness']) : '';

        $id = $this->getId($parseState, $token);
        if (!empty($parseState['memo'][$id])) {
            if ($this->debug) echo "\n\t\t$indent memo {$token['type']} : {$parseState['state']['position']} => {$parseState['memo'][$id]['position']}";

            $memo = $parseState['memo'][$id];
            if ($memo['value']) {
                $parseState['state']['position'] = $memo['position'];
                // Keep the line counter in step with the restored position.
                if (isset($memo['line'])) {
                    $parseState['state']['line'] = $memo['line'];
                }
            }

            return $memo['value'];
        }

        // Placeholder marking this token as "in progress".
        $parseState['memo'][$id] = array(
            'value'    => null,
            'position' => null,
        );

        if (!empty($this->terminals[$token['type']])) {
            $nodes = $this->eat($parseState, $token);
        } elseif ($this->match($parseState, $token)) {
            $nodes = $this->test($parseState, $token);
        } else {
            $nodes = null;
        }

        if ($nodes && $this->translator) {
            $nodes = call_user_func($this->translator, $parseState, $token, $nodes);
        }

        if (empty($this->rules[$token['type']]['left'])) {
            $parseState['memo'][$id] = array(
                'value'    => $nodes,
                'position' => $parseState['state']['position'],
                'line'     => $parseState['state']['line'],
            );
        } else {
            unset($parseState['memo'][$id]);
        }

        return $nodes;
    }

    /**
     * Tries the productions of a rule in order and keeps the first that matches.
     *
     * For a rule flagged 'seed', a normal call only tries the productions
     * flagged 'seed' and then hands the result to grow(); a growing call only
     * tries the other productions.
     *
     * @param array $parseState
     * @param array $token
     * @param bool  $growing    True when called from grow().
     * @return mixed Nodes of the matching production, or a falsy value.
     */
    protected function test(&$parseState, $token, $growing = false)
    {
        $parseState['state']['deepness']++;

        $state  = $this->saveState($parseState);
        $id     = $this->getId($parseState, $token);
        $indent = $this->debug ? str_repeat("\t", $parseState['state']['deepness']) : '';

        $ruleHasSeed = !empty($this->rules[$token['type']]['seed']);

        $nodes = array();
        $error = false;

        foreach ($this->rules[$token['type']]['productions'] as $production) {
            $isSeed   = !empty($production['seed']);
            $eligible = $growing ? !$isSeed : (!$ruleHasSeed || $isSeed);
            if (!$eligible) {
                continue;
            }

            try {
                if ($this->debug) echo "\n\t$indent [ $id : {$production['production']}";

                $error = false;
                $nodes = $this->production($parseState, $token, $production);

                if ($this->debug) echo "\nOOO\t$indent ]O $id : {$parseState['state']['position']}";

                $parseState['state']['deepness']--;

                break;
            } catch (ParserException $e) {
                if ($this->debug) echo "\n###\t$indent ]# $id : {$parseState['state']['position']}";

                $this->reportError($parseState, $token, $e);

                // Backtrack before trying the next production.
                $error = true;
                $nodes = null;
                $this->setState($parseState, $state);
            }
        }

        if (!$error && $nodes && !$growing && $ruleHasSeed) {
            if ($this->debug) echo "\n###\t$indent try to grow : {$parseState['state']['position']}";
            $nodes = $this->grow($parseState, $token, $state, $nodes);
        } elseif ($this->debug) {
            echo "\n###\t$indent dont try to grow : {$parseState['state']['position']}";
        }

        return $nodes;
    }

    /**
     * Decides whether a follow token allows the current result to be grown,
     * taking operator precedence and associativity into account.
     *
     * @param array $parseState
     * @param array $token      Token being grown.
     * @param array $follow     Candidate token from the follow set.
     * @param mixed $precedence Precedence of the innermost pending operator.
     * @return bool
     */
    protected function doesGrow($parseState, $token, $follow, $precedence)
    {
        if (!$this->match($parseState, $follow)) {
            return false;
        }

        // Leave the input to the next expected token when it also matches
        // here and has a higher precedence than the follow token.
        $next = $parseState['state']['ntoken'];
        if (
            !empty($next) &&
            $this->match($parseState, $next) &&
            $follow['precedence'] < $next['precedence']
        ) {
            return false;
        }

        $production = $parseState['state']['production'];
        $nextIndex  = (isset($token['idx']) ? $token['idx'] : 0) + 1;

        return
            empty($production['right']) ||
            !empty($production['tokens'][$nextIndex]) ||
            $follow['precedence'] > $precedence ||
            ($follow['precedence'] === $precedence && $follow['associative'] === 'right');
    }

    /**
     * Tries to extend an already parsed result of a seeded rule.
     *
     * The current result is stored in the memo at the position where the rule
     * started, the state is rewound to that position and the non-seed
     * productions are tried; they find the stored result through the memo.
     * This repeats for as long as the match gets longer.
     *
     * @param array $parseState
     * @param array $token
     * @param array $oldState   Snapshot taken where the rule started.
     * @param mixed $nodes      Result obtained so far.
     * @return mixed The grown nodes, or the given nodes when growing fails.
     */
    protected function grow(&$parseState, $token, $oldState, $nodes)
    {
        $precedence = 0;
        $pending    = count($parseState['state']['operators']);
        if ($pending) {
            $precedence = $parseState['state']['operators'][$pending - 1]['precedence'];
        }

        $indent = $this->debug ? str_repeat("\t", $parseState['state']['deepness']) : '';

        $oid = $this->getId($oldState, $token);
        $id  = $this->getId($parseState, $token);
        if ($this->debug) echo "\n\t$indent { grow : $oid => $id";

        $match = false;
        if (!empty($this->follows[$token['type']])) {
            foreach ($this->follows[$token['type']] as $follow) {
                if ($this->doesGrow($parseState, $token, $follow, $precedence)) {
                    $match = true;
                    break;
                }
            }
        }

        if (!$match) {
            if ($this->debug) echo "\n\t$indent } # grow : $id cause not in followset ";
            return $nodes;
        }

        $oldNodes = $nodes;

        if ($this->translator) {
            $nodes = call_user_func($this->translator, $parseState, $token, $nodes);
        }

        // Publish the current result at the start position of the rule.
        $parseState['memo'][$oid] = array(
            'value'    => $nodes,
            'position' => $parseState['state']['position'],
            'line'     => $parseState['state']['line'],
        );

        $position = $parseState['state']['position'];
        $state    = $this->saveState($parseState);
        $this->setState($parseState, $oldState);

        if ($this->debug) echo "\nGGG\t$indent grow procedure : $id";

        $newNodes = $this->test($parseState, $token, true);
        if (!$newNodes) {
            // Growing failed: go back to the end of what was already matched.
            if ($position >= $parseState['state']['position']) {
                $this->setState($parseState, $state);
            }

            return $oldNodes;
        }

        $nodes = $newNodes;

        if ($position < $parseState['state']['position']) {
            $nodes = $this->grow($parseState, $token, $oldState, $nodes);
        }

        if ($this->debug) echo "\nGGG\t$indent grow procedure successful : {$this->getId($parseState, $token)}";

        return $nodes;
    }

    /**
     * Parses every token of one production, in order.
     *
     * While the production runs, the state exposes the production itself, the
     * token expected after the current one ('ntoken') and the operators met so
     * far; all three are put back on success. On failure the caller restores
     * the state from its own snapshot.
     *
     * @param array $parseState
     * @param array $token      Token owning the production (not used here).
     * @param array $production
     * @return array One work() node per token of the production.
     * @throws ParserException When a token of the production fails.
     */
    protected function production(&$parseState, $token, $production)
    {
        $indent = $this->debug ? str_repeat("\t", $parseState['state']['deepness']) : '';

        $outerProduction = $parseState['state']['production'];
        $outerNext       = $parseState['state']['ntoken'];

        $parseState['state']['production'] = $production;

        $nodes     = array();
        $operators = 0;

        try {
            foreach ($production['tokens'] as $j => $nextToken) {
                if ($this->debug) echo "\n\t\t$indent { {$nextToken['type']}{$nextToken['repeat']} : {$parseState['state']['position']}";

                if (!empty($nextToken['precedence'])) {
                    $operators++;
                    $parseState['state']['operators'][] = $nextToken;
                }

                // After the last token, the next expected token is the one of
                // the enclosing production.
                $parseState['state']['ntoken'] = !empty($production['tokens'][$j + 1])
                    ? $production['tokens'][$j + 1]
                    : $outerNext;

                $nodes[] = $this->work($parseState, $nextToken);

                if ($this->debug) echo "\nO\t\t$indent }O {$nextToken['type']}{$nextToken['repeat']} : {$parseState['state']['position']}";
            }
        } catch (ParserException $e) {
            if ($this->debug) echo "\n#\t\t$indent }# {$nextToken['type']}{$nextToken['repeat']} : {$parseState['state']['position']}";

            throw $e;
        }

        while ($operators--) {
            array_pop($parseState['state']['operators']);
        }

        $parseState['state']['production'] = $outerProduction;
        $parseState['state']['ntoken']     = $outerNext;

        return $nodes;
    }

    /**
     * Matches a terminal at the current position and advances past it
     * (including any dummy input in front of it).
     *
     * @param array $parseState
     * @param array $token
     * @return array|null Leaf node, or null when the terminal does not match.
     */
    protected function eat(&$parseState, $token)
    {
        $indent = $this->debug ? str_repeat("\t", $parseState['state']['deepness']) : '';

        $input = substr($parseState['string'], $parseState['state']['position']);

        if (!preg_match($this->terminalRegex($token['type']), $input, $m)) {
            return null;
        }

        if ($this->debug) {
            $c = strlen($m[0]);
            echo "\n\t\t$indent consume {$token['type']} : {$parseState['state']['position']} : {$c}";
        }

        $parseState['state']['position'] += strlen($m[0]);
        $parseState['state']['line']     += substr_count($m[0], "\n");

        if ($this->debug) echo "\n\t\t$indent match {$token['type']} : {$parseState['state']['position']}";

        // Drop the full match and the dummy prefix: $m[0] is now the terminal
        // itself, followed by the capture groups of its own regex.
        $all = array_shift($m);
        array_shift($m);

        $length = strlen($m[0]);
        $start  = $parseState['state']['position'] - $length;

        return array(
            'type'     => $token['type'],
            'line'     => $start ? substr_count($parseState['string'], "\n", 0, $start) + 1 : 1,
            'position' => $start,
            'length'   => $length,
            'match'    => !empty($m[1]) ? $m[1] : $m[0],
            'litteral' => $parseState['litteral'] ? $all : '',
            'matches'  => $m,
        );
    }

    /**
     * Parses a token according to its repeat operator and wraps the result in
     * a tree node.
     *
     * Operators: '.' exactly one, '?' at most one, '+' at least one,
     * '*' any number, '&' must match but consumes nothing, '!' must not match.
     *
     * @param array $parseState
     * @param array $token
     * @return array Node with 'type', 'repeat', 'line', 'position', 'length',
     *               'litteral' and 'nodes'.
     * @throws ParserException When the token does not satisfy its operator.
     */
    protected function work(&$parseState, $token)
    {
        $parseState['state']['deepness']++;

        $position = $parseState['state']['position'];

        switch ($token['repeat']) {
            case '.':
                $sub = $this->one($parseState, $token);
                break;
            case '?':
                $sub = $this->oneAtMost($parseState, $token);
                break;
            case '+':
                $sub = $this->oneAtLeast($parseState, $token);
                break;
            case '*':
                $sub = $this->zeroAtLeast($parseState, $token);
                break;
            case '&':
                $sub = $this->oneMore($parseState, $token);
                break;
            case '!':
                $sub = $this->noMore($parseState, $token);
                break;
            default:
                // Unknown operator: nothing is parsed.
                $sub = null;
        }

        $parseState['state']['deepness']--;

        $length = $parseState['state']['position'] - $position;

        return array(
            'type'     => $token['type'],
            'repeat'   => $token['repeat'],
            'line'     => ($position ? substr_count($parseState['string'], "\n", 0, $position) : 0) + 1,
            'position' => $position,
            'length'   => $length,
            'litteral' => $parseState['litteral'] ? substr($parseState['string'], $position, $length) : '',
            'nodes'    => $sub,
        );
    }

    /**
     * id : exactly one occurrence.
     *
     * @param array $parseState
     * @param array $token
     * @return mixed
     * @throws ParserException When the token does not match.
     */
    protected function one(&$parseState, $token)
    {
        $elem = $this->consume($parseState, $token);
        if (!$elem) {
            throw self::except($parseState, $token);
        }

        return $elem;
    }

    /**
     * id? : one occurrence, or nothing.
     *
     * @param array $parseState
     * @param array $token
     * @return mixed Nodes of the occurrence, or a falsy value.
     */
    protected function oneAtMost(&$parseState, $token)
    {
        return $this->consume($parseState, $token);
    }

    /**
     * id+ : one occurrence or more.
     *
     * @param array $parseState
     * @param array $token
     * @return array
     * @throws ParserException When the token does not match at least once.
     */
    protected function oneAtLeast(&$parseState, $token)
    {
        $nodes = array($this->one($parseState, $token));

        $position = $parseState['state']['position'];
        while ($elem = $this->consume($parseState, $token)) {
            // A match that consumes nothing would repeat forever: stop there.
            if ($parseState['state']['position'] <= $position) {
                break;
            }

            $nodes[]  = $elem;
            $position = $parseState['state']['position'];
        }

        return $nodes;
    }

    /**
     * id* : any number of occurrences.
     *
     * @param array $parseState
     * @param array $token
     * @return array
     */
    protected function zeroAtLeast(&$parseState, $token)
    {
        $nodes = array();

        $position = $parseState['state']['position'];
        while ($elem = $this->consume($parseState, $token)) {
            // A match that consumes nothing would repeat forever: stop there.
            if ($parseState['state']['position'] <= $position) {
                break;
            }

            $nodes[]  = $elem;
            $position = $parseState['state']['position'];
        }

        return $nodes;
    }

    /**
     * id& : the token must match here, but the input is not consumed.
     *
     * @param array $parseState
     * @param array $token
     * @return mixed
     * @throws ParserException When the token does not match.
     */
    protected function oneMore(&$parseState, $token)
    {
        $state = $this->saveState($parseState);

        $elem = $this->consume($parseState, $token);
        if (!$elem) {
            throw self::except($parseState, $token);
        }

        $this->setState($parseState, $state);

        return $elem;
    }

    /**
     * id! : the token must not match here.
     *
     * @param array $parseState
     * @param array $token
     * @return bool Always true.
     * @throws ParserException When the token matches.
     */
    protected function noMore(&$parseState, $token)
    {
        $state = $this->saveState($parseState);

        if ($this->consume($parseState, $token)) {
            // Report the error where the forbidden token starts.
            $this->setState($parseState, $state);

            throw self::except($parseState, $token, true);
        }

        return true;
    }

    /**
     * Builds the exception for a token that was expected but missing, or
     * present but forbidden.
     *
     * The line is the one tracked in the state; the column is the number of
     * characters between the start of that line and the current position.
     *
     * @param array $parseState
     * @param array $token
     * @param bool  $notExpected True for an "Unexpected token" message.
     * @return ParserException
     */
    protected static function except(&$parseState, $token, $notExpected = false)
    {
        $position = $parseState['state']['position'];

        $column = $position;
        if ($position > 0) {
            // Negative offset: look backwards for the last line break located
            // before the current position.
            $lineBreak = strrpos(
                $parseState['string'],
                "\n",
                $position - 1 - strlen($parseState['string'])
            );
            if ($lineBreak !== false) {
                $column = $position - $lineBreak - 1;
            }
        }

        return new ParserException(sprintf(
            '%5$sxpected token : "%1$s" at position %2$s on line "%3$s" and col "%4$s" !',
            $token['type'],
            $position,
            $parseState['state']['line'],
            $column,
            $notExpected ? 'Une' : 'E'
        ));
    }
}
|
|
|
|
?>
|