cyrilleinvalides/choupas/www/admin/app/libs/sense/Parser/Grammar.php

366 lines
8.2 KiB
PHP
Executable File

<?php
/**
 * Grammar description loaded from text and pre-analysed for a parser.
 *
 * The text is read line by line:
 *  - a line that is exactly "%%" switches to the next section;
 *  - empty lines and lines starting with "//" are skipped;
 *  - first section: terminals, written "NAME -> regex". A leading "!" marks
 *    the terminal as ignored (its regex also feeds the "dummy" string); a
 *    leading "." is accepted by the syntax but not otherwise used here;
 *  - second section: productions, written "NAME -> tokens",
 *    "NAME *-> tokens" or "NAME <- tokens", or "#include(file)" directives;
 *  - once a second "%%" has been seen, the first meaningful line stops the
 *    parsing, so the remainder of the text is free-form.
 *
 * After parsing, left/right recursion flags, "first" sets and "follow" lists
 * are computed and exposed through the getters.
 */
class Grammar {

    /**
     * Builds a grammar from a file.
     *
     * @param string      $file Path of the grammar file.
     * @param string|null $path Directory used to resolve #include() directives;
     *                          defaults to the directory containing $file.
     * @return Grammar
     * @throws Exception If the file does not exist, cannot be read, or if its
     *                   content is rejected (the original exception is chained).
     */
    static public function fromFile ( $file, $path = null ) {
        if ( !file_exists($file) || is_dir($file) )
            throw new Exception('File not found : ' . $file);
        try {
            $content = file_get_contents($file);
            // A failed read used to be parsed silently as an empty grammar.
            if ( $content === false )
                throw new Exception('File not readable : ' . $file);
            return self::fromString($content, $path ? $path : dirname($file));
        } catch ( Exception $e ) {
            throw new Exception(
                sprintf('Error in file "%1$s" : "%2$s"', $file, $e->getMessage()),
                0,
                $e
            );
        }
    }

    /**
     * Builds a grammar from its textual description.
     *
     * @param string $string Grammar text.
     * @param string $path   Directory used to resolve #include() directives.
     * @return Grammar
     * @throws Exception If the description is malformed.
     */
    static public function fromString ( $string, $path = '.' ) {
        return new self($string, $path);
    }

    // Base directory for #include() directives.
    private $path = '';
    // Regex matching any run of ignored terminals ('' when there are none).
    private $dummyString = '';
    // Regexes of ignored terminals, keyed by themselves.
    private $dummies = array();
    // Terminal records (type, regex, ignore, line), keyed by terminal name.
    private $terminals = array();
    // Rule records (type, left, right, seed, productions), keyed by rule name.
    private $rules = array();
    // For each rule name, the terminal tokens that can start it, keyed by terminal name.
    private $firsts = array();
    // For each left-recursive rule name, the tokens found right after it.
    private $follows = array();

    /**
     * @param string $string Grammar text.
     * @param string $path   Directory used to resolve #include() directives.
     * @throws Exception If the description is malformed.
     */
    private function __construct ( $string, $path = '.' ) {
        // realpath() returns false for a missing directory; keep the path as
        // given in that case rather than resolving includes against "/".
        $resolved = realpath($path);
        $this->path = $resolved !== false ? $resolved : $path;
        $this->parseGrammar($string);
    }

    /**
     * Parses the whole description, then runs the recursion / first / follow analyses.
     *
     * @param string $string Grammar text.
     * @throws Exception On a malformed line or when a production references a
     *                   symbol that is neither a terminal nor a rule.
     */
    private function parseGrammar ( $string ) {
        // Symbols referenced by productions => line where they were referenced.
        $seen = array();
        $section = 0;
        // Accept both LF and CRLF line endings: a trailing "\r" would otherwise
        // break the "%%" separator and the blank-line detection.
        foreach ( preg_split('!\r?\n!', $string) as $index => $line ) {
            $current = $index + 1;
            if ( $line === '%%' ) { // section separator
                $section++;
            } elseif ( preg_match('!^(//.*)?$!', $line) ) { // one line comments and empty lines
                continue;
            } elseif ( $section < 1 ) { // terminal
                if ( $dummy = $this->parseTerminal($line, $current) )
                    $this->dummies[$dummy] = $dummy;
            } elseif ( $section < 2 ) { // production
                if ( preg_match('!\#include\(\s*(.+)\s*\)\s*$!', $line, $m) ) {
                    // The greedy capture keeps blanks placed before ")".
                    $seen = $this->mergeGrammar(trim($m[1]), $seen);
                } else {
                    $seen = array_merge($this->parseProduction($line, $current), $seen);
                }
            } else break; // eof comments
        }

        if ( $this->dummies )
            $this->dummyString = '(?:' . implode('|', $this->dummies) . ')++';

        if ( $undefined = array_diff_key($seen, $this->terminals, $this->rules) )
            throw new Exception(sprintf(
                'Malformed Grammar : undefiner symbol "%1$s" on line "%2$s".',
                key($undefined),
                current($undefined)
            ));

        $this->findRecursions();
        $this->firstSet();
        $this->followSet();
    }

    /**
     * Loads another grammar file (relative to the base directory) and merges
     * its dummies, terminals and rules into this grammar. Entries already
     * present here win for dummies and terminals; rules sharing a name have
     * their productions concatenated and re-indexed.
     *
     * @param string $file Included file name, relative to the base directory.
     * @param array  $seen Symbols referenced so far.
     * @return array $seen completed with the names of the included rules.
     * @throws Exception If the included file cannot be loaded.
     */
    private function mergeGrammar ( $file, $seen ) {
        $included = self::fromFile($this->path . '/' . $file);

        foreach ( $included->dummies as $key => $regex )
            if ( empty($this->dummies[$key]) )
                $this->dummies[$key] = $regex;

        foreach ( $included->terminals as $name => $terminal )
            if ( empty($this->terminals[$name]) )
                $this->terminals[$name] = $terminal;

        foreach ( $included->rules as $name => $rule ) {
            $seen[$name] = 1;
            if ( empty($this->rules[$name]) ) {
                $this->rules[$name] = $rule;
                continue;
            }
            $this->rules[$name]['seed'] |= $rule['seed'];
            $merged = array_merge($this->rules[$name]['productions'], $rule['productions']);
            // Keep each production's 'idx' equal to its position in the list.
            foreach ( $merged as $position => $production )
                $merged[$position]['idx'] = $position;
            $this->rules[$name]['productions'] = $merged;
        }
        return $seen;
    }

    /**
     * Parses one terminal line ("NAME -> regex", optionally prefixed by "!" or ".")
     * and registers it.
     *
     * @param string $line    Line content.
     * @param int    $current Line number, used for records and error messages.
     * @return string|null The regex when the terminal is ignored ("!" prefix), null otherwise.
     * @throws Exception If the line does not follow the terminal syntax.
     */
    private function parseTerminal ( $line, $current ) {
        if ( !preg_match('!^((?:\!|\.)?)([^\s]++)\s+->\s+!', $line, $m) )
            throw new Exception(sprintf(
                'Malformed input in grammar at line "%1$s" !',
                $current
            ));

        // Split on the first arrow only, so a regex that itself contains
        // " -> " is kept whole.
        $parts = preg_split('!\s+->\s+!', $line, 2);
        $ignored = ( $m[1] === '!' );
        $this->terminals[$m[2]] = array(
            'type' => $m[2],
            'regex' => $parts[1],
            'ignore' => $ignored,
            'line' => $current,
        );
        return $ignored ? $parts[1] : null;
    }

    /**
     * Parses one production line ("NAME -> tokens", "NAME *-> tokens" or
     * "NAME <- tokens") and appends it to the rule named NAME, creating the
     * rule on first use. Any arrow other than "->" flags the production, and
     * its rule, as 'seed'.
     *
     * @param string $line    Line content.
     * @param int    $current Line number, used for records and error messages.
     * @return array Symbols referenced by the production => $current.
     * @throws Exception If the line does not follow the production syntax.
     */
    private function parseProduction ( $line, $current ) {
        if ( !preg_match('!^(\!?)([^\s]++)\s+(\*?->|<-)\s+!', $line) )
            throw new Exception(sprintf(
                'Malformed input in grammar at line "%1$s" !',
                $current
            ));

        list($name, $arrow, $body) = preg_split('!\s*(\*?->|<-)\s*!', $line, 2, PREG_SPLIT_DELIM_CAPTURE);
        $seed = ( $arrow !== '->' );

        if ( empty($this->rules[$name]) )
            $this->rules[$name] = array(
                'type' => $name,
                'left' => 0,
                'right' => 0,
                'seed' => 0,
                'productions' => array(),
            );
        $this->rules[$name]['seed'] = $this->rules[$name]['seed'] || $seed;

        $seen = array();
        $tokens = array();
        $position = 0;
        // array_filter() drops the empty chunks produced by trailing blanks.
        foreach ( array_filter(preg_split('!\s+!', $body)) as $key => $raw ) {
            $token = $this->parseToken($raw, $position++);
            $seen[$token['type']] = $current;
            $tokens[$key] = $token;
        }

        // Position of the new production inside the rule (the former code
        // counted the keys of the rule record instead, always yielding 5).
        $idx = count($this->rules[$name]['productions']);
        $this->rules[$name]['productions'][] = array(
            'idx' => $idx,
            'type' => $name,
            'line' => $current,
            'rule' => $body,
            'left' => 0,
            'right' => 0,
            'seed' => $seed,
            'production' => $line,
            'tokens' => $tokens,
        );
        return $seen;
    }

    /**
     * Turns one token of a production body into its record.
     *
     * A token may start with "N<" or "N>" (N being digits) giving a precedence
     * and a 'left' / 'right' associativity, and may end with one of the
     * markers "+", "*", "?", ".", "&" or "!"; "." is assumed when no marker
     * is present or when the token is a single character.
     *
     * @param string $token Raw token text (non-empty, no whitespace).
     * @param int    $idx   Position of the token in its production.
     * @return array Token record (idx, type, repeat, precedence, associative).
     */
    private function parseToken ( $token, $idx ) {
        $precedence = 0;
        $associative = 0;
        if ( preg_match('!^((?:[0-9]++)?)(<|>).++$!', $token, $m) ) {
            // NOTE(review): the digits are optional in the test above but
            // mandatory here, so a bare "<" / ">" prefix is kept in the
            // symbol name. Left untouched on purpose — confirm the intent.
            $token = preg_replace('!^([0-9]++)(<|>)!', '', $token);
            $precedence = (int) $m[1];
            $associative = ( $m[2] === '<' ? 'left' : 'right' );
        }

        $markers = array('+', '*', '?', '.', '&', '!');
        $last = $token[strlen($token) - 1];
        if ( strlen($token) === 1 || !in_array($last, $markers, true) )
            $token .= '.';

        return array(
            'idx' => $idx,
            'type' => substr($token, 0, strlen($token) - 1),
            'repeat' => $token[strlen($token) - 1],
            'precedence' => $precedence,
            'associative' => $associative,
        );
    }

    /**
     * Flags every production, and its rule, that can reach its own rule
     * through its leftmost ('left') or rightmost ('right') symbols.
     */
    private function findRecursions () {
        foreach ( $this->rules as $name => $rule )
            foreach ( $rule['productions'] as $idx => $production ) {
                if ( $this->recurse($name, $production) ) {
                    $production['left'] = 1;
                    $this->rules[$name]['left'] = 1;
                }
                if ( $this->recurse($name, $production, true) ) {
                    $production['right'] = 1;
                    $this->rules[$name]['right'] = 1;
                }
                $this->rules[$name]['productions'][$idx] = $production;
            }
    }

    /**
     * Tells whether rule $rul can be reached again from production $prod by
     * following its symbols from the left (or from the right when $right is
     * true), stopping at the first symbol whose marker is "." or "+".
     *
     * @param string $rul     Name of the rule being tested.
     * @param array  $prod    Production record to explore.
     * @param bool   $right   Explore from the last token instead of the first.
     * @param array  $visited Rule names already explored on the current path.
     * @return bool
     */
    private function recurse ( $rul, $prod, $right = false, $visited = array() ) {
        // to support filters
        if ( !empty($visited[$prod['type']]) )
            return ( $prod['type'] === $rul );
        $visited[$prod['type']] = 1;

        $tokens = $right ? array_reverse($prod['tokens']) : $prod['tokens'];
        foreach ( $tokens as $token ) {
            if ( !empty($this->rules[$token['type']]) )
                foreach ( $this->rules[$token['type']]['productions'] as $production )
                    if ( $this->recurse($rul, $production, $right, $visited) )
                        return true;
            // A "." or "+" symbol cannot be skipped: nothing behind it is reachable.
            if ( in_array($token['repeat'], array('.', '+'), true) )
                return false;
        }
        return false;
    }

    /**
     * Fills $this->firsts with, for each rule, the union of the first sets of
     * its productions.
     */
    private function firstSet () {
        foreach ( $this->rules as $name => $rule )
            foreach ( $rule['productions'] as $production ) {
                $known = !empty($this->firsts[$name]) ? $this->firsts[$name] : array();
                $this->firsts[$name] = array_merge($known, $this->first($production));
            }
    }

    /**
     * Collects the terminal tokens that can start a production, expanding
     * rules recursively and stopping at the first symbol whose marker is
     * "." or "+".
     *
     * @param array $production Production record.
     * @param array $visited    Rule names already expanded on the current path.
     * @return array Token records keyed by terminal name.
     */
    private function first ( $production, $visited = array() ) {
        if ( !empty($visited[$production['type']]) )
            return array();
        $visited[$production['type']] = 1;

        $firsts = array();
        foreach ( $production['tokens'] as $token ) {
            if ( !empty($this->terminals[$token['type']]) ) {
                $firsts[$token['type']] = $token;
            } elseif ( !empty($this->rules[$token['type']]) ) {
                foreach ( $this->rules[$token['type']]['productions'] as $alternative )
                    $firsts = array_merge($firsts, $this->first($alternative, $visited));
            }
            if ( in_array($token['repeat'], array('.', '+'), true) )
                break;
        }
        return $firsts;
    }

    /**
     * Fills $this->follows: for each left-recursive production whose first
     * symbol is itself a left-recursive rule, records the second token of the
     * production under the name of the first one.
     */
    private function followSet () {
        foreach ( $this->rules as $rule )
            foreach ( $rule['productions'] as $production ) {
                if ( !$production['left'] || empty($production['tokens'][1]) )
                    continue;
                $head = $production['tokens'][0]['type'];
                if ( empty($this->rules[$head]['left']) )
                    continue;
                if ( empty($this->follows[$head]) )
                    $this->follows[$head] = array();
                $this->follows[$head][] = $production['tokens'][1];
            }
    }

    /** @return string Regex matching a run of ignored terminals, '' when there are none. */
    public function getDummyString () {
        return $this->dummyString;
    }

    /** @return array Terminal records keyed by terminal name. */
    public function getTerminals () {
        return $this->terminals;
    }

    /** @return array Rule records keyed by rule name. */
    public function getRules () {
        return $this->rules;
    }

    /** @return array First sets keyed by rule name. */
    public function getFirsts () {
        return $this->firsts;
    }

    /** @return array Follow lists keyed by rule name. */
    public function getFollows () {
        return $this->follows;
    }

    /** @return string print_r() dump of the whole object. */
    public function __toString() {
        return print_r($this, 1);
    }
}
?>