Augmentation vers version 3.3.0

This commit is contained in:
Gauvain Boiché
2020-03-31 15:31:03 +02:00
parent d926806907
commit a1864c0414
2618 changed files with 406015 additions and 31377 deletions

20
vendor/s9e/regexp-builder/LICENSE vendored Normal file
View File

@@ -0,0 +1,20 @@
The MIT License (MIT)
Copyright (c) 2016-2018 The s9e Authors
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

25
vendor/s9e/regexp-builder/composer.json vendored Normal file
View File

@@ -0,0 +1,25 @@
{
"name": "s9e/regexp-builder",
"type": "library",
"description": "Single-purpose library that generates regular expressions that match a list of strings.",
"homepage": "https://github.com/s9e/RegexpBuilder/",
"keywords": ["regexp"],
"license": "MIT",
"require": {
"lib-pcre": ">=7.2",
"php": ">=5.5.1"
},
"require-dev": {
"phpunit/phpunit": "<5.8"
},
"autoload": {
"psr-4": {
"s9e\\RegexpBuilder\\": "src"
}
},
"autoload-dev": {
"psr-4": {
"s9e\\RegexpBuilder\\Tests\\": "tests"
}
}
}

View File

@@ -0,0 +1,167 @@
<?php
/**
* @package s9e\RegexpBuilder
* @copyright Copyright (c) 2016-2018 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\RegexpBuilder;
use s9e\RegexpBuilder\Input\InputInterface;
use s9e\RegexpBuilder\Output\OutputInterface;
use s9e\RegexpBuilder\Passes\CoalesceOptionalStrings;
use s9e\RegexpBuilder\Passes\CoalesceSingleCharacterPrefix;
use s9e\RegexpBuilder\Passes\GroupSingleCharacters;
use s9e\RegexpBuilder\Passes\MergePrefix;
use s9e\RegexpBuilder\Passes\MergeSuffix;
use s9e\RegexpBuilder\Passes\PromoteSingleStrings;
use s9e\RegexpBuilder\Passes\Recurse;
class Builder
{
    /**
     * @var InputInterface Object used to split each string into a list of values
     */
    protected $input;

    /**
     * @var MetaCharacters Meta-characters declared through the "meta" config option
     */
    protected $meta;

    /**
     * @var Runner Holds the passes that are run on the split strings
     */
    protected $runner;

    /**
     * @var Serializer Serializes the processed strings into the final expression
     */
    protected $serializer;

    /**
     * Configure the input, meta-characters, serializer and passes used by this builder
     *
     * Any key missing from $config falls back to its default value: "/" as delimiter, "Bytes"
     * as both input and output type, and no options or meta-characters.
     *
     * @param array $config Supported keys: "delimiter", "input", "inputOptions", "meta",
     *                      "output" and "outputOptions"
     */
    public function __construct(array $config = [])
    {
        $defaults = [
            'delimiter'     => '/',
            'input'         => 'Bytes',
            'inputOptions'  => [],
            'meta'          => [],
            'output'        => 'Bytes',
            'outputOptions' => []
        ];
        $config = $config + $defaults;

        // The serializer receives $this->meta, so the meta-characters must be set up first
        $this->setInput($config['input'], $config['inputOptions']);
        $this->setMeta($config['meta']);
        $this->setSerializer($config['output'], $config['outputOptions'], $config['delimiter']);
        $this->setRunner();
    }

    /**
     * Build and return a regular expression that matches all of the given strings
     *
     * Duplicates are removed and the strings are sorted before being processed.
     *
     * @param  string[] $strings Literal strings to be matched
     * @return string            Regular expression (without delimiters), or an empty string if
     *                           there is nothing to match
     */
    public function build(array $strings)
    {
        $literals = array_unique($strings);
        sort($literals);
        if ($this->isEmpty($literals))
        {
            return '';
        }

        $split     = $this->splitStrings($literals);
        $replaced  = $this->meta->replaceMeta($split);
        $processed = $this->runner->run($replaced);

        return $this->serializer->serializeStrings($processed);
    }

    /**
     * Test whether the list of strings is empty
     *
     * A list whose only entry is the empty string is considered empty too.
     *
     * @param  string[] $strings
     * @return bool
     */
    protected function isEmpty(array $strings)
    {
        if (empty($strings))
        {
            return true;
        }

        return ($strings === ['']);
    }

    /**
     * Instantiate the input class and store it in $this->input
     *
     * @param  string $inputType    Name of a class in the Input sub-namespace
     * @param  array  $inputOptions Options passed to the class's constructor
     * @return void
     */
    protected function setInput($inputType, array $inputOptions)
    {
        $inputClass  = __NAMESPACE__ . '\\Input\\' . $inputType;
        $this->input = new $inputClass($inputOptions);
    }

    /**
     * Create the MetaCharacters instance in $this->meta and register every entry of the map
     *
     * @param  array $map Expressions, using the meta-character that represents them as key
     * @return void
     */
    protected function setMeta(array $map)
    {
        $this->meta = new MetaCharacters($this->input);
        foreach ($map as $metaChar => $metaExpr)
        {
            $this->meta->add($metaChar, $metaExpr);
        }
    }

    /**
     * Create the Runner instance in $this->runner and register the passes in order
     *
     * @return void
     */
    protected function setRunner()
    {
        $runner       = new Runner;
        $this->runner = $runner;

        $runner->addPass(new MergePrefix);
        $runner->addPass(new GroupSingleCharacters);
        // The Recurse pass receives the runner it is registered with
        $runner->addPass(new Recurse($runner));
        $runner->addPass(new PromoteSingleStrings);
        $runner->addPass(new CoalesceOptionalStrings);
        $runner->addPass(new MergeSuffix);
        $runner->addPass(new CoalesceSingleCharacterPrefix);
    }

    /**
     * Create the Serializer instance in $this->serializer
     *
     * @param  string $outputType    Name of a class in the Output sub-namespace
     * @param  array  $outputOptions Options passed to the output class's constructor
     * @param  string $delimiter     Delimiter passed to the Escaper
     * @return void
     */
    protected function setSerializer($outputType, array $outputOptions, $delimiter)
    {
        $outputClass = __NAMESPACE__ . '\\Output\\' . $outputType;

        $this->serializer = new Serializer(
            new $outputClass($outputOptions),
            $this->meta,
            new Escaper($delimiter)
        );
    }

    /**
     * Split every given string into a list of values using the configured input
     *
     * @param  string[] $strings List of strings
     * @return array[]           List of arrays, using the same keys as the original list
     */
    protected function splitStrings(array $strings)
    {
        $split = [];
        foreach ($strings as $key => $string)
        {
            $split[$key] = $this->input->split($string);
        }

        return $split;
    }
}

View File

@@ -0,0 +1,59 @@
<?php
/**
* @package s9e\RegexpBuilder
* @copyright Copyright (c) 2016-2018 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\RegexpBuilder;
class Escaper
{
    /**
     * @var array Characters to escape in a character class, mapped to their escaped form
     */
    public $inCharacterClass = ['-' => '\\-', '\\' => '\\\\', ']' => '\\]', '^' => '\\^'];

    /**
     * @var array Characters to escape outside of a character class, mapped to their escaped form
     */
    public $inLiteral = [
        '$'  => '\\$',  '(' => '\\(', ')' => '\\)', '*' => '\\*',
        '+'  => '\\+',  '.' => '\\.', '?' => '\\?', '[' => '\\[',
        '\\' => '\\\\', '^' => '\\^', '{' => '\\{', '|' => '\\|'
    ];

    /**
     * Register every character of the delimiter as a character that needs to be escaped
     *
     * @param string $delimiter Delimiter used in the final regexp
     */
    public function __construct($delimiter = '/')
    {
        // Each character of the delimiter is escaped both inside and outside of character
        // classes, on top of the default maps
        foreach (str_split($delimiter, 1) as $char)
        {
            $this->inCharacterClass[$char] = '\\' . $char;
            $this->inLiteral[$char]        = '\\' . $char;
        }
    }

    /**
     * Escape given character to be used in a character class
     *
     * @param  string $char Original character
     * @return string       Escaped character, or the original character if it's not in the map
     */
    public function escapeCharacterClass($char)
    {
        return (isset($this->inCharacterClass[$char])) ? $this->inCharacterClass[$char] : $char;
    }

    /**
     * Escape given character to be used outside of a character class
     *
     * @param  string $char Original character
     * @return string       Escaped character, or the original character if it's not in the map
     */
    public function escapeLiteral($char)
    {
        return (isset($this->inLiteral[$char])) ? $this->inLiteral[$char] : $char;
    }
}

View File

@@ -0,0 +1,23 @@
<?php
/**
* @package s9e\RegexpBuilder
* @copyright Copyright (c) 2016-2018 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\RegexpBuilder\Input;
abstract class BaseImplementation implements InputInterface
{
    /**
     * Default constructor, which ignores the options it receives
     *
     * @param array $options
     */
    public function __construct(array $options = [])
    {
    }

    /**
     * Split given string into a list of values
     *
     * @param  string    $string
     * @return integer[]
     */
    abstract public function split($string);
}

View File

@@ -0,0 +1,24 @@
<?php
/**
* @package s9e\RegexpBuilder
* @copyright Copyright (c) 2016-2018 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\RegexpBuilder\Input;
class Bytes extends BaseImplementation
{
    /**
     * Split given string into the list of its bytes' values
     *
     * @param  string    $string
     * @return integer[]         One value per byte, or an empty list for an empty string
     */
    public function split($string)
    {
        $values = [];
        if ($string !== '')
        {
            foreach (str_split($string, 1) as $byte)
            {
                $values[] = ord($byte);
            }
        }

        return $values;
    }
}

View File

@@ -0,0 +1,24 @@
<?php
/**
* @package s9e\RegexpBuilder
* @copyright Copyright (c) 2016-2018 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\RegexpBuilder\Input;
interface InputInterface
{
    /**
     * Create an instance configured with given options
     *
     * @param array $options Implementation-specific options
     */
    public function __construct(array $options = []);

    /**
     * Split given string into a list of values
     *
     * @param  string    $string Original string
     * @return integer[]         List of values
     */
    public function split($string);
}

View File

@@ -0,0 +1,101 @@
<?php
/**
* @package s9e\RegexpBuilder
* @copyright Copyright (c) 2016-2018 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\RegexpBuilder\Input;
use InvalidArgumentException;
class Utf8 extends BaseImplementation
{
    /**
     * @var bool Whether to use surrogates to represent higher codepoints
     */
    protected $useSurrogates;

    /**
     * @param array $options Surrogates are used if the "useSurrogates" option is set and not empty
     */
    public function __construct(array $options = [])
    {
        $this->useSurrogates = (isset($options['useSurrogates']) && $options['useSurrogates']);
    }

    /**
     * Split given UTF-8 string into a list of codepoints
     *
     * @param  string    $string
     * @return integer[]
     *
     * @throws InvalidArgumentException if the string cannot be matched as UTF-8
     */
    public function split($string)
    {
        // With the "u" modifier, the match fails and returns false on invalid UTF-8
        $result = preg_match_all('(.)us', $string, $matches);
        if ($result === false)
        {
            throw new InvalidArgumentException('Invalid UTF-8 string');
        }

        $chars = $matches[0];
        if ($this->useSurrogates)
        {
            return $this->charsToCodepointsWithSurrogates($chars);
        }

        return $this->charsToCodepoints($chars);
    }

    /**
     * Convert a list of UTF-8 characters into a list of Unicode codepoints
     *
     * @param  string[]  $chars
     * @return integer[]        Codepoints, using the same keys as the original list
     */
    protected function charsToCodepoints(array $chars)
    {
        $codepoints = [];
        foreach ($chars as $key => $char)
        {
            $codepoints[$key] = $this->cp($char);
        }

        return $codepoints;
    }

    /**
     * Convert a list of UTF-8 characters into a list of Unicode codepoints with surrogates
     *
     * Codepoints below 0x10000 are kept as-is, the others are replaced with two values.
     *
     * @param  string[]  $chars
     * @return integer[]
     */
    protected function charsToCodepointsWithSurrogates(array $chars)
    {
        $codepoints = [];
        foreach ($chars as $char)
        {
            $cp = $this->cp($char);
            if ($cp >= 0x10000)
            {
                // 0xD7C0 is 0xD800 - (0x10000 >> 10), which accounts for the 0x10000 offset
                $codepoints[] = 0xD7C0 + ($cp >> 10);
                $codepoints[] = 0xDC00 + ($cp & 0x3FF);

                continue;
            }

            $codepoints[] = $cp;
        }

        return $codepoints;
    }

    /**
     * Compute and return the Unicode codepoint for given UTF-8 char
     *
     * The number of bytes read is determined by the value of the first byte. The constant
     * subtracted in each branch is the sum of the shifted marker bits: 0xF0/0xE0/0xC0 for the
     * first byte and 0x80 for each of the following bytes.
     *
     * @param  string  $char UTF-8 char
     * @return integer
     */
    protected function cp($char)
    {
        $lead = ord($char[0]);
        if ($lead >= 0xF0)
        {
            return ($lead << 18) + (ord($char[1]) << 12) + (ord($char[2]) << 6) + ord($char[3]) - 0x3C82080;
        }
        if ($lead >= 0xE0)
        {
            return ($lead << 12) + (ord($char[1]) << 6) + ord($char[2]) - 0xE2080;
        }
        if ($lead >= 0xC0)
        {
            return ($lead << 6) + ord($char[1]) - 0x3080;
        }

        return $lead;
    }
}

View File

@@ -0,0 +1,222 @@
<?php
/**
* @package s9e\RegexpBuilder
* @copyright Copyright (c) 2016-2018 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\RegexpBuilder;
use InvalidArgumentException;
use s9e\RegexpBuilder\Input\InputInterface;
class MetaCharacters
{
    /**
     * @const Bit value that indicates whether a meta-character represents a single character usable
     *        in a character class
     */
    const IS_CHAR = 1;

    /**
     * @const Bit value that indicates whether a meta-character represents a quantifiable expression
     */
    const IS_QUANTIFIABLE = 2;

    /**
     * @var array Map of meta values and the expression they represent
     */
    protected $exprs = [];

    /**
     * @var InputInterface Used to split a meta-character into its input value
     */
    protected $input;

    /**
     * @var array Map of meta-characters' input values and their meta value
     */
    protected $meta = [];

    /**
     * @param InputInterface $input
     */
    public function __construct(InputInterface $input)
    {
        $this->input = $input;
    }

    /**
     * Add a meta-character to the list
     *
     * @param  string $char Meta-character
     * @param  string $expr Regular expression
     * @return void
     *
     * @throws InvalidArgumentException if $char is not split into exactly one value by the input,
     *                                  or if $expr cannot be compiled as a regexp
     */
    public function add($char, $expr)
    {
        $values = $this->input->split($char);
        if (count($values) !== 1)
        {
            throw new InvalidArgumentException('Meta-characters must be represented by exactly one character');
        }

        // Try the expression on an empty subject, only to find out whether it compiles
        $isValid = (@preg_match('(' . $expr . ')u', '') !== false);
        if (!$isValid)
        {
            throw new InvalidArgumentException("Invalid expression '" . $expr . "'");
        }

        // The meta value must be computed before the meta-character is recorded because it
        // depends on the current number of meta-characters
        $metaValue = $this->computeValue($expr);

        $this->exprs[$metaValue] = $expr;
        $this->meta[$values[0]]  = $metaValue;
    }

    /**
     * Get the expression associated with a meta value
     *
     * @param  integer $metaValue
     * @return string
     *
     * @throws InvalidArgumentException if the meta value is unknown
     */
    public function getExpression($metaValue)
    {
        if (isset($this->exprs[$metaValue]))
        {
            return $this->exprs[$metaValue];
        }

        throw new InvalidArgumentException('Invalid meta value ' . $metaValue);
    }

    /**
     * Return whether a given value represents a single character usable in a character class
     *
     * Any non-negative value does, as well as negative values that have the IS_CHAR bit set.
     *
     * @param  integer $value
     * @return bool
     */
    public static function isChar($value)
    {
        if ($value >= 0)
        {
            return true;
        }

        return (bool) ($value & self::IS_CHAR);
    }

    /**
     * Return whether a given value represents a quantifiable expression
     *
     * Any non-negative value does, as well as negative values that have the IS_QUANTIFIABLE
     * bit set.
     *
     * @param  integer $value
     * @return bool
     */
    public static function isQuantifiable($value)
    {
        if ($value >= 0)
        {
            return true;
        }

        return (bool) ($value & self::IS_QUANTIFIABLE);
    }

    /**
     * Replace values from meta-characters in a list of strings with their meta value
     *
     * @param  array[] $strings
     * @return array[]
     */
    public function replaceMeta(array $strings)
    {
        foreach ($strings as &$values)
        {
            foreach ($values as &$current)
            {
                if (!isset($this->meta[$current]))
                {
                    continue;
                }
                $current = $this->meta[$current];
            }
        }

        return $strings;
    }

    /**
     * Compute and return a value for given expression
     *
     * The value is a negative integer based on the current number of meta-characters. Its last
     * 2 bits indicate whether the expression is quantifiable and/or represents a single
     * character.
     *
     * @param  string  $expr Regular expression
     * @return integer
     */
    protected function computeValue($expr)
    {
        $flags = [
            'exprIsChar'         => self::IS_CHAR,
            'exprIsQuantifiable' => self::IS_QUANTIFIABLE
        ];

        // Leave the lowest bits cleared so that they can be used for the flags
        $metaValue = (1 + count($this->meta)) * -pow(2, count($flags));
        foreach ($flags as $testMethod => $flag)
        {
            $metaValue = ($this->$testMethod($expr)) ? ($metaValue | $flag) : $metaValue;
        }

        return $metaValue;
    }

    /**
     * Test whether given expression represents a single character usable in a character class
     *
     * @param  string $expr
     * @return bool
     */
    protected function exprIsChar($expr)
    {
        return $this->matchesAny(
            $expr,
            [
                // Escaped literal or escape sequence such as \w but not \R
                '(^\\\\[adefhnrstvwDHNSVW\\W]$)D',

                // Unicode properties such as \pL or \p{Lu}
                '(^\\\\p(?:.|\\{[^}]+\\})$)Di',

                // An escape sequence such as \x1F or \x{2600}
                '(^\\\\x(?:[0-9a-f]{2}|\\{[^}]+\\})$)Di'
            ]
        );
    }

    /**
     * Test whether given expression is quantifiable
     *
     * An expression that represents a single character is always quantifiable.
     *
     * @param  string $expr
     * @return bool
     */
    protected function exprIsQuantifiable($expr)
    {
        $patterns = [
            // A dot or \R
            '(^(?:\\.|\\\\R)$)D',

            // A character class
            '(^\\[\\^?(?:([^\\\\\\]]|\\\\.)(?:-(?-1))?)++\\]$)D'
        ];
        if ($this->matchesAny($expr, $patterns))
        {
            return true;
        }

        return $this->exprIsChar($expr);
    }

    /**
     * Test whether given expression matches any of the given regexps
     *
     * @param  string   $expr
     * @param  string[] $regexps
     * @return bool
     */
    protected function matchesAny($expr, array $regexps)
    {
        $matched = false;
        foreach ($regexps as $pattern)
        {
            if (preg_match($pattern, $expr))
            {
                $matched = true;
                break;
            }
        }

        return $matched;
    }
}

View File

@@ -0,0 +1,62 @@
<?php
/**
* @package s9e\RegexpBuilder
* @copyright Copyright (c) 2016-2018 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\RegexpBuilder\Output;
use InvalidArgumentException;
abstract class BaseImplementation implements OutputInterface
{
    /**
     * @var integer Highest value accepted by validate()
     */
    protected $maxValue = 0;

    /**
     * @var integer Lowest value accepted by validate()
     */
    protected $minValue = 0;

    /**
     * Default constructor, which ignores the options it receives
     *
     * @param array $options
     */
    public function __construct(array $options = [])
    {
    }

    /**
     * Validate given value then serialize it into a character
     *
     * @param  integer $value
     * @return string
     */
    public function output($value)
    {
        $this->validate($value);

        return $this->outputValidValue($value);
    }

    /**
     * Validate given value
     *
     * @param  integer $value
     * @return void
     *
     * @throws InvalidArgumentException if the value is lower than $minValue or greater than $maxValue
     */
    protected function validate($value)
    {
        $isOutOfBounds = ($value < $this->minValue || $value > $this->maxValue);
        if ($isOutOfBounds)
        {
            $bounds = '(' . $this->minValue . '..' . $this->maxValue . ')';

            throw new InvalidArgumentException('Value ' . $value . ' is out of bounds ' . $bounds);
        }
    }

    /**
     * Serialize a valid value into a character
     *
     * @param  integer $value
     * @return string
     */
    abstract protected function outputValidValue($value);
}

View File

@@ -0,0 +1,22 @@
<?php
/**
* @package s9e\RegexpBuilder
* @copyright Copyright (c) 2016-2018 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\RegexpBuilder\Output;
class Bytes extends BaseImplementation
{
    /**
     * @var integer Highest value accepted, which is the highest value of a byte
     */
    protected $maxValue = 255;

    /**
     * Serialize a valid value into the byte that has this value
     *
     * @param  integer $value
     * @return string
     */
    protected function outputValidValue($value)
    {
        $byte = chr($value);

        return $byte;
    }
}

View File

@@ -0,0 +1,24 @@
<?php
/**
* @package s9e\RegexpBuilder
* @copyright Copyright (c) 2016-2018 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\RegexpBuilder\Output;
class JavaScript extends PrintableAscii
{
    /**
     * @var integer Highest value accepted
     */
    protected $maxValue = 0x10FFFF;

    /**
     * Output the representation of a unicode character
     *
     * Codepoints up to 0xFFFF use the \uXXXX notation padded to 4 digits, higher codepoints
     * use the \u{...} notation.
     *
     * @param  integer $cp Unicode codepoint
     * @return string
     */
    protected function escapeUnicode($cp)
    {
        if ($cp > 0xFFFF)
        {
            $format = '\\u{%' . $this->hexCase . '}';
        }
        else
        {
            $format = '\\u%04' . $this->hexCase;
        }

        return sprintf($format, $cp);
    }
}

View File

@@ -0,0 +1,19 @@
<?php
/**
* @package s9e\RegexpBuilder
* @copyright Copyright (c) 2016-2018 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\RegexpBuilder\Output;
interface OutputInterface
{
    /**
     * Serialize a value into a character
     *
     * @param  integer $value Original value
     * @return string         Serialized character
     */
    public function output($value);
}

View File

@@ -0,0 +1,22 @@
<?php
/**
* @package s9e\RegexpBuilder
* @copyright Copyright (c) 2016-2018 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\RegexpBuilder\Output;
class PHP extends PrintableAscii
{
    /**
     * @var integer Highest value accepted
     */
    protected $maxValue = 0x10FFFF;

    /**
     * Output the representation of a unicode character using the \x{...} notation
     *
     * @param  integer $cp Unicode codepoint
     * @return string
     */
    protected function escapeUnicode($cp)
    {
        // Hexadecimal value, padded to at least 4 digits
        $format = '\\x{%04' . $this->hexCase . '}';

        return sprintf($format, $cp);
    }
}

View File

@@ -0,0 +1,74 @@
<?php
/**
* @package s9e\RegexpBuilder
* @copyright Copyright (c) 2016-2018 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\RegexpBuilder\Output;
abstract class PrintableAscii extends BaseImplementation
{
    /**
     * @var string 'x' for lowercase hexadecimal symbols, 'X' for uppercase
     */
    protected $hexCase;

    /**
     * @param array $options Lowercase is used if the "case" option is set to "lower", uppercase
     *                       is used otherwise
     */
    public function __construct(array $options = [])
    {
        $useLowercase  = (isset($options['case']) && $options['case'] === 'lower');
        $this->hexCase = ($useLowercase) ? 'x' : 'X';
    }

    /**
     * Escape given ASCII codepoint using the \xXX notation
     *
     * @param  integer $cp
     * @return string
     */
    protected function escapeAscii($cp)
    {
        $hex = sprintf('%02' . $this->hexCase, $cp);

        return '\\x' . $hex;
    }

    /**
     * Escape given control code
     *
     * Tab, line feed and carriage return use their short notation, the others are escaped in
     * hexadecimal.
     *
     * @param  integer $cp
     * @return string
     */
    protected function escapeControlCode($cp)
    {
        $shortForms = [9 => '\\t', 10 => '\\n', 13 => '\\r'];
        if (isset($shortForms[$cp]))
        {
            return $shortForms[$cp];
        }

        return $this->escapeAscii($cp);
    }

    /**
     * Output the representation of a unicode character
     *
     * @param  integer $cp Unicode codepoint
     * @return string
     */
    abstract protected function escapeUnicode($cp);

    /**
     * Serialize a valid value into a character or an escape sequence
     *
     * Values from 32 to 126 are output as-is, everything else is escaped.
     *
     * @param  integer $value
     * @return string
     */
    protected function outputValidValue($value)
    {
        if ($value < 32)
        {
            return $this->escapeControlCode($value);
        }
        if ($value < 127)
        {
            return chr($value);
        }
        if ($value > 255)
        {
            return $this->escapeUnicode($value);
        }

        return $this->escapeAscii($value);
    }
}

View File

@@ -0,0 +1,54 @@
<?php
/**
* @package s9e\RegexpBuilder
* @copyright Copyright (c) 2016-2018 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\RegexpBuilder\Output;
use InvalidArgumentException;
class Utf8 extends BaseImplementation
{
    /**
     * @var integer Highest value accepted
     */
    protected $maxValue = 0x10FFFF;

    /**
     * Serialize a valid value into a sequence of 1 to 4 bytes
     *
     * @param  integer $value
     * @return string
     */
    protected function outputValidValue($value)
    {
        if ($value < 0x80)
        {
            return chr($value);
        }

        // Every multibyte sequence ends with the lowest 6 bits of the value
        $last = chr(0x80 | ($value & 0x3F));
        if ($value < 0x800)
        {
            return chr(0xC0 | ($value >> 6)) . $last;
        }

        $secondLast = chr(0x80 | (($value >> 6) & 0x3F));
        if ($value < 0x10000)
        {
            return chr(0xE0 | ($value >> 12)) . $secondLast . $last;
        }

        return chr(0xF0 | ($value >> 18))
             . chr(0x80 | (($value >> 12) & 0x3F))
             . $secondLast
             . $last;
    }

    /**
     * Validate given value
     *
     * @param  integer $value
     * @return void
     *
     * @throws InvalidArgumentException if the value is a surrogate or is out of bounds
     */
    protected function validate($value)
    {
        $isSurrogate = ($value >= 0xD800 && $value <= 0xDFFF);
        if ($isSurrogate)
        {
            $message = sprintf('Surrogate 0x%X is not a valid UTF-8 character', $value);

            throw new InvalidArgumentException($message);
        }

        parent::validate($value);
    }
}

View File

@@ -0,0 +1,148 @@
<?php
/**
* @package s9e\RegexpBuilder
* @copyright Copyright (c) 2016-2018 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\RegexpBuilder\Passes;
abstract class AbstractPass implements PassInterface
{
    /**
     * @var bool Whether the current set of strings is optional
     */
    protected $isOptional;

    /**
     * Run this pass
     *
     * The leading empty string that marks the set as optional is removed before the pass is
     * run, and restored afterwards if the set is still optional.
     *
     * @param  array[] $strings Original strings
     * @return array[]          Modified strings
     */
    public function run(array $strings)
    {
        $strings = $this->beforeRun($strings);
        if ($this->canRun($strings))
        {
            $strings = $this->runPass($strings);
        }
        $strings = $this->afterRun($strings);

        return $strings;
    }

    /**
     * Process the list of strings after the pass is run
     *
     * Prepends an empty string to the list if the set is optional, unless the list already
     * starts with one.
     *
     * @param  array[] $strings
     * @return array[]
     */
    protected function afterRun(array $strings)
    {
        // The list may be empty at this point, e.g. if the original list only contained the
        // empty string. Test that the first element exists before reading it
        if ($this->isOptional && (!isset($strings[0]) || $strings[0] !== []))
        {
            array_unshift($strings, []);
        }

        return $strings;
    }

    /**
     * Prepare the list of strings before the pass is run
     *
     * Records in $this->isOptional whether the list starts with an empty string and removes
     * that empty string from the list.
     *
     * @param  array[] $strings
     * @return array[]
     */
    protected function beforeRun(array $strings)
    {
        $this->isOptional = (isset($strings[0]) && $strings[0] === []);
        if ($this->isOptional)
        {
            array_shift($strings);
        }

        return $strings;
    }

    /**
     * Test whether this pass can be run on a given list of strings
     *
     * Always true by default.
     *
     * @param  array[] $strings
     * @return bool
     */
    protected function canRun(array $strings)
    {
        return true;
    }

    /**
     * Run this pass on a list of strings
     *
     * @param  array[] $strings
     * @return array[]
     */
    abstract protected function runPass(array $strings);

    /**
     * Test whether given string has an optional suffix
     *
     * That is, whether its last element is an alternation that starts with an empty string.
     *
     * @param  array $string
     * @return bool
     */
    protected function hasOptionalSuffix(array $string)
    {
        $suffix = end($string);

        return (is_array($suffix) && $suffix[0] === []);
    }

    /**
     * Test whether given string contains a single alternation made of single values
     *
     * @param  array $string
     * @return bool
     */
    protected function isCharacterClassString(array $string)
    {
        return ($this->isSingleAlternationString($string) && $this->isSingleCharStringList($string[0]));
    }

    /**
     * Test whether given string contains one single element that is an alternation
     *
     * @param  array $string
     * @return bool
     */
    protected function isSingleAlternationString(array $string)
    {
        return (count($string) === 1 && is_array($string[0]));
    }

    /**
     * Test whether given string contains a single character value
     *
     * @param  array $string
     * @return bool
     */
    protected function isSingleCharString(array $string)
    {
        return (count($string) === 1 && !is_array($string[0]));
    }

    /**
     * Test whether given list of strings contains nothing but single-char strings
     *
     * @param  array[] $strings
     * @return bool
     */
    protected function isSingleCharStringList(array $strings)
    {
        foreach ($strings as $string)
        {
            if (!$this->isSingleCharString($string))
            {
                return false;
            }
        }

        return true;
    }
}

View File

@@ -0,0 +1,138 @@
<?php
/**
* @package s9e\RegexpBuilder
* @copyright Copyright (c) 2016-2018 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\RegexpBuilder\Passes;
/**
 * Replaces (?:ab?|b)? with a?b?
 */
class CoalesceOptionalStrings extends AbstractPass
{
    /**
     * Test whether this pass can be run, which requires an optional set of at least two strings
     *
     * @param  array[] $strings
     * @return bool
     */
    protected function canRun(array $strings)
    {
        if (!$this->isOptional)
        {
            return false;
        }

        return (count($strings) > 1);
    }

    /**
     * Run this pass on a list of strings
     *
     * Looks for a group of strings that share an optional suffix which matches exactly the
     * strings that are not part of the group. If there is one, the strings are coalesced and
     * the set is not optional anymore.
     *
     * @param  array[] $strings
     * @return array[]
     */
    protected function runPass(array $strings)
    {
        $groups = $this->getPrefixGroups($strings);
        foreach ($groups as $serializedSuffix => $prefixStrings)
        {
            $suffix        = unserialize($serializedSuffix);
            $suffixStrings = array_diff_key($strings, $prefixStrings);
            if ($this->buildSuffix($suffixStrings) !== $suffix)
            {
                continue;
            }

            $this->isOptional = false;

            return $this->buildCoalescedStrings($prefixStrings, $suffix);
        }

        return $strings;
    }

    /**
     * Build the final list of coalesced strings
     *
     * @param  array[] $prefixStrings
     * @param  array   $suffix
     * @return array[]
     */
    protected function buildCoalescedStrings(array $prefixStrings, array $suffix)
    {
        $strings = $this->runPass($this->buildPrefix($prefixStrings));

        $isSingleOptionalString = (count($strings) === 1 && $strings[0][0][0] === []);
        if ($isSingleOptionalString)
        {
            // The prefix has been remerged into a single string that starts with an optional
            // alternation, so the suffix only needs to be appended to that string
            $strings[0][] = $suffix;

            return $strings;
        }

        // Make the prefix optional and use it in a new list, of which the only string is
        // composed of the optional prefix followed by the suffix
        array_unshift($strings, []);

        return [[$strings, $suffix]];
    }

    /**
     * Build the list of strings used as prefix
     *
     * @param  array[] $strings
     * @return array[]          Copy of the original strings, minus their last element
     */
    protected function buildPrefix(array $strings)
    {
        $prefixes = [];
        foreach ($strings as $prefix)
        {
            // The last element of each string is the suffix
            array_pop($prefix);
            $prefixes[] = $prefix;
        }

        return $prefixes;
    }

    /**
     * Build a list of strings that matches any given strings or nothing
     *
     * Will unpack groups of single characters
     *
     * @param  array[] $strings
     * @return array[]
     */
    protected function buildSuffix(array $strings)
    {
        // Start with an empty string, which makes the whole suffix optional
        $suffix = [[]];
        foreach ($strings as $string)
        {
            if (!$this->isCharacterClassString($string))
            {
                $suffix[] = $string;

                continue;
            }

            foreach ($string[0] as $singleCharString)
            {
                $suffix[] = $singleCharString;
            }
        }

        return $suffix;
    }

    /**
     * Get the list of potential prefix strings grouped by identical suffix
     *
     * @param  array[] $strings
     * @return array            Lists of strings that preserve their original keys, using their
     *                          serialized suffix as key
     */
    protected function getPrefixGroups(array $strings)
    {
        $groups = [];
        foreach ($strings as $key => $string)
        {
            if (!$this->hasOptionalSuffix($string))
            {
                continue;
            }

            $groupKey                = serialize(end($string));
            $groups[$groupKey][$key] = $string;
        }

        return $groups;
    }
}

View File

@@ -0,0 +1,81 @@
<?php
/**
* @package s9e\RegexpBuilder
* @copyright Copyright (c) 2016-2018 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\RegexpBuilder\Passes;
/**
 * Replaces (?:ab|bb|c) with (?:[ab]b|c)
 */
class CoalesceSingleCharacterPrefix extends AbstractPass
{
    /**
     * Run this pass on a list of strings
     *
     * @param  array[] $strings
     * @return array[]          Merged strings, followed by the strings that were left untouched
     */
    protected function runPass(array $strings)
    {
        $merged = [];
        foreach ($this->getEligibleKeys($strings) as $group)
        {
            // Start with a copy of the first string of the group and replace its first element
            // with an empty character class
            $mergedString    = $strings[$group[0]];
            $mergedString[0] = [];

            // Move the first element of each string of the group into the character class and
            // remove the original string
            foreach ($group as $key)
            {
                $mergedString[0][] = [$strings[$key][0]];
                unset($strings[$key]);
            }

            $merged[] = $mergedString;
        }

        return array_merge($merged, $strings);
    }

    /**
     * Filter the list of eligible keys and keep those that have at least two matches
     *
     * @param  array[] $eligibleKeys List of lists of keys
     * @return array[]
     */
    protected function filterEligibleKeys(array $eligibleKeys)
    {
        $hasManyKeys = function ($keys)
        {
            return (count($keys) > 1);
        };

        return array_values(array_filter($eligibleKeys, $hasManyKeys));
    }

    /**
     * Get a list of keys of strings eligible to be merged together, grouped by suffix
     *
     * A string is eligible if its first element is not an alternation and it is followed by at
     * least one other element.
     *
     * @param  array[] $strings
     * @return array[]
     */
    protected function getEligibleKeys(array $strings)
    {
        $keysBySuffix = [];
        foreach ($strings as $key => $string)
        {
            if (is_array($string[0]) || !isset($string[1]))
            {
                continue;
            }

            $serializedSuffix                  = serialize(array_slice($string, 1));
            $keysBySuffix[$serializedSuffix][] = $key;
        }

        return $this->filterEligibleKeys($keysBySuffix);
    }
}

View File

@@ -0,0 +1,42 @@
<?php
/**
* @package s9e\RegexpBuilder
* @copyright Copyright (c) 2016-2018 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\RegexpBuilder\Passes;
/**
 * Enables other passes to replace (?:[xy]|a[xy]) with a?[xy]
 */
class GroupSingleCharacters extends AbstractPass
{
    /**
     * Run this pass on a list of strings
     *
     * Nothing is changed unless there are at least two single-char strings, and at least one
     * string that isn't.
     *
     * @param  array[] $strings
     * @return array[]
     */
    protected function runPass(array $strings)
    {
        $singles     = $this->getSingleCharStrings($strings);
        $singleCount = count($singles);
        if ($singleCount <= 1 || $singleCount >= count($strings))
        {
            return $strings;
        }

        // Remove the singles from the input, then prepend them as a new string
        $others = array_diff_key($strings, $singles);
        array_unshift($others, [array_values($singles)]);

        return $others;
    }

    /**
     * Return an array of every single-char string in given list of strings
     *
     * @param  array[] $strings
     * @return array[]          Single-char strings, using their original key
     */
    protected function getSingleCharStrings(array $strings)
    {
        $singles = [];
        foreach ($strings as $key => $string)
        {
            if ($this->isSingleCharString($string))
            {
                $singles[$key] = $string;
            }
        }

        return $singles;
    }
}

View File

@@ -0,0 +1,104 @@
<?php
/**
* @package s9e\RegexpBuilder
* @copyright Copyright (c) 2016-2018 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\RegexpBuilder\Passes;
/**
 * Replaces (?:axx|ayy) with a(?:xx|yy)
 */
class MergePrefix extends AbstractPass
{
    /**
     * Run this pass on a list of strings
     *
     * Strings that share their first element are merged together, the others are kept as-is.
     *
     * @param  array[] $strings
     * @return array[]
     */
    protected function runPass(array $strings)
    {
        $merged = [];
        foreach ($this->getStringsByPrefix($strings) as $group)
        {
            if (isset($group[1]))
            {
                $merged[] = $this->mergeStrings($group);
            }
            else
            {
                $merged[] = $group[0];
            }
        }

        return $merged;
    }

    /**
     * Get the number of leading elements common to all given strings
     *
     * The first element is not tested, the strings are expected to have been grouped by it.
     *
     * @param  array[] $strings
     * @return integer
     */
    protected function getPrefixLength(array $strings)
    {
        $max = count($strings[0]);
        for ($len = 1; $len < $max; ++$len)
        {
            if (!$this->stringsMatch($strings, $len))
            {
                break;
            }
        }

        return $len;
    }

    /**
     * Return given strings grouped by their first element
     *
     * NOTE: assumes that this pass is run before the first element of any string could be replaced
     *
     * @param  array[] $strings
     * @return array[]
     */
    protected function getStringsByPrefix(array $strings)
    {
        $groups = [];
        foreach ($strings as $string)
        {
            $first            = $string[0];
            $groups[$first][] = $string;
        }

        return $groups;
    }

    /**
     * Merge given strings into a new single string
     *
     * The new string is made of the common prefix followed by an alternation that contains
     * what's left of each string.
     *
     * @param  array[] $strings
     * @return array
     */
    protected function mergeStrings(array $strings)
    {
        $len    = $this->getPrefixLength($strings);
        $merged = array_slice($strings[0], 0, $len);

        $tails = [];
        foreach ($strings as $string)
        {
            $tails[] = array_slice($string, $len);
        }
        $merged[$len] = $tails;

        return $merged;
    }

    /**
     * Test whether all given strings' elements match at given position
     *
     * @param  array[] $strings
     * @param  integer $pos
     * @return bool
     */
    protected function stringsMatch(array $strings, $pos)
    {
        $expected = $strings[0][$pos];
        foreach ($strings as $string)
        {
            $matches = (isset($string[$pos]) && $string[$pos] === $expected);
            if (!$matches)
            {
                return false;
            }
        }

        return true;
    }
}

View File

@@ -0,0 +1,83 @@
<?php
/**
* @package s9e\RegexpBuilder
* @copyright Copyright (c) 2016-2018 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\RegexpBuilder\Passes;
/**
 * Replaces (?:aax|bbx) with (?:aa|bb)x
 */
class MergeSuffix extends AbstractPass
{
    /**
     * Test whether this pass can be run, which requires at least two strings that end with the
     * same element
     *
     * @param  array[] $strings
     * @return bool
     */
    protected function canRun(array $strings)
    {
        if (count($strings) <= 1)
        {
            return false;
        }

        return $this->hasMatchingSuffix($strings);
    }

    /**
     * Run this pass on a list of strings
     *
     * @param  array[] $strings
     * @return array[]          List that contains one string, made of an alternation followed
     *                          by the common suffix
     */
    protected function runPass(array $strings)
    {
        $suffix = [];
        while ($this->hasMatchingSuffix($strings))
        {
            // Move the last element of the strings to the start of the common suffix
            array_unshift($suffix, end($strings[0]));
            $strings = $this->pop($strings);
        }

        // What's left of the strings becomes the first element of the new string
        $merged = $suffix;
        array_unshift($merged, $strings);

        return [$merged];
    }

    /**
     * Test whether all given strings have the same last element
     *
     * @param  array[] $strings
     * @return bool
     */
    protected function hasMatchingSuffix(array $strings)
    {
        $expected = end($strings[1]);
        foreach ($strings as $string)
        {
            $last = end($string);
            if ($last !== $expected)
            {
                return false;
            }
        }

        // end() returns false on an empty string, which has no suffix to match
        return ($expected !== false);
    }

    /**
     * Remove the last element of every string
     *
     * @param  array[] $strings Original strings
     * @return array[]          Processed strings
     */
    protected function pop(array $strings)
    {
        $total = count($strings);
        for ($i = $total - 1; $i >= 0; --$i)
        {
            array_pop($strings[$i]);
        }

        // Remove empty elements then prepend one back at the start of the array if applicable
        $remaining = array_filter($strings);
        if (count($remaining) < $total)
        {
            array_unshift($remaining, []);
        }

        return $remaining;
    }
}

View File

@@ -0,0 +1,19 @@
<?php
/**
* @package s9e\RegexpBuilder
* @copyright Copyright (c) 2016-2018 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\RegexpBuilder\Passes;
interface PassInterface
{
    /**
     * Run this pass on given list of strings
     *
     * @param  array[] $strings Original strings
     * @return array[]          Modified strings
     */
    public function run(array $strings);
}

View File

@@ -0,0 +1,47 @@
<?php
/**
* @package s9e\RegexpBuilder
* @copyright Copyright (c) 2016-2018 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\RegexpBuilder\Passes;
/**
* Unwraps alternations that hold exactly one string so that other passes can replace
* (?:a0?x|bx) with (?:a0?|b)x
*/
class PromoteSingleStrings extends AbstractPass
{
	/**
	* {@inheritdoc}
	*/
	protected function runPass(array $strings)
	{
		$promoted = [];
		foreach ($strings as $key => $string)
		{
			$promoted[$key] = $this->promoteSingleStrings($string);
		}

		return $promoted;
	}

	/**
	* Inline every single-string alternation found in given string
	*
	* An element that is an array of exactly one string is replaced by the elements of
	* that string. Every other element is copied as-is.
	*
	* @param  array $string Original string
	* @return array         Modified string
	*/
	protected function promoteSingleStrings(array $string)
	{
		$result = [];
		foreach ($string as $element)
		{
			$isSingleString = (is_array($element) && count($element) === 1);
			if (!$isSingleString)
			{
				$result[] = $element;

				continue;
			}

			// Splice the lone string's elements in place of the alternation
			$result = array_merge($result, $element[0]);
		}

		return $result;
	}
}

View File

@@ -0,0 +1,58 @@
<?php
/**
* @package s9e\RegexpBuilder
* @copyright Copyright (c) 2016-2018 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\RegexpBuilder\Passes;
use s9e\RegexpBuilder\Runner;
/**
* Lets passes run recursively inside alternations, e.g. to replace a(?:x0|x1|y0|y1) with
* a[xy][01]
*/
class Recurse extends AbstractPass
{
	/**
	* @var Runner Runner applied to each nested alternation
	*/
	protected $runner;

	/**
	* @param Runner $runner
	*/
	public function __construct(Runner $runner)
	{
		$this->runner = $runner;
	}

	/**
	* {@inheritdoc}
	*/
	protected function runPass(array $strings)
	{
		$result = [];
		foreach ($strings as $key => $string)
		{
			$result[$key] = $this->recurseString($string);
		}

		return $result;
	}

	/**
	* Run the runner on every alternation found in given string
	*
	* Elements that are not arrays are left untouched.
	*
	* @param  array $string
	* @return array
	*/
	protected function recurseString(array $string)
	{
		// Saved and restored around the nested runs, presumably because the runner can
		// re-enter this pass and overwrite the flag
		$savedIsOptional = $this->isOptional;
		foreach (array_filter($string, 'is_array') as $key => $alternation)
		{
			$string[$key] = $this->runner->run($alternation);
		}
		$this->isOptional = $savedIsOptional;

		return $string;
	}
}

View File

@@ -0,0 +1,45 @@
<?php
/**
* @package s9e\RegexpBuilder
* @copyright Copyright (c) 2016-2018 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\RegexpBuilder;
use s9e\RegexpBuilder\Passes\PassInterface;
class Runner
{
	/**
	* @var PassInterface[] Registered passes, in the order they were added
	*/
	protected $passes = [];

	/**
	* Append a pass to the end of the list
	*
	* @param  PassInterface $pass
	* @return void
	*/
	public function addPass(PassInterface $pass)
	{
		array_push($this->passes, $pass);
	}

	/**
	* Apply every registered pass to the list of strings
	*
	* Passes run in the order they were added; each one receives the output of the
	* previous one. With no registered pass, the strings are returned unchanged.
	*
	* @param  array[] $strings
	* @return array[]
	*/
	public function run(array $strings)
	{
		return array_reduce(
			$this->passes,
			function ($current, $pass)
			{
				return $pass->run($current);
			},
			$strings
		);
	}
}

View File

@@ -0,0 +1,279 @@
<?php
/**
* @package s9e\RegexpBuilder
* @copyright Copyright (c) 2016-2018 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\RegexpBuilder;
use s9e\RegexpBuilder\MetaCharacters;
use s9e\RegexpBuilder\Output\OutputInterface;
/**
* Serializes a list of strings into a regular expression
*
* Each string is an array whose elements are either integer values or nested lists of
* strings (alternations).
*/
class Serializer
{
	/**
	* @var Escaper
	*/
	protected $escaper;

	/**
	* @var MetaCharacters
	*/
	protected $meta;

	/**
	* @var OutputInterface
	*/
	protected $output;

	/**
	* @param OutputInterface $output
	* @param MetaCharacters  $meta
	* @param Escaper         $escaper
	*/
	public function __construct(OutputInterface $output, MetaCharacters $meta, Escaper $escaper)
	{
		$this->escaper = $escaper;
		$this->meta    = $meta;
		$this->output  = $output;
	}

	/**
	* Serialize given strings into a regular expression
	*
	* An empty list of strings produces an empty expression.
	*
	* @param  array[] $strings
	* @return string
	*/
	public function serializeStrings(array $strings)
	{
		$info         = $this->analyzeStrings($strings);
		$alternations = array_map([$this, 'serializeString'], $info['strings']);
		if (!empty($info['chars']))
		{
			// Prepend the character class to the list of alternations
			array_unshift($alternations, $this->serializeCharacterClass($info['chars']));
		}

		$expr = implode('|', $alternations);
		if ($this->needsParentheses($info))
		{
			$expr = '(?:' . $expr . ')';
		}

		return $expr . $info['quantifier'];
	}

	/**
	* Analyze given strings to determine how to serialize them
	*
	* The returned array contains the following elements:
	*
	*  - (int)    alternationsCount Number of alternations, counting the character class as one
	*  - (string) quantifier        Either '' or '?'
	*  - (array)  chars             List of values from single-char strings, only present if
	*                               there is more than one of them
	*  - (array)  strings           List of the remaining strings
	*
	* @param  array[] $strings
	* @return array
	*/
	protected function analyzeStrings(array $strings)
	{
		$info = ['alternationsCount' => 0, 'quantifier' => ''];

		// A leading empty string makes the whole expression optional. The isset() check
		// prevents an undefined offset error when the list of strings is empty
		if (isset($strings[0]) && $strings[0] === [])
		{
			$info['quantifier'] = '?';
			unset($strings[0]);
		}

		// A lone single-char string is not worth a character class and stays in the strings
		$chars = $this->getChars($strings);
		if (count($chars) > 1)
		{
			++$info['alternationsCount'];
			$info['chars'] = array_values($chars);
			$strings = array_diff_key($strings, $chars);
		}

		$info['strings'] = array_values($strings);
		$info['alternationsCount'] += count($strings);

		return $info;
	}

	/**
	* Return the portion of strings that are composed of a single character
	*
	* @param  array[] $strings
	* @return array            String key => value
	*/
	protected function getChars(array $strings)
	{
		$chars = [];
		foreach ($strings as $k => $string)
		{
			if ($this->isChar($string))
			{
				$chars[$k] = $string[0];
			}
		}

		return $chars;
	}

	/**
	* Get the list of ranges that cover all given values
	*
	* @param  int[]   $values Ordered, non-empty list of values
	* @return array[]         List of ranges in the form [start, end]
	*/
	protected function getRanges(array $values)
	{
		$i      = 0;
		$cnt    = count($values);
		$start  = $values[0];
		$end    = $start;
		$ranges = [];
		while (++$i < $cnt)
		{
			if ($values[$i] === $end + 1)
			{
				// Consecutive value, extend the current range
				++$end;
			}
			else
			{
				// Gap found, close the current range and start a new one
				$ranges[] = [$start, $end];
				$start = $end = $values[$i];
			}
		}
		$ranges[] = [$start, $end];

		return $ranges;
	}

	/**
	* Test whether given string represents a single character
	*
	* @param  array $string
	* @return bool
	*/
	protected function isChar(array $string)
	{
		return count($string) === 1 && is_int($string[0]) && MetaCharacters::isChar($string[0]);
	}

	/**
	* Test whether an expression is quantifiable based on the strings info
	*
	* @param  array $info
	* @return bool
	*/
	protected function isQuantifiable(array $info)
	{
		$strings = $info['strings'];

		return empty($strings) || $this->isSingleQuantifiableString($strings);
	}

	/**
	* Test whether a list of strings contains only one single quantifiable string
	*
	* @param  array[] $strings
	* @return bool
	*/
	protected function isSingleQuantifiableString(array $strings)
	{
		return count($strings) === 1 && count($strings[0]) === 1 && MetaCharacters::isQuantifiable($strings[0][0]);
	}

	/**
	* Test whether an expression needs parentheses based on the strings info
	*
	* Parentheses are needed if there is more than one alternation, or if a quantifier is
	* applied to an expression that is not quantifiable on its own.
	*
	* @param  array $info
	* @return bool
	*/
	protected function needsParentheses(array $info)
	{
		return ($info['alternationsCount'] > 1 || ($info['quantifier'] && !$this->isQuantifiable($info)));
	}

	/**
	* Serialize a given list of values into a character class
	*
	* @param  int[]  $values Ordered, non-empty list of values
	* @return string
	*/
	protected function serializeCharacterClass(array $values)
	{
		$expr = '[';
		foreach ($this->getRanges($values) as list($start, $end))
		{
			$expr .= $this->serializeCharacterClassUnit($start);
			if ($end > $start)
			{
				// A range of two consecutive values is written as two units without a dash
				if ($end > $start + 1)
				{
					$expr .= '-';
				}
				$expr .= $this->serializeCharacterClassUnit($end);
			}
		}
		$expr .= ']';

		return $expr;
	}

	/**
	* Serialize a given value to be used in a character class
	*
	* @param  int    $value
	* @return string
	*/
	protected function serializeCharacterClassUnit($value)
	{
		return $this->serializeValue($value, 'escapeCharacterClass');
	}

	/**
	* Serialize an element from a string
	*
	* Arrays are serialized as a nested list of strings, anything else as a literal.
	*
	* @param  array|int $element
	* @return string
	*/
	protected function serializeElement($element)
	{
		return (is_array($element)) ? $this->serializeStrings($element) : $this->serializeLiteral($element);
	}

	/**
	* Serialize a given value to be used as a literal
	*
	* @param  int    $value
	* @return string
	*/
	protected function serializeLiteral($value)
	{
		return $this->serializeValue($value, 'escapeLiteral');
	}

	/**
	* Serialize a given string into a regular expression
	*
	* @param  array  $string
	* @return string
	*/
	protected function serializeString(array $string)
	{
		return implode('', array_map([$this, 'serializeElement'], $string));
	}

	/**
	* Serialize a given value
	*
	* Negative values are resolved through the MetaCharacters instance. Other values go
	* through the output then through the named method of the escaper.
	*
	* @param  int    $value
	* @param  string $escapeMethod Name of the Escaper method to call
	* @return string
	*/
	protected function serializeValue($value, $escapeMethod)
	{
		return ($value < 0) ? $this->meta->getExpression($value) : $this->escaper->$escapeMethod($this->output->output($value));
	}
}