input = $input; } /** * Add a meta-character to the list * * @param string $char Meta-character * @param string $expr Regular expression * @return void */ public function add($char, $expr) { $split = $this->input->split($char); if (count($split) !== 1) { throw new InvalidArgumentException('Meta-characters must be represented by exactly one character'); } if (@preg_match('(' . $expr . ')u', '') === false) { throw new InvalidArgumentException("Invalid expression '" . $expr . "'"); } $inputValue = $split[0]; $metaValue = $this->computeValue($expr); $this->exprs[$metaValue] = $expr; $this->meta[$inputValue] = $metaValue; } /** * Get the expression associated with a meta value * * @param integer $metaValue * @return string */ public function getExpression($metaValue) { if (!isset($this->exprs[$metaValue])) { throw new InvalidArgumentException('Invalid meta value ' . $metaValue); } return $this->exprs[$metaValue]; } /** * Return whether a given value represents a single character usable in a character class * * @param integer $value * @return bool */ public static function isChar($value) { return ($value >= 0 || ($value & self::IS_CHAR)); } /** * Return whether a given value represents a quantifiable expression * * @param integer $value * @return bool */ public static function isQuantifiable($value) { return ($value >= 0 || ($value & self::IS_QUANTIFIABLE)); } /** * Replace values from meta-characters in a list of strings with their meta value * * @param array[] $strings * @return array[] */ public function replaceMeta(array $strings) { foreach ($strings as &$string) { foreach ($string as &$value) { if (isset($this->meta[$value])) { $value = $this->meta[$value]; } } } return $strings; } /** * Compute and return a value for given expression * * Values are meant to be a unique negative integer. The last 2 bits indicate whether the * expression is quantifiable and/or represents a single character. * * @param string $expr Regular expression * @return integer */ protected function computeValue($expr) { $properties = [ 'exprIsChar' => self::IS_CHAR, 'exprIsQuantifiable' => self::IS_QUANTIFIABLE ]; $value = (1 + count($this->meta)) * -pow(2, count($properties)); foreach ($properties as $methodName => $bitValue) { if ($this->$methodName($expr)) { $value |= $bitValue; } } return $value; } /** * Test whether given expression represents a single character usable in a character class * * @param string $expr * @return bool */ protected function exprIsChar($expr) { $regexps = [ // Escaped literal or escape sequence such as \w but not \R '(^\\\\[adefhnrstvwDHNSVW\\W]$)D', // Unicode properties such as \pL or \p{Lu} '(^\\\\p(?:.|\\{[^}]+\\})$)Di', // An escape sequence such as \x1F or \x{2600} '(^\\\\x(?:[0-9a-f]{2}|\\{[^}]+\\})$)Di' ]; return $this->matchesAny($expr, $regexps); } /** * Test whether given expression is quantifiable * * @param string $expr * @return bool */ protected function exprIsQuantifiable($expr) { $regexps = [ // A dot or \R '(^(?:\\.|\\\\R)$)D', // A character class '(^\\[\\^?(?:([^\\\\\\]]|\\\\.)(?:-(?-1))?)++\\]$)D' ]; return $this->matchesAny($expr, $regexps) || $this->exprIsChar($expr); } /** * Test whether given expression matches any of the given regexps * * @param string $expr * @param string[] $regexps * @return bool */ protected function matchesAny($expr, array $regexps) { foreach ($regexps as $regexp) { if (preg_match($regexp, $expr)) { return true; } } return false; } }