Augmentation vers version 3.3.0

This commit is contained in:
Gauvain Boiché
2020-03-31 15:31:03 +02:00
parent d926806907
commit a1864c0414
2618 changed files with 406015 additions and 31377 deletions

View File

@@ -6,15 +6,15 @@
"keywords": ["bbcode","bbcodes","blog","censor","embed","emoji","emoticons","engine","forum","html","markdown","markup","media","parser","shortcodes"],
"license": "MIT",
"require": {
"php": ">=5.4.7",
"php": ">=7.1",
"ext-dom": "*",
"ext-filter": "*",
"lib-pcre": ">=7.2"
"lib-pcre": ">=8.13",
"s9e/regexp-builder": "^1.4"
},
"require-dev": {
"matthiasmullie/minify": "*",
"php-coveralls/php-coveralls": "*",
"s9e/regexp-builder": "1.*"
"matthiasmullie/minify": "*",
"phpunit/phpunit": "^7 || 8.2.*"
},
"suggest": {
"ext-curl": "Improves the performance of the MediaEmbed plugin and some JavaScript minifiers",
@@ -34,5 +34,8 @@
"psr-4": {
"s9e\\TextFormatter\\Tests\\": "tests"
}
},
"extra": {
"version": "2.3.1"
}
}
}

View File

@@ -1,58 +1,153 @@
<?php
/*
/**
* @package s9e\TextFormatter
* @copyright Copyright (c) 2010-2019 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter;
abstract class Bundle
{
/**
* Return a cached instance of the parser
*
* @return Parser
*/
public static function getCachedParser()
{
	// Build the parser on first use and keep it in the late-static-bound property
	return static::$parser ?? (static::$parser = static::getParser());
}
/**
* Return a cached instance of the renderer
*
* @return Renderer
*/
public static function getCachedRenderer()
{
	// Build the renderer on first use and keep it in the late-static-bound property
	return static::$renderer ?? (static::$renderer = static::getRenderer());
}
/**
* Return a new instance of s9e\TextFormatter\Parser
*
* @return Parser
*/
abstract public static function getParser();
/**
* Return a new instance of s9e\TextFormatter\Renderer
*
* @return Renderer
*/
abstract public static function getRenderer();
/**
* Return the source of the JavaScript parser if available
*
* @return string
*/
public static function getJS()
{
	// Default implementation: no JavaScript source is provided
	$source = '';

	return $source;
}
/**
* Parse given text using a singleton instance of the bundled Parser
*
* @param string $text Original text
* @return string Intermediate representation
*/
public static function parse($text)
{
if (isset(static::$beforeParse))
$text = \call_user_func(static::$beforeParse, $text);
{
$text = call_user_func(static::$beforeParse, $text);
}
$xml = static::getCachedParser()->parse($text);
if (isset(static::$afterParse))
$xml = \call_user_func(static::$afterParse, $xml);
{
$xml = call_user_func(static::$afterParse, $xml);
}
return $xml;
}
/**
* Render an intermediate representation using a singleton instance of the bundled Renderer
*
* @param string $xml Intermediate representation
* @param array $params Stylesheet parameters
* @return string Rendered result
*/
public static function render($xml, array $params = [])
{
$renderer = static::getCachedRenderer();
if (!empty($params))
{
$renderer->setParameters($params);
}
if (isset(static::$beforeRender))
$xml = \call_user_func(static::$beforeRender, $xml);
{
$xml = call_user_func(static::$beforeRender, $xml);
}
$output = $renderer->render($xml);
if (isset(static::$afterRender))
$output = \call_user_func(static::$afterRender, $output);
{
$output = call_user_func(static::$afterRender, $output);
}
return $output;
}
/**
* Reset the cached parser and renderer
*
* @return void
*/
public static function reset()
{
static::$parser = \null;
static::$renderer = \null;
static::$parser = null;
static::$renderer = null;
}
/**
* Transform an intermediate representation back to its original form
*
* @param string $xml Intermediate representation
* @return string Original text
*/
public static function unparse($xml)
{
if (isset(static::$beforeUnparse))
$xml = \call_user_func(static::$beforeUnparse, $xml);
{
$xml = call_user_func(static::$beforeUnparse, $xml);
}
$text = Unparser::unparse($xml);
if (isset(static::$afterUnparse))
$text = \call_user_func(static::$afterUnparse, $text);
{
$text = call_user_func(static::$afterUnparse, $text);
}
return $text;
}
}

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load Diff

View File

@@ -1,22 +1,46 @@
<?php
/*
/**
* @package s9e\TextFormatter
* @copyright Copyright (c) 2010-2019 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator;
use s9e\TextFormatter\Configurator;
abstract class Bundle
{
/**
* Configure a Configurator instance with this bundle's settings
*
* @param Configurator $configurator
* @return void
*/
abstract public function configure(Configurator $configurator);
/**
* Create and return a configured instance of Configurator
*
* @return Configurator
*/
public static function getConfigurator()
{
	$instance = new Configurator;

	// Late static binding: instantiate whichever concrete bundle this was called on
	$concreteBundle = new static;
	$concreteBundle->configure($instance);

	return $instance;
}
/**
* Return extra options to be passed to the bundle generator
*
* Used by scripts/generateBundles.php
*
* @return array
*/
public static function getOptions()
{
return [];

View File

@@ -0,0 +1,250 @@
<?php
/**
* @package s9e\TextFormatter
* @copyright Copyright (c) 2010-2019 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator;
use s9e\TextFormatter\Configurator;
use s9e\TextFormatter\Configurator\RendererGenerators\PHP;
class BundleGenerator
{
/**
* @var Configurator Configurator this instance belongs to
*/
protected $configurator;
/**
* @var callback Callback used to serialize the objects
*/
public $serializer = 'serialize';
/**
* @var string Callback used to unserialize the serialized objects (must be a string)
*/
public $unserializer = 'unserialize';
/**
* Constructor
*
* @param Configurator $configurator Configurator
*/
public function __construct(Configurator $configurator)
{
// Keep the configurator; generate() reads its rendering engine, phpHeader and finalize() output
$this->configurator = $configurator;
}
/**
* Create and return the source of a bundle based on given Configurator instance
*
* Options:
*
* - autoInclude: automatically load the source of the PHP renderer (default: true)
*
* @param string $className Name of the bundle class
* @param array $options Associative array of optional settings
* @return string PHP source for the bundle
*/
public function generate($className, array $options = [])
{
// The bundle's source is accumulated line by line in $php and joined with "\n" at the end
// Add default options
// (array union: keys supplied by the caller take precedence over the defaults)
$options += ['autoInclude' => true];
// Copy the PHP files header if applicable
if ($this->configurator->rendering->engine instanceof PHP)
{
$this->configurator->rendering->engine->phpHeader = $this->configurator->phpHeader;
}
// Get the parser and renderer
$objects = $this->configurator->finalize();
$parser = $objects['parser'];
$renderer = $objects['renderer'];
// Split the bundle's class name and its namespace
// (everything before the last backslash is the namespace, the rest is the short class name)
$namespace = '';
if (preg_match('#(.*)\\\\([^\\\\]+)$#', $className, $m))
{
$namespace = $m[1];
$className = $m[2];
}
// Start with the standard header
$php = [];
$php[] = $this->configurator->phpHeader;
if ($namespace)
{
$php[] = 'namespace ' . $namespace . ';';
$php[] = '';
}
// Generate and append the bundle class
$php[] = 'abstract class ' . $className . ' extends \\s9e\\TextFormatter\\Bundle';
$php[] = '{';
$php[] = ' /**';
$php[] = ' * @var s9e\\TextFormatter\\Parser Singleton instance used by parse()';
$php[] = ' */';
$php[] = ' protected static $parser;';
$php[] = '';
$php[] = ' /**';
$php[] = ' * @var s9e\\TextFormatter\\Renderer Singleton instance used by render()';
$php[] = ' */';
$php[] = ' protected static $renderer;';
$php[] = '';
// Add the event callbacks if applicable
// Maps each supported option name to the description written in the generated docblock
$events = [
'beforeParse'
=> 'Callback executed before parse(), receives the original text as argument',
'afterParse'
=> 'Callback executed after parse(), receives the parsed text as argument',
'beforeRender'
=> 'Callback executed before render(), receives the parsed text as argument',
'afterRender'
=> 'Callback executed after render(), receives the output as argument',
'beforeUnparse'
=> 'Callback executed before unparse(), receives the parsed text as argument',
'afterUnparse'
=> 'Callback executed after unparse(), receives the original text as argument'
];
// Only events present in $options produce a public static property in the bundle
foreach ($events as $eventName => $eventDesc)
{
if (isset($options[$eventName]))
{
$php[] = ' /**';
$php[] = ' * @var ' . $eventDesc;
$php[] = ' */';
$php[] = ' public static $' . $eventName . ' = ' . var_export($options[$eventName], true) . ';';
$php[] = '';
}
}
// Override getJS() only when finalize() returned a 'js' entry
if (isset($objects['js']))
{
$php[] = ' /**';
$php[] = ' * {@inheritdoc}';
$php[] = ' */';
$php[] = ' public static function getJS()';
$php[] = ' {';
$php[] = ' return ' . var_export($objects['js'], true) . ';';
$php[] = ' }';
$php[] = '';
}
$php[] = ' /**';
$php[] = ' * {@inheritdoc}';
$php[] = ' */';
$php[] = ' public static function getParser()';
$php[] = ' {';
// With a parserSetup callback, the parser is assigned to a variable and passed to the callback before being returned
if (isset($options['parserSetup']))
{
$php[] = ' $parser = ' . $this->exportObject($parser) . ';';
$php[] = ' ' . $this->exportCallback($namespace, $options['parserSetup'], '$parser') . ';';
$php[] = '';
$php[] = ' return $parser;';
}
else
{
$php[] = ' return ' . $this->exportObject($parser) . ';';
}
$php[] = ' }';
$php[] = '';
$php[] = ' /**';
$php[] = ' * {@inheritdoc}';
$php[] = ' */';
$php[] = ' public static function getRenderer()';
$php[] = ' {';
// If this is a PHP renderer and we know where it's saved, automatically load it as needed
if (!empty($options['autoInclude'])
&& $this->configurator->rendering->engine instanceof PHP
&& isset($this->configurator->rendering->engine->lastFilepath))
{
// NOTE: $className is reused for the renderer's class; the bundle's class header was already emitted above
$className = get_class($renderer);
$filepath = realpath($this->configurator->rendering->engine->lastFilepath);
$php[] = ' if (!class_exists(' . var_export($className, true) . ', false)';
$php[] = ' && file_exists(' . var_export($filepath, true) . '))';
$php[] = ' {';
$php[] = ' include ' . var_export($filepath, true) . ';';
$php[] = ' }';
$php[] = '';
}
// Same pattern as getParser(): an optional rendererSetup callback receives the renderer before it is returned
if (isset($options['rendererSetup']))
{
$php[] = ' $renderer = ' . $this->exportObject($renderer) . ';';
$php[] = ' ' . $this->exportCallback($namespace, $options['rendererSetup'], '$renderer') . ';';
$php[] = '';
$php[] = ' return $renderer;';
}
else
{
$php[] = ' return ' . $this->exportObject($renderer) . ';';
}
$php[] = ' }';
$php[] = '}';
return implode("\n", $php);
}
/**
* Export a given callback as PHP code
*
* @param string $namespace Namespace in which the callback is executed
* @param callable $callback Original callback
* @param string $argument Callback's argument (as PHP code)
* @return string PHP code
*/
protected function exportCallback($namespace, callable $callback, $argument)
{
	// A ['class', 'method'] pair whose first element is a string is flattened to 'class::method'
	$isStaticPair = is_array($callback) && is_string($callback[0]);
	if ($isStaticPair)
	{
		$callback = $callback[0] . '::' . $callback[1];
	}

	// Anything that is still not a string is exported and invoked through call_user_func()
	if (!is_string($callback))
	{
		$exported = var_export($callback, true);

		return 'call_user_func(' . $exported . ', ' . $argument . ')';
	}

	// Work on a name that always starts with a backslash
	$name = ($callback[0] === '\\') ? $callback : '\\' . $callback;

	// Drop the leading "\<namespace>\" when the name lives in the given namespace,
	// e.g. \foo\bar::baz becomes bar::baz in namespace foo
	$prefix    = '\\' . $namespace . '\\';
	$prefixLen = strlen($prefix);
	if (strncmp($name, $prefix, $prefixLen) === 0)
	{
		$name = substr($name, $prefixLen);
	}

	return $name . '(' . $argument . ')';
}
/**
* Serialize and export a given object as PHP code
*
* @param object $obj Original object
* @return string PHP code
*/
protected function exportObject($obj)
{
	// Run the configured serializer, then embed its result as a PHP literal
	// wrapped in a call to the configured unserializer
	$serialized = call_user_func($this->serializer, $obj);
	$literal    = var_export($serialized, true);

	return $this->unserializer . '(' . $literal . ')';
}
}

View File

@@ -1,19 +1,26 @@
<?php
/*
/**
* @package s9e\TextFormatter
* @copyright Copyright (c) 2010-2019 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator\Bundles;
use s9e\TextFormatter\Configurator;
use s9e\TextFormatter\Configurator\Bundle;
class Fatdown extends Bundle
{
/**
* {@inheritdoc}
*/
public function configure(Configurator $configurator)
{
$configurator->urlConfig->allowScheme('ftp');
$configurator->Litedown->decodeHtmlEntities = \true;
$configurator->urlConfig->allowScheme('mailto');
$configurator->Litedown->decodeHtmlEntities = true;
$configurator->Autoemail;
$configurator->Autolink;
$configurator->Escaper;
@@ -21,6 +28,7 @@ class Fatdown extends Bundle
$configurator->HTMLComments;
$configurator->HTMLEntities;
$configurator->PipeTables;
$htmlAliases = [
'a' => ['URL', 'href' => 'url'],
'hr' => 'HR',
@@ -30,15 +38,23 @@ class Fatdown extends Bundle
'sup' => 'SUP'
];
foreach ($htmlAliases as $elName => $alias)
if (\is_array($alias))
{
if (is_array($alias))
{
$configurator->HTMLElements->aliasElement($elName, $alias[0]);
unset($alias[0]);
foreach ($alias as $attrName => $alias)
{
$configurator->HTMLElements->aliasAttribute($elName, $attrName, $alias);
}
}
else
{
$configurator->HTMLElements->aliasElement($elName, $alias);
}
}
$htmlElements = [
'abbr' => ['title'],
'b',
@@ -76,7 +92,7 @@ class Fatdown extends Bundle
];
foreach ($htmlElements as $k => $v)
{
if (\is_numeric($k))
if (is_numeric($k))
{
$elName = $v;
$attrNames = [];
@@ -86,15 +102,20 @@ class Fatdown extends Bundle
$elName = $k;
$attrNames = $v;
}
$configurator->HTMLElements->allowElement($elName);
foreach ($attrNames as $attrName)
{
$configurator->HTMLElements->allowAttribute($elName, $attrName);
}
}
$configurator->tags['html:dd']->rules->createParagraphs(\false);
$configurator->tags['html:dt']->rules->createParagraphs(\false);
$configurator->tags['html:td']->rules->createParagraphs(\false);
$configurator->tags['html:th']->rules->createParagraphs(\false);
$configurator->plugins->load('MediaEmbed', ['createMediaBBCode' => \false]);
$configurator->tags['html:dd']->rules->createParagraphs(false);
$configurator->tags['html:dt']->rules->createParagraphs(false);
$configurator->tags['html:td']->rules->createParagraphs(false);
$configurator->tags['html:th']->rules->createParagraphs(false);
$configurator->plugins->load('MediaEmbed', ['createMediaBBCode' => false]);
$sites = [
'bandcamp',
'dailymotion',
@@ -108,6 +129,8 @@ class Fatdown extends Bundle
'youtube'
];
foreach ($sites as $site)
{
$configurator->MediaEmbed->add($site);
}
}
}

View File

@@ -1,18 +1,24 @@
<?php
/*
/**
* @package s9e\TextFormatter
* @copyright Copyright (c) 2010-2019 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator\Bundles;
use s9e\TextFormatter\Configurator;
use s9e\TextFormatter\Configurator\Bundle;
class Forum extends Bundle
{
/**
* {@inheritdoc}
*/
public function configure(Configurator $configurator)
{
$configurator->rootRules->enableAutoLineBreaks();
$configurator->BBCodes->addFromRepository('B');
$configurator->BBCodes->addFromRepository('CENTER');
$configurator->BBCodes->addFromRepository('CODE');
@@ -42,12 +48,14 @@ class Forum extends Bundle
$configurator->BBCodes->addFromRepository('U');
$configurator->BBCodes->addFromRepository('UL');
$configurator->BBCodes->addFromRepository('URL');
$configurator->rendering->parameters = [
'L_WROTE' => 'wrote:',
'L_HIDE' => 'Hide',
'L_SHOW' => 'Show',
'L_SPOILER' => 'Spoiler'
];
$emoticons = [
':)' => '1F642',
':-)' => '1F642',
@@ -73,14 +81,19 @@ class Forum extends Bundle
':o' => '1F62E',
':lol:' => '1F602'
];
foreach ($emoticons as $code => $hex)
$configurator->Emoji->addAlias($code, \html_entity_decode('&#x' . $hex . ';'));
{
$configurator->Emoji->aliases[$code] = html_entity_decode('&#x' . $hex . ';');
}
$sites = ['bandcamp', 'dailymotion', 'facebook', 'indiegogo', 'instagram', 'kickstarter', 'liveleak', 'soundcloud', 'twitch', 'twitter', 'vimeo', 'vine', 'wshh', 'youtube'];
foreach ($sites as $siteId)
{
$configurator->MediaEmbed->add($siteId);
$configurator->BBCodes->add($siteId, ['contentAttributes' => ['id', 'url']]);
}
$configurator->Autoemail;
$configurator->Autolink;
}

View File

@@ -1,24 +1,34 @@
<?php
/*
/**
* @package s9e\TextFormatter
* @copyright Copyright (c) 2010-2019 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator\Bundles;
use DOMDocument;
use s9e\TextFormatter\Configurator;
use s9e\TextFormatter\Configurator\Bundle;
class MediaPack extends Bundle
{
	/**
	* Load the MediaEmbed plugin if it is not set yet, then add every site listed in its
	* defaultSites property
	*
	* @param  Configurator $configurator
	* @return void
	*/
	public function configure(Configurator $configurator)
	{
		$alreadyLoaded = isset($configurator->MediaEmbed);
		if (!$alreadyLoaded)
		{
			// Only create BBCodes if the BBCodes plugin is already loaded
			$configurator->plugins->load(
				'MediaEmbed',
				['createMediaBBCode' => isset($configurator->BBCodes)]
			);
		}

		// Only the site IDs (keys) are needed, the per-site config is not used here
		foreach ($configurator->MediaEmbed->defaultSites as $siteId => $unusedSiteConfig)
		{
			$configurator->MediaEmbed->add($siteId);
		}
	}
}

View File

@@ -0,0 +1,60 @@
<?php
/**
* @package s9e\TextFormatter
* @copyright Copyright (c) 2010-2019 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator\Collections;
use RuntimeException;
use s9e\TextFormatter\Configurator\Items\Attribute;
use s9e\TextFormatter\Configurator\Validators\AttributeName;
class AttributeCollection extends NormalizedCollection
{
	/**
	* @var string Action to take when add() is called with an attribute name that already exists
	*/
	protected $onDuplicateAction = 'replace';

	/**
	* Build the exception used when an attribute name is already taken
	*
	* @param  string $key Attribute name
	* @return RuntimeException
	*/
	protected function getAlreadyExistsException($key)
	{
		$message = "Attribute '" . $key . "' already exists";

		return new RuntimeException($message);
	}

	/**
	* Build the exception used when an attribute name is unknown
	*
	* @param  string $key Attribute name
	* @return RuntimeException
	*/
	protected function getNotExistException($key)
	{
		$message = "Attribute '" . $key . "' does not exist";

		return new RuntimeException($message);
	}

	/**
	* Normalize a key through the AttributeName validator
	*
	* @param  string $key
	* @return string
	*/
	public function normalizeKey($key)
	{
		$attrName = AttributeName::normalize($key);

		return $attrName;
	}

	/**
	* Return the value unchanged if it is an Attribute, otherwise use it to construct one
	*
	* @param  array|null|Attribute $value
	* @return Attribute
	*/
	public function normalizeValue($value)
	{
		if ($value instanceof Attribute)
		{
			return $value;
		}

		return new Attribute($value);
	}
}

View File

@@ -0,0 +1,35 @@
<?php
/**
* @package s9e\TextFormatter
* @copyright Copyright (c) 2010-2019 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator\Collections;
class AttributeFilterChain extends FilterChain
{
	/**
	* Return the name of the class used for the filters held in this chain
	*
	* @return string
	*/
	public function getFilterClassName()
	{
		return 's9e\\TextFormatter\\Configurator\\Items\\AttributeFilter';
	}

	/**
	* Normalize a value into an AttributeFilter instance
	*
	* Strings made of "#" followed by word characters are first replaced with the matching
	* default filter, then normalization is delegated to the parent class
	*
	* @param  mixed $value Either a valid callback or an instance of AttributeFilter
	* @return \s9e\TextFormatter\Configurator\Items\AttributeFilter Normalized filter
	*/
	public function normalizeValue($value)
	{
		$isDefaultFilterName = is_string($value) && preg_match('(^#\\w+$)', $value);
		if ($isDefaultFilterName)
		{
			// Strip the leading "#" to get the filter's name
			$filterName = substr($value, 1);
			$value      = AttributeFilterCollection::getDefaultFilter($filterName);
		}

		return parent::normalizeValue($value);
	}
}

View File

@@ -0,0 +1,108 @@
<?php
/**
* @package s9e\TextFormatter
* @copyright Copyright (c) 2010-2019 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator\Collections;
use InvalidArgumentException;
use s9e\TextFormatter\Configurator\Items\AttributeFilter;
class AttributeFilterCollection extends NormalizedCollection
{
	/**
	* Return a value from this collection
	*
	* Filters that are not in the collection yet are created on demand: names starting with "#"
	* map to a default filter, any other key is wrapped in a new AttributeFilter. The returned
	* filter is always a clone so that the stored instance is never modified by the caller.
	*
	* @param  string $key
	* @return \s9e\TextFormatter\Configurator\Items\ProgrammableCallback
	*/
	public function get($key)
	{
		$key = $this->normalizeKey($key);

		if (!$this->exists($key))
		{
			if ($key[0] === '#')
			{
				$this->set($key, self::getDefaultFilter(substr($key, 1)));
			}
			else
			{
				$this->set($key, new AttributeFilter($key));
			}
		}

		// Get the filter from the collection
		$filter = parent::get($key);

		// Clone it to preserve the original instance
		$filter = clone $filter;

		return $filter;
	}

	/**
	* Get an instance of the default filter for given name
	*
	* @param  string          $filterName Filter name, e.g. "int" or "color"
	* @return AttributeFilter
	*
	* @throws InvalidArgumentException if no filter class exists for that name
	*/
	public static function getDefaultFilter($filterName)
	{
		// Class names follow the pattern <Name>Filter, e.g. "int" => IntFilter
		$filterName = ucfirst(strtolower($filterName));
		$className  = 's9e\\TextFormatter\\Configurator\\Items\\AttributeFilters\\' . $filterName . 'Filter';

		if (!class_exists($className))
		{
			throw new InvalidArgumentException("Unknown attribute filter '" . $filterName . "'");
		}

		return new $className;
	}

	/**
	* Normalize the name of an attribute filter
	*
	* @param  string $key
	* @return string
	*
	* @throws InvalidArgumentException if the key is neither a "#name" nor a callable string
	*/
	public function normalizeKey($key)
	{
		// Arrays and objects that cannot be cast to a string can never be a filter name. Reject
		// them here so that they produce the same exception as any other invalid name instead of
		// a PHP warning/error from preg_match() or from the string concatenation below
		if (is_array($key) || (is_object($key) && !method_exists($key, '__toString')))
		{
			throw new InvalidArgumentException('Invalid filter name: expected a string, got ' . gettype($key));
		}

		// Built-in/custom filter, normalized to lowercase
		if (preg_match('/^#[a-z_0-9]+$/Di', $key))
		{
			return strtolower($key);
		}

		// Valid callback
		if (is_string($key) && is_callable($key))
		{
			return $key;
		}

		throw new InvalidArgumentException("Invalid filter name '" . $key . "'");
	}

	/**
	* Normalize a value to an instance of AttributeFilter
	*
	* @param  callable|AttributeFilter $value
	* @return AttributeFilter
	*
	* @throws InvalidArgumentException if the value is neither an AttributeFilter nor callable
	*/
	public function normalizeValue($value)
	{
		if ($value instanceof AttributeFilter)
		{
			return $value;
		}

		if (is_callable($value))
		{
			return new AttributeFilter($value);
		}

		throw new InvalidArgumentException('Argument 1 passed to ' . __METHOD__ . ' must be a valid callback or an instance of s9e\\TextFormatter\\Configurator\\Items\\AttributeFilter');
	}
}

View File

@@ -1,22 +1,39 @@
<?php
/*
/**
* @package s9e\TextFormatter
* @copyright Copyright (c) 2010-2019 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator\Collections;
use s9e\TextFormatter\Configurator\Validators\AttributeName;
/**
* Hosts a list of attribute names. The config array it returns contains the names, deduplicated and
* sorted
*/
class AttributeList extends NormalizedList
{
/**
* Normalize the name of an attribute
*
* @param string $attrName
* @return string
*/
public function normalizeValue($attrName)
{
return AttributeName::normalize($attrName);
}
/**
* {@inheritdoc}
*/
public function asConfig()
{
$list = \array_unique($this->items);
\sort($list);
$list = array_unique($this->items);
sort($list);
return $list;
}
}

View File

@@ -0,0 +1,110 @@
<?php
/**
* @package s9e\TextFormatter
* @copyright Copyright (c) 2010-2019 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator\Collections;
use InvalidArgumentException;
use s9e\TextFormatter\Configurator\Helpers\RegexpParser;
use s9e\TextFormatter\Configurator\Items\AttributePreprocessor;
use s9e\TextFormatter\Configurator\Items\Regexp;
use s9e\TextFormatter\Configurator\JavaScript\RegexpConvertor;
use s9e\TextFormatter\Configurator\Validators\AttributeName;
class AttributePreprocessorCollection extends Collection
{
	/**
	* Add an attribute preprocessor
	*
	* Items are keyed by the serialized [attribute name, regexp] pair, so adding the same pair
	* twice replaces the previous entry.
	*
	* @param  string                $attrName Original name
	* @param  string                $regexp   Preprocessor's regexp
	* @return AttributePreprocessor
	*/
	public function add($attrName, $regexp)
	{
		$attrName = AttributeName::normalize($attrName);

		$k = serialize([$attrName, $regexp]);
		$this->items[$k] = new AttributePreprocessor($regexp);

		return $this->items[$k];
	}

	/**
	* @return string Name of the attribute the attribute processor uses as source
	*/
	public function key()
	{
		// The internal key is a serialized [attribute name, regexp] pair, see add()
		list($attrName) = unserialize(key($this->items));

		return $attrName;
	}

	/**
	* Merge a set of attribute preprocessors into this collection
	*
	* @param  array|AttributePreprocessorCollection $attributePreprocessors Instance of AttributePreprocessorCollection or 2D array of [[attrName,regexp|AttributePreprocessor]]
	* @return void
	*
	* @throws InvalidArgumentException if the argument or one of its elements is malformed
	*/
	public function merge($attributePreprocessors)
	{
		$error = false;

		if ($attributePreprocessors instanceof AttributePreprocessorCollection)
		{
			foreach ($attributePreprocessors as $attrName => $attributePreprocessor)
			{
				$this->add($attrName, $attributePreprocessor->getRegexp());
			}
		}
		elseif (is_array($attributePreprocessors))
		{
			// This should be a list where each element is a [attrName,regexp] pair, or
			// [attrName,AttributePreprocessor]
			foreach ($attributePreprocessors as $values)
			{
				// Both elements of the pair must be present before they are destructured,
				// otherwise a missing one would silently become NULL
				if (!is_array($values) || !isset($values[0], $values[1]))
				{
					$error = true;
					break;
				}

				list($attrName, $value) = $values;

				if ($value instanceof AttributePreprocessor)
				{
					$value = $value->getRegexp();
				}

				$this->add($attrName, $value);
			}
		}
		else
		{
			$error = true;
		}

		if ($error)
		{
			throw new InvalidArgumentException('merge() expects an instance of AttributePreprocessorCollection or a 2D array where each element is a [attribute name, regexp] pair');
		}
	}

	/**
	* Return a list of [attribute name, preprocessor, capture names] entries
	*
	* @return array
	*/
	public function asConfig()
	{
		$config = [];

		foreach ($this->items as $k => $ap)
		{
			// Recover the attribute name from the serialized key, see add()
			list($attrName) = unserialize($k);

			$config[] = [$attrName, $ap, $ap->getCaptureNames()];
		}

		return $config;
	}
}

View File

@@ -0,0 +1,93 @@
<?php
/**
* @package s9e\TextFormatter
* @copyright Copyright (c) 2010-2019 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator\Collections;
use Countable;
use Iterator;
use s9e\TextFormatter\Configurator\ConfigProvider;
use s9e\TextFormatter\Configurator\Helpers\ConfigHelper;
class Collection implements ConfigProvider, Countable, Iterator
{
	/**
	* @var array Items stored in this collection
	*/
	protected $items = [];

	/**
	* Remove every item from this collection
	*/
	public function clear()
	{
		$this->items = [];
	}

	/**
	* Return the items converted by ConfigHelper::toArray()
	*
	* @return mixed
	*/
	public function asConfig()
	{
		$config = ConfigHelper::toArray($this->items, true);

		return $config;
	}

	//==========================================================================
	// Countable
	//==========================================================================

	/**
	* @return integer Number of items held
	*/
	public function count()
	{
		$total = count($this->items);

		return $total;
	}

	//==========================================================================
	// Iterator, backed by the internal pointer of the items array
	//==========================================================================

	/**
	* @return mixed Item at the current position
	*/
	public function current()
	{
		$item = current($this->items);

		return $item;
	}

	/**
	* @return integer|string Key at the current position
	*/
	public function key()
	{
		$position = key($this->items);

		return $position;
	}

	/**
	* @return mixed Whatever next() returns for the items array
	*/
	public function next()
	{
		$following = next($this->items);

		return $following;
	}

	/**
	* @return void
	*/
	public function rewind()
	{
		reset($this->items);
	}

	/**
	* @return bool Whether the current position has a key
	*/
	public function valid()
	{
		return !is_null(key($this->items));
	}
}

View File

@@ -0,0 +1,95 @@
<?php
/**
* @package s9e\TextFormatter
* @copyright Copyright (c) 2010-2019 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator\Collections;
use InvalidArgumentException;
use s9e\TextFormatter\Configurator\Helpers\FilterHelper;
use s9e\TextFormatter\Configurator\Items\Filter;
use s9e\TextFormatter\Configurator\Items\ProgrammableCallback;
abstract class FilterChain extends NormalizedList
{
	/**
	* Get the name of the class that filters in this chain are instances of
	*
	* @return string
	*/
	abstract protected function getFilterClassName();

	/**
	* Test whether one of the filters in this chain uses given callback
	*
	* @param  callable $callback
	* @return bool
	*/
	public function containsCallback(callable $callback)
	{
		// Pass the callback through ProgrammableCallback so both sides are compared in the same form
		$needle = (new ProgrammableCallback($callback))->getCallback();

		foreach ($this->items as $item)
		{
			if ($item->getCallback() === $needle)
			{
				return true;
			}
		}

		return false;
	}

	/**
	* Normalize a value into an instance of the class named by getFilterClassName()
	*
	* @param  mixed  $value Either a valid callback, a filter string or a filter instance
	* @return Filter        Normalized filter
	*/
	public function normalizeValue($value)
	{
		// Strings that contain a parenthesis are handed to createFilter()
		if (is_string($value) && strpos($value, '(') !== false)
		{
			return $this->createFilter($value);
		}

		$filterClass = $this->getFilterClassName();
		if ($value instanceof $filterClass)
		{
			return $value;
		}

		if (is_callable($value))
		{
			return new $filterClass($value);
		}

		throw new InvalidArgumentException('Filter ' . var_export($value, true) . ' is neither callable nor an instance of ' . $filterClass);
	}

	/**
	* Create and return a filter from its string representation
	*
	* @param  string $filterString
	* @return Filter
	*/
	protected function createFilter($filterString)
	{
		$parsed = FilterHelper::parse($filterString);
		$filter = $this->normalizeValue($parsed['filter']);

		if (!isset($parsed['params']))
		{
			return $filter;
		}

		// Replace the filter's parameters with the ones parsed from the string
		$filter->resetParameters();
		foreach ($parsed['params'] as $param)
		{
			[$type, $value] = $param;
			$filter->{'addParameterBy' . $type}($value);
		}

		return $filter;
	}
}

View File

@@ -0,0 +1,96 @@
<?php
/**
* @package s9e\TextFormatter
* @copyright Copyright (c) 2010-2019 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator\Collections;
use s9e\TextFormatter\Configurator\Helpers\RegexpBuilder;
use s9e\TextFormatter\Configurator\Items\Regexp;
class HostnameList extends NormalizedList
{
	/**
	* Return this hostname list as a regexp's config
	*
	* @return Regexp|null A Regexp instance, or NULL if the collection is empty
	*/
	public function asConfig()
	{
		if (empty($this->items))
		{
			return null;
		}

		return new Regexp($this->getRegexp());
	}

	/**
	* Return a regexp that matches the list of hostnames
	*
	* @return string
	*/
	public function getRegexp()
	{
		$hosts = [];
		foreach ($this->items as $host)
		{
			$hosts[] = $this->normalizeHostmask($host);
		}

		$regexp = RegexpBuilder::fromList(
			$hosts,
			[
				// Asterisks * are turned into a catch-all expression, while ^ and $ are preserved
				'specialChars' => [
					'*' => '.*',
					'^' => '^',
					'$' => '$'
				]
			]
		);

		return '/' . $regexp . '/DSis';
	}

	/**
	* Normalize a hostmask to a regular expression
	*
	* @param  string $host Hostname or hostmask
	* @return string
	*/
	protected function normalizeHostmask($host)
	{
		// Hosts that contain non-ASCII bytes are converted to ASCII if idn_to_ascii() is available
		if (preg_match('#[\\x80-\xff]#', $host) && function_exists('idn_to_ascii'))
		{
			$variant = (defined('INTL_IDNA_VARIANT_UTS46')) ? INTL_IDNA_VARIANT_UTS46 : 0;
			$ascii   = idn_to_ascii($host, 0, $variant);

			// idn_to_ascii() returns false on failure. Using that value would turn the host
			// into the pattern ^$ which matches an empty hostname, so the original host is
			// kept instead
			if ($ascii !== false)
			{
				$host = $ascii;
			}
		}

		if (substr($host, 0, 1) === '*')
		{
			// *.example.com => /\.example\.com$/
			$host = ltrim($host, '*');
		}
		else
		{
			// example.com => /^example\.com$/
			$host = '^' . $host;
		}

		if (substr($host, -1) === '*')
		{
			// example.* => /^example\./
			$host = rtrim($host, '*');
		}
		else
		{
			// example.com => /^example\.com$/
			$host .= '$';
		}

		return $host;
	}
}

View File

@@ -1,33 +1,61 @@
<?php
/*
/**
* @package s9e\TextFormatter
* @copyright Copyright (c) 2010-2019 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator\Collections;
use InvalidArgumentException;
use ReflectionClass;
use s9e\TextFormatter\Configurator\JavaScript\Minifier;
class MinifierList extends NormalizedList
{
/**
* Normalize the value to an object
*
* @param Minifier|string $minifier
* @return Minifier
*/
public function normalizeValue($minifier)
{
if (\is_string($minifier))
if (is_string($minifier))
{
$minifier = $this->getMinifierInstance($minifier);
elseif (\is_array($minifier) && !empty($minifier[0]))
$minifier = $this->getMinifierInstance($minifier[0], \array_slice($minifier, 1));
}
elseif (is_array($minifier) && !empty($minifier[0]))
{
$minifier = $this->getMinifierInstance($minifier[0], array_slice($minifier, 1));
}
if (!($minifier instanceof Minifier))
throw new InvalidArgumentException('Invalid minifier ' . \var_export($minifier, \true));
{
throw new InvalidArgumentException('Invalid minifier ' . var_export($minifier, true));
}
return $minifier;
}
/**
* Create and return a Minifier instance
*
* @param string Minifier's name
* @param array Constructor's arguments
* @return Minifier
*/
protected function getMinifierInstance($name, array $args = [])
{
$className = 's9e\\TextFormatter\\Configurator\\JavaScript\\Minifiers\\' . $name;
if (!\class_exists($className))
throw new InvalidArgumentException('Invalid minifier ' . \var_export($name, \true));
if (!class_exists($className))
{
throw new InvalidArgumentException('Invalid minifier ' . var_export($name, true));
}
$reflection = new ReflectionClass($className);
$minifier = (empty($args)) ? $reflection->newInstance() : $reflection->newInstanceArgs($args);
return $minifier;
}
}

View File

@@ -0,0 +1,262 @@
<?php
/**
* @package s9e\TextFormatter
* @copyright Copyright (c) 2010-2019 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator\Collections;
use ArrayAccess;
use InvalidArgumentException;
use RuntimeException;
class NormalizedCollection extends Collection implements ArrayAccess
{
    /**
     * @var string Action to take when add() is called with a key that already exists
     */
    protected $onDuplicateAction = 'error';

    /**
     * {@inheritdoc}
     *
     * The parent's config is sorted by key before being returned
     */
    public function asConfig()
    {
        $config = parent::asConfig();
        ksort($config);
        return $config;
    }

    /**
     * Query and set the action to take when add() is called with a key that already exists
     *
     * When called without any argument, the current action is returned and left untouched
     *
     * @param  string|null $action If specified: either "error", "ignore" or "replace"
     * @return string              Old action
     * @throws InvalidArgumentException If an argument is passed and it is not a supported action
     */
    public function onDuplicate($action = null)
    {
        // Save the old action so it can be returned
        $old = $this->onDuplicateAction;

        // Only validate and overwrite the action if the caller actually passed one. A bare
        // onDuplicate() call is a query and must not reset the configured action
        if (func_num_args())
        {
            if ($action !== 'error' && $action !== 'ignore' && $action !== 'replace')
            {
                throw new InvalidArgumentException("Invalid onDuplicate action '" . $action . "'. Expected: 'error', 'ignore' or 'replace'");
            }
            $this->onDuplicateAction = $action;
        }
        return $old;
    }

    //==========================================================================
    // Overridable methods
    //==========================================================================

    /**
     * Return the exception that is thrown when creating an item using a key that already exists
     *
     * @param  string $key Item's key
     * @return RuntimeException
     */
    protected function getAlreadyExistsException($key)
    {
        return new RuntimeException("Item '" . $key . "' already exists");
    }

    /**
     * Return the exception that is thrown when accessing an item that does not exist
     *
     * @param  string $key Item's key
     * @return RuntimeException
     */
    protected function getNotExistException($key)
    {
        return new RuntimeException("Item '" . $key . "' does not exist");
    }

    /**
     * Normalize an item's key
     *
     * This method can be overridden to implement keys normalization or implement constraints.
     * The default implementation returns the key unchanged
     *
     * @param  string $key Original key
     * @return string      Normalized key
     */
    public function normalizeKey($key)
    {
        return $key;
    }

    /**
     * Normalize a value for storage
     *
     * This method can be overridden to implement value normalization. The default
     * implementation returns the value unchanged
     *
     * @param  mixed $value Original value
     * @return mixed        Normalized value
     */
    public function normalizeValue($value)
    {
        return $value;
    }

    //==========================================================================
    // Items access/manipulation
    //==========================================================================

    /**
     * Add an item to this collection
     *
     * NOTE: relies on exists() to check the key for invalid values and on set() to normalize it
     *
     * @param  string $key   Item's key
     * @param  mixed  $value Item's value
     * @return mixed         Normalized value
     */
    public function add($key, $value = null)
    {
        // Test whether this key is already in use
        if ($this->exists($key))
        {
            // If the action is "ignore" we return the old value, if it's "error" we throw an
            // exception. Otherwise, we keep going and replace the value
            if ($this->onDuplicateAction === 'ignore')
            {
                return $this->get($key);
            }
            elseif ($this->onDuplicateAction === 'error')
            {
                throw $this->getAlreadyExistsException($key);
            }
        }
        return $this->set($key, $value);
    }

    /**
     * Test whether a given value is present in this collection
     *
     * The value is normalized first, then compared loosely against the stored items
     *
     * @param  mixed $value Original value
     * @return bool         Whether the normalized value was found in this collection
     */
    public function contains($value)
    {
        return in_array($this->normalizeValue($value), $this->items);
    }

    /**
     * Delete an item from this collection
     *
     * @param  string $key Item's key
     * @return void
     */
    public function delete($key)
    {
        $key = $this->normalizeKey($key);
        unset($this->items[$key]);
    }

    /**
     * Test whether an item of given key exists
     *
     * @param  string $key Item's key
     * @return bool        Whether this key exists in this collection
     */
    public function exists($key)
    {
        $key = $this->normalizeKey($key);
        return array_key_exists($key, $this->items);
    }

    /**
     * Return a value from this collection
     *
     * @param  string $key Item's key
     * @return mixed       Normalized value
     * @throws RuntimeException If the key does not exist, see getNotExistException()
     */
    public function get($key)
    {
        if (!$this->exists($key))
        {
            throw $this->getNotExistException($key);
        }
        $key = $this->normalizeKey($key);
        return $this->items[$key];
    }

    /**
     * Find the index of a given value
     *
     * Will return the first key associated with the given value, or FALSE if the value is not found
     *
     * @param  mixed $value Original value
     * @return mixed        Index of the value, or FALSE if not found
     */
    public function indexOf($value)
    {
        return array_search($this->normalizeValue($value), $this->items);
    }

    /**
     * Set and overwrite a value in this collection
     *
     * @param  string $key   Item's key
     * @param  mixed  $value Item's value
     * @return mixed         Normalized value
     */
    public function set($key, $value)
    {
        $key = $this->normalizeKey($key);
        $this->items[$key] = $this->normalizeValue($value);
        return $this->items[$key];
    }

    //==========================================================================
    // ArrayAccess stuff
    //==========================================================================

    /**
     * @param  string|integer $offset
     * @return bool
     */
    public function offsetExists($offset)
    {
        return $this->exists($offset);
    }

    /**
     * @param  string|integer $offset
     * @return mixed
     */
    public function offsetGet($offset)
    {
        return $this->get($offset);
    }

    /**
     * @param  string|integer $offset
     * @param  mixed          $value
     * @return void
     */
    public function offsetSet($offset, $value)
    {
        $this->set($offset, $value);
    }

    /**
     * @param  string|integer $offset
     * @return void
     */
    public function offsetUnset($offset)
    {
        $this->delete($offset);
    }
}

View File

@@ -0,0 +1,162 @@
<?php
/**
* @package s9e\TextFormatter
* @copyright Copyright (c) 2010-2019 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator\Collections;
use InvalidArgumentException;
class NormalizedList extends NormalizedCollection
{
    /**
     * Append a value to this list
     *
     * Overrides NormalizedCollection::add() and simply forwards to append()
     *
     * @param  mixed $value Original value
     * @param  null  $void  Unused
     * @return mixed        Normalized value
     */
    public function add($value, $void = null)
    {
        return $this->append($value);
    }

    /**
     * Normalize a value and push it at the end of this list
     *
     * @param  mixed $value Original value
     * @return mixed        Normalized value
     */
    public function append($value)
    {
        $normalized    = $this->normalizeValue($value);
        $this->items[] = $normalized;
        return $normalized;
    }

    /**
     * Delete the value stored at given offset, then renumber the list
     *
     * NOTE: parent::offsetUnset() maps to $this->delete() so this method covers both usages
     *
     * @param  string $key
     * @return void
     */
    public function delete($key)
    {
        parent::delete($key);

        // Renumber the remaining items so that the keys stay consecutive
        $this->items = array_values($this->items);
    }

    /**
     * Insert a value at an arbitrary 0-based position
     *
     * @param  integer $offset
     * @param  mixed   $value
     * @return mixed           Normalized value
     */
    public function insert($offset, $value)
    {
        $position   = $this->normalizeKey($offset);
        $normalized = $this->normalizeValue($value);

        // The value is wrapped in an array so that array_splice() inserts it as one single
        // element, even if the value is itself an array
        array_splice($this->items, $position, 0, [$normalized]);
        return $normalized;
    }

    /**
     * Ensure that the key is a valid offset
     *
     * Negative values count from the end of the list. The highest valid offset is the current
     * size of the list
     *
     * @param  mixed $key
     * @return integer
     * @throws InvalidArgumentException If the key is not an integer within the valid range
     */
    public function normalizeKey($key)
    {
        $size      = count($this->items);
        $candidate = (preg_match('(^-\\d+$)D', $key)) ? $size + $key : $key;
        $options   = ['options' => ['min_range' => 0, 'max_range' => $size]];
        $offset    = filter_var($candidate, FILTER_VALIDATE_INT, $options);
        if ($offset === false)
        {
            throw new InvalidArgumentException("Invalid offset '" . $key . "'");
        }
        return $offset;
    }

    /**
     * Custom offsetSet() implementation that appends the value when the offset is null
     *
     * @param  mixed $offset
     * @param  mixed $value
     * @return void
     */
    public function offsetSet($offset, $value)
    {
        if ($offset !== null)
        {
            // An explicit offset goes through the default implementation
            parent::offsetSet($offset, $value);
            return;
        }

        // $list[] = 'foo' maps to $list->append('foo')
        $this->append($value);
    }

    /**
     * Normalize a value and insert it at the start of this list
     *
     * @param  mixed $value
     * @return mixed        Normalized value
     */
    public function prepend($value)
    {
        $normalized = $this->normalizeValue($value);
        array_unshift($this->items, $normalized);
        return $normalized;
    }

    /**
     * Remove all items matching given value, then renumber the list
     *
     * @param  mixed $value Original value
     * @return integer      Number of items removed
     */
    public function remove($value)
    {
        $matchingKeys = array_keys($this->items, $this->normalizeValue($value));
        $remaining    = array_diff_key($this->items, array_flip($matchingKeys));
        $this->items  = array_values($remaining);
        return count($matchingKeys);
    }
}

View File

@@ -0,0 +1,149 @@
<?php
/**
* @package s9e\TextFormatter
* @copyright Copyright (c) 2010-2019 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator\Collections;
use InvalidArgumentException;
use RuntimeException;
use s9e\TextFormatter\Configurator;
use s9e\TextFormatter\Plugins\ConfiguratorBase;
class PluginCollection extends NormalizedCollection
{
    /**
     * @var Configurator Instance passed to every plugin created by this collection
     */
    protected $configurator;

    /**
     * Constructor
     *
     * @param Configurator $configurator
     */
    public function __construct(Configurator $configurator)
    {
        $this->configurator = $configurator;
    }

    /**
     * Call finalize() on every plugin stored in this collection
     *
     * @return void
     */
    public function finalize()
    {
        foreach ($this->items as $plugin)
        {
            $plugin->finalize();
        }
    }

    /**
     * Validate a plugin name
     *
     * @param  string $pluginName
     * @return string             The same name, unchanged
     * @throws InvalidArgumentException If the name does not match the expected format
     */
    public function normalizeKey($pluginName)
    {
        if (preg_match('#^[A-Z][A-Za-z_0-9]+$#D', $pluginName))
        {
            return $pluginName;
        }
        throw new InvalidArgumentException("Invalid plugin name '" . $pluginName . "'");
    }

    /**
     * Create a plugin instance/ensure it implements the correct interface
     *
     * @param  mixed $value Either a class name or an object that implements ConfiguratorBase
     * @return ConfiguratorBase
     * @throws InvalidArgumentException If the value is neither
     */
    public function normalizeValue($value)
    {
        if (is_string($value) && class_exists($value))
        {
            // The string names an existing class, instantiate it with our configurator
            $value = new $value($this->configurator);
        }
        if (!($value instanceof ConfiguratorBase))
        {
            throw new InvalidArgumentException('PluginCollection::normalizeValue() expects a class name or an object that implements s9e\\TextFormatter\\Plugins\\ConfiguratorBase');
        }
        return $value;
    }

    /**
     * Load a default plugin
     *
     * @param  string $pluginName    Name of the plugin
     * @param  array  $overrideProps Properties of the plugin will be overwritten with those
     * @return ConfiguratorBase
     * @throws RuntimeException If the plugin's Configurator class does not exist
     */
    public function load($pluginName, array $overrideProps = [])
    {
        // Validate the name before using it to build a class name
        $pluginName = $this->normalizeKey($pluginName);
        $className  = 's9e\\TextFormatter\\Plugins\\' . $pluginName . '\\Configurator';
        if (!class_exists($className))
        {
            throw new RuntimeException("Class '" . $className . "' does not exist");
        }

        // Create the plugin, store it under its name and hand it back
        $plugin = new $className($this->configurator, $overrideProps);
        $this->set($pluginName, $plugin);
        return $plugin;
    }

    /**
     * {@inheritdoc}
     *
     * Each plugin's config is completed with the plugin's base properties, then stripped of
     * the entries that do not need to be stored
     */
    public function asConfig()
    {
        $plugins = parent::asConfig();
        foreach (array_keys($plugins) as $pluginName)
        {
            $plugin       = $this->get($pluginName);
            $pluginConfig = $plugins[$pluginName];

            // Add base properties without overwriting those already set
            $pluginConfig += $plugin->getBaseProperties();

            // Remove quickMatch if it's false
            if ($pluginConfig['quickMatch'] === false)
            {
                unset($pluginConfig['quickMatch']);
            }

            // Remove regexpLimit if there's no regexp
            if (!isset($pluginConfig['regexp']))
            {
                unset($pluginConfig['regexpLimit']);
            }

            // Remove className if it's a default plugin using its default name. Its class name
            // will be generated by the parser automatically
            $defaultClassName = 's9e\\TextFormatter\\Plugins\\' . $pluginName . '\\Parser';
            if ($pluginConfig['className'] === $defaultClassName)
            {
                unset($pluginConfig['className']);
            }

            $plugins[$pluginName] = $pluginConfig;
        }
        return $plugins;
    }
}

View File

@@ -0,0 +1,42 @@
<?php
/**
* @package s9e\TextFormatter
* @copyright Copyright (c) 2010-2019 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator\Collections;
use InvalidArgumentException;
use s9e\TextFormatter\Configurator\RulesGenerators\Interfaces\BooleanRulesGenerator;
use s9e\TextFormatter\Configurator\RulesGenerators\Interfaces\TargetedRulesGenerator;
class RulesGeneratorList extends NormalizedList
{
    /**
     * Normalize the value to a rules generator object
     *
     * A string is treated as a class name relative to the
     * s9e\TextFormatter\Configurator\RulesGenerators namespace and instantiated if that class
     * exists
     *
     * @param  string|BooleanRulesGenerator|TargetedRulesGenerator $generator Either a string, or an instance of a rules generator
     * @return BooleanRulesGenerator|TargetedRulesGenerator
     * @throws InvalidArgumentException If the value does not resolve to a rules generator
     */
    public function normalizeValue($generator)
    {
        if (is_string($generator))
        {
            $className = 's9e\\TextFormatter\\Configurator\\RulesGenerators\\' . $generator;
            if (class_exists($className))
            {
                $generator = new $className;
            }
        }

        $isGenerator = ($generator instanceof BooleanRulesGenerator)
                    || ($generator instanceof TargetedRulesGenerator);
        if (!$isGenerator)
        {
            throw new InvalidArgumentException('Invalid rules generator ' . var_export($generator, true));
        }
        return $generator;
    }
}

View File

@@ -0,0 +1,310 @@
<?php
/**
* @package s9e\TextFormatter
* @copyright Copyright (c) 2010-2019 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator\Collections;
use ArrayAccess;
use BadMethodCallException;
use InvalidArgumentException;
use RuntimeException;
use s9e\TextFormatter\Configurator\ConfigProvider;
use s9e\TextFormatter\Configurator\JavaScript\Dictionary;
use s9e\TextFormatter\Configurator\Validators\TagName;
use s9e\TextFormatter\Parser;
/**
* @method void allowChild(string $tagName)
* @method void allowDescendant(string $tagName)
* @method void autoClose(bool $bool = true)
* @method void autoReopen(bool $bool = true)
* @method void breakParagraph(bool $bool = true)
* @method void closeAncestor(string $tagName)
* @method void closeParent(string $tagName)
* @method void createChild(string $tagName)
* @method void createParagraphs(bool $bool = true)
* @method void denyChild(string $tagName)
* @method void denyDescendant(string $tagName)
* @method void disableAutoLineBreaks(bool $bool = true)
* @method void enableAutoLineBreaks(bool $bool = true)
* @method void fosterParent(string $tagName)
* @method void ignoreSurroundingWhitespace(bool $bool = true)
* @method void ignoreTags(bool $bool = true)
* @method void ignoreText(bool $bool = true)
* @method void isTransparent(bool $bool = true)
* @method void preventLineBreaks(bool $bool = true)
* @method void requireParent(string $tagName)
* @method void requireAncestor(string $tagName)
* @method void suspendAutoLineBreaks(bool $bool = true)
* @method void trimFirstLine(bool $bool = true)
* @see /docs/Rules.md
*/
class Ruleset extends Collection implements ArrayAccess, ConfigProvider
{
    /**
     * @var array Supported rules (as keys) and the name of the method used to add them
     */
    protected $rules = [
        'allowChild'                  => 'addTargetedRule',
        'allowDescendant'             => 'addTargetedRule',
        'autoClose'                   => 'addBooleanRule',
        'autoReopen'                  => 'addBooleanRule',
        'breakParagraph'              => 'addBooleanRule',
        'closeAncestor'               => 'addTargetedRule',
        'closeParent'                 => 'addTargetedRule',
        'createChild'                 => 'addTargetedRule',
        'createParagraphs'            => 'addBooleanRule',
        'denyChild'                   => 'addTargetedRule',
        'denyDescendant'              => 'addTargetedRule',
        'disableAutoLineBreaks'       => 'addBooleanRule',
        'enableAutoLineBreaks'        => 'addBooleanRule',
        'fosterParent'                => 'addTargetedRule',
        'ignoreSurroundingWhitespace' => 'addBooleanRule',
        'ignoreTags'                  => 'addBooleanRule',
        'ignoreText'                  => 'addBooleanRule',
        'isTransparent'               => 'addBooleanRule',
        'preventLineBreaks'           => 'addBooleanRule',
        'requireParent'               => 'addTargetedRule',
        'requireAncestor'             => 'addTargetedRule',
        'suspendAutoLineBreaks'       => 'addBooleanRule',
        'trimFirstLine'               => 'addBooleanRule'
    ];

    /**
     * Add a rule to this set
     *
     * Dispatches the call to the method associated with the rule's name in $this->rules
     *
     * @param  string $methodName Rule name
     * @param  array  $args       Arguments used to add given rule
     * @return self
     * @throws BadMethodCallException If the name is not a supported rule
     */
    public function __call($methodName, array $args)
    {
        if (!isset($this->rules[$methodName]))
        {
            throw new BadMethodCallException("Undefined method '" . $methodName . "'");
        }

        // The rule name is passed as first argument, followed by the original arguments
        $handler = $this->rules[$methodName];
        $this->$handler($methodName, ...$args);
        return $this;
    }

    //==========================================================================
    // ArrayAccess methods
    //==========================================================================

    /**
     * Test whether a rule category exists
     *
     * @param  string $k Rule name, e.g. "allowChild" or "isTransparent"
     * @return bool
     */
    public function offsetExists($k)
    {
        return isset($this->items[$k]);
    }

    /**
     * Return the content of a rule category
     *
     * @param  string $k Rule name, e.g. "allowChild" or "isTransparent"
     * @return mixed
     */
    public function offsetGet($k)
    {
        return $this->items[$k];
    }

    /**
     * Not supported
     *
     * @throws RuntimeException Always
     */
    public function offsetSet($k, $v)
    {
        throw new RuntimeException('Not supported');
    }

    /**
     * Clear a subset of the rules
     *
     * @see remove()
     *
     * @param string $k Rule name, e.g. "allowChild" or "isTransparent"
     */
    public function offsetUnset($k)
    {
        $this->remove($k);
    }

    //==========================================================================
    // Generic methods
    //==========================================================================

    /**
     * {@inheritdoc}
     */
    public function asConfig()
    {
        $config = $this->items;

        // Remove rules that are not needed at parsing time. All of those are resolved when
        // building the allowed bitfields
        unset(
            $config['allowChild'],
            $config['allowDescendant'],
            $config['denyChild'],
            $config['denyDescendant'],
            $config['requireParent']
        );

        // Boolean rules and the bit that represents each of them
        $bitValues = [
            'autoClose'                   => Parser::RULE_AUTO_CLOSE,
            'autoReopen'                  => Parser::RULE_AUTO_REOPEN,
            'breakParagraph'              => Parser::RULE_BREAK_PARAGRAPH,
            'createParagraphs'            => Parser::RULE_CREATE_PARAGRAPHS,
            'disableAutoLineBreaks'       => Parser::RULE_DISABLE_AUTO_BR,
            'enableAutoLineBreaks'        => Parser::RULE_ENABLE_AUTO_BR,
            'ignoreSurroundingWhitespace' => Parser::RULE_IGNORE_WHITESPACE,
            'ignoreTags'                  => Parser::RULE_IGNORE_TAGS,
            'ignoreText'                  => Parser::RULE_IGNORE_TEXT,
            'isTransparent'               => Parser::RULE_IS_TRANSPARENT,
            'preventLineBreaks'           => Parser::RULE_PREVENT_BR,
            'suspendAutoLineBreaks'       => Parser::RULE_SUSPEND_AUTO_BR,
            'trimFirstLine'               => Parser::RULE_TRIM_FIRST_LINE
        ];

        // Pack the enabled boolean rules into a bitfield and drop their individual entries
        $flags = 0;
        foreach ($bitValues as $ruleName => $bitValue)
        {
            $flags |= (empty($config[$ruleName])) ? 0 : $bitValue;
            unset($config[$ruleName]);
        }

        // In order to speed up lookups, we use the tag names as keys
        foreach (['closeAncestor', 'closeParent', 'fosterParent'] as $ruleName)
        {
            if (!isset($config[$ruleName]))
            {
                continue;
            }
            $config[$ruleName] = new Dictionary(array_fill_keys($config[$ruleName], 1));
        }

        // Add the bitfield to the config
        $config['flags'] = $flags;
        return $config;
    }

    /**
     * Merge a set of rules into this collection
     *
     * @param  array|Ruleset $rules     2D array of rule definitions, or instance of Ruleset
     * @param  bool          $overwrite Whether to overwrite scalar rules (e.g. boolean rules)
     * @throws InvalidArgumentException If $rules is neither an array nor a Ruleset
     */
    public function merge($rules, $overwrite = true)
    {
        $isMergeable = is_array($rules) || ($rules instanceof self);
        if (!$isMergeable)
        {
            throw new InvalidArgumentException('merge() expects an array or an instance of Ruleset');
        }

        foreach ($rules as $action => $value)
        {
            if (is_array($value))
            {
                // Targeted rules: add every target one by one
                foreach ($value as $tagName)
                {
                    $this->$action($tagName);
                }
                continue;
            }

            // Scalar rules are only replaced if allowed, or if they are not set yet
            if ($overwrite || !isset($this->items[$action]))
            {
                $this->$action($value);
            }
        }
    }

    /**
     * Remove a specific rule, or all the rules of a given type
     *
     * @param  string $type    Type of rules to clear
     * @param  string $tagName Name of the target tag, or none to remove all rules of given type
     * @return void
     * @throws InvalidArgumentException If the type starts with defaultChildRule or defaultDescendantRule
     */
    public function remove($type, $tagName = null)
    {
        if (preg_match('(^default(?:Child|Descendant)Rule)', $type))
        {
            throw new InvalidArgumentException('Cannot remove ' . $type);
        }

        if (!isset($tagName))
        {
            // No target given, remove the whole category
            unset($this->items[$type]);
            return;
        }

        $tagName = TagName::normalize($tagName);
        if (!isset($this->items[$type]))
        {
            return;
        }

        // Compute the difference between current list and our one tag name
        $remaining = array_diff($this->items[$type], [$tagName]);
        if (empty($remaining))
        {
            // If the list is now empty, keep it neat and unset it
            unset($this->items[$type]);
        }
        else
        {
            // If the list still has names, keep it neat and rearrange keys
            $this->items[$type] = array_values($remaining);
        }
    }

    //==========================================================================
    // Rules
    //==========================================================================

    /**
     * Add a boolean rule
     *
     * @param  string $ruleName Name of the rule
     * @param  bool   $bool     Whether to enable or disable the rule
     * @return void
     * @throws InvalidArgumentException If $bool is not a boolean
     */
    protected function addBooleanRule($ruleName, $bool = true)
    {
        if (is_bool($bool))
        {
            $this->items[$ruleName] = $bool;
            return;
        }
        throw new InvalidArgumentException($ruleName . '() expects a boolean');
    }

    /**
     * Add a targeted rule
     *
     * The normalized tag name is appended to the list of targets stored for this rule
     *
     * @param  string $ruleName Name of the rule
     * @param  string $tagName  Name of the target tag
     * @return void
     */
    protected function addTargetedRule($ruleName, $tagName)
    {
        $target = TagName::normalize($tagName);
        $this->items[$ruleName][] = $target;
    }
}

View File

@@ -0,0 +1,43 @@
<?php
/**
* @package s9e\TextFormatter
* @copyright Copyright (c) 2010-2019 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator\Collections;
use InvalidArgumentException;
use s9e\TextFormatter\Configurator\Helpers\RegexpBuilder;
use s9e\TextFormatter\Configurator\Items\Regexp;
class SchemeList extends NormalizedList
{
    /**
     * Return this scheme list as a regexp
     *
     * The regexp is anchored at both ends and case-insensitive
     *
     * @return Regexp
     */
    public function asConfig()
    {
        $alternation = RegexpBuilder::fromList($this->items);
        return new Regexp('/^' . $alternation . '$/Di');
    }

    /**
     * Validate and normalize a scheme name to lowercase, or throw an exception if invalid
     *
     * @link http://tools.ietf.org/html/rfc3986#section-3.1
     *
     * @param  string $scheme URL scheme, e.g. "file" or "ed2k"
     * @return string         Lowercased scheme
     * @throws InvalidArgumentException If the scheme name is invalid
     */
    public function normalizeValue($scheme)
    {
        if (preg_match('#^[a-z][a-z0-9+\\-.]*$#Di', $scheme))
        {
            return strtolower($scheme);
        }
        throw new InvalidArgumentException("Invalid scheme name '" . $scheme . "'");
    }
}

View File

@@ -0,0 +1,60 @@
<?php
/**
* @package s9e\TextFormatter
* @copyright Copyright (c) 2010-2019 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator\Collections;
use RuntimeException;
use s9e\TextFormatter\Configurator\Items\Tag;
use s9e\TextFormatter\Configurator\Validators\TagName;
class TagCollection extends NormalizedCollection
{
    /**
     * {@inheritdoc}
     */
    protected $onDuplicateAction = 'replace';

    /**
     * {@inheritdoc}
     */
    protected function getAlreadyExistsException($key)
    {
        $message = "Tag '" . $key . "' already exists";
        return new RuntimeException($message);
    }

    /**
     * {@inheritdoc}
     */
    protected function getNotExistException($key)
    {
        $message = "Tag '" . $key . "' does not exist";
        return new RuntimeException($message);
    }

    /**
     * Normalize a tag name used as a key in this collection
     *
     * @param  string $key Original name
     * @return string      Normalized name
     */
    public function normalizeKey($key)
    {
        return TagName::normalize($key);
    }

    /**
     * Normalize a value to an instance of Tag
     *
     * An instance of Tag is returned as-is, anything else is passed to Tag's constructor
     *
     * @param  array|null|Tag $value
     * @return Tag
     */
    public function normalizeValue($value)
    {
        if ($value instanceof Tag)
        {
            return $value;
        }
        return new Tag($value);
    }
}

View File

@@ -0,0 +1,19 @@
<?php
/**
* @package s9e\TextFormatter
* @copyright Copyright (c) 2010-2019 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator\Collections;
class TagFilterChain extends FilterChain
{
    /**
     * {@inheritdoc}
     *
     * @return string Fully-qualified name of the TagFilter class
     */
    public function getFilterClassName()
    {
        $className = 's9e\\TextFormatter\\Configurator\\Items\\TagFilter';
        return $className;
    }
}

View File

@@ -1,22 +1,38 @@
<?php
/**
* @package s9e\TextFormatter
* @copyright Copyright (c) 2010-2019 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator\Collections;
use s9e\TextFormatter\Configurator\Validators\TagName;
/**
* Hosts a list of tag names. The config array it returns contains the names, deduplicated and sorted
*/
class TagList extends NormalizedList
{
    /**
     * Normalize a value to a tag name
     *
     * @param  string $attrName Original tag name
     * @return string           Normalized tag name
     */
    public function normalizeValue($attrName)
    {
        return TagName::normalize($attrName);
    }

    /**
     * {@inheritdoc}
     *
     * @return array Tag names, deduplicated and sorted
     */
    public function asConfig()
    {
        $list = array_unique($this->items);
        sort($list);
        return $list;
    }
}

View File

@@ -0,0 +1,30 @@
<?php
/**
* @package s9e\TextFormatter
* @copyright Copyright (c) 2010-2019 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator\Collections;
use s9e\TextFormatter\Configurator\TemplateCheck;
class TemplateCheckList extends NormalizedList
{
    /**
     * Normalize the value to an instance of TemplateCheck
     *
     * A string is treated as a class name relative to the
     * s9e\TextFormatter\Configurator\TemplateChecks namespace and instantiated if that class
     * exists
     *
     * @param  mixed $check Either a string, or an instance of TemplateCheck
     * @return TemplateCheck An instance of TemplateCheck
     * @throws \InvalidArgumentException If the value does not resolve to a TemplateCheck
     */
    public function normalizeValue($check)
    {
        if (is_string($check))
        {
            $className = 's9e\\TextFormatter\\Configurator\\TemplateChecks\\' . $check;
            if (class_exists($className))
            {
                $check = new $className;
            }
        }

        // Reject unknown names and unrelated values with a catchable exception rather than
        // letting "new" fail on a class that does not exist
        if (!($check instanceof TemplateCheck))
        {
            throw new \InvalidArgumentException('Invalid template check ' . var_export($check, true));
        }
        return $check;
    }
}

View File

@@ -0,0 +1,37 @@
<?php
/**
* @package s9e\TextFormatter
* @copyright Copyright (c) 2010-2019 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator\Collections;
use s9e\TextFormatter\Configurator\TemplateNormalizations\AbstractNormalization;
use s9e\TextFormatter\Configurator\TemplateNormalizations\Custom;
class TemplateNormalizationList extends NormalizedList
{
    /**
     * Normalize the value to an instance of AbstractNormalization
     *
     * An instance of AbstractNormalization is returned as-is and a callable is wrapped in an
     * instance of Custom. Any other string is treated as a class name relative to the
     * s9e\TextFormatter\Configurator\TemplateNormalizations namespace and instantiated if that
     * class exists
     *
     * @param  mixed $value Either a string, a callable, or an instance of AbstractNormalization
     * @return AbstractNormalization An instance of AbstractNormalization
     * @throws \InvalidArgumentException If the value does not resolve to an AbstractNormalization
     */
    public function normalizeValue($value)
    {
        if ($value instanceof AbstractNormalization)
        {
            return $value;
        }
        if (is_callable($value))
        {
            return new Custom($value);
        }
        if (is_string($value))
        {
            $className = 's9e\\TextFormatter\\Configurator\\TemplateNormalizations\\' . $value;
            if (class_exists($className))
            {
                $value = new $className;
            }
        }

        // Reject unknown names and unrelated values with a catchable exception rather than
        // letting "new" fail on a class that does not exist
        if (!($value instanceof AbstractNormalization))
        {
            throw new \InvalidArgumentException('Invalid template normalization ' . var_export($value, true));
        }
        return $value;
    }
}

View File

@@ -0,0 +1,35 @@
<?php
/**
* @package s9e\TextFormatter
* @copyright Copyright (c) 2010-2019 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator\Collections;
use s9e\TextFormatter\Configurator\Validators\TemplateParameterName;
class TemplateParameterCollection extends NormalizedCollection
{
    /**
     * Normalize a parameter name
     *
     * @param  string $key Original parameter name
     * @return string      Name as normalized by TemplateParameterName::normalize()
     */
    public function normalizeKey($key)
    {
        $paramName = TemplateParameterName::normalize($key);
        return $paramName;
    }

    /**
     * Normalize a parameter value by converting it to a string
     *
     * @param  mixed $value
     * @return string
     */
    public function normalizeValue($value)
    {
        return strval($value);
    }
}

View File

@@ -0,0 +1,21 @@
<?php
/**
* @package s9e\TextFormatter
* @copyright Copyright (c) 2010-2019 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator;
/**
* Implemented by objects that can export themselves as a config value used for parsing
*/
interface ConfigProvider
{
/**
* Return an array-based representation of this object to be used for parsing
*
* NOTE: if this method was named getConfig() it could interfere with magic getters from
* the Configurable trait
*
* @return array|\s9e\TextFormatter\Configurator\JavaScript\Dictionary|null
*/
public function asConfig();
}

View File

@@ -1,30 +1,61 @@
<?php
/**
* @package s9e\TextFormatter
* @copyright Copyright (c) 2010-2019 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator\Exceptions;
use DOMNode;
use RuntimeException;
use s9e\TextFormatter\Configurator\Helpers\TemplateHelper;
class UnsafeTemplateException extends RuntimeException
{
/**
* @var DOMNode The node that is responsible for this exception
*/
protected $node;
/**
 * Create the exception and remember the node it relates to
 *
 * @param string  $msg  Exception message
 * @param DOMNode $node The node that is responsible for this exception
 */
public function __construct($msg, DOMNode $node)
{
    $this->node = $node;
    parent::__construct($msg);
}
/**
* Return the node that has caused this exception
*
* This is the node currently stored in $this->node
*
* @return DOMNode
*/
public function getNode()
{
return $this->node;
}
/**
 * Return the source of the template that has caused this exception, with the node highlighted
 *
 * The work is delegated to TemplateHelper::highlightNode()
 *
 * @param  string $prepend HTML to prepend
 * @param  string $append  HTML to append
 * @return string          Template's source, as HTML
 */
public function highlightNode($prepend = '<span style="background-color:#ff0">', $append = '</span>')
{
    $html = TemplateHelper::highlightNode($this->node, $prepend, $append);
    return $html;
}
/**
* Change the node associated with this exception
*
* @param DOMNode $node
* @return void
*/
public function setNode(DOMNode $node)
{
$this->node = $node;

View File

@@ -0,0 +1,19 @@
<?php
/**
* @package s9e\TextFormatter
* @copyright Copyright (c) 2010-2019 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator;
/**
* Implemented by config values whose content depends on the target the config is filtered for
*/
interface FilterableConfigValue
{
/**
* Return the config value for given target
*
* NOTE: ConfigHelper::filterConfig() drops the entry from the filtered config if the value
* returned is null
*
* @param string $target Target parser, e.g. "PHP"
* @return mixed
*/
public function filterConfig($target);
}

View File

@@ -0,0 +1,119 @@
<?php
/**
* @package s9e\TextFormatter
* @copyright Copyright (c) 2010-2019 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator\Helpers;
use DOMAttr;
use RuntimeException;
abstract class AVTHelper
{
    /**
     * Split an attribute value template into a list of tokens
     *
     * Each token is an array of two elements: its type, either "expression" or "literal", and
     * its content. Doubled curly brackets in literals are unescaped
     *
     * @link https://www.w3.org/TR/1999/REC-xslt-19991116#dt-attribute-value-template
     *
     * @param  string $attrValue Attribute value
     * @return array             Array of tokens
     */
    public static function parse($attrValue)
    {
        preg_match_all('((*MARK:literal)(?:[^{]|\\{\\{)++|(*MARK:expression)\\{(?:[^}"\']|"[^"]*+"|\'[^\']*+\')++\\}|(*MARK:junk).++)s', $attrValue, $matches);

        $tokens = [];
        foreach ($matches[0] as $i => $str)
        {
            // Anything that is not marked as an expression is treated as a literal
            $tokens[] = ($matches['MARK'][$i] === 'expression')
                      ? ['expression', substr($str, 1, -1)]
                      : ['literal', strtr($str, ['{{' => '{', '}}' => '}'])];
        }
        return $tokens;
    }

    /**
     * Replace the value of an attribute via the provided callback
     *
     * The callback will receive an array containing the type and value of each token in the AVT.
     * Its return value should use the same format
     *
     * @param  DOMAttr  $attribute
     * @param  callable $callback
     * @return void
     */
    public static function replace(DOMAttr $attribute, callable $callback)
    {
        $tokens    = array_map($callback, self::parse($attribute->value));
        $attrValue = self::serialize($tokens);

        $attribute->value = htmlspecialchars($attrValue, ENT_NOQUOTES, 'UTF-8');
    }

    /**
     * Serialize an array of AVT tokens back into an attribute value
     *
     * Curly brackets in literals are doubled, expressions are wrapped in curly brackets
     *
     * @param  array  $tokens
     * @return string
     * @throws RuntimeException If a token is neither a literal nor an expression
     */
    public static function serialize(array $tokens)
    {
        $parts = [];
        foreach ($tokens as $token)
        {
            if ($token[0] === 'literal')
            {
                $parts[] = preg_replace('([{}])', '$0$0', $token[1]);
                continue;
            }
            if ($token[0] === 'expression')
            {
                $parts[] = '{' . $token[1] . '}';
                continue;
            }
            throw new RuntimeException('Unknown token type');
        }
        return implode('', $parts);
    }

    /**
     * Transform given attribute value template into an XSL fragment
     *
     * Expressions become xsl:value-of elements. Literals are escaped, and wrapped in an
     * xsl:text element if they start or end with whitespace
     *
     * @param  string $attrValue
     * @return string
     */
    public static function toXSL($attrValue)
    {
        $fragments = [];
        foreach (self::parse($attrValue) as [$type, $content])
        {
            if ($type === 'expression')
            {
                $fragments[] = '<xsl:value-of select="' . htmlspecialchars($content, ENT_COMPAT, 'UTF-8') . '"/>';
                continue;
            }

            $text        = htmlspecialchars($content, ENT_NOQUOTES, 'UTF-8');
            $fragments[] = (trim($content) !== $content) ? '<xsl:text>' . $text . '</xsl:text>' : $text;
        }
        return implode('', $fragments);
    }
}

View File

@@ -0,0 +1,186 @@
<?php
/**
* @package s9e\TextFormatter
* @copyright Copyright (c) 2010-2019 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator\Helpers;
use RuntimeException;
use Traversable;
use s9e\TextFormatter\Configurator\ConfigProvider;
use s9e\TextFormatter\Configurator\FilterableConfigValue;
use s9e\TextFormatter\Configurator\JavaScript\Dictionary;
abstract class ConfigHelper
{
	/**
	* Resolve every filterable value of a config array for given target, recursively
	*
	* Instances of FilterableConfigValue are replaced with the value they return for the target.
	* Entries for which they return NULL are removed from the config
	*
	* @param  array  $config Config array
	* @param  string $target Target parser
	* @return array          Filtered config
	*/
	public static function filterConfig(array $config, $target = 'PHP')
	{
		$filtered = [];
		foreach ($config as $key => $entry)
		{
			if ($entry instanceof FilterableConfigValue)
			{
				$entry = $entry->filterConfig($target);
				if ($entry === null)
				{
					// This value does not exist for this target
					continue;
				}
			}

			$filtered[$key] = (is_array($entry)) ? self::filterConfig($entry, $target) : $entry;
		}

		return $filtered;
	}

	/**
	* Find the longest substring shared by every string of a list
	*
	* Substrings of the first string are collected from longest to shortest, then the set is
	* narrowed down with each subsequent string. The search stops as soon as nothing is left
	*
	* @param  array $strings Array of strings
	* @return mixed          quickMatch string, or FALSE if none could be generated
	*/
	public static function generateQuickMatchFromList(array $strings)
	{
		$candidates = null;
		foreach ($strings as $string)
		{
			// Collect every substring of this string as a key, longest first
			$total = strlen($string);
			$found = [];
			for ($len = $total; $len > 0; --$len)
			{
				for ($pos = $total - $len; $pos >= 0; --$pos)
				{
					$found[substr($string, $pos, $len)] = 1;
				}
			}

			if ($candidates === null)
			{
				$candidates = $found;
				continue;
			}

			$candidates = array_intersect_key($candidates, $found);
			if (empty($candidates))
			{
				break;
			}
		}

		if (empty($candidates))
		{
			return false;
		}

		// Candidates are ordered by length descending, the first key is the longest match. Numeric
		// keys are stored as integers by PHP and have to be cast back to a string
		return strval(key($candidates));
	}

	/**
	* Optimize the size of a deep array by deduplicating identical structures
	*
	* Every nested array is replaced with a reference to the first identical array that was
	* encountered. Identity is determined by comparing the serialized form of the arrays. This
	* method is meant to be used on a config array which is only read and never modified
	*
	* @param  array &$config Array to optimize, modified in place
	* @param  array &$cache  Structures recorded so far, using their serialized form as key
	* @return void
	*/
	public static function optimizeArray(array &$config, array &$cache = [])
	{
		foreach ($config as $k => &$v)
		{
			if (!is_array($v))
			{
				continue;
			}

			// Dig deeper into this array
			self::optimizeArray($v, $cache);

			// Look for a matching structure
			$cacheKey = serialize($v);
			if (!isset($cache[$cacheKey]))
			{
				// Record this value in the cache
				$cache[$cacheKey] = $v;
			}

			// Replace the entry in $config with a reference to the cached value
			$config[$k] =& $cache[$cacheKey];
		}
		unset($v);
	}

	/**
	* Convert an iterable structure to a (possibly multidimensional) array
	*
	* ConfigProvider instances are replaced with the return value of their asConfig() method while
	* arrays and other Traversable values are converted recursively. Values that were an instance
	* of Dictionary are wrapped in a new Dictionary after conversion
	*
	* @param  mixed $value
	* @param  bool  $keepEmpty Whether to keep empty arrays instead of removing them
	* @param  bool  $keepNull  Whether to keep NULL values instead of removing them
	* @return array
	*
	* @throws RuntimeException if a value is neither a scalar, NULL, an array nor a supported object
	*/
	public static function toArray($value, $keepEmpty = false, $keepNull = false)
	{
		$result = [];
		foreach ($value as $key => $item)
		{
			// Must be tested before the value is converted
			$wrapInDictionary = ($item instanceof Dictionary);

			if ($item instanceof ConfigProvider)
			{
				$item = $item->asConfig();
			}
			elseif (is_array($item) || $item instanceof Traversable)
			{
				$item = self::toArray($item, $keepEmpty, $keepNull);
			}
			elseif (!is_scalar($item) && $item !== null)
			{
				$description = (is_object($item))
				             ? 'an instance of ' . get_class($item)
				             : 'a ' . gettype($item);

				throw new RuntimeException('Cannot convert ' . $description . ' to array');
			}

			$isDroppedNull  = ($item === null && !$keepNull);
			$isDroppedEmpty = ($item === [] && !$keepEmpty);
			if ($isDroppedNull || $isDroppedEmpty)
			{
				continue;
			}

			$result[$key] = ($wrapInDictionary) ? new Dictionary($item) : $item;
		}

		return $result;
	}
}

View File

@@ -1,23 +1,62 @@
<?php
/**
* @package s9e\TextFormatter
* @copyright Copyright (c) 2010-2019 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator\Helpers;
abstract class ContextSafeness
{
	/**
	* Get the list of UTF-8 characters that are disallowed as a URL
	*
	* ":" is disallowed to prevent the URL from having a scheme.
	*
	* @return string[]
	*/
	public static function getDisallowedCharactersAsURL()
	{
		return [':'];
	}

	/**
	* Get the list of UTF-8 characters that are disallowed in CSS
	*
	* - "(" and ")" are disallowed to prevent executing CSS functions or proprietary extensions that
	*   may execute JavaScript.
	* - ":" is disallowed to prevent setting extra CSS properties as well as possibly misusing the
	*   url() function with javascript: URIs.
	* - "\", '"' and "'" are disallowed to prevent breaking out of or interfering with strings.
	* - ";", "{" and "}" are disallowed to prevent breaking out of a declaration.
	*
	* @return string[]
	*/
	public static function getDisallowedCharactersInCSS()
	{
		return ['(', ')', ':', '\\', '"', "'", ';', '{', '}'];
	}

	/**
	* Get the list of UTF-8 characters that are disallowed in JS
	*
	* Allowing *any* input inside of a JavaScript context is a risky proposition. The use cases are
	* also pretty rare. This list of disallowed characters attempts to block any character that is
	* potentially unsafe either inside or outside of a string.
	*
	* - "(" and ")" are disallowed to prevent executing functions.
	* - '"', "'", "\" and "`" are disallowed to prevent breaking out of or interfering with strings.
	* - "\r", "\n", U+2028 and U+2029 are disallowed inside of JavaScript strings.
	* - ":" and "%" are disallowed to prevent potential exploits that set document.location to a
	*   javascript: URI.
	* - "=" is disallowed to prevent overwriting existing vars (or constructors, such as Array's) if
	*   the input is used outside of a string.
	*
	* @return string[]
	*/
	public static function getDisallowedCharactersInJS()
	{
		// U+2028 and U+2029 are expressed as their UTF-8 byte sequences
		return ['(', ')', '"', "'", '\\', '`', "\r", "\n", "\xE2\x80\xA8", "\xE2\x80\xA9", ':', '%', '='];
	}
}

View File

@@ -0,0 +1,399 @@
<?php
/**
* @package s9e\TextFormatter
* @copyright Copyright (c) 2010-2019 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator\Helpers;
use DOMElement;
use DOMXPath;
class ElementInspector
{
/**
* This is an abridged version of the HTML5 content models and rules, with some liberties taken.
*
* For each element, up to three bitfields are defined: "c", "ac" and "dd". Bitfields are stored
* as raw bytes, formatted using the octal notation to keep the sources ASCII.
*
* "c" represents the categories the element belongs to. The categories are comprised of HTML5
* content models (such as "phrasing content" or "interactive content") plus a few special
* categories created to cover the parts of the specs that refer to "a group of X and Y
* elements" rather than a specific content model.
*
* "ac" represents the categories that are allowed as children of given element.
*
* "dd" represents the categories that must not appear as a descendant of given element.
*
* Sometimes, HTML5 specifies some restrictions on when an element can accept certain children,
* or what categories the element belongs to. For example, an <img> element is only part of the
* "interactive content" category if it has a "usemap" attribute. Those restrictions are
* expressed as an XPath expression and stored using the concatenation of the key of the bitfield
* plus the bit number of the category. For instance, if "interactive content" got assigned to
* bit 2, the definition of the <img> element will contain a key "c2" with value "@usemap".
*
* Additionally, other flags are set:
*
* "t" indicates that the element uses the "transparent" content model.
* "e" indicates that the element uses the "empty" content model.
* "v" indicates that the element is a void element.
* "nt" indicates that the element does not accept text nodes. (no text)
* "to" indicates that the element should only contain text. (text-only)
* "fe" indicates that the element is a formatting element. It will automatically be reopened
* when closed by an end tag of a different name.
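* "b" indicates that the element is not phrasing content, which makes it likely to act like
* a block element.
* "pre" indicates that the element preserves whitespace in its content.
*
* A flag can be made conditional with a key made of the flag's name followed by "?" whose value
* is an XPath expression. In that case, the flag only applies if the expression evaluates to
* true. For instance, "e?" with value "@span" restricts the "e" flag to elements that have a
* "span" attribute.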
*
* Finally, HTML5 defines "optional end tag" rules, where one element automatically closes its
* predecessor. Those are used to generate closeParent rules and are stored in the "cp" key.
*
* @var array
* @see /scripts/patchElementInspector.php
*/
protected static $htmlElements = [
'a'=>['c'=>"\17\0\0\0\0\2",'c3'=>'@href','ac'=>"\0",'dd'=>"\10\0\0\0\0\2",'t'=>1,'fe'=>1],
'abbr'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0"],
'address'=>['c'=>"\3\10",'ac'=>"\1",'dd'=>"\200\14",'b'=>1,'cp'=>['p']],
'article'=>['c'=>"\3\4",'ac'=>"\1",'dd'=>"\0\0\0\0\20",'b'=>1,'cp'=>['p']],
'aside'=>['c'=>"\3\4",'ac'=>"\1",'dd'=>"\0\0\0\0\20",'b'=>1,'cp'=>['p']],
'audio'=>['c'=>"\57",'c3'=>'@controls','c1'=>'@controls','ac'=>"\0\0\0\40\1",'ac29'=>'not(@src)','dd'=>"\0\0\0\0\0\4",'dd42'=>'@src','t'=>1],
'b'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0",'fe'=>1],
'base'=>['c'=>"\20",'ac'=>"\0",'dd'=>"\0",'nt'=>1,'e'=>1,'v'=>1,'b'=>1],
'bdi'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0"],
'bdo'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0"],
'blockquote'=>['c'=>"\103",'ac'=>"\1",'dd'=>"\0",'b'=>1,'cp'=>['p']],
'body'=>['c'=>"\100\0\40",'ac'=>"\1",'dd'=>"\0",'b'=>1],
'br'=>['c'=>"\5",'ac'=>"\0",'dd'=>"\0",'nt'=>1,'e'=>1,'v'=>1],
'button'=>['c'=>"\17\2",'ac'=>"\4",'dd'=>"\10"],
'canvas'=>['c'=>"\47",'ac'=>"\0",'dd'=>"\0",'t'=>1],
'caption'=>['c'=>"\0\1",'ac'=>"\1",'dd'=>"\0\0\20",'b'=>1],
'cite'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0"],
'code'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0",'fe'=>1],
'col'=>['c'=>"\0\0\200",'ac'=>"\0",'dd'=>"\0",'nt'=>1,'e'=>1,'v'=>1,'b'=>1],
'colgroup'=>['c'=>"\0\1",'ac'=>"\0\0\200",'ac23'=>'not(@span)','dd'=>"\0",'nt'=>1,'e'=>1,'e?'=>'@span','b'=>1],
'data'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0"],
'datalist'=>['c'=>"\5",'ac'=>"\4\0\1\100",'dd'=>"\0"],
'dd'=>['c'=>"\0\200\0\4",'ac'=>"\1",'dd'=>"\0",'b'=>1,'cp'=>['dd','dt']],
'del'=>['c'=>"\5",'ac'=>"\0",'dd'=>"\0",'t'=>1],
'details'=>['c'=>"\113",'ac'=>"\1\0\0\20",'dd'=>"\0",'b'=>1,'cp'=>['p']],
'dfn'=>['c'=>"\7\0\0\0\100",'ac'=>"\4",'dd'=>"\0\0\0\0\100"],
'dialog'=>['c'=>"\101",'ac'=>"\1",'dd'=>"\0",'b'=>1],
'div'=>['c'=>"\3\200",'ac'=>"\1\0\1\4",'ac0'=>'not(ancestor::dl)','dd'=>"\0",'b'=>1,'cp'=>['p']],
'dl'=>['c'=>"\3",'c1'=>'dt and dd','ac'=>"\0\200\1",'dd'=>"\0",'nt'=>1,'b'=>1,'cp'=>['p']],
'dt'=>['c'=>"\0\200\0\4",'ac'=>"\1",'dd'=>"\200\4\10",'b'=>1,'cp'=>['dd','dt']],
'em'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0",'fe'=>1],
'embed'=>['c'=>"\57",'ac'=>"\0",'dd'=>"\0",'nt'=>1,'e'=>1,'v'=>1],
'fieldset'=>['c'=>"\103\2",'ac'=>"\1\0\0\200",'dd'=>"\0",'b'=>1,'cp'=>['p']],
'figcaption'=>['c'=>"\0\0\0\0\0\10",'ac'=>"\1",'dd'=>"\0",'b'=>1,'cp'=>['p']],
'figure'=>['c'=>"\103",'ac'=>"\1\0\0\0\0\10",'dd'=>"\0",'b'=>1,'cp'=>['p']],
'footer'=>['c'=>"\3\110\10",'ac'=>"\1",'dd'=>"\0\0\0\0\20",'b'=>1,'cp'=>['p']],
'form'=>['c'=>"\3\0\0\0\40",'ac'=>"\1",'dd'=>"\0\0\0\0\40",'b'=>1,'cp'=>['p']],
'h1'=>['c'=>"\203",'ac'=>"\4",'dd'=>"\0",'b'=>1,'cp'=>['p']],
'h2'=>['c'=>"\203",'ac'=>"\4",'dd'=>"\0",'b'=>1,'cp'=>['p']],
'h3'=>['c'=>"\203",'ac'=>"\4",'dd'=>"\0",'b'=>1,'cp'=>['p']],
'h4'=>['c'=>"\203",'ac'=>"\4",'dd'=>"\0",'b'=>1,'cp'=>['p']],
'h5'=>['c'=>"\203",'ac'=>"\4",'dd'=>"\0",'b'=>1,'cp'=>['p']],
'h6'=>['c'=>"\203",'ac'=>"\4",'dd'=>"\0",'b'=>1,'cp'=>['p']],
'head'=>['c'=>"\0\0\40",'ac'=>"\20",'dd'=>"\0",'nt'=>1,'b'=>1],
'header'=>['c'=>"\3\110\10",'ac'=>"\1",'dd'=>"\0\0\0\0\20",'b'=>1,'cp'=>['p']],
'hr'=>['c'=>"\1",'ac'=>"\0",'dd'=>"\0",'nt'=>1,'e'=>1,'v'=>1,'b'=>1,'cp'=>['p']],
'html'=>['c'=>"\0",'ac'=>"\0\0\40",'dd'=>"\0",'nt'=>1,'b'=>1],
'i'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0",'fe'=>1],
'iframe'=>['c'=>"\57",'ac'=>"\4",'dd'=>"\0"],
'img'=>['c'=>"\57\40\100",'c3'=>'@usemap','ac'=>"\0",'dd'=>"\0",'nt'=>1,'e'=>1,'v'=>1],
'input'=>['c'=>"\17\40",'c3'=>'@type!="hidden"','c13'=>'@type!="hidden" or @type="hidden"','c1'=>'@type!="hidden"','ac'=>"\0",'dd'=>"\0",'nt'=>1,'e'=>1,'v'=>1],
'ins'=>['c'=>"\7",'ac'=>"\0",'dd'=>"\0",'t'=>1],
'kbd'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0"],
'label'=>['c'=>"\17\40\0\0\10",'ac'=>"\4",'dd'=>"\0\0\2\0\10"],
'legend'=>['c'=>"\0\0\0\200",'ac'=>"\204",'dd'=>"\0",'b'=>1],
'li'=>['c'=>"\0\0\0\0\0\1",'ac'=>"\1",'dd'=>"\0",'b'=>1,'cp'=>['li']],
'link'=>['c'=>"\25",'ac'=>"\0",'dd'=>"\0",'nt'=>1,'e'=>1,'v'=>1],
'main'=>['c'=>"\3\110\20\0\20",'ac'=>"\1",'dd'=>"\0",'b'=>1,'cp'=>['p']],
'mark'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0"],
'media element'=>['c'=>"\0\0\0\0\0\4",'ac'=>"\0",'dd'=>"\0",'nt'=>1,'b'=>1],
'meta'=>['c'=>"\20",'ac'=>"\0",'dd'=>"\0",'nt'=>1,'e'=>1,'v'=>1,'b'=>1],
'meter'=>['c'=>"\7\0\2\0\4",'ac'=>"\4",'dd'=>"\0\0\0\0\4"],
'nav'=>['c'=>"\3\4",'ac'=>"\1",'dd'=>"\0\0\0\0\20",'b'=>1,'cp'=>['p']],
'noscript'=>['c'=>"\25",'ac'=>"\0",'dd'=>"\0",'nt'=>1],
'object'=>['c'=>"\47",'ac'=>"\0\0\0\0\2",'dd'=>"\0",'t'=>1],
'ol'=>['c'=>"\3",'c1'=>'li','ac'=>"\0\0\1\0\0\1",'dd'=>"\0",'nt'=>1,'b'=>1,'cp'=>['p']],
'optgroup'=>['c'=>"\0\0\4",'ac'=>"\0\0\1\100",'dd'=>"\0",'nt'=>1,'b'=>1,'cp'=>['optgroup','option']],
'option'=>['c'=>"\0\0\4\100",'ac'=>"\0",'dd'=>"\0",'b'=>1,'cp'=>['option']],
'output'=>['c'=>"\7\2",'ac'=>"\4",'dd'=>"\0"],
'p'=>['c'=>"\3",'ac'=>"\4",'dd'=>"\0",'b'=>1,'cp'=>['p']],
'param'=>['c'=>"\0\0\0\0\2",'ac'=>"\0",'dd'=>"\0",'nt'=>1,'e'=>1,'v'=>1,'b'=>1],
'picture'=>['c'=>"\45",'ac'=>"\0\0\101",'dd'=>"\0",'nt'=>1],
'pre'=>['c'=>"\3",'ac'=>"\4",'dd'=>"\0",'pre'=>1,'b'=>1,'cp'=>['p']],
'progress'=>['c'=>"\7\0\2\10",'ac'=>"\4",'dd'=>"\0\0\0\10"],
'q'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0"],
'rb'=>['c'=>"\0\20",'ac'=>"\4",'dd'=>"\0",'b'=>1,'cp'=>['rb','rt','rtc']],
'rp'=>['c'=>"\0\20\0\2",'ac'=>"\4",'dd'=>"\0",'b'=>1],
'rt'=>['c'=>"\0\20\0\2",'ac'=>"\4",'dd'=>"\0",'b'=>1,'cp'=>['rb','rt']],
'rtc'=>['c'=>"\0\20",'ac'=>"\4\0\0\2",'dd'=>"\0",'b'=>1,'cp'=>['rt','rtc']],
'ruby'=>['c'=>"\7",'ac'=>"\4\20",'dd'=>"\0"],
's'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0",'fe'=>1],
'samp'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0"],
'script'=>['c'=>"\25\0\1",'ac'=>"\0",'dd'=>"\0",'to'=>1],
'section'=>['c'=>"\3\4",'ac'=>"\1",'dd'=>"\0",'b'=>1,'cp'=>['p']],
'select'=>['c'=>"\17\2",'ac'=>"\0\0\5",'dd'=>"\0",'nt'=>1],
'slot'=>['c'=>"\5",'ac'=>"\0",'dd'=>"\0",'t'=>1],
'small'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0",'fe'=>1],
'source'=>['c'=>"\0\0\100\40",'ac'=>"\0",'dd'=>"\0",'nt'=>1,'e'=>1,'v'=>1,'b'=>1],
'span'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0"],
'strong'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0",'fe'=>1],
'style'=>['c'=>"\21",'ac'=>"\0",'dd'=>"\0",'to'=>1,'b'=>1],
'sub'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0"],
'summary'=>['c'=>"\0\0\0\20",'ac'=>"\204",'dd'=>"\0",'b'=>1],
'sup'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0"],
'table'=>['c'=>"\3\0\20",'ac'=>"\0\1\1",'dd'=>"\0",'nt'=>1,'b'=>1,'cp'=>['p']],
'tbody'=>['c'=>"\0\1",'ac'=>"\0\0\1\0\200",'dd'=>"\0",'nt'=>1,'b'=>1,'cp'=>['tbody','td','th','thead','tr']],
'td'=>['c'=>"\100\0\0\1",'ac'=>"\1",'dd'=>"\0\0\0\0\20",'b'=>1,'cp'=>['td','th']],
'template'=>['c'=>"\25\0\201",'ac'=>"\0",'dd'=>"\0",'nt'=>1],
'textarea'=>['c'=>"\17\2",'ac'=>"\0",'dd'=>"\0",'pre'=>1,'to'=>1],
'tfoot'=>['c'=>"\0\1",'ac'=>"\0\0\1\0\200",'dd'=>"\0",'nt'=>1,'b'=>1,'cp'=>['tbody','td','th','thead','tr']],
'th'=>['c'=>"\0\0\0\1",'ac'=>"\1",'dd'=>"\200\104",'b'=>1,'cp'=>['td','th']],
'thead'=>['c'=>"\0\1",'ac'=>"\0\0\1\0\200",'dd'=>"\0",'nt'=>1,'b'=>1],
'time'=>['c'=>"\7",'ac'=>"\4",'ac2'=>'@datetime','dd'=>"\0"],
'title'=>['c'=>"\20",'ac'=>"\0",'dd'=>"\0",'to'=>1,'b'=>1],
'tr'=>['c'=>"\0\1\0\0\200",'ac'=>"\0\0\1\1",'dd'=>"\0",'nt'=>1,'b'=>1,'cp'=>['td','th','tr']],
'track'=>['c'=>"\0\0\0\0\1",'ac'=>"\0",'dd'=>"\0",'nt'=>1,'e'=>1,'v'=>1,'b'=>1],
'u'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0",'fe'=>1],
'ul'=>['c'=>"\3",'c1'=>'li','ac'=>"\0\0\1\0\0\1",'dd'=>"\0",'nt'=>1,'b'=>1,'cp'=>['p']],
'var'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0"],
'video'=>['c'=>"\57",'c3'=>'@controls','ac'=>"\0\0\0\40\1",'ac29'=>'not(@src)','dd'=>"\0\0\0\0\0\4",'dd42'=>'@src','t'=>1],
'wbr'=>['c'=>"\5",'ac'=>"\0",'dd'=>"\0",'nt'=>1,'e'=>1,'v'=>1]
];
/**
* Test whether given child element automatically closes given parent element
*
* The test relies on the "cp" list of the child's definition, which contains the names of the
* elements it closes
*
* @param  DOMElement $child
* @param  DOMElement $parent
* @return bool
*/
public static function closesParent(DOMElement $child, DOMElement $parent)
{
	$childName = $child->nodeName;
	if (empty(self::$htmlElements[$childName]['cp']))
	{
		// Unknown element, or an element that does not close any parent
		return false;
	}

	return in_array($parent->nodeName, self::$htmlElements[$childName]['cp'], true);
}
/**
* Test whether given element disallows text nodes
*
* Tests the "nt" flag of the element's definition. Elements that have no definition of their own
* are treated as a span
*
* @param DOMElement $element
* @return bool
*/
public static function disallowsText(DOMElement $element)
{
return self::hasProperty($element, 'nt');
}
/**
* Return the "allowChild" bitfield for given element
*
* Returns the "ac" bitfield of the element's definition as a raw string of bytes, after the bits
* whose XPath condition is not fulfilled have been turned off
*
* @param DOMElement $element
* @return string
*/
public static function getAllowChildBitfield(DOMElement $element)
{
return self::getBitfield($element, 'ac');
}
/**
* Return the "category" bitfield for given element
*
* Returns the "c" bitfield of the element's definition as a raw string of bytes, after the bits
* whose XPath condition is not fulfilled have been turned off
*
* @param DOMElement $element
* @return string
*/
public static function getCategoryBitfield(DOMElement $element)
{
return self::getBitfield($element, 'c');
}
/**
* Return the "denyDescendant" bitfield for given element
*
* Returns the "dd" bitfield of the element's definition as a raw string of bytes, after the bits
* whose XPath condition is not fulfilled have been turned off
*
* @param DOMElement $element
* @return string
*/
public static function getDenyDescendantBitfield(DOMElement $element)
{
return self::getBitfield($element, 'dd');
}
/**
* Test whether given element is a block element
*
* Tests the "b" flag of the element's definition
*
* @param DOMElement $element
* @return bool
*/
public static function isBlock(DOMElement $element)
{
return self::hasProperty($element, 'b');
}
/**
* Test whether given element uses the empty content model
*
* Tests the "e" flag of the element's definition. If the definition contains an "e?" XPath
* condition, it must be fulfilled by the element as well
*
* @param DOMElement $element
* @return bool
*/
public static function isEmpty(DOMElement $element)
{
return self::hasProperty($element, 'e');
}
/**
* Test whether given element is a formatting element
*
* Tests the "fe" flag of the element's definition
*
* @param DOMElement $element
* @return bool
*/
public static function isFormattingElement(DOMElement $element)
{
return self::hasProperty($element, 'fe');
}
/**
* Test whether given element only accepts text nodes
*
* Tests the "to" flag of the element's definition
*
* @param DOMElement $element
* @return bool
*/
public static function isTextOnly(DOMElement $element)
{
return self::hasProperty($element, 'to');
}
/**
* Test whether given element uses the transparent content model
*
* Tests the "t" flag of the element's definition
*
* @param DOMElement $element
* @return bool
*/
public static function isTransparent(DOMElement $element)
{
return self::hasProperty($element, 't');
}
/**
* Test whether given element uses the void content model
*
* Tests the "v" flag of the element's definition
*
* @param DOMElement $element
* @return bool
*/
public static function isVoid(DOMElement $element)
{
return self::hasProperty($element, 'v');
}
/**
* Test whether given element preserves whitespace in its content
*
* Tests the "pre" flag of the element's definition
*
* @param DOMElement $element
* @return bool
*/
public static function preservesWhitespace(DOMElement $element)
{
return self::hasProperty($element, 'pre');
}
/**
* Test whether an XPath expression is true for given element
*
* The expression is wrapped in XPath's boolean() function and evaluated against the element's
* owner document, using the element as context node
*
* @param  string     $query   XPath query
* @param  DOMElement $element Context node
* @return bool
*/
protected static function evaluate($query, DOMElement $element)
{
	$expression = 'boolean(' . $query . ')';

	return (new DOMXPath($element->ownerDocument))->evaluate($expression, $element);
}
/**
* Compute the value of a bitfield for given element
*
* Starts from the bitfield stored in the element's definition, then turns off every bit that
* has an XPath condition which is not fulfilled by the element. The condition for a bit is
* stored in a key made of the bitfield's name followed by the bit number
*
* @param  DOMElement $element Context node
* @param  string     $name    Bitfield name: either 'c', 'ac' or 'dd'
* @return string              Bitfield, as a raw string of bytes
*/
protected static function getBitfield(DOMElement $element, $name)
{
	$props = self::getProperties($element);
	$bits  = self::toBin($props[$name]);

	foreach (str_split($bits, 1) as $bitNumber => $bit)
	{
		if (!$bit)
		{
			// Only the bits that are set to 1 may have to be turned off
			continue;
		}

		$conditionName = $name . $bitNumber;
		if (!isset($props[$conditionName]))
		{
			continue;
		}
		if (!self::evaluate($props[$conditionName], $element))
		{
			$bits[$bitNumber] = '0';
		}
	}

	return self::toRaw($bits);
}
/**
* Return the definition of given element
*
* The definition is looked up in self::$htmlElements using the element's node name. Elements
* that have no definition of their own get the definition of a span
*
* @param  DOMElement $element
* @return array
*/
protected static function getProperties(DOMElement $element)
{
	$elName = $element->nodeName;
	if (isset(self::$htmlElements[$elName]))
	{
		return self::$htmlElements[$elName];
	}

	return self::$htmlElements['span'];
}
/**
* Test whether given element has given property in context
*
* The property must be set to a non-empty value in the element's definition. If the definition
* also contains an XPath condition for it, stored in a key made of the property's name followed
* by "?", the condition must be fulfilled by the element
*
* @param  DOMElement $element  Context node
* @param  string     $propName Property name, see self::$htmlElements
* @return bool
*/
protected static function hasProperty(DOMElement $element, $propName)
{
	$props = self::getProperties($element);
	if (empty($props[$propName]))
	{
		return false;
	}

	$conditionName = $propName . '?';
	if (!isset($props[$conditionName]))
	{
		// The property is unconditional
		return true;
	}

	return self::evaluate($props[$conditionName], $element);
}
/**
* Convert a raw string to a series of 0 and 1 in LSB order
*
* Each byte is converted to 8 digits, starting with its least significant bit
*
* @param  string $raw
* @return string
*/
protected static function toBin($raw)
{
	$octets = [];
	foreach (str_split($raw, 1) as $byte)
	{
		// Zero-padded MSB-first representation, reversed to put the lowest bit first
		$octets[] = strrev(sprintf('%08b', ord($byte)));
	}

	return implode('', $octets);
}
/**
* Convert a series of 0 and 1 in LSB order to a raw string
*
* The series is read in groups of 8 digits, each of which produces one byte
*
* @param  string $bin
* @return string
*/
protected static function toRaw($bin)
{
	$raw = '';
	foreach (str_split($bin, 8) as $octet)
	{
		// Put the most significant bit first before converting the digits to a byte
		$raw .= chr(bindec(strrev($octet)));
	}

	return $raw;
}
}

View File

@@ -0,0 +1,70 @@
<?php declare(strict_types=1);
/**
* @package s9e\TextFormatter
* @copyright Copyright (c) 2010-2019 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator\Helpers;
use InvalidArgumentException;
use s9e\TextFormatter\Configurator\Items\Filter;
use s9e\TextFormatter\Configurator\RecursiveParser;
abstract class FilterHelper
{
	/**
	* @var RecursiveParser Parser shared by every call to getParser()
	*/
	protected static $parser;

	/**
	* Return the shared instance of RecursiveParser, creating it on first use
	*
	* The parser is configured with a single FilterSyntaxMatcher
	*
	* @return RecursiveParser
	*/
	public static function getParser(): RecursiveParser
	{
		if (isset(self::$parser))
		{
			return self::$parser;
		}

		self::$parser = new RecursiveParser;
		self::$parser->setMatchers([new FilterSyntaxMatcher(self::$parser)]);

		return self::$parser;
	}

	/**
	* Test whether given filter is a default filter or is in the list of allowed filters
	*
	* Filters whose name starts with "#" are default filters. For other filters, the leading
	* backslash and everything that follows the first opening parenthesis are ignored and the
	* remaining name must be found in the list
	*
	* @param  string   $filter
	* @param  string[] $allowedFilters
	* @return bool
	*/
	public static function isAllowed(string $filter, array $allowedFilters): bool
	{
		$isDefaultFilter = (isset($filter[0]) && $filter[0] === '#');
		if ($isDefaultFilter)
		{
			// Default filters are always allowed
			return true;
		}

		$callbackName = preg_replace('(^\\\\|\\(.*)s', '', $filter);

		return in_array(trim($callbackName), $allowedFilters, true);
	}

	/**
	* Parse a filter definition
	*
	* Leading backslashes are removed from the name of the filter
	*
	* @param  string $filterString Filter definition such as "#number" or "strtolower($attrValue)"
	* @return array                Associative array with a "filter" element and optionally a
	*                              "params" array
	*/
	public static function parse(string $filterString): array
	{
		$parsed = self::getParser()->parse($filterString);

		$filterConfig           = $parsed['value'];
		$filterConfig['filter'] = ltrim($filterConfig['filter'], '\\');

		return $filterConfig;
	}
}

View File

@@ -0,0 +1,248 @@
<?php declare(strict_types=1);
/**
* @package s9e\TextFormatter
* @copyright Copyright (c) 2010-2019 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator\Helpers;
use s9e\TextFormatter\Configurator\Items\Regexp;
use s9e\TextFormatter\Configurator\RecursiveParser\AbstractRecursiveMatcher;
class FilterSyntaxMatcher extends AbstractRecursiveMatcher
{
	/**
	* {@inheritdoc}
	*/
	public function getMatchers(): array
	{
		return [
			'Array' => [
				'groups' => ['FilterArg', 'Literal'],
				'regexp' => '\\[ ((?&ArrayElements))? \\]',
			],
			'ArrayElement' => [
				'regexp' => '(?:((?&Scalar)) => )?((?&Literal))',
			],
			'ArrayElements' => [
				'regexp' => '((?&ArrayElement))(?: , ((?&ArrayElements)))?',
			],
			'DoubleQuotedString' => [
				'groups' => ['FilterArg', 'Literal', 'Scalar'],
				'regexp' => '"((?:[^\\\\"]|\\\\.)*)"',
			],
			'False' => [
				'groups' => ['FilterArg', 'Literal', 'Scalar'],
				'regexp' => '[Ff][Aa][Ll][Ss][Ee]',
			],
			'FilterArgs' => [
				'regexp' => '((?&FilterArg))(?: , ((?&FilterArgs)))?',
			],
			'FilterCallback' => [
				'regexp' => '([#:\\\\\\w]+)(?: \\( ((?&FilterArgs)?) \\))?',
			],
			'Float' => [
				'groups' => ['FilterArg', 'Literal', 'Scalar'],
				'regexp' => '([-+]?(?:\\.[0-9]+|[0-9]+\\.[0-9]*|[0-9]+(?=[Ee]))(?:[Ee]-?[0-9]+)?)',
			],
			'Integer' => [
				'groups' => ['FilterArg', 'Literal', 'Scalar'],
				'regexp' => '(-?(?:0[Bb][01]+|0[Xx][0-9A-Fa-f]+|[0-9]+))',
			],
			'Null' => [
				'groups' => ['FilterArg', 'Literal', 'Scalar'],
				'regexp' => '[Nn][Uu][Ll][Ll]',
			],
			'Param' => [
				'groups' => ['FilterArg'],
				'regexp' => '\\$(\\w+(?:\\.\\w+)*)',
			],
			'Regexp' => [
				'groups' => ['FilterArg', 'Literal'],
				'regexp' => '(/(?:[^\\\\/]|\\\\.)*/)([Sgimsu]*)',
			],
			'SingleQuotedString' => [
				'groups' => ['FilterArg', 'Literal', 'Scalar'],
				'regexp' => "'((?:[^\\\\']|\\\\.)*)'",
			],
			'True' => [
				'groups' => ['FilterArg', 'Literal', 'Scalar'],
				'regexp' => '[Tt][Rr][Uu][Ee]'
			]
		];
	}

	/**
	* Parse the content of an array literal
	*
	* @param  string $elements Text between the square brackets, or an empty string
	* @return array            Array with the keys and values found in the literal
	*/
	public function parseArray(string $elements = ''): array
	{
		$array = [];
		if ($elements !== '')
		{
			foreach ($this->recurse($elements, 'ArrayElements') as $element)
			{
				// array_key_exists() is used because the key may be NULL
				if (array_key_exists('key', $element))
				{
					$array[$element['key']] = $element['value'];
				}
				else
				{
					$array[] = $element['value'];
				}
			}
		}

		return $array;
	}

	/**
	* Parse one element of an array literal
	*
	* @param  string $key   Text of the element's key, or an empty string if it has none
	* @param  string $value Text of the element's value
	* @return array         Array with a "value" element, and a "key" element if applicable
	*/
	public function parseArrayElement(string $key, string $value): array
	{
		$element = ['value' => $this->recurse($value, 'Literal')];
		if ($key !== '')
		{
			$element['key'] = $this->recurse($key, 'Scalar');
		}

		return $element;
	}

	/**
	* Parse a comma-separated list of array elements
	*
	* @param  string      $firstElement  Text of the first element
	* @param  string|null $otherElements Text of the remaining elements, if any
	* @return array                      List of elements as returned by parseArrayElement()
	*/
	public function parseArrayElements(string $firstElement, ?string $otherElements = null)
	{
		$elements = [$this->recurse($firstElement, 'ArrayElement')];
		if (isset($otherElements))
		{
			$elements = array_merge($elements, $this->recurse($otherElements, 'ArrayElements'));
		}

		return $elements;
	}

	/**
	* Parse the content of a double-quoted string
	*
	* @param  string $str Text between the quotes
	* @return string      Text with its C-style escape sequences replaced
	*/
	public function parseDoubleQuotedString(string $str): string
	{
		return stripcslashes($str);
	}

	/**
	* @return bool
	*/
	public function parseFalse(): bool
	{
		return false;
	}

	/**
	* Parse a filter callback and its optional list of arguments
	*
	* The "params" element is only created if the callback is followed by parentheses, in which
	* case it may be an empty array
	*
	* @param  string      $callback Name of the filter or callback
	* @param  string|null $args     Text between the parentheses, if any
	* @return array                 Array with a "filter" element and optionally a "params" element
	*/
	public function parseFilterCallback(string $callback, ?string $args = null): array
	{
		$config = ['filter' => $callback];
		if (isset($args))
		{
			$config['params'] = ($args === '') ? [] : $this->recurse($args, 'FilterArgs');
		}

		return $config;
	}

	/**
	* Parse a comma-separated list of filter arguments
	*
	* Each argument is returned as a [type, value] pair. The type is "Name" for arguments that
	* were matched as a Param, and "Value" for any other argument
	*
	* @param  string      $firstArg  Text of the first argument
	* @param  string|null $otherArgs Text of the remaining arguments, if any
	* @return array                  List of [type, value] pairs
	*/
	public function parseFilterArgs(string $firstArg, ?string $otherArgs = null)
	{
		$parsedArg = $this->parser->parse($firstArg, 'FilterArg');

		$type = ($parsedArg['match'] === 'Param') ? 'Name' : 'Value';
		$args = [[$type, $parsedArg['value']]];
		if (isset($otherArgs))
		{
			$args = array_merge($args, $this->recurse($otherArgs, 'FilterArgs'));
		}

		return $args;
	}

	/**
	* @return null
	*/
	public function parseNull()
	{
		return null;
	}

	/**
	* @param  string $str Text of the number
	* @return float
	*/
	public function parseFloat(string $str): float
	{
		return (float) $str;
	}

	/**
	* Parse an integer literal
	*
	* The base is determined by the format of the literal, as per intval() with a base of 0
	*
	* @param  string $str Text of the number
	* @return int
	*/
	public function parseInteger(string $str): int
	{
		return intval($str, 0);
	}

	/**
	* @param  string $str Name of the parameter, without the dollar sign
	* @return string
	*/
	public function parseParam(string $str): string
	{
		return $str;
	}

	/**
	* Parse a regexp literal
	*
	* @param  string $regexp Regexp, including its delimiters
	* @param  string $flags  Regexp flags. The "g" flag is removed
	* @return Regexp
	*/
	public function parseRegexp(string $regexp, string $flags): Regexp
	{
		$regexp .= str_replace('g', '', $flags);

		return new Regexp($regexp, true);
	}

	/**
	* Parse the content of a single-quoted string
	*
	* @param  string $str Text between the quotes
	* @return string      Text with its escaped backslashes and single quotes unescaped
	*/
	public function parseSingleQuotedString(string $str): string
	{
		return preg_replace("(\\\\([\\\\']))", '$1', $str);
	}

	/**
	* @return bool
	*/
	public function parseTrue(): bool
	{
		return true;
	}
}

View File

@@ -0,0 +1,205 @@
<?php
/**
* @package s9e\TextFormatter
* @copyright Copyright (c) 2010-2019 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator\Helpers;
use DOMDocument;
use DOMXPath;
abstract class NodeLocator
{
/**
* Return all attributes (literal or generated) that match given regexp
*
* @param DOMDocument $dom Document
* @param string $regexp Regexp
* @return DOMNode[] List of DOMNode instances
*/
public static function getAttributesByRegexp(DOMDocument $dom, $regexp)
{
return self::getNodesByRegexp($dom, $regexp, 'attribute');
}
/**
* Return all DOMNodes whose content is CSS
*
* @param DOMDocument $dom Document
* @return DOMNode[] List of DOMNode instances
*/
public static function getCSSNodes(DOMDocument $dom)
{
$regexp = '/^(?:color|style)$/i';
$nodes = array_merge(
self::getAttributesByRegexp($dom, $regexp),
self::getElementsByRegexp($dom, '/^style$/i')
);
return $nodes;
}
/**
* Return all elements (literal or generated) that match given regexp
*
* @param DOMDocument $dom Document
* @param string $regexp Regexp
* @return DOMNode[] List of DOMNode instances
*/
public static function getElementsByRegexp(DOMDocument $dom, $regexp)
{
return self::getNodesByRegexp($dom, $regexp, 'element');
}
/**
* Return all DOMNodes whose content is JavaScript
*
* @param DOMDocument $dom Document
* @return DOMNode[] List of DOMNode instances
*/
public static function getJSNodes(DOMDocument $dom)
{
$regexp = '/^(?:data-s9e-livepreview-)?on/i';
$nodes = array_merge(
self::getAttributesByRegexp($dom, $regexp),
self::getElementsByRegexp($dom, '/^script$/i')
);
return $nodes;
}
/**
* Return all elements (literal or generated) that match given regexp
*
* Will return all <param/> descendants of <object/> and all attributes of <embed/> whose name
* matches given regexp. This method will NOT catch <param/> elements whose 'name' attribute is
* set via an <xsl:attribute/>
*
* @param DOMDocument $dom Document
* @param string $regexp
* @return DOMNode[] List of DOMNode instances
*/
public static function getObjectParamsByRegexp(DOMDocument $dom, $regexp)
{
$xpath = new DOMXPath($dom);
$nodes = [];
// Collect attributes from <embed/> elements
foreach (self::getAttributesByRegexp($dom, $regexp) as $attribute)
{
if ($attribute->nodeType === XML_ATTRIBUTE_NODE)
{
if (strtolower($attribute->parentNode->localName) === 'embed')
{
$nodes[] = $attribute;
}
}
elseif ($xpath->evaluate('count(ancestor::embed)', $attribute))
{
// Assuming <xsl:attribute/> or <xsl:copy-of/>
$nodes[] = $attribute;
}
}
// Collect <param/> descendants of <object/> elements
foreach ($xpath->query('//object//param') as $param)
{
if (preg_match($regexp, $param->getAttribute('name')))
{
$nodes[] = $param;
}
}
return $nodes;
}
/**
* Return all DOMNodes whose content is an URL
*
* NOTE: it will also return HTML4 nodes whose content is an URI
*
* @param DOMDocument $dom Document
* @return DOMNode[] List of DOMNode instances
*/
public static function getURLNodes(DOMDocument $dom)
{
$regexp = '/(?:^(?:action|background|c(?:ite|lassid|odebase)|data|formaction|href|i(?:con|tem(?:id|prop|type))|longdesc|manifest|p(?:ing|luginspage|oster|rofile)|usemap)|src)$/i';
$nodes = self::getAttributesByRegexp($dom, $regexp);
/**
* @link http://helpx.adobe.com/flash/kb/object-tag-syntax-flash-professional.html
* @link http://www.sitepoint.com/control-internet-explorer/
*/
foreach (self::getObjectParamsByRegexp($dom, '/^(?:dataurl|movie)$/i') as $param)
{
$node = $param->getAttributeNode('value');
if ($node)
{
$nodes[] = $node;
}
}
return $nodes;
}
/**
* Return all nodes of given type
*
* @param DOMDocument $dom Owner document
* @param string $type Node type ('element' or 'attribute')
* @return DOMNode[] List of DOMNode instances
*/
protected static function getNodes(DOMDocument $dom, $type)
{
$nodes = [];
$prefix = ($type === 'attribute') ? '@' : '';
$xpath = new DOMXPath($dom);
// Get natural nodes
foreach ($xpath->query('//' . $prefix . '*') as $node)
{
$nodes[] = [$node, $node->nodeName];
}
// Get XSL-generated nodes
foreach ($xpath->query('//xsl:' . $type) as $node)
{
$nodes[] = [$node, $node->getAttribute('name')];
}
// Get xsl:copy-of nodes
foreach ($xpath->query('//xsl:copy-of') as $node)
{
if (preg_match('/^' . $prefix . '(\\w+)$/', $node->getAttribute('select'), $m))
{
$nodes[] = [$node, $m[1]];
}
}
return $nodes;
}
/**
* Return all nodes (literal or generated) that match given regexp
*
* @param DOMDocument $dom Owner document
* @param string $regexp Regexp
* @param string $type Node type ('element' or 'attribute')
* @return DOMNode[] List of DOMNode instances
*/
protected static function getNodesByRegexp(DOMDocument $dom, $regexp, $type)
{
	$matchingNodes = [];

	// getNodes() returns pairs of [node, name], only the node is kept when the name matches
	foreach (self::getNodes($dom, $type) as $pair)
	{
		if (preg_match($regexp, $pair[1]))
		{
			$matchingNodes[] = $pair[0];
		}
	}

	return $matchingNodes;
}
}

View File

@@ -0,0 +1,49 @@
<?php
/**
* @package s9e\TextFormatter
* @copyright Copyright (c) 2010-2019 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator\Helpers;
use s9e\RegexpBuilder\Builder;
abstract class RegexpBuilder
{
	/**
	* Build a regexp pattern that matches any of the given words
	*
	* Recognized options and their defaults:
	*  - delimiter:       '/'   (forwarded to the builder)
	*  - caseInsensitive: false (if set, ASCII letters in the words are lowercased first)
	*  - specialChars:    []    (forwarded to the builder as its "meta" setting)
	*  - unicode:         true  (selects 'Utf8' input/output, 'Bytes' otherwise)
	*
	* @param  array  $words   Words to match (must be UTF-8)
	* @param  array  $options
	* @return string
	*/
	public static function fromList(array $words, array $options = [])
	{
		$defaults = [
			'delimiter'       => '/',
			'caseInsensitive' => false,
			'specialChars'    => [],
			'unicode'         => true
		];
		// Caller-provided options take precedence, defaults only fill the gaps
		$options += $defaults;

		if ($options['caseInsensitive'])
		{
			// Only ASCII letters are folded, the other bytes are left untouched
			$words = array_map(
				function ($word)
				{
					return strtr($word, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz');
				},
				$words
			);
		}

		// The same encoding is used for reading the words and for writing the pattern
		$encoding = ($options['unicode']) ? 'Utf8' : 'Bytes';
		$builder  = new Builder([
			'delimiter' => $options['delimiter'],
			'meta'      => $options['specialChars'],
			'input'     => $encoding,
			'output'    => $encoding
		]);

		return $builder->build($words);
	}
}

View File

@@ -1,232 +1,393 @@
<?php
/*
/**
* @package s9e\TextFormatter
* @copyright Copyright (c) 2010-2019 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator\Helpers;
use RuntimeException;
abstract class RegexpParser
{
	/**
	* Generate a regexp that matches any single character allowed in a regexp
	*
	* This method will generate a regexp that can be used to determine whether a given character
	* could in theory be allowed in a string that matches the source regexp. For example, the source
	* regexp /^a+$/D would generate /[a]/ while /^foo\d+$/D would generate /[foo\d]/ whereas the
	* regexp /foo/ would generate // because it's not anchored so any characters could be found
	* before or after the literal "foo".
	*
	* @param  string $regexp Source regexp
	* @return string         Regexp that matches any single character allowed in the source regexp
	*/
	public static function getAllowedCharacterRegexp($regexp)
	{
		$def = self::parse($regexp);

		// If the regexp uses the multiline modifier, this regexp can't match the whole string if
		// it contains newlines, so in effect it could allow any content
		if (strpos($def['modifiers'], 'm') !== false)
		{
			return '//';
		}

		// An unanchored regexp could be surrounded by any content
		if (substr($def['regexp'], 0, 1) !== '^'
		 || substr($def['regexp'], -1) !== '$')
		{
			return '//';
		}

		// Append a token to mark the end of the regexp
		$def['tokens'][] = [
			'pos'  => strlen($def['regexp']),
			'len'  => 0,
			'type' => 'end'
		];

		$patterns = [];

		// Collect the literal portions of the source regexp while testing for alternations
		$literal = '';
		$pos     = 0;
		$skipPos = 0;
		$depth   = 0;
		foreach ($def['tokens'] as $token)
		{
			// Skip options
			if ($token['type'] === 'option')
			{
				$skipPos = max($skipPos, $token['pos'] + $token['len']);
			}

			// Skip assertions
			if (strpos($token['type'], 'AssertionStart') !== false)
			{
				$endToken = $def['tokens'][$token['endToken']];
				$skipPos  = max($skipPos, $endToken['pos'] + $endToken['len']);
			}

			if ($token['pos'] >= $skipPos)
			{
				if ($token['type'] === 'characterClass')
				{
					$patterns[] = '[' . $token['content'] . ']';
				}

				if ($token['pos'] > $pos)
				{
					// Capture the content between last position and current position
					$tmp = substr($def['regexp'], $pos, $token['pos'] - $pos);

					// Append the content to the literal portion
					$literal .= $tmp;

					// Test for alternations if it's the root of the regexp
					if (!$depth)
					{
						// Remove literal backslashes for convenience
						$tmp = str_replace('\\\\', '', $tmp);

						// Look for an unescaped | that is not followed by ^
						if (preg_match('/(?<!\\\\)\\|(?!\\^)/', $tmp))
						{
							return '//';
						}

						// Look for an unescaped | that is not preceded by $
						if (preg_match('/(?<![$\\\\])\\|/', $tmp))
						{
							return '//';
						}
					}
				}
			}

			if (substr($token['type'], -5) === 'Start')
			{
				++$depth;
			}
			elseif (substr($token['type'], -3) === 'End')
			{
				--$depth;
			}

			$pos = max($skipPos, $token['pos'] + $token['len']);
		}

		// Test for the presence of an unescaped dot
		if (preg_match('#(?<!\\\\)(?:\\\\\\\\)*\\.#', $literal))
		{
			if (strpos($def['modifiers'], 's') !== false
			 || strpos($literal, "\n") !== false)
			{
				return '//';
			}

			$patterns[] = '.';

			// Remove unescaped dots
			$literal = preg_replace('#(?<!\\\\)((?:\\\\\\\\)*)\\.#', '$1', $literal);
		}

		// Remove unescaped quantifiers *, + and ?
		$literal = preg_replace('#(?<!\\\\)((?:\\\\\\\\)*)[*+?]#', '$1', $literal);

		// Remove unescaped quantifiers {}
		$literal = preg_replace('#(?<!\\\\)((?:\\\\\\\\)*)\\{[^}]+\\}#', '$1', $literal);

		// Remove backslash assertions \b, \B, \A, \Z, \z and \G, as well as back references
		$literal = preg_replace('#(?<!\\\\)((?:\\\\\\\\)*)\\\\[bBAZzG1-9]#', '$1', $literal);

		// Remove unescaped ^, | and $
		$literal = preg_replace('#(?<!\\\\)((?:\\\\\\\\)*)[$^|]#', '$1', $literal);

		// Escape unescaped -, ^ and ] so they are safe to use in a character class
		$literal = preg_replace('#(?<!\\\\)((?:\\\\\\\\)*)([-^\\]])#', '$1\\\\$2', $literal);

		// If the regexp doesn't use PCRE_DOLLAR_ENDONLY, it could end with a \n
		if (strpos($def['modifiers'], 'D') === false)
		{
			$literal .= "\n";
		}

		// Add the literal portion of the regexp to the patterns, as a character class
		if ($literal !== '')
		{
			$patterns[] = '[' . $literal . ']';
		}

		// Test whether this regexp actually matches anything
		if (empty($patterns))
		{
			return '/^$/D';
		}

		// Build the allowed characters regexp
		$regexp = $def['delimiter'] . implode('|', $patterns) . $def['delimiter'];

		// Add the modifiers
		if (strpos($def['modifiers'], 'i') !== false)
		{
			$regexp .= 'i';
		}
		if (strpos($def['modifiers'], 'u') !== false)
		{
			$regexp .= 'u';
		}

		return $regexp;
	}

	/**
	* Return the name of each capture in given regexp
	*
	* Will return an empty string for unnamed captures. Index 0 stands for the whole match and is
	* always an empty string.
	*
	* @param  string   $regexp
	* @return string[]
	*/
	public static function getCaptureNames($regexp)
	{
		$map        = [''];
		$regexpInfo = self::parse($regexp);
		foreach ($regexpInfo['tokens'] as $tok)
		{
			if ($tok['type'] === 'capturingSubpatternStart')
			{
				$map[] = (isset($tok['name'])) ? $tok['name'] : '';
			}
		}

		return $map;
	}

	/**
	* Parse a regexp into its delimiter, modifiers, body and a list of tokens
	*
	* Tokens are created for character classes, options such as (?i) and the start and end of
	* every subpattern. Each token records its position ("pos") and length ("len") in the body of
	* the regexp, as well as its "type". A subpattern's start token also receives the index of its
	* matching end token ("endToken") and the subpattern's "content".
	*
	* @param  string $regexp
	* @return array          Array with keys "delimiter", "modifiers", "regexp" and "tokens"
	*
	* @throws RuntimeException if the delimiters, a bracket or a parenthesis cannot be matched, or
	*                          if an unsupported subpattern type is encountered
	*/
	public static function parse($regexp)
	{
		if (!preg_match('#^(.)(.*?)\\1([a-zA-Z]*)$#Ds', $regexp, $m))
		{
			throw new RuntimeException('Could not parse regexp delimiters');
		}

		$ret = [
			'delimiter' => $m[1],
			'modifiers' => $m[3],
			'regexp'    => $m[2],
			'tokens'    => []
		];

		$regexp = $m[2];

		// Indexes of the start tokens of the subpatterns that are currently open
		$openSubpatterns = [];

		$pos = 0;
		$regexpLen = strlen($regexp);

		while ($pos < $regexpLen)
		{
			switch ($regexp[$pos])
			{
				case '\\':
					// skip next character
					$pos += 2;
					break;

				case '[':
					if (!preg_match('#\\[(.*?(?<!\\\\)(?:\\\\\\\\)*+)\\]((?:[+*][+?]?|\\?)?)#A', $regexp, $m, 0, $pos))
					{
						throw new RuntimeException('Could not find matching bracket from pos ' . $pos);
					}

					$ret['tokens'][] = [
						'pos'         => $pos,
						'len'         => strlen($m[0]),
						'type'        => 'characterClass',
						'content'     => $m[1],
						'quantifiers' => $m[2]
					];

					$pos += strlen($m[0]);
					break;

				case '(':
					if (preg_match('#\\(\\?([a-z]*)\\)#iA', $regexp, $m, 0, $pos))
					{
						// This is an option (?i) so we skip past the right parenthesis
						$ret['tokens'][] = [
							'pos'     => $pos,
							'len'     => strlen($m[0]),
							'type'    => 'option',
							'options' => $m[1]
						];

						$pos += strlen($m[0]);
						break;
					}

					// This should be a subpattern, we just have to sniff which kind
					if (preg_match("#(?J)\\(\\?(?:P?<(?<name>[a-z_0-9]+)>|'(?<name>[a-z_0-9]+)')#A", $regexp, $m, \PREG_OFFSET_CAPTURE, $pos))
					{
						// This is a named capture
						$tok = [
							'pos'  => $pos,
							'len'  => strlen($m[0][0]),
							'type' => 'capturingSubpatternStart',
							'name' => $m['name'][0]
						];

						$pos += strlen($m[0][0]);
					}
					elseif (preg_match('#\\(\\?([a-z]*):#iA', $regexp, $m, 0, $pos))
					{
						// This is a non-capturing subpattern (?:xxx)
						$tok = [
							'pos'     => $pos,
							'len'     => strlen($m[0]),
							'type'    => 'nonCapturingSubpatternStart',
							'options' => $m[1]
						];

						$pos += strlen($m[0]);
					}
					elseif (preg_match('#\\(\\?>#iA', $regexp, $m, 0, $pos))
					{
						/* This is a non-capturing subpattern with atomic grouping "(?>x+)" */
						$tok = [
							'pos'     => $pos,
							'len'     => strlen($m[0]),
							'type'    => 'nonCapturingSubpatternStart',
							'subtype' => 'atomic'
						];

						$pos += strlen($m[0]);
					}
					elseif (preg_match('#\\(\\?(<?[!=])#A', $regexp, $m, 0, $pos))
					{
						// This is an assertion
						$assertions = [
							'='  => 'lookahead',
							'<=' => 'lookbehind',
							'!'  => 'negativeLookahead',
							'<!' => 'negativeLookbehind'
						];

						$tok = [
							'pos'  => $pos,
							'len'  => strlen($m[0]),
							'type' => $assertions[$m[1]] . 'AssertionStart'
						];

						$pos += strlen($m[0]);
					}
					elseif (preg_match('#\\(\\?#A', $regexp, $m, 0, $pos))
					{
						throw new RuntimeException('Unsupported subpattern type at pos ' . $pos);
					}
					else
					{
						// This should be a normal capture
						$tok = [
							'pos'  => $pos,
							'len'  => 1,
							'type' => 'capturingSubpatternStart'
						];

						++$pos;
					}

					$openSubpatterns[] = count($ret['tokens']);
					$ret['tokens'][] = $tok;
					break;

				case ')':
					if (empty($openSubpatterns))
					{
						throw new RuntimeException('Could not find matching pattern start for right parenthesis at pos ' . $pos);
					}

					// Add the key to this token to its matching token and capture this subpattern's
					// content
					$k = array_pop($openSubpatterns);
					$startToken =& $ret['tokens'][$k];
					$startToken['endToken'] = count($ret['tokens']);
					$startToken['content']  = substr(
						$regexp,
						$startToken['pos'] + $startToken['len'],
						$pos - ($startToken['pos'] + $startToken['len'])
					);

					// Look for quantifiers after the subpattern, e.g. (?:ab)++
					$spn = strspn($regexp, '+*?', 1 + $pos);
					$quantifiers = substr($regexp, 1 + $pos, $spn);

					$ret['tokens'][] = [
						'pos'         => $pos,
						'len'         => 1 + $spn,
						'type'        => substr($startToken['type'], 0, -5) . 'End',
						'quantifiers' => $quantifiers
					];

					// Break the reference so the next iteration cannot overwrite this token
					unset($startToken);

					$pos += 1 + $spn;
					break;

				default:
					++$pos;
			}
		}

		if (!empty($openSubpatterns))
		{
			throw new RuntimeException('Could not find matching pattern end for left parenthesis at pos ' . $ret['tokens'][$openSubpatterns[0]]['pos']);
		}

		return $ret;
	}
}

View File

@@ -0,0 +1,257 @@
<?php
/**
* @package s9e\TextFormatter
* @copyright Copyright (c) 2010-2019 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator\Helpers;
use s9e\TextFormatter\Configurator\Collections\Ruleset;
use s9e\TextFormatter\Configurator\Collections\TagCollection;
abstract class RulesHelper
{
	/**
	* Generate the allowedChildren and allowedDescendants bitfields for every tag and for the root context
	*
	* @param  TagCollection $tags
	* @param  Ruleset       $rootRules
	* @return array                    Array with a "root" element (config of the root context) and
	*                                  a "tags" element (config of each usable tag)
	*/
	public static function getBitfields(TagCollection $tags, Ruleset $rootRules)
	{
		// Gather the rules of the root context and of every tag under a single array
		$rules = ['*root*' => iterator_to_array($rootRules)];
		foreach ($tags as $name => $tag)
		{
			$rules[$name] = iterator_to_array($tag->rules);
		}

		// The matrix records, for each tag, whether every other tag is allowed as a child and as
		// a descendant. Tags that cannot be reached from the root are then dropped
		$matrix = self::unrollRules($rules);
		self::pruneMatrix($matrix);

		// Group together the tags that are allowed in the exact same contexts. Two tags belong to
		// the same group if their column in the matrix is the same
		$groups = [];
		foreach ($matrix as $tagName => $unused)
		{
			if ($tagName === '*root*')
			{
				continue;
			}

			$signature = '';
			foreach ($matrix as $row)
			{
				$signature .= $row['allowedChildren'][$tagName] . $row['allowedDescendants'][$tagName];
			}
			$groups[$signature][] = $tagName;
		}

		// Assign one bit per group. The last tag of each group represents it in $bitTag
		$bitTag     = [];
		$tagsConfig = [];
		foreach (array_values($groups) as $bitNumber => $groupNames)
		{
			foreach ($groupNames as $tagName)
			{
				$tagsConfig[$tagName]['bitNumber'] = $bitNumber;
			}
			$bitTag[$bitNumber] = $groupNames[count($groupNames) - 1];
		}

		// Build the bitfields of each tag, including the *root* pseudo-tag
		foreach ($matrix as $tagName => $row)
		{
			$childBits      = '';
			$descendantBits = '';
			foreach ($bitTag as $targetName)
			{
				$childBits      .= $row['allowedChildren'][$targetName];
				$descendantBits .= $row['allowedDescendants'][$targetName];
			}
			$tagsConfig[$tagName]['allowed'] = self::pack($childBits, $descendantBits);
		}

		// Separate the root's config from the tags' config
		$rootConfig = $tagsConfig['*root*'];
		unset($tagsConfig['*root*']);

		return [
			'root' => $rootConfig,
			'tags' => $tagsConfig
		];
	}

	/**
	* Initialize a matrix of settings, with every setting set to 0
	*
	* @param  array $rules Rules for each tag
	* @return array        Multidimensional array of [tagName => [scope => [targetName => setting]]]
	*/
	protected static function initMatrix(array $rules)
	{
		$tagNames = array_keys($rules);
		$settings = array_fill_keys($tagNames, 0);

		// Arrays are copied on assignment, so every tag gets its own set of settings
		return array_fill_keys(
			$tagNames,
			[
				'allowedChildren'    => $settings,
				'allowedDescendants' => $settings
			]
		);
	}

	/**
	* Apply given rule from each applicable tag
	*
	* For each tag that has targets for this rule, the corresponding setting of each target is set
	* to given value in the matrix
	*
	* @param  array   &$matrix   Settings matrix
	* @param  array    $rules    Rules for each tag
	* @param  string   $ruleName Rule name
	* @param  string   $key      Key in the matrix
	* @param  integer  $value    Value to be set
	* @return void
	*/
	protected static function applyTargetedRule(array &$matrix, $rules, $ruleName, $key, $value)
	{
		foreach ($rules as $tagName => $tagRules)
		{
			if (isset($tagRules[$ruleName]))
			{
				foreach ($tagRules[$ruleName] as $targetName)
				{
					$matrix[$tagName][$key][$targetName] = $value;
				}
			}
		}
	}

	/**
	* Convert the rules of each tag into a matrix of allowed children and descendants
	*
	* @param  array $rules Rules for each tag
	* @return array        Settings matrix
	*/
	protected static function unrollRules(array $rules)
	{
		// Everything is denied by default
		$matrix   = self::initMatrix($rules);
		$tagNames = array_keys($rules);

		// Express ignoreTags and requireParent as denyChild and denyDescendant rules. The loop
		// iterates over the original rules while the converted rules are written to $rules
		foreach ($rules as $tagName => $tagRules)
		{
			if (!empty($tagRules['ignoreTags']))
			{
				// A tag that ignores tags denies everything
				$rules[$tagName]['denyChild']      = $tagNames;
				$rules[$tagName]['denyDescendant'] = $tagNames;
			}

			if (!empty($tagRules['requireParent']))
			{
				// Every tag that is not a required parent denies this tag as a child
				foreach (array_diff($tagNames, $tagRules['requireParent']) as $parentName)
				{
					$rules[$parentName]['denyChild'][] = $tagName;
				}
			}
		}

		// "allow" rules grant usage first, then "deny" rules take it away so that they prevail
		$passes = [
			['allowChild',      'allowedChildren',    1],
			['allowDescendant', 'allowedDescendants', 1],
			['denyChild',       'allowedChildren',    0],
			['denyDescendant',  'allowedDescendants', 0]
		];
		foreach ($passes as [$ruleName, $scope, $setting])
		{
			self::applyTargetedRule($matrix, $rules, $ruleName, $scope, $setting);
		}

		return $matrix;
	}

	/**
	* Remove unusable tags from the matrix
	*
	* A tag is usable if it can be reached from the root by following allowedChildren settings
	*
	* @param  array &$matrix
	* @return void
	*/
	protected static function pruneMatrix(array &$matrix)
	{
		$reachable = ['*root*' => 1];

		// Walk the matrix one generation at a time, starting from the root
		$frontier = $reachable;
		while (!empty($frontier))
		{
			// Accumulate the names of the tags allowed as children of the current generation
			$children = [];
			foreach ($frontier as $tagName => $unused)
			{
				$children += array_filter($matrix[$tagName]['allowedChildren']);
			}

			// The next generation is made of the tags that exist in the matrix and have not been
			// visited yet
			$frontier   = array_intersect_key(array_diff_key($children, $reachable), $matrix);
			$reachable += $frontier;
		}

		// Remove the rows of unreachable tags
		$matrix = array_intersect_key($matrix, $reachable);

		// Remove unreachable tags from the targets. The root is never a target
		unset($reachable['*root*']);
		foreach (array_keys($matrix) as $tagName)
		{
			foreach (['allowedChildren', 'allowedDescendants'] as $scope)
			{
				$matrix[$tagName][$scope] = array_intersect_key($matrix[$tagName][$scope], $reachable);
			}
		}
	}

	/**
	* Convert a binary representation such as "101011" to an array of integer
	*
	* Each bitfield is split in groups of 8 bits, then converted to a 16-bit integer where the
	* allowedChildren bitfield occupies the least significant bits and the allowedDescendants
	* bitfield occupies the most significant bits
	*
	* @param  string    $allowedChildren
	* @param  string    $allowedDescendants
	* @return integer[]
	*/
	protected static function pack($allowedChildren, $allowedDescendants)
	{
		$childChunks      = str_split($allowedChildren, 8);
		$descendantChunks = str_split($allowedDescendants, 8);

		$allowed = [];
		foreach ($childChunks as $i => $childBits)
		{
			// Each chunk is reversed so that its first bit ends up as the least significant bit,
			// then left-padded with zeroes to a full byte
			$highByte = str_pad(strrev($descendantChunks[$i]), 8, '0', STR_PAD_LEFT);
			$lowByte  = str_pad(strrev($childBits), 8, '0', STR_PAD_LEFT);

			$allowed[] = bindec($highByte . $lowByte);
		}

		return $allowed;
	}
}

View File

@@ -0,0 +1,200 @@
<?php
/**
* @package s9e\TextFormatter
* @copyright Copyright (c) 2010-2019 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator\Helpers;
use DOMAttr;
use DOMCharacterData;
use DOMDocument;
use DOMElement;
use DOMNode;
use DOMProcessingInstruction;
use DOMText;
use DOMXPath;
abstract class TemplateHelper
{
	/**
	* XSL namespace
	*/
	const XMLNS_XSL = 'http://www.w3.org/1999/XSL/Transform';

	/**
	* Return a list of parameters in use in given XSL
	*
	* @param  string $xsl XSL source
	* @return array       Alphabetically sorted list of unique parameter names
	*/
	public static function getParametersFromXSL($xsl)
	{
		// Names are stored as keys so that each of them is only recorded once
		$seen  = [];
		$xpath = new DOMXPath(TemplateLoader::load($xsl));

		// First source: XPath expressions found in the attributes of XSL elements
		$xslAttributes = $xpath->query('//xsl:*/@match | //xsl:*/@select | //xsl:*/@test');
		foreach ($xslAttributes as $attribute)
		{
			foreach (self::getParametersFromExpression($attribute, $attribute->value) as $paramName)
			{
				$seen[$paramName] = true;
			}
		}

		// Second source: XPath expressions found in attribute value templates
		$avtAttributes = $xpath->query('//*[namespace-uri() != "' . self::XMLNS_XSL . '"]/@*[contains(., "{")]');
		foreach ($avtAttributes as $attribute)
		{
			foreach (AVTHelper::parse($attribute->value) as $token)
			{
				if ($token[0] !== 'expression')
				{
					continue;
				}
				foreach (self::getParametersFromExpression($attribute, $token[1]) as $paramName)
				{
					$seen[$paramName] = true;
				}
			}
		}

		// Sort the parameter names and return them in a list
		ksort($seen);

		return array_keys($seen);
	}

	/**
	* Highlight the source of a node inside of a template
	*
	* @param  DOMNode $node    Node to highlight
	* @param  string  $prepend HTML to prepend
	* @param  string  $append  HTML to append
	* @return string           Template's source, as HTML
	*/
	public static function highlightNode(DOMNode $node, $prepend, $append)
	{
		// Work on a deep copy of the document so the original is left untouched
		$copy = $node->ownerDocument->cloneNode(true);
		$copy->formatOutput = true;

		// Find the node's counterpart in the copy
		$copyXPath = new DOMXPath($copy);
		$target    = $copyXPath->query($node->getNodePath())->item(0);

		// Tag the counterpart with a unique marker so that its serialization is unique too
		$marker = uniqid('_');
		if ($target instanceof DOMAttr)
		{
			$target->value .= $marker;
		}
		elseif ($target instanceof DOMElement)
		{
			$target->setAttribute($marker, '');
		}
		elseif ($target instanceof DOMCharacterData || $target instanceof DOMProcessingInstruction)
		{
			$target->data .= $marker;
		}

		// Serialize the whole template and the node, both as escaped HTML
		$templateXml  = TemplateLoader::innerXML($copy->documentElement);
		$templateXml  = trim(str_replace("\n ", "\n", $templateXml));
		$targetHtml   = htmlspecialchars(trim($copy->saveXML($target)));
		$templateHtml = htmlspecialchars($templateXml);

		// Wrap the node's representation in the highlighting HTML, then remove the marker in
		// both of its forms: as an empty attribute and as a suffix
		$html = str_replace($targetHtml, $prepend . $targetHtml . $append, $templateHtml);
		$html = str_replace(' ' . $marker . '=&quot;&quot;', '', $html);

		return str_replace($marker, '', $html);
	}

	/**
	* Replace simple templates (in an array, in-place) with a common template
	*
	* A "simple" template consists of a single HTML element that has no attributes, contains only
	* <xsl:apply-templates/> and is named after the tag's local name, in lowercase. Renderers can
	* take advantage of multiple tags sharing one template, so those templates are replaced with
	* a common template that uses a dynamic element name: either name() or local-name() depending
	* on whether any of the tags is namespaced, normalized to lowercase using XPath's translate()
	* function if any of the tags' names contains an uppercase ASCII letter
	*
	* @param  array<string> &$templates Associative array of [tagName => template]
	* @param  integer        $minCount  Minimum number of simple templates required to replace them
	* @return void
	*/
	public static function replaceHomogeneousTemplates(array &$templates, $minCount = 3)
	{
		// Collect the names of the tags that use a simple template
		$simpleTags    = [];
		$hasNamespaced = false;
		foreach ($templates as $tagName => $template)
		{
			// The expected element name is the tag's local name, lowercased
			$elName   = strtolower(preg_replace('/^[^:]+:/', '', $tagName));
			$expected = '<' . $elName . '><xsl:apply-templates/></' . $elName . '>';
			if ($template !== $expected)
			{
				continue;
			}

			$simpleTags[] = $tagName;
			if (strpos($tagName, ':') !== false)
			{
				$hasNamespaced = true;
			}
		}

		// Below $minCount simple tags, the replacement would only make the stylesheet bigger
		if (count($simpleTags) < $minCount)
		{
			return;
		}

		// Use local-name() if any of the tags are namespaced
		$nameExpr = ($hasNamespaced) ? 'local-name()' : 'name()';

		// List the uppercase characters used in the tags' names, they need to be lowercased
		$upperChars = preg_replace('/[^A-Z]+/', '', count_chars(implode('', $simpleTags), 3));
		if ('' < $upperChars)
		{
			$nameExpr = sprintf("translate(%s,'%s','%s')", $nameExpr, $upperChars, strtolower($upperChars));
		}

		// Replace the simple templates with the common template
		$commonTemplate = '<xsl:element name="{' . $nameExpr . '}"><xsl:apply-templates/></xsl:element>';
		foreach ($simpleTags as $tagName)
		{
			$templates[$tagName] = $commonTemplate;
		}
	}

	/**
	* Get a list of parameters from given XPath expression
	*
	* Variables that match the name of an <xsl:variable/> found among the preceding siblings of
	* the context node or of its ancestors are local variables, not parameters
	*
	* @param  DOMNode  $node Context node
	* @param  string   $expr XPath expression
	* @return string[]
	*/
	protected static function getParametersFromExpression(DOMNode $node, $expr)
	{
		$candidates = XPathHelper::getVariables($expr);
		$xpath      = new DOMXPath($node->ownerDocument);

		$paramNames = [];
		foreach ($candidates as $candidate)
		{
			$localQuery = 'ancestor-or-self::*/preceding-sibling::xsl:variable[@name="' . $candidate . '"]';
			if ($xpath->query($localQuery, $node)->length)
			{
				// This name is the name of a local variable
				continue;
			}
			$paramNames[] = $candidate;
		}

		return $paramNames;
	}
}

View File

@@ -0,0 +1,713 @@
<?php
/**
* @package s9e\TextFormatter
* @copyright Copyright (c) 2010-2019 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator\Helpers;
use DOMElement;
use DOMXPath;
/**
* This class helps the RulesGenerator by analyzing a given template in order to answer questions
* such as "can this tag be a child/descendant of that other tag?" and others related to the HTML5
* content model.
*
* We use the HTML5 specs to determine which children or descendants should be allowed or denied
* based on HTML5 content models. While it does not exactly match HTML5 content models, it gets
* pretty close. We also use HTML5 "optional end tag" rules to create closeParent rules.
*
* Currently, this class does not correctly evaluate elements created with <xsl:element> or
* attributes created with <xsl:attribute>, and may never do so due to the increased complexity it
* would entail. Additionally, it does not evaluate the scope of <xsl:apply-templates/>. For
* instance, it will treat <xsl:apply-templates select="LI"/> as if it was <xsl:apply-templates/>
*
* @link http://dev.w3.org/html5/spec/content-models.html#content-models
* @link http://dev.w3.org/html5/spec/syntax.html#optional-tags
*/
class TemplateInspector
{
/**
* XSL namespace
*/
const XMLNS_XSL = 'http://www.w3.org/1999/XSL/Transform';
/**
* @var string[] allowChild bitfield for each branch
*/
protected $allowChildBitfields = [];
/**
* @var bool Whether elements are allowed as children
*/
protected $allowsChildElements;
/**
* @var bool Whether text nodes are allowed as children
*/
protected $allowsText;
/**
* @var array[] Array of array of DOMElement instances
*/
protected $branches;
/**
* @var string OR-ed bitfield representing all of the categories used by this template
*/
protected $contentBitfield = "\0";
/**
* @var string Default bitfield used at the root of a branch
*/
protected $defaultBranchBitfield;
/**
* @var string denyDescendant bitfield
*/
protected $denyDescendantBitfield = "\0";
/**
* @var \DOMDocument Document containing the template
*/
protected $dom;
/**
* @var bool Whether this template contains any HTML elements
*/
protected $hasElements = false;
/**
* @var bool Whether this template renders non-whitespace text nodes at its root
*/
protected $hasRootText;
/**
* @var bool Whether this template should be considered a block-level element
*/
protected $isBlock = false;
/**
* @var bool Whether the template uses the "empty" content model
*/
protected $isEmpty;
/**
* @var bool Whether this template adds to the list of active formatting elements
*/
protected $isFormattingElement;
/**
* @var bool Whether this template lets content through via an xsl:apply-templates element
*/
protected $isPassthrough = false;
/**
* @var bool Whether all branches use the transparent content model
*/
protected $isTransparent = false;
/**
* @var bool Whether all branches have an ancestor that is a void element
*/
protected $isVoid;
/**
* @var array Last HTML element that precedes an <xsl:apply-templates/> node
*/
protected $leafNodes = [];
/**
* @var bool Whether any branch has an element that preserves new lines by default (e.g. <pre>)
*/
protected $preservesNewLines = false;
/**
* @var array Bitfield of the first HTML element of every branch
*/
protected $rootBitfields = [];
/**
* @var array Every HTML element that has no HTML parent
*/
protected $rootNodes = [];
/**
* @var DOMXPath XPath engine associated with $this->dom
*/
protected $xpath;
/**
* Constructor
*
* @param string $template Template content
*/
public function __construct($template)
{
	// Load the template and prepare the XPath engine used by the analysis methods
	$dom         = TemplateLoader::load($template);
	$this->dom   = $dom;
	$this->xpath = new DOMXPath($dom);

	// The allowChild bitfield of a <div> element serves as the default for the root of a branch
	$div = $dom->createElement('div');
	$this->defaultBranchBitfield = ElementInspector::getAllowChildBitfield($div);

	// Run the three analysis passes, in this order
	$this->analyseRootNodes();
	$this->analyseBranches();
	$this->analyseContent();
}
/**
* Return whether this template allows a given child
*
* @param TemplateInspector $child
* @return bool
*/
public function allowsChild(TemplateInspector $child)
{
	// A template that is denied as a descendant cannot be allowed as a child
	$allowedAsDescendant = $this->allowsDescendant($child);
	if (!$allowedAsDescendant)
	{
		return false;
	}

	// Every root bitfield of the child must match every allowChild bitfield of this template
	foreach ($child->rootBitfields as $childBitfield)
	{
		foreach ($this->allowChildBitfields as $branchBitfield)
		{
			$matches = self::match($childBitfield, $branchBitfield);
			if (!$matches)
			{
				return false;
			}
		}
	}

	// Lastly, the child is only rejected if it has text at its root and text is not allowed
	return !(!$this->allowsText && $child->hasRootText);
}
/**
* Return whether this template allows a given descendant
*
* @param TemplateInspector $descendant
* @return bool
*/
public function allowsDescendant(TemplateInspector $descendant)
{
	// The descendant's content is checked against this template's denyDescendant bitfield
	$isDenied = self::match($descendant->contentBitfield, $this->denyDescendantBitfield);
	if ($isDenied)
	{
		return false;
	}

	// If child elements are allowed, there is nothing else to test
	if ($this->allowsChildElements)
	{
		return true;
	}

	// Otherwise the descendant is only allowed if it contains no elements
	return !$descendant->hasElements;
}
/**
* Return whether this template allows elements as children
*
* @return bool
*/
public function allowsChildElements()
{
// Plain accessor: returns the $allowsChildElements property untouched. The property is declared
// without a default value; presumably it is assigned by analyseBranches() during construction
// -- TODO confirm, that method's body is not visible from here
return $this->allowsChildElements;
}
/**
* Return whether this template allows text nodes as children
*
* @return bool
*/
public function allowsText()
{
// Plain accessor: returns the $allowsText property untouched. The property is declared without
// a default value; presumably it is assigned by analyseBranches() during construction
// -- TODO confirm, that method's body is not visible from here
return $this->allowsText;
}
/**
* Return whether this template automatically closes given parent template
*
* @param TemplateInspector $parent
* @return bool
*/
public function closesParent(TemplateInspector $parent)
{
	// Test every pair made of one of this template's root nodes and one of the parent's leaf
	// nodes. A single pair in which the root node closes the leaf node is enough
	$parentLeaves = $parent->leafNodes;
	foreach ($this->rootNodes as $ownRoot)
	{
		foreach ($parentLeaves as $parentLeaf)
		{
			$closes = ElementInspector::closesParent($ownRoot, $parentLeaf);
			if ($closes)
			{
				return true;
			}
		}
	}

	return false;
}
/**
* Evaluate an XPath expression
*
* @param string $expr XPath expression
* @param DOMElement $node Context node
* @return mixed
*/
public function evaluate($expr, ?DOMElement $node = null)
{
	// The context node is declared as explicitly nullable: relying on the "= null" default to
	// make the type nullable is deprecated as of PHP 8.4, and ?DOMElement accepts the exact same
	// arguments. When no context node is given, null is forwarded to DOMXPath::evaluate()
	return $this->xpath->evaluate($expr, $node);
}
/**
* Return whether this template should be considered a block-level element
*
* @return bool
*/
public function isBlock()
{
// Plain accessor: $isBlock defaults to false and is set to true by analyseRootNodes() as soon as
// one of the template's root nodes is a block-level element
return $this->isBlock;
}
/**
* Return whether this template adds to the list of active formatting elements
*
* @return bool
*/
public function isFormattingElement()
{
// Plain accessor: returns the $isFormattingElement property untouched. The property is declared
// without a default value and its assignment is not visible in this part of the file
return $this->isFormattingElement;
}
/**
* Return whether this template uses the "empty" content model
*
* @return bool
*/
public function isEmpty()
{
// Plain accessor: returns the $isEmpty property untouched. The property is declared without a
// default value and its assignment is not visible in this part of the file
return $this->isEmpty;
}
/**
* Return whether this template lets content through via an xsl:apply-templates element
*
* @return bool
*/
public function isPassthrough()
{
// Plain accessor: $isPassthrough is computed by analyseContent(), which sets it to true if the
// template contains at least one xsl:apply-templates element
return $this->isPassthrough;
}
/**
* Return whether this template uses the "transparent" content model
*
* @return bool
*/
public function isTransparent()
{
// Plain accessor: $isTransparent defaults to false; the code that may change it is not visible
// in this part of the file
return $this->isTransparent;
}
/**
* Return whether all branches have an ancestor that is a void element
*
* @return bool
*/
public function isVoid()
{
// Plain accessor: returns the $isVoid property untouched. The property is declared without a
// default value and its assignment is not visible in this part of the file
return $this->isVoid;
}
/**
 * Tell whether this template preserves new lines in its descendants
 *
 * @return bool Value of the preservesNewLines flag, as set by computePreservesNewLines()
 */
public function preservesNewLines()
{
	return $this->preservesNewLines;
}
/**
 * Analyse the content of the whole template
 *
 * Accumulates the category bitfield of every non-XSL element into $this->contentBitfield,
 * records whether any such element exists and whether the template is passthrough.
 */
protected function analyseContent()
{
	// Select every element that is not in the XSL namespace
	$nonXslQuery = '//*[namespace-uri() != "' . self::XMLNS_XSL . '"]';
	$elements    = $this->xpath->query($nonXslQuery);
	foreach ($elements as $element)
	{
		$this->contentBitfield |= ElementInspector::getCategoryBitfield($element);
		$this->hasElements      = true;
	}

	// A template is passthrough if it contains at least one xsl:apply-templates element
	$applyTemplatesCount = $this->evaluate('count(//xsl:apply-templates)');
	$this->isPassthrough = (bool) $applyTemplatesCount;
}
/**
 * Record the HTML elements (and their bitfield) rendered at the root of the template
 *
 * Also records whether the template is block-level and whether it outputs text at its root.
 */
protected function analyseRootNodes()
{
	// Building blocks shared by the queries below
	$notXsl         = '[namespace-uri() != "' . self::XMLNS_XSL . '"]';
	$noHtmlAncestor = '[not(ancestor::*' . $notXsl . ')]';

	// A non-XSL element with no non-XSL ancestor is the first HTML element of its branch
	foreach ($this->xpath->query('//*' . $notXsl . $noHtmlAncestor) as $rootNode)
	{
		$this->rootNodes[] = $rootNode;

		// One block-level root node is enough to mark the whole template as block-level
		if ($this->elementIsBlock($rootNode))
		{
			$this->isBlock = true;
		}

		$this->rootBitfields[] = ElementInspector::getCategoryBitfield($rootNode);
	}

	// Root text must have no non-XSL ancestor, and must not be located inside of an
	// <xsl:attribute/>, <xsl:comment/> or <xsl:variable/>
	$rootTextFilter = $noHtmlAncestor
	                . '[not(ancestor::xsl:attribute | ancestor::xsl:comment | ancestor::xsl:variable)]';

	// Non-whitespace text nodes, non-whitespace xsl:text and any xsl:value-of count as text
	$textSources = [
		'//text()[normalize-space() != ""]' . $rootTextFilter,
		'//xsl:text[normalize-space() != ""]' . $rootTextFilter,
		'//xsl:value-of' . $rootTextFilter
	];

	$this->hasRootText = (bool) $this->evaluate('count(' . implode('|', $textSources) . ')');
}
/**
 * Analyse each branch that leads to an <xsl:apply-templates/> element
 *
 * A branch is the list of non-XSL ancestors of one xsl:apply-templates element. Once the
 * branches are collected, every branch-dependent property is computed.
 */
protected function analyseBranches()
{
	$ancestorsQuery = 'ancestor::*[namespace-uri() != "' . self::XMLNS_XSL . '"]';

	$this->branches = [];
	foreach ($this->xpath->query('//xsl:apply-templates') as $applyTemplates)
	{
		$ancestors        = $this->xpath->query($ancestorsQuery, $applyTemplates);
		$this->branches[] = iterator_to_array($ancestors);
	}

	// Derive the properties that depend on the branches
	$this->computeAllowsChildElements();
	$this->computeAllowsText();
	$this->computeBitfields();
	$this->computeFormattingElement();
	$this->computeIsEmpty();
	$this->computeIsTransparent();
	$this->computeIsVoid();
	$this->computePreservesNewLines();
	$this->storeLeafNodes();
}
/**
 * Test whether any element of any branch has given property
 *
 * @param  string $methodName Name of the static ElementInspector method used as the test
 * @return bool               TRUE as soon as the method returns a truthy value for an element
 */
protected function anyBranchHasProperty($methodName)
{
	foreach ($this->branches as $branchElements)
	{
		foreach ($branchElements as $branchElement)
		{
			$hasProperty = ElementInspector::$methodName($branchElement);
			if ($hasProperty)
			{
				return true;
			}
		}
	}

	return false;
}
/**
 * Compute the allowChildBitfields and denyDescendantBitfield properties
 *
 * @return void
 */
protected function computeBitfields()
{
	// Without any branch, a single null bitfield is stored
	if (empty($this->branches))
	{
		$this->allowChildBitfields = ["\0"];

		return;
	}

	foreach ($this->branches as $branch)
	{
		// Start with the default bitfield (the value associated with <div>) in order to
		// approximate a value if the whole branch uses the transparent content model
		$branchBitfield = $this->defaultBranchBitfield;

		foreach ($branch as $element)
		{
			// allowChild rules accumulate through transparent elements, and start over from
			// a null bitfield at every element that is not transparent
			$inherited      = (ElementInspector::isTransparent($element)) ? $branchBitfield : "\0";
			$branchBitfield = $inherited | ElementInspector::getAllowChildBitfield($element);

			// denyDescendant rules always accumulate
			$this->denyDescendantBitfield |= ElementInspector::getDenyDescendantBitfield($element);
		}

		$this->allowChildBitfields[] = $branchBitfield;
	}
}
/**
 * Compute the allowsChildElements property
 *
 * A template allows child elements if it has at least one branch (one xsl:apply-templates)
 * and none of the elements in its branches have the text-only property
 *
 * @return void
 */
protected function computeAllowsChildElements()
{
	if ($this->anyBranchHasProperty('isTextOnly'))
	{
		$this->allowsChildElements = false;
	}
	else
	{
		$this->allowsChildElements = !empty($this->branches);
	}
}
/**
 * Compute the allowsText property
 *
 * A template allows text unless the last element of one of its non-empty branches
 * disallows text
 *
 * @return void
 */
protected function computeAllowsText()
{
	$allowsText = true;

	// array_filter() discards the branches that contain no elements
	foreach (array_filter($this->branches) as $branch)
	{
		$leafElement = end($branch);
		if (ElementInspector::disallowsText($leafElement))
		{
			$allowsText = false;
			break;
		}
	}

	$this->allowsText = $allowsText;
}
/**
 * Compute the isFormattingElement property
 *
 * A template is a formatting element if it has at least one non-empty branch and every
 * element of every branch is either a formatting element or a formatting span
 *
 * @return void
 */
protected function computeFormattingElement()
{
	foreach ($this->branches as $branch)
	{
		foreach ($branch as $element)
		{
			$isFormatting = ElementInspector::isFormattingElement($element)
			             || $this->isFormattingSpan($element);
			if (!$isFormatting)
			{
				$this->isFormattingElement = false;

				return;
			}
		}
	}

	// Branches without any element do not count
	$nonEmptyBranches          = array_filter($this->branches);
	$this->isFormattingElement = (count($nonEmptyBranches) > 0);
}
/**
 * Compute the isEmpty property
 *
 * A template is said to be empty if it has no xsl:apply-templates element, or if any
 * xsl:apply-templates element has an empty element among its ancestors
 *
 * @return void
 */
protected function computeIsEmpty()
{
	$hasEmptyAncestor = $this->anyBranchHasProperty('isEmpty');
	$this->isEmpty    = $hasEmptyAncestor || empty($this->branches);
}
/**
 * Compute the isTransparent property
 *
 * A template is transparent if it has at least one branch and every element found in its
 * branches is transparent
 *
 * @return void
 */
protected function computeIsTransparent()
{
	// Without branches the template cannot be transparent and the loops below are skipped
	$isTransparent = !empty($this->branches);

	foreach ($this->branches as $branch)
	{
		foreach ($branch as $element)
		{
			if (!ElementInspector::isTransparent($element))
			{
				// One non-transparent element settles the question
				$isTransparent = false;
				break 2;
			}
		}
	}

	$this->isTransparent = $isTransparent;
}
/**
 * Compute the isVoid property
 *
 * A template is said to be void if it has no xsl:apply-templates element, or if any
 * xsl:apply-templates element has a void element among its ancestors
 *
 * @return void
 */
protected function computeIsVoid()
{
	$hasVoidAncestor = $this->anyBranchHasProperty('isVoid');
	$this->isVoid    = $hasVoidAncestor || empty($this->branches);
}
/**
 * Compute the preservesNewLines property
 *
 * The inline styles of all the elements of a branch are concatenated. The template
 * preserves new lines if, in any branch, the last white-space declaration has a value
 * that starts with "pre" rather than "no"
 *
 * @return void
 */
protected function computePreservesNewLines()
{
	$preserves = false;

	foreach ($this->branches as $branch)
	{
		$branchStyle = '';
		foreach ($branch as $element)
		{
			$branchStyle .= $this->getStyle($element, true);
		}

		// The greedy .* makes the capture apply to the last white-space declaration
		$found = preg_match('(.*white-space\\s*:\\s*(no|pre))is', $branchStyle, $m);
		if ($found && strtolower($m[1]) === 'pre')
		{
			$preserves = true;
			break;
		}
	}

	$this->preservesNewLines = $preserves;
}
/**
 * Test whether given element is a block-level element
 *
 * An inline "display" declaration takes precedence: "block" forces TRUE, "inline..." and
 * "none" force FALSE. Otherwise the answer comes from ElementInspector::isBlock()
 *
 * @param  DOMElement $element
 * @return bool
 */
protected function elementIsBlock(DOMElement $element)
{
	$inlineStyle = $this->getStyle($element);

	$forcedBlock = preg_match('(\\bdisplay\\s*:\\s*block)i', $inlineStyle);
	if ($forcedBlock)
	{
		return true;
	}

	$forcedNonBlock = preg_match('(\\bdisplay\\s*:\\s*(?:inli|no)ne)i', $inlineStyle);
	if ($forcedNonBlock)
	{
		return false;
	}

	// No relevant display declaration, rely on the element itself
	return ElementInspector::isBlock($element);
}
/**
 * Retrieve and return the inline style assigned to given element
 *
 * The result is made of "white-space:pre;" if ElementInspector reports that the element
 * preserves whitespace, followed by the element's style attribute, followed by the text
 * content of each matching xsl:attribute named "style", each prefixed with a semicolon
 *
 * @param  DOMElement $node Context node
 * @param  bool       $deep Whether to use all xsl:attribute descendants rather than children
 * @return string
 */
protected function getStyle(DOMElement $node, $deep = false)
{
	$style  = (ElementInspector::preservesWhitespace($node)) ? 'white-space:pre;' : '';
	$style .= $node->getAttribute('style');

	// Select the xsl:attribute children, or descendants if $deep is set
	$axis            = ($deep) ? './/' : './';
	$styleAttributes = $this->xpath->query($axis . 'xsl:attribute[@name="style"]', $node);
	foreach ($styleAttributes as $styleAttribute)
	{
		$style .= ';' . $styleAttribute->textContent;
	}

	return $style;
}
/**
 * Test whether given node is a span element used for formatting
 *
 * Returns TRUE if the node is a span element that has a non-empty class attribute and/or
 * a non-empty style attribute, and no attribute with any other name
 *
 * @param  DOMElement $node
 * @return boolean
 */
protected function isFormattingSpan(DOMElement $node)
{
	$hasNoFormatting = ($node->getAttribute('class') === '' && $node->getAttribute('style') === '');
	if ($node->nodeName !== 'span' || $hasNoFormatting)
	{
		return false;
	}

	// Any attribute other than class and style disqualifies the span
	foreach ($node->attributes as $attrName => $attribute)
	{
		if (!in_array($attrName, ['class', 'style'], true))
		{
			return false;
		}
	}

	return true;
}
/**
 * Store the leaf node of every non-empty branch
 *
 * A leaf node is the last element of a branch, which is the closest non-XSL ancestor to
 * an xsl:apply-templates element
 *
 * @return void
 */
protected function storeLeafNodes()
{
	$nonEmptyBranches = array_filter($this->branches);
	foreach ($nonEmptyBranches as $branchElements)
	{
		$leafNode          = end($branchElements);
		$this->leafNodes[] = $leafNode;
	}
}
/**
 * Test whether two bitfields have any bits in common
 *
 * @param  string $bitfield1
 * @param  string $bitfield2
 * @return bool              TRUE if the bitwise AND of both strings contains a non-null byte
 */
protected static function match($bitfield1, $bitfield2)
{
	$commonBits = $bitfield1 & $bitfield2;

	return (trim($commonBits, "\0") !== '');
}
}

View File

@@ -0,0 +1,188 @@
<?php
/**
* @package s9e\TextFormatter
* @copyright Copyright (c) 2010-2019 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator\Helpers;
use DOMDocument;
use DOMElement;
use DOMXPath;
use RuntimeException;
abstract class TemplateLoader
{
	/**
	* XSL namespace
	*/
	const XMLNS_XSL = 'http://www.w3.org/1999/XSL/Transform';

	/**
	* Get the XML content of an element
	*
	* @private
	*
	* @param  DOMElement $element
	* @return string              Serialized content, without the element's own tags
	*/
	public static function innerXML(DOMElement $element)
	{
		// Serialize the XML then remove the outer element: keep what lies between the end of
		// the first tag and the start of the last tag
		$xml = $element->ownerDocument->saveXML($element);
		$pos = 1 + strpos($xml, '>');
		$len = strrpos($xml, '<') - $pos;

		// If the template is empty, return an empty string
		return ($len < 1) ? '' : substr($xml, $pos, $len);
	}

	/**
	* Load a template as an xsl:template node
	*
	* Will attempt to load it as XML first, then as XML with its entities fixed, then as HTML
	* as a fallback. Either way, an xsl:template node is returned
	*
	* @param  string      $template
	* @return DOMDocument
	*
	* @throws RuntimeException if the template contains an XSL element and cannot be loaded as XML
	*/
	public static function load($template)
	{
		$dom = self::loadAsXML($template) ?: self::loadAsXML(self::fixEntities($template));
		if ($dom)
		{
			return $dom;
		}

		// If the template contains an XSL element, abort now. Otherwise, try reparsing it as HTML
		if (strpos($template, '<xsl:') !== false)
		{
			// libxml_get_last_error() returns FALSE if no error was recorded
			$error   = libxml_get_last_error();
			$message = ($error) ? $error->message : 'unknown error';

			throw new RuntimeException('Invalid XSL: ' . $message);
		}

		return self::loadAsHTML($template);
	}

	/**
	* Serialize a loaded template back into a string
	*
	* NOTE: removes the root node created by load()
	*
	* @param  DOMDocument $dom
	* @return string
	*/
	public static function save(DOMDocument $dom)
	{
		$xml = self::innerXML($dom->documentElement);
		if (strpos($xml, 'xmlns:xsl') !== false)
		{
			// Remove the XSL namespace declarations left inside of tags
			$xml = preg_replace('((<[^>]+?) xmlns:xsl="' . self::XMLNS_XSL . '")', '$1', $xml);
		}

		return $xml;
	}

	/**
	* Replace HTML entities and unescaped ampersands in given template
	*
	* Ampersands that do not start an entity are escaped first, then named entities other
	* than the five XML entities are passed to html_entity_decode()
	*
	* @param  string $template
	* @return string
	*/
	protected static function fixEntities($template)
	{
		return preg_replace_callback(
			'(&(?!quot;|amp;|apos;|lt;|gt;)\\w+;)',
			function ($m)
			{
				return html_entity_decode($m[0], ENT_NOQUOTES, 'UTF-8');
			},
			preg_replace('(&(?![A-Za-z0-9]+;|#\\d+;|#x[A-Fa-f0-9]+;))', '&amp;', $template)
		);
	}

	/**
	* Load given HTML template in a DOM document
	*
	* @param  string      $template Original template
	* @return DOMDocument
	*/
	protected static function loadAsHTML($template)
	{
		$template = self::replaceCDATA($template);

		// Parse the template as HTML, wrapped in a div
		$dom  = new DOMDocument;
		$html = '<?xml version="1.0" encoding="utf-8" ?><html><body><div>' . $template . '</div></body></html>';

		$useErrors = libxml_use_internal_errors(true);
		$dom->loadHTML($html, LIBXML_NSCLEAN);
		self::removeInvalidAttributes($dom);
		libxml_use_internal_errors($useErrors);

		// Now dump the thing as XML then reload it with the proper root element
		$xml = '<?xml version="1.0" encoding="utf-8" ?><xsl:template xmlns:xsl="' . self::XMLNS_XSL . '">' . self::innerXML($dom->documentElement->firstChild->firstChild) . '</xsl:template>';

		$useErrors = libxml_use_internal_errors(true);
		$dom->loadXML($xml, LIBXML_NSCLEAN);
		libxml_use_internal_errors($useErrors);

		return $dom;
	}

	/**
	* Load given XSL template in a DOM document
	*
	* @param  string           $template Original template
	* @return bool|DOMDocument           DOMDocument on success, FALSE otherwise
	*/
	protected static function loadAsXML($template)
	{
		$xml = '<?xml version="1.0" encoding="utf-8" ?><xsl:template xmlns:xsl="' . self::XMLNS_XSL . '">' . $template . '</xsl:template>';

		// Collect libxml errors internally for the duration of the load, then restore the setting
		$useErrors = libxml_use_internal_errors(true);
		$dom       = new DOMDocument;
		$success   = $dom->loadXML($xml, LIBXML_NOCDATA | LIBXML_NSCLEAN);
		self::removeInvalidAttributes($dom);
		libxml_use_internal_errors($useErrors);

		return ($success) ? $dom : false;
	}

	/**
	* Remove attributes with an invalid name from given DOM document
	*
	* @param  DOMDocument $dom
	* @return void
	*/
	protected static function removeInvalidAttributes(DOMDocument $dom)
	{
		$xpath = new DOMXPath($dom);
		foreach ($xpath->query('//@*') as $attribute)
		{
			// A valid name is an optional prefix followed by a local name that does not start
			// with a digit, both made of word characters and dashes
			if (!preg_match('(^(?:[-\\w]+:)?(?!\\d)[-\\w]+$)D', $attribute->nodeName))
			{
				$attribute->parentNode->removeAttributeNode($attribute);
			}
		}
	}

	/**
	* Replace CDATA sections in given template
	*
	* Each CDATA section is replaced with its content, escaped with htmlspecialchars()
	*
	* @param  string $template Original template
	* @return string           Modified template
	*/
	protected static function replaceCDATA($template)
	{
		// The "s" modifier lets the dot match newlines so that CDATA sections that span
		// multiple lines are replaced too
		return preg_replace_callback(
			'(<!\\[CDATA\\[(.*?)\\]\\]>)s',
			function ($m)
			{
				return htmlspecialchars($m[1]);
			},
			$template
		);
	}
}

View File

@@ -1,28 +1,70 @@
<?php
/*
/**
* @package s9e\TextFormatter
* @copyright Copyright (c) 2010-2019 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator\Helpers;
use DOMAttr;
use DOMDocument;
use DOMText;
use DOMXPath;
abstract class TemplateModifier
{
/**
* XSL namespace
*/
const XMLNS_XSL = 'http://www.w3.org/1999/XSL/Transform';
/**
 * Replace parts of a template that match given regexp
 *
 * Attribute values are treated as plain text. Replacements within XPath expressions are
 * unsupported. The callback must return an array with two elements. The first must be
 * either 'expression', 'literal' or 'passthrough', and the second depends on the first.
 *
 * - 'expression' indicates that the replacement must be treated as an XPath expression such
 *   as '@foo', which must be passed as the second element.
 *
 * - 'literal' indicates a literal (plain text) replacement, passed as its second element.
 *
 * - 'passthrough' indicates that the replacement should be the tag's content. It works
 *   differently whether it is inside an attribute's value or a text node. Within an
 *   attribute's value, the replacement will be the text content of the tag. Within a text
 *   node, the replacement becomes an <xsl:apply-templates/> node. A second optional element
 *   can be passed to be used as its @select node-set.
 *
 * @param  string   $template Original template
 * @param  string   $regexp   Regexp for matching parts that need replacement
 * @param  callback $fn       Callback used to get the replacement
 * @return string             Processed template
 */
public static function replaceTokens($template, $regexp, $fn)
{
	$dom   = TemplateLoader::load($template);
	$xpath = new DOMXPath($dom);

	// Attribute values are processed first
	$attributes = $xpath->query('//@*');
	foreach ($attributes as $attribute)
	{
		self::replaceTokensInAttribute($attribute, $regexp, $fn);
	}

	// Then the text nodes
	$textNodes = $xpath->query('//text()');
	foreach ($textNodes as $textNode)
	{
		self::replaceTokensInText($textNode, $regexp, $fn);
	}

	return TemplateLoader::save($dom);
}
/**
* Create a node that implements given replacement strategy
*
* @param DOMDocument $dom
* @param array $replacement
* @return DOMNode
*/
protected static function createReplacementNode(DOMDocument $dom, array $replacement)
{
if ($replacement[0] === 'expression')
@@ -34,49 +76,89 @@ abstract class TemplateModifier
{
$newNode = $dom->createElementNS(self::XMLNS_XSL, 'xsl:apply-templates');
if (isset($replacement[1]))
{
$newNode->setAttribute('select', $replacement[1]);
}
}
else
{
$newNode = $dom->createTextNode($replacement[1]);
}
return $newNode;
}
/**
* Replace parts of an attribute that match given regexp
*
* The callback is called with the match and the attribute, and its return value is read as
* an array whose first element is the replacement type and second element is its value
*
* @param DOMAttr $attribute Attribute
* @param string $regexp Regexp for matching parts that need replacement
* @param callback $fn Callback used to get the replacement
* @return void
*/
protected static function replaceTokensInAttribute(DOMAttr $attribute, $regexp, $fn)
{
// NOTE(review): this body contains pairs of near-identical lines, one with backslash-prefixed
// function and constant names and one without. They look like the before/after lines of a
// diff - confirm which of each pair belongs in the file
$attrValue = \preg_replace_callback(
$attrValue = preg_replace_callback(
$regexp,
function ($m) use ($fn, $attribute)
{
$replacement = $fn($m, $attribute);
if ($replacement[0] === 'expression' || $replacement[0] === 'passthrough')
{
// Use the node's text content as the default expression
$replacement[] = '.';
// The expression is wrapped in curly braces (XSLT attribute value template syntax)
return '{' . $replacement[1] . '}';
}
else
{
// Any other type of replacement is inserted as-is
return $replacement[1];
}
},
$attribute->value
);
// The new value is escaped before being assigned to the attribute
$attribute->value = \htmlspecialchars($attrValue, \ENT_COMPAT, 'UTF-8');
$attribute->value = htmlspecialchars($attrValue, ENT_COMPAT, 'UTF-8');
}
/**
* Replace parts of a text node that match given regexp
*
* The text node is replaced with a sequence of nodes inserted before it: a text node for
* each stretch of unmatched text, and a node created by createReplacementNode() for each match
*
* @param DOMText $node Text node
* @param string $regexp Regexp for matching parts that need replacement
* @param callback $fn Callback used to get the replacement
* @return void
*/
protected static function replaceTokensInText(DOMText $node, $regexp, $fn)
{
// NOTE(review): this body contains pairs of near-identical lines, one with backslash-prefixed
// function and constant names and one without. They look like the before/after lines of a
// diff - confirm which of each pair belongs in the file
// Grab the node's parent so that we can rebuild the text with added variables right
// before the node, using DOM's insertBefore(). Technically, it would make more sense
// to create a document fragment, append nodes then replace the node with the fragment
// but it leads to namespace redeclarations, which looks ugly
$parentNode = $node->parentNode;
$dom = $node->ownerDocument;
// With PREG_OFFSET_CAPTURE, each capture is a pair made of the matched text and its offset
\preg_match_all($regexp, $node->textContent, $matches, \PREG_SET_ORDER | \PREG_OFFSET_CAPTURE);
preg_match_all($regexp, $node->textContent, $matches, PREG_SET_ORDER | PREG_OFFSET_CAPTURE);
$lastPos = 0;
foreach ($matches as $m)
{
// Offset of the whole match within the text
$pos = $m[0][1];
$text = \substr($node->textContent, $lastPos, $pos - $lastPos);
// Catch-up to current position
$text = substr($node->textContent, $lastPos, $pos - $lastPos);
$parentNode->insertBefore($dom->createTextNode($text), $node);
$lastPos = $pos + \strlen($m[0][0]);
$_m=[];foreach($m as $v)$_m[]=$v[0];$replacement = $fn($_m, $node);
$lastPos = $pos + strlen($m[0][0]);
// Get the replacement for this token
// The callback receives the matched strings only, without their offsets
$replacement = $fn(array_column($m, 0), $node);
$newNode = self::createReplacementNode($dom, $replacement);
$parentNode->insertBefore($newNode, $node);
}
$text = \substr($node->textContent, $lastPos);
// Append the rest of the text
$text = substr($node->textContent, $lastPos);
$parentNode->insertBefore($dom->createTextNode($text), $node);
// Now remove the old text node
$parentNode->removeChild($node);
}
}

View File

@@ -0,0 +1,39 @@
<?php
/**
* @package s9e\TextFormatter
* @copyright Copyright (c) 2010-2019 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator\Helpers;
use s9e\TextFormatter\Configurator\Helpers\TemplateParser\Normalizer;
use s9e\TextFormatter\Configurator\Helpers\TemplateParser\Optimizer;
use s9e\TextFormatter\Configurator\Helpers\TemplateParser\Parser;
class TemplateParser
{
	/**
	* XSL namespace
	*/
	const XMLNS_XSL = 'http://www.w3.org/1999/XSL/Transform';

	/**
	* @var string Case-insensitive regexp that matches the names of all void elements
	* @link http://www.w3.org/TR/html-markup/syntax.html#void-elements
	*/
	public static $voidRegexp = '/^(?:area|base|br|col|command|embed|hr|img|input|keygen|link|meta|param|source|track|wbr)$/Di';

	/**
	* Parse a template into an internal representation
	*
	* The template is handed to a Parser that is built on a Normalizer, itself built on
	* an Optimizer
	*
	* @param  string      $template Source template
	* @return DOMDocument           Internal representation
	*/
	public static function parse($template)
	{
		$optimizer  = new Optimizer;
		$normalizer = new Normalizer($optimizer);
		$parser     = new Parser($normalizer);

		return $parser->parse($template);
	}
}

View File

@@ -0,0 +1,74 @@
<?php
/**
* @package s9e\TextFormatter
* @copyright Copyright (c) 2010-2019 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator\Helpers\TemplateParser;
use DOMDocument;
use DOMElement;
use DOMNode;
use DOMXPath;
abstract class IRProcessor
{
	/**
	* XSL namespace
	*/
	const XMLNS_XSL = 'http://www.w3.org/1999/XSL/Transform';

	/**
	* @var DOMXPath XPath object set by createXPath() and used by evaluate() and query()
	*/
	protected $xpath;

	/**
	* Create and append an element to given node in the IR
	*
	* The value is passed as-is to DOMDocument::createElement()
	*
	* @param  DOMElement $parentNode Parent node of the element
	* @param  string     $name       Tag name of the element
	* @param  string     $value      Value of the element
	* @return DOMElement             The created element
	*/
	protected function appendElement(DOMElement $parentNode, $name, $value = '')
	{
		return $parentNode->appendChild($parentNode->ownerDocument->createElement($name, $value));
	}

	/**
	* Create and store an instance of DOMXPath for given document
	*
	* @param  DOMDocument $dom
	* @return void
	*/
	protected function createXPath(DOMDocument $dom)
	{
		$this->xpath = new DOMXPath($dom);
	}

	/**
	* Evaluate an XPath expression and return its result
	*
	* The nullable type is declared explicitly because implicitly nullable parameters
	* are deprecated as of PHP 8.4
	*
	* @param  string       $expr XPath expression
	* @param  DOMNode|null $node Context node, or NULL to evaluate without a context node
	* @return mixed
	*/
	protected function evaluate($expr, ?DOMNode $node = null)
	{
		return (isset($node)) ? $this->xpath->evaluate($expr, $node) : $this->xpath->evaluate($expr);
	}

	/**
	* Run an XPath query and return its result
	*
	* The nullable type is declared explicitly because implicitly nullable parameters
	* are deprecated as of PHP 8.4
	*
	* @param  string       $query XPath query
	* @param  DOMNode|null $node  Context node, or NULL to query without a context node
	* @return \DOMNodeList
	*/
	protected function query($query, ?DOMNode $node = null)
	{
		return (isset($node)) ? $this->xpath->query($query, $node) : $this->xpath->query($query);
	}
}

View File

@@ -0,0 +1,282 @@
<?php
/**
* @package s9e\TextFormatter
* @copyright Copyright (c) 2010-2019 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator\Helpers\TemplateParser;
use DOMDocument;
use DOMElement;
use DOMNode;
use s9e\TextFormatter\Configurator\Helpers\XPathHelper;
class Normalizer extends IRProcessor
{
	/**
	* @var Optimizer Optimizer invoked in the middle of normalize()
	*/
	protected $optimizer;

	/**
	* @var string Case-insensitive regexp that matches the names of all void elements
	* @link http://www.w3.org/TR/html-markup/syntax.html#void-elements
	*/
	public $voidRegexp = '/^(?:area|base|br|col|command|embed|hr|img|input|keygen|link|meta|param|source|track|wbr)$/Di';

	/**
	* @param  Optimizer $optimizer
	* @return void
	*/
	public function __construct(Optimizer $optimizer)
	{
		$this->optimizer = $optimizer;
	}

	/**
	* Normalize an IR
	*
	* Runs every normalization pass in a fixed order, with the optimizer running after void
	* elements are marked and before conditional closeTag elements are marked
	*
	* @param  DOMDocument $ir
	* @return void
	*/
	public function normalize(DOMDocument $ir)
	{
		$this->createXPath($ir);

		// Passes that run before the optimizer
		$this->addDefaultCase($ir);
		$this->addElementIds($ir);
		$this->addCloseTagElements($ir);
		$this->markVoidElements($ir);

		$this->optimizer->optimize($ir);

		// Passes that run on the optimized IR
		$this->markConditionalCloseTagElements($ir);
		$this->setOutputContext($ir);
		$this->markBranchTables($ir);
	}

	/**
	* Add <closeTag/> elements everywhere an open start tag should be closed
	*
	* @param  DOMDocument $ir
	* @return void
	*/
	protected function addCloseTagElements(DOMDocument $ir)
	{
		// Nodes that are preceded by a <closeTag/> if they are located in an <element/>
		$query = implode(
			'|',
			[
				'//applyTemplates[not(ancestor::attribute)]',
				'//comment',
				'//element',
				'//output[not(ancestor::attribute)]'
			]
		);

		foreach ($this->query($query) as $node)
		{
			$parentElementId = $this->getParentElementId($node);
			if (isset($parentElementId))
			{
				$closeTag = $node->parentNode->insertBefore($ir->createElement('closeTag'), $node);
				$closeTag->setAttribute('id', $parentElementId);
			}

			// Append a <closeTag/> to <element/> nodes to ensure that empty elements get closed
			if ($node->nodeName === 'element')
			{
				$ownId = $node->getAttribute('id');
				$this->appendElement($node, 'closeTag')->setAttribute('id', $ownId);
			}
		}
	}

	/**
	* Add an empty default <case/> to <switch/> nodes that don't have one
	*
	* A default case is a <case/> with no test attribute
	*
	* @param  DOMDocument $ir
	* @return void
	*/
	protected function addDefaultCase(DOMDocument $ir)
	{
		$switches = $this->query('//switch[not(case[not(@test)])]');
		foreach ($switches as $switch)
		{
			$this->appendElement($switch, 'case');
		}
	}

	/**
	* Add an id attribute to <element/> nodes
	*
	* Ids are consecutive integers that start at 1, in the order of the query's results
	*
	* @param  DOMDocument $ir
	* @return void
	*/
	protected function addElementIds(DOMDocument $ir)
	{
		$lastId = 0;
		foreach ($this->query('//element') as $element)
		{
			++$lastId;
			$element->setAttribute('id', $lastId);
		}
	}

	/**
	* Get the context type for given output element
	*
	* @param  DOMNode $output
	* @return string          Either 'attribute', 'raw' or 'text'
	*/
	protected function getOutputContext(DOMNode $output)
	{
		// Output located inside of an attribute
		if ($this->evaluate('boolean(ancestor::attribute)', $output))
		{
			return 'attribute';
		}

		// Output with escaping explicitly disabled
		if ($this->evaluate('@disable-output-escaping="yes"', $output))
		{
			return 'raw';
		}

		// Output located inside of a script element
		if ($this->evaluate('count(ancestor::element[@name="script"])', $output))
		{
			return 'raw';
		}

		return 'text';
	}

	/**
	* Get the ID of the closest "element" ancestor
	*
	* @param  DOMNode     $node Context node
	* @return string|null       The ancestor's id attribute, or NULL if there is no such ancestor
	*/
	protected function getParentElementId(DOMNode $node)
	{
		for ($ancestor = $node->parentNode; isset($ancestor); $ancestor = $ancestor->parentNode)
		{
			if ($ancestor->nodeName === 'element')
			{
				return $ancestor->getAttribute('id');
			}
		}

		return null;
	}

	/**
	* Mark switch elements that are used as branch tables
	*
	* If a switch is used for a series of equality tests against the same attribute or
	* variable, the attribute/variable is stored within the switch as "branch-key" and the
	* values it is compared against are stored in each case as "branch-values", encoded with
	* PHP's serialize(). It can be used to create optimized branch tables
	*
	* @param  DOMDocument $ir
	* @return void
	*/
	protected function markBranchTables(DOMDocument $ir)
	{
		// Only consider switch elements whose second case has a test attribute
		$candidates = $this->query('//switch[case[2][@test]]');
		foreach ($candidates as $switch)
		{
			$this->markSwitchTable($switch);
		}
	}

	/**
	* Mark given switch element if it's used as a branch table
	*
	* The switch is left untouched if any test cannot be parsed as an equality expression, or
	* if the tests do not all use one and the same key
	*
	* @param  DOMElement $switch
	* @return void
	*/
	protected function markSwitchTable(DOMElement $switch)
	{
		$cases = [];
		$maps  = [];
		foreach ($this->query('./case[@test]', $switch) as $i => $case)
		{
			$map = XPathHelper::parseEqualityExpr($case->getAttribute('test'));
			if ($map === false)
			{
				// Not an equality test
				return;
			}

			$maps     += $map;
			$cases[$i] = [$case, end($map)];
		}

		// All the tests must be made against a single key
		if (count($maps) !== 1)
		{
			return;
		}

		$switch->setAttribute('branch-key', key($maps));
		foreach ($cases as $pair)
		{
			$case   = $pair[0];
			$values = $pair[1];

			sort($values);
			$case->setAttribute('branch-values', serialize($values));
		}
	}

	/**
	* Mark conditional <closeTag/> nodes
	*
	* @param  DOMDocument $ir
	* @return void
	*/
	protected function markConditionalCloseTagElements(DOMDocument $ir)
	{
		foreach ($this->query('//closeTag') as $closeTag)
		{
			$id = $closeTag->getAttribute('id');

			// For each <switch/> ancestor, look for a <closeTag/> that matches the id and is
			// either a following sibling of the switch or the descendant of one
			$query = 'ancestor::switch/following-sibling::*/descendant-or-self::closeTag[@id = "' . $id . '"]';
			foreach ($this->query($query, $closeTag) as $laterCloseTag)
			{
				// The later <closeTag/> must check the status of the tag before closing it
				$laterCloseTag->setAttribute('check', '');

				// The current <closeTag/> must set a flag to indicate that its tag has been closed
				$closeTag->setAttribute('set', '');
			}
		}
	}

	/**
	* Mark void elements
	*
	* Sets the "void" attribute to "maybe" if the element's name contains a curly bracket, or
	* to "yes" if it matches $this->voidRegexp
	*
	* @param  DOMDocument $ir
	* @return void
	*/
	protected function markVoidElements(DOMDocument $ir)
	{
		foreach ($this->query('//element') as $element)
		{
			$elName = $element->getAttribute('name');

			$void = null;
			if (strpos($elName, '{') !== false)
			{
				// Dynamic element names must be checked at runtime
				$void = 'maybe';
			}
			elseif (preg_match($this->voidRegexp, $elName))
			{
				// Static element names can be checked right now
				$void = 'yes';
			}

			if (isset($void))
			{
				$element->setAttribute('void', $void);
			}
		}
	}

	/**
	* Fill in output context
	*
	* Stores the result of getOutputContext() in the "escape" attribute of every <output/>
	*
	* @param  DOMDocument $ir
	* @return void
	*/
	protected function setOutputContext(DOMDocument $ir)
	{
		foreach ($this->query('//output') as $output)
		{
			$context = $this->getOutputContext($output);
			$output->setAttribute('escape', $context);
		}
	}
}

View File

@@ -0,0 +1,244 @@
<?php
/**
* @package s9e\TextFormatter
* @copyright Copyright (c) 2010-2019 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator\Helpers\TemplateParser;
use DOMDocument;
use DOMElement;
use DOMNode;
class Optimizer extends IRProcessor
{
/**
 * Optimize an IR
 *
 * The closeTag optimizations are repeated until the serialized IR stops changing, up to
 * 10 times. The remaining optimizations are then applied once each.
 *
 * @param  DOMDocument $ir
 * @return void
 */
public function optimize(DOMDocument $ir)
{
	$this->createXPath($ir);

	// Snapshot of the IR, used to detect whether a pass modified anything
	$snapshot = $ir->saveXML();

	// The number of passes is capped to ward against infinite loops
	for ($remainingLoops = 10; $remainingLoops > 0; --$remainingLoops)
	{
		$previous = $snapshot;
		$this->optimizeCloseTagElements($ir);
		$snapshot = $ir->saveXML();

		if ($snapshot === $previous)
		{
			// Nothing changed during this pass
			break;
		}
	}

	$this->removeCloseTagSiblings($ir);
	$this->removeContentFromVoidElements($ir);
	$this->mergeConsecutiveLiteralOutputElements($ir);
	$this->removeEmptyDefaultCases($ir);
}
/**
 * Clone closeTag elements that follow a switch into said switch
 *
 * If there's a <closeTag/> right after a <switch/>, a clone of it is appended to every
 * <case/> that does not already end with a <closeTag/>
 *
 * @param  DOMDocument $ir
 * @return void
 */
protected function cloneCloseTagElementsIntoSwitch(DOMDocument $ir)
{
	$switches = $this->query('//switch[name(following-sibling::*[1]) = "closeTag"]');
	foreach ($switches as $switch)
	{
		$closeTag = $switch->nextSibling;
		foreach ($this->query('case', $switch) as $case)
		{
			$lastChild = $case->lastChild;
			if ($lastChild && $lastChild->nodeName === 'closeTag')
			{
				// This case already ends with a <closeTag/>
				continue;
			}

			$case->appendChild($closeTag->cloneNode());
		}
	}
}
/**
 * Clone closeTag elements from the head of a switch's cases before said switch
 *
 * Applies to each <switch/> that has at least one <closeTag/> as a child of a <case/> and
 * no <case/> whose first child element is something other than a <closeTag/>. The first
 * such <closeTag/> is cloned and the clone is inserted right before the <switch/>
 *
 * @param  DOMDocument $ir
 * @return void
 */
protected function cloneCloseTagElementsOutOfSwitch(DOMDocument $ir)
{
	$switches = $this->query('//switch[case/closeTag][not(case[name(*[1]) != "closeTag"])]');
	foreach ($switches as $switch)
	{
		$firstCloseTag = $this->query('case/closeTag', $switch)->item(0);
		$clone         = $firstCloseTag->cloneNode();

		$switch->parentNode->insertBefore($clone, $switch);
	}
}
/**
 * Merge consecutive literal outputs
 *
 * The value of each following literal output with the same disable-output-escaping value
 * is appended to the current output, then the following output is removed
 *
 * @param  DOMDocument $ir
 * @return void
 */
protected function mergeConsecutiveLiteralOutputElements(DOMDocument $ir)
{
	foreach ($this->query('//output[@type="literal"]') as $output)
	{
		$disableOutputEscaping = $output->getAttribute('disable-output-escaping');
		while ($this->nextSiblingIsLiteralOutput($output, $disableOutputEscaping))
		{
			$sibling = $output->nextSibling;
			$merged  = $output->nodeValue . $sibling->nodeValue;

			// The merged value is escaped before being assigned
			$output->nodeValue = htmlspecialchars($merged);
			$output->parentNode->removeChild($sibling);
		}
	}
}
/**
 * Test whether the next sibling of an element is a literal output element with matching escaping
 *
 * @param  DOMElement $node
 * @param  string     $disableOutputEscaping Expected value of the sibling's disable-output-escaping attribute
 * @return bool
 */
protected function nextSiblingIsLiteralOutput(DOMElement $node, $disableOutputEscaping)
{
	$sibling = $node->nextSibling;
	if (!isset($sibling) || $sibling->nodeName !== 'output')
	{
		return false;
	}

	return $sibling->getAttribute('type') === 'literal'
	    && $sibling->getAttribute('disable-output-escaping') === $disableOutputEscaping;
}
/**
 * Optimize closeTag elements
 *
 * Runs one pass of the four closeTag optimizations, in a fixed order
 *
 * @param  DOMDocument $ir
 * @return void
 */
protected function optimizeCloseTagElements(DOMDocument $ir)
{
	// Copy closeTag elements into and out of switches
	$this->cloneCloseTagElementsIntoSwitch($ir);
	$this->cloneCloseTagElementsOutOfSwitch($ir);

	// Remove the closeTag elements that are redundant
	$this->removeRedundantCloseTagElementsInSwitch($ir);
	$this->removeRedundantCloseTagElements($ir);
}
/**
 * Remove redundant closeTag siblings after a switch
 *
 * If every case of a switch has a closeTag child, the closeTag elements that follow the
 * switch as siblings are removed
 *
 * @param  DOMDocument $ir
 * @return void
 */
protected function removeCloseTagSiblings(DOMDocument $ir)
{
	$this->removeNodes($ir, '//switch[not(case[not(closeTag)])]/following-sibling::closeTag');
}
/**
* Remove content from void elements
*
* For each void element, we find whichever <closeTag/> elements close it and remove everything
* after
*
* @param DOMDocument $ir
* @return void
*/
protected function removeContentFromVoidElements(DOMDocument $ir)
{
	$voidElements = $this->query('//element[@void="yes"]');
	foreach ($voidElements as $voidElement)
	{
		// Everything that follows the closeTag bearing this element's id is removed, using the
		// void element itself as the context node
		$this->removeNodes(
			$ir,
			'.//closeTag[@id="' . $voidElement->getAttribute('id') . '"]/following-sibling::*',
			$voidElement
		);
	}
}
/**
* Remove empty default cases (no test and no descendants)
*
* @param DOMDocument $ir
* @return void
*/
protected function removeEmptyDefaultCases(DOMDocument $ir)
{
	// A default case has no @test; it is empty if it has no child element and no text
	$this->removeNodes($ir, '//case[not(@test)][not(*)][. = ""]');
}
/**
* Remove all nodes that match given XPath query
*
* @param DOMDocument $ir
* @param string $query
* @param DOMNode $contextNode
* @return void
*/
protected function removeNodes(DOMDocument $ir, $query, ?DOMNode $contextNode = null)
{
	// The nullable type is spelled out explicitly: relying on "= null" to make the type nullable
	// is deprecated as of PHP 8.4
	foreach ($this->query($query, $contextNode) as $node)
	{
		// Only nodes that are still attached to an element can be removed
		if ($node->parentNode instanceof DOMElement)
		{
			$node->parentNode->removeChild($node);
		}
	}
}
/**
* Remove closeTag elements that duplicate an earlier closeTag with the same id
*
* For each <closeTag/> remove duplicate <closeTag/> nodes that are either siblings or
* descendants of a sibling
*
* @param DOMDocument $ir
* @return void
*/
protected function removeRedundantCloseTagElements(DOMDocument $ir)
{
	foreach ($this->query('//closeTag') as $closeTag)
	{
		// Look in following siblings and their descendants for a closeTag with the same id
		$duplicates = 'following-sibling::*/descendant-or-self::closeTag[@id="'
		            . $closeTag->getAttribute('id')
		            . '"]';
		$this->removeNodes($ir, $duplicates, $closeTag);
	}
}
/**
* Remove redundant closeTag elements from the tail of a switch's cases
*
* If there's a <closeTag/> right after a <switch/>, remove all <closeTag/> nodes at the
* end of every <case/>
*
* @param DOMDocument $ir
* @return void
*/
protected function removeRedundantCloseTagElementsInSwitch(DOMDocument $ir)
{
	// Only switches whose next sibling element is a closeTag are considered
	foreach ($this->query('//switch[name(following-sibling::*[1]) = "closeTag"]') as $switch)
	{
		foreach ($this->query('case', $switch) as $case)
		{
			// Strip trailing closeTag nodes one at a time until the tail is something else
			for ($tail = $case->lastChild; $tail && $tail->nodeName === 'closeTag'; $tail = $case->lastChild)
			{
				$case->removeChild($tail);
			}
		}
	}
}
}

View File

@@ -0,0 +1,381 @@
<?php
/**
* @package s9e\TextFormatter
* @copyright Copyright (c) 2010-2019 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator\Helpers\TemplateParser;
use DOMDocument;
use DOMElement;
use DOMXPath;
use RuntimeException;
use s9e\TextFormatter\Configurator\Helpers\AVTHelper;
use s9e\TextFormatter\Configurator\Helpers\TemplateLoader;
class Parser extends IRProcessor
{
	/**
	* @var Normalizer Normalizer applied to the internal representation at the end of parse()
	*/
	protected $normalizer;

	/**
	* @param  Normalizer $normalizer
	* @return void
	*/
	public function __construct(Normalizer $normalizer)
	{
		$this->normalizer = $normalizer;
	}

	/**
	* Parse a template into an internal representation
	*
	* The internal representation is a DOMDocument whose root is a <template/> element. It is
	* passed to the normalizer before being returned.
	*
	* @param  string      $template Source template
	* @return DOMDocument           Internal representation
	*/
	public function parse($template)
	{
		$dom = TemplateLoader::load($template);

		$ir = new DOMDocument;
		$ir->loadXML('<template/>');

		$this->createXPath($dom);
		$this->parseChildren($ir->documentElement, $dom->documentElement);
		$this->normalizer->normalize($ir);

		return $ir;
	}

	/**
	* Append <output/> elements corresponding to given AVT
	*
	* @param  DOMElement $parentNode Parent node
	* @param  string     $avt        Attribute value template
	* @return void
	*/
	protected function appendAVT(DOMElement $parentNode, $avt)
	{
		foreach (AVTHelper::parse($avt) as $token)
		{
			// Each token is a pair: its type in [0] and its content in [1]
			if ($token[0] === 'expression')
			{
				$this->appendXPathOutput($parentNode, $token[1]);
			}
			else
			{
				$this->appendLiteralOutput($parentNode, $token[1]);
			}
		}
	}

	/**
	* Append an <output/> element with literal content to given node
	*
	* Nothing is appended if the content is an empty string.
	*
	* @param  DOMElement $parentNode Parent node
	* @param  string     $content    Content to output
	* @return void
	*/
	protected function appendLiteralOutput(DOMElement $parentNode, $content)
	{
		if ($content === '')
		{
			return;
		}

		$this->appendElement($parentNode, 'output', htmlspecialchars($content))
		     ->setAttribute('type', 'literal');
	}

	/**
	* Append the structure for a <xsl:copy-of/> element to given node
	*
	* Each attribute named in the expression produces a <switch/> with a single <case/> that
	* tests for the attribute and, if it exists, outputs it with its original value.
	*
	* @param  DOMElement $parentNode Parent node
	* @param  string     $expr       Select expression, which should only contain attributes
	* @return void
	*/
	protected function appendConditionalAttributes(DOMElement $parentNode, $expr)
	{
		preg_match_all('(@([-\\w]+))', $expr, $matches);
		foreach ($matches[1] as $attrName)
		{
			// Create a switch element in the IR
			$switch = $this->appendElement($parentNode, 'switch');
			$case   = $this->appendElement($switch, 'case');
			$case->setAttribute('test', '@' . $attrName);

			// Append an attribute element
			$attribute = $this->appendElement($case, 'attribute');
			$attribute->setAttribute('name', $attrName);

			// Set the attribute's content, which is simply the copied attribute's value
			$this->appendXPathOutput($attribute, '@' . $attrName);
		}
	}

	/**
	* Append an <output/> element for given XPath expression to given node
	*
	* @param  DOMElement $parentNode Parent node
	* @param  string     $expr       XPath expression
	* @return void
	*/
	protected function appendXPathOutput(DOMElement $parentNode, $expr)
	{
		$this->appendElement($parentNode, 'output', htmlspecialchars(trim($expr)))
		     ->setAttribute('type', 'xpath');
	}

	/**
	* Parse all the children of a given element
	*
	* Comments are ignored, whitespace-only text nodes are skipped, other text nodes become literal
	* output and elements are handed to parseNode().
	*
	* @param  DOMElement $ir     Node in the internal representation that represents the parent node
	* @param  DOMElement $parent Parent node
	* @return void
	*
	* @throws RuntimeException if a child is neither a comment, a text node nor an element
	*/
	protected function parseChildren(DOMElement $ir, DOMElement $parent)
	{
		foreach ($parent->childNodes as $child)
		{
			switch ($child->nodeType)
			{
				case XML_COMMENT_NODE:
					// Do nothing
					break;

				case XML_TEXT_NODE:
					if (trim($child->textContent) !== '')
					{
						$this->appendLiteralOutput($ir, $child->textContent);
					}
					break;

				case XML_ELEMENT_NODE:
					$this->parseNode($ir, $child);
					break;

				default:
					throw new RuntimeException("Cannot parse node '" . $child->nodeName . "'");
			}
		}
	}

	/**
	* Parse a given node into the internal representation
	*
	* @param  DOMElement $ir   Node in the internal representation that represents the node's parent
	* @param  DOMElement $node Node to parse
	* @return void
	*
	* @throws RuntimeException if the node is an XSL element with no matching parseXsl* method
	*/
	protected function parseNode(DOMElement $ir, DOMElement $node)
	{
		// XSL elements are parsed by the corresponding parseXsl* method
		if ($node->namespaceURI === self::XMLNS_XSL)
		{
			// Turn a local name such as "apply-templates" into "parseXslApplyTemplates"
			$methodName = 'parseXsl' . str_replace(' ', '', ucwords(str_replace('-', ' ', $node->localName)));
			if (!method_exists($this, $methodName))
			{
				throw new RuntimeException("Element '" . $node->nodeName . "' is not supported");
			}

			return $this->$methodName($ir, $node);
		}

		// Create an <element/> with a name attribute equal to given node's name
		$element = $this->appendElement($ir, 'element');
		$element->setAttribute('name', $node->nodeName);

		// Append an <attribute/> element for each namespace declaration
		$xpath = new DOMXPath($node->ownerDocument);
		foreach ($xpath->query('namespace::*', $node) as $ns)
		{
			// Only keep the namespaces that are declared on this node itself
			if ($node->hasAttribute($ns->nodeName))
			{
				$irAttribute = $this->appendElement($element, 'attribute');
				$irAttribute->setAttribute('name', $ns->nodeName);
				$this->appendLiteralOutput($irAttribute, $ns->nodeValue);
			}
		}

		// Append an <attribute/> element for each of this node's attribute
		foreach ($node->attributes as $attribute)
		{
			$irAttribute = $this->appendElement($element, 'attribute');
			$irAttribute->setAttribute('name', $attribute->nodeName);

			// Append an <output/> element to represent the attribute's value
			$this->appendAVT($irAttribute, $attribute->value);
		}

		// Parse the content of this node
		$this->parseChildren($element, $node);
	}

	/**
	* Parse an <xsl:apply-templates/> node into the internal representation
	*
	* @param  DOMElement $ir   Node in the internal representation that represents the node's parent
	* @param  DOMElement $node <xsl:apply-templates/> node
	* @return void
	*/
	protected function parseXslApplyTemplates(DOMElement $ir, DOMElement $node)
	{
		$applyTemplates = $this->appendElement($ir, 'applyTemplates');
		if ($node->hasAttribute('select'))
		{
			$applyTemplates->setAttribute('select', $node->getAttribute('select'));
		}
	}

	/**
	* Parse an <xsl:attribute/> node into the internal representation
	*
	* @param  DOMElement $ir   Node in the internal representation that represents the node's parent
	* @param  DOMElement $node <xsl:attribute/> node
	* @return void
	*/
	protected function parseXslAttribute(DOMElement $ir, DOMElement $node)
	{
		$attribute = $this->appendElement($ir, 'attribute');
		$attribute->setAttribute('name', $node->getAttribute('name'));
		$this->parseChildren($attribute, $node);
	}

	/**
	* Parse an <xsl:choose/> node and its <xsl:when/> and <xsl:otherwise/> children into the
	* internal representation
	*
	* @param  DOMElement $ir   Node in the internal representation that represents the node's parent
	* @param  DOMElement $node <xsl:choose/> node
	* @return void
	*/
	protected function parseXslChoose(DOMElement $ir, DOMElement $node)
	{
		$switch = $this->appendElement($ir, 'switch');
		foreach ($this->query('./xsl:when', $node) as $when)
		{
			// Create a <case/> element with the original test condition in @test
			$case = $this->appendElement($switch, 'case');
			$case->setAttribute('test', $when->getAttribute('test'));
			$this->parseChildren($case, $when);
		}

		// Add the default branch, which is presumed to be last
		foreach ($this->query('./xsl:otherwise', $node) as $otherwise)
		{
			$case = $this->appendElement($switch, 'case');
			$this->parseChildren($case, $otherwise);

			// There should be only one <xsl:otherwise/> but we'll break anyway
			break;
		}
	}

	/**
	* Parse an <xsl:comment/> node into the internal representation
	*
	* @param  DOMElement $ir   Node in the internal representation that represents the node's parent
	* @param  DOMElement $node <xsl:comment/> node
	* @return void
	*/
	protected function parseXslComment(DOMElement $ir, DOMElement $node)
	{
		$comment = $this->appendElement($ir, 'comment');
		$this->parseChildren($comment, $node);
	}

	/**
	* Parse an <xsl:copy-of/> node into the internal representation
	*
	* NOTE: only attributes are supported
	*
	* @param  DOMElement $ir   Node in the internal representation that represents the node's parent
	* @param  DOMElement $node <xsl:copy-of/> node
	* @return void
	*
	* @throws RuntimeException if the select expression is neither "@*" nor a union of attributes
	*/
	protected function parseXslCopyOf(DOMElement $ir, DOMElement $node)
	{
		$expr = $node->getAttribute('select');
		if (preg_match('#^@[-\\w]+(?:\\s*\\|\\s*@[-\\w]+)*$#', $expr))
		{
			// <xsl:copy-of select="@foo"/>
			$this->appendConditionalAttributes($ir, $expr);
		}
		elseif ($expr === '@*')
		{
			// <xsl:copy-of select="@*"/>
			$this->appendElement($ir, 'copyOfAttributes');
		}
		else
		{
			throw new RuntimeException("Unsupported <xsl:copy-of/> expression '" . $expr . "'");
		}
	}

	/**
	* Parse an <xsl:element/> node into the internal representation
	*
	* @param  DOMElement $ir   Node in the internal representation that represents the node's parent
	* @param  DOMElement $node <xsl:element/> node
	* @return void
	*/
	protected function parseXslElement(DOMElement $ir, DOMElement $node)
	{
		$element = $this->appendElement($ir, 'element');
		$element->setAttribute('name', $node->getAttribute('name'));
		$this->parseChildren($element, $node);
	}

	/**
	* Parse an <xsl:if/> node into the internal representation
	*
	* @param  DOMElement $ir   Node in the internal representation that represents the node's parent
	* @param  DOMElement $node <xsl:if/> node
	* @return void
	*/
	protected function parseXslIf(DOMElement $ir, DOMElement $node)
	{
		// An <xsl:if/> is represented by a <switch/> with only one <case/>
		$switch = $this->appendElement($ir, 'switch');
		$case   = $this->appendElement($switch, 'case');
		$case->setAttribute('test', $node->getAttribute('test'));

		// Parse this branch's content
		$this->parseChildren($case, $node);
	}

	/**
	* Parse an <xsl:text/> node into the internal representation
	*
	* @param  DOMElement $ir   Node in the internal representation that represents the node's parent
	* @param  DOMElement $node <xsl:text/> node
	* @return void
	*/
	protected function parseXslText(DOMElement $ir, DOMElement $node)
	{
		$content = $node->textContent;
		if ($content === '')
		{
			// appendLiteralOutput() appends nothing for an empty string, so $ir->lastChild would
			// be an unrelated node (or null) and must not receive disable-output-escaping
			return;
		}

		$this->appendLiteralOutput($ir, $content);
		if ($node->getAttribute('disable-output-escaping') === 'yes')
		{
			$ir->lastChild->setAttribute('disable-output-escaping', 'yes');
		}
	}

	/**
	* Parse an <xsl:value-of/> node into the internal representation
	*
	* @param  DOMElement $ir   Node in the internal representation that represents the node's parent
	* @param  DOMElement $node <xsl:value-of/> node
	* @return void
	*/
	protected function parseXslValueOf(DOMElement $ir, DOMElement $node)
	{
		$this->appendXPathOutput($ir, $node->getAttribute('select'));
		if ($node->getAttribute('disable-output-escaping') === 'yes')
		{
			$ir->lastChild->setAttribute('disable-output-escaping', 'yes');
		}
	}
}

View File

@@ -0,0 +1,264 @@
<?php
/**
* @package s9e\TextFormatter
* @copyright Copyright (c) 2010-2019 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator\Helpers;
use RuntimeException;
use s9e\TextFormatter\Configurator\RecursiveParser;
use s9e\TextFormatter\Configurator\RendererGenerators\PHP\XPathConvertor\Convertors\BooleanFunctions;
use s9e\TextFormatter\Configurator\RendererGenerators\PHP\XPathConvertor\Convertors\BooleanOperators;
use s9e\TextFormatter\Configurator\RendererGenerators\PHP\XPathConvertor\Convertors\Comparisons;
use s9e\TextFormatter\Configurator\RendererGenerators\PHP\XPathConvertor\Convertors\Core;
use s9e\TextFormatter\Configurator\RendererGenerators\PHP\XPathConvertor\Convertors\Math;
use s9e\TextFormatter\Configurator\RendererGenerators\PHP\XPathConvertor\Convertors\SingleByteStringFunctions;
use s9e\TextFormatter\Utils\XPath;
abstract class XPathHelper
{
	/**
	* Replace the hex-encoded content of every quoted string in an expression with its raw bytes
	*
	* @param  string $expr
	* @return string
	*/
	public static function decodeStrings($expr)
	{
		$decode = function ($m)
		{
			list(, $quote, $hex) = $m;

			return $quote . hex2bin($hex) . $quote;
		};

		return preg_replace_callback('(([\'"])(.*?)\\1)s', $decode, $expr);
	}

	/**
	* Replace the content of every quoted string in an expression with its hexadecimal form
	*
	* @param  string $expr
	* @return string
	*/
	public static function encodeStrings($expr)
	{
		$encode = function ($m)
		{
			list(, $quote, $content) = $m;

			return $quote . bin2hex($content) . $quote;
		};

		return preg_replace_callback('(([\'"])(.*?)\\1)s', $encode, $expr);
	}

	/**
	* List the variables referenced in an XPath expression
	*
	* @param  string $expr XPath expression
	* @return array        Sorted list of unique variable names, without the leading $
	*/
	public static function getVariables($expr)
	{
		// Empty out quoted strings so that a $ inside a string is not mistaken for a variable
		$withoutStrings = preg_replace('/(["\']).*?\\1/s', '$1$1', $expr);

		preg_match_all('/\\$(\\w+)/', $withoutStrings, $matches);

		$names = array_unique($matches[1]);
		sort($names);

		return $names;
	}

	/**
	* Determine whether given XPath expression definitely evaluates to a number
	*
	* @param  string $expr XPath expression
	* @return bool         Whether given XPath expression definitely evaluates to a number
	*/
	public static function isExpressionNumeric($expr)
	{
		// Simple arithmetic between attributes, variables and numbers needs no parsing
		if (preg_match('(^([$@][-\\w]++|-?[.\\d]++)(?: *(?:[-*+]|div) *(?1))+$)', $expr))
		{
			return true;
		}

		// Otherwise, the expression is numeric if it can be parsed as a math expression
		try
		{
			$isNumeric = (bool) self::getXPathParser()->parse($expr, 'Math');
		}
		catch (RuntimeException $e)
		{
			$isNumeric = false;
		}

		return $isNumeric;
	}

	/**
	* Remove extraneous space in a given XPath expression
	*
	* @param  string $expr Original XPath expression
	* @return string       Minified XPath expression
	*
	* @throws RuntimeException if a quote remains once the literal strings have been set aside
	*/
	public static function minify($expr)
	{
		$literals = [];

		// Trim the expression and swap each literal string for a placeholder so that the rules
		// below do not touch its content
		$minified = preg_replace_callback(
			'/"[^"]*"|\'[^\']*\'/',
			function ($m) use (&$literals)
			{
				$uniqid = '(' . sha1(uniqid()) . ')';
				$literals[$uniqid] = $m[0];

				return $uniqid;
			},
			trim($expr)
		);

		// Any quote left at this point was not part of a complete literal
		if (preg_match('/[\'"]/', $minified))
		{
			throw new RuntimeException("Cannot parse XPath expression '" . $expr . "'");
		}

		// Ordered list of [regexp, replacement] pairs, applied one after the other
		$rules = [
			// Normalize whitespace to a single space
			['/\\s+/', ' '],
			// Remove the space between a word character and a non-word character
			['/([-a-z_0-9]) ([^-a-z_0-9])/i', '$1$2'],
			// Remove the space between a non-word character and a word character
			['/([^-a-z_0-9]) ([-a-z_0-9])/i', '$1$2'],
			// Remove the space between two non-word characters as long as they're not two -
			['/(?!- -)([^-a-z_0-9]) ([^-a-z_0-9])/i', '$1$2'],
			// Remove the space between a - and a word character if there's a space before the -
			['/ - ([a-z_0-9])/i', ' -$1'],
			// Remove the spaces between a number and a div or "-" operator and the next token
			['/(?:^|[ \\(])\\d+\\K (div|-) ?/', '$1'],
			// Remove the space between the div operator and the next token
			['/([^-a-z_0-9]div) (?=[$0-9@])/', '$1']
		];
		foreach ($rules as list($regexp, $replacement))
		{
			$minified = preg_replace($regexp, $replacement, $minified);
		}

		// Put the literal strings back in place of their placeholder
		return strtr($minified, $literals);
	}

	/**
	* Parse an XPath expression that is composed entirely of equality tests between a variable part
	* and a constant part
	*
	* @param  string      $expr
	* @return array|false       Map of [key => list of values], or FALSE if the expression is
	*                           anything else than equality tests joined with "or"
	*/
	public static function parseEqualityExpr($expr)
	{
		// Match an equality between a variable and a literal or the concatenation of strings
		$eq = '(?<equality>'
		    . '(?<key>@[-\\w]+|\\$\\w+|\\.)'
		    . '(?<operator>\\s*=\\s*)'
		    . '(?:'
		    . '(?<literal>(?<string>"[^"]*"|\'[^\']*\')|0|[1-9][0-9]*)'
		    . '|'
		    . '(?<concat>concat\\(\\s*(?&string)\\s*(?:,\\s*(?&string)\\s*)+\\))'
		    . ')'
		    . '|'
		    . '(?:(?<literal>(?&literal))|(?<concat>(?&concat)))(?&operator)(?<key>(?&key))'
		    . ')';

		// The whole expression must be a series of equality checks separated with "or"
		if (!preg_match('(^(?J)\\s*' . $eq . '\\s*(?:or\\s*(?&equality)\\s*)*$)', $expr))
		{
			return false;
		}

		preg_match_all('((?J)' . $eq . ')', $expr, $equalities, PREG_SET_ORDER);

		$map = [];
		foreach ($equalities as $equality)
		{
			// The constant part is either a concat() call or a plain literal
			if (!empty($equality['concat']))
			{
				$value = self::evaluateConcat($equality['concat']);
			}
			else
			{
				$value = self::evaluateLiteral($equality['literal']);
			}

			$map[$equality['key']][] = $value;
		}

		return $map;
	}

	/**
	* Evaluate a concat() expression where all arguments are string literals
	*
	* @param  string $expr concat() expression
	* @return string       Expression's value
	*/
	protected static function evaluateConcat($expr)
	{
		preg_match_all('(\'[^\']*\'|"[^"]*")', $expr, $strings);

		// Strip the quotes around each argument, then glue the contents together
		$contents = array_map(
			function ($string)
			{
				return substr($string, 1, -1);
			},
			$strings[0]
		);

		return implode('', $contents);
	}

	/**
	* Evaluate an XPath literal
	*
	* @param  string $expr XPath literal
	* @return string       Literal's string value
	*/
	protected static function evaluateLiteral($expr)
	{
		$isQuoted = ($expr[0] === '"' || $expr[0] === "'");

		// Quoted strings lose their quotes, anything else is returned as-is
		return ($isQuoted) ? substr($expr, 1, -1) : $expr;
	}

	/**
	* Generate and return a cached XPath parser with a default set of matchers
	*
	* @return RecursiveParser
	*/
	protected static function getXPathParser()
	{
		static $parser;
		if (isset($parser))
		{
			return $parser;
		}

		// First call: create the parser and register the matchers, in this order
		$parser = new RecursiveParser;
		$parser->setMatchers([
			new BooleanFunctions($parser),
			new BooleanOperators($parser),
			new Comparisons($parser),
			new Core($parser),
			new Math($parser),
			new SingleByteStringFunctions($parser)
		]);

		return $parser;
	}
}

View File

@@ -0,0 +1,92 @@
<?php
/**
* @package s9e\TextFormatter
* @copyright Copyright (c) 2010-2019 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator\Items;
use s9e\TextFormatter\Configurator\Collections\AttributeFilterChain;
use s9e\TextFormatter\Configurator\ConfigProvider;
use s9e\TextFormatter\Configurator\Helpers\ConfigHelper;
use s9e\TextFormatter\Configurator\Items\ProgrammableCallback;
use s9e\TextFormatter\Configurator\Traits\Configurable;
use s9e\TextFormatter\Configurator\Traits\TemplateSafeness;
/**
* @property mixed $defaultValue Default value used for this attribute
* @property AttributeFilterChain $filterChain This attribute's filter chain
* @property bool $required Whether this attribute is required for the tag to be valid
*/
class Attribute implements ConfigProvider
{
	use Configurable;
	use TemplateSafeness;

	/**
	* @var mixed Default value used for this attribute
	*/
	protected $defaultValue;

	/**
	* @var AttributeFilterChain This attribute's filter chain
	*/
	protected $filterChain;

	/**
	* @var bool Whether this attribute is required for the tag to be valid
	*/
	protected $required = true;

	/**
	* Constructor
	*
	* The nullable type is declared explicitly because implicitly nullable parameter types are
	* deprecated as of PHP 8.4.
	*
	* @param array|null $options This attribute's options, as [option name => option value]
	*/
	public function __construct(?array $options = null)
	{
		$this->filterChain = new AttributeFilterChain;
		if (isset($options))
		{
			// Every option goes through __set() rather than direct property assignment
			foreach ($options as $optionName => $optionValue)
			{
				$this->__set($optionName, $optionValue);
			}
		}
	}

	/**
	* Return whether this attribute is safe to be used in given context
	*
	* @param  string $context Either 'AsURL', 'InCSS' or 'InJS'
	* @return bool
	*/
	protected function isSafe($context)
	{
		// Test this attribute's filters
		$methodName = 'isSafe' . $context;
		foreach ($this->filterChain as $filter)
		{
			if ($filter->$methodName())
			{
				// If any filter makes it safe, we consider it safe
				return true;
			}
		}

		// No filter makes it safe, fall back to whether it was marked safe for this context
		return !empty($this->markedSafe[$context]);
	}

	/**
	* {@inheritdoc}
	*/
	public function asConfig()
	{
		// Export every property except the safeness markers
		$vars = get_object_vars($this);
		unset($vars['markedSafe']);

		// Guarantee that a filterChain entry exists even if toArray() left it out
		return ConfigHelper::toArray($vars) + ['filterChain' => []];
	}
}

View File

@@ -0,0 +1,54 @@
<?php
/**
* @package s9e\TextFormatter
* @copyright Copyright (c) 2010-2019 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator\Items;
use s9e\TextFormatter\Configurator\Traits\TemplateSafeness;
class AttributeFilter extends Filter
{
	use TemplateSafeness;

	/**
	* Constructor
	*
	* @param callable $callback
	*/
	public function __construct($callback)
	{
		parent::__construct($callback);

		// The default signature is a single parameter: the attribute's value
		$this->resetParameters();
		$this->addParameterByName('attrValue');
	}

	/**
	* Return whether this filter makes a value safe to be used in JavaScript
	*
	* @return bool
	*/
	public function isSafeInJS()
	{
		// Hardcoded list of callbacks whose output is considered safe to be used in a script.
		// Many other built-in PHP functions would qualify, but only a handful have the potential
		// to be used as an attribute filter
		$safeCallbacks = ['urlencode', 'strtotime', 'rawurlencode'];
		if (!in_array($this->callback, $safeCallbacks, true))
		{
			// Not a known-safe callback, defer to the generic safeness check
			return $this->isSafe('InJS');
		}

		return true;
	}
}

View File

@@ -1,13 +1,17 @@
<?php
/*
/**
* @package s9e\TextFormatter
* @copyright Copyright (c) 2010-2019 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator\Items\AttributeFilters;
class AlnumFilter extends RegexpFilter
{
/**
* Constructor
*/
public function __construct()
{
parent::__construct('/^[0-9A-Za-z]+$/D');

View File

@@ -1,31 +1,64 @@
<?php
/*
/**
* @package s9e\TextFormatter
* @copyright Copyright (c) 2010-2019 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator\Items\AttributeFilters;
use InvalidArgumentException;
use s9e\TextFormatter\Configurator\Helpers\RegexpBuilder;
class ChoiceFilter extends RegexpFilter
{
	/**
	* Constructor
	*
	* @param array|null $values        List of allowed values
	* @param bool       $caseSensitive Whether the choice is case-sensitive
	*/
	public function __construct(?array $values = null, $caseSensitive = false)
	{
		parent::__construct();
		if (isset($values))
		{
			$this->setValues($values, $caseSensitive);
		}
	}

	/**
	* Set the list of allowed values
	*
	* @param  array $values        List of allowed values
	* @param  bool  $caseSensitive Whether the choice is case-sensitive
	* @return void
	*
	* @throws InvalidArgumentException if $caseSensitive is not a boolean
	*/
	public function setValues(array $values, $caseSensitive = false)
	{
		if (!is_bool($caseSensitive))
		{
			throw new InvalidArgumentException('Argument 2 passed to ' . __METHOD__ . ' must be a boolean');
		}

		// Create a regexp based on the list of allowed values
		$regexp = RegexpBuilder::fromList($values, ['delimiter' => '/']);
		$regexp = '/^' . $regexp . '$/D';

		// Add the case-insensitive flag if applicable
		if (!$caseSensitive)
		{
			$regexp .= 'i';
		}

		// Add the Unicode flag if the regexp isn't purely ASCII
		if (!preg_match('#^[[:ascii:]]*$#D', $regexp))
		{
			$regexp .= 'u';
		}

		// Set the regexp associated with this list of values
		$this->setRegexp($regexp);
	}
}

View File

@@ -1,13 +1,17 @@
<?php
/*
/**
* @package s9e\TextFormatter
* @copyright Copyright (c) 2010-2019 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator\Items\AttributeFilters;
class ColorFilter extends RegexpFilter
{
/**
* Constructor
*/
public function __construct()
{
parent::__construct('/^(?:#(?:(?:[0-9a-f]{3}){1,2}|(?:[0-9a-f]{4}){1,2})|rgb\\(\\d{1,3}, *\\d{1,3}, *\\d{1,3}\\)|rgba\\(\\d{1,3}, *\\d{1,3}, *\\d{1,3}, *\\d*(?:\\.\\d+)?\\)|[a-z]+)$/Di');

View File

@@ -1,14 +1,19 @@
<?php
/*
/**
* @package s9e\TextFormatter
* @copyright Copyright (c) 2010-2019 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator\Items\AttributeFilters;
use s9e\TextFormatter\Configurator\Items\AttributeFilter;
class EmailFilter extends AttributeFilter
{
/**
* Constructor
*/
public function __construct()
{
parent::__construct('s9e\\TextFormatter\\Parser\\AttributeFilters\\EmailFilter::filter');

View File

@@ -1,14 +1,19 @@
<?php
/*
/**
* @package s9e\TextFormatter
* @copyright Copyright (c) 2010-2019 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator\Items\AttributeFilters;
use s9e\TextFormatter\Configurator\Items\AttributeFilter;
class FalseFilter extends AttributeFilter
{
/**
* Constructor
*/
public function __construct()
{
parent::__construct('s9e\\TextFormatter\\Parser\\AttributeFilters\\FalseFilter::filter');

View File

@@ -1,14 +1,19 @@
<?php
/*
/**
* @package s9e\TextFormatter
* @copyright Copyright (c) 2010-2019 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator\Items\AttributeFilters;
use s9e\TextFormatter\Configurator\Items\AttributeFilter;
class FloatFilter extends AttributeFilter
{
/**
* Constructor
*/
public function __construct()
{
parent::__construct('s9e\\TextFormatter\\Parser\\AttributeFilters\\NumericFilter::filterFloat');

View File

@@ -1,20 +1,23 @@
<?php
/*
/**
* @package s9e\TextFormatter
* @copyright Copyright (c) 2010-2019 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator\Items\AttributeFilters;
class FontfamilyFilter extends RegexpFilter
{
public function __construct()
{
	// This is more restrictive than the specs but safer
	$namechars = '[- \\w]+';
	$double    = '"' . $namechars . '"';
	$single    = "'" . $namechars . "'";
	$name      = '(?:' . $single . '|' . $double . '|' . $namechars . ')';

	// The D modifier makes $ match only at the very end of the value. Without it, a value that
	// ends with a newline would pass a filter that is marked as safe in CSS
	$regexp = '/^' . $name . '(?:, *' . $name . ')*$/D';

	parent::__construct($regexp);
	$this->markAsSafeInCSS();
}

View File

@@ -1,44 +1,83 @@
<?php
/*
/**
* @package s9e\TextFormatter
* @copyright Copyright (c) 2010-2019 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator\Items\AttributeFilters;
use InvalidArgumentException;
use RuntimeException;
use s9e\TextFormatter\Configurator\Helpers\ContextSafeness;
use s9e\TextFormatter\Configurator\Items\AttributeFilter;
use s9e\TextFormatter\Configurator\JavaScript\Dictionary;
class HashmapFilter extends AttributeFilter
{
/**
* Constructor
*
* @param array|null $map    Associative array in the form [key => value]
* @param bool       $strict Whether this map is strict (values with no match are invalid)
*/
public function __construct(?array $map = null, $strict = false)
{
	parent::__construct('s9e\\TextFormatter\\Parser\\AttributeFilters\\HashmapFilter::filter');

	// Replace the default signature with (attrValue, map, strict)
	$this->resetParameters();
	$this->addParameterByName('attrValue');
	$this->addParameterByName('map');
	$this->addParameterByName('strict');
	$this->setJS('HashmapFilter.filter');

	if (isset($map))
	{
		$this->setMap($map, $strict);
	}
}
/**
* {@inheritdoc}
*/
public function asConfig()
{
	// A map must have been recorded in this filter's vars before it can be exported
	if (isset($this->vars['map']))
	{
		return parent::asConfig();
	}

	throw new RuntimeException("Hashmap filter is missing a 'map' value");
}
public function setMap(array $map, $strict = \false)
/**
* Set the content of this map
*
* @param array $map Associative array in the form [word => replacement]
* @param bool $strict Whether this map is strict (values with no match are invalid)
* @return void
*/
public function setMap(array $map, $strict = false)
{
if (!\is_bool($strict))
if (!is_bool($strict))
{
throw new InvalidArgumentException('Argument 2 passed to ' . __METHOD__ . ' must be a boolean');
}
// If the map is not strict, we can optimize away the values that are identical to their key
if (!$strict)
{
$map = $this->optimizeLooseMap($map);
\ksort($map);
}
// Sort the map so it looks tidy
ksort($map);
// Record this filter's variables
$this->vars['map'] = new Dictionary($map);
$this->vars['strict'] = $strict;
// Evaluate safeness
$this->resetSafeness();
if (!empty($this->vars['strict']))
{
@@ -46,29 +85,69 @@ class HashmapFilter extends AttributeFilter
$this->evaluateSafenessInJS();
}
}
/**
* Mark whether this filter makes a value safe to be used in CSS
*
* @return void
*/
protected function evaluateSafenessInCSS()
{
	// Test each value against the list of disallowed characters
	$disallowedChars = ContextSafeness::getDisallowedCharactersInCSS();
	foreach ($this->vars['map'] as $value)
	{
		foreach ($disallowedChars as $char)
		{
			if (strpos($value, $char) !== false)
			{
				// A single unsafe value is enough to leave this filter unmarked
				return;
			}
		}
	}

	$this->markAsSafeInCSS();
}
/**
* Mark whether this filter makes a value safe to be used in JS
*
* @return void
*/
protected function evaluateSafenessInJS()
{
	// Test each value against the list of disallowed characters
	$disallowedChars = ContextSafeness::getDisallowedCharactersInJS();
	foreach ($this->vars['map'] as $value)
	{
		foreach ($disallowedChars as $char)
		{
			if (strpos($value, $char) !== false)
			{
				// A single unsafe value is enough to leave this filter unmarked
				return;
			}
		}
	}

	$this->markAsSafeInJS();
}
/**
* Optimize a non-strict map by removing values that are identical to their key
*
* @param array $map Original map
* @return array Optimized map
*/
protected function optimizeLooseMap(array $map)
{
	// Keep only the pairs whose value is not strictly identical to its key. Keys and their
	// order are preserved
	return array_filter(
		$map,
		function ($value, $key)
		{
			return $key !== $value;
		},
		ARRAY_FILTER_USE_BOTH
	);
}
}

View File

@@ -1,13 +1,17 @@
<?php
/*
/**
* @package s9e\TextFormatter
* @copyright Copyright (c) 2010-2019 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator\Items\AttributeFilters;
class IdentifierFilter extends RegexpFilter
{
/**
* Constructor
*/
public function __construct()
{
parent::__construct('/^[-0-9A-Za-z_]+$/D');

View File

@@ -1,14 +1,19 @@
<?php
/*
/**
* @package s9e\TextFormatter
* @copyright Copyright (c) 2010-2019 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator\Items\AttributeFilters;
use s9e\TextFormatter\Configurator\Items\AttributeFilter;
class IntFilter extends AttributeFilter
{
/**
* Constructor
*/
public function __construct()
{
parent::__construct('s9e\\TextFormatter\\Parser\\AttributeFilters\\NumericFilter::filterInt');

View File

@@ -1,14 +1,19 @@
<?php
/*
/**
* @package s9e\TextFormatter
* @copyright Copyright (c) 2010-2019 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator\Items\AttributeFilters;
use s9e\TextFormatter\Configurator\Items\AttributeFilter;
class IpFilter extends AttributeFilter
{
/**
* Constructor
*/
public function __construct()
{
parent::__construct('s9e\\TextFormatter\\Parser\\AttributeFilters\\NetworkFilter::filterIp');

View File

@@ -1,14 +1,19 @@
<?php
/*
/**
* @package s9e\TextFormatter
* @copyright Copyright (c) 2010-2019 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator\Items\AttributeFilters;
use s9e\TextFormatter\Configurator\Items\AttributeFilter;
class IpportFilter extends AttributeFilter
{
/**
* Constructor
*/
public function __construct()
{
parent::__construct('s9e\\TextFormatter\\Parser\\AttributeFilters\\NetworkFilter::filterIpport');

View File

@@ -1,14 +1,19 @@
<?php
/*
/**
* @package s9e\TextFormatter
* @copyright Copyright (c) 2010-2019 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator\Items\AttributeFilters;
use s9e\TextFormatter\Configurator\Items\AttributeFilter;
class Ipv4Filter extends AttributeFilter
{
/**
* Constructor
*/
public function __construct()
{
parent::__construct('s9e\\TextFormatter\\Parser\\AttributeFilters\\NetworkFilter::filterIpv4');

View File

@@ -1,14 +1,19 @@
<?php
/*
/**
* @package s9e\TextFormatter
* @copyright Copyright (c) 2010-2019 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator\Items\AttributeFilters;
use s9e\TextFormatter\Configurator\Items\AttributeFilter;
class Ipv6Filter extends AttributeFilter
{
/**
* Constructor
*/
public function __construct()
{
parent::__construct('s9e\\TextFormatter\\Parser\\AttributeFilters\\NetworkFilter::filterIpv6');

View File

@@ -1,47 +1,93 @@
<?php
/*
/**
* @package s9e\TextFormatter
* @copyright Copyright (c) 2010-2019 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator\Items\AttributeFilters;
use InvalidArgumentException;
use RuntimeException;
use s9e\TextFormatter\Configurator\Helpers\ContextSafeness;
use s9e\TextFormatter\Configurator\Helpers\RegexpBuilder;
use s9e\TextFormatter\Configurator\Items\AttributeFilter;
use s9e\TextFormatter\Configurator\Items\Regexp;
class MapFilter extends AttributeFilter
{
/**
 * Constructor
 *
 * Registers the PHP and JavaScript callbacks, declares the filter's two parameters (attrValue
 * and map) and, if a map is given, immediately configures it via setMap().
 *
 * @param array|null $map           Associative array in the form [word => replacement], or
 *                                  NULL to create the filter without a map
 * @param bool       $caseSensitive Whether this map is case-sensitive
 * @param bool       $strict        Whether this map is strict (values with no match are invalid)
 */
public function __construct(?array $map = null, $caseSensitive = false, $strict = false)
{
	parent::__construct('s9e\\TextFormatter\\Parser\\AttributeFilters\\MapFilter::filter');

	// Discard whatever parameters were registered so far and declare this filter's own
	$this->resetParameters();
	$this->addParameterByName('attrValue');
	$this->addParameterByName('map');
	$this->setJS('MapFilter.filter');

	if (isset($map))
	{
		$this->setMap($map, $caseSensitive, $strict);
	}
}
/**
 * {@inheritdoc}
 *
 * @throws RuntimeException if no map has been set on this filter
 */
public function asConfig()
{
	if (isset($this->vars['map']))
	{
		return parent::asConfig();
	}

	throw new RuntimeException("Map filter is missing a 'map' value");
}
public function setMap(array $map, $caseSensitive = \false, $strict = \false)
/**
* Set the content of this map
*
* @param array $map Associative array in the form [word => replacement]
* @param bool $caseSensitive Whether this map is case-sensitive
* @param bool $strict Whether this map is strict (values with no match are invalid)
* @return void
*/
public function setMap(array $map, $caseSensitive = false, $strict = false)
{
if (!\is_bool($caseSensitive))
if (!is_bool($caseSensitive))
{
throw new InvalidArgumentException('Argument 2 passed to ' . __METHOD__ . ' must be a boolean');
if (!\is_bool($strict))
}
if (!is_bool($strict))
{
throw new InvalidArgumentException('Argument 3 passed to ' . __METHOD__ . ' must be a boolean');
}
// Reset the template safeness marks for the new map
$this->resetSafeness();
// If the map is strict, we can assess its safeness
if ($strict)
{
$this->assessSafeness($map);
}
// Group values by keys
$valueKeys = [];
foreach ($map as $key => $value)
{
$valueKeys[$value][] = $key;
}
// Now create a regexp and an entry in the map for each group
$map = [];
foreach ($valueKeys as $value => $keys)
{
@@ -53,36 +99,73 @@ class MapFilter extends AttributeFilter
]
);
$regexp = '/^' . $regexp . '$/D';
// Add the case-insensitive flag if applicable
if (!$caseSensitive)
{
$regexp .= 'i';
if (!\preg_match('#^[[:ascii:]]*$#D', $regexp))
}
// Add the Unicode flag if the regexp isn't purely ASCII
if (!preg_match('#^[[:ascii:]]*$#D', $regexp))
{
$regexp .= 'u';
}
// Add the [regexp,value] pair to the map
$map[] = [new Regexp($regexp), $value];
}
// If the "strict" option is enabled, a catch-all regexp which replaces the value with FALSE
// is appended to the list
if ($strict)
$map[] = [new Regexp('//'), \false];
{
$map[] = [new Regexp('//'), false];
}
// Record the map in this filter's variables
$this->vars['map'] = $map;
}
/**
 * Assess the safeness of given map in contexts
 *
 * Marks this filter as safe in CSS and/or in JS when none of the map's values contains a
 * character that is disallowed in the corresponding context.
 *
 * @param  array $map Map whose values are inspected
 * @return void
 */
protected function assessSafeness(array $map)
{
	// Concatenate the values so we can check them as a single string
	$values = implode('', $map);

	// Test whether the values contain any character that's disallowed in CSS
	$isSafeInCSS = true;
	foreach (ContextSafeness::getDisallowedCharactersInCSS() as $char)
	{
		if (strpos($values, $char) !== false)
		{
			$isSafeInCSS = false;
			break;
		}
	}
	if ($isSafeInCSS)
	{
		$this->markAsSafeInCSS();
	}

	// Test whether the values contain any character that's disallowed in JS
	$isSafeInJS = true;
	foreach (ContextSafeness::getDisallowedCharactersInJS() as $char)
	{
		if (strpos($values, $char) !== false)
		{
			$isSafeInJS = false;
			break;
		}
	}
	if ($isSafeInJS)
	{
		$this->markAsSafeInJS();
	}
}
}

View File

@@ -1,13 +1,17 @@
<?php
/*
/**
* @package s9e\TextFormatter
* @copyright Copyright (c) 2010-2019 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator\Items\AttributeFilters;
class NumberFilter extends RegexpFilter
{
/**
* Constructor
*/
public function __construct()
{
parent::__construct('/^[0-9]+$/D');

View File

@@ -1,19 +1,28 @@
<?php
/*
/**
* @package s9e\TextFormatter
* @copyright Copyright (c) 2010-2019 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator\Items\AttributeFilters;
use InvalidArgumentException;
use RuntimeException;
use s9e\TextFormatter\Configurator\Items\AttributeFilter;
class RangeFilter extends AttributeFilter
{
public function __construct($min = \null, $max = \null)
/**
* Constructor
*
* @param integer $min Minimum value for this range
* @param integer $max Maximum value for this range
*/
public function __construct($min = null, $max = null)
{
parent::__construct('s9e\\TextFormatter\\Parser\\AttributeFilters\\NumericFilter::filterRange');
$this->resetParameters();
$this->addParameterByName('attrValue');
$this->addParameterByName('min');
@@ -23,27 +32,58 @@ class RangeFilter extends AttributeFilter
$this->markAsSafeAsURL();
$this->markAsSafeInCSS();
$this->markAsSafeInJS();
if (isset($min))
{
$this->setRange($min, $max);
}
}
/**
 * {@inheritdoc}
 *
 * @throws RuntimeException if either bound of the range has not been set
 */
public function asConfig()
{
	// Checked in this order so that a missing 'min' is reported before a missing 'max'
	$errors = [
		'min' => "Range filter is missing a 'min' value",
		'max' => "Range filter is missing a 'max' value"
	];
	foreach ($errors as $varName => $message)
	{
		if (!isset($this->vars[$varName]))
		{
			throw new RuntimeException($message);
		}
	}

	return parent::asConfig();
}
/**
 * Set the allowed range of values
 *
 * Both bounds are validated with FILTER_VALIDATE_INT, so integers and integer-like strings are
 * accepted and stored as integers.
 *
 * @param  integer $min Minimum value
 * @param  integer $max Maximum value
 * @return void
 *
 * @throws InvalidArgumentException if a bound is not an integer or if $min is greater than $max
 */
public function setRange($min, $max)
{
	$min = filter_var($min, FILTER_VALIDATE_INT);
	$max = filter_var($max, FILTER_VALIDATE_INT);

	// filter_var() returns false (not 0) when validation fails, hence the strict comparisons
	if ($min === false)
	{
		throw new InvalidArgumentException('Argument 1 passed to ' . __METHOD__ . ' must be an integer');
	}
	if ($max === false)
	{
		throw new InvalidArgumentException('Argument 2 passed to ' . __METHOD__ . ' must be an integer');
	}
	if ($min > $max)
	{
		throw new InvalidArgumentException('Invalid range: min (' . $min . ') > max (' . $max . ')');
	}

	$this->vars['min'] = $min;
	$this->vars['max'] = $max;
}

View File

@@ -1,47 +1,87 @@
<?php
/*
/**
* @package s9e\TextFormatter
* @copyright Copyright (c) 2010-2019 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator\Items\AttributeFilters;
use Exception;
use RuntimeException;
use s9e\TextFormatter\Configurator\Helpers\ContextSafeness;
use s9e\TextFormatter\Configurator\Helpers\RegexpParser;
use s9e\TextFormatter\Configurator\Items\AttributeFilter;
use s9e\TextFormatter\Configurator\Items\Regexp;
class RegexpFilter extends AttributeFilter
{
/**
 * Constructor
 *
 * Registers the PHP and JavaScript callbacks, declares the filter's two parameters (attrValue
 * and regexp) and, if a regexp is given, immediately sets it via setRegexp().
 *
 * @param string $regexp PCRE regexp
 */
public function __construct($regexp = null)
{
	parent::__construct('s9e\\TextFormatter\\Parser\\AttributeFilters\\RegexpFilter::filter');

	// Discard whatever parameters were registered so far and declare this filter's own
	$this->resetParameters();
	$this->addParameterByName('attrValue');
	$this->addParameterByName('regexp');
	$this->setJS('RegexpFilter.filter');

	if (isset($regexp))
	{
		$this->setRegexp($regexp);
	}
}
/**
 * {@inheritdoc}
 *
 * @throws RuntimeException if no regexp has been set on this filter
 */
public function asConfig()
{
	if (isset($this->vars['regexp']))
	{
		return parent::asConfig();
	}

	throw new RuntimeException("Regexp filter is missing a 'regexp' value");
}
/**
 * Return this filter's regexp
 *
 * The stored value is cast to a string before it is returned.
 *
 * @return string
 */
public function getRegexp()
{
	$regexp = $this->vars['regexp'];

	return (string) $regexp;
}
/**
 * Set this filter's regexp
 *
 * A string is wrapped in a Regexp instance; any other value is stored as-is (presumably it is
 * already a Regexp instance -- confirm against callers). The safeness marks are then reset and
 * evaluated again against the new regexp.
 *
 * @param  string $regexp PCRE regexp
 * @return void
 */
public function setRegexp($regexp)
{
	if (is_string($regexp))
	{
		$regexp = new Regexp($regexp);
	}

	$this->vars['regexp'] = $regexp;
	$this->resetSafeness();
	$this->evaluateSafeness();
}
/**
* Mark in which contexts values processed by this filter are safe to be used
*
* @return void
*/
protected function evaluateSafeness()
{
try
@@ -52,41 +92,89 @@ class RegexpFilter extends AttributeFilter
}
catch (Exception $e)
{
}
// If anything unexpected happens we don't try to mark this filter as safe
}
}
/**
 * Mark whether this filter makes a value safe to be used as a URL
 *
 * The filter is marked as safe if its regexp starts with a fixed scheme, or if its regexp cannot
 * match any of the characters that are disallowed in URLs.
 *
 * @return void
 */
protected function evaluateSafenessAsURL()
{
	$regexpInfo = RegexpParser::parse($this->vars['regexp']);

	// Match any number of "(" optionally followed by "?:"
	$captureStart = '(?>\\((?:\\?:)?)*';

	// Regexps that start with a fixed scheme are considered safe. As a special case, we
	// allow the scheme part to end with a single ? to allow the regexp "https?"
	$regexp = '#^\\^' . $captureStart . '(?!data|\\w*script)[a-z0-9]+\\??:#i';
	if (preg_match($regexp, $regexpInfo['regexp'])
	 && strpos($regexpInfo['modifiers'], 'm') === false)
	{
		$this->markAsSafeAsURL();

		return;
	}

	// Test whether this regexp could allow any character that's disallowed in URLs
	$regexp = RegexpParser::getAllowedCharacterRegexp($this->vars['regexp']);
	foreach (ContextSafeness::getDisallowedCharactersAsURL() as $char)
	{
		if (preg_match($regexp, $char))
		{
			return;
		}
	}

	$this->markAsSafeAsURL();
}
/**
 * Mark whether this filter makes a value safe to be used in CSS
 *
 * The filter is marked as safe only if its regexp cannot match any of the characters that are
 * disallowed in CSS.
 *
 * @return void
 */
protected function evaluateSafenessInCSS()
{
	// Test whether this regexp could allow any character that's disallowed in CSS
	$regexp = RegexpParser::getAllowedCharacterRegexp($this->vars['regexp']);
	foreach (ContextSafeness::getDisallowedCharactersInCSS() as $char)
	{
		if (preg_match($regexp, $char))
		{
			return;
		}
	}

	$this->markAsSafeInCSS();
}
/**
 * Mark whether this filter makes a value safe to be used in JS
 *
 * Only regexps that consist of one of a short list of known-safe expressions are marked as safe.
 *
 * @return void
 */
protected function evaluateSafenessInJS()
{
	// Regexp fragments (not literal text) that are known to only match safe values
	$safeExpressions = [
		'\\d+',
		'[0-9]+'
	];

	// Ensure that the regexp is anchored with ^ and $, that it only contains a safe expression
	// optionally contained in a subpattern and that its modifiers contain PCRE_DOLLAR_ENDONLY
	// but no modifiers other than Dis
	$regexp = '(^(?<delim>.)\\^(?:'
	        . '(?<expr>' . implode('|', array_map('preg_quote', $safeExpressions)) . ')'
	        . '|'
	        . '\\((?:\\?[:>])?(?&expr)\\)'
	        . ')\\$(?&delim)(?=.*D)[Dis]*$)D';
	if (preg_match($regexp, $this->vars['regexp']))
	{
		$this->markAsSafeInJS();
	}
}
}

View File

@@ -1,13 +1,17 @@
<?php
/*
/**
* @package s9e\TextFormatter
* @copyright Copyright (c) 2010-2019 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator\Items\AttributeFilters;
class SimpletextFilter extends RegexpFilter
{
/**
* Constructor
*/
public function __construct()
{
parent::__construct('/^[- +,.0-9A-Za-z_]+$/D');

View File

@@ -1,14 +1,19 @@
<?php
/*
/**
* @package s9e\TextFormatter
* @copyright Copyright (c) 2010-2019 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator\Items\AttributeFilters;
use s9e\TextFormatter\Configurator\Items\AttributeFilter;
class TimestampFilter extends AttributeFilter
{
/**
* Constructor
*/
public function __construct()
{
parent::__construct('s9e\\TextFormatter\\Parser\\AttributeFilters\\TimestampFilter::filter');

View File

@@ -1,29 +1,46 @@
<?php
/*
/**
* @package s9e\TextFormatter
* @copyright Copyright (c) 2010-2019 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator\Items\AttributeFilters;
use s9e\TextFormatter\Configurator\Items\AttributeFilter;
class UintFilter extends AttributeFilter
{
	/**
	 * Constructor
	 *
	 * Binds this filter to NumericFilter::filterUint in PHP and NumericFilter.filterUint in
	 * JavaScript.
	 */
	public function __construct()
	{
		parent::__construct('s9e\\TextFormatter\\Parser\\AttributeFilters\\NumericFilter::filterUint');
		$this->setJS('NumericFilter.filterUint');
	}

	/**
	 * {@inheritdoc}
	 *
	 * @return bool Always TRUE for this filter
	 */
	public function isSafeInCSS()
	{
		return true;
	}

	/**
	 * {@inheritdoc}
	 *
	 * @return bool Always TRUE for this filter
	 */
	public function isSafeInJS()
	{
		return true;
	}

	/**
	 * {@inheritdoc}
	 *
	 * @return bool Always TRUE for this filter
	 */
	public function isSafeAsURL()
	{
		return true;
	}
}

View File

@@ -0,0 +1,51 @@
<?php
/**
* @package s9e\TextFormatter
* @copyright Copyright (c) 2010-2019 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator\Items\AttributeFilters;
use s9e\TextFormatter\Configurator\Items\AttributeFilter;
class UrlFilter extends AttributeFilter
{
	/**
	 * Constructor
	 *
	 * Binds this filter to UrlFilter::filter in PHP and UrlFilter.filter in JavaScript, with the
	 * signature (attrValue, urlConfig, logger).
	 */
	public function __construct()
	{
		parent::__construct('s9e\\TextFormatter\\Parser\\AttributeFilters\\UrlFilter::filter');

		// Discard whatever parameters were registered so far, then declare ours in call order
		$this->resetParameters();
		foreach (['attrValue', 'urlConfig', 'logger'] as $paramName)
		{
			$this->addParameterByName($paramName);
		}

		$this->setJS('UrlFilter.filter');
	}

	/**
	 * {@inheritdoc}
	 *
	 * @return bool Always TRUE for this filter
	 */
	public function isSafeInCSS()
	{
		return true;
	}

	/**
	 * {@inheritdoc}
	 *
	 * @return bool Always TRUE for this filter
	 */
	public function isSafeInJS()
	{
		return true;
	}

	/**
	 * {@inheritdoc}
	 *
	 * @return bool Always TRUE for this filter
	 */
	public function isSafeAsURL()
	{
		return true;
	}
}

View File

@@ -1,20 +1,32 @@
<?php
/*
/**
* @package s9e\TextFormatter
* @copyright Copyright (c) 2010-2019 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator\Items;
use InvalidArgumentException;
use s9e\TextFormatter\Configurator\Items\Regexp;
class AttributePreprocessor extends Regexp
{
/**
* Return all the attributes created by the preprocessor along with the regexp that matches them
*
* @return array Array of [attribute name => regexp]
*/
public function getAttributes()
{
return $this->getNamedCaptures();
}
/**
* Return the regexp this preprocessor is based on
*
* @return string
*/
public function getRegexp()
{
return $this->regexp;

View File

@@ -0,0 +1,12 @@
<?php
/**
* @package s9e\TextFormatter
* @copyright Copyright (c) 2010-2019 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator\Items;
/**
 * Common base class for filters
 *
 * Declares no members of its own: everything is inherited from ProgrammableCallback. It gives
 * filter classes a shared parent type.
 */
abstract class Filter extends ProgrammableCallback
{
}

View File

@@ -0,0 +1,248 @@
<?php
/**
* @package s9e\TextFormatter
* @copyright Copyright (c) 2010-2019 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator\Items;
use InvalidArgumentException;
use s9e\TextFormatter\Configurator\ConfigProvider;
use s9e\TextFormatter\Configurator\Helpers\ConfigHelper;
use s9e\TextFormatter\Configurator\JavaScript\Code;
use s9e\TextFormatter\Configurator\JavaScript\FunctionProvider;
class ProgrammableCallback implements ConfigProvider
{
	/**
	 * @var callback Callback, in the normalized form produced by normalizeCallback()
	 */
	protected $callback;

	/**
	 * @var string JavaScript source code for this callback
	 */
	protected $js = 'returnFalse';

	/**
	 * @var array List of params to be passed to the callback. By-value parameters are stored
	 *            under numeric keys, by-name parameters are stored as [name => null]
	 */
	protected $params = [];

	/**
	 * @var array Variables associated with this instance
	 */
	protected $vars = [];

	/**
	 * Constructor
	 *
	 * Stores the normalized callback and tries to load a matching JavaScript implementation.
	 *
	 * @param callable $callback
	 *
	 * @throws InvalidArgumentException if the argument is not callable
	 */
	public function __construct($callback)
	{
		if (!is_callable($callback))
		{
			throw new InvalidArgumentException(__METHOD__ . '() expects a callback');
		}

		$this->callback = $this->normalizeCallback($callback);
		$this->autoloadJS();
	}

	/**
	 * Append a parameter whose value is known in advance
	 *
	 * @param  mixed $paramValue
	 * @return self
	 */
	public function addParameterByValue($paramValue)
	{
		$this->params[] = $paramValue;

		return $this;
	}

	/**
	 * Append a parameter identified by its name
	 *
	 * The value will be dynamically generated by the caller
	 *
	 * @param  string $paramName
	 * @return self
	 *
	 * @throws InvalidArgumentException if a parameter is already registered under that key
	 */
	public function addParameterByName($paramName)
	{
		// array_key_exists() rather than isset() because by-name parameters are stored as null
		$isDuplicate = array_key_exists($paramName, $this->params);
		if ($isDuplicate)
		{
			throw new InvalidArgumentException("Parameter '" . $paramName . "' already exists");
		}

		$this->params[$paramName] = null;

		return $this;
	}

	/**
	 * Get this object's callback
	 *
	 * @return callback
	 */
	public function getCallback()
	{
		return $this->callback;
	}

	/**
	 * Get this callback's JavaScript
	 *
	 * @return string
	 */
	public function getJS()
	{
		return $this->js;
	}

	/**
	 * Get this object's variables
	 *
	 * @return array
	 */
	public function getVars()
	{
		return $this->vars;
	}

	/**
	 * Remove all the parameters
	 *
	 * @return self
	 */
	public function resetParameters()
	{
		$this->params = [];

		return $this;
	}

	/**
	 * Set this callback's JavaScript
	 *
	 * @param  string $js JavaScript source code for this callback
	 * @return self
	 */
	public function setJS($js)
	{
		$this->js = $js;

		return $this;
	}

	/**
	 * Set or overwrite one of this callback's variables
	 *
	 * @param  string $name  Variable name
	 * @param  mixed  $value Variable value, stored as-is
	 * @return self
	 */
	public function setVar($name, $value)
	{
		$this->vars[$name] = $value;

		return $this;
	}

	/**
	 * Replace all of this callback's variables at once
	 *
	 * @param  array $vars Associative array of values
	 * @return self
	 */
	public function setVars(array $vars)
	{
		$this->vars = $vars;

		return $this;
	}

	/**
	 * {@inheritdoc}
	 *
	 * The returned array contains the callback, its parameters if there are any, and the
	 * callback's JavaScript wrapped in a Code instance. A by-name parameter whose name matches
	 * one of this instance's variables is replaced with that variable's value.
	 */
	public function asConfig()
	{
		$params = [];
		foreach ($this->params as $paramName => $paramValue)
		{
			if (is_numeric($paramName))
			{
				// By value
				$params[] = $paramValue;
			}
			elseif (isset($this->vars[$paramName]))
			{
				// By name, but the value is readily available in $this->vars
				$params[] = $this->vars[$paramName];
			}
			else
			{
				// By name
				$params[$paramName] = null;
			}
		}

		$config = ['callback' => $this->callback];
		if (!empty($params))
		{
			// The "params" key is only present if there's at least one parameter
			$config['params'] = ConfigHelper::toArray($params, true, true);
		}

		// Add the callback's JavaScript representation
		$config['js'] = new Code($this->js);

		return $config;
	}

	/**
	 * Try to load the JavaScript source for this callback
	 *
	 * Only callbacks stored as a string are looked up. If the lookup throws an
	 * InvalidArgumentException, the current JavaScript is left untouched.
	 *
	 * @return void
	 */
	protected function autoloadJS()
	{
		if (is_string($this->callback))
		{
			try
			{
				$this->js = FunctionProvider::get($this->callback);
			}
			catch (InvalidArgumentException $e)
			{
				// Do nothing
			}
		}
	}

	/**
	 * Normalize a callback's representation
	 *
	 * @param  callable $callback
	 * @return callable
	 */
	protected function normalizeCallback($callback)
	{
		// Normalize ['foo', 'bar'] to 'foo::bar'
		if (is_array($callback) && is_string($callback[0]))
		{
			list($className, $methodName) = [$callback[0], $callback[1]];
			$callback = $className . '::' . $methodName;
		}

		// Normalize '\\foo' to 'foo' and '\\foo::bar' to 'foo::bar'
		return (is_string($callback)) ? ltrim($callback, '\\') : $callback;
	}
}

View File

@@ -0,0 +1,165 @@
<?php
/**
* @package s9e\TextFormatter
* @copyright Copyright (c) 2010-2019 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator\Items;
use InvalidArgumentException;
use s9e\TextFormatter\Configurator\ConfigProvider;
use s9e\TextFormatter\Configurator\FilterableConfigValue;
use s9e\TextFormatter\Configurator\Helpers\RegexpParser;
use s9e\TextFormatter\Configurator\JavaScript\Code;
use s9e\TextFormatter\Configurator\JavaScript\RegexpConvertor;
class Regexp implements ConfigProvider, FilterableConfigValue
{
	/**
	 * @var bool Whether this regexp should have the global flag set in JavaScript
	 */
	protected $isGlobal;

	/**
	 * @var string JavaScript regexp, with delimiters and modifiers, e.g. "/foo/i"
	 */
	protected $jsRegexp;

	/**
	 * @var string PCRE regexp, with delimiters and modifiers, e.g. "/foo/i"
	 */
	protected $regexp;

	/**
	 * Constructor
	 *
	 * @param string $regexp   PCRE regexp, with delimiters and modifiers, e.g. "/foo/i"
	 * @param bool   $isGlobal Whether this regexp should have the global flag set in JavaScript
	 *
	 * @throws InvalidArgumentException if the regexp cannot be used by preg_match()
	 */
	public function __construct($regexp, $isGlobal = false)
	{
		// The regexp is tested against an empty string. The warning preg_match() would emit is
		// silenced and the FALSE return value is used to detect the failure instead
		$result = @preg_match($regexp, '');
		if ($result === false)
		{
			throw new InvalidArgumentException('Invalid regular expression ' . var_export($regexp, true));
		}

		$this->regexp   = $regexp;
		$this->isGlobal = $isGlobal;
	}

	/**
	 * Return this regexp as a string
	 *
	 * @return string PCRE regexp, as passed to the constructor
	 */
	public function __toString()
	{
		return $this->regexp;
	}

	/**
	 * {@inheritdoc}
	 *
	 * The instance itself is returned; the target-specific representation is produced by
	 * filterConfig()
	 */
	public function asConfig()
	{
		return $this;
	}

	/**
	 * {@inheritdoc}
	 *
	 * For the "JS" target the JavaScript regexp is returned wrapped in a Code instance; for any
	 * other target the PCRE regexp is returned as a string.
	 */
	public function filterConfig($target)
	{
		if ($target === 'JS')
		{
			return new Code($this->getJS());
		}

		return (string) $this;
	}

	/**
	 * Return the name of each capture in this regexp
	 *
	 * @return string[]
	 */
	public function getCaptureNames()
	{
		return RegexpParser::getCaptureNames($this->regexp);
	}

	/**
	 * Return this regexp's JavaScript representation
	 *
	 * The conversion is performed on first use, unless a representation was set via setJS(),
	 * and its result is reused by subsequent calls.
	 *
	 * @return string
	 */
	public function getJS()
	{
		if (isset($this->jsRegexp))
		{
			return $this->jsRegexp;
		}

		$this->jsRegexp = RegexpConvertor::toJS($this->regexp, $this->isGlobal);

		return $this->jsRegexp;
	}

	/**
	 * Return all the named captures with a standalone regexp that matches them
	 *
	 * Each standalone regexp reuses this regexp's delimiter and modifiers, is anchored with ^
	 * and $ and always carries the D modifier.
	 *
	 * @return array Array of [capture name => regexp]
	 */
	public function getNamedCaptures()
	{
		$regexpInfo = RegexpParser::parse($this->regexp);
		$delimiter  = $regexpInfo['delimiter'];

		// Reuse the original modifiers and ensure that we use the D modifier
		$modifiers = $regexpInfo['modifiers'];
		if (strpos($modifiers, 'D') === false)
		{
			$modifiers .= 'D';
		}

		$prefix = $delimiter . '^';
		$suffix = '$' . $delimiter . $modifiers;

		$captures = [];
		foreach ($this->getNamedCapturesExpressions($regexpInfo['tokens']) as $captureName => $expr)
		{
			$captures[$captureName] = $prefix . $expr . $suffix;
		}

		return $captures;
	}

	/**
	 * Return the expression used in each named capture
	 *
	 * Only tokens of type "capturingSubpatternStart" that carry a name are considered. An
	 * expression that contains a pipe is wrapped in a non-capturing group so that it can be
	 * safely placed between anchors.
	 *
	 * @param  array[] $tokens Tokens, as found in the "tokens" element of the parsed regexp
	 * @return array           Array of [capture name => expression]
	 */
	protected function getNamedCapturesExpressions(array $tokens)
	{
		$exprs = [];
		foreach ($tokens as $token)
		{
			$isNamedCapture = ($token['type'] === 'capturingSubpatternStart' && isset($token['name']));
			if (!$isNamedCapture)
			{
				continue;
			}

			$expr = $token['content'];
			$exprs[$token['name']] = (strpos($expr, '|') !== false) ? '(?:' . $expr . ')' : $expr;
		}

		return $exprs;
	}

	/**
	 * Set this regexp's JavaScript representation
	 *
	 * Once set, this value is what getJS() returns; no conversion is attempted.
	 *
	 * @param  string $jsRegexp
	 * @return void
	 */
	public function setJS($jsRegexp)
	{
		$this->jsRegexp = $jsRegexp;
	}
}

View File

@@ -0,0 +1,245 @@
<?php
/**
* @package s9e\TextFormatter
* @copyright Copyright (c) 2010-2019 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator\Items;
use InvalidArgumentException;
use s9e\TextFormatter\Configurator\Collections\AttributeCollection;
use s9e\TextFormatter\Configurator\Collections\AttributePreprocessorCollection;
use s9e\TextFormatter\Configurator\Collections\Ruleset;
use s9e\TextFormatter\Configurator\Collections\TagFilterChain;
use s9e\TextFormatter\Configurator\ConfigProvider;
use s9e\TextFormatter\Configurator\Helpers\ConfigHelper;
use s9e\TextFormatter\Configurator\Items\Template;
use s9e\TextFormatter\Configurator\Traits\Configurable;
/**
* @property AttributeCollection $attributes This tag's attributes
* @property AttributePreprocessorCollection $attributePreprocessors This tag's attribute parsers
* @property TagFilterChain $filterChain This tag's filter chain
* @property integer $nestingLimit Maximum nesting level for this tag
* @property Ruleset $rules Rules associated with this tag
* @property integer $tagLimit Maximum number of this tag per message
* @property Template $template Template associated with this tag
* @property-write string|Template $template Template associated with this tag
*/
class Tag implements ConfigProvider
{
	use Configurable;

	/**
	 * @var AttributeCollection This tag's attributes
	 */
	protected $attributes;

	/**
	 * @var AttributePreprocessorCollection This tag's attribute parsers
	 */
	protected $attributePreprocessors;

	/**
	 * @var TagFilterChain This tag's filter chain
	 */
	protected $filterChain;

	/**
	 * @var integer Maximum nesting level for this tag
	 */
	protected $nestingLimit = 10;

	/**
	 * @var Ruleset Rules associated with this tag
	 */
	protected $rules;

	/**
	 * @var integer Maximum number of this tag per message
	 */
	protected $tagLimit = 5000;

	/**
	 * @var Template Template associated with this tag
	 */
	protected $template;

	/**
	 * Constructor
	 *
	 * Creates the tag's collections, seeds its filter chain with the two default processing
	 * steps, then applies the options, if any.
	 *
	 * @param array|null $options This tag's options, as [option name => value]
	 */
	public function __construct(?array $options = null)
	{
		$this->attributes             = new AttributeCollection;
		$this->attributePreprocessors = new AttributePreprocessorCollection;
		$this->filterChain            = new TagFilterChain;
		$this->rules                  = new Ruleset;

		// Start the filterChain with the default processing
		$this->filterChain->append('s9e\\TextFormatter\\Parser\\FilterProcessing::executeAttributePreprocessors')
		                  ->addParameterByName('tagConfig')
		                  ->setJS('executeAttributePreprocessors');

		$this->filterChain->append('s9e\\TextFormatter\\Parser\\FilterProcessing::filterAttributes')
		                  ->addParameterByName('tagConfig')
		                  ->addParameterByName('registeredVars')
		                  ->addParameterByName('logger')
		                  ->setJS('filterAttributes');

		if (isset($options))
		{
			// Sort the options by name so that attributes are set before the template, which is
			// necessary to evaluate whether the template is safe
			ksort($options);

			foreach ($options as $optionName => $optionValue)
			{
				$this->__set($optionName, $optionValue);
			}
		}
	}

	/**
	 * {@inheritdoc}
	 *
	 * The template is left out of the config. The attribute preprocessing step is removed from
	 * the exported filter chain if this tag has no attribute preprocessors. The "attributes" and
	 * "filterChain" keys are always present in the returned array.
	 */
	public function asConfig()
	{
		$vars = get_object_vars($this);

		// Remove properties that are not needed during parsing
		unset($vars['template']);

		// If there are no attribute preprocessors defined, we can remove the step from this tag's
		// filterChain
		if (!count($this->attributePreprocessors))
		{
			$callback = 's9e\\TextFormatter\\Parser\\FilterProcessing::executeAttributePreprocessors';

			// We operate on a copy of the filterChain, without modifying the original
			$filterChain = clone $vars['filterChain'];

			// Process the chain in reverse order so that we don't skip indices
			$i = count($filterChain);
			while (--$i >= 0)
			{
				if ($filterChain[$i]->getCallback() === $callback)
				{
					unset($filterChain[$i]);
				}
			}

			$vars['filterChain'] = $filterChain;
		}

		// The union only fills in the keys that are missing from the converted array
		return ConfigHelper::toArray($vars) + ['attributes' => [], 'filterChain' => []];
	}

	/**
	 * Return this tag's template
	 *
	 * @return Template
	 */
	public function getTemplate()
	{
		return $this->template;
	}

	/**
	 * Test whether this tag has a template
	 *
	 * @return bool
	 */
	public function issetTemplate()
	{
		return isset($this->template);
	}

	/**
	 * Set this tag's attribute preprocessors
	 *
	 * The existing preprocessors are removed before the new ones are merged in.
	 *
	 * @param  array|AttributePreprocessorCollection $attributePreprocessors 2D array of [attrName=>[regexp]], or an instance of AttributePreprocessorCollection
	 * @return void
	 */
	public function setAttributePreprocessors($attributePreprocessors)
	{
		$this->attributePreprocessors->clear();
		$this->attributePreprocessors->merge($attributePreprocessors);
	}

	/**
	 * Set this tag's nestingLimit
	 *
	 * @param  integer $limit Cast to an integer before it is validated
	 * @return void
	 *
	 * @throws InvalidArgumentException if the limit is lower than 1
	 */
	public function setNestingLimit($limit)
	{
		$limit = (int) $limit;
		if ($limit < 1)
		{
			throw new InvalidArgumentException('nestingLimit must be a number greater than 0');
		}

		$this->nestingLimit = $limit;
	}

	/**
	 * Set this tag's rules
	 *
	 * The existing rules are removed before the new ones are merged in.
	 *
	 * @param  array|Ruleset $rules 2D array of rule definitions, or instance of Ruleset
	 * @return void
	 */
	public function setRules($rules)
	{
		$this->rules->clear();
		$this->rules->merge($rules);
	}

	/**
	 * Set this tag's tagLimit
	 *
	 * @param  integer $limit Cast to an integer before it is validated
	 * @return void
	 *
	 * @throws InvalidArgumentException if the limit is lower than 1
	 */
	public function setTagLimit($limit)
	{
		$limit = (int) $limit;
		if ($limit < 1)
		{
			throw new InvalidArgumentException('tagLimit must be a number greater than 0');
		}

		$this->tagLimit = $limit;
	}

	/**
	 * Set the template associated with this tag
	 *
	 * Anything that is not already a Template instance is wrapped in one.
	 *
	 * @param  string|Template $template
	 * @return void
	 */
	public function setTemplate($template)
	{
		if (!($template instanceof Template))
		{
			$template = new Template($template);
		}

		$this->template = $template;
	}

	/**
	 * Unset this tag's template
	 *
	 * @return void
	 */
	public function unsetTemplate()
	{
		unset($this->template);
	}
}

View File

@@ -0,0 +1,25 @@
<?php
/**
* @package s9e\TextFormatter
* @copyright Copyright (c) 2010-2019 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator\Items;
class TagFilter extends Filter
{
	/**
	 * Constructor
	 *
	 * Whatever the callback is, the filter starts with a single by-name parameter: "tag".
	 *
	 * @param callable $callback
	 */
	public function __construct($callback)
	{
		parent::__construct($callback);

		// Set the default signature
		$this->resetParameters()->addParameterByName('tag');
	}
}

View File

@@ -0,0 +1,195 @@
<?php
/**
* @package s9e\TextFormatter
* @copyright Copyright (c) 2010-2019 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator\Items;
use DOMDocument;
use s9e\TextFormatter\Configurator\Helpers\NodeLocator;
use s9e\TextFormatter\Configurator\Helpers\TemplateHelper;
use s9e\TextFormatter\Configurator\Helpers\TemplateInspector;
use s9e\TextFormatter\Configurator\Helpers\TemplateModifier;
use s9e\TextFormatter\Configurator\TemplateNormalizer;
class Template
{
/**
* @var TemplateInspector Instance of TemplateInspector based on the content of this template
*/
protected $inspector;
/**
* @var bool Whether this template has been normalized
*/
protected $isNormalized = false;
/**
* @var string This template's content
*/
protected $template;
/**
 * Constructor
 *
 * The content is stored as-is; nothing is parsed or normalized at this point.
 *
 * @param string $template This template's content
 */
public function __construct($template)
{
$this->template = $template;
}
/**
 * Handle calls to undefined methods
 *
 * Forwards calls to this template's TemplateInspector instance
 *
 * @param  string $methodName Name of the method being called
 * @param  array  $args       Arguments of the call
 * @return mixed              Whatever the forwarded call returns
 */
public function __call($methodName, $args)
{
	$callback = [$this->getInspector(), $methodName];

	return call_user_func_array($callback, $args);
}
/**
 * Return this template's content
 *
 * Other methods of this class read the content through this method.
 *
 * @return string
 */
public function __toString()
{
return $this->template;
}
/**
 * Return the content of this template as a DOMDocument
 *
 * NOTE: the content is wrapped in an <xsl:template/> node
 *
 * A new document is created and loaded on every call.
 *
 * @return DOMDocument
 */
public function asDOM()
{
	$startTag = '<xsl:template xmlns:xsl="http://www.w3.org/1999/XSL/Transform">';
	$endTag   = '</xsl:template>';
	$xml      = $startTag . $this->__toString() . $endTag;

	$dom = new TemplateDocument($this);
	$dom->loadXML($xml);

	return $dom;
}
/**
 * Return all the nodes in this template whose content type is CSS
 *
 * The nodes belong to a document freshly created by asDOM().
 *
 * @return array
 */
public function getCSSNodes()
{
	$dom = $this->asDOM();

	return NodeLocator::getCSSNodes($dom);
}
/**
* Return an instance of TemplateInspector based on this template's content
*
* @return TemplateInspector
*/
public function getInspector()
{
if (!isset($this->inspector))
{
$this->inspector = new TemplateInspector($this->__toString());
}
return $this->inspector;
}
/**
* Return all the nodes in this template whose content type is JavaScript
*
* @return array
*/
public function getJSNodes()
{
return NodeLocator::getJSNodes($this->asDOM());
}
/**
* Return all the nodes in this template whose value is an URL
*
* @return array
*/
public function getURLNodes()
{
return NodeLocator::getURLNodes($this->asDOM());
}
/**
* Return a list of parameters in use in this template
*
* @return array Alphabetically sorted list of unique parameter names
*/
public function getParameters()
{
return TemplateHelper::getParametersFromXSL($this->__toString());
}
/**
* Set and/or return whether this template has been normalized
*
* @param bool $bool If present, the new value for this template's isNormalized flag
* @return bool Whether this template has been normalized
*/
public function isNormalized($bool = null)
{
if (isset($bool))
{
$this->isNormalized = $bool;
}
return $this->isNormalized;
}
/**
* Normalize this template's content
*
* @param TemplateNormalizer $templateNormalizer
* @return void
*/
public function normalize(TemplateNormalizer $templateNormalizer)
{
$this->inspector = null;
$this->template = $templateNormalizer->normalizeTemplate($this->template);
$this->isNormalized = true;
}
/**
* Replace parts of this template that match given regexp
*
* @param string $regexp Regexp for matching parts that need replacement
* @param callback $fn Callback used to get the replacement
* @return void
*/
public function replaceTokens($regexp, $fn)
{
$this->inspector = null;
$this->template = TemplateModifier::replaceTokens($this->template, $regexp, $fn);
$this->isNormalized = false;
}
/**
* Replace this template's content
*
* @param string $template New content
* @return void
*/
public function setContent($template)
{
$this->inspector = null;
$this->template = (string) $template;
$this->isNormalized = false;
}
}

View File

@@ -1,22 +1,39 @@
<?php
/*
/**
* @package s9e\TextFormatter
* @copyright Copyright (c) 2010-2019 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator\Items;
use DOMDocument;
use s9e\TextFormatter\Configurator\Helpers\TemplateHelper;
use s9e\TextFormatter\Configurator\Helpers\TemplateLoader;
class TemplateDocument extends DOMDocument
{
	/**
	 * @var Template Template instance that created this document
	 */
	protected $template;

	/**
	 * Constructor
	 *
	 * NOTE(review): DOMDocument's own constructor is not invoked here. Template::asDOM() loads
	 * content with loadXML() immediately after instantiation -- confirm no caller relies on
	 * the document before content is loaded
	 *
	 * @param Template $template Template instance that created this document
	 */
	public function __construct(Template $template)
	{
		$this->template = $template;
	}

	/**
	 * Update the original template with this document's content
	 *
	 * The document is serialized once and the result replaces the template's content
	 *
	 * @return void
	 */
	public function saveChanges()
	{
		$this->template->setContent(TemplateLoader::save($this));
	}
}

View File

@@ -1,11 +1,12 @@
<?php
/*
/**
* @package s9e\TextFormatter
* @copyright Copyright (c) 2010-2019 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator\Items;
/**
 * Template subclass that declares no members of its own
 *
 * NOTE(review): presumably a marker type that lets other code tell these templates apart from
 * regular ones via instanceof -- confirm against the code that checks for UnsafeTemplate
 */
class UnsafeTemplate extends Template
{
}

View File

@@ -1,11 +1,12 @@
<?php
/*
/**
* @package s9e\TextFormatter
* @copyright Copyright (c) 2010-2019 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator;
use ReflectionClass;
use s9e\TextFormatter\Configurator;
use s9e\TextFormatter\Configurator\Helpers\ConfigHelper;
@@ -20,14 +21,37 @@ use s9e\TextFormatter\Configurator\JavaScript\Minifiers\Noop;
use s9e\TextFormatter\Configurator\JavaScript\RegexpConvertor;
use s9e\TextFormatter\Configurator\JavaScript\StylesheetCompressor;
use s9e\TextFormatter\Configurator\RendererGenerators\XSLT;
class JavaScript
{
/**
* @var CallbackGenerator
*/
protected $callbackGenerator;
/**
* @var array Configuration, filtered for JavaScript
*/
protected $config;
/**
* @var ConfigOptimizer
*/
protected $configOptimizer;
/**
* @var Configurator Configurator this instance belongs to
*/
protected $configurator;
/**
* @var Encoder
*/
public $encoder;
public $exportMethods;
/**
* @var array List of methods and properties to be exported in the s9e.TextFormatter object
*/
public $exports = [
'disablePlugin',
'disableTag',
@@ -41,13 +65,34 @@ class JavaScript
'setParameter',
'setTagLimit'
];
/**
* @var HintGenerator
*/
protected $hintGenerator;
/**
* @var Minifier Instance of Minifier used to minify the JavaScript parser
*/
protected $minifier;
/**
* @var StylesheetCompressor
*/
protected $stylesheetCompressor;
/**
* @var string Stylesheet used for rendering
*/
protected $xsl;
/**
* Constructor
*
* @param Configurator $configurator Configurator
*/
public function __construct(Configurator $configurator)
{
$this->exportMethods =& $this->exports;
$this->encoder = new Encoder;
$this->callbackGenerator = new CallbackGenerator;
$this->configOptimizer = new ConfigOptimizer($this->encoder);
@@ -55,102 +100,211 @@ class JavaScript
$this->hintGenerator = new HintGenerator;
$this->stylesheetCompressor = new StylesheetCompressor;
}
/**
 * Return the cached instance of Minifier (creates one if necessary)
 *
 * When no minifier has been set, a Noop instance is created, cached and returned
 *
 * @return Minifier
 */
public function getMinifier()
{
	if (isset($this->minifier))
	{
		return $this->minifier;
	}
	$this->minifier = new Noop;

	return $this->minifier;
}
/**
 * Get a JavaScript parser
 *
 * @param  array|null $config Config array returned by the configurator. If omitted, the
 *                            configurator's current config is used
 * @return string             JavaScript parser
 */
public function getParser(?array $config = null)
{
	$this->configOptimizer->reset();

	// Get the stylesheet used for rendering
	$xslt = new XSLT;
	$xslt->normalizer->remove('RemoveLivePreviewAttributes');
	$this->xsl = $xslt->getXSL($this->configurator->rendering);

	// Prepare the parser's config
	$this->config = $config ?? $this->configurator->asConfig();
	$this->config = ConfigHelper::filterConfig($this->config, 'JS');
	$this->config = $this->callbackGenerator->replaceCallbacks($this->config);

	// Get the parser's source and inject its config
	$src = $this->getHints() . $this->injectConfig($this->getSource());

	// Export the public API
	$src .= "if (!window['s9e']) window['s9e'] = {};\n" . $this->getExports();

	// Minify the source
	$src = $this->getMinifier()->get($src);

	// Wrap the source in a function to protect the global scope. This is done exactly once
	$src = '(function(){' . $src . '})();';

	return $src;
}
/**
 * Set the cached instance of Minifier
 *
 * Extra arguments will be passed to the minifier's constructor
 *
 * @param  string|Minifier $minifier Name of a supported minifier, or an instance of Minifier
 * @return Minifier                  The new minifier
 */
public function setMinifier($minifier)
{
	if (is_string($minifier))
	{
		// Minifier names are resolved relative to the JavaScript\Minifiers namespace
		$className = __NAMESPACE__ . '\\JavaScript\\Minifiers\\' . $minifier;

		// Pass the extra argument to the constructor, if applicable
		$args = array_slice(func_get_args(), 1);
		if (!empty($args))
		{
			$reflection = new ReflectionClass($className);
			$minifier   = $reflection->newInstanceArgs($args);
		}
		else
		{
			$minifier = new $className;
		}
	}
	$this->minifier = $minifier;

	return $minifier;
}
//==========================================================================
// Internal
//==========================================================================
/**
 * Encode a PHP value into an equivalent JavaScript representation
 *
 * Delegates to this instance's Encoder
 *
 * @param  mixed  $value Original value
 * @return string        JavaScript representation
 */
protected function encode($value)
{
	$js = $this->encoder->encode($value);

	return $js;
}
/**
 * Generate and return the public API
 *
 * Produces an assignment to window['s9e']['TextFormatter'] that maps each exported name to the
 * identifier of the same name, sorted by name. Returns an empty string if nothing is exported
 *
 * @return string JavaScript Code
 */
protected function getExports()
{
	if (empty($this->exports))
	{
		return '';
	}

	// Collect every export before sorting and serializing the list
	$exports = [];
	foreach ($this->exports as $export)
	{
		$exports[] = "'" . $export . "':" . $export;
	}
	sort($exports);

	return "window['s9e']['TextFormatter'] = {" . implode(',', $exports) . '};';
}
/**
 * Generate a HINT object that contains information about the configuration
 *
 * Feeds the current config, the configurator's plugins and the rendering stylesheet to the
 * hint generator, then returns what it generates
 *
 * @return string JavaScript Code
 */
protected function getHints()
{
	$generator = $this->hintGenerator;
	$generator->setConfig($this->config);
	$generator->setPlugins($this->configurator->plugins);
	$generator->setXSL($this->xsl);

	return $generator->getHints();
}
/**
* Return the plugins' config
*
* @return Dictionary
*/
protected function getPluginsConfig()
{
$plugins = new Dictionary;
foreach ($this->config['plugins'] as $pluginName => $pluginConfig)
{
if (!isset($pluginConfig['js']))
{
// Skip this plugin
continue;
}
$js = $pluginConfig['js'];
unset($pluginConfig['js']);
// Not needed in JavaScript
unset($pluginConfig['className']);
// Ensure that quickMatch is UTF-8 if present
if (isset($pluginConfig['quickMatch']))
{
// Well-formed UTF-8 sequences
$valid = [
'[[:ascii:]]',
// [1100 0000-1101 1111] [1000 0000-1011 1111]
'[\\xC0-\\xDF][\\x80-\\xBF]',
// [1110 0000-1110 1111] [1000 0000-1011 1111]{2}
'[\\xE0-\\xEF][\\x80-\\xBF]{2}',
// [1111 0000-1111 0111] [1000 0000-1011 1111]{3}
'[\\xF0-\\xF7][\\x80-\\xBF]{3}'
];
$regexp = '#(?>' . \implode('|', $valid) . ')+#';
if (\preg_match($regexp, $pluginConfig['quickMatch'], $m))
$regexp = '#(?>' . implode('|', $valid) . ')+#';
// Keep only the first valid sequence of UTF-8, or unset quickMatch if none is found
if (preg_match($regexp, $pluginConfig['quickMatch'], $m))
{
$pluginConfig['quickMatch'] = $m[0];
}
else
{
unset($pluginConfig['quickMatch']);
}
}
/**
* @var array Keys of elements that are kept in the global scope. Everything else will be
* moved into the plugin's parser
*/
$globalKeys = [
'quickMatch' => 1,
'regexp' => 1,
'regexpLimit' => 1
];
$globalConfig = \array_intersect_key($pluginConfig, $globalKeys);
$localConfig = \array_diff_key($pluginConfig, $globalKeys);
$globalConfig = array_intersect_key($pluginConfig, $globalKeys);
$localConfig = array_diff_key($pluginConfig, $globalKeys);
if (isset($globalConfig['regexp']) && !($globalConfig['regexp'] instanceof Code))
$globalConfig['regexp'] = new Code(RegexpConvertor::toJS($globalConfig['regexp'], \true));
{
$globalConfig['regexp'] = new Code(RegexpConvertor::toJS($globalConfig['regexp'], true));
}
$globalConfig['parser'] = new Code(
'/**
* @param {!string} text
* @param {!Array.<Array>} matches
* @param {string} text
* @param {!Array.<!Array>} matches
*/
function(text, matches)
{
@@ -159,57 +313,114 @@ class JavaScript
' . $js . '
}'
);
$plugins[$pluginName] = $globalConfig;
}
return $plugins;
}
/**
 * Return the registeredVars config
 *
 * @return Dictionary Registered vars, minus cacheDir
 */
protected function getRegisteredVarsConfig()
{
	$vars = $this->config['registeredVars'];

	// cacheDir is dropped from the registered vars: it is useless in JavaScript and it could
	// leak some information about the server
	unset($vars['cacheDir']);

	return new Dictionary($vars);
}
/**
 * Return the root context config
 *
 * Reads the "rootContext" entry of the current config
 *
 * @return array
 */
protected function getRootContext()
{
	$rootContext = $this->config['rootContext'];

	return $rootContext;
}
/**
 * Return the parser's source
 *
 * Concatenates the attribute filters, the parser's support files and the parser itself, each
 * file being included exactly once. If the preview method is exported, render.js is appended
 * and the stylesheet is declared in a leading "xsl" variable
 *
 * @return string
 */
protected function getSource()
{
	$rootDir = __DIR__ . '/..';
	$src     = '';

	// If getLogger() is not exported we use a dummy Logger that can be optimized away
	$logger = (in_array('getLogger', $this->exports, true)) ? 'Logger.js' : 'NullLogger.js';

	// Prepare the list of files. glob() may return false, in which case we start from an
	// empty list
	$files   = glob($rootDir . '/Parser/AttributeFilters/*.js') ?: [];
	$files[] = $rootDir . '/Parser/utils.js';
	$files[] = $rootDir . '/Parser/FilterProcessing.js';
	$files[] = $rootDir . '/Parser/' . $logger;
	$files[] = $rootDir . '/Parser/Tag.js';
	$files[] = $rootDir . '/Parser.js';

	// Append render.js if we export the preview method
	if (in_array('preview', $this->exports, true))
	{
		$files[] = $rootDir . '/render.js';
		$src .= '/** @const */ var xsl=' . $this->getStylesheet() . ";\n";
	}
	$src .= implode("\n", array_map('file_get_contents', $files));

	return $src;
}
/**
 * Return the JavaScript representation of the stylesheet
 *
 * The stylesheet is passed through the stylesheet compressor's encode() method
 *
 * @return string
 */
protected function getStylesheet()
{
	$compressor = $this->stylesheetCompressor;

	return $compressor->encode($this->xsl);
}
/**
 * Return the tags' config
 *
 * @return Dictionary Tags' config, using tag names as keys
 */
protected function getTagsConfig()
{
	// A Dictionary is used so that tags' names are preserved
	$dictionary = new Dictionary;
	foreach ($this->config['tags'] as $name => $config)
	{
		// Attributes become a Dictionary too, so that attributes' names are preserved
		if (isset($config['attributes']))
		{
			$config['attributes'] = new Dictionary($config['attributes']);
		}
		$dictionary[$name] = $config;
	}

	return $dictionary;
}
/**
* Inject the parser config into given source
*
* @param string $src Parser's source
* @return string Modified source
*/
protected function injectConfig($src)
{
$config = \array_map(
$config = array_map(
[$this, 'encode'],
$this->configOptimizer->optimize(
[
@@ -220,15 +431,19 @@ class JavaScript
]
)
);
$src = \preg_replace_callback(
'/(\\nvar (' . \implode('|', \array_keys($config)) . '))(;)/',
$src = preg_replace_callback(
'/(\\nvar (' . implode('|', array_keys($config)) . '))(;)/',
function ($m) use ($config)
{
return $m[1] . '=' . $config[$m[2]] . $m[3];
},
$src
);
// Prepend the deduplicated objects
$src = $this->configOptimizer->getVarDeclarations() . $src;
return $src;
}
}

View File

@@ -1,81 +1,183 @@
<?php
/*
/**
* @package s9e\TextFormatter
* @copyright Copyright (c) 2010-2019 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator\JavaScript;
class CallbackGenerator
{
	/**
	 * @var array Path to callbacks in keys, callback signature in values
	 */
	public $callbacks = [
		'tags.*.attributes.*.filterChain.*' => [
			'attrValue' => '*',
			'attrName'  => 'string'
		],
		'tags.*.filterChain.*' => [
			'tag'       => '!Tag',
			'tagConfig' => '!Object'
		]
	];

	/**
	 * @var Encoder
	 */
	protected $encoder;

	/**
	 * Constructor
	 */
	public function __construct()
	{
		$this->encoder = new Encoder;
	}

	/**
	 * Replace all callbacks in given config
	 *
	 * Every entry found at one of the paths listed in $callbacks is replaced with a Code
	 * instance
	 *
	 * @param  array $config Original config
	 * @return array         Modified config
	 */
	public function replaceCallbacks(array $config)
	{
		foreach ($this->callbacks as $path => $params)
		{
			$config = $this->mapArray($config, explode('.', $path), $params);
		}

		return $config;
	}

	/**
	 * Build the list of arguments used in a callback invocation
	 *
	 * @param  array  $params    Callback parameters
	 * @param  array  $localVars Known vars from the calling scope
	 * @return string            JavaScript code
	 */
	protected function buildCallbackArguments(array $params, array $localVars)
	{
		// Remove 'parser' as a parameter, since there's no such thing in JavaScript
		unset($params['parser']);

		// Rebuild the local vars map to include global vars and computed values. Local vars
		// map to themselves and take precedence over the entries added below
		$available  = array_combine(array_keys($localVars), array_keys($localVars));
		$available += [
			'innerText'      => '(tag.getEndTag() ? text.substr(tag.getPos() + tag.getLen(), tag.getEndTag().getPos() - tag.getPos() - tag.getLen()) : "")',
			'logger'         => 'logger',
			'openTags'       => 'openTags',
			'outerText'      => 'text.substr(tag.getPos(), (tag.getEndTag() ? tag.getEndTag().getPos() + tag.getEndTag().getLen() - tag.getPos() : tag.getLen()))',
			'registeredVars' => 'registeredVars',
			'tagText'        => 'text.substr(tag.getPos(), tag.getLen())',
			'text'           => 'text'
		];

		$args = [];
		foreach ($params as $k => $v)
		{
			if (isset($v))
			{
				// Param by value
				$args[] = $this->encoder->encode($v);
			}
			elseif (isset($available[$k]))
			{
				// Param by name that matches a local expression
				$args[] = $available[$k];
			}
			else
			{
				// Anything else is looked up in registeredVars at runtime
				$args[] = 'registeredVars[' . json_encode($k) . ']';
			}
		}

		return implode(',', $args);
	}

	/**
	 * Generate a function from a callback config
	 *
	 * @param  array $config Callback config
	 * @param  array $params Param names as keys, param types as values
	 * @return Code
	 */
	protected function generateFunction(array $config, array $params)
	{
		// returnFalse() and returnTrue() can be used as-is
		if ($config['js'] == 'returnFalse' || $config['js'] == 'returnTrue')
		{
			return new Code((string) $config['js']);
		}

		// Add an empty list of params if none is set
		$config += ['params' => []];

		$src  = $this->getHeader($params);
		$src .= 'function(' . implode(',', array_keys($params)) . '){';
		$src .= 'return ' . $this->parenthesizeCallback($config['js']);
		$src .= '(' . $this->buildCallbackArguments($config['params'], $params) . ');}';

		return new Code($src);
	}

	/**
	 * Generate a function header for given signature
	 *
	 * @param  array  $params Param names as keys, param types as values
	 * @return string
	 */
	protected function getHeader(array $params)
	{
		// Prepare the function's header
		$header = "/**\n";
		foreach ($params as $paramName => $paramType)
		{
			$header .= '* @param {' . $paramType . '} ' . $paramName . "\n";
		}
		$header .= "* @return {*}\n";
		$header .= "*/\n";

		return $header;
	}

	/**
	 * Replace callbacks in given config array
	 *
	 * Walks the config along $path, where "*" stands for every key at that level. Keys that
	 * do not exist are ignored
	 *
	 * @param  array    $array  Original config
	 * @param  string[] $path   Path to callbacks
	 * @param  array    $params Default params
	 * @return array            Modified config
	 */
	protected function mapArray(array $array, array $path, array $params)
	{
		$key  = array_shift($path);
		$keys = ($key === '*') ? array_keys($array) : [$key];
		foreach ($keys as $key)
		{
			if (!isset($array[$key]))
			{
				continue;
			}
			// An exhausted path means we have reached a callback config
			$array[$key] = (empty($path)) ? $this->generateFunction($array[$key], $params) : $this->mapArray($array[$key], $path, $params);
		}

		return $array;
	}

	/**
	 * Add parentheses to a function literal, if necessary
	 *
	 * Will return single vars as-is, and will put anything else between parentheses
	 *
	 * @param  string $callback Original callback
	 * @return string           Modified callback
	 */
	protected function parenthesizeCallback($callback)
	{
		return (preg_match('(^[.\\w]+$)D', $callback)) ? $callback : '(' . $callback . ')';
	}
}

View File

@@ -0,0 +1,49 @@
<?php
/**
* @package s9e\TextFormatter
* @copyright Copyright (c) 2010-2019 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator\JavaScript;
use s9e\TextFormatter\Configurator\FilterableConfigValue;
/**
 * Marks a string as JavaScript source code rather than a plain string value
 */
class Code implements FilterableConfigValue
{
	/**
	 * @var string JavaScript source code
	 */
	public $code;

	/**
	 * Constructor
	 *
	 * @param string $code JavaScript source code
	 */
	public function __construct($code)
	{
		$this->code = $code;
	}

	/**
	 * Return this source code
	 *
	 * @return string Stored code, cast to string
	 */
	public function __toString()
	{
		$code = (string) $this->code;

		return $code;
	}

	/**
	 * {@inheritdoc}
	 *
	 * This value is kept only when the target is "JS"; null is returned for any other target
	 */
	public function filterConfig($target)
	{
		if ($target === 'JS')
		{
			return $this;
		}

		return null;
	}
}

View File

@@ -1,95 +1,212 @@
<?php
/*
/**
* @package s9e\TextFormatter
* @copyright Copyright (c) 2010-2019 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator\JavaScript;
/**
 * This class creates local variables to deduplicate complex configValues
 */
class ConfigOptimizer
{
	/**
	 * @var array Associative array of ConfigValue instances, using variable names as keys
	 */
	protected $configValues;

	/**
	 * @var Encoder
	 */
	protected $encoder;

	/**
	 * @var array Associative array with the length of the JavaScript representation of each
	 *            value, using variable names as keys
	 */
	protected $jsLengths;

	/**
	 * Constructor
	 *
	 * @param Encoder $encoder
	 */
	public function __construct(Encoder $encoder)
	{
		$this->encoder = $encoder;
		$this->reset();
	}

	/**
	 * Return the var declarations for all deduplicated config values
	 *
	 * Declarations are ordered by ascending length of their JavaScript representation and each
	 * deduplicated value is declared once
	 *
	 * @return string JavaScript code
	 */
	public function getVarDeclarations()
	{
		asort($this->jsLengths);

		$src = '';
		foreach (array_keys($this->jsLengths) as $varName)
		{
			$configValue = $this->configValues[$varName];
			if ($configValue->isDeduplicated())
			{
				$src .= '/** @const */ var ' . $varName . '=' . $this->encoder->encode($configValue->getValue()) . ";\n";
			}
		}

		return $src;
	}

	/**
	 * Optimize given config object
	 *
	 * @param  array|Dictionary $object Original config object
	 * @return array|Dictionary         Modified config object
	 */
	public function optimize($object)
	{
		// The object is wrapped in an array so that it is itself recorded as content, then
		// unwrapped from the returned ConfigValue
		return current($this->optimizeObjectContent([$object]))->getValue();
	}

	/**
	 * Clear the deduplicated config values stored in this instance
	 *
	 * @return void
	 */
	public function reset()
	{
		$this->configValues = [];
		$this->jsLengths    = [];
	}

	/**
	 * Test whether given value can be deduplicated
	 *
	 * @param  mixed $value
	 * @return bool
	 */
	protected function canDeduplicate($value)
	{
		if (is_array($value) || $value instanceof Dictionary)
		{
			// Do not deduplicate empty arrays and dictionaries
			return (bool) count($value);
		}

		return ($value instanceof Code);
	}

	/**
	 * Mark ConfigValue instances that have been used multiple times
	 *
	 * Values are visited by descending length of their JavaScript representation
	 *
	 * @return void
	 */
	protected function deduplicateConfigValues()
	{
		arsort($this->jsLengths);
		foreach (array_keys($this->jsLengths) as $varName)
		{
			$configValue = $this->configValues[$varName];
			if ($configValue->getUseCount() > 1)
			{
				$configValue->deduplicate();
			}
		}
	}

	/**
	 * Return the name of the variable that will hold a given value
	 *
	 * The name is "o" followed by the CRC32 of the JavaScript representation, as 8 uppercase
	 * hex digits
	 *
	 * @param  string $js JavaScript representation of the value
	 * @return string
	 */
	protected function getVarName($js)
	{
		return sprintf('o%08X', crc32($js));
	}

	/**
	 * Test whether given value is iterable
	 *
	 * @param  mixed $value
	 * @return bool
	 */
	protected function isIterable($value)
	{
		return (is_array($value) || $value instanceof Dictionary);
	}

	/**
	 * Optimize given object's content
	 *
	 * @param  array|Dictionary $object Original object
	 * @return array|Dictionary         Modified object
	 */
	protected function optimizeObjectContent($object)
	{
		$object = $this->recordObject($object);
		$this->deduplicateConfigValues();

		return $object->getValue();
	}

	/**
	 * Record a given config object as a ConfigValue instance
	 *
	 * @param  array|Code|Dictionary $object Original object
	 * @return ConfigValue                   Stored ConfigValue instance
	 */
	protected function recordObject($object)
	{
		// The variable name is derived from the object's representation before its content is
		// replaced with ConfigValue instances
		$js      = $this->encoder->encode($object);
		$varName = $this->getVarName($js);

		if ($this->isIterable($object))
		{
			$object = $this->recordObjectContent($object);
		}

		if (!isset($this->configValues[$varName]))
		{
			$this->configValues[$varName] = new ConfigValue($object, $varName);
			$this->jsLengths[$varName]    = strlen($js);
		}
		$this->configValues[$varName]->incrementUseCount();

		return $this->configValues[$varName];
	}

	/**
	 * Record the content of given config object
	 *
	 * @param  array|Dictionary $object Original object
	 * @return array|Dictionary         Modified object containing ConfigValue instances
	 */
	protected function recordObjectContent($object)
	{
		foreach ($object as $k => $v)
		{
			if ($this->canDeduplicate($v) && !$this->shouldPreserve($v))
			{
				$object[$k] = $this->recordObject($v);
			}
		}

		return $object;
	}

	/**
	 * Test whether given value should be preserved and not deduplicated
	 *
	 * @param  mixed $value
	 * @return bool
	 */
	protected function shouldPreserve($value)
	{
		// Simple variables should be kept as-is
		return ($value instanceof Code && preg_match('(^\\w+$)', $value));
	}
}

View File

@@ -1,56 +1,130 @@
<?php
/*
/**
* @package s9e\TextFormatter
* @copyright Copyright (c) 2010-2019 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator\JavaScript;
class ConfigValue
{
	/**
	 * @var bool Whether this value has been marked as deduplicated
	 */
	protected $isDeduplicated = false;

	/**
	 * @var string Name of the variable that holds this value
	 */
	protected $name;

	/**
	 * @var integer Number of times this value is used or referenced
	 */
	protected $useCount = 0;

	/**
	 * @var array|Code|Dictionary Original value
	 */
	protected $value;

	/**
	 * @var string Variable name assigned to this value
	 */
	protected $varName;

	/**
	 * Constructor
	 *
	 * @param array|Code|Dictionary $value   Original value
	 * @param string                $varName
	 */
	public function __construct($value, $varName)
	{
		$this->value   = $value;
		$this->varName = $varName;
	}

	/**
	 * Mark this value as deduplicated if it's been used more than once
	 *
	 * When that happens, every use but one is subtracted from this value's counter and from
	 * the counter of each ConfigValue it contains
	 *
	 * @return void
	 */
	public function deduplicate()
	{
		if ($this->useCount > 1)
		{
			$this->isDeduplicated = true;
			$this->decrementUseCount($this->useCount - 1);
		}
	}

	/**
	 * Return the number of times this value has been used or referenced
	 *
	 * @return integer
	 */
	public function getUseCount()
	{
		return $this->useCount;
	}

	/**
	 * Return the PHP value held by this instance
	 *
	 * @return array|Code|Dictionary
	 */
	public function getValue()
	{
		return $this->value;
	}

	/**
	 * Return the variable name assigned to this value
	 *
	 * @return string
	 */
	public function getVarName()
	{
		return $this->varName;
	}

	/**
	 * Increment the use counter
	 *
	 * @return void
	 */
	public function incrementUseCount()
	{
		++$this->useCount;
	}

	/**
	 * Return whether this value is marked as deduplicated
	 *
	 * @return bool
	 */
	public function isDeduplicated()
	{
		return $this->isDeduplicated;
	}

	/**
	 * Decrement the use counter of this value as well as the values it contains
	 *
	 * @param  integer $step How much to remove from the counter
	 * @return void
	 */
	protected function decrementUseCount($step = 1)
	{
		$this->useCount -= $step;
		foreach ($this->value as $value)
		{
			// Only nested ConfigValue instances carry a counter of their own
			if ($value instanceof ConfigValue)
			{
				$value->decrementUseCount($step);
			}
		}
	}
}

View File

@@ -1,21 +1,32 @@
<?php
/*
/**
* @package s9e\TextFormatter
* @copyright Copyright (c) 2010-2019 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator\JavaScript;
use ArrayObject;
use s9e\TextFormatter\Configurator\FilterableConfigValue;
use s9e\TextFormatter\Configurator\Helpers\ConfigHelper;
/**
 * ArrayObject used to flag arrays whose keys must be preserved in JavaScript
 */
class Dictionary extends ArrayObject implements FilterableConfigValue
{
	/**
	 * {@inheritdoc}
	 *
	 * For the "JS" target, the content is filtered and returned as a new Dictionary. For any
	 * other target, a plain array copy of the content is returned
	 */
	public function filterConfig($target)
	{
		$content = $this->getArrayCopy();
		if ($target !== 'JS')
		{
			return $content;
		}

		return new Dictionary(ConfigHelper::filterConfig($content, $target));
	}
}

View File

@@ -1,20 +1,35 @@
<?php
/*
/**
* @package s9e\TextFormatter
* @copyright Copyright (c) 2010-2019 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator\JavaScript;
use RuntimeException;
use s9e\TextFormatter\Configurator\Items\Regexp;
use s9e\TextFormatter\Configurator\JavaScript\Code;
use s9e\TextFormatter\Configurator\JavaScript\Dictionary;
use s9e\TextFormatter\Configurator\JavaScript\Encoder;
class Encoder
{
/**
* @var callable[]
*/
public $objectEncoders;
/**
* @var callable[]
*/
public $typeEncoders;
/**
* Constructor
*
* Will set up the default encoders
*/
public function __construct()
{
$ns = 's9e\\TextFormatter\\Configurator\\';
@@ -33,20 +48,45 @@ class Encoder
'string' => [$this, 'encodeScalar']
];
}
/**
 * Encode a value into JavaScript
 *
 * The encoder is selected from $typeEncoders using the value's gettype() name
 *
 * @param  mixed  $value
 * @return string
 *
 * @throws RuntimeException if no encoder is registered for the value's type
 */
public function encode($value)
{
	$type = gettype($value);
	if (!isset($this->typeEncoders[$type]))
	{
		throw new RuntimeException('Cannot encode ' . $type . ' value');
	}

	return $this->typeEncoders[$type]($value);
}
/**
 * Encode an array to JavaScript
 *
 * Numerically indexed arrays and associative arrays are handled by separate methods
 *
 * @param  array  $array
 * @return string
 */
protected function encodeArray(array $array)
{
	if ($this->isIndexedArray($array))
	{
		return $this->encodeIndexedArray($array);
	}

	return $this->encodeAssociativeArray($array);
}
protected function encodeAssociativeArray(array $array, $preserveNames = \false)
/**
* Encode an associative array to JavaScript
*
* @param array $array
* @param bool $preserveNames
* @return string
*/
protected function encodeAssociativeArray(array $array, $preserveNames = false)
{
\ksort($array);
ksort($array);
$src = '{';
$sep = '';
foreach ($array as $k => $v)
@@ -55,60 +95,160 @@ class Encoder
$sep = ',';
}
$src .= '}';
return $src;
}
/**
 * Encode a boolean value into JavaScript
 *
 * @param  bool   $value
 * @return string        "!0" for a truthy value, "!1" otherwise
 */
protected function encodeBoolean($value)
{
	if ($value)
	{
		return '!0';
	}

	return '!1';
}
/**
 * Encode a Code instance into JavaScript
 *
 * @param  Code   $code
 * @return string       The instance cast to string
 */
protected function encodeCode(Code $code)
{
	$js = (string) $code;

	return $js;
}
/**
 * Encode a ConfigValue instance into JavaScript
 *
 * @param  ConfigValue $configValue
 * @return string                   Variable name if the value is deduplicated, otherwise the
 *                                  encoded form of the value it holds
 */
protected function encodeConfigValue(ConfigValue $configValue)
{
	if ($configValue->isDeduplicated())
	{
		return $configValue->getVarName();
	}

	return $this->encode($configValue->getValue());
}
/**
 * Encode a Dictionary object into a JavaScript object
 *
 * The content is encoded as an associative array with $preserveNames set to true
 *
 * @param  Dictionary $dict
 * @return string
 */
protected function encodeDictionary(Dictionary $dict)
{
	return $this->encodeAssociativeArray($dict->getArrayCopy(), true);
}
/**
 * Encode an indexed array to JavaScript
 *
 * Each element is passed through encode() and the results are joined with commas between
 * square brackets
 *
 * @param  array  $array
 * @return string
 */
protected function encodeIndexedArray(array $array)
{
	return '[' . implode(',', array_map([$this, 'encode'], $array)) . ']';
}
/**
 * Encode an object into JavaScript
 *
 * The first entry of $objectEncoders whose class name matches the object is used
 *
 * @param  object $object
 * @return string
 *
 * @throws RuntimeException if no registered encoder matches the object
 */
protected function encodeObject($object)
{
	foreach ($this->objectEncoders as $className => $callback)
	{
		if ($object instanceof $className)
		{
			return $callback($object);
		}
	}

	// Only reached once every registered encoder has been tried
	throw new RuntimeException('Cannot encode instance of ' . get_class($object));
}
/**
 * Encode an object property name into JavaScript
 *
 * The name is quoted with json_encode() if $preserveNames is set or if the name cannot be
 * used unquoted. Otherwise it is returned as-is
 *
 * @param  string $name
 * @param  bool   $preserveNames
 * @return string
 */
protected function encodePropertyName($name, $preserveNames)
{
	return ($preserveNames || !$this->isLegalProp($name)) ? json_encode($name) : $name;
}
/**
 * Encode a Regexp object into JavaScript
 *
 * @param  Regexp $regexp
 * @return string         Whatever the object's getJS() method returns
 */
protected function encodeRegexp(Regexp $regexp)
{
	$js = $regexp->getJS();

	return $js;
}
/**
 * Encode a scalar value into JavaScript
 *
 * @param  mixed  $value
 * @return string        Result of json_encode() on the value
 */
protected function encodeScalar($value)
{
	return json_encode($value);
}
/**
 * Test whether given array is a numerically indexed array
 *
 * An empty array counts as indexed. Otherwise the keys must be exactly 0..n-1, in order
 *
 * @param  array $array
 * @return bool
 */
protected function isIndexedArray(array $array)
{
	if (empty($array))
	{
		return true;
	}

	// The isset() test rules out most associative arrays before the keys are compared
	if (isset($array[0]) && array_keys($array) === range(0, count($array) - 1))
	{
		return true;
	}

	return false;
}
/**
 * Test whether a string can be used as a property name, unquoted
 *
 * Names found in the reserved words list are rejected, as are names that do not match the
 * identifier pattern below
 *
 * @link http://es5.github.io/#A.1
 *
 * @param  string $name Property's name
 * @return bool
 */
protected function isLegalProp($name)
{
	/**
	 * @link https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Reserved_Words
	 * @link http://www.crockford.com/javascript/survey.html
	 */
	$reserved = ['abstract', 'boolean', 'break', 'byte', 'case', 'catch', 'char', 'class', 'const', 'continue', 'debugger', 'default', 'delete', 'do', 'double', 'else', 'enum', 'export', 'extends', 'false', 'final', 'finally', 'float', 'for', 'function', 'goto', 'if', 'implements', 'import', 'in', 'instanceof', 'int', 'interface', 'let', 'long', 'native', 'new', 'null', 'package', 'private', 'protected', 'public', 'return', 'short', 'static', 'super', 'switch', 'synchronized', 'this', 'throw', 'throws', 'transient', 'true', 'try', 'typeof', 'var', 'void', 'volatile', 'while', 'with'];
	if (in_array($name, $reserved, true))
	{
		return false;
	}

	return (bool) preg_match('#^(?![0-9])[$_\\pL][$_\\pL\\pNl]+$#Du', $name);
}
}

View File

@@ -0,0 +1,145 @@
<?php
/**
* @package s9e\TextFormatter
* @copyright Copyright (c) 2010-2019 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator\JavaScript;
use InvalidArgumentException;
/**
* Provides the JavaScript source of functions that stand in for PHP functions
*
* Sources are looked up in the static cache first, then in the "functions" directory located
* next to this file
*/
class FunctionProvider
{
	/**
	* @var array Function name as keys, JavaScript source as values
	*/
	public static $cache = [
		'addslashes' => 'function(str)
{
return str.replace(/["\'\\\\]/g, \'\\\\$&\').replace(/\\u0000/g, \'\\\\0\');
}',
		'dechex' => 'function(str)
{
return parseInt(str).toString(16);
}',
		'intval' => 'function(str)
{
return parseInt(str) || 0;
}',
		'ltrim' => 'function(str)
{
return str.replace(/^[ \\n\\r\\t\\0\\x0B]+/g, \'\');
}',
		'mb_strtolower' => 'function(str)
{
return str.toLowerCase();
}',
		'mb_strtoupper' => 'function(str)
{
return str.toUpperCase();
}',
		'mt_rand' => 'function(min, max)
{
return (min + Math.floor(Math.random() * (max + 1 - min)));
}',
		'rawurlencode' => 'function(str)
{
return encodeURIComponent(str).replace(
/[!\'()*]/g,
/**
* @param {string} c
*/
function(c)
{
return \'%\' + c.charCodeAt(0).toString(16).toUpperCase();
}
);
}',
		'rtrim' => 'function(str)
{
return str.replace(/[ \\n\\r\\t\\0\\x0B]+$/g, \'\');
}',
		'str_rot13' => 'function(str)
{
return str.replace(
/[a-z]/gi,
function(c)
{
return String.fromCharCode(c.charCodeAt(0) + ((c.toLowerCase() < \'n\') ? 13 : -13));
}
);
}',
		// The replacement must be "$1": JavaScript's String.prototype.replace() does not
		// understand "\1" and would insert it literally
		'stripslashes' => 'function(str)
{
// NOTE: this will not correctly transform \\0 into a NULL byte. I consider this a feature
// rather than a bug. There\'s no reason to use NULL bytes in a text.
return str.replace(/\\\\([\\s\\S]?)/g, \'$1\');
}',
		'strrev' => 'function(str)
{
return str.split(\'\').reverse().join(\'\');
}',
		'strtolower' => 'function(str)
{
return str.toLowerCase();
}',
		'strtotime' => 'function(str)
{
return Date.parse(str) / 1000;
}',
		'strtoupper' => 'function(str)
{
return str.toUpperCase();
}',
		'trim' => 'function(str)
{
return str.replace(/^[ \\n\\r\\t\\0\\x0B]+/g, \'\').replace(/[ \\n\\r\\t\\0\\x0B]+$/g, \'\');
}',
		// charAt(0) returns an empty string for empty input whereas str[0] would be undefined
		// and make the call to toUpperCase() throw
		'ucfirst' => 'function(str)
{
return str.charAt(0).toUpperCase() + str.substr(1);
}',
		'ucwords' => 'function(str)
{
return str.replace(
/(?:^|\\s)[a-z]/g,
function(m)
{
return m.toUpperCase()
}
);
}',
		'urldecode' => 'function(str)
{
return decodeURIComponent("" + str);
}',
		'urlencode' => 'function(str)
{
return encodeURIComponent(str);
}'
	];

	/**
	* Return a function's source from the cache or the filesystem
	*
	* The cache takes precedence. The filesystem is only consulted for names made exclusively
	* of lowercase letters, digits and underscores, which keeps the name from escaping the
	* "functions" directory
	*
	* @param  string $funcName Function's name
	* @return string           Function's source
	*
	* @throws InvalidArgumentException if the function is neither cached nor found on disk
	*/
	public static function get($funcName)
	{
		if (isset(self::$cache[$funcName]))
		{
			return self::$cache[$funcName];
		}

		if (preg_match('(^[a-z_0-9]+$)D', $funcName))
		{
			$filepath = __DIR__ . '/functions/' . $funcName . '.js';
			if (file_exists($filepath))
			{
				return file_get_contents($filepath);
			}
		}

		throw new InvalidArgumentException("Unknown function '" . $funcName . "'");
	}
}

View File

@@ -1,19 +1,42 @@
<?php
/*
/**
* @package s9e\TextFormatter
* @copyright Copyright (c) 2010-2019 The s9e Authors
* @license http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator\JavaScript;
use ReflectionClass;
use s9e\TextFormatter\Configurator\Collections\PluginCollection;
class HintGenerator
{
/**
* @var array Config on which hints are based
*/
protected $config;
/**
* @var array Generated hints
*/
protected $hints;
/**
* @var PluginCollection Configured plugins
*/
protected $plugins;
/**
* @var string XSL stylesheet on which hints are based
*/
protected $xsl;
/**
* Generate a HINT object that contains information about the configuration
*
* @return string JavaScript Code
*/
public function getHints()
{
$this->hints = [];
@@ -21,34 +44,91 @@ class HintGenerator
$this->setRenderingHints();
$this->setRulesHints();
$this->setTagsHints();
// Build the source. Note that Closure Compiler seems to require that each of HINT's
// properties be declared as a const
$js = "/** @const */ var HINT={};\n";
\ksort($this->hints);
ksort($this->hints);
foreach ($this->hints as $hintName => $hintValue)
$js .= '/** @const */ HINT.' . $hintName . '=' . \json_encode($hintValue) . ";\n";
{
$js .= '/** @const */ HINT.' . $hintName . '=' . json_encode($hintValue) . ";\n";
}
return $js;
}
/**
* Set the config on which hints are based
*
* The array is stored as-is. Other methods of this class read its "tags" and "rootContext"
* elements when hints are generated
*
* @param  array $config
* @return void
*/
public function setConfig(array $config)
{
	$this->config = $config;
}
/**
* Set the collection of plugins
*
* The collection is stored as-is. It is iterated over when plugin hints are collected and
* getJSHints() is called on each of its elements
*
* @param  PluginCollection $plugins
* @return void
*/
public function setPlugins(PluginCollection $plugins)
{
	$this->plugins = $plugins;
}
/**
* Set the XSL on which hints are based
*
* The string is stored as-is. Rendering hints are derived from it by searching for
* "data-s9e-livepreview-*" attribute names
*
* @param  string $xsl
* @return void
*/
public function setXSL($xsl)
{
	$this->xsl = $xsl;
}
/**
* Collect the custom hints exposed by each configured plugin
*
* Hints are merged with the "+" array operator, so a hint that already exists keeps its
* current value (assuming getJSHints() returns an array)
*
* @return void
*/
protected function setPluginsHints()
{
	foreach ($this->plugins as $plugin)
	{
		$pluginHints  = $plugin->getJSHints();
		$this->hints += $pluginHints;
	}
}
/**
* Set hints related to rendering
*
* Each hint is set to 1 if the corresponding attribute name appears anywhere in the XSL,
* or 0 otherwise
*
* @return void
*/
protected function setRenderingHints()
{
	// Test for post-processing in templates. Theoretically allows for false positives and
	// false negatives, but not in any realistic setting
	$hints = [
		'postProcessing' => 'data-s9e-livepreview-postprocess',
		'ignoreAttrs'    => 'data-s9e-livepreview-ignore-attrs',
		'hash'           => 'data-s9e-livepreview-hash',
		'onRender'       => 'data-s9e-livepreview-onrender',
		'onUpdate'       => 'data-s9e-livepreview-onupdate'
	];
	foreach ($hints as $hintName => $match)
	{
		$this->hints[$hintName] = (int) (strpos($this->xsl, $match) !== false);
	}
}
/**
* Set hints related to rules
*
* @return void
*/
protected function setRulesHints()
{
$this->hints['closeAncestor'] = 0;
@@ -56,33 +136,62 @@ class HintGenerator
$this->hints['createChild'] = 0;
$this->hints['fosterParent'] = 0;
$this->hints['requireAncestor'] = 0;
$flags = 0;
foreach ($this->config['tags'] as $tagConfig)
{
foreach (\array_intersect_key($tagConfig['rules'], $this->hints) as $k => $v)
// Test which rules are in use
foreach (array_intersect_key($tagConfig['rules'], $this->hints) as $k => $v)
{
$this->hints[$k] = 1;
}
$flags |= $tagConfig['rules']['flags'];
}
$flags |= $this->config['rootContext']['flags'];
// Iterate over Parser::RULE_* constants and test which flags are set
$parser = new ReflectionClass('s9e\\TextFormatter\\Parser');
foreach ($parser->getConstants() as $constName => $constValue)
if (\substr($constName, 0, 5) === 'RULE_')
{
if (substr($constName, 0, 5) === 'RULE_')
{
// This will set HINT.RULE_AUTO_CLOSE and others
$this->hints[$constName] = ($flags & $constValue) ? 1 : 0;
}
}
}
/**
* Update the attribute-related hints using the config of a single tag
*
* Turns the "attributeDefaultValue" hint on as soon as one attribute of the tag has a
* "defaultValue" element set. Tags without attributes leave the hints untouched
*
* @param  array $tagConfig
* @return void
*/
protected function setTagAttributesHints(array $tagConfig)
{
	if (!empty($tagConfig['attributes']))
	{
		foreach ($tagConfig['attributes'] as $attribute)
		{
			// OR-ing keeps the hint on once any attribute has turned it on
			$this->hints['attributeDefaultValue'] |= isset($attribute['defaultValue']);
		}
	}
}
/**
* Set hints related to tags config
*
* Resets the "attributeDefaultValue" and "namespaces" hints to 0 then inspects every
* configured tag. "namespaces" is turned on if any tag name contains a colon
*
* @return void
*/
protected function setTagsHints()
{
	$this->hints['attributeDefaultValue'] = 0;
	$this->hints['namespaces']            = 0;
	foreach ($this->config['tags'] as $tagName => $tagConfig)
	{
		// OR-ing keeps the hint on once any tag has turned it on
		$this->hints['namespaces'] |= (strpos($tagName, ':') !== false);
		$this->setTagAttributesHints($tagConfig);
	}
}

Some files were not shown because too many files have changed in this diff Show More