This commit is contained in:
Paolo A
2024-08-13 13:44:16 +00:00
parent 1bbb23088d
commit e796d76612
4001 changed files with 30101 additions and 40075 deletions

View File

@@ -1,5 +1,7 @@
<?php
declare(strict_types=1);
/*
* This file is part of the league/commonmark package.
*
@@ -18,39 +20,34 @@ namespace League\CommonMark\Util;
*
* @internal
*
* @phpstan-template TKey
* @phpstan-template TValue
* @phpstan-implements \IteratorAggregate<TKey, TValue>
* @phpstan-implements \ArrayAccess<TKey, TValue>
* @phpstan-template T
* @phpstan-implements \IteratorAggregate<int, T>
* @phpstan-implements \ArrayAccess<int, T>
*/
class ArrayCollection implements \IteratorAggregate, \Countable, \ArrayAccess
final class ArrayCollection implements \IteratorAggregate, \Countable, \ArrayAccess
{
/**
* @var array<int|string, mixed>
* @phpstan-var array<TKey, TValue>
* @var array<int, mixed>
* @phpstan-var array<int, T>
*/
private $elements;
private array $elements;
/**
* Constructor
*
* @param array<int|string, mixed> $elements
*
* @phpstan-param array<TKey, TValue> $elements
* @phpstan-param array<int, T> $elements
*/
public function __construct(array $elements = [])
{
$this->elements = $elements;
if (self::class !== static::class) {
@\trigger_error('Extending the ArrayCollection class is deprecated in league/commonmark 1.6 and will not be allowed in 2.0', \E_USER_DEPRECATED);
}
}
/**
* @return mixed|false
*
* @phpstan-return TValue|false
* @phpstan-return T|false
*/
public function first()
{
@@ -60,7 +57,7 @@ class ArrayCollection implements \IteratorAggregate, \Countable, \ArrayAccess
/**
* @return mixed|false
*
* @phpstan-return TValue|false
* @phpstan-return T|false
*/
public function last()
{
@@ -70,153 +67,16 @@ class ArrayCollection implements \IteratorAggregate, \Countable, \ArrayAccess
/**
* Retrieve an external iterator
*
* @return \ArrayIterator<int|string, mixed>
* @return \ArrayIterator<int, mixed>
*
* @phpstan-return \ArrayIterator<int, T>
*/
#[\ReturnTypeWillChange]
public function getIterator()
public function getIterator(): \ArrayIterator
{
return new \ArrayIterator($this->elements);
}
/**
 * Append an element to the end of the collection.
 *
 * Emits a suppressed E_USER_DEPRECATED notice on every call.
 *
 * @param mixed $element Value to append
 *
 * @return bool Always true
 *
 * @phpstan-param TValue $element
 *
 * @deprecated since league/commonmark 1.4; use `$collection[] = $value` instead
 */
public function add($element): bool
{
    $notice = sprintf('The "%s:%s" method is deprecated since league/commonmark 1.4, use "%s" instead.', self::class, 'add()', '$collection[] = $value');
    @trigger_error($notice, E_USER_DEPRECATED);

    $this->elements[] = $element;

    return true;
}
/**
 * Store a value under the given key by delegating to offsetSet().
 *
 * Emits a suppressed E_USER_DEPRECATED notice on every call.
 *
 * @param int|string $key   Key to write
 * @param mixed      $value Value to store
 *
 * @return void
 *
 * @phpstan-param TKey   $key
 * @phpstan-param TValue $value
 *
 * @deprecated since league/commonmark 1.4; use `$collection[$key] = $value` instead
 */
public function set($key, $value)
{
    $notice = sprintf('The "%s:%s" method is deprecated since league/commonmark 1.4, use "%s" instead.', self::class, 'set()', '$collection[$key] = $value');
    @trigger_error($notice, E_USER_DEPRECATED);

    $this->offsetSet($key, $value);
}
/**
 * Fetch the value stored under the given key by delegating to offsetGet().
 *
 * Emits a suppressed E_USER_DEPRECATED notice on every call.
 *
 * @param int|string $key Key to read
 *
 * @return mixed Whatever offsetGet() returns for the key
 *
 * @phpstan-param TKey $key
 *
 * @phpstan-return TValue|null
 *
 * @deprecated since league/commonmark 1.4; use `$collection[$key]` instead
 */
public function get($key)
{
    $notice = sprintf('The "%s:%s" method is deprecated since league/commonmark 1.4, use "%s" instead.', self::class, 'get()', '$collection[$key]');
    @trigger_error($notice, E_USER_DEPRECATED);

    $value = $this->offsetGet($key);

    return $value;
}
/**
 * Remove the entry stored under the given key and hand back its value.
 *
 * Emits a suppressed E_USER_DEPRECATED notice on every call.
 *
 * @param int|string $key Key to remove
 *
 * @return mixed The removed value, or null when the key is not present
 *
 * @phpstan-param TKey $key
 *
 * @phpstan-return TValue|null
 *
 * @deprecated since league/commonmark 1.4; use `unset($collection[$key])` instead
 */
public function remove($key)
{
    $notice = sprintf('The "%s:%s" method is deprecated since league/commonmark 1.4, use "%s" instead.', self::class, 'remove()', 'unset($collection[$key])');
    @trigger_error($notice, E_USER_DEPRECATED);

    if (\array_key_exists($key, $this->elements)) {
        $previous = $this->elements[$key];
        unset($this->elements[$key]);

        return $previous;
    }

    // Unknown key: nothing to remove
    return null;
}
/**
 * Report whether the collection holds no elements.
 *
 * Emits a suppressed E_USER_DEPRECATED notice on every call.
 *
 * @return bool True when there are zero elements
 *
 * @deprecated since league/commonmark 1.4; use `count($collection) === 0` instead
 */
public function isEmpty(): bool
{
    $notice = sprintf('The "%s:%s" method is deprecated since league/commonmark 1.4, use "%s" instead.', self::class, 'isEmpty()', 'count($collection) === 0');
    @trigger_error($notice, E_USER_DEPRECATED);

    return $this->elements === [];
}
/**
 * Report whether the given value is stored in the collection (strict comparison).
 *
 * Emits a suppressed E_USER_DEPRECATED notice on every call.
 *
 * @param mixed $element Value to look for
 *
 * @return bool True when an identical (===) value is present
 *
 * @phpstan-param TValue $element
 *
 * @deprecated since league/commonmark 1.4; use `in_array($value, $collection->toArray(), true)` instead
 */
public function contains($element): bool
{
    $notice = sprintf('The "%s:%s" method is deprecated since league/commonmark 1.4, use "%s" instead.', self::class, 'contains()', 'in_array($value, $collection->toArray(), true)');
    @trigger_error($notice, E_USER_DEPRECATED);

    foreach ($this->elements as $candidate) {
        if ($candidate === $element) {
            return true;
        }
    }

    return false;
}
/**
 * Find the key of the first element identical (===) to the given value.
 *
 * Emits a suppressed E_USER_DEPRECATED notice on every call.
 *
 * @param mixed $element Value to look for
 *
 * @return mixed|false The key of the first match, or false when there is none
 *
 * @phpstan-param TValue $element
 *
 * @deprecated since league/commonmark 1.4; use `array_search($value, $collection->toArray(), true)` instead
 */
public function indexOf($element)
{
    $notice = sprintf('The "%s:%s" method is deprecated since league/commonmark 1.4, use "%s" instead.', self::class, 'indexOf()', 'array_search($value, $collection->toArray(), true)');
    @trigger_error($notice, E_USER_DEPRECATED);

    foreach ($this->elements as $key => $candidate) {
        if ($candidate === $element) {
            return $key;
        }
    }

    return false;
}
/**
 * Report whether the given key is present, even if its value is null.
 *
 * Emits a suppressed E_USER_DEPRECATED notice on every call.
 *
 * @param int|string $key Key to look for
 *
 * @return bool True when the key exists in the collection
 *
 * @phpstan-param TKey $key
 *
 * @deprecated since league/commonmark 1.4; use `isset($collection[$key])` instead
 */
public function containsKey($key): bool
{
    $notice = sprintf('The "%s:%s" method is deprecated since league/commonmark 1.4, use "%s" instead.', self::class, 'containsKey()', 'isset($collection[$key])');
    @trigger_error($notice, E_USER_DEPRECATED);

    $present = \array_key_exists($key, $this->elements);

    return $present;
}
/**
* Count elements of an object
*
@@ -230,11 +90,9 @@ class ArrayCollection implements \IteratorAggregate, \Countable, \ArrayAccess
/**
* Whether an offset exists
*
* @param int|string $offset An offset to check for.
* {@inheritDoc}
*
* @return bool true on success or false on failure.
*
* @phpstan-param TKey $offset
* @phpstan-param int $offset
*/
public function offsetExists($offset): bool
{
@@ -244,13 +102,11 @@ class ArrayCollection implements \IteratorAggregate, \Countable, \ArrayAccess
/**
* Offset to retrieve
*
* @param int|string $offset
* {@inheritDoc}
*
* @return mixed|null
* @phpstan-param int $offset
*
* @phpstan-param TKey $offset
*
* @phpstan-return TValue|null
* @phpstan-return T|null
*/
#[\ReturnTypeWillChange]
public function offsetGet($offset)
@@ -261,16 +117,13 @@ class ArrayCollection implements \IteratorAggregate, \Countable, \ArrayAccess
/**
* Offset to set
*
* @param int|string|null $offset The offset to assign the value to.
* @param mixed $value The value to set.
* {@inheritDoc}
*
* @return void
*
* @phpstan-param TKey|null $offset
* @phpstan-param TValue $value
* @phpstan-param int|null $offset
* @phpstan-param T $value
*/
#[\ReturnTypeWillChange]
public function offsetSet($offset, $value)
public function offsetSet($offset, $value): void
{
if ($offset === null) {
$this->elements[] = $value;
@@ -282,16 +135,14 @@ class ArrayCollection implements \IteratorAggregate, \Countable, \ArrayAccess
/**
* Offset to unset
*
* @param int|string $offset The offset to unset.
* {@inheritDoc}
*
* @return void
*
* @phpstan-param TKey $offset
* @phpstan-param int $offset
*/
#[\ReturnTypeWillChange]
public function offsetUnset($offset)
public function offsetUnset($offset): void
{
if (!\array_key_exists($offset, $this->elements)) {
if (! \array_key_exists($offset, $this->elements)) {
return;
}
@@ -301,12 +152,9 @@ class ArrayCollection implements \IteratorAggregate, \Countable, \ArrayAccess
/**
* Returns a subset of the array
*
* @param int $offset
* @param int|null $length
* @return array<int, mixed>
*
* @return array<int|string, mixed>
*
* @phpstan-return array<TKey, TValue>
* @phpstan-return array<int, T>
*/
public function slice(int $offset, ?int $length = null): array
{
@@ -314,42 +162,12 @@ class ArrayCollection implements \IteratorAggregate, \Countable, \ArrayAccess
}
/**
* @return array<int|string, mixed>
* @return array<int, mixed>
*
* @phpstan-return array<TKey, TValue>
* @phpstan-return array<int, T>
*/
public function toArray(): array
{
return $this->elements;
}
/**
 * Discard the current elements and use the given array instead.
 *
 * Emits a suppressed E_USER_DEPRECATED notice on every call.
 *
 * @param array<int|string, mixed> $elements Replacement contents
 *
 * @return $this The same collection, for chaining
 *
 * @phpstan-param array<TKey, TValue> $elements
 *
 * @deprecated since league/commonmark 1.4
 */
public function replaceWith(array $elements)
{
    $notice = sprintf('The "%s:%s" method is deprecated since league/commonmark 1.4.', self::class, 'replaceWith()');
    @trigger_error($notice, E_USER_DEPRECATED);

    $this->elements = $elements;

    return $this;
}
/**
 * Drop every element whose value is falsy, keeping the remaining keys as they are.
 *
 * Emits a suppressed E_USER_DEPRECATED notice on every call.
 *
 * @deprecated since league/commonmark 1.4
 *
 * @return void
 */
public function removeGaps()
{
    $notice = sprintf('The "%s:%s" method is deprecated since league/commonmark 1.4.', self::class, 'removeGaps()');
    @trigger_error($notice, E_USER_DEPRECATED);

    // Same truthiness test array_filter() applies when no callback is given
    $this->elements = \array_filter($this->elements, static function ($value): bool {
        return (bool) $value;
    });
}
}

View File

@@ -1,122 +0,0 @@
<?php
/*
* This file is part of the league/commonmark package.
*
* (c) Colin O'Dell <colinodell@gmail.com>
*
* Original code based on the CommonMark JS reference parser (https://bitly.com/commonmark-js)
* - (c) John MacFarlane
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
namespace League\CommonMark\Util;
/**
 * Array-backed configuration store.
 *
 * Values can be addressed either by a top-level key or by a slash-delimited
 * path ("a/b/c" maps to $config['a']['b']['c']).
 */
final class Configuration implements ConfigurationInterface
{
    /** @var array<string, mixed> */
    private $config;

    /**
     * @param array<string, mixed> $config Initial configuration values
     */
    public function __construct(array $config = [])
    {
        $this->config = $config;
    }

    /**
     * Recursively overlay the given values on top of the current configuration.
     *
     * @param array<string, mixed> $config Values to merge in
     *
     * @return void
     */
    public function merge(array $config = [])
    {
        $this->config = \array_replace_recursive($this->config, $config);
    }

    /**
     * Discard the current configuration and use the given array instead.
     *
     * @param array<string, mixed> $config Replacement configuration
     *
     * @return void
     */
    public function replace(array $config = [])
    {
        $this->config = $config;
    }

    /**
     * Return the value at the given key or path, or $default when it is not set.
     *
     * Because the lookup relies on isset(), a key whose stored value is null
     * is treated as "not set" and yields $default.
     *
     * @param string|null $key     Key or slash-delimited path; null (deprecated) returns the whole array
     * @param mixed|null  $default Value returned when nothing is set at $key
     *
     * @return mixed|null
     */
    public function get(?string $key = null, $default = null)
    {
        if ($key === null) {
            @\trigger_error('Calling Configuration::get() without a $key is deprecated in league/commonmark 1.6 and will not be allowed in 2.0', \E_USER_DEPRECATED);

            return $this->config;
        }

        // accept a/b/c as ['a']['b']['c']
        // (strpos() yields 0 for a leading slash, so such a key is handled as a plain key)
        if (\strpos($key, '/')) {
            return $this->getConfigByPath($key, $default);
        }

        if (!isset($this->config[$key])) {
            return $default;
        }

        return $this->config[$key];
    }

    /**
     * Store a value at the given key or slash-delimited path.
     *
     * @param string     $key   Key or slash-delimited path
     * @param mixed|null $value Value to store; omitting it is deprecated
     *
     * @return void
     */
    public function set(string $key, $value = null)
    {
        if (\func_num_args() === 1) {
            @\trigger_error('Calling Configuration::set() without a $value is deprecated in league/commonmark 1.6 and will not be allowed in 2.0', \E_USER_DEPRECATED);
        }

        // accept a/b/c as ['a']['b']['c']
        if (\strpos($key, '/')) {
            $this->setByPath($key, $value);

            // The nested write is complete; returning here avoids also storing
            // the value under the literal "a/b/c" top-level key.
            return;
        }

        $this->config[$key] = $value;
    }

    /**
     * Report whether a non-null value is set at the given key or path.
     *
     * The lookup uses the MISSING sentinel as its default, so any stored
     * non-null value (including false, 0 or '') counts as existing.
     */
    public function exists(string $key): bool
    {
        return $this->getConfigByPath($key, self::MISSING) !== self::MISSING;
    }

    /**
     * Walk the configuration array along a slash-delimited path.
     *
     * @param string     $keyPath Slash-delimited path; a string without slashes is a one-segment path
     * @param mixed|null $default Value returned as soon as a segment is not set
     *
     * @return mixed|null
     */
    private function getConfigByPath(string $keyPath, $default = null)
    {
        $keyArr = \explode('/', $keyPath);
        $data   = $this->config;
        foreach ($keyArr as $k) {
            if (!\is_array($data) || !isset($data[$k])) {
                return $default;
            }

            $data = $data[$k];
        }

        return $data;
    }

    /**
     * Write a value at a slash-delimited path, creating intermediate arrays as needed.
     *
     * A non-array value found along the path is replaced by an array.
     *
     * @param string     $keyPath Slash-delimited path
     * @param mixed|null $value   Value to store at the end of the path
     */
    private function setByPath(string $keyPath, $value = null): void
    {
        $keyArr  = \explode('/', $keyPath);
        $pointer = &$this->config;
        while (($k = \array_shift($keyArr)) !== null) {
            if (!\is_array($pointer)) {
                $pointer = [];
            }

            if (!isset($pointer[$k])) {
                $pointer[$k] = null;
            }

            // Descend one level; $pointer now aliases the child slot
            $pointer = &$pointer[$k];
        }

        $pointer = $value;
    }
}

View File

@@ -1,25 +0,0 @@
<?php
/*
* This file is part of the league/commonmark package.
*
* (c) Colin O'Dell <colinodell@gmail.com>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
namespace League\CommonMark\Util;
/**
* Implement this interface to have the configuration injected where needed
*/
interface ConfigurationAwareInterface
{
/**
* Receive the configuration object.
*
* @param ConfigurationInterface $configuration Configuration being injected
*
* @return void
*/
public function setConfiguration(ConfigurationInterface $configuration);
}

View File

@@ -1,64 +0,0 @@
<?php
/*
* This file is part of the league/commonmark package.
*
* (c) Colin O'Dell <colinodell@gmail.com>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
namespace League\CommonMark\Util;
/**
* Contract for a configuration store addressed by string keys or slash-delimited paths
*/
interface ConfigurationInterface
{
/**
* Sentinel string used as the lookup default by Configuration::exists(), so that
* "nothing set at this key" can be told apart from a stored value.
*
* @internal
*
* @deprecated
*/
public const MISSING = '833f2700-af8d-49d4-9171-4b5f12d3bfbc';
/**
* Merge an existing array into the current configuration
*
* @param array<string, mixed> $config Values to merge in
*
* @return void
*/
public function merge(array $config = []);
/**
* Replace the entire array with something else
*
* @param array<string, mixed> $config Replacement configuration
*
* @return void
*/
public function replace(array $config = []);
/**
* Return the configuration value at the given key, or $default if no such config exists
*
* The key can be a string or a slash-delimited path to a nested value
*
* @param string|null $key Key or slash-delimited path
* @param mixed|null $default Value to return when nothing exists at $key
*
* @return mixed|null
*/
public function get(?string $key = null, $default = null);
/**
* Set the configuration value at the given key
*
* The key can be a string or a slash-delimited path to a nested value
*
* @param string $key Key or slash-delimited path
* @param mixed|null $value Value to store
*
* @return void
*/
public function set(string $key, $value = null);
}

File diff suppressed because it is too large Load Diff

View File

@@ -1,5 +1,7 @@
<?php
declare(strict_types=1);
/*
* This file is part of the league/commonmark package.
*
@@ -14,8 +16,14 @@
namespace League\CommonMark\Util;
/**
* @psalm-immutable
*/
final class Html5EntityDecoder
{
/**
* @psalm-pure
*/
public static function decode(string $entity): string
{
if (\substr($entity, -1) !== ';') {
@@ -36,11 +44,12 @@ final class Html5EntityDecoder
/**
* @param mixed $number
*
* @return string
* @psalm-pure
*/
private static function fromDecimal($number): string
{
// Only convert code points within planes 0-2, excluding NULL
// phpcs:ignore Generic.PHP.ForbiddenFunctions.Found
if (empty($number) || $number > 0x2FFFF) {
return self::fromHex('fffd');
}
@@ -56,6 +65,9 @@ final class Html5EntityDecoder
return $converted;
}
/**
* @psalm-pure
*/
private static function fromHex(string $hexChars): string
{
return self::fromDecimal(\hexdec($hexChars));

View File

@@ -1,5 +1,7 @@
<?php
declare(strict_types=1);
/*
* This file is part of the league/commonmark package.
*
@@ -14,16 +16,17 @@
namespace League\CommonMark\Util;
use League\CommonMark\Cursor;
use League\CommonMark\Parser\Cursor;
/**
* @psalm-immutable
*/
final class LinkParserHelper
{
/**
* Attempt to parse link destination
*
* @param Cursor $cursor
*
* @return null|string The string, or null if no match
* @return string|null The string, or null if no match
*/
public static function parseLinkDestination(Cursor $cursor): ?string
{
@@ -34,7 +37,7 @@ final class LinkParserHelper
);
}
if ($cursor->getCharacter() === '<') {
if ($cursor->getCurrentCharacter() === '<') {
return null;
}
@@ -55,7 +58,7 @@ final class LinkParserHelper
return 0;
}
$length = \mb_strlen($match, 'utf-8');
$length = \mb_strlen($match, 'UTF-8');
if ($length > 1001) {
return 0;
@@ -64,12 +67,15 @@ final class LinkParserHelper
return $length;
}
/**
 * Match as much link-label text as possible at the cursor.
 *
 * The pattern accepts runs of characters other than backslash and square
 * brackets, as well as a backslash optionally followed by one character.
 *
 * @return string|null Whatever Cursor::match() returns for the pattern
 */
public static function parsePartialLinkLabel(Cursor $cursor): ?string
{
    $labelPattern = '/^(?:[^\\\\\[\]]+|\\\\.?)*/';

    return $cursor->match($labelPattern);
}
/**
* Attempt to parse link title (sans quotes)
*
* @param Cursor $cursor
*
* @return null|string The string, or null if no match
* @return string|null The string, or null if no match
*/
public static function parseLinkTitle(Cursor $cursor): ?string
{
@@ -81,14 +87,25 @@ final class LinkParserHelper
return null;
}
/**
 * Match link-title text at the cursor, up to and optionally including the closing delimiter.
 *
 * @param Cursor $cursor       Cursor to match against
 * @param string $endDelimiter Closing delimiter of the title; it is regex-quoted before use
 *
 * @return string|null The matched text with backslash escapes resolved, or null if the cursor reports no match
 */
public static function parsePartialLinkTitle(Cursor $cursor, string $endDelimiter): ?string
{
    $quotedDelimiter = \preg_quote($endDelimiter, '/');
    $titlePattern    = \sprintf('/(%s|[^%s\x00])*(?:%s)?/', RegexHelper::PARTIAL_ESCAPED_CHAR, $quotedDelimiter, $quotedDelimiter);

    $matched = $cursor->match($titlePattern);

    return $matched === null ? null : RegexHelper::unescape($matched);
}
private static function manuallyParseLinkDestination(Cursor $cursor): ?string
{
$oldPosition = $cursor->getPosition();
$oldState = $cursor->saveState();
$oldState = $cursor->saveState();
$openParens = 0;
while (($c = $cursor->getCharacter()) !== null) {
if ($c === '\\' && $cursor->peek() !== null && RegexHelper::isEscapable($cursor->peek())) {
while (($c = $cursor->getCurrentCharacter()) !== null) {
if ($c === '\\' && ($peek = $cursor->peek()) !== null && RegexHelper::isEscapable($peek)) {
$cursor->advanceBy(2);
} elseif ($c === '(') {
$cursor->advanceBy(1);
@@ -111,7 +128,7 @@ final class LinkParserHelper
return null;
}
if ($cursor->getPosition() === $oldPosition && $c !== ')') {
if ($cursor->getPosition() === $oldPosition && (! isset($c) || $c !== ')')) {
return null;
}

View File

@@ -1,5 +1,7 @@
<?php
declare(strict_types=1);
/*
* This file is part of the league/commonmark package.
*
@@ -16,6 +18,7 @@ namespace League\CommonMark\Util;
/**
* @internal
*
* @phpstan-template T
* @phpstan-implements \IteratorAggregate<T>
*/
@@ -25,33 +28,32 @@ final class PrioritizedList implements \IteratorAggregate
* @var array<int, array<mixed>>
* @phpstan-var array<int, array<T>>
*/
private $list = [];
private array $list = [];
/**
* @var iterable<mixed>|null
* @phpstan-var iterable<T>|null
* @var \Traversable<mixed>|null
* @phpstan-var \Traversable<T>|null
*/
private $optimized;
private ?\Traversable $optimized = null;
/**
* @param mixed $item
* @param int $priority
*
* @phpstan-param T $item
*/
public function add($item, int $priority): void
{
$this->list[$priority][] = $item;
$this->optimized = null;
$this->optimized = null;
}
/**
* @return iterable<int, mixed>
* @return \Traversable<int, mixed>
*
* @phpstan-return iterable<int, T>
* @phpstan-return \Traversable<int, T>
*/
#[\ReturnTypeWillChange]
public function getIterator(): iterable
public function getIterator(): \Traversable
{
if ($this->optimized === null) {
\krsort($this->list);

View File

@@ -1,5 +1,7 @@
<?php
declare(strict_types=1);
/*
* This file is part of the league/commonmark package.
*
@@ -14,124 +16,113 @@
namespace League\CommonMark\Util;
use League\CommonMark\Block\Element\HtmlBlock;
use League\CommonMark\Exception\InvalidArgumentException;
use League\CommonMark\Extension\CommonMark\Node\Block\HtmlBlock;
/**
* Provides regular expressions and utilities for parsing Markdown
*
* All of the PARTIAL_ regex constants assume that they'll be used in case-insensitive searches
* All other complete regexes provided by this class (either via constants or methods) will have case-insensitivity enabled.
*
* @phpcs:disable Generic.Strings.UnnecessaryStringConcat.Found
*
* @psalm-immutable
*/
final class RegexHelper
{
// Partial regular expressions (wrap with `/` on each side before use)
public const PARTIAL_ENTITY = '&(?:#x[a-f0-9]{1,6}|#[0-9]{1,7}|[a-z][a-z0-9]{1,31});';
public const PARTIAL_ESCAPABLE = '[!"#$%&\'()*+,.\/:;<=>?@[\\\\\]^_`{|}~-]';
public const PARTIAL_ESCAPED_CHAR = '\\\\' . self::PARTIAL_ESCAPABLE;
public const PARTIAL_IN_DOUBLE_QUOTES = '"(' . self::PARTIAL_ESCAPED_CHAR . '|[^"\x00])*"';
public const PARTIAL_IN_SINGLE_QUOTES = '\'(' . self::PARTIAL_ESCAPED_CHAR . '|[^\'\x00])*\'';
public const PARTIAL_IN_PARENS = '\\((' . self::PARTIAL_ESCAPED_CHAR . '|[^)\x00])*\\)';
public const PARTIAL_REG_CHAR = '[^\\\\()\x00-\x20]';
public const PARTIAL_IN_PARENS_NOSP = '\((' . self::PARTIAL_REG_CHAR . '|' . self::PARTIAL_ESCAPED_CHAR . '|\\\\)*\)';
public const PARTIAL_TAGNAME = '[A-Za-z][A-Za-z0-9-]*';
public const PARTIAL_BLOCKTAGNAME = '(?:address|article|aside|base|basefont|blockquote|body|caption|center|col|colgroup|dd|details|dialog|dir|div|dl|dt|fieldset|figcaption|figure|footer|form|frame|frameset|h1|head|header|hr|html|iframe|legend|li|link|main|menu|menuitem|nav|noframes|ol|optgroup|option|p|param|section|source|summary|table|tbody|td|tfoot|th|thead|title|tr|track|ul)';
public const PARTIAL_ATTRIBUTENAME = '[a-zA-Z_:][a-zA-Z0-9:._-]*';
public const PARTIAL_UNQUOTEDVALUE = '[^"\'=<>`\x00-\x20]+';
public const PARTIAL_SINGLEQUOTEDVALUE = '\'[^\']*\'';
public const PARTIAL_DOUBLEQUOTEDVALUE = '"[^"]*"';
public const PARTIAL_ATTRIBUTEVALUE = '(?:' . self::PARTIAL_UNQUOTEDVALUE . '|' . self::PARTIAL_SINGLEQUOTEDVALUE . '|' . self::PARTIAL_DOUBLEQUOTEDVALUE . ')';
public const PARTIAL_ATTRIBUTEVALUESPEC = '(?:' . '\s*=' . '\s*' . self::PARTIAL_ATTRIBUTEVALUE . ')';
public const PARTIAL_ATTRIBUTE = '(?:' . '\s+' . self::PARTIAL_ATTRIBUTENAME . self::PARTIAL_ATTRIBUTEVALUESPEC . '?)';
public const PARTIAL_OPENTAG = '<' . self::PARTIAL_TAGNAME . self::PARTIAL_ATTRIBUTE . '*' . '\s*\/?>';
public const PARTIAL_CLOSETAG = '<\/' . self::PARTIAL_TAGNAME . '\s*[>]';
public const PARTIAL_OPENBLOCKTAG = '<' . self::PARTIAL_BLOCKTAGNAME . self::PARTIAL_ATTRIBUTE . '*' . '\s*\/?>';
public const PARTIAL_CLOSEBLOCKTAG = '<\/' . self::PARTIAL_BLOCKTAGNAME . '\s*[>]';
public const PARTIAL_HTMLCOMMENT = '<!---->|<!--(?:-?[^>-])(?:-?[^-])*-->';
public const PARTIAL_PROCESSINGINSTRUCTION = '[<][?].*?[?][>]';
public const PARTIAL_DECLARATION = '<![A-Z]+' . '\s+[^>]*>';
public const PARTIAL_CDATA = '<!\[CDATA\[[\s\S]*?]\]>';
public const PARTIAL_HTMLTAG = '(?:' . self::PARTIAL_OPENTAG . '|' . self::PARTIAL_CLOSETAG . '|' . self::PARTIAL_HTMLCOMMENT . '|' .
// Partial regular expressions (wrap with `/` on each side and add the case-insensitive `i` flag before use)
public const PARTIAL_ENTITY = '&(?:#x[a-f0-9]{1,6}|#[0-9]{1,7}|[a-z][a-z0-9]{1,31});';
public const PARTIAL_ESCAPABLE = '[!"#$%&\'()*+,.\/:;<=>?@[\\\\\]^_`{|}~-]';
public const PARTIAL_ESCAPED_CHAR = '\\\\' . self::PARTIAL_ESCAPABLE;
public const PARTIAL_IN_DOUBLE_QUOTES = '"(' . self::PARTIAL_ESCAPED_CHAR . '|[^"\x00])*"';
public const PARTIAL_IN_SINGLE_QUOTES = '\'(' . self::PARTIAL_ESCAPED_CHAR . '|[^\'\x00])*\'';
public const PARTIAL_IN_PARENS = '\\((' . self::PARTIAL_ESCAPED_CHAR . '|[^)\x00])*\\)';
public const PARTIAL_REG_CHAR = '[^\\\\()\x00-\x20]';
public const PARTIAL_IN_PARENS_NOSP = '\((' . self::PARTIAL_REG_CHAR . '|' . self::PARTIAL_ESCAPED_CHAR . '|\\\\)*\)';
public const PARTIAL_TAGNAME = '[a-z][a-z0-9-]*';
public const PARTIAL_BLOCKTAGNAME = '(?:address|article|aside|base|basefont|blockquote|body|caption|center|col|colgroup|dd|details|dialog|dir|div|dl|dt|fieldset|figcaption|figure|footer|form|frame|frameset|h1|head|header|hr|html|iframe|legend|li|link|main|menu|menuitem|nav|noframes|ol|optgroup|option|p|param|section|source|summary|table|tbody|td|tfoot|th|thead|title|tr|track|ul)';
public const PARTIAL_ATTRIBUTENAME = '[a-z_:][a-z0-9:._-]*';
public const PARTIAL_UNQUOTEDVALUE = '[^"\'=<>`\x00-\x20]+';
public const PARTIAL_SINGLEQUOTEDVALUE = '\'[^\']*\'';
public const PARTIAL_DOUBLEQUOTEDVALUE = '"[^"]*"';
public const PARTIAL_ATTRIBUTEVALUE = '(?:' . self::PARTIAL_UNQUOTEDVALUE . '|' . self::PARTIAL_SINGLEQUOTEDVALUE . '|' . self::PARTIAL_DOUBLEQUOTEDVALUE . ')';
public const PARTIAL_ATTRIBUTEVALUESPEC = '(?:' . '\s*=' . '\s*' . self::PARTIAL_ATTRIBUTEVALUE . ')';
public const PARTIAL_ATTRIBUTE = '(?:' . '\s+' . self::PARTIAL_ATTRIBUTENAME . self::PARTIAL_ATTRIBUTEVALUESPEC . '?)';
public const PARTIAL_OPENTAG = '<' . self::PARTIAL_TAGNAME . self::PARTIAL_ATTRIBUTE . '*' . '\s*\/?>';
public const PARTIAL_CLOSETAG = '<\/' . self::PARTIAL_TAGNAME . '\s*[>]';
public const PARTIAL_OPENBLOCKTAG = '<' . self::PARTIAL_BLOCKTAGNAME . self::PARTIAL_ATTRIBUTE . '*' . '\s*\/?>';
public const PARTIAL_CLOSEBLOCKTAG = '<\/' . self::PARTIAL_BLOCKTAGNAME . '\s*[>]';
public const PARTIAL_HTMLCOMMENT = '<!-->|<!--->|<!--[\s\S]*?-->';
public const PARTIAL_PROCESSINGINSTRUCTION = '[<][?][\s\S]*?[?][>]';
public const PARTIAL_DECLARATION = '<![A-Za-z]+' . '[^>]*>';
public const PARTIAL_CDATA = '<!\[CDATA\[[\s\S]*?]\]>';
public const PARTIAL_HTMLTAG = '(?:' . self::PARTIAL_OPENTAG . '|' . self::PARTIAL_CLOSETAG . '|' . self::PARTIAL_HTMLCOMMENT . '|' .
self::PARTIAL_PROCESSINGINSTRUCTION . '|' . self::PARTIAL_DECLARATION . '|' . self::PARTIAL_CDATA . ')';
public const PARTIAL_HTMLBLOCKOPEN = '<(?:' . self::PARTIAL_BLOCKTAGNAME . '(?:[\s\/>]|$)' . '|' .
public const PARTIAL_HTMLBLOCKOPEN = '<(?:' . self::PARTIAL_BLOCKTAGNAME . '(?:[\s\/>]|$)' . '|' .
'\/' . self::PARTIAL_BLOCKTAGNAME . '(?:[\s>]|$)' . '|' . '[?!])';
public const PARTIAL_LINK_TITLE = '^(?:"(' . self::PARTIAL_ESCAPED_CHAR . '|[^"\x00])*"' .
public const PARTIAL_LINK_TITLE = '^(?:"(' . self::PARTIAL_ESCAPED_CHAR . '|[^"\x00])*"' .
'|' . '\'(' . self::PARTIAL_ESCAPED_CHAR . '|[^\'\x00])*\'' .
'|' . '\((' . self::PARTIAL_ESCAPED_CHAR . '|[^()\x00])*\))';
public const REGEX_PUNCTUATION = '/^[\x{2000}-\x{206F}\x{2E00}-\x{2E7F}\p{Pc}\p{Pd}\p{Pe}\p{Pf}\p{Pi}\p{Po}\p{Ps}\\\\\'!"#\$%&\(\)\*\+,\-\.\\/:;<=>\?@\[\]\^_`\{\|\}~]/u';
public const REGEX_UNSAFE_PROTOCOL = '/^javascript:|vbscript:|file:|data:/i';
public const REGEX_PUNCTUATION = '/^[!"#$%&\'()*+,\-.\\/:;<=>?@\\[\\]\\\\^_`{|}~\p{P}\p{S}]/u';
public const REGEX_UNSAFE_PROTOCOL = '/^javascript:|vbscript:|file:|data:/i';
public const REGEX_SAFE_DATA_PROTOCOL = '/^data:image\/(?:png|gif|jpeg|webp)/i';
public const REGEX_NON_SPACE = '/[^ \t\f\v\r\n]/';
public const REGEX_NON_SPACE = '/[^ \t\f\v\r\n]/';
public const REGEX_WHITESPACE_CHAR = '/^[ \t\n\x0b\x0c\x0d]/';
public const REGEX_WHITESPACE = '/[ \t\n\x0b\x0c\x0d]+/';
public const REGEX_WHITESPACE_CHAR = '/^[ \t\n\x0b\x0c\x0d]/';
public const REGEX_UNICODE_WHITESPACE_CHAR = '/^\pZ|\s/u';
public const REGEX_THEMATIC_BREAK = '/^(?:\*[ \t]*){3,}$|^(?:_[ \t]*){3,}$|^(?:-[ \t]*){3,}$/';
public const REGEX_THEMATIC_BREAK = '/^(?:\*[ \t]*){3,}$|^(?:_[ \t]*){3,}$|^(?:-[ \t]*){3,}$/';
public const REGEX_LINK_DESTINATION_BRACES = '/^(?:<(?:[^<>\\n\\\\\\x00]|\\\\.)*>)/';
/**
 * Report whether the given string contains a character from the PARTIAL_ESCAPABLE set.
 *
 * @psalm-pure
 */
public static function isEscapable(string $character): bool
{
    $pattern = '/' . self::PARTIAL_ESCAPABLE . '/';

    return \preg_match($pattern, $character) === 1;
}
/**
 * Report whether the given string contains a Unicode letter (\pL).
 *
 * A null argument is never a letter.
 *
 * @psalm-pure
 */
public static function isLetter(?string $character): bool
{
    return $character !== null && \preg_match('/[\pL]/u', $character) === 1;
}
/**
* Attempt to match a regex in string s at offset offset
*
* @param string $regex
* @param string $string
* @param int $offset
* @psalm-param non-empty-string $regex
*
* @return int|null Index of match, or null
*
* @psalm-pure
*/
public static function matchAt(string $regex, string $string, int $offset = 0): ?int
{
$matches = [];
$string = \mb_substr($string, $offset, null, 'utf-8');
if (!\preg_match($regex, $string, $matches, \PREG_OFFSET_CAPTURE)) {
$string = \mb_substr($string, $offset, null, 'UTF-8');
if (! \preg_match($regex, $string, $matches, \PREG_OFFSET_CAPTURE)) {
return null;
}
// PREG_OFFSET_CAPTURE always returns the byte offset, not the char offset, which is annoying
$charPos = \mb_strlen(\mb_strcut($string, 0, $matches[0][1], 'utf-8'), 'utf-8');
$charPos = \mb_strlen(\mb_strcut($string, 0, $matches[0][1], 'UTF-8'), 'UTF-8');
return $offset + $charPos;
}
/**
 * Functional wrapper around preg_match_all
 *
 * When the pattern matches exactly once, each capture group is flattened to
 * its single string; otherwise each group stays a list of strings.
 *
 * @param string $pattern Regular expression, including delimiters
 * @param string $subject Text to search
 * @param int    $offset  Number of leading bytes of $subject to skip before matching
 *
 * @return array<string>|null The matches in pattern order, or null when nothing matched
 *
 * @deprecated in 1.6; use matchFirst() instead
 */
public static function matchAll(string $pattern, string $subject, int $offset = 0): ?array
{
    @\trigger_error('RegexHelper::matchAll() is deprecated in league/commonmark 1.6 and will be removed in 2.0; use RegexHelper::matchFirst() instead', \E_USER_DEPRECATED);

    $haystack = $offset === 0 ? $subject : \substr($subject, $offset);

    \preg_match_all($pattern, $haystack, $groups, \PREG_PATTERN_ORDER);

    // Index 0 holds the full-pattern matches
    $wholeMatches = \reset($groups);
    if (empty($wholeMatches)) {
        return null;
    }

    if (\count($wholeMatches) === 1) {
        // Single match: collapse every group to its only entry
        $groups = \array_map(static function ($group) {
            return \reset($group);
        }, $groups);
    }

    return $groups ?: null;
}
/**
* Functional wrapper around preg_match_all which only returns the first set of matches
*
* @psalm-param non-empty-string $pattern
*
* @return string[]|null
*
* @psalm-pure
@@ -154,31 +145,32 @@ final class RegexHelper
/**
* Replace backslash escapes with literal characters
*
* @param string $string
*
* @return string
* @psalm-pure
*/
public static function unescape(string $string): string
{
$allEscapedChar = '/\\\\(' . self::PARTIAL_ESCAPABLE . ')/';
/** @var string $escaped */
$escaped = \preg_replace($allEscapedChar, '$1', $string);
\assert(\is_string($escaped));
/** @var string $replaced */
$replaced = \preg_replace_callback('/' . self::PARTIAL_ENTITY . '/i', function ($e) {
return Html5EntityDecoder::decode($e[0]);
}, $escaped);
return $replaced;
return \preg_replace_callback('/' . self::PARTIAL_ENTITY . '/i', static fn ($e) => Html5EntityDecoder::decode($e[0]), $escaped);
}
/**
* @internal
*
* @param int $type HTML block type
*
* @return string
* @psalm-param HtmlBlock::TYPE_* $type
*
* @internal
* @phpstan-param HtmlBlock::TYPE_* $type
*
* @psalm-return non-empty-string
*
* @throws InvalidArgumentException if an invalid type is given
*
* @psalm-pure
*/
public static function getHtmlBlockOpenRegex(int $type): string
{
@@ -190,24 +182,32 @@ final class RegexHelper
case HtmlBlock::TYPE_3:
return '/^<[?]/';
case HtmlBlock::TYPE_4:
return '/^<![A-Z]/';
return '/^<![A-Z]/i';
case HtmlBlock::TYPE_5_CDATA:
return '/^<!\[CDATA\[/';
return '/^<!\[CDATA\[/i';
case HtmlBlock::TYPE_6_BLOCK_ELEMENT:
return '%^<[/]?(?:address|article|aside|base|basefont|blockquote|body|caption|center|col|colgroup|dd|details|dialog|dir|div|dl|dt|fieldset|figcaption|figure|footer|form|frame|frameset|h[123456]|head|header|hr|html|iframe|legend|li|link|main|menu|menuitem|nav|noframes|ol|optgroup|option|p|param|section|source|summary|table|tbody|td|tfoot|th|thead|title|tr|track|ul)(?:\s|[/]?[>]|$)%i';
case HtmlBlock::TYPE_7_MISC_ELEMENT:
return '/^(?:' . self::PARTIAL_OPENTAG . '|' . self::PARTIAL_CLOSETAG . ')\\s*$/i';
default:
throw new InvalidArgumentException('Invalid HTML block type');
}
throw new \InvalidArgumentException('Invalid HTML block type');
}
/**
* @internal
*
* @param int $type HTML block type
*
* @return string
* @psalm-param HtmlBlock::TYPE_* $type
*
* @internal
* @phpstan-param HtmlBlock::TYPE_* $type
*
* @psalm-return non-empty-string
*
* @throws InvalidArgumentException if an invalid type is given
*
* @psalm-pure
*/
public static function getHtmlBlockCloseRegex(int $type): string
{
@@ -222,11 +222,14 @@ final class RegexHelper
return '/>/';
case HtmlBlock::TYPE_5_CDATA:
return '/\]\]>/';
default:
throw new InvalidArgumentException('Invalid HTML block type');
}
throw new \InvalidArgumentException('Invalid HTML block type');
}
/**
* @psalm-pure
*/
public static function isLinkPotentiallyUnsafe(string $url): bool
{
return \preg_match(self::REGEX_UNSAFE_PROTOCOL, $url) !== 0 && \preg_match(self::REGEX_SAFE_DATA_PROTOCOL, $url) === 0;

View File

@@ -1,5 +1,7 @@
<?php
declare(strict_types=1);
/*
* This file is part of the league/commonmark package.
*
@@ -16,11 +18,18 @@ namespace League\CommonMark\Util;
use League\CommonMark\Exception\UnexpectedEncodingException;
/**
* @psalm-immutable
*/
final class UrlEncoder
{
/** @var string[] */
private static $encodeCache = ['%00', '%01', '%02', '%03', '%04', '%05', '%06', '%07', '%08', '%09', '%0A', '%0B', '%0C', '%0D', '%0E', '%0F', '%10', '%11', '%12', '%13', '%14', '%15', '%16', '%17', '%18', '%19', '%1A', '%1B', '%1C', '%1D', '%1E', '%1F', '%20', '!', '%22', '#', '$', '%25', '&', "'", '(', ')', '*', '+', ',', '-', '.', '/', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '%3C', '=', '%3E', '?', '@', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '%5B', '%5C', '%5D', '%5E', '_', '%60', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '%7B', '%7C', '%7D', '~', '%7F'];
private const ENCODE_CACHE = ['%00', '%01', '%02', '%03', '%04', '%05', '%06', '%07', '%08', '%09', '%0A', '%0B', '%0C', '%0D', '%0E', '%0F', '%10', '%11', '%12', '%13', '%14', '%15', '%16', '%17', '%18', '%19', '%1A', '%1B', '%1C', '%1D', '%1E', '%1F', '%20', '!', '%22', '#', '$', '%25', '&', "'", '(', ')', '*', '+', ',', '-', '.', '/', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '%3C', '=', '%3E', '?', '@', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '%5B', '%5C', '%5D', '%5E', '_', '%60', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '%7B', '%7C', '%7D', '~', '%7F'];
/**
* @throws UnexpectedEncodingException if a non-UTF-8-compatible encoding is used
*
* @psalm-pure
*/
public static function unescapeAndEncode(string $uri): string
{
// Optimization: if the URL only includes characters we know will be kept as-is, then just return the URL as-is.
@@ -28,27 +37,27 @@ final class UrlEncoder
return $uri;
}
$result = '';
$chars = \preg_split('//u', $uri, -1, \PREG_SPLIT_NO_EMPTY);
if (!\is_array($chars) || !\mb_check_encoding($uri, 'UTF-8')) {
if (! \mb_check_encoding($uri, 'UTF-8')) {
throw new UnexpectedEncodingException('Unexpected encoding - UTF-8 or ASCII was expected');
}
$result = '';
$chars = \mb_str_split($uri, 1, 'UTF-8');
$l = \count($chars);
for ($i = 0; $i < $l; $i++) {
$code = $chars[$i];
if ($code === '%' && $i + 2 < $l) {
if (\preg_match('/^[0-9a-f]{2}$/i', $chars[$i + 1] . $chars[$i + 2]) === 1) {
$result .= '%' . $chars[$i + 1] . $chars[$i + 2];
$i += 2;
$i += 2;
continue;
}
}
if (\ord($code) < 128) {
$result .= self::$encodeCache[\ord($code)];
$result .= self::ENCODE_CACHE[\ord($code)];
continue;
}

View File

@@ -1,5 +1,7 @@
<?php
declare(strict_types=1);
/*
* This file is part of the league/commonmark package.
*
@@ -16,15 +18,15 @@ namespace League\CommonMark\Util;
/**
* Utility class for handling/generating XML and HTML
*
* @psalm-immutable
*/
final class Xml
{
/**
* @param string $string
*
* @return string
* @psalm-pure
*/
public static function escape($string)
public static function escape(string $string): string
{
return \str_replace(['&', '<', '>', '"'], ['&amp;', '&lt;', '&gt;', '&quot;'], $string);
}