This commit is contained in:
Paolo A
2024-08-13 13:44:16 +00:00
parent 1bbb23088d
commit e796d76612
4001 changed files with 30101 additions and 40075 deletions

View File

@@ -13,24 +13,43 @@ declare(strict_types=1);
namespace League\CommonMark\Normalizer;
use League\Config\ConfigurationAwareInterface;
use League\Config\ConfigurationInterface;
/**
* Creates URL-friendly strings based on the given string input
*/
final class SlugNormalizer implements TextNormalizerInterface
final class SlugNormalizer implements TextNormalizerInterface, ConfigurationAwareInterface
{
/**
* {@inheritdoc}
*/
public function normalize(string $text, $context = null): string
/** @psalm-allow-private-mutation */
private int $defaultMaxLength = 255;
/**
 * Stores the configured default maximum slug length.
 *
 * Reads the `slug_normalizer/max_length` option from the given configuration
 * and keeps it in $defaultMaxLength for later use.
 *
 * @param ConfigurationInterface $configuration The configuration to read the option from
 */
public function setConfiguration(ConfigurationInterface $configuration): void
{
    $maxLength = $configuration->get('slug_normalizer/max_length');

    $this->defaultMaxLength = $maxLength;
}
/**
 * {@inheritDoc}
 *
 * Builds a URL-friendly slug: prepends the optional `prefix`, trims and
 * lowercases the text, turns each whitespace run into a dash, strips every
 * character that is not a letter, number, mark or dash, and finally truncates
 * the result.
 *
 * @param string               $text    The text to turn into a slug
 * @param array<string, mixed> $context Optional settings; only `prefix` and `length`
 *                                      are read here. When `length` is absent the
 *                                      default maximum length is used; a falsy
 *                                      length (e.g. 0) disables truncation.
 *
 * @return string The generated slug
 *
 * @psalm-immutable
 */
public function normalize(string $text, array $context = []): string
{
    // Add any requested prefix
    $slug = ($context['prefix'] ?? '') . $text;

    // Trim whitespace. This must operate on $slug, not $text: trimming $text
    // here would overwrite the prefixed string and silently drop the prefix.
    $slug = \trim($slug);

    // Convert to lowercase (explicit encoding so the result does not depend
    // on the runtime's internal-encoding setting)
    $slug = \mb_strtolower($slug, 'UTF-8');

    // Try replacing whitespace with a dash; keep the previous value if the
    // regex engine fails and returns null
    $slug = \preg_replace('/\s+/u', '-', $slug) ?? $slug;

    // Try removing characters other than letters, numbers, and marks.
    $slug = \preg_replace('/[^\p{L}\p{Nd}\p{Nl}\p{M}-]+/u', '', $slug) ?? $slug;

    // Trim to requested length if given, otherwise to the default maximum.
    // A falsy length means "do not truncate".
    $length = $context['length'] ?? $this->defaultMaxLength;
    if ($length) {
        $slug = \mb_substr($slug, 0, $length, 'UTF-8');
    }

    return $slug;
}

View File

@@ -17,34 +17,22 @@ namespace League\CommonMark\Normalizer;
* Normalize text input using the steps given by the CommonMark spec to normalize labels
*
* @see https://spec.commonmark.org/0.29/#matches
*
* @psalm-immutable
*/
final class TextNormalizer implements TextNormalizerInterface
{
/**
* @var array<int, array<int, string>>
* {@inheritDoc}
*
* Source: https://github.com/symfony/polyfill-mbstring/blob/master/Mbstring.php
* @psalm-pure
*/
// Replacement pairs for the case-folding fallback: index 0 holds the characters
// to search for, index 1 holds the replacement at the same position. Both lists
// are passed to the str_replace() call in normalize().
private const CASE_FOLD = [
['µ', 'ſ', "\xCD\x85", 'ς', "\xCF\x90", "\xCF\x91", "\xCF\x95", "\xCF\x96", "\xCF\xB0", "\xCF\xB1", "\xCF\xB5", "\xE1\xBA\x9B", "\xE1\xBE\xBE", "\xC3\x9F", "\xE1\xBA\x9E"],
['μ', 's', 'ι', 'σ', 'β', 'θ', 'φ', 'π', 'κ', 'ρ', 'ε', "\xE1\xB9\xA1", 'ι', 'ss', 'ss'],
];
/**
* {@inheritdoc}
*/
public function normalize(string $text, $context = null): string
public function normalize(string $text, array $context = []): string
{
// Purpose: trim the text, collapse internal whitespace runs to a single space,
// and return a case-folded copy. $context is not read anywhere in this body.
// Collapse internal whitespace to single space and remove
// leading/trailing whitespace
// NOTE(review): this pass matches any `\s` run, and a second, narrower pass
// (space, tab, CR, LF only) follows further down. The two overlap; this one
// looks like the pre-change line of the hunk — confirm which is meant to stay.
$text = \preg_replace('/\s+/', ' ', \trim($text));
if (!\defined('MB_CASE_FOLD')) {
// We're not on a version of PHP (7.3+) which has this feature
// Fallback: apply the CASE_FOLD replacement pairs by hand, then lowercase.
$text = \str_replace(self::CASE_FOLD[0], self::CASE_FOLD[1], $text);
return \mb_strtolower($text, 'UTF-8');
}
// Collapse runs of space, tab, CR and LF into one space after trimming.
$text = \preg_replace('/[ \t\r\n]+/', ' ', \trim($text));
// preg_replace() returns null on failure; this assertion rules that out
// before the value is handed to mb_convert_case().
\assert(\is_string($text));
// Case folding via the MB_CASE_FOLD mode of mb_convert_case().
return \mb_convert_case($text, \MB_CASE_FOLD, 'UTF-8');
}

View File

@@ -19,8 +19,15 @@ namespace League\CommonMark\Normalizer;
interface TextNormalizerInterface
{
    /**
     * Normalizes the given text.
     *
     * @param string               $text    The text to normalize
     * @param array<string, mixed> $context Additional context about the text being normalized (optional)
     *
     * $context may include (but is not required to include) the following:
     *   - `prefix` - A string prefix to prepend to each normalized result
     *   - `length` - The requested maximum length
     *   - `node`   - The node we're normalizing text for
     *
     * Implementations do not have to use or respect any information within that $context
     *
     * @return string The normalized text
     */
    public function normalize(string $text, array $context = []): string;
}