This commit is contained in:
Paolo A
2024-08-13 13:44:16 +00:00
parent 1bbb23088d
commit e796d76612
4001 changed files with 30101 additions and 40075 deletions

View File

@@ -19,7 +19,7 @@ use Symfony\Component\CssSelector\Parser\Tokenizer\Tokenizer;
* CSS selector parser.
*
* This component is a port of the Python cssselect library,
* which is copyright Ian Bicking, @see https://github.com/SimonSapin/cssselect.
* which is copyright Ian Bicking, @see https://github.com/scrapy/cssselect.
*
* @author Jean-François Simon <jeanfrancois.simon@sensiolabs.com>
*
@@ -27,16 +27,13 @@ use Symfony\Component\CssSelector\Parser\Tokenizer\Tokenizer;
*/
class Parser implements ParserInterface
{
private $tokenizer;
private Tokenizer $tokenizer;
public function __construct(Tokenizer $tokenizer = null)
public function __construct(?Tokenizer $tokenizer = null)
{
$this->tokenizer = $tokenizer ?? new Tokenizer();
}
/**
* {@inheritdoc}
*/
public function parse(string $source): array
{
$reader = new Reader($source);
@@ -60,9 +57,7 @@ class Parser implements ParserInterface
}
}
$joined = trim(implode('', array_map(function (Token $token) {
return $token->getValue();
}, $tokens)));
$joined = trim(implode('', array_map(fn (Token $token) => $token->getValue(), $tokens)));
$int = function ($string) {
if (!is_numeric($string)) {
@@ -92,13 +87,17 @@ class Parser implements ParserInterface
];
}
private function parseSelectorList(TokenStream $stream): array
private function parseSelectorList(TokenStream $stream, bool $isArgument = false): array
{
$stream->skipWhitespace();
$selectors = [];
while (true) {
$selectors[] = $this->parserSelectorNode($stream);
if ($isArgument && $stream->getPeek()->isDelimiter([')'])) {
break;
}
$selectors[] = $this->parserSelectorNode($stream, $isArgument);
if ($stream->getPeek()->isDelimiter([','])) {
$stream->getNext();
@@ -111,15 +110,19 @@ class Parser implements ParserInterface
return $selectors;
}
private function parserSelectorNode(TokenStream $stream): Node\SelectorNode
private function parserSelectorNode(TokenStream $stream, bool $isArgument = false): Node\SelectorNode
{
[$result, $pseudoElement] = $this->parseSimpleSelector($stream);
[$result, $pseudoElement] = $this->parseSimpleSelector($stream, false, $isArgument);
while (true) {
$stream->skipWhitespace();
$peek = $stream->getPeek();
if ($peek->isFileEnd() || $peek->isDelimiter([','])) {
if (
$peek->isFileEnd()
|| $peek->isDelimiter([','])
|| ($isArgument && $peek->isDelimiter([')']))
) {
break;
}
@@ -134,7 +137,7 @@ class Parser implements ParserInterface
$combinator = ' ';
}
[$nextSelector, $pseudoElement] = $this->parseSimpleSelector($stream);
[$nextSelector, $pseudoElement] = $this->parseSimpleSelector($stream, false, $isArgument);
$result = new Node\CombinedSelectorNode($result, $combinator, $nextSelector);
}
@@ -146,7 +149,7 @@ class Parser implements ParserInterface
*
* @throws SyntaxErrorException
*/
private function parseSimpleSelector(TokenStream $stream, bool $insideNegation = false): array
private function parseSimpleSelector(TokenStream $stream, bool $insideNegation = false, bool $isArgument = false): array
{
$stream->skipWhitespace();
@@ -159,7 +162,7 @@ class Parser implements ParserInterface
if ($peek->isWhitespace()
|| $peek->isFileEnd()
|| $peek->isDelimiter([',', '+', '>', '~'])
|| ($insideNegation && $peek->isDelimiter([')']))
|| ($isArgument && $peek->isDelimiter([')']))
) {
break;
}
@@ -197,7 +200,18 @@ class Parser implements ParserInterface
if (!$stream->getPeek()->isDelimiter(['('])) {
$result = new Node\PseudoNode($result, $identifier);
if ('Pseudo[Element[*]:scope]' === $result->__toString()) {
$used = \count($stream->getUsed());
if (!(2 === $used
|| 3 === $used && $stream->getUsed()[0]->isWhiteSpace()
|| $used >= 3 && $stream->getUsed()[$used - 3]->isDelimiter([','])
|| $used >= 4
&& $stream->getUsed()[$used - 3]->isWhiteSpace()
&& $stream->getUsed()[$used - 4]->isDelimiter([','])
)) {
throw SyntaxErrorException::notAtTheStartOfASelector('scope');
}
}
continue;
}
@@ -209,7 +223,7 @@ class Parser implements ParserInterface
throw SyntaxErrorException::nestedNot();
}
[$argument, $argumentPseudoElement] = $this->parseSimpleSelector($stream, true);
[$argument, $argumentPseudoElement] = $this->parseSimpleSelector($stream, true, true);
$next = $stream->getNext();
if (null !== $argumentPseudoElement) {
@@ -221,6 +235,24 @@ class Parser implements ParserInterface
}
$result = new Node\NegationNode($result, $argument);
} elseif ('is' === strtolower($identifier)) {
$selectors = $this->parseSelectorList($stream, true);
$next = $stream->getNext();
if (!$next->isDelimiter([')'])) {
throw SyntaxErrorException::unexpectedToken('")"', $next);
}
$result = new Node\MatchingNode($result, $selectors);
} elseif ('where' === strtolower($identifier)) {
$selectors = $this->parseSelectorList($stream, true);
$next = $stream->getNext();
if (!$next->isDelimiter([')'])) {
throw SyntaxErrorException::unexpectedToken('")"', $next);
}
$result = new Node\SpecificityAdjustmentNode($result, $selectors);
} else {
$arguments = [];
$next = null;
@@ -242,7 +274,7 @@ class Parser implements ParserInterface
}
}
if (empty($arguments)) {
if (!$arguments) {
throw SyntaxErrorException::unexpectedToken('at least one argument', $next);
}