diff --git a/src/Readability.php b/src/Readability.php
index 81f223f..82f09ae 100644
--- a/src/Readability.php
+++ b/src/Readability.php
@@ -1536,16 +1536,24 @@ class Readability implements LoggerAwareInterface
         );
     }
 
+    /**
+     * Checks if `$node` has only whitespace and a single element with `$tag` for the tag name.
+     * Returns false if `$node` contains non-empty text nodes
+     * or if it contains no element with given tag or more than 1 element.
+     */
     private function hasSingleTagInsideElement(JSLikeHTMLElement $node, string $tag): bool
     {
-        if (1 !== $node->childNodes->length || $node->childNodes->item(0)->nodeName !== $tag) {
-            return false;
+        $childNodes = iterator_to_array($node->childNodes);
+        // array_filter() preserves keys, so re-index: the lone element may follow whitespace text nodes
+        $children = array_values(array_filter($childNodes, fn ($childNode) => $childNode instanceof \DOMElement));
+
+        // There should be exactly 1 element child with given tag
+        if (1 !== \count($children) || $children[0]->nodeName !== $tag) {
+            return false;
         }
 
-        $a = array_filter(
-            iterator_to_array($node->childNodes),
-            fn ($childNode) => $childNode instanceof \DOMText && preg_match($this->regexps['hasContent'], $this->getInnerText($childNode))
-        );
+        // And there should be no text nodes with real content
+        $a = array_filter($childNodes, fn ($childNode) => $childNode instanceof \DOMText && preg_match($this->regexps['hasContent'], $this->getInnerText($childNode)));
 
         return 0 === \count($a);
     }