|
|
|
|
@ -1035,8 +1035,8 @@ class Readability implements LoggerAwareInterface |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
if ($this->hasSingleTagInsideElement($node, 'p') && $this->getLinkDensity($node) < 0.25) { |
|
|
|
|
$newNode = $node->childNodes->item(0); |
|
|
|
|
$newNode = $this->getSingleTagInsideElement($node, 'p'); |
|
|
|
|
if (null !== $newNode && $this->getLinkDensity($node) < 0.25) { |
|
|
|
|
$node->parentNode->replaceChild($newNode, $node); |
|
|
|
|
$nodesToScore[] = $newNode; |
|
|
|
|
} |
|
|
|
|
@ -1538,10 +1538,10 @@ class Readability implements LoggerAwareInterface |
|
|
|
|
|
|
|
|
|
/** |
|
|
|
|
* Checks if `$node` has only whitespace and a single element with `$tag` for the tag name. |
|
|
|
|
* Returns false if `$node` contains non-empty text nodes |
|
|
|
|
* Returns the matched element, or `null` if `$node` contains non-empty text nodes |
|
|
|
|
* or if it contains no element with given tag or more than 1 element. |
|
|
|
|
*/ |
|
|
|
|
private function hasSingleTagInsideElement(JSLikeHTMLElement $node, string $tag): bool |
|
|
|
|
private function getSingleTagInsideElement(JSLikeHTMLElement $node, string $tag): ?JSLikeHTMLElement |
|
|
|
|
{ |
|
|
|
|
$childNodes = iterator_to_array($node->childNodes); |
|
|
|
|
$children = array_filter($childNodes, fn ($childNode) => $childNode instanceof \DOMElement); |
|
|
|
|
@ -1554,7 +1554,7 @@ class Readability implements LoggerAwareInterface |
|
|
|
|
// And there should be no text nodes with real content |
|
|
|
|
$a = array_filter($childNodes, fn ($childNode) => $childNode instanceof \DOMText && preg_match($this->regexps['hasContent'], $this->getInnerText($childNode))); |
|
|
|
|
|
|
|
|
|
return 0 === \count($a); |
|
|
|
|
return 0 === \count($a) ? $children[0] : null; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
/** |
|
|
|
|
|