From 1226daa8f8584dd6623f936b413599faaf1ae750 Mon Sep 17 00:00:00 2001 From: Jan Tojnar Date: Sat, 16 Mar 2024 21:51:43 +0100 Subject: [PATCH] Use JSLikeHTMLElement in type hints We use `DOMDocument::registerNodeClass()` to make DOM methods return `JSLikeHTMLElement` instead of `DOMElement`. Unfortunately, it is not possible for PHPStan to detect that so we need to cast it ourselves: https://github.com/phpstan/phpstan/discussions/10748 We may want to deprecate it in the future just to get rid of this mess. Also add PHPStan stubs for DOM classes so that we do not need to cast everything. It is fine to do that globally as we only ever use DOM with `JSLikeHTMLElement` registered. This patch also allows us to get rid of the assertions in tests. --- phpstan.dist.neon | 3 +++ src/Readability.php | 18 ++++++++--------- stubs/dom.stub | 36 ++++++++++++++++++++++++++++++++++ tests/ReadabilityTest.php | 41 --------------------------------------- 4 files changed, 48 insertions(+), 50 deletions(-) create mode 100644 stubs/dom.stub diff --git a/phpstan.dist.neon b/phpstan.dist.neon index 7e5d5d8..95b916d 100644 --- a/phpstan.dist.neon +++ b/phpstan.dist.neon @@ -8,6 +8,9 @@ parameters: bootstrapFiles: - vendor/bin/.phpunit/phpunit/vendor/autoload.php + stubFiles: + - stubs/dom.stub + includes: - vendor/phpstan/phpstan-phpunit/extension.neon - vendor/phpstan/phpstan-phpunit/rules.neon diff --git a/src/Readability.php b/src/Readability.php index 9bf62c0..9f6f87f 100644 --- a/src/Readability.php +++ b/src/Readability.php @@ -36,12 +36,12 @@ class Readability implements LoggerAwareInterface public $revertForcedParagraphElements = false; /** - * @var ?\DOMElement + * @var ?JSLikeHTMLElement */ public $articleTitle; /** - * @var ?\DOMElement + * @var ?JSLikeHTMLElement */ public $articleContent; @@ -245,7 +245,7 @@ class Readability implements LoggerAwareInterface /** * Get article title element. * - * @return \DOMElement + * @return JSLikeHTMLElement */ public function getTitle() { @@ -259,7 +259,7 @@ class Readability implements LoggerAwareInterface /** * Get article content element. * - * @return \DOMElement + * @return JSLikeHTMLElement */ public function getContent() { @@ -447,7 +447,7 @@ class Readability implements LoggerAwareInterface */ public function prepArticle(\DOMNode $articleContent): void { - if (!$articleContent instanceof \DOMElement) { + if (!$articleContent instanceof JSLikeHTMLElement) { return; } @@ -474,7 +474,7 @@ class Readability implements LoggerAwareInterface } // Remove service data-candidate attribute. - /** @var \DOMNodeList<\DOMElement> */ + /** @var \DOMNodeList */ $elems = $xpath->query('.//*[@data-candidate]', $articleContent); foreach ($elems as $elem) { $elem->removeAttribute('data-candidate'); @@ -645,7 +645,7 @@ class Readability implements LoggerAwareInterface /** * Remove extraneous break tags from a node. */ - public function killBreaks(\DOMElement $node): void + public function killBreaks(JSLikeHTMLElement $node): void { $html = $node->getInnerHTML(); $html = preg_replace($this->regexps['killBreaks'], '
', $html); @@ -1160,7 +1160,7 @@ class Readability implements LoggerAwareInterface * This is faster to do before scoring but safer after. */ if ($this->flagIsActive(self::FLAG_STRIP_UNLIKELYS) && $xpath) { - /** @var \DOMNodeList<\DOMElement> */ + /** @var \DOMNodeList */ $candidates = $xpath->query('.//*[(self::footer and count(//footer)<2) or (self::aside and count(//aside)<2)]', $page->documentElement); for ($c = $candidates->length - 1; $c >= 0; --$c) { @@ -1182,7 +1182,7 @@ class Readability implements LoggerAwareInterface $topCandidates = array_fill(0, 5, null); if ($xpath) { // Using array of DOMElements after deletion is a path to DOOMElement. - /** @var \DOMNodeList<\DOMElement> */ + /** @var \DOMNodeList */ $candidates = $xpath->query('.//*[@data-candidate]', $page->documentElement); $this->logger->debug('Candidates: ' . $candidates->length); diff --git a/stubs/dom.stub b/stubs/dom.stub new file mode 100644 index 0000000..ac85e9d --- /dev/null +++ b/stubs/dom.stub @@ -0,0 +1,36 @@ + + */ + public function getElementsByTagName($name) {} +} + +class DOMNode +{ + +} + +class DOMElement extends DOMNode +{ + /** + * @param string $name + * @return DOMNodeList + */ + public function getElementsByTagName($name) {} +} diff --git a/tests/ReadabilityTest.php b/tests/ReadabilityTest.php index b09a97a..74b4d31 100644 --- a/tests/ReadabilityTest.php +++ b/tests/ReadabilityTest.php @@ -5,7 +5,6 @@ namespace Tests\Readability; use Monolog\Handler\TestHandler; use Monolog\Logger; use Psr\Log\LoggerInterface; -use Readability\JSLikeHTMLElement; use Readability\Readability; class ReadabilityTest extends \PHPUnit\Framework\TestCase @@ -80,8 +79,6 @@ class ReadabilityTest extends \PHPUnit\Framework\TestCase $res = $readability->init(); $this->assertFalse($res); - $this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getContent()); - $this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getTitle()); $this->assertEmpty($readability->getTitle()->getInnerHtml()); $this->assertStringContainsString('Sorry, Readability was unable to parse this page for content.', $readability->getContent()->getInnerHtml()); } @@ -92,8 +89,6 @@ class ReadabilityTest extends \PHPUnit\Framework\TestCase $res = $readability->init(); $this->assertTrue($res); - $this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getContent()); - $this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getTitle()); $this->assertStringContainsString('
getContent()->getInnerHtml()); @@ -105,8 +100,6 @@ class ReadabilityTest extends \PHPUnit\Framework\TestCase $res = $readability->init(); $this->assertTrue($res); - $this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getContent()); - $this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getTitle()); $this->assertStringContainsString('
getContent()->getInnerHtml()); @@ -119,8 +112,6 @@ class ReadabilityTest extends \PHPUnit\Framework\TestCase $res = $readability->init(); $this->assertTrue($res); - $this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getContent()); - $this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getTitle()); $this->assertStringContainsString('
getContent()->getInnerHtml()); @@ -134,8 +125,6 @@ class ReadabilityTest extends \PHPUnit\Framework\TestCase $res = $readability->init(); $this->assertTrue($res); - $this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getContent()); - $this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getTitle()); $this->assertStringContainsString('
getContent()->getInnerHtml()); @@ -151,8 +140,6 @@ class ReadabilityTest extends \PHPUnit\Framework\TestCase $res = $readability->init(); $this->assertTrue($res); - $this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getContent()); - $this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getTitle()); $this->assertStringContainsString('
getContent()->getInnerHtml()); @@ -167,8 +154,6 @@ class ReadabilityTest extends \PHPUnit\Framework\TestCase $res = $readability->init(); $this->assertTrue($res); - $this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getContent()); - $this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getTitle()); $this->assertStringContainsString('
getContent()->getInnerHtml()); @@ -182,8 +167,6 @@ class ReadabilityTest extends \PHPUnit\Framework\TestCase $res = $readability->init(); $this->assertTrue($res); - $this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getContent()); - $this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getTitle()); $this->assertStringContainsString('alt="article"', $readability->getContent()->getInnerHtml()); $this->assertEmpty($readability->getTitle()->getInnerHtml()); $this->assertStringContainsString('This is an awesome text with some links, here there are', $readability->getContent()->getInnerHtml()); @@ -197,8 +180,6 @@ class ReadabilityTest extends \PHPUnit\Framework\TestCase $res = $readability->init(); $this->assertTrue($res); - $this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getContent()); - $this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getTitle()); $this->assertEmpty($readability->getTitle()->getInnerHtml()); $this->assertStringContainsString('This is an awesome text with some links, here there are', $readability->getContent()->getInnerHtml()); $this->assertStringNotContainsString('