Use JSLikeHTMLElement in type hints

We use `DOMDocument::registerNodeClass()` to make DOM methods return
`JSLikeHTMLElement` instead of `DOMElement`. Unfortunately, it is not
possible for PHPStan to detect that, so we need to cast it ourselves:
https://github.com/phpstan/phpstan/discussions/10748
We may want to deprecate it in the future just to get rid of this mess.

Also add PHPStan stubs for DOM classes so that we do not need to cast everything.
It is fine to do that globally as we only ever use DOM with `JSLikeHTMLElement` registered.

This patch also allows us to get rid of the assertions in tests.
pull/93/head
Jan Tojnar 2 years ago
parent f610333040
commit 1226daa8f8
  1. 3
      phpstan.dist.neon
  2. 18
      src/Readability.php
  3. 36
      stubs/dom.stub
  4. 41
      tests/ReadabilityTest.php

@ -8,6 +8,9 @@ parameters:
bootstrapFiles: bootstrapFiles:
- vendor/bin/.phpunit/phpunit/vendor/autoload.php - vendor/bin/.phpunit/phpunit/vendor/autoload.php
stubFiles:
- stubs/dom.stub
includes: includes:
- vendor/phpstan/phpstan-phpunit/extension.neon - vendor/phpstan/phpstan-phpunit/extension.neon
- vendor/phpstan/phpstan-phpunit/rules.neon - vendor/phpstan/phpstan-phpunit/rules.neon

@ -36,12 +36,12 @@ class Readability implements LoggerAwareInterface
public $revertForcedParagraphElements = false; public $revertForcedParagraphElements = false;
/** /**
* @var ?\DOMElement * @var ?JSLikeHTMLElement
*/ */
public $articleTitle; public $articleTitle;
/** /**
* @var ?\DOMElement * @var ?JSLikeHTMLElement
*/ */
public $articleContent; public $articleContent;
@ -245,7 +245,7 @@ class Readability implements LoggerAwareInterface
/** /**
* Get article title element. * Get article title element.
* *
* @return \DOMElement * @return JSLikeHTMLElement
*/ */
public function getTitle() public function getTitle()
{ {
@ -259,7 +259,7 @@ class Readability implements LoggerAwareInterface
/** /**
* Get article content element. * Get article content element.
* *
* @return \DOMElement * @return JSLikeHTMLElement
*/ */
public function getContent() public function getContent()
{ {
@ -447,7 +447,7 @@ class Readability implements LoggerAwareInterface
*/ */
public function prepArticle(\DOMNode $articleContent): void public function prepArticle(\DOMNode $articleContent): void
{ {
if (!$articleContent instanceof \DOMElement) { if (!$articleContent instanceof JSLikeHTMLElement) {
return; return;
} }
@ -474,7 +474,7 @@ class Readability implements LoggerAwareInterface
} }
// Remove service data-candidate attribute. // Remove service data-candidate attribute.
/** @var \DOMNodeList<\DOMElement> */ /** @var \DOMNodeList<JSLikeHTMLElement> */
$elems = $xpath->query('.//*[@data-candidate]', $articleContent); $elems = $xpath->query('.//*[@data-candidate]', $articleContent);
foreach ($elems as $elem) { foreach ($elems as $elem) {
$elem->removeAttribute('data-candidate'); $elem->removeAttribute('data-candidate');
@ -645,7 +645,7 @@ class Readability implements LoggerAwareInterface
/** /**
* Remove extraneous break tags from a node. * Remove extraneous break tags from a node.
*/ */
public function killBreaks(\DOMElement $node): void public function killBreaks(JSLikeHTMLElement $node): void
{ {
$html = $node->getInnerHTML(); $html = $node->getInnerHTML();
$html = preg_replace($this->regexps['killBreaks'], '<br />', $html); $html = preg_replace($this->regexps['killBreaks'], '<br />', $html);
@ -1160,7 +1160,7 @@ class Readability implements LoggerAwareInterface
* This is faster to do before scoring but safer after. * This is faster to do before scoring but safer after.
*/ */
if ($this->flagIsActive(self::FLAG_STRIP_UNLIKELYS) && $xpath) { if ($this->flagIsActive(self::FLAG_STRIP_UNLIKELYS) && $xpath) {
/** @var \DOMNodeList<\DOMElement> */ /** @var \DOMNodeList<JSLikeHTMLElement> */
$candidates = $xpath->query('.//*[(self::footer and count(//footer)<2) or (self::aside and count(//aside)<2)]', $page->documentElement); $candidates = $xpath->query('.//*[(self::footer and count(//footer)<2) or (self::aside and count(//aside)<2)]', $page->documentElement);
for ($c = $candidates->length - 1; $c >= 0; --$c) { for ($c = $candidates->length - 1; $c >= 0; --$c) {
@ -1182,7 +1182,7 @@ class Readability implements LoggerAwareInterface
$topCandidates = array_fill(0, 5, null); $topCandidates = array_fill(0, 5, null);
if ($xpath) { if ($xpath) {
// Using array of DOMElements after deletion is a path to DOOMElement. // Using array of DOMElements after deletion is a path to DOOMElement.
/** @var \DOMNodeList<\DOMElement> */ /** @var \DOMNodeList<JSLikeHTMLElement> */
$candidates = $xpath->query('.//*[@data-candidate]', $page->documentElement); $candidates = $xpath->query('.//*[@data-candidate]', $page->documentElement);
$this->logger->debug('Candidates: ' . $candidates->length); $this->logger->debug('Candidates: ' . $candidates->length);

@ -0,0 +1,36 @@
<?php
// SPDX-FileCopyrightText: 2022 Ondřej Mirtes
// SPDX-License-Identifier: MIT
// Based on https://github.com/phpstan/phpstan-src/blob/b2a9ba4b82d19b01f37eb983746f1840f1213851/stubs/dom.stub
use Readability\JSLikeHTMLElement;
/**
* PHPStan stub for DOMDocument.
*
* Re-declares the members below so that static analysis sees JSLikeHTMLElement
* where elements are returned. This relies on JSLikeHTMLElement being registered
* through DOMDocument::registerNodeClass() wherever the project uses DOM.
* Only the signatures and PHPDoc are meaningful; the method bodies are empty.
*/
class DOMDocument
{
/**
* Document element, typed as JSLikeHTMLElement; nullable.
*
* @var JSLikeHTMLElement|null
*/
public $documentElement;
/**
* Declared as always null for a document in this stub.
*
* @var null
*/
public $ownerDocument;
/**
* Look up elements by tag name; the returned list is typed as holding JSLikeHTMLElement.
*
* @param string $name
* @return DOMNodeList<JSLikeHTMLElement>
*/
public function getElementsByTagName($name) {}
}
/**
* PHPStan stub for DOMNode.
*
* Declares no members of its own here; DOMElement in this stub file extends it.
*/
class DOMNode
{
}
/**
* PHPStan stub for DOMElement.
*
* Re-declares getElementsByTagName() so that static analysis sees a list of
* JSLikeHTMLElement. This relies on JSLikeHTMLElement being registered through
* DOMDocument::registerNodeClass() wherever the project uses DOM.
* Only the signature and PHPDoc are meaningful; the method body is empty.
*/
class DOMElement extends DOMNode
{
/**
* Look up elements by tag name; the returned list is typed as holding JSLikeHTMLElement.
*
* @param string $name
* @return DOMNodeList<JSLikeHTMLElement>
*/
public function getElementsByTagName($name) {}
}

@ -5,7 +5,6 @@ namespace Tests\Readability;
use Monolog\Handler\TestHandler; use Monolog\Handler\TestHandler;
use Monolog\Logger; use Monolog\Logger;
use Psr\Log\LoggerInterface; use Psr\Log\LoggerInterface;
use Readability\JSLikeHTMLElement;
use Readability\Readability; use Readability\Readability;
class ReadabilityTest extends \PHPUnit\Framework\TestCase class ReadabilityTest extends \PHPUnit\Framework\TestCase
@ -80,8 +79,6 @@ class ReadabilityTest extends \PHPUnit\Framework\TestCase
$res = $readability->init(); $res = $readability->init();
$this->assertFalse($res); $this->assertFalse($res);
$this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getContent());
$this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getTitle());
$this->assertEmpty($readability->getTitle()->getInnerHtml()); $this->assertEmpty($readability->getTitle()->getInnerHtml());
$this->assertStringContainsString('Sorry, Readability was unable to parse this page for content.', $readability->getContent()->getInnerHtml()); $this->assertStringContainsString('Sorry, Readability was unable to parse this page for content.', $readability->getContent()->getInnerHtml());
} }
@ -92,8 +89,6 @@ class ReadabilityTest extends \PHPUnit\Framework\TestCase
$res = $readability->init(); $res = $readability->init();
$this->assertTrue($res); $this->assertTrue($res);
$this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getContent());
$this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getTitle());
$this->assertStringContainsString('<div readability=', $readability->getContent()->getInnerHtml()); $this->assertStringContainsString('<div readability=', $readability->getContent()->getInnerHtml());
$this->assertEmpty($readability->getTitle()->getInnerHtml()); $this->assertEmpty($readability->getTitle()->getInnerHtml());
$this->assertStringContainsString('This is the awesome content :)', $readability->getContent()->getInnerHtml()); $this->assertStringContainsString('This is the awesome content :)', $readability->getContent()->getInnerHtml());
@ -105,8 +100,6 @@ class ReadabilityTest extends \PHPUnit\Framework\TestCase
$res = $readability->init(); $res = $readability->init();
$this->assertTrue($res); $this->assertTrue($res);
$this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getContent());
$this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getTitle());
$this->assertStringContainsString('<div readability=', $readability->getContent()->getInnerHtml()); $this->assertStringContainsString('<div readability=', $readability->getContent()->getInnerHtml());
$this->assertEmpty($readability->getTitle()->getInnerHtml()); $this->assertEmpty($readability->getTitle()->getInnerHtml());
$this->assertStringContainsString('This is the awesome content :)', $readability->getContent()->getInnerHtml()); $this->assertStringContainsString('This is the awesome content :)', $readability->getContent()->getInnerHtml());
@ -119,8 +112,6 @@ class ReadabilityTest extends \PHPUnit\Framework\TestCase
$res = $readability->init(); $res = $readability->init();
$this->assertTrue($res); $this->assertTrue($res);
$this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getContent());
$this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getTitle());
$this->assertStringContainsString('<div readability=', $readability->getContent()->getInnerHtml()); $this->assertStringContainsString('<div readability=', $readability->getContent()->getInnerHtml());
$this->assertEmpty($readability->getTitle()->getInnerHtml()); $this->assertEmpty($readability->getTitle()->getInnerHtml());
$this->assertStringContainsString('This is the awesome content :)', $readability->getContent()->getInnerHtml()); $this->assertStringContainsString('This is the awesome content :)', $readability->getContent()->getInnerHtml());
@ -134,8 +125,6 @@ class ReadabilityTest extends \PHPUnit\Framework\TestCase
$res = $readability->init(); $res = $readability->init();
$this->assertTrue($res); $this->assertTrue($res);
$this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getContent());
$this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getTitle());
$this->assertStringContainsString('<div readability=', $readability->getContent()->getInnerHtml()); $this->assertStringContainsString('<div readability=', $readability->getContent()->getInnerHtml());
$this->assertEmpty($readability->getTitle()->getInnerHtml()); $this->assertEmpty($readability->getTitle()->getInnerHtml());
$this->assertStringContainsString('This is an awesome text with some links, here there are', $readability->getContent()->getInnerHtml()); $this->assertStringContainsString('This is an awesome text with some links, here there are', $readability->getContent()->getInnerHtml());
@ -151,8 +140,6 @@ class ReadabilityTest extends \PHPUnit\Framework\TestCase
$res = $readability->init(); $res = $readability->init();
$this->assertTrue($res); $this->assertTrue($res);
$this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getContent());
$this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getTitle());
$this->assertStringContainsString('<div readability=', $readability->getContent()->getInnerHtml()); $this->assertStringContainsString('<div readability=', $readability->getContent()->getInnerHtml());
$this->assertEmpty($readability->getTitle()->getInnerHtml()); $this->assertEmpty($readability->getTitle()->getInnerHtml());
$this->assertStringContainsString('This is an awesome text with some links, here there are', $readability->getContent()->getInnerHtml()); $this->assertStringContainsString('This is an awesome text with some links, here there are', $readability->getContent()->getInnerHtml());
@ -167,8 +154,6 @@ class ReadabilityTest extends \PHPUnit\Framework\TestCase
$res = $readability->init(); $res = $readability->init();
$this->assertTrue($res); $this->assertTrue($res);
$this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getContent());
$this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getTitle());
$this->assertStringContainsString('<div readability=', $readability->getContent()->getInnerHtml()); $this->assertStringContainsString('<div readability=', $readability->getContent()->getInnerHtml());
$this->assertEmpty($readability->getTitle()->getInnerHtml()); $this->assertEmpty($readability->getTitle()->getInnerHtml());
$this->assertStringContainsString('This is an awesome text with some links, here there are', $readability->getContent()->getInnerHtml()); $this->assertStringContainsString('This is an awesome text with some links, here there are', $readability->getContent()->getInnerHtml());
@ -182,8 +167,6 @@ class ReadabilityTest extends \PHPUnit\Framework\TestCase
$res = $readability->init(); $res = $readability->init();
$this->assertTrue($res); $this->assertTrue($res);
$this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getContent());
$this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getTitle());
$this->assertStringContainsString('alt="article"', $readability->getContent()->getInnerHtml()); $this->assertStringContainsString('alt="article"', $readability->getContent()->getInnerHtml());
$this->assertEmpty($readability->getTitle()->getInnerHtml()); $this->assertEmpty($readability->getTitle()->getInnerHtml());
$this->assertStringContainsString('This is an awesome text with some links, here there are', $readability->getContent()->getInnerHtml()); $this->assertStringContainsString('This is an awesome text with some links, here there are', $readability->getContent()->getInnerHtml());
@ -197,8 +180,6 @@ class ReadabilityTest extends \PHPUnit\Framework\TestCase
$res = $readability->init(); $res = $readability->init();
$this->assertTrue($res); $this->assertTrue($res);
$this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getContent());
$this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getTitle());
$this->assertEmpty($readability->getTitle()->getInnerHtml()); $this->assertEmpty($readability->getTitle()->getInnerHtml());
$this->assertStringContainsString('This is an awesome text with some links, here there are', $readability->getContent()->getInnerHtml()); $this->assertStringContainsString('This is an awesome text with some links, here there are', $readability->getContent()->getInnerHtml());
$this->assertStringNotContainsString('<aside>', $readability->getContent()->getInnerHtml()); $this->assertStringNotContainsString('<aside>', $readability->getContent()->getInnerHtml());
@ -212,8 +193,6 @@ class ReadabilityTest extends \PHPUnit\Framework\TestCase
$res = $readability->init(); $res = $readability->init();
$this->assertTrue($res); $this->assertTrue($res);
$this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getContent());
$this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getTitle());
$this->assertStringContainsString('alt="article"', $readability->getContent()->getInnerHtml()); $this->assertStringContainsString('alt="article"', $readability->getContent()->getInnerHtml());
$this->assertEmpty($readability->getTitle()->getInnerHtml()); $this->assertEmpty($readability->getTitle()->getInnerHtml());
$this->assertStringContainsString('This is an awesome text with some links, here there are', $readability->getContent()->getInnerHtml()); $this->assertStringContainsString('This is an awesome text with some links, here there are', $readability->getContent()->getInnerHtml());
@ -228,8 +207,6 @@ class ReadabilityTest extends \PHPUnit\Framework\TestCase
$res = $readability->init(); $res = $readability->init();
$this->assertTrue($res); $this->assertTrue($res);
$this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getContent());
$this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getTitle());
$this->assertStringContainsString('alt="article"', $readability->getContent()->getInnerHtml()); $this->assertStringContainsString('alt="article"', $readability->getContent()->getInnerHtml());
$this->assertEmpty($readability->getTitle()->getInnerHtml()); $this->assertEmpty($readability->getTitle()->getInnerHtml());
$this->assertStringContainsString('This is an awesome text with some links, here there are', $readability->getContent()->getInnerHtml()); $this->assertStringContainsString('This is an awesome text with some links, here there are', $readability->getContent()->getInnerHtml());
@ -243,8 +220,6 @@ class ReadabilityTest extends \PHPUnit\Framework\TestCase
$res = $readability->init(); $res = $readability->init();
$this->assertTrue($res); $this->assertTrue($res);
$this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getContent());
$this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getTitle());
$this->assertEmpty($readability->getTitle()->getInnerHtml()); $this->assertEmpty($readability->getTitle()->getInnerHtml());
$this->assertStringContainsString('This is an awesome text with some links, here there are', $readability->getContent()->getInnerHtml()); $this->assertStringContainsString('This is an awesome text with some links, here there are', $readability->getContent()->getInnerHtml());
} }
@ -256,8 +231,6 @@ class ReadabilityTest extends \PHPUnit\Framework\TestCase
$res = $readability->init(); $res = $readability->init();
$this->assertTrue($res); $this->assertTrue($res);
$this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getContent());
$this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getTitle());
$this->assertEmpty($readability->getTitle()->getInnerHtml()); $this->assertEmpty($readability->getTitle()->getInnerHtml());
$this->assertStringContainsString('This is an awesome text with some links, here there are', $readability->getContent()->getInnerHtml()); $this->assertStringContainsString('This is an awesome text with some links, here there are', $readability->getContent()->getInnerHtml());
$this->assertStringContainsString('This text is also an awesome text and you should know that', $readability->getContent()->getInnerHtml()); $this->assertStringContainsString('This text is also an awesome text and you should know that', $readability->getContent()->getInnerHtml());
@ -270,8 +243,6 @@ class ReadabilityTest extends \PHPUnit\Framework\TestCase
$res = $readability->init(); $res = $readability->init();
$this->assertTrue($res); $this->assertTrue($res);
$this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getContent());
$this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getTitle());
$this->assertEmpty($readability->getTitle()->getInnerHtml()); $this->assertEmpty($readability->getTitle()->getInnerHtml());
$this->assertStringContainsString('This is an awesome text with some links, here there are', $readability->getContent()->getInnerHtml()); $this->assertStringContainsString('This is an awesome text with some links, here there are', $readability->getContent()->getInnerHtml());
$this->assertStringNotContainsString('This text is also an awesome text and you should know that', $readability->getContent()->getInnerHtml()); $this->assertStringNotContainsString('This text is also an awesome text and you should know that', $readability->getContent()->getInnerHtml());
@ -284,8 +255,6 @@ class ReadabilityTest extends \PHPUnit\Framework\TestCase
$res = $readability->init(); $res = $readability->init();
$this->assertTrue($res); $this->assertTrue($res);
$this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getContent());
$this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getTitle());
$this->assertSame('this is my title', $readability->getTitle()->getInnerHtml()); $this->assertSame('this is my title', $readability->getTitle()->getInnerHtml());
$this->assertStringContainsString('This is an awesome text with some links, here there are', $readability->getContent()->getInnerHtml()); $this->assertStringContainsString('This is an awesome text with some links, here there are', $readability->getContent()->getInnerHtml());
$this->assertStringNotContainsString('This text is also an awesome text and you should know that', $readability->getContent()->getInnerHtml()); $this->assertStringNotContainsString('This text is also an awesome text and you should know that', $readability->getContent()->getInnerHtml());
@ -298,8 +267,6 @@ class ReadabilityTest extends \PHPUnit\Framework\TestCase
$res = $readability->init(); $res = $readability->init();
$this->assertTrue($res); $this->assertTrue($res);
$this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getContent());
$this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getTitle());
$this->assertSame('title2 - title3', $readability->getTitle()->getInnerHtml()); $this->assertSame('title2 - title3', $readability->getTitle()->getInnerHtml());
$this->assertStringContainsString('This is an awesome text with some links, here there are', $readability->getContent()->getInnerHtml()); $this->assertStringContainsString('This is an awesome text with some links, here there are', $readability->getContent()->getInnerHtml());
$this->assertStringNotContainsString('This text is also an awesome text and you should know that', $readability->getContent()->getInnerHtml()); $this->assertStringNotContainsString('This text is also an awesome text and you should know that', $readability->getContent()->getInnerHtml());
@ -312,8 +279,6 @@ class ReadabilityTest extends \PHPUnit\Framework\TestCase
$res = $readability->init(); $res = $readability->init();
$this->assertTrue($res); $this->assertTrue($res);
$this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getContent());
$this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getTitle());
$this->assertSame('title2 : title3', $readability->getTitle()->getInnerHtml()); $this->assertSame('title2 : title3', $readability->getTitle()->getInnerHtml());
$this->assertStringContainsString('This is an awesome text with some links, here there are', $readability->getContent()->getInnerHtml()); $this->assertStringContainsString('This is an awesome text with some links, here there are', $readability->getContent()->getInnerHtml());
$this->assertStringNotContainsString('This text is also an awesome text and you should know that', $readability->getContent()->getInnerHtml()); $this->assertStringNotContainsString('This text is also an awesome text and you should know that', $readability->getContent()->getInnerHtml());
@ -326,8 +291,6 @@ class ReadabilityTest extends \PHPUnit\Framework\TestCase
$res = $readability->init(); $res = $readability->init();
$this->assertTrue($res); $this->assertTrue($res);
$this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getContent());
$this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getTitle());
$this->assertSame('this is my h1 title !', $readability->getTitle()->getInnerHtml()); $this->assertSame('this is my h1 title !', $readability->getTitle()->getInnerHtml());
$this->assertStringContainsString('This is an awesome text with some links, here there are', $readability->getContent()->getInnerHtml()); $this->assertStringContainsString('This is an awesome text with some links, here there are', $readability->getContent()->getInnerHtml());
$this->assertStringNotContainsString('This text is also an awesome text and you should know that', $readability->getContent()->getInnerHtml()); $this->assertStringNotContainsString('This text is also an awesome text and you should know that', $readability->getContent()->getInnerHtml());
@ -374,8 +337,6 @@ class ReadabilityTest extends \PHPUnit\Framework\TestCase
$res = $readability->init(); $res = $readability->init();
$this->assertTrue($res); $this->assertTrue($res);
$this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getContent());
$this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getTitle());
$this->assertStringContainsString('<iframe src="https://www.youtube.com/embed/PUep6xNeKjA" width="560" height="315" frameborder="0" allowfullscreen="allowfullscreen"> </iframe>', $readability->getContent()->getInnerHtml()); $this->assertStringContainsString('<iframe src="https://www.youtube.com/embed/PUep6xNeKjA" width="560" height="315" frameborder="0" allowfullscreen="allowfullscreen"> </iframe>', $readability->getContent()->getInnerHtml());
$this->assertStringContainsString('3D Touch', $readability->getTitle()->getInnerHtml()); $this->assertStringContainsString('3D Touch', $readability->getTitle()->getInnerHtml());
} finally { } finally {
@ -442,8 +403,6 @@ class ReadabilityTest extends \PHPUnit\Framework\TestCase
$res = $readability->init(); $res = $readability->init();
$this->assertTrue($res); $this->assertTrue($res);
$this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getContent());
$this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getTitle());
} }
public function testPostFilters(): void public function testPostFilters(): void

Loading…
Cancel
Save