Merge pull request #80 from jtojnar/stricter

Fix some CI issues
pull/86/head 2.0.3
Jérémy Benoist 3 years ago committed by GitHub
commit 38870cdff1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 2
      .github/workflows/continuous-integration.yml
  2. 8
      composer.json
  3. 2
      phpstan.neon
  4. 21
      rector.php
  5. 11
      src/JSLikeHTMLElement.php
  6. 55
      src/Readability.php
  7. 12
      tests/ReadabilityTest.php

@ -24,6 +24,8 @@ jobs:
- "7.4" - "7.4"
- "8.0" - "8.0"
- "8.1" - "8.1"
- "8.2"
- "8.3"
steps: steps:
- name: "Checkout" - name: "Checkout"

@ -35,7 +35,7 @@
"symfony/phpunit-bridge": "^4.4|^5.3|^6.0", "symfony/phpunit-bridge": "^4.4|^5.3|^6.0",
"phpstan/phpstan": "^1.3", "phpstan/phpstan": "^1.3",
"phpstan/phpstan-phpunit": "^1.0", "phpstan/phpstan-phpunit": "^1.0",
"rector/rector": "^0.12.15" "rector/rector": "^0.15.0"
}, },
"suggest": { "suggest": {
"ext-tidy": "Used to clean up given HTML and to avoid problems with bad HTML structure." "ext-tidy": "Used to clean up given HTML and to avoid problems with bad HTML structure."
@ -45,5 +45,11 @@
}, },
"autoload-dev": { "autoload-dev": {
"psr-4": { "Tests\\Readability\\": "tests/" } "psr-4": { "Tests\\Readability\\": "tests/" }
},
"scripts": {
"fix": "php-cs-fixer fix --verbose --diff",
"phpstan": "phpstan analyze --memory-limit 512M",
"rector": "rector process",
"test": "simple-phpunit -v"
} }
} }

@ -6,7 +6,7 @@ parameters:
# https://github.com/phpstan/phpstan/issues/694#issuecomment-350724288 # https://github.com/phpstan/phpstan/issues/694#issuecomment-350724288
bootstrapFiles: bootstrapFiles:
- vendor/bin/.phpunit/phpunit-8.5-0/vendor/autoload.php - vendor/bin/.phpunit/phpunit/vendor/autoload.php
checkMissingIterableValueType: false checkMissingIterableValueType: false

@ -2,31 +2,30 @@
declare(strict_types=1); declare(strict_types=1);
use Rector\Core\Configuration\Option; use Rector\Config\RectorConfig;
use Rector\Core\ValueObject\PhpVersion; use Rector\Core\ValueObject\PhpVersion;
use Rector\Set\ValueObject\LevelSetList; use Rector\Set\ValueObject\LevelSetList;
use Symfony\Component\DependencyInjection\Loader\Configurator\ContainerConfigurator;
return static function (ContainerConfigurator $containerConfigurator): void {
$parameters = $containerConfigurator->parameters();
return static function (RectorConfig $rectorConfig): void {
// paths to refactor; solid alternative to CLI arguments // paths to refactor; solid alternative to CLI arguments
$parameters->set(Option::PATHS, [ $rectorConfig->paths([
__DIR__ . '/src', __DIR__ . '/src',
__DIR__ . '/tests', __DIR__ . '/tests',
]); ]);
// Path to phpstan with extensions, that PHPSTan in Rector uses to determine types // Path to phpstan with extensions, that PHPSTan in Rector uses to determine types
$parameters->set(Option::PHPSTAN_FOR_RECTOR_PATH, __DIR__ . '/phpstan.neon'); $rectorConfig->phpstanConfig(__DIR__ . '/phpstan.neon');
$parameters->set(Option::BOOTSTRAP_FILES, [ $rectorConfig->bootstrapFiles([
__DIR__ . '/vendor/bin/.phpunit/phpunit-8.5-0/vendor/autoload.php', __DIR__ . '/vendor/bin/.phpunit/phpunit/vendor/autoload.php',
__DIR__ . '/vendor/autoload.php', __DIR__ . '/vendor/autoload.php',
]); ]);
// Define what rule sets will be applied // Define what rule sets will be applied
$containerConfigurator->import(LevelSetList::UP_TO_PHP_72); $rectorConfig->sets([
LevelSetList::UP_TO_PHP_72,
]);
// is your PHP version different from the one your refactor to? // is your PHP version different from the one your refactor to?
$parameters->set(Option::PHP_VERSION_FEATURES, PhpVersion::PHP_72); $rectorConfig->phpVersion(PhpVersion::PHP_72);
}; };

@ -39,9 +39,9 @@ class JSLikeHTMLElement extends \DOMElement
/** /**
* Used for setting innerHTML like it's done in JavaScript:. * Used for setting innerHTML like it's done in JavaScript:.
* *
* @code * ```php
* $div->innerHTML = '<h2>Chapter 2</h2><p>The story begins...</p>'; * $div->innerHTML = '<h2>Chapter 2</h2><p>The story begins...</p>';
* @endcode * ```
*/ */
public function __set($name, $value) public function __set($name, $value)
{ {
@ -79,14 +79,13 @@ class JSLikeHTMLElement extends \DOMElement
} else { } else {
// $value is probably ill-formed // $value is probably ill-formed
$f = new \DOMDocument(); $f = new \DOMDocument();
$value = mb_convert_encoding($value, 'HTML-ENTITIES', 'UTF-8');
// Using <htmlfragment> will generate a warning, but so will bad HTML // Using <htmlfragment> will generate a warning, but so will bad HTML
// (and by this point, bad HTML is what we've got). // (and by this point, bad HTML is what we've got).
// We use it (and suppress the warning) because an HTML fragment will // We use it (and suppress the warning) because an HTML fragment will
// be wrapped around <html><body> tags which we don't really want to keep. // be wrapped around <html><body> tags which we don't really want to keep.
// Note: despite the warning, if loadHTML succeeds it will return true. // Note: despite the warning, if loadHTML succeeds it will return true.
$result = $f->loadHTML('<htmlfragment>' . $value . '</htmlfragment>'); $result = $f->loadHTML('<meta charset="utf-8"><htmlfragment>' . $value . '</htmlfragment>');
if ($result) { if ($result) {
$import = $f->getElementsByTagName('htmlfragment')->item(0); $import = $f->getElementsByTagName('htmlfragment')->item(0);
@ -105,9 +104,9 @@ class JSLikeHTMLElement extends \DOMElement
/** /**
* Used for getting innerHTML like it's done in JavaScript:. * Used for getting innerHTML like it's done in JavaScript:.
* *
* @code * ```php
* $string = $div->innerHTML; * $string = $div->innerHTML;
* @endcode * ```
*/ */
public function __get($name) public function __get($name)
{ {

@ -2,7 +2,6 @@
namespace Readability; namespace Readability;
use DOMElement;
use Masterminds\HTML5; use Masterminds\HTML5;
use Psr\Log\LoggerAwareInterface; use Psr\Log\LoggerAwareInterface;
use Psr\Log\LoggerInterface; use Psr\Log\LoggerInterface;
@ -161,7 +160,7 @@ class Readability implements LoggerAwareInterface
/** /**
* Get article title element. * Get article title element.
* *
* @return DOMElement * @return \DOMElement
*/ */
public function getTitle() public function getTitle()
{ {
@ -171,7 +170,7 @@ class Readability implements LoggerAwareInterface
/** /**
* Get article content element. * Get article content element.
* *
* @return DOMElement * @return \DOMElement
*/ */
public function getContent() public function getContent()
{ {
@ -280,7 +279,7 @@ class Readability implements LoggerAwareInterface
/** /**
* Run any post-process modifications to article content as necessary. * Run any post-process modifications to article content as necessary.
*/ */
public function postProcessContent(DOMElement $articleContent): void public function postProcessContent(\DOMElement $articleContent): void
{ {
if ($this->convertLinksToFootnotes && !preg_match('/\bwiki/', $this->url)) { if ($this->convertLinksToFootnotes && !preg_match('/\bwiki/', $this->url)) {
$this->addFootnotes($articleContent); $this->addFootnotes($articleContent);
@ -292,7 +291,7 @@ class Readability implements LoggerAwareInterface
* *
* @see http://www.roughtype.com/archives/2010/05/experiments_in.php * @see http://www.roughtype.com/archives/2010/05/experiments_in.php
*/ */
public function addFootnotes(DOMElement $articleContent): void public function addFootnotes(\DOMElement $articleContent): void
{ {
$footnotesWrapper = $this->dom->createElement('footer'); $footnotesWrapper = $this->dom->createElement('footer');
$footnotesWrapper->setAttribute('class', 'readability-footnotes'); $footnotesWrapper->setAttribute('class', 'readability-footnotes');
@ -335,7 +334,7 @@ class Readability implements LoggerAwareInterface
$articleLink->setAttribute('style', 'color: inherit; text-decoration: none;'); $articleLink->setAttribute('style', 'color: inherit; text-decoration: none;');
$articleLink->setAttribute('name', 'readabilityLink-' . $linkCount); $articleLink->setAttribute('name', 'readabilityLink-' . $linkCount);
$footnote->setInnerHtml('<small><sup><a href="#readabilityLink-' . $linkCount . '" title="Jump to Link in Article">^</a></sup></small> '); $footnote->setInnerHtml('<small><sup><a href="#readabilityLink-' . $linkCount . '" title="Jump to Link in Article">^</a></sup></small> ');
$footnoteLink->setInnerHtml(('' !== $footnoteLink->getAttribute('title') ? $footnoteLink->getAttribute('title') : $linkText)); $footnoteLink->setInnerHtml('' !== $footnoteLink->getAttribute('title') ? $footnoteLink->getAttribute('title') : $linkText);
$footnoteLink->setAttribute('name', 'readabilityFootnoteLink-' . $linkCount); $footnoteLink->setAttribute('name', 'readabilityFootnoteLink-' . $linkCount);
$footnote->appendChild($footnoteLink); $footnote->appendChild($footnoteLink);
@ -356,7 +355,7 @@ class Readability implements LoggerAwareInterface
*/ */
public function prepArticle(\DOMNode $articleContent): void public function prepArticle(\DOMNode $articleContent): void
{ {
if (!$articleContent instanceof DOMElement) { if (!$articleContent instanceof \DOMElement) {
return; return;
} }
@ -456,7 +455,7 @@ class Readability implements LoggerAwareInterface
* Get the inner text of a node. * Get the inner text of a node.
* This also strips out any excess whitespace to be found. * This also strips out any excess whitespace to be found.
* *
* @param DOMElement $e * @param \DOMElement $e
* @param bool $normalizeSpaces (default: true) * @param bool $normalizeSpaces (default: true)
* @param bool $flattenLines (default: false) * @param bool $flattenLines (default: false)
*/ */
@ -482,7 +481,7 @@ class Readability implements LoggerAwareInterface
/** /**
* Remove the style attribute on every $e and under. * Remove the style attribute on every $e and under.
*/ */
public function cleanStyles(DOMElement $e): void public function cleanStyles(\DOMElement $e): void
{ {
if (\is_object($e)) { if (\is_object($e)) {
$elems = $e->getElementsByTagName('*'); $elems = $e->getElementsByTagName('*');
@ -515,7 +514,7 @@ class Readability implements LoggerAwareInterface
* This is the amount of text that is inside a link divided by the total text in the node. * This is the amount of text that is inside a link divided by the total text in the node.
* Can exclude external references to differentiate between simple text and menus/infoblocks. * Can exclude external references to differentiate between simple text and menus/infoblocks.
*/ */
public function getLinkDensity(DOMElement $e, bool $excludeExternal = false): float public function getLinkDensity(\DOMElement $e, bool $excludeExternal = false): float
{ {
$links = $e->getElementsByTagName('a'); $links = $e->getElementsByTagName('a');
$textLength = mb_strlen($this->getInnerText($e, true, true)); $textLength = mb_strlen($this->getInnerText($e, true, true));
@ -538,7 +537,7 @@ class Readability implements LoggerAwareInterface
/** /**
* Get an element relative weight. * Get an element relative weight.
*/ */
public function getWeight(DOMElement $e): int public function getWeight(\DOMElement $e): int
{ {
if (!$this->flagIsActive(self::FLAG_WEIGHT_ATTRIBUTES)) { if (!$this->flagIsActive(self::FLAG_WEIGHT_ATTRIBUTES)) {
return 0; return 0;
@ -556,7 +555,7 @@ class Readability implements LoggerAwareInterface
/** /**
* Remove extraneous break tags from a node. * Remove extraneous break tags from a node.
*/ */
public function killBreaks(DOMElement $node): void public function killBreaks(\DOMElement $node): void
{ {
$html = $node->getInnerHTML(); $html = $node->getInnerHTML();
$html = preg_replace($this->regexps['killBreaks'], '<br />', $html); $html = preg_replace($this->regexps['killBreaks'], '<br />', $html);
@ -569,7 +568,7 @@ class Readability implements LoggerAwareInterface
* *
* Updated 2012-09-18 to preserve youtube/vimeo iframes * Updated 2012-09-18 to preserve youtube/vimeo iframes
*/ */
public function clean(DOMElement $e, string $tag): void public function clean(\DOMElement $e, string $tag): void
{ {
$targetList = $e->getElementsByTagName($tag); $targetList = $e->getElementsByTagName($tag);
$isEmbed = ('audio' === $tag || 'video' === $tag || 'iframe' === $tag || 'object' === $tag || 'embed' === $tag); $isEmbed = ('audio' === $tag || 'video' === $tag || 'iframe' === $tag || 'object' === $tag || 'embed' === $tag);
@ -601,7 +600,7 @@ class Readability implements LoggerAwareInterface
* "Fishy" is an algorithm based on content length, classnames, * "Fishy" is an algorithm based on content length, classnames,
* link density, number of images & embeds, etc. * link density, number of images & embeds, etc.
*/ */
public function cleanConditionally(DOMElement $e, string $tag): void public function cleanConditionally(\DOMElement $e, string $tag): void
{ {
if (!$this->flagIsActive(self::FLAG_CLEAN_CONDITIONALLY)) { if (!$this->flagIsActive(self::FLAG_CLEAN_CONDITIONALLY)) {
return; return;
@ -714,7 +713,7 @@ class Readability implements LoggerAwareInterface
/** /**
* Clean out spurious headers from an Element. Checks things like classnames and link density. * Clean out spurious headers from an Element. Checks things like classnames and link density.
*/ */
public function cleanHeaders(DOMElement $e): void public function cleanHeaders(\DOMElement $e): void
{ {
for ($headerIndex = 1; $headerIndex < 3; ++$headerIndex) { for ($headerIndex = 1; $headerIndex < 3; ++$headerIndex) {
$headers = $e->getElementsByTagName('h' . $headerIndex); $headers = $e->getElementsByTagName('h' . $headerIndex);
@ -754,7 +753,7 @@ class Readability implements LoggerAwareInterface
/** /**
* Get the article title as an H1. * Get the article title as an H1.
* *
* @return DOMElement * @return \DOMElement
*/ */
protected function getArticleTitle() protected function getArticleTitle()
{ {
@ -826,7 +825,7 @@ class Readability implements LoggerAwareInterface
* Initialize a node with the readability object. Also checks the * Initialize a node with the readability object. Also checks the
* className/id for special names to add to its score. * className/id for special names to add to its score.
*/ */
protected function initializeNode(DOMElement $node): void protected function initializeNode(\DOMElement $node): void
{ {
if (!isset($node->tagName)) { if (!isset($node->tagName)) {
return; return;
@ -894,11 +893,11 @@ class Readability implements LoggerAwareInterface
* Using a variety of metrics (content score, classname, element types), find the content that is * Using a variety of metrics (content score, classname, element types), find the content that is
* most likely to be the stuff a user wants to read. Then return it wrapped up in a div. * most likely to be the stuff a user wants to read. Then return it wrapped up in a div.
* *
* @param DOMElement $page * @param \DOMElement $page
* *
* @return DOMElement|false * @return \DOMElement|false
*/ */
protected function grabArticle(DOMElement $page = null) protected function grabArticle(\DOMElement $page = null)
{ {
if (!$page) { if (!$page) {
$page = $this->dom; $page = $this->dom;
@ -1211,7 +1210,7 @@ class Readability implements LoggerAwareInterface
if (0 === strcasecmp($tagName, 'td') || 0 === strcasecmp($tagName, 'tr')) { if (0 === strcasecmp($tagName, 'td') || 0 === strcasecmp($tagName, 'tr')) {
$up = $topCandidate; $up = $topCandidate;
if ($up->parentNode instanceof DOMElement) { if ($up->parentNode instanceof \DOMElement) {
$up = $up->parentNode; $up = $up->parentNode;
if (0 === strcasecmp($up->tagName, 'table')) { if (0 === strcasecmp($up->tagName, 'table')) {
@ -1340,7 +1339,7 @@ class Readability implements LoggerAwareInterface
* Get an element weight by attribute. * Get an element weight by attribute.
* Uses regular expressions to tell if this element looks good or bad. * Uses regular expressions to tell if this element looks good or bad.
*/ */
protected function weightAttribute(DOMElement $element, string $attribute): int protected function weightAttribute(\DOMElement $element, string $attribute): int
{ {
if (!$element->hasAttribute($attribute)) { if (!$element->hasAttribute($attribute)) {
return 0; return 0;
@ -1427,7 +1426,7 @@ class Readability implements LoggerAwareInterface
unset($tidy); unset($tidy);
} }
$this->html = mb_convert_encoding((string) $this->html, 'HTML-ENTITIES', 'UTF-8'); $this->html = '<meta charset="utf-8">' . (string) $this->html;
if ('html5lib' === $this->parser || 'html5' === $this->parser) { if ('html5lib' === $this->parser || 'html5' === $this->parser) {
$this->dom = (new HTML5())->loadHTML($this->html); $this->dom = (new HTML5())->loadHTML($this->html);
@ -1443,14 +1442,14 @@ class Readability implements LoggerAwareInterface
libxml_use_internal_errors(false); libxml_use_internal_errors(false);
} }
$this->dom->registerNodeClass(DOMElement::class, \Readability\JSLikeHTMLElement::class); $this->dom->registerNodeClass(\DOMElement::class, \Readability\JSLikeHTMLElement::class);
} }
private function getAncestors(DOMElement $node, int $maxDepth = 0): array private function getAncestors(\DOMElement $node, int $maxDepth = 0): array
{ {
$ancestors = []; $ancestors = [];
$i = 0; $i = 0;
while ($node->parentNode instanceof DOMElement) { while ($node->parentNode instanceof \DOMElement) {
$ancestors[] = $node->parentNode; $ancestors[] = $node->parentNode;
if (++$i === $maxDepth) { if (++$i === $maxDepth) {
break; break;
@ -1470,7 +1469,7 @@ class Readability implements LoggerAwareInterface
}, iterator_to_array($node->childNodes)), true)); }, iterator_to_array($node->childNodes)), true));
} }
private function hasSingleTagInsideElement(DOMElement $node, string $tag): bool private function hasSingleTagInsideElement(\DOMElement $node, string $tag): bool
{ {
if (1 !== $node->childNodes->length || $node->childNodes->item(0)->nodeName !== $tag) { if (1 !== $node->childNodes->length || $node->childNodes->item(0)->nodeName !== $tag) {
return false; return false;
@ -1490,7 +1489,7 @@ class Readability implements LoggerAwareInterface
* Tidy must be configured to not clean the input for this function to * Tidy must be configured to not clean the input for this function to
* work as expected, see $this->tidy_config['clean'] * work as expected, see $this->tidy_config['clean']
*/ */
private function isNodeVisible(DOMElement $node): bool private function isNodeVisible(\DOMElement $node): bool
{ {
return !($node->hasAttribute('style') return !($node->hasAttribute('style')
&& preg_match($this->regexps['isNotVisible'], $node->getAttribute('style')) && preg_match($this->regexps['isNotVisible'], $node->getAttribute('style'))

@ -335,13 +335,14 @@ class ReadabilityTest extends \PHPUnit\Framework\TestCase
public function testAutoClosingIframeNotThrowingException(): void public function testAutoClosingIframeNotThrowingException(): void
{ {
error_reporting(\E_ALL | \E_STRICT); $oldErrorReporting = error_reporting(\E_ALL | \E_STRICT);
ini_set('display_errors', '1'); $oldDisplayErrors = ini_set('display_errors', '1');
// dummy function to be used to the next test // dummy function to be used to the next test
set_error_handler(function (int $errno, string $errstr, string $errfile, int $errline, array $errcontext) { set_error_handler(function (int $errno, string $errstr, string $errfile, int $errline, array $errcontext) {
throw new \Exception($errstr, $errno); throw new \Exception($errstr, $errno);
}, \E_ALL | \E_STRICT); }, \E_ALL | \E_STRICT);
try {
$data = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> $data = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" lang="ru-RU" prefix="og: http://ogp.me/ns#"> <html xmlns="http://www.w3.org/1999/xhtml" lang="ru-RU" prefix="og: http://ogp.me/ns#">
@ -377,6 +378,13 @@ class ReadabilityTest extends \PHPUnit\Framework\TestCase
$this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getTitle()); $this->assertInstanceOf(JSLikeHTMLElement::class, $readability->getTitle());
$this->assertStringContainsString('<iframe src="https://www.youtube.com/embed/PUep6xNeKjA" width="560" height="315" frameborder="0" allowfullscreen="allowfullscreen"> </iframe>', $readability->getContent()->getInnerHtml()); $this->assertStringContainsString('<iframe src="https://www.youtube.com/embed/PUep6xNeKjA" width="560" height="315" frameborder="0" allowfullscreen="allowfullscreen"> </iframe>', $readability->getContent()->getInnerHtml());
$this->assertStringContainsString('3D Touch', $readability->getTitle()->getInnerHtml()); $this->assertStringContainsString('3D Touch', $readability->getTitle()->getInnerHtml());
} finally {
restore_error_handler();
if (false !== $oldDisplayErrors) {
ini_set('display_errors', $oldDisplayErrors);
}
error_reporting($oldErrorReporting);
}
} }
/** /**

Loading…
Cancel
Save