diff --git a/.github/workflows/coding-standards.yml b/.github/workflows/coding-standards.yml index cd666ae..f26e7d4 100644 --- a/.github/workflows/coding-standards.yml +++ b/.github/workflows/coding-standards.yml @@ -8,9 +8,6 @@ on: branches: - master -env: - SYMFONY_PHPUNIT_VERSION: 7.5 - jobs: coding-standards: name: "CS Fixer & PHPStan" @@ -35,9 +32,6 @@ jobs: env: COMPOSER_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - name: "Add PHPStan" - run: "composer require phpstan/phpstan phpstan/phpstan-phpunit --dev --no-progress --no-suggest" - - name: "Install dependencies with Composer" uses: "ramsey/composer-install@v1" with: diff --git a/.github/workflows/continuous-integration.yml b/.github/workflows/continuous-integration.yml index 1424248..5f9bee1 100644 --- a/.github/workflows/continuous-integration.yml +++ b/.github/workflows/continuous-integration.yml @@ -19,13 +19,11 @@ jobs: strategy: matrix: php: - - "5.6" - - "7.0" - - "7.1" - "7.2" - "7.3" - "7.4" - "8.0" + - "8.1" steps: - name: "Checkout" @@ -38,18 +36,12 @@ jobs: with: php-version: "${{ matrix.php }}" coverage: "none" - tools: composer:v1 + tools: composer:v2 extensions: tidy ini-values: "date.timezone=Europe/Paris" env: COMPOSER_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - name: "Force PHPUnit version" - if: matrix.php >= '7.2' - run: "echo $SYMFONY_PHPUNIT_VERSION" - env: - SYMFONY_PHPUNIT_VERSION: 7.5 - - name: "Remove useless deps" run: "composer remove friendsofphp/php-cs-fixer --dev --no-progress --no-update" @@ -84,7 +76,7 @@ jobs: with: php-version: "${{ matrix.php }}" coverage: "xdebug" - tools: composer:v1 + tools: composer:v2 extensions: tidy ini-values: "date.timezone=Europe/Paris" env: @@ -103,8 +95,6 @@ jobs: - name: "Run PHPUnit (with coverage)" run: "php vendor/bin/simple-phpunit -v --coverage-clover build/logs/clover.xml" - env: - SYMFONY_PHPUNIT_VERSION: 7.5 - name: "Retrieve Coveralls phar" run: "wget https://github.com/php-coveralls/php-coveralls/releases/download/v2.4.2/php-coveralls.phar" @@ -126,49 +116,6 @@ jobs: php: - "7.2" - steps: - - name: "Checkout" - uses: "actions/checkout@v2" - with: - fetch-depth: 2 - - - name: "Install PHP" - uses: "shivammathur/setup-php@v2" - with: - php-version: "${{ matrix.php }}" - coverage: "none" - tools: composer:v1 - extensions: tidy - ini-values: "date.timezone=Europe/Paris" - env: - COMPOSER_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - - name: "Remove useless deps" - run: "composer remove friendsofphp/php-cs-fixer --dev --no-progress --no-update" - - - name: "Install dependencies with Composer" - uses: "ramsey/composer-install@v1" - with: - composer-options: "--optimize-autoloader --prefer-dist" - dependency-versions: "lowest" - - - name: "Setup logs" - run: "mkdir -p build/logs" - - - name: "Run PHPUnit" - run: "php vendor/bin/simple-phpunit -v" - env: - SYMFONY_PHPUNIT_VERSION: 7.5 - - phpunit-composerv2: - name: "PHPUnit with Composer v2 (PHP ${{ matrix.php }})" - runs-on: "ubuntu-20.04" - - strategy: - matrix: - php: - - "7.4" - steps: - name: "Checkout" uses: "actions/checkout@v2" @@ -193,11 +140,10 @@ jobs: uses: "ramsey/composer-install@v1" with: composer-options: "--optimize-autoloader --prefer-dist" + dependency-versions: "lowest" - name: "Setup logs" run: "mkdir -p build/logs" - name: "Run PHPUnit" run: "php vendor/bin/simple-phpunit -v" - env: - SYMFONY_PHPUNIT_VERSION: 7.5 diff --git a/.gitignore b/.gitignore index 160e7cc..65afc77 100644 --- a/.gitignore +++ b/.gitignore @@ -2,4 +2,5 @@ vendor/ coverage/ composer.lock .php_cs.cache +.php-cs-fixer.cache .phpunit.result.cache diff --git a/.scrutinizer.yml b/.scrutinizer.yml deleted file mode 100644 index 81538f0..0000000 --- a/.scrutinizer.yml +++ /dev/null @@ -1,12 +0,0 @@ -tools: - external_code_coverage: false - -build: - nodes: - analysis: - tests: - override: - - php-scrutinizer-run - environment: - php: - version: 7.2 diff --git a/README.md b/README.md index e3f3cf7..d285e70 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ [![Total Downloads](https://poser.pugx.org/j0k3r/php-readability/downloads)](https://packagist.org/packages/j0k3r/php-readability) [![License](https://poser.pugx.org/j0k3r/php-readability/license)](https://packagist.org/packages/j0k3r/php-readability) -This is an extract of the Readability class from this [full-text-rss](https://github.com/Dither/full-text-rss) fork. It can be defined as a better version of the original [php-readability](https://bitbucket.org/fivefilters/php-readability/overview). +This is an extract of the Readability class from this [full-text-rss](https://github.com/Dither/full-text-rss) fork. It can be defined as a better version of the original [php-readability](https://bitbucket.org/fivefilters/php-readability). ## Differences diff --git a/composer.json b/composer.json index d32d576..5436e9e 100644 --- a/composer.json +++ b/composer.json @@ -24,15 +24,17 @@ "role": "Developer (original JS version)" }], "require": { - "php": ">=5.6.0", + "php": ">=7.2.0", "ext-mbstring": "*", "psr/log": "^1.0", "masterminds/html5": "^2.7" }, "require-dev": { - "friendsofphp/php-cs-fixer": "^2.14", + "friendsofphp/php-cs-fixer": "^3.0", "monolog/monolog": "^1.24|^2.1", - "symfony/phpunit-bridge": "^4.4|^5.3" + "symfony/phpunit-bridge": "^4.4|^5.3|^6.0", + "phpstan/phpstan": "^1.3", + "phpstan/phpstan-phpunit": "^1.0" }, "suggest": { "ext-tidy": "Used to clean up given HTML and to avoid problems with bad HTML structure." @@ -42,5 +44,10 @@ }, "autoload-dev": { "psr-4": { "Tests\\Readability\\": "tests/" } + }, + "config":{ + "platform": { + "php": "7.2.34" + } } } diff --git a/phpstan.neon b/phpstan.neon index 7c1f51c..4f4a583 100644 --- a/phpstan.neon +++ b/phpstan.neon @@ -6,7 +6,9 @@ parameters: # https://github.com/phpstan/phpstan/issues/694#issuecomment-350724288 bootstrapFiles: - - vendor/bin/.phpunit/phpunit-7.5-0/vendor/autoload.php + - vendor/bin/.phpunit/phpunit-8.5-0/vendor/autoload.php + + checkMissingIterableValueType: false includes: - vendor/phpstan/phpstan-phpunit/extension.neon diff --git a/phpunit.xml.dist b/phpunit.xml.dist index 342a478..75d1741 100644 --- a/phpunit.xml.dist +++ b/phpunit.xml.dist @@ -11,7 +11,7 @@ > - + ./tests/ diff --git a/src/Readability.php b/src/Readability.php index db2e27a..c554cb9 100644 --- a/src/Readability.php +++ b/src/Readability.php @@ -7,66 +7,23 @@ use Psr\Log\LoggerAwareInterface; use Psr\Log\LoggerInterface; use Psr\Log\NullLogger; -/** - * Arc90's Readability ported to PHP for FiveFilters.org - * Based on readability.js version 1.7.1 (without multi-page support) - * ------------------------------------------------------ - * Original URL: http://lab.arc90.com/experiments/readability/js/readability.js - * Arc90's project URL: http://lab.arc90.com/experiments/readability/ - * JS Source: http://code.google.com/p/arc90labs-readability - * Ported by: Keyvan Minoukadeh, http://www.keyvan.net - * Modded by: Dither, https://dithersky.wordpress.com - * More information: http://fivefilters.org/content-only/ - * License: Apache License, Version 2.0 - * Requires: PHP version 5.2.0+ - * Date: 2013-08-02. - * - * Differences between the PHP port and the original - * ------------------------------------------------------ - * Arc90's Readability is designed to run in the browser. It works on the DOM - * tree (the parsed HTML) after the page's CSS styles have been applied and - * Javascript code executed. This PHP port does not run inside a browser. - * We use PHP's ability to parse HTML to build our DOM tree, but we cannot - * rely on CSS or Javascript support. As such, the results will not always - * match Arc90's Readability. (For example, if a web page contains CSS style - * rules or Javascript code which hide certain HTML elements from display, - * Arc90's Readability will dismiss those from consideration but our PHP port, - * unable to understand CSS or Javascript, will not know any better.) - * - * Another significant difference is that the aim of Arc90's Readability is - * to re-present the main content block of a given web page so users can - * read it more easily in their browsers. Correct identification, clean up, - * and separation of the content block is only a part of this process. - * This PHP port is only concerned with this part, it does not include code - * that relates to presentation in the browser - Arc90 already do - * that extremely well, and for PDF output there's FiveFilters.org's - * PDF Newspaper: http://fivefilters.org/pdf-newspaper/. - * - * Finally, this class contains methods that might be useful for developers - * working on HTML document fragments. So without deviating too much from - * the original code (which I don't want to do because it makes debugging - * and updating more difficult), I've tried to make it a little more - * developer friendly. You should be able to use the methods here on - * existing DOMElement objects without passing an entire HTML document to - * be parsed. - */ class Readability implements LoggerAwareInterface { // flags - const FLAG_STRIP_UNLIKELYS = 1; - const FLAG_WEIGHT_ATTRIBUTES = 2; - const FLAG_CLEAN_CONDITIONALLY = 4; - const FLAG_DISABLE_PREFILTER = 8; - const FLAG_DISABLE_POSTFILTER = 16; + public const FLAG_STRIP_UNLIKELYS = 1; + public const FLAG_WEIGHT_ATTRIBUTES = 2; + public const FLAG_CLEAN_CONDITIONALLY = 4; + public const FLAG_DISABLE_PREFILTER = 8; + public const FLAG_DISABLE_POSTFILTER = 16; // constants - const SCORE_CHARS_IN_PARAGRAPH = 100; - const SCORE_WORDS_IN_PARAGRAPH = 20; - const GRANDPARENT_SCORE_DIVISOR = 2; - const MIN_PARAGRAPH_LENGTH = 20; - const MIN_COMMAS_IN_PARAGRAPH = 6; - const MIN_ARTICLE_LENGTH = 200; - const MIN_NODE_LENGTH = 80; - const MAX_LINK_DENSITY = 0.25; + public const SCORE_CHARS_IN_PARAGRAPH = 100; + public const SCORE_WORDS_IN_PARAGRAPH = 20; + public const GRANDPARENT_SCORE_DIVISOR = 2; + public const MIN_PARAGRAPH_LENGTH = 20; + public const MIN_COMMAS_IN_PARAGRAPH = 6; + public const MIN_ARTICLE_LENGTH = 200; + public const MIN_NODE_LENGTH = 80; + public const MAX_LINK_DENSITY = 0.25; public $convertLinksToFootnotes = false; public $revertForcedParagraphElements = true; public $articleTitle; @@ -171,26 +128,22 @@ class Readability implements LoggerAwareInterface /** * Create instance of Readability. * - * @param string $html UTF-8 encoded string - * @param string $url URL associated with HTML (for footnotes) - * @param string $parser Which parser to use for turning raw HTML into a DOMDocument - * @param bool $use_tidy Use tidy + * @param string $html UTF-8 encoded string + * @param string $url URL associated with HTML (for footnotes) + * @param string $parser Which parser to use for turning raw HTML into a DOMDocument + * @param bool $useTidy Use tidy */ - public function __construct($html, $url = null, $parser = 'libxml', $use_tidy = true) + public function __construct(string $html, string $url = null, string $parser = 'libxml', bool $useTidy = true) { $this->url = $url; $this->html = $html; $this->parser = $parser; - $this->useTidy = $use_tidy && \function_exists('tidy_parse_string'); + $this->useTidy = $useTidy && \function_exists('tidy_parse_string'); $this->logger = new NullLogger(); - $this->loadHtml(); } - /** - * @return void - */ - public function setLogger(LoggerInterface $logger) + public function setLogger(LoggerInterface $logger): void { $this->logger = $logger; } @@ -221,7 +174,7 @@ class Readability implements LoggerAwareInterface * @param string $filter RegExp for replace * @param string $replacer Replacer */ - public function addPreFilter($filter, $replacer = '') + public function addPreFilter(string $filter, string $replacer = ''): void { $this->pre_filters[$filter] = $replacer; } @@ -232,7 +185,7 @@ class Readability implements LoggerAwareInterface * @param string $filter RegExp for replace * @param string $replacer Replacer */ - public function addPostFilter($filter, $replacer = '') + public function addPostFilter(string $filter, string $replacer = ''): void { $this->post_filters[$filter] = $replacer; } @@ -249,8 +202,10 @@ class Readability implements LoggerAwareInterface * * @return bool true if we found content, false otherwise */ - public function init() + public function init(): bool { + $this->loadHtml(); + if (!isset($this->dom->documentElement)) { return false; } @@ -315,7 +270,7 @@ class Readability implements LoggerAwareInterface /** * Run any post-process modifications to article content as necessary. */ - public function postProcessContent(\DOMElement $articleContent) + public function postProcessContent(\DOMElement $articleContent): void { if ($this->convertLinksToFootnotes && !preg_match('/\bwiki/', $this->url)) { $this->addFootnotes($articleContent); @@ -327,7 +282,7 @@ class Readability implements LoggerAwareInterface * * @see http://www.roughtype.com/archives/2010/05/experiments_in.php */ - public function addFootnotes(\DOMElement $articleContent) + public function addFootnotes(\DOMElement $articleContent): void { $footnotesWrapper = $this->dom->createElement('footer'); $footnotesWrapper->setAttribute('class', 'readability-footnotes'); @@ -389,7 +344,7 @@ class Readability implements LoggerAwareInterface * Prepare the article node for display. Clean out any inline styles, * iframes, forms, strip extraneous

tags, etc. */ - public function prepArticle(\DOMNode $articleContent) + public function prepArticle(\DOMNode $articleContent): void { if (!$articleContent instanceof \DOMElement) { return; @@ -491,10 +446,8 @@ class Readability implements LoggerAwareInterface * @param \DOMElement $e * @param bool $normalizeSpaces (default: true) * @param bool $flattenLines (default: false) - * - * @return string */ - public function getInnerText($e, $normalizeSpaces = true, $flattenLines = false) + public function getInnerText($e, bool $normalizeSpaces = true, bool $flattenLines = false): string { if (null === $e || !isset($e->textContent) || '' === $e->textContent) { return ''; @@ -503,9 +456,11 @@ class Readability implements LoggerAwareInterface $textContent = trim($e->textContent); if ($flattenLines) { - $textContent = mb_ereg_replace('(?:[\r\n](?:\s| )*)+', '', $textContent); - } elseif ($normalizeSpaces) { - $textContent = mb_ereg_replace('\s\s+', ' ', $textContent); + return (string) mb_ereg_replace('(?:[\r\n](?:\s| )*)+', '', $textContent); + } + + if ($normalizeSpaces) { + return (string) mb_ereg_replace('\s\s+', ' ', $textContent); } return $textContent; @@ -513,30 +468,22 @@ class Readability implements LoggerAwareInterface /** * Remove the style attribute on every $e and under. - * - * @param \DOMElement $e */ - public function cleanStyles($e) + public function cleanStyles(\DOMElement $e): void { - if (!\is_object($e)) { - return; - } - - $elems = $e->getElementsByTagName('*'); + if (\is_object($e)) { + $elems = $e->getElementsByTagName('*'); - foreach ($elems as $elem) { - $elem->removeAttribute('style'); + foreach ($elems as $elem) { + $elem->removeAttribute('style'); + } } } /** * Get comma number for a given text. - * - * @param string $text - * - * @return int */ - public function getCommaCount($text) + public function getCommaCount(string $text): int { return substr_count($text, ','); } @@ -544,12 +491,8 @@ class Readability implements LoggerAwareInterface /** * Get words number for a given text if words separated by a space. * Input string should be normalized. - * - * @param string $text - * - * @return int */ - public function getWordCount($text) + public function getWordCount(string $text): int { return substr_count($text, ' '); } @@ -558,12 +501,8 @@ class Readability implements LoggerAwareInterface * Get the density of links as a percentage of the content * This is the amount of text that is inside a link divided by the total text in the node. * Can exclude external references to differentiate between simple text and menus/infoblocks. - * - * @param bool $excludeExternal - * - * @return int */ - public function getLinkDensity(\DOMElement $e, $excludeExternal = false) + public function getLinkDensity(\DOMElement $e, bool $excludeExternal = false): float { $links = $e->getElementsByTagName('a'); $textLength = mb_strlen($this->getInnerText($e, true, true)); @@ -585,10 +524,8 @@ class Readability implements LoggerAwareInterface /** * Get an element relative weight. - * - * @return int */ - public function getWeight(\DOMElement $e) + public function getWeight(\DOMElement $e): int { if (!$this->flagIsActive(self::FLAG_WEIGHT_ATTRIBUTES)) { return 0; @@ -606,7 +543,7 @@ class Readability implements LoggerAwareInterface /** * Remove extraneous break tags from a node. */ - public function killBreaks(\DOMElement $node) + public function killBreaks(\DOMElement $node): void { $html = $node->getInnerHTML(); $html = preg_replace($this->regexps['killBreaks'], '
', $html); @@ -618,10 +555,8 @@ class Readability implements LoggerAwareInterface * (Unless it's a youtube/vimeo video. People love movies.). * * Updated 2012-09-18 to preserve youtube/vimeo iframes - * - * @param string $tag */ - public function clean(\DOMElement $e, $tag) + public function clean(\DOMElement $e, string $tag): void { $targetList = $e->getElementsByTagName($tag); $isEmbed = ('audio' === $tag || 'video' === $tag || 'iframe' === $tag || 'object' === $tag || 'embed' === $tag); @@ -652,10 +587,8 @@ class Readability implements LoggerAwareInterface * Clean an element of all tags of type "tag" if they look fishy. * "Fishy" is an algorithm based on content length, classnames, * link density, number of images & embeds, etc. - * - * @param string $tag */ - public function cleanConditionally(\DOMElement $e, $tag) + public function cleanConditionally(\DOMElement $e, string $tag): void { if (!$this->flagIsActive(self::FLAG_CLEAN_CONDITIONALLY)) { return; @@ -765,7 +698,7 @@ class Readability implements LoggerAwareInterface /** * Clean out spurious headers from an Element. Checks things like classnames and link density. */ - public function cleanHeaders(\DOMElement $e) + public function cleanHeaders(\DOMElement $e): void { for ($headerIndex = 1; $headerIndex < 3; ++$headerIndex) { $headers = $e->getElementsByTagName('h' . $headerIndex); @@ -780,57 +713,28 @@ class Readability implements LoggerAwareInterface /** * Check if the given flag is active. - * - * @param int $flag - * - * @return bool */ - public function flagIsActive($flag) + public function flagIsActive(int $flag): bool { return ($this->flags & $flag) > 0; } /** * Add a flag. - * - * @param int $flag */ - public function addFlag($flag) + public function addFlag(int $flag): void { $this->flags = $this->flags | $flag; } /** * Remove a flag. - * - * @param int $flag */ - public function removeFlag($flag) + public function removeFlag(int $flag): void { $this->flags = $this->flags & ~$flag; } - /** - * Debug. - * - * @deprecated use $this->logger->debug() instead - * @codeCoverageIgnore - */ - protected function dbg($msg) - { - $this->logger->debug($msg); - } - - /** - * Dump debug info. - * - * @deprecated since Monolog gather log, we don't need it - * @codeCoverageIgnore - */ - protected function dump_dbg() - { - } - /** * Get the article title as an H1. * @@ -877,7 +781,7 @@ class Readability implements LoggerAwareInterface * Prepare the HTML document for readability to scrape it. * This includes things like stripping javascript, CSS, and handling terrible markup. */ - protected function prepDocument() + protected function prepDocument(): void { /* * In some cases a body element can't be found (if the HTML is totally hosed for example) @@ -906,7 +810,7 @@ class Readability implements LoggerAwareInterface * Initialize a node with the readability object. Also checks the * className/id for special names to add to its score. */ - protected function initializeNode(\DOMElement $node) + protected function initializeNode(\DOMElement $node): void { if (!isset($node->tagName)) { return; @@ -993,7 +897,8 @@ class Readability implements LoggerAwareInterface $allElements = $page->getElementsByTagName('*'); - for ($nodeIndex = 0; ($node = $allElements->item($nodeIndex)); ++$nodeIndex) { + for ($nodeIndex = 0; $allElements->item($nodeIndex); ++$nodeIndex) { + $node = $allElements->item($nodeIndex); $tagName = $node->tagName; $nodeContent = $node->getInnerHTML(); @@ -1107,7 +1012,7 @@ class Readability implements LoggerAwareInterface $contentScore += max(min($score, 3), -3);/**/ // Add the score to the parent. The grandparent gets half. - $parentNode->getAttributeNode('readability')->value += $contentScore; + $parentNode->getAttributeNode('readability')->value = ((float) $parentNode->getAttributeNode('readability')->value) + $contentScore; if ($grandParentNode) { $grandParentNode->getAttributeNode('readability')->value += round($contentScore / self::GRANDPARENT_SCORE_DIVISOR); } @@ -1228,7 +1133,7 @@ class Readability implements LoggerAwareInterface $siblingScoreThreshold = max(10, ((int) $topCandidate->getAttribute('readability')) * 0.2); $siblingNodes = $topCandidate->parentNode->childNodes; - if (null === $siblingNodes) { + if (0 === $siblingNodes->length) { $siblingNodes = new \stdClass(); $siblingNodes->length = 0; } @@ -1260,7 +1165,7 @@ class Readability implements LoggerAwareInterface $nodeLength = mb_strlen($nodeContent); if (($nodeLength > self::MIN_NODE_LENGTH && $linkDensity < self::MAX_LINK_DENSITY) - || ($nodeLength < self::MIN_NODE_LENGTH && 0 === $linkDensity && preg_match('/\.( |$)/', $nodeContent))) { + || ($nodeLength < self::MIN_NODE_LENGTH && 0 === (int) $linkDensity && preg_match('/\.( |$)/', $nodeContent))) { $append = true; } } @@ -1337,12 +1242,8 @@ class Readability implements LoggerAwareInterface /** * Get an element weight by attribute. * Uses regular expressions to tell if this element looks good or bad. - * - * @param string $attribute - * - * @return int */ - protected function weightAttribute(\DOMElement $element, $attribute) + protected function weightAttribute(\DOMElement $element, string $attribute): int { if (!$element->hasAttribute($attribute)) { return 0; @@ -1373,7 +1274,7 @@ class Readability implements LoggerAwareInterface /** * Will recreate previously deleted body property. */ - protected function reinitBody() + protected function reinitBody(): void { if (!isset($this->body->childNodes)) { $this->body = $this->dom->createElement('body'); @@ -1385,17 +1286,15 @@ class Readability implements LoggerAwareInterface * Load HTML in a DOMDocument. * Apply Pre filters * Cleanup HTML using Tidy (or not). - * - * @todo This should be called in init() instead of from __construct */ - private function loadHtml() + private function loadHtml(): void { $this->original_html = $this->html; $this->logger->debug('Parsing URL: ' . $this->url); if ($this->url) { - $this->domainRegExp = '/' . strtr(preg_replace('/www\d*\./', '', parse_url($this->url, \PHP_URL_HOST)), ['.' => '\.']) . '/'; + $this->domainRegExp = '/' . strtr((string) preg_replace('/www\d*\./', '', (string) parse_url($this->url, \PHP_URL_HOST)), ['.' => '\.']) . '/'; } mb_internal_encoding('UTF-8'); @@ -1431,7 +1330,7 @@ class Readability implements LoggerAwareInterface unset($tidy); } - $this->html = mb_convert_encoding($this->html, 'HTML-ENTITIES', 'UTF-8'); + $this->html = mb_convert_encoding((string) $this->html, 'HTML-ENTITIES', 'UTF-8'); if ('html5lib' === $this->parser || 'html5' === $this->parser) { $this->dom = (new HTML5())->loadHTML($this->html); diff --git a/tests/ReadabilityTest.php b/tests/ReadabilityTest.php index a8e8cfb..5931b43 100644 --- a/tests/ReadabilityTest.php +++ b/tests/ReadabilityTest.php @@ -4,27 +4,32 @@ namespace Tests\Readability; use Monolog\Handler\TestHandler; use Monolog\Logger; +use Psr\Log\LoggerInterface; use Readability\Readability; class ReadabilityTest extends \PHPUnit\Framework\TestCase { + /** @var TestHandler */ public $logHandler; + /** @var LoggerInterface */ public $logger; /** * @requires extension tidy */ - public function testConstructDefault() + public function testConstructDefault(): void { $readability = $this->getReadability(''); + $readability->init(); $this->assertNull($readability->url); $this->assertInstanceOf('DomDocument', $readability->dom); } - public function testConstructHtml5Parser() + public function testConstructHtml5Parser(): void { $readability = $this->getReadability('', 'http://0.0.0.0', 'html5lib'); + $readability->init(); $this->assertSame('http://0.0.0.0', $readability->url); $this->assertInstanceOf('DomDocument', $readability->dom); @@ -34,9 +39,10 @@ class ReadabilityTest extends \PHPUnit\Framework\TestCase /** * @requires extension tidy */ - public function testConstructSimple() + public function testConstructSimple(): void { $readability = $this->getReadability('', 'http://0.0.0.0'); + $readability->init(); $this->assertSame('http://0.0.0.0', $readability->url); $this->assertInstanceOf('DomDocument', $readability->dom); @@ -44,9 +50,10 @@ class ReadabilityTest extends \PHPUnit\Framework\TestCase $this->assertTrue($readability->tidied); } - public function testConstructDefaultWithoutTidy() + public function testConstructDefaultWithoutTidy(): void { $readability = $this->getReadability('', null, 'libxml', false); + $readability->init(); $this->assertNull($readability->url); $this->assertSame('', $readability->original_html); @@ -55,9 +62,10 @@ class ReadabilityTest extends \PHPUnit\Framework\TestCase $this->assertInstanceOf('DomDocument', $readability->dom); } - public function testConstructSimpleWithoutTidy() + public function testConstructSimpleWithoutTidy(): void { $readability = $this->getReadability('', 'http://0.0.0.0', 'libxml', false); + $readability->init(); $this->assertSame('http://0.0.0.0', $readability->url); $this->assertInstanceOf('DomDocument', $readability->dom); @@ -65,7 +73,7 @@ class ReadabilityTest extends \PHPUnit\Framework\TestCase $this->assertFalse($readability->tidied); } - public function testInitNoContent() + public function testInitNoContent(): void { $readability = $this->getReadability('', 'http://0.0.0.0'); $res = $readability->init(); @@ -77,7 +85,7 @@ class ReadabilityTest extends \PHPUnit\Framework\TestCase $this->assertStringContainsString('Sorry, Readability was unable to parse this page for content.', $readability->getContent()->getInnerHtml()); } - public function testInitP() + public function testInitP(): void { $readability = $this->getReadability(str_repeat('

This is the awesome content :)

', 7), 'http://0.0.0.0'); $res = $readability->init(); @@ -90,7 +98,7 @@ class ReadabilityTest extends \PHPUnit\Framework\TestCase $this->assertStringContainsString('This is the awesome content :)', $readability->getContent()->getInnerHtml()); } - public function testInitDivP() + public function testInitDivP(): void { $readability = $this->getReadability('
' . str_repeat('

This is the awesome content :)

', 7) . '
', 'http://0.0.0.0'); $res = $readability->init(); @@ -103,7 +111,7 @@ class ReadabilityTest extends \PHPUnit\Framework\TestCase $this->assertStringContainsString('This is the awesome content :)', $readability->getContent()->getInnerHtml()); } - public function testInitDiv() + public function testInitDiv(): void { $readability = $this->getReadability('
' . str_repeat('This is the awesome content :)', 7) . '
', 'http://0.0.0.0'); $readability->debug = true; @@ -117,7 +125,7 @@ class ReadabilityTest extends \PHPUnit\Framework\TestCase $this->assertStringContainsString('This is the awesome content :)', $readability->getContent()->getInnerHtml()); } - public function testWithFootnotes() + public function testWithFootnotes(): void { $readability = $this->getReadability('
' . str_repeat('

This is an awesome text with some links, here there are: the awesome

', 7) . '
', 'http://0.0.0.0'); $readability->debug = true; @@ -134,7 +142,7 @@ class ReadabilityTest extends \PHPUnit\Framework\TestCase $this->assertStringContainsString('readabilityLink-3', $readability->getContent()->getInnerHtml()); } - public function testStandardClean() + public function testStandardClean(): void { $readability = $this->getReadability('

Title

' . str_repeat('

This is an awesome text with some links, here there are: the awesome

', 7) . 'will NOT be removed
', 'http://0.0.0.0'); $readability->debug = true; @@ -151,7 +159,7 @@ class ReadabilityTest extends \PHPUnit\Framework\TestCase $this->assertStringNotContainsString('

', $readability->getContent()->getInnerHtml()); } - public function testWithIframe() + public function testWithIframe(): void { $readability = $this->getReadability('

Title

' . str_repeat('

This is an awesome text with some links, here there are: the awesome

', 7) . '

This is an awesome text with some links, here there are

', 'http://0.0.0.0'); $readability->debug = true; @@ -166,7 +174,7 @@ class ReadabilityTest extends \PHPUnit\Framework\TestCase $this->assertStringContainsString('nofollow', $readability->getContent()->getInnerHtml()); } - public function testWithArticle() + public function testWithArticle(): void { $readability = $this->getReadability('

' . str_repeat('This is an awesome text with some links, here there are: the awesome', 20) . '

This is an awesome text with some links, here there are

', 'http://0.0.0.0'); $readability->debug = true; @@ -181,7 +189,7 @@ class ReadabilityTest extends \PHPUnit\Framework\TestCase $this->assertStringContainsString('nofollow', $readability->getContent()->getInnerHtml()); } - public function testWithAside() + public function testWithAside(): void { $readability = $this->getReadability('
' . str_repeat('

This is an awesome text with some links, here there are: the awesome

', 7) . '
', 'http://0.0.0.0'); $readability->debug = true; @@ -196,7 +204,7 @@ class ReadabilityTest extends \PHPUnit\Framework\TestCase $this->assertStringContainsString('