Use a new dependency for the HTML5 parser

`electrolinux/php-html5lib` is quite old and incompatible with the upcoming Composer 2.0.
Switching to `masterminds/html5`, which produces the same result and is actively maintained.

Also:
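- keep the README in vendor installs (no longer `export-ignore`d)
- use the new Scrutinizer engine
- test with the lowest dependency versions (`--prefer-lowest`)
- remove the php-coveralls dev dependency and download the phar during the CI build instead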
pull/57/head
Jeremy Benoist 6 years ago
parent 52b1ddba57
commit 6a8ecf232f
No known key found for this signature in database
GPG Key ID: BCA73962457ACC3C
  1. 2
      .editorconfig
  2. 3
      .gitattributes
  3. 1
      .gitignore
  4. 10
      .scrutinizer.yml
  5. 32
      .travis.yml
  6. 5
      composer.json
  7. 2
      phpunit.xml.dist
  8. 10
      src/Readability.php

@ -4,8 +4,6 @@ root = true
; Unix-style newlines ; Unix-style newlines
[*] [*]
end_of_line = LF end_of_line = LF
[*.php]
indent_style = space indent_style = space
indent_size = 4 indent_size = 4
trim_trailing_whitespace = true trim_trailing_whitespace = true

3
.gitattributes vendored

@ -4,7 +4,6 @@
/.scrutinizer.yml export-ignore /.scrutinizer.yml export-ignore
/.travis.yml export-ignore /.travis.yml export-ignore
/.php_cs export-ignore /.php_cs export-ignore
/README.md export-ignore
/phpunit.xml.dist export-ignore /phpunit.xml.dist export-ignore
/tests export-ignore
/phpstan.neon export-ignore /phpstan.neon export-ignore
/tests export-ignore

1
.gitignore vendored

@ -2,3 +2,4 @@ vendor/
coverage/ coverage/
composer.lock composer.lock
.php_cs.cache .php_cs.cache
.phpunit.result.cache

@ -1,2 +1,12 @@
tools: tools:
external_code_coverage: false external_code_coverage: false
build:
nodes:
analysis:
tests:
override:
- php-scrutinizer-run
environment:
php:
version: 7.2

@ -11,8 +11,10 @@ jobs:
include: include:
- php: 7.2 - php: 7.2
env: SYMFONY_PHPUNIT_VERSION=7.5 env: SYMFONY_PHPUNIT_VERSION=7.5
- php: 7.2
env: SYMFONY_PHPUNIT_VERSION=7.5 COMPOSER_FLAGS="--prefer-lowest"
- php: 7.3 - php: 7.3
env: SYMFONY_PHPUNIT_VERSION=7.5 CS_FIXER=run env: SYMFONY_PHPUNIT_VERSION=7.5 CS_FIXER=run COVERAGE=run
- php: 7.4 - php: 7.4
env: SYMFONY_PHPUNIT_VERSION=7.5 env: SYMFONY_PHPUNIT_VERSION=7.5
- php: nightly - php: nightly
@ -21,30 +23,32 @@ jobs:
allow_failures: allow_failures:
- php: nightly - php: nightly
# cache vendor dirs
cache: cache:
directories: directories:
- vendor - $HOME/.composer/cache/files
- $HOME/.composer/cache
before_install: before_install:
- if [ -n "$GH_TOKEN" ]; then composer config github-oauth.github.com ${GH_TOKEN}; fi; - if [ -n "$GH_TOKEN" ]; then composer config github-oauth.github.com ${GH_TOKEN}; fi;
install: install:
- composer self-update # only install phpstan when we need it
- if [ "$CS_FIXER" = "run" ]; then composer require phpstan/phpstan phpstan/phpstan-phpunit --dev --no-progress --no-suggest ; fi;
- composer update --prefer-dist --no-progress --no-suggest -o $COMPOSER_FLAGS
- php vendor/bin/simple-phpunit install
before_script: before_script:
- if [ "$CS_FIXER" = "run" ]; then composer require phpstan/phpstan phpstan/phpstan-phpunit --dev -n ; fi; - mkdir -p build/logs
- composer install -o --prefer-dist --no-interaction - if [ "$COVERAGE" = "run" ]; then PHPUNIT_FLAGS="--coverage-clover build/logs/clover.xml"; fi;
script: script:
- mkdir -p build/logs - php vendor/bin/simple-phpunit -v $PHPUNIT_FLAGS
- php vendor/bin/simple-phpunit -v --coverage-clover build/logs/clover.xml
- if [ "$CS_FIXER" = "run" ]; then php vendor/bin/php-cs-fixer fix --verbose --dry-run ; fi; - if [ "$CS_FIXER" = "run" ]; then php vendor/bin/php-cs-fixer fix --verbose --dry-run ; fi;
# only install phpstan when we need it
- if [ "$CS_FIXER" = "run" ]; then composer require phpstan/phpstan --no-progress --no-suggest ; fi;
- if [ "$CS_FIXER" = "run" ]; then composer require phpstan/phpstan-phpunit --no-progress --no-suggest ; fi;
- if [ "$CS_FIXER" = "run" ]; then php vendor/bin/phpstan analyse src tests --no-progress --level 1 ; fi; - if [ "$CS_FIXER" = "run" ]; then php vendor/bin/phpstan analyse src tests --no-progress --level 1 ; fi;
after_script: after_success:
- php vendor/bin/php-coveralls -v -x build/logs/clover.xml - |
if [ "$COVERAGE" = "run" ]; then
wget https://github.com/php-coveralls/php-coveralls/releases/download/v2.2.0/php-coveralls.phar
chmod +x php-coveralls.phar
php php-coveralls.phar -v -x build/logs/clover.xml
fi

@ -27,12 +27,11 @@
"php": ">=5.6.0", "php": ">=5.6.0",
"ext-mbstring": "*", "ext-mbstring": "*",
"psr/log": "^1.0", "psr/log": "^1.0",
"electrolinux/php-html5lib": "^0.1.0" "masterminds/html5": "^2.7"
}, },
"require-dev": { "require-dev": {
"php-coveralls/php-coveralls": "^2.1",
"friendsofphp/php-cs-fixer": "^2.14", "friendsofphp/php-cs-fixer": "^2.14",
"monolog/monolog": "^1.24", "monolog/monolog": "^1.24|^2.1",
"symfony/phpunit-bridge": "^4.2.3" "symfony/phpunit-bridge": "^4.2.3"
}, },
"suggest": { "suggest": {

@ -26,6 +26,6 @@
</filter> </filter>
<!-- <logging> <!-- <logging>
<log type="coverage-html" target="coverage" title="Readability" charset="UTF-8" yui="true" highlight="true" lowUpperBound="35" highLowerBound="70"/> <log type="coverage-html" target="coverage" lowUpperBound="35" highLowerBound="70"/>
</logging> --> </logging> -->
</phpunit> </phpunit>

@ -2,7 +2,7 @@
namespace Readability; namespace Readability;
use HTML5Lib\Parser; use Masterminds\HTML5;
use Psr\Log\LoggerAwareInterface; use Psr\Log\LoggerAwareInterface;
use Psr\Log\LoggerInterface; use Psr\Log\LoggerInterface;
use Psr\Log\NullLogger; use Psr\Log\NullLogger;
@ -616,7 +616,6 @@ class Readability implements LoggerAwareInterface
*/ */
public function clean(\DOMElement $e, $tag) public function clean(\DOMElement $e, $tag)
{ {
$currentItem = null;
$targetList = $e->getElementsByTagName($tag); $targetList = $e->getElementsByTagName($tag);
$isEmbed = ('audio' === $tag || 'video' === $tag || 'iframe' === $tag || 'object' === $tag || 'embed' === $tag); $isEmbed = ('audio' === $tag || 'video' === $tag || 'iframe' === $tag || 'object' === $tag || 'embed' === $tag);
@ -657,7 +656,6 @@ class Readability implements LoggerAwareInterface
$tagsList = $e->getElementsByTagName($tag); $tagsList = $e->getElementsByTagName($tag);
$curTagsLength = $tagsList->length; $curTagsLength = $tagsList->length;
$node = null;
/* /*
* Gather counts for other typical elements embedded within. * Gather counts for other typical elements embedded within.
@ -1108,7 +1106,6 @@ class Readability implements LoggerAwareInterface
*/ */
if ($this->flagIsActive(self::FLAG_STRIP_UNLIKELYS) && $xpath) { if ($this->flagIsActive(self::FLAG_STRIP_UNLIKELYS) && $xpath) {
$candidates = $xpath->query('.//*[(self::footer and count(//footer)<2) or (self::aside and count(//aside)<2)]', $page->documentElement); $candidates = $xpath->query('.//*[(self::footer and count(//footer)<2) or (self::aside and count(//aside)<2)]', $page->documentElement);
$node = null;
for ($c = $candidates->length - 1; $c >= 0; --$c) { for ($c = $candidates->length - 1; $c >= 0; --$c) {
$node = $candidates->item($c); $node = $candidates->item($c);
@ -1120,7 +1117,6 @@ class Readability implements LoggerAwareInterface
} }
$candidates = $xpath->query('.//*[not(self::body) and (@class or @id or @style) and ((number(@readability) < 40) or not(@readability))]', $page->documentElement); $candidates = $xpath->query('.//*[not(self::body) and (@class or @id or @style) and ((number(@readability) < 40) or not(@readability))]', $page->documentElement);
$node = null;
for ($c = $candidates->length - 1; $c >= 0; --$c) { for ($c = $candidates->length - 1; $c >= 0; --$c) {
$node = $candidates->item($c); $node = $candidates->item($c);
@ -1424,8 +1420,8 @@ class Readability implements LoggerAwareInterface
$this->html = mb_convert_encoding($this->html, 'HTML-ENTITIES', 'UTF-8'); $this->html = mb_convert_encoding($this->html, 'HTML-ENTITIES', 'UTF-8');
if ('html5lib' === $this->parser) { if ('html5lib' === $this->parser || 'html5' === $this->parser) {
$this->dom = Parser::parse($this->html); $this->dom = (new HTML5())->loadHTML($this->html);
} }
if ('libxml' === $this->parser) { if ('libxml' === $this->parser) {

Loading…
Cancel
Save