From 05089bbd0303aa2fdff5d3e744d33a7af1e62c4c Mon Sep 17 00:00:00 2001 From: Jeremy Benoist Date: Fri, 30 Jun 2017 16:32:37 +0200 Subject: [PATCH 1/2] Add missing HTML5 class --- composer.json | 3 ++- src/Readability.php | 3 ++- tests/ReadabilityTest.php | 16 +++++++++------- 3 files changed, 13 insertions(+), 9 deletions(-) diff --git a/composer.json b/composer.json index e490c61..8f39405 100644 --- a/composer.json +++ b/composer.json @@ -26,7 +26,8 @@ "require": { "php": ">=5.3.3", "ext-mbstring": "*", - "psr/log": "^1.0" + "psr/log": "^1.0", + "electrolinux/php-html5lib": "^0.1.0" }, "require-dev": { "satooshi/php-coveralls": "~0.6", diff --git a/src/Readability.php b/src/Readability.php index 8520032..4ad08fa 100644 --- a/src/Readability.php +++ b/src/Readability.php @@ -5,6 +5,7 @@ namespace Readability; use Psr\Log\LoggerAwareInterface; use Psr\Log\LoggerInterface; use Psr\Log\NullLogger; +use HTML5Lib\Parser; /** * Arc90's Readability ported to PHP for FiveFilters.org @@ -284,7 +285,7 @@ class Readability implements LoggerAwareInterface $this->html = mb_convert_encoding($this->html, 'HTML-ENTITIES', 'UTF-8'); - if (!($this->parser === 'html5lib' && ($this->dom = \HTML5_Parser::parse($this->html)))) { + if (!($this->parser === 'html5lib' && ($this->dom = Parser::parse($this->html)))) { libxml_use_internal_errors(true); $this->dom = new \DOMDocument(); diff --git a/tests/ReadabilityTest.php b/tests/ReadabilityTest.php index 50618e8..e7d7bcd 100644 --- a/tests/ReadabilityTest.php +++ b/tests/ReadabilityTest.php @@ -33,6 +33,15 @@ class ReadabilityTest extends \PHPUnit_Framework_TestCase $this->assertInstanceOf('DomDocument', $readability->dom); } + public function testConstructHtml5Parser() + { + $readability = $this->getReadability('', 'http://0.0.0.0', 'html5lib'); + + $this->assertEquals('http://0.0.0.0', $readability->url); + $this->assertInstanceOf('DomDocument', $readability->dom); + $this->assertEquals('', $readability->original_html); + } + /** * @requires extension tidy */ @@ -326,13 +335,6 @@ class ReadabilityTest extends \PHPUnit_Framework_TestCase $this->assertNotContains('This text is also an awesome text and you should know that', $readability->getContent()->innerHTML); } - // public function testConstructParser() - // { - // $readability = $this->getReadability('', 'http://0.0.0.0', 'html5lib'); - - // $this->assertEquals('http://0.0.0.0', $readability->url); - // } - // dummy function to be used to the next test public function error2Exception($code, $string, $file, $line, $context) { From 613a63c06211fcb2c524dff4cf2731a2ecb5f03c Mon Sep 17 00:00:00 2001 From: Jeremy Benoist Date: Fri, 30 Jun 2017 16:42:29 +0200 Subject: [PATCH 2/2] CS --- src/Readability.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Readability.php b/src/Readability.php index 4ad08fa..9a2e9ba 100644 --- a/src/Readability.php +++ b/src/Readability.php @@ -2,10 +2,10 @@ namespace Readability; +use HTML5Lib\Parser; use Psr\Log\LoggerAwareInterface; use Psr\Log\LoggerInterface; use Psr\Log\NullLogger; -use HTML5Lib\Parser; /** * Arc90's Readability ported to PHP for FiveFilters.org