getReadability('');
        $this->assertNull($readability->url);
        $this->assertInstanceOf(\DOMDocument::class, $readability->dom);
    }

    /**
     * Constructing with the 'html5lib' parser keeps the given URL, exposes a
     * DOMDocument on ->dom and stores the (empty) input as ->original_html.
     */
    public function testConstructHtml5Parser()
    {
        $reader = $this->getReadability('', 'http://0.0.0.0', 'html5lib');

        self::assertSame('http://0.0.0.0', $reader->url);
        self::assertInstanceOf(\DOMDocument::class, $reader->dom);
        self::assertSame('', $reader->original_html);
    }

    /**
     * Constructing with only markup and URL (helper defaults for parser and
     * tidy) must leave ->tidied set to true.
     *
     * @requires extension tidy
     */
    public function testConstructSimple()
    {
        $reader = $this->getReadability('', 'http://0.0.0.0');

        self::assertSame('http://0.0.0.0', $reader->url);
        self::assertInstanceOf(\DOMDocument::class, $reader->dom);
        self::assertSame('', $reader->original_html);
        self::assertTrue($reader->tidied);
    }

    /**
     * No URL, 'libxml' parser, tidy switched off: ->url stays null and
     * ->tidied is false, while ->dom is still a DOMDocument.
     */
    public function testConstructDefaultWithoutTidy()
    {
        $reader = $this->getReadability('', null, 'libxml', false);

        self::assertNull($reader->url);
        self::assertSame('', $reader->original_html);
        self::assertFalse($reader->tidied);
        self::assertInstanceOf(\DOMDocument::class, $reader->dom);
    }

    /**
     * Same as the simple construction, but with tidy switched off:
     * ->tidied must be false.
     */
    public function testConstructSimpleWithoutTidy()
    {
        $reader = $this->getReadability('', 'http://0.0.0.0', 'libxml', false);

        self::assertSame('http://0.0.0.0', $reader->url);
        self::assertInstanceOf(\DOMDocument::class, $reader->dom);
        self::assertSame('', $reader->original_html);
        self::assertFalse($reader->tidied);
    }

    /**
     * init() on empty markup returns false; title and content are still
     * JSLikeHTMLElement instances, the title is empty and the content carries
     * the "unable to parse" message.
     */
    public function testInitNoContent()
    {
        $reader = $this->getReadability('', 'http://0.0.0.0');

        self::assertFalse($reader->init());

        self::assertInstanceOf('Readability\JSLikeHTMLElement', $reader->getContent());
        self::assertInstanceOf('Readability\JSLikeHTMLElement', $reader->getTitle());
        self::assertEmpty($reader->getTitle()->getInnerHtml());
        self::assertStringContainsString(
            'Sorry, Readability was unable to parse this page for content.',
            $reader->getContent()->getInnerHtml()
        );
    }

    public function testInitP()
    {
$readability = $this->getReadability(str_repeat('

This is the awesome content :)

', 7), 'http://0.0.0.0'); $res = $readability->init(); $this->assertTrue($res); $this->assertInstanceOf('Readability\JSLikeHTMLElement', $readability->getContent()); $this->assertInstanceOf('Readability\JSLikeHTMLElement', $readability->getTitle()); $this->assertStringContainsString('
getContent()->getInnerHtml()); } public function testInitDivP() { $readability = $this->getReadability('
' . str_repeat('

This is the awesome content :)

', 7) . '
', 'http://0.0.0.0'); $res = $readability->init(); $this->assertTrue($res); $this->assertInstanceOf('Readability\JSLikeHTMLElement', $readability->getContent()); $this->assertInstanceOf('Readability\JSLikeHTMLElement', $readability->getTitle()); $this->assertStringContainsString('
getContent()->getInnerHtml()); } public function testInitDiv() { $readability = $this->getReadability('
' . str_repeat('This is the awesome content :)', 7) . '
', 'http://0.0.0.0'); $res = $readability->init(); $this->assertTrue($res); $this->assertInstanceOf('Readability\JSLikeHTMLElement', $readability->getContent()); $this->assertInstanceOf('Readability\JSLikeHTMLElement', $readability->getTitle()); $this->assertStringContainsString('
getContent()->getInnerHtml()); } public function testWithFootnotes() { $readability = $this->getReadability('
' . str_repeat('

This is an awesome text with some links, here there are: the awesome

', 7) . '
', 'http://0.0.0.0'); $readability->convertLinksToFootnotes = true; $res = $readability->init(); $this->assertTrue($res); $this->assertInstanceOf('Readability\JSLikeHTMLElement', $readability->getContent()); $this->assertInstanceOf('Readability\JSLikeHTMLElement', $readability->getTitle()); $this->assertStringContainsString('
getContent()->getInnerHtml()); $this->assertStringContainsString('readabilityFootnoteLink', $readability->getContent()->getInnerHtml()); $this->assertStringContainsString('readabilityLink-3', $readability->getContent()->getInnerHtml()); } public function testStandardClean() { $readability = $this->getReadability('

Title

' . str_repeat('

This is an awesome text with some links, here there are: the awesome

', 7) . 'will NOT be removed
', 'http://0.0.0.0'); $readability->lightClean = false; $res = $readability->init(); $this->assertTrue($res); $this->assertInstanceOf('Readability\JSLikeHTMLElement', $readability->getContent()); $this->assertInstanceOf('Readability\JSLikeHTMLElement', $readability->getTitle()); $this->assertStringContainsString('
getContent()->getInnerHtml()); $this->assertStringContainsString('will NOT be removed', $readability->getContent()->getInnerHtml()); $this->assertStringNotContainsString('

', $readability->getContent()->getInnerHtml()); } public function testWithIframe() { $readability = $this->getReadability('

Title

' . str_repeat('

This is an awesome text with some links, here there are: the awesome

', 7) . '

This is an awesome text with some links, here there are

', 'http://0.0.0.0'); $res = $readability->init(); $this->assertTrue($res); $this->assertInstanceOf('Readability\JSLikeHTMLElement', $readability->getContent()); $this->assertInstanceOf('Readability\JSLikeHTMLElement', $readability->getTitle()); $this->assertStringContainsString('
getContent()->getInnerHtml()); $this->assertStringContainsString('nofollow', $readability->getContent()->getInnerHtml()); } public function testWithArticle() { $readability = $this->getReadability('

' . str_repeat('This is an awesome text with some links, here there are: the awesome', 20) . '

This is an awesome text with some links, here there are

', 'http://0.0.0.0'); $res = $readability->init(); $this->assertTrue($res); $this->assertInstanceOf('Readability\JSLikeHTMLElement', $readability->getContent()); $this->assertInstanceOf('Readability\JSLikeHTMLElement', $readability->getTitle()); $this->assertStringContainsString('alt="article"', $readability->getContent()->getInnerHtml()); $this->assertEmpty($readability->getTitle()->getInnerHtml()); $this->assertStringContainsString('This is an awesome text with some links, here there are', $readability->getContent()->getInnerHtml()); $this->assertStringContainsString('nofollow', $readability->getContent()->getInnerHtml()); } public function testWithAside() { $readability = $this->getReadability('
' . str_repeat('

This is an awesome text with some links, here there are: the awesome

', 7) . '
', 'http://0.0.0.0'); $res = $readability->init(); $this->assertTrue($res); $this->assertInstanceOf('Readability\JSLikeHTMLElement', $readability->getContent()); $this->assertInstanceOf('Readability\JSLikeHTMLElement', $readability->getTitle()); $this->assertEmpty($readability->getTitle()->getInnerHtml()); $this->assertStringContainsString('This is an awesome text with some links, here there are', $readability->getContent()->getInnerHtml()); $this->assertStringNotContainsString('

'; $readability = $this->getReadability($data, 'http://iosgames.ru/?p=22030'); $res = $readability->init(); $this->assertTrue($res); $this->assertInstanceOf('Readability\JSLikeHTMLElement', $readability->getContent()); $this->assertInstanceOf('Readability\JSLikeHTMLElement', $readability->getTitle()); $this->assertStringContainsString('', $readability->getContent()->getInnerHtml()); $this->assertStringContainsString('3D Touch', $readability->getTitle()->getInnerHtml()); } finally { restore_error_handler(); if (false !== $oldDisplayErrors) { ini_set('display_errors', $oldDisplayErrors); } error_reporting($oldErrorReporting); } } /** * This should generate an Exception "DOMElement::setAttribute(): ID post-60 already defined". */ public function testAppendIdAlreadyHere() { $data = '
This is an awesome text with some links, here there are
This is an awesome text with some links, here there are
This is an awesome text with some links, here there are
This is an awesome text with some links, here there are
This is an awesome text with some links, here there are
This is an awesome text with some links, here there are
This is an awesome text with some links, here there are
This is an awesome text with some links, here there are
This is an awesome text with some links, here there are
This is an awesome text with some links, here there are
This is an awesome text with some links, here there are
'; $readability = $this->getReadability($data, 'http://0.0.0.0'); $res = $readability->init(); $this->assertTrue($res); $this->assertInstanceOf('Readability\JSLikeHTMLElement', $readability->getContent()); $this->assertInstanceOf('Readability\JSLikeHTMLElement', $readability->getTitle()); } public function testPostFilters() { $readability = $this->getReadability('
' . str_repeat('

This is the awesome content :)

', 10) . '
', 'http://0.0.0.0'); $readability->addPostFilter('!]*>(.*?)!is', ''); $res = $readability->init(); $this->assertTrue($res); $this->assertStringContainsString('This the awesome content :)', $readability->getContent()->getInnerHtml()); } public function testPreFilters() { $this->markTestSkipped('Won\'t work until loadHtml() is moved in init() instead of __construct()'); $readability = $this->getReadability('
' . str_repeat('

This is the awesome and WONDERFUL content :)

', 7) . '
', 'http://0.0.0.0');
        $readability->addPreFilter('!]*>(.*?)!is', '');

        $res = $readability->init();

        $this->assertTrue($res);
        $this->assertStringContainsString('This the awesome and WONDERFUL content :)', $readability->getContent()->getInnerHtml());
    }

    /**
     * Parses the childNodeGoesNull fixture with link-to-footnote conversion
     * enabled and expects init() to succeed.
     */
    public function testChildNodeGoneNull()
    {
        // from http://www.ayyaantuu.net/ethiopia-targets-opposition-lawmakers/
        $html = file_get_contents('tests/fixtures/childNodeGoesNull.html');

        $readability = $this->getReadability($html, 'http://0.0.0.0');
        $readability->convertLinksToFootnotes = true;

        $res = $readability->init();

        $this->assertTrue($res);
    }

    /**
     * Parses the keepFootnotes fixture and inspects the extracted content.
     */
    public function testKeepFootnotes()
    {
        // from https://www.schreibdichte.de/blog/feed-aggregator-und-spaeter-lesen-dienst-im-team
        $html = file_get_contents('tests/fixtures/keepFootnotes.html');

        $readability = $this->getReadability($html, 'http://0.0.0.0');

        $res = $readability->init();

        $this->assertTrue($res);
        // NOTE(review): the needle below is just '2'; presumably markup around it was lost — confirm.
        $this->assertStringContainsString('2', $readability->getContent()->getInnerHtml());
        // NOTE(review): the next statement is incomplete as found: the expected-string argument and the
        // start of the haystack expression are missing. Left untouched; restore it from project history.
        $this->assertStringContainsString('getContent()->getInnerHtml());
    }

    /**
     * Parses the wipedBody fixture with the 'libxml' parser and tidy disabled,
     * and expects the chapter heading text to be present in the content.
     */
    public function testWithWipedBody()
    {
        // from https://www.cs.cmu.edu/~rgs/alice-table.html
        $html = file_get_contents('tests/fixtures/wipedBody.html');

        $readability = $this->getReadability($html, 'http://0.0.0.0', 'libxml', false);

        $res = $readability->init();

        $this->assertTrue($res);
        $this->assertStringContainsString('Down the Rabbit-Hole', $readability->getContent()->getInnerHtml());
    }

    /**
     * Datasets for testHtmlLang(): each entry holds the markup, the expected
     * `lang` value and, optionally, the flag received there as $useTidy.
     *
     * @return array
     */
    public function dataForHtmlLang()
    {
        return [
            'meta' => [
                '
' . str_repeat('

This is the awesome content :)

', 7) . '
', 'fr', ], 'head' => [ 'Foo
' . str_repeat('

This is the awesome content :)

', 7) . '
', 'fr', ], 'headless' => [ '
' . str_repeat('

This is the awesome content :)

', 7) . '
', 'fr', // tidy would add tag. false, ], 'fragment' => [ '
' . str_repeat('

This is the awesome content :)

', 7) . '
', '',
                // This dataset passes false as $useTidy. The original note read "tidy would add ." and
                // the element name appears to be missing from it — TODO restore.
                false,
            ],
        ];
    }

    /**
     * Checks that the `lang` attribute of the parsed document's root element
     * equals the value expected by the dataset.
     *
     * @dataProvider dataForHtmlLang
     *
     * @param string $html    markup handed to Readability
     * @param string $lang    expected `lang` attribute value on the document element
     * @param bool   $useTidy forwarded to getReadability() as its fourth argument
     */
    public function testHtmlLang($html, $lang, $useTidy = true)
    {
        $readability = $this->getReadability($html, 'http://0.0.0.0', 'libxml', $useTidy);

        $res = $readability->init();

        $this->assertTrue($res);
        $this->assertInstanceOf(\DOMDocument::class, $readability->dom);
        $this->assertSame($lang, $readability->dom->documentElement->getAttribute('lang'));
    }

    /**
     * Builds a Readability instance and attaches a fresh test logger to it.
     *
     * Every call replaces $this->logHandler and $this->logger with new
     * instances, so those properties always refer to the most recently
     * created Readability object.
     *
     * @param mixed $html     forwarded unchanged to the Readability constructor
     * @param mixed $url      forwarded unchanged (null by default)
     * @param mixed $parser   forwarded unchanged ('libxml' by default)
     * @param mixed $useTidy  forwarded unchanged (true by default)
     *
     * @return Readability
     */
    private function getReadability($html, $url = null, $parser = 'libxml', $useTidy = true)
    {
        $readability = new Readability($html, $url, $parser, $useTidy);

        $this->logHandler = new TestHandler();
        $this->logger = new Logger('test', [$this->logHandler]);
        $readability->setLogger($this->logger);

        return $readability;
    }
}