Fix instanceof DOMElement

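We previously checked `instanceof DOMElement`, which was wrong: the code
lives in the `Readability` namespace, so the unqualified name resolves to
`Readability\DOMElement`, a class that does not exist. The checks now use
the fully qualified `\DOMElement` (and `\Exception` in `catch` clauses).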
pull/13/head
Jeremy Benoist 10 years ago
parent 6e3f2e8c0b
commit 209c404d7b
  1. 176
      src/Readability.php
  2. 10
      tests/ReadabilityTest.php

@ -53,16 +53,23 @@ class Readability
public $articleContent; public $articleContent;
public $original_html; public $original_html;
public $dom; public $dom;
public $url = null; // optional - URL where HTML was retrieved // optional - URL where HTML was retrieved
public $lightClean = true; // preserves more content (experimental) public $url = null;
// preserves more content (experimental)
public $lightClean = true;
public $debug = false; public $debug = false;
public $tidied = false; public $tidied = false;
protected $debugText = ''; // error text for one time output // error text for one time output
protected $domainRegExp = null; // article domain regexp for calibration protected $debugText = '';
// article domain regexp for calibration
protected $domainRegExp = null;
protected $body = null; // protected $body = null; //
protected $bodyCache = null; // Cache the body HTML in case we need to re-use it later // Cache the body HTML in case we need to re-use it later
protected $flags = 7; // 1 | 2 | 4; // Start with all processing flags set. protected $bodyCache = null;
protected $success = false; // indicates whether we were able to extract or not // 1 | 2 | 4; // Start with all processing flags set.
protected $flags = 7;
// indicates whether we were able to extract or not
protected $success = false;
/** /**
* All of the regular expressions in use within readability. * All of the regular expressions in use within readability.
@ -105,21 +112,33 @@ class Readability
); );
// raw HTML filters // raw HTML filters
protected $pre_filters = array( protected $pre_filters = array(
'!<script[^>]*>(.*?)</script>!is' => '', // remove obvious scripts // remove obvious scripts
'!<style[^>]*>(.*?)</style>!is' => '', // remove obvious styles '!<script[^>]*>(.*?)</script>!is' => '',
'!</?span[^>]*>!is' => '', // remove spans as we redefine styles and they're probably special-styled // remove obvious styles
'!<font[^>]*>\s*\[AD\]\s*</font>!is' => '', // HACK: firewall-filtered content '!<style[^>]*>(.*?)</style>!is' => '',
'!(<br[^>]*>[ \r\n\s]*){2,}!i' => '</p><p>', // HACK: replace linebreaks plus br's with p's // remove spans as we redefine styles and they're probably special-styled
//'!</?noscript>!is' => '', // replace noscripts '!</?span[^>]*>!is' => '',
'!<(/?)font[^>]*>!is' => '<\\1span>', // replace fonts to spans // HACK: firewall-filtered content
'!<font[^>]*>\s*\[AD\]\s*</font>!is' => '',
// HACK: replace linebreaks plus br's with p's
'!(<br[^>]*>[ \r\n\s]*){2,}!i' => '</p><p>',
// replace noscripts
//'!</?noscript>!is' => '',
// replace fonts to spans
'!<(/?)font[^>]*>!is' => '<\\1span>',
); );
// output HTML filters // output HTML filters
protected $post_filters = array( protected $post_filters = array(
'/<br\s*\/?>\s*<p/i' => '<p', // replace excessive br's // replace excessive br's
'!<(?:a|div|p)[^>]+/>!is' => '', // replace empty tags that break layouts '/<br\s*\/?>\s*<p/i' => '<p',
//'!<(\s*/?\s*(?:blockquote|br|hr|code|div|article|span|footer|aside|p|pre|dl|li|ul|ol)) [^>]+>!is' => "<\\1>", // remove all attributes on text tags // replace empty tags that break layouts
"/\n+/" => "\n", //single newlines cleanup '!<(?:a|div|p)[^>]+/>!is' => '',
'!<pre[^>]*>\s*<code!is' => '<pre', // modern web... // remove all attributes on text tags
//'!<(\s*/?\s*(?:blockquote|br|hr|code|div|article|span|footer|aside|p|pre|dl|li|ul|ol)) [^>]+>!is' => "<\\1>",
//single newlines cleanup
"/\n+/" => "\n",
// modern web...
'!<pre[^>]*>\s*<code!is' => '<pre',
'!</code>\s*</pre>!is' => '</pre>', '!</code>\s*</pre>!is' => '</pre>',
'!<[hb]r>!is' => '<\\1 />', '!<[hb]r>!is' => '<\\1 />',
); );
@ -366,12 +385,12 @@ class Readability
*/ */
protected function getArticleTitle() protected function getArticleTitle()
{ {
$curTitle = '';
$origTitle = ''; $origTitle = '';
try { try {
$curTitle = $origTitle = $this->getInnerText($this->dom->getElementsByTagName('title')->item(0)); $curTitle = $origTitle = $this->getInnerText($this->dom->getElementsByTagName('title')->item(0));
} catch (Exception $e) { } catch (\Exception $e) {
$curTitle = '';
} }
if (preg_match('/ [\|\-] /', $curTitle)) { if (preg_match('/ [\|\-] /', $curTitle)) {
@ -504,14 +523,10 @@ class Readability
*/ */
public function prepArticle(\DOMElement $articleContent) public function prepArticle(\DOMElement $articleContent)
{ {
if ($this->lightClean) { $this->dbg($this->lightClean ? 'Light clean enabled.' : 'Standard clean enabled.');
$this->dbg('Light clean enabled.');
} else {
$this->dbg('Standard clean enabled.');
}
$this->cleanStyles($articleContent); $this->cleanStyles($articleContent);
$this->killBreaks($articleContent); $this->killBreaks($articleContent);
$xpath = new \DOMXPath($articleContent->ownerDocument); $xpath = new \DOMXPath($articleContent->ownerDocument);
if ($this->revertForcedParagraphElements) { if ($this->revertForcedParagraphElements) {
@ -563,23 +578,25 @@ class Readability
$articleParagraphs = $articleContent->getElementsByTagName('p'); $articleParagraphs = $articleContent->getElementsByTagName('p');
for ($i = $articleParagraphs->length - 1; $i >= 0; --$i) { for ($i = $articleParagraphs->length - 1; $i >= 0; --$i) {
$imgCount = $articleParagraphs->item($i)->getElementsByTagName('img')->length; $item = $articleParagraphs->item($i);
$embedCount = $articleParagraphs->item($i)->getElementsByTagName('embed')->length;
$objectCount = $articleParagraphs->item($i)->getElementsByTagName('object')->length; $imgCount = $item->getElementsByTagName('img')->length;
$videoCount = $articleParagraphs->item($i)->getElementsByTagName('video')->length; $embedCount = $item->getElementsByTagName('embed')->length;
$audioCount = $articleParagraphs->item($i)->getElementsByTagName('audio')->length; $objectCount = $item->getElementsByTagName('object')->length;
$iframeCount = $articleParagraphs->item($i)->getElementsByTagName('iframe')->length; $videoCount = $item->getElementsByTagName('video')->length;
$audioCount = $item->getElementsByTagName('audio')->length;
if ($iframeCount === 0 && $imgCount === 0 && $embedCount === 0 && $objectCount === 0 && $videoCount === 0 && $audioCount === 0 && mb_strlen(preg_replace('/\s+/is', '', $this->getInnerText($articleParagraphs->item($i), false, false))) === 0) { $iframeCount = $item->getElementsByTagName('iframe')->length;
$articleParagraphs->item($i)->parentNode->removeChild($articleParagraphs->item($i));
if ($iframeCount === 0 && $imgCount === 0 && $embedCount === 0 && $objectCount === 0 && $videoCount === 0 && $audioCount === 0 && mb_strlen(preg_replace('/\s+/is', '', $this->getInnerText($item, false, false))) === 0) {
$item->parentNode->removeChild($item);
} }
// add extra text to iframe tag to avoid an auto-closing iframe and then break the html code // add extra text to iframe tag to avoid an auto-closing iframe and then break the html code
if ($iframeCount) { if ($iframeCount) {
$iframe = $articleParagraphs->item($i)->getElementsByTagName('iframe'); $iframe = $item->getElementsByTagName('iframe');
$iframe->item(0)->nodeValue = ' '; $iframe->item(0)->nodeValue = ' ';
$articleParagraphs->item($i)->parentNode->replaceChild($iframe->item(0), $articleParagraphs->item($i)); $item->parentNode->replaceChild($iframe->item(0), $item);
} }
} }
@ -589,7 +606,7 @@ class Readability
$articleContent->innerHTML = preg_replace($search, $replace, $articleContent->innerHTML); $articleContent->innerHTML = preg_replace($search, $replace, $articleContent->innerHTML);
} }
unset($search, $replace); unset($search, $replace);
} catch (Exception $e) { } catch (\Exception $e) {
$this->dbg('Cleaning output HTML failed. Ignoring: '.$e->getMessage()); $this->dbg('Cleaning output HTML failed. Ignoring: '.$e->getMessage());
} }
} }
@ -626,10 +643,10 @@ class Readability
case 'FIGURE': case 'FIGURE':
$readability->value += 3; $readability->value += 3;
break; break;
/* case 'SECTION': // often misused case 'SECTION':
$readability->value += 2; // often misused
// $readability->value += 2;
break; break;
*/
case 'OL': case 'OL':
case 'UL': case 'UL':
case 'DL': case 'DL':
@ -665,12 +682,12 @@ class Readability
} }
/** /**
* grabArticle - Using a variety of metrics (content score, classname, element types), find the content that is * Using a variety of metrics (content score, classname, element types), find the content that is
* most likely to be the stuff a user wants to read. Then return it wrapped up in a div. * most likely to be the stuff a user wants to read. Then return it wrapped up in a div.
* *
* @param \DOMElement $page * @param \DOMElement $page
* *
* @return \DOMElement * @return \DOMElement|bool
*/ */
protected function grabArticle(\DOMElement $page = null) protected function grabArticle(\DOMElement $page = null)
{ {
@ -703,13 +720,11 @@ class Readability
try { try {
$newNode->innerHTML = $node->innerHTML; $newNode->innerHTML = $node->innerHTML;
// It's easier to debug using original attributes.
//$newNode->setAttribute('class', $node->getAttribute('class')); $node->parentNode->replaceChild($newNode, $node);
//$newNode->setAttribute('id', $node->getAttribute('id'));
$node = $node->parentNode->replaceChild($newNode, $node);
--$nodeIndex; --$nodeIndex;
$nodesToScore[] = $newNode; $nodesToScore[] = $newNode;
} catch (Exception $e) { } catch (\Exception $e) {
$this->dbg('Could not alter div/article to p, reverting back to div: '.$e->getMessage()); $this->dbg('Could not alter div/article to p, reverting back to div: '.$e->getMessage());
} }
} else { } else {
@ -717,12 +732,15 @@ class Readability
for ($i = 0, $il = $node->childNodes->length; $i < $il; ++$i) { for ($i = 0, $il = $node->childNodes->length; $i < $il; ++$i) {
$childNode = $node->childNodes->item($i); $childNode = $node->childNodes->item($i);
if (is_object($childNode) && get_class($childNode) === 'DOMProcessingInstruction') { //executable tags (<?php or <?xml) warning // executable tags (<?php or <?xml) warning
if (is_object($childNode) && get_class($childNode) === 'DOMProcessingInstruction') {
$childNode->parentNode->removeChild($childNode); $childNode->parentNode->removeChild($childNode);
continue; continue;
} }
if ($childNode->nodeType == 3) { // XML_TEXT_NODE // XML_TEXT_NODE
if ($childNode->nodeType == 3) {
//$this->dbg('replacing text node with a P tag with the same content.'); //$this->dbg('replacing text node with a P tag with the same content.');
$p = $this->dom->createElement('p'); $p = $this->dom->createElement('p');
$p->innerHTML = $childNode->nodeValue; $p->innerHTML = $childNode->nodeValue;
@ -743,12 +761,13 @@ class Readability
*/ */
for ($pt = 0, $scored = count($nodesToScore); $pt < $scored; ++$pt) { for ($pt = 0, $scored = count($nodesToScore); $pt < $scored; ++$pt) {
$parentNode = $nodesToScore[$pt]->parentNode; $parentNode = $nodesToScore[$pt]->parentNode;
// No parent node? Move on... // No parent node? Move on...
if (!$parentNode) { if (!$parentNode) {
continue; continue;
} }
$grandParentNode = ($parentNode->parentNode instanceof DOMElement) ? $parentNode->parentNode : null; $grandParentNode = ($parentNode->parentNode instanceof \DOMElement) ? $parentNode->parentNode : null;
$innerText = $this->getInnerText($nodesToScore[$pt]); $innerText = $this->getInnerText($nodesToScore[$pt]);
// If this paragraph is less than MIN_PARAGRAPH_LENGTH (default:20) characters, don't even count it. // If this paragraph is less than MIN_PARAGRAPH_LENGTH (default:20) characters, don't even count it.
@ -778,7 +797,7 @@ class Readability
/* TEST: For every positive/negative parent tag, add/substract half point. Up to 3 points. *\/ /* TEST: For every positive/negative parent tag, add/substract half point. Up to 3 points. *\/
$up = $nodesToScore[$pt]; $up = $nodesToScore[$pt];
$score = 0; $score = 0;
while ($up->parentNode instanceof DOMElement) { while ($up->parentNode instanceof \DOMElement) {
$up = $up->parentNode; $up = $up->parentNode;
if (preg_match($this->regexps['positive'], $up->getAttribute('class') . ' ' . $up->getAttribute('id'))) { if (preg_match($this->regexps['positive'], $up->getAttribute('class') . ' ' . $up->getAttribute('id'))) {
$score += 0.5; $score += 0.5;
@ -802,8 +821,9 @@ class Readability
*/ */
if ($this->flagIsActive(self::FLAG_STRIP_UNLIKELYS) && $xpath) { if ($this->flagIsActive(self::FLAG_STRIP_UNLIKELYS) && $xpath) {
$candidates = $xpath->query('.//*[(self::footer and count(//footer)<2) or (self::aside and count(//aside)<2)]', $page->documentElement); $candidates = $xpath->query('.//*[(self::footer and count(//footer)<2) or (self::aside and count(//aside)<2)]', $page->documentElement);
$node = null;
for ($node = null, $c = $candidates->length - 1; $c >= 0; --$c) { for ($c = $candidates->length - 1; $c >= 0; --$c) {
$node = $candidates->item($c); $node = $candidates->item($c);
// node should be readable but not inside of an article otherwise it's probably non-readable block // node should be readable but not inside of an article otherwise it's probably non-readable block
if ($node->hasAttribute('readability') && (int) $node->getAttributeNode('readability')->value < 40 && ($node->parentNode ? strcasecmp($node->parentNode->tagName, 'article') !== 0 : true)) { if ($node->hasAttribute('readability') && (int) $node->getAttributeNode('readability')->value < 40 && ($node->parentNode ? strcasecmp($node->parentNode->tagName, 'article') !== 0 : true)) {
@ -813,8 +833,9 @@ class Readability
} }
$candidates = $xpath->query('.//*[not(self::body) and (@class or @id or @style) and ((number(@readability) < 40) or not(@readability))]', $page->documentElement); $candidates = $xpath->query('.//*[not(self::body) and (@class or @id or @style) and ((number(@readability) < 40) or not(@readability))]', $page->documentElement);
$node = null;
for ($node = null, $c = $candidates->length - 1; $c >= 0; --$c) { for ($c = $candidates->length - 1; $c >= 0; --$c) {
$node = $candidates->item($c); $node = $candidates->item($c);
// Remove unlikely candidates // Remove unlikely candidates
@ -842,15 +863,17 @@ class Readability
$candidates = $xpath->query('.//*[@data-candidate]', $page->documentElement); $candidates = $xpath->query('.//*[@data-candidate]', $page->documentElement);
for ($c = $candidates->length - 1; $c >= 0; --$c) { for ($c = $candidates->length - 1; $c >= 0; --$c) {
$item = $candidates->item($c);
// Scale the final candidates score based on link density. Good content should have a // Scale the final candidates score based on link density. Good content should have a
// relatively small link density (5% or less) and be mostly unaffected by this operation. // relatively small link density (5% or less) and be mostly unaffected by this operation.
// If not for this we would have used XPath to find maximum @readability. // If not for this we would have used XPath to find maximum @readability.
$readability = $candidates->item($c)->getAttributeNode('readability'); $readability = $item->getAttributeNode('readability');
$readability->value = round($readability->value * (1 - $this->getLinkDensity($candidates->item($c))), 0, PHP_ROUND_HALF_UP); $readability->value = round($readability->value * (1 - $this->getLinkDensity($item)), 0, PHP_ROUND_HALF_UP);
if (!$topCandidate || $readability->value > (int) $topCandidate->getAttribute('readability')) { if (!$topCandidate || $readability->value > (int) $topCandidate->getAttribute('readability')) {
$this->dbg('Candidate: '.$candidates->item($c)->getNodePath().' ('.$candidates->item($c)->getAttribute('class').':'.$candidates->item($c)->getAttribute('id').') with score '.$readability->value); $this->dbg('Candidate: '.$item->getNodePath().' ('.$item->getAttribute('class').':'.$item->getAttribute('id').') with score '.$readability->value);
$topCandidate = $candidates->item($c); $topCandidate = $item;
} }
} }
@ -889,7 +912,7 @@ class Readability
if (strcasecmp($tagName, 'td') === 0 || strcasecmp($tagName, 'tr') === 0) { if (strcasecmp($tagName, 'td') === 0 || strcasecmp($tagName, 'tr') === 0) {
$up = $topCandidate; $up = $topCandidate;
if ($up->parentNode instanceof DOMElement) { if ($up->parentNode instanceof \DOMElement) {
$up = $up->parentNode; $up = $up->parentNode;
if (strcasecmp($up->tagName, 'table') === 0) { if (strcasecmp($up->tagName, 'table') === 0) {
@ -949,7 +972,6 @@ class Readability
if ($append) { if ($append) {
$this->dbg('Appending node: '.$siblingNode->getNodePath()); $this->dbg('Appending node: '.$siblingNode->getNodePath());
$nodeToAppend = null;
if (strcasecmp($siblingNodeName, 'div') !== 0 && strcasecmp($siblingNodeName, 'p') !== 0) { if (strcasecmp($siblingNodeName, 'div') !== 0 && strcasecmp($siblingNodeName, 'p') !== 0) {
// We have a node that isn't a common block level element, like a form or td tag. Turn it into a div so it doesn't get filtered out later by accident. // We have a node that isn't a common block level element, like a form or td tag. Turn it into a div so it doesn't get filtered out later by accident.
@ -959,7 +981,7 @@ class Readability
try { try {
$nodeToAppend->setAttribute('alt', $siblingNodeName); $nodeToAppend->setAttribute('alt', $siblingNodeName);
$nodeToAppend->innerHTML = $siblingNode->innerHTML; $nodeToAppend->innerHTML = $siblingNode->innerHTML;
} catch (Exception $e) { } catch (\Exception $e) {
$this->dbg('Could not alter siblingNode "'.$siblingNodeName.'" to "div", reverting to original.'); $this->dbg('Could not alter siblingNode "'.$siblingNodeName.'" to "div", reverting to original.');
$nodeToAppend = $siblingNode; $nodeToAppend = $siblingNode;
--$s; --$s;
@ -1133,19 +1155,20 @@ class Readability
} }
$weight = 0; $weight = 0;
//$attribute_val = trim($element->getAttribute('class')." ".$element->getAttribute('id')); // $attributeValue = trim($element->getAttribute('class')." ".$element->getAttribute('id'));
$attribute_val = trim($element->getAttribute($attribute)); $attributeValue = trim($element->getAttribute($attribute));
if ($attribute_val != '') {
if (preg_match($this->regexps['negative'], $attribute_val)) { if ($attributeValue != '') {
if (preg_match($this->regexps['negative'], $attributeValue)) {
$weight -= 25; $weight -= 25;
} }
if (preg_match($this->regexps['positive'], $attribute_val)) { if (preg_match($this->regexps['positive'], $attributeValue)) {
$weight += 25; $weight += 25;
} }
if (preg_match($this->regexps['unlikelyCandidates'], $attribute_val)) { if (preg_match($this->regexps['unlikelyCandidates'], $attributeValue)) {
$weight -= 5; $weight -= 5;
} }
if (preg_match($this->regexps['okMaybeItsACandidate'], $attribute_val)) { if (preg_match($this->regexps['okMaybeItsACandidate'], $attributeValue)) {
$weight += 5; $weight += 5;
} }
} }
@ -1198,15 +1221,16 @@ class Readability
*/ */
public function clean(\DOMElement $e, $tag) public function clean(\DOMElement $e, $tag)
{ {
$currentItem = null;
$targetList = $e->getElementsByTagName($tag); $targetList = $e->getElementsByTagName($tag);
$isEmbed = ($tag === 'audio' || $tag === 'video' || $tag === 'iframe' || $tag === 'object' || $tag === 'embed'); $isEmbed = ($tag === 'audio' || $tag === 'video' || $tag === 'iframe' || $tag === 'object' || $tag === 'embed');
for ($cur_item = null, $y = $targetList->length - 1; $y >= 0; --$y) { for ($y = $targetList->length - 1; $y >= 0; --$y) {
// Allow youtube and vimeo videos through as people usually want to see those. // Allow youtube and vimeo videos through as people usually want to see those.
$cur_item = $targetList->item($y); $currentItem = $targetList->item($y);
if ($isEmbed) { if ($isEmbed) {
$attributeValues = $cur_item->getAttribute('src').' '.$cur_item->getAttribute('href'); $attributeValues = $currentItem->getAttribute('src').' '.$currentItem->getAttribute('href');
// First, check the elements attributes to see if any of them contain known media hosts // First, check the elements attributes to see if any of them contain known media hosts
if (preg_match($this->regexps['media'], $attributeValues)) { if (preg_match($this->regexps['media'], $attributeValues)) {
@ -1219,7 +1243,7 @@ class Readability
} }
} }
$cur_item->parentNode->removeChild($cur_item); $currentItem->parentNode->removeChild($currentItem);
} }
} }
@ -1239,6 +1263,7 @@ class Readability
$tagsList = $e->getElementsByTagName($tag); $tagsList = $e->getElementsByTagName($tag);
$curTagsLength = $tagsList->length; $curTagsLength = $tagsList->length;
$node = null;
/* /*
* Gather counts for other typical elements embedded within. * Gather counts for other typical elements embedded within.
@ -1246,9 +1271,8 @@ class Readability
* *
* TODO: Consider taking into account original contentScore here. * TODO: Consider taking into account original contentScore here.
*/ */
for ($node = null, $i = $curTagsLength - 1; $i >= 0; --$i) { for ($i = $curTagsLength - 1; $i >= 0; --$i) {
$node = $tagsList->item($i); $node = $tagsList->item($i);
//$class = $node->getAttribute('class').' '.$node->getAttribute('id'); //debug
$weight = $this->getWeight($node); $weight = $this->getWeight($node);
$contentScore = ($node->hasAttribute('readability')) ? (int) $node->getAttribute('readability') : 0; $contentScore = ($node->hasAttribute('readability')) ? (int) $node->getAttribute('readability') : 0;
$this->dbg('Start conditional cleaning of '.$node->getNodePath().' (class='.$node->getAttribute('class').'; id='.$node->getAttribute('id').')'.(($node->hasAttribute('readability')) ? (' with score '.$node->getAttribute('readability')) : '')); $this->dbg('Start conditional cleaning of '.$node->getNodePath().' (class='.$node->getAttribute('class').'; id='.$node->getAttribute('id').')'.(($node->hasAttribute('readability')) ? (' with score '.$node->getAttribute('readability')) : ''));
@ -1332,7 +1356,6 @@ class Readability
} }
if ($toRemove) { if ($toRemove) {
//$this->dbg('Removing: '.$node->innerHTML);
$this->dbg('Removing...'); $this->dbg('Removing...');
$node->parentNode->removeChild($node); $node->parentNode->removeChild($node);
} }
@ -1349,6 +1372,7 @@ class Readability
{ {
for ($headerIndex = 1; $headerIndex < 3; ++$headerIndex) { for ($headerIndex = 1; $headerIndex < 3; ++$headerIndex) {
$headers = $e->getElementsByTagName('h'.$headerIndex); $headers = $e->getElementsByTagName('h'.$headerIndex);
for ($i = $headers->length - 1; $i >= 0; --$i) { for ($i = $headers->length - 1; $i >= 0; --$i) {
if ($this->getWeight($headers->item($i)) < 0 || $this->getLinkDensity($headers->item($i)) > 0.33) { if ($this->getWeight($headers->item($i)) < 0 || $this->getLinkDensity($headers->item($i)) > 0.33) {
$headers->item($i)->parentNode->removeChild($headers->item($i)); $headers->item($i)->parentNode->removeChild($headers->item($i));

@ -194,11 +194,10 @@ class ReadabilityTest extends \PHPUnit_Framework_TestCase
$this->assertTrue($res); $this->assertTrue($res);
$this->assertInstanceOf('Readability\JSLikeHTMLElement', $readability->getContent()); $this->assertInstanceOf('Readability\JSLikeHTMLElement', $readability->getContent());
$this->assertInstanceOf('Readability\JSLikeHTMLElement', $readability->getTitle()); $this->assertInstanceOf('Readability\JSLikeHTMLElement', $readability->getTitle());
$this->assertContains('alt="article"', $readability->getContent()->innerHTML);
$this->assertEmpty($readability->getTitle()->innerHTML); $this->assertEmpty($readability->getTitle()->innerHTML);
$this->assertContains('This is an awesome text with some links, here there are', $readability->getContent()->innerHTML); $this->assertContains('This is an awesome text with some links, here there are', $readability->getContent()->innerHTML);
$this->assertNotContains('<aside>', $readability->getContent()->innerHTML); $this->assertNotContains('<aside>', $readability->getContent()->innerHTML);
$this->assertContains('<footer/>', $readability->getContent()->innerHTML); $this->assertContains('<footer readability="4"/>', $readability->getContent()->innerHTML);
} }
public function testWithClasses() public function testWithClasses()
@ -225,7 +224,6 @@ class ReadabilityTest extends \PHPUnit_Framework_TestCase
$this->assertTrue($res); $this->assertTrue($res);
$this->assertInstanceOf('Readability\JSLikeHTMLElement', $readability->getContent()); $this->assertInstanceOf('Readability\JSLikeHTMLElement', $readability->getContent());
$this->assertInstanceOf('Readability\JSLikeHTMLElement', $readability->getTitle()); $this->assertInstanceOf('Readability\JSLikeHTMLElement', $readability->getTitle());
$this->assertContains('alt="tr"', $readability->getContent()->innerHTML);
$this->assertEmpty($readability->getTitle()->innerHTML); $this->assertEmpty($readability->getTitle()->innerHTML);
$this->assertContains('This is an awesome text with some links, here there are', $readability->getContent()->innerHTML); $this->assertContains('This is an awesome text with some links, here there are', $readability->getContent()->innerHTML);
} }
@ -239,7 +237,6 @@ class ReadabilityTest extends \PHPUnit_Framework_TestCase
$this->assertTrue($res); $this->assertTrue($res);
$this->assertInstanceOf('Readability\JSLikeHTMLElement', $readability->getContent()); $this->assertInstanceOf('Readability\JSLikeHTMLElement', $readability->getContent());
$this->assertInstanceOf('Readability\JSLikeHTMLElement', $readability->getTitle()); $this->assertInstanceOf('Readability\JSLikeHTMLElement', $readability->getTitle());
$this->assertContains('alt="article"', $readability->getContent()->innerHTML);
$this->assertEmpty($readability->getTitle()->innerHTML); $this->assertEmpty($readability->getTitle()->innerHTML);
$this->assertContains('This is an awesome text with some links, here there are', $readability->getContent()->innerHTML); $this->assertContains('This is an awesome text with some links, here there are', $readability->getContent()->innerHTML);
$this->assertContains('This text is also an awesome text and you should know that', $readability->getContent()->innerHTML); $this->assertContains('This text is also an awesome text and you should know that', $readability->getContent()->innerHTML);
@ -254,7 +251,6 @@ class ReadabilityTest extends \PHPUnit_Framework_TestCase
$this->assertTrue($res); $this->assertTrue($res);
$this->assertInstanceOf('Readability\JSLikeHTMLElement', $readability->getContent()); $this->assertInstanceOf('Readability\JSLikeHTMLElement', $readability->getContent());
$this->assertInstanceOf('Readability\JSLikeHTMLElement', $readability->getTitle()); $this->assertInstanceOf('Readability\JSLikeHTMLElement', $readability->getTitle());
$this->assertContains('alt="article"', $readability->getContent()->innerHTML);
$this->assertEmpty($readability->getTitle()->innerHTML); $this->assertEmpty($readability->getTitle()->innerHTML);
$this->assertContains('This is an awesome text with some links, here there are', $readability->getContent()->innerHTML); $this->assertContains('This is an awesome text with some links, here there are', $readability->getContent()->innerHTML);
$this->assertNotContains('This text is also an awesome text and you should know that', $readability->getContent()->innerHTML); $this->assertNotContains('This text is also an awesome text and you should know that', $readability->getContent()->innerHTML);
@ -269,7 +265,6 @@ class ReadabilityTest extends \PHPUnit_Framework_TestCase
$this->assertTrue($res); $this->assertTrue($res);
$this->assertInstanceOf('Readability\JSLikeHTMLElement', $readability->getContent()); $this->assertInstanceOf('Readability\JSLikeHTMLElement', $readability->getContent());
$this->assertInstanceOf('Readability\JSLikeHTMLElement', $readability->getTitle()); $this->assertInstanceOf('Readability\JSLikeHTMLElement', $readability->getTitle());
$this->assertContains('alt="article"', $readability->getContent()->innerHTML);
$this->assertEquals('this is my title', $readability->getTitle()->innerHTML); $this->assertEquals('this is my title', $readability->getTitle()->innerHTML);
$this->assertContains('This is an awesome text with some links, here there are', $readability->getContent()->innerHTML); $this->assertContains('This is an awesome text with some links, here there are', $readability->getContent()->innerHTML);
$this->assertNotContains('This text is also an awesome text and you should know that', $readability->getContent()->innerHTML); $this->assertNotContains('This text is also an awesome text and you should know that', $readability->getContent()->innerHTML);
@ -284,7 +279,6 @@ class ReadabilityTest extends \PHPUnit_Framework_TestCase
$this->assertTrue($res); $this->assertTrue($res);
$this->assertInstanceOf('Readability\JSLikeHTMLElement', $readability->getContent()); $this->assertInstanceOf('Readability\JSLikeHTMLElement', $readability->getContent());
$this->assertInstanceOf('Readability\JSLikeHTMLElement', $readability->getTitle()); $this->assertInstanceOf('Readability\JSLikeHTMLElement', $readability->getTitle());
$this->assertContains('alt="article"', $readability->getContent()->innerHTML);
$this->assertEquals('title2 - title3', $readability->getTitle()->innerHTML); $this->assertEquals('title2 - title3', $readability->getTitle()->innerHTML);
$this->assertContains('This is an awesome text with some links, here there are', $readability->getContent()->innerHTML); $this->assertContains('This is an awesome text with some links, here there are', $readability->getContent()->innerHTML);
$this->assertNotContains('This text is also an awesome text and you should know that', $readability->getContent()->innerHTML); $this->assertNotContains('This text is also an awesome text and you should know that', $readability->getContent()->innerHTML);
@ -299,7 +293,6 @@ class ReadabilityTest extends \PHPUnit_Framework_TestCase
$this->assertTrue($res); $this->assertTrue($res);
$this->assertInstanceOf('Readability\JSLikeHTMLElement', $readability->getContent()); $this->assertInstanceOf('Readability\JSLikeHTMLElement', $readability->getContent());
$this->assertInstanceOf('Readability\JSLikeHTMLElement', $readability->getTitle()); $this->assertInstanceOf('Readability\JSLikeHTMLElement', $readability->getTitle());
$this->assertContains('alt="article"', $readability->getContent()->innerHTML);
$this->assertEquals('title2 : title3', $readability->getTitle()->innerHTML); $this->assertEquals('title2 : title3', $readability->getTitle()->innerHTML);
$this->assertContains('This is an awesome text with some links, here there are', $readability->getContent()->innerHTML); $this->assertContains('This is an awesome text with some links, here there are', $readability->getContent()->innerHTML);
$this->assertNotContains('This text is also an awesome text and you should know that', $readability->getContent()->innerHTML); $this->assertNotContains('This text is also an awesome text and you should know that', $readability->getContent()->innerHTML);
@ -314,7 +307,6 @@ class ReadabilityTest extends \PHPUnit_Framework_TestCase
$this->assertTrue($res); $this->assertTrue($res);
$this->assertInstanceOf('Readability\JSLikeHTMLElement', $readability->getContent()); $this->assertInstanceOf('Readability\JSLikeHTMLElement', $readability->getContent());
$this->assertInstanceOf('Readability\JSLikeHTMLElement', $readability->getTitle()); $this->assertInstanceOf('Readability\JSLikeHTMLElement', $readability->getTitle());
$this->assertContains('alt="article"', $readability->getContent()->innerHTML);
$this->assertEquals('this is my h1 title !', $readability->getTitle()->innerHTML); $this->assertEquals('this is my h1 title !', $readability->getTitle()->innerHTML);
$this->assertContains('This is an awesome text with some links, here there are', $readability->getContent()->innerHTML); $this->assertContains('This is an awesome text with some links, here there are', $readability->getContent()->innerHTML);
$this->assertNotContains('This text is also an awesome text and you should know that', $readability->getContent()->innerHTML); $this->assertNotContains('This text is also an awesome text and you should know that', $readability->getContent()->innerHTML);

Loading…
Cancel
Save