From 209c404d7b651ec292541debec20234967e0bd77 Mon Sep 17 00:00:00 2001
From: Jeremy Benoist <jeremy.benoist@gmail.com>
Date: Mon, 29 Feb 2016 11:45:48 +0100
Subject: [PATCH] Fix instanceof DOMElement

We previously checked `instanceof DOMElement` which was wrong since we
are in the namespace class, the class `Readability\DOMElement` does not
exists.
---
 src/Readability.php       | 176 ++++++++++++++++++++++----------------
 tests/ReadabilityTest.php |  10 +--
 2 files changed, 101 insertions(+), 85 deletions(-)
diff --git a/src/Readability.php b/src/Readability.php
index 644cf62..d658a97 100644
--- a/src/Readability.php
+++ b/src/Readability.php
@@ -53,16 +53,23 @@ class Readability
     public $articleContent;
     public $original_html;
     public $dom;
-    public $url = null; // optional - URL where HTML was retrieved
-    public $lightClean = true; // preserves more content (experimental)
+    // optional - URL where HTML was retrieved
+    public $url = null;
+    // preserves more content (experimental)
+    public $lightClean = true;
     public $debug = false;
     public $tidied = false;
-    protected $debugText = ''; // error text for one time output
-    protected $domainRegExp = null; // article domain regexp for calibration
+    // error text for one time output
+    protected $debugText = '';
+    // article domain regexp for calibration
+    protected $domainRegExp = null;
     protected $body = null; //
-    protected $bodyCache = null; // Cache the body HTML in case we need to re-use it later
-    protected $flags = 7; // 1 | 2 | 4;   // Start with all processing flags set.
-    protected $success = false; // indicates whether we were able to extract or not
+    // Cache the body HTML in case we need to re-use it later
+    protected $bodyCache = null;
+    // 1 | 2 | 4;   // Start with all processing flags set.
+    protected $flags = 7;
+    // indicates whether we were able to extract or not
+    protected $success = false;
 
     /**
      * All of the regular expressions in use within readability.
@@ -105,21 +112,33 @@ class Readability
     );
     // raw HTML filters
     protected $pre_filters = array(
-        '!<script[^>]*>(.*?)</script>!is' => '', // remove obvious scripts
-        '!<style[^>]*>(.*?)</style>!is' => '', // remove obvious styles
-        '!</?span[^>]*>!is' => '', // remove spans as we redefine styles and they're probably special-styled
-        '!<font[^>]*>\s*\[AD\]\s*</font>!is' => '', // HACK: firewall-filtered content
-        '!(<br[^>]*>[ \r\n\s]*){2,}!i' => '</p><p>', // HACK: replace linebreaks plus br's with p's
-        //'!</?noscript>!is' => '', // replace noscripts
-        '!<(/?)font[^>]*>!is' => '<\\1span>', // replace fonts to spans
+        // remove obvious scripts
+        '!<script[^>]*>(.*?)</script>!is' => '',
+        // remove obvious styles
+        '!<style[^>]*>(.*?)</style>!is' => '',
+        // remove spans as we redefine styles and they're probably special-styled
+        '!</?span[^>]*>!is' => '',
+        // HACK: firewall-filtered content
+        '!<font[^>]*>\s*\[AD\]\s*</font>!is' => '',
+        // HACK: replace linebreaks plus br's with p's
+        '!(<br[^>]*>[ \r\n\s]*){2,}!i' => '</p><p>',
+        // replace noscripts
+        //'!</?noscript>!is' => '',
+        // replace fonts to spans
+        '!<(/?)font[^>]*>!is' => '<\\1span>',
     );
     // output HTML filters
     protected $post_filters = array(
-        '/<br\s*\/?>\s*<p/i' => '<p', // replace excessive br's
-        '!<(?:a|div|p)[^>]+/>!is' => '', // replace empty tags that break layouts
-        //'!<(\s*/?\s*(?:blockquote|br|hr|code|div|article|span|footer|aside|p|pre|dl|li|ul|ol)) [^>]+>!is' => "<\\1>", // remove all attributes on text tags
-        "/\n+/" => "\n", //single newlines cleanup
-        '!<pre[^>]*>\s*<code!is' => '<pre', // modern web...
+        // replace excessive br's
+        '/<br\s*\/?>\s*<p/i' => '<p',
+        // replace empty tags that break layouts
+        '!<(?:a|div|p)[^>]+/>!is' => '',
+        // remove all attributes on text tags
+        //'!<(\s*/?\s*(?:blockquote|br|hr|code|div|article|span|footer|aside|p|pre|dl|li|ul|ol)) [^>]+>!is' => "<\\1>",
+        //single newlines cleanup
+        "/\n+/" => "\n",
+        // modern web...
+        '!<pre[^>]*>\s*<code!is' => '<pre',
         '!</code>\s*</pre>!is' => '</pre>',
         '!<[hb]r>!is' => '<\\1 />',
     );
@@ -366,12 +385,12 @@ class Readability
      */
     protected function getArticleTitle()
     {
-        $curTitle = '';
         $origTitle = '';
 
         try {
             $curTitle = $origTitle = $this->getInnerText($this->dom->getElementsByTagName('title')->item(0));
-        } catch (Exception $e) {
+        } catch (\Exception $e) {
+            $curTitle = '';
         }
 
         if (preg_match('/ [\|\-] /', $curTitle)) {
@@ -504,14 +523,10 @@ class Readability
      */
     public function prepArticle(\DOMElement $articleContent)
     {
-        if ($this->lightClean) {
-            $this->dbg('Light clean enabled.');
-        } else {
-            $this->dbg('Standard clean enabled.');
-        }
-
+        $this->dbg($this->lightClean ? 'Light clean enabled.' : 'Standard clean enabled.');
         $this->cleanStyles($articleContent);
         $this->killBreaks($articleContent);
+
         $xpath = new \DOMXPath($articleContent->ownerDocument);
 
         if ($this->revertForcedParagraphElements) {
@@ -563,23 +578,25 @@ class Readability
         $articleParagraphs = $articleContent->getElementsByTagName('p');
 
         for ($i = $articleParagraphs->length - 1; $i >= 0; --$i) {
-            $imgCount = $articleParagraphs->item($i)->getElementsByTagName('img')->length;
-            $embedCount = $articleParagraphs->item($i)->getElementsByTagName('embed')->length;
-            $objectCount = $articleParagraphs->item($i)->getElementsByTagName('object')->length;
-            $videoCount = $articleParagraphs->item($i)->getElementsByTagName('video')->length;
-            $audioCount = $articleParagraphs->item($i)->getElementsByTagName('audio')->length;
-            $iframeCount = $articleParagraphs->item($i)->getElementsByTagName('iframe')->length;
-
-            if ($iframeCount === 0 && $imgCount === 0 && $embedCount === 0 && $objectCount === 0 && $videoCount === 0 && $audioCount === 0 && mb_strlen(preg_replace('/\s+/is', '', $this->getInnerText($articleParagraphs->item($i), false, false))) === 0) {
-                $articleParagraphs->item($i)->parentNode->removeChild($articleParagraphs->item($i));
+            $item = $articleParagraphs->item($i);
+
+            $imgCount = $item->getElementsByTagName('img')->length;
+            $embedCount = $item->getElementsByTagName('embed')->length;
+            $objectCount = $item->getElementsByTagName('object')->length;
+            $videoCount = $item->getElementsByTagName('video')->length;
+            $audioCount = $item->getElementsByTagName('audio')->length;
+            $iframeCount = $item->getElementsByTagName('iframe')->length;
+
+            if ($iframeCount === 0 && $imgCount === 0 && $embedCount === 0 && $objectCount === 0 && $videoCount === 0 && $audioCount === 0 && mb_strlen(preg_replace('/\s+/is', '', $this->getInnerText($item, false, false))) === 0) {
+                $item->parentNode->removeChild($item);
             }
 
             // add extra text to iframe tag to avoid an auto-closing iframe and then break the html code
             if ($iframeCount) {
-                $iframe = $articleParagraphs->item($i)->getElementsByTagName('iframe');
+                $iframe = $item->getElementsByTagName('iframe');
                 $iframe->item(0)->nodeValue = ' ';
 
-                $articleParagraphs->item($i)->parentNode->replaceChild($iframe->item(0), $articleParagraphs->item($i));
+                $item->parentNode->replaceChild($iframe->item(0), $item);
             }
         }
 
@@ -589,7 +606,7 @@ class Readability
                     $articleContent->innerHTML = preg_replace($search, $replace, $articleContent->innerHTML);
                 }
                 unset($search, $replace);
-            } catch (Exception $e) {
+            } catch (\Exception $e) {
                 $this->dbg('Cleaning output HTML failed. Ignoring: '.$e->getMessage());
             }
         }
@@ -626,10 +643,10 @@ class Readability
             case 'FIGURE':
                 $readability->value += 3;
                 break;
-/*          case 'SECTION': // often misused
-                $readability->value += 2;
+            case 'SECTION':
+                // often misused
+                // $readability->value += 2;
                 break;
-*/
             case 'OL':
             case 'UL':
             case 'DL':
@@ -665,12 +682,12 @@ class Readability
     }
 
     /**
-     * grabArticle - Using a variety of metrics (content score, classname, element types), find the content that is
+     * Using a variety of metrics (content score, classname, element types), find the content that is
      * most likely to be the stuff a user wants to read. Then return it wrapped up in a div.
      *
      * @param \DOMElement $page
      *
-     * @return \DOMElement
+     * @return \DOMElement|bool
      */
     protected function grabArticle(\DOMElement $page = null)
     {
@@ -703,13 +720,11 @@ class Readability
 
                     try {
                         $newNode->innerHTML = $node->innerHTML;
-                        // It's easier to debug using original attributes.
-                        //$newNode->setAttribute('class', $node->getAttribute('class'));
-                        //$newNode->setAttribute('id', $node->getAttribute('id'));
-                        $node = $node->parentNode->replaceChild($newNode, $node);
+
+                        $node->parentNode->replaceChild($newNode, $node);
                         --$nodeIndex;
                         $nodesToScore[] = $newNode;
-                    } catch (Exception $e) {
+                    } catch (\Exception $e) {
                         $this->dbg('Could not alter div/article to p, reverting back to div: '.$e->getMessage());
                     }
                 } else {
@@ -717,12 +732,15 @@ class Readability
                     for ($i = 0, $il = $node->childNodes->length; $i < $il; ++$i) {
                         $childNode = $node->childNodes->item($i);
 
-                        if (is_object($childNode) && get_class($childNode) === 'DOMProcessingInstruction') { //executable tags (<?php or <?xml) warning
+                        // executable tags (<?php or <?xml) warning
+                        if (is_object($childNode) && get_class($childNode) === 'DOMProcessingInstruction') {
                             $childNode->parentNode->removeChild($childNode);
+
                             continue;
                         }
 
-                        if ($childNode->nodeType == 3) { // XML_TEXT_NODE
+                         // XML_TEXT_NODE
+                        if ($childNode->nodeType == 3) {
                             //$this->dbg('replacing text node with a P tag with the same content.');
                             $p = $this->dom->createElement('p');
                             $p->innerHTML = $childNode->nodeValue;
@@ -743,12 +761,13 @@ class Readability
          */
         for ($pt = 0, $scored = count($nodesToScore); $pt < $scored; ++$pt) {
             $parentNode = $nodesToScore[$pt]->parentNode;
+
             // No parent node? Move on...
             if (!$parentNode) {
                 continue;
             }
 
-            $grandParentNode = ($parentNode->parentNode instanceof DOMElement) ? $parentNode->parentNode : null;
+            $grandParentNode = ($parentNode->parentNode instanceof \DOMElement) ? $parentNode->parentNode : null;
             $innerText = $this->getInnerText($nodesToScore[$pt]);
 
             // If this paragraph is less than MIN_PARAGRAPH_LENGTH (default:20) characters, don't even count it.
@@ -778,7 +797,7 @@ class Readability
             /* TEST: For every positive/negative parent tag, add/substract half point. Up to 3 points. *\/
             $up = $nodesToScore[$pt];
             $score = 0;
-            while ($up->parentNode instanceof DOMElement) {
+            while ($up->parentNode instanceof \DOMElement) {
                 $up = $up->parentNode;
                 if (preg_match($this->regexps['positive'], $up->getAttribute('class') . ' ' . $up->getAttribute('id'))) {
                     $score += 0.5;
@@ -802,8 +821,9 @@ class Readability
          */
         if ($this->flagIsActive(self::FLAG_STRIP_UNLIKELYS) && $xpath) {
             $candidates = $xpath->query('.//*[(self::footer and count(//footer)<2) or (self::aside and count(//aside)<2)]', $page->documentElement);
+            $node = null;
 
-            for ($node = null, $c = $candidates->length - 1; $c >= 0; --$c) {
+            for ($c = $candidates->length - 1; $c >= 0; --$c) {
                 $node = $candidates->item($c);
                 // node should be readable but not inside of an article otherwise it's probably non-readable block
                 if ($node->hasAttribute('readability') && (int) $node->getAttributeNode('readability')->value < 40 && ($node->parentNode ? strcasecmp($node->parentNode->tagName, 'article') !== 0 : true)) {
@@ -813,8 +833,9 @@ class Readability
             }
 
             $candidates = $xpath->query('.//*[not(self::body) and (@class or @id or @style) and ((number(@readability) < 40) or not(@readability))]', $page->documentElement);
+            $node = null;
 
-            for ($node = null, $c = $candidates->length - 1; $c >= 0; --$c) {
+            for ($c = $candidates->length - 1; $c >= 0; --$c) {
                 $node = $candidates->item($c);
 
                 // Remove unlikely candidates
@@ -842,15 +863,17 @@ class Readability
             $candidates = $xpath->query('.//*[@data-candidate]', $page->documentElement);
 
             for ($c = $candidates->length - 1; $c >= 0; --$c) {
+                $item = $candidates->item($c);
+
                 // Scale the final candidates score based on link density. Good content should have a
                 // relatively small link density (5% or less) and be mostly unaffected by this operation.
                 // If not for this we would have used XPath to find maximum @readability.
-                $readability = $candidates->item($c)->getAttributeNode('readability');
-                $readability->value = round($readability->value * (1 - $this->getLinkDensity($candidates->item($c))), 0, PHP_ROUND_HALF_UP);
+                $readability = $item->getAttributeNode('readability');
+                $readability->value = round($readability->value * (1 - $this->getLinkDensity($item)), 0, PHP_ROUND_HALF_UP);
 
                 if (!$topCandidate || $readability->value > (int) $topCandidate->getAttribute('readability')) {
-                    $this->dbg('Candidate: '.$candidates->item($c)->getNodePath().' ('.$candidates->item($c)->getAttribute('class').':'.$candidates->item($c)->getAttribute('id').') with score '.$readability->value);
-                    $topCandidate = $candidates->item($c);
+                    $this->dbg('Candidate: '.$item->getNodePath().' ('.$item->getAttribute('class').':'.$item->getAttribute('id').') with score '.$readability->value);
+                    $topCandidate = $item;
                 }
             }
 
@@ -889,7 +912,7 @@ class Readability
         if (strcasecmp($tagName, 'td') === 0 || strcasecmp($tagName, 'tr') === 0) {
             $up = $topCandidate;
 
-            if ($up->parentNode instanceof DOMElement) {
+            if ($up->parentNode instanceof \DOMElement) {
                 $up = $up->parentNode;
 
                 if (strcasecmp($up->tagName, 'table') === 0) {
@@ -949,7 +972,6 @@ class Readability
 
             if ($append) {
                 $this->dbg('Appending node: '.$siblingNode->getNodePath());
-                $nodeToAppend = null;
 
                 if (strcasecmp($siblingNodeName, 'div') !== 0 && strcasecmp($siblingNodeName, 'p') !== 0) {
                     // We have a node that isn't a common block level element, like a form or td tag. Turn it into a div so it doesn't get filtered out later by accident.
@@ -959,7 +981,7 @@ class Readability
                     try {
                         $nodeToAppend->setAttribute('alt', $siblingNodeName);
                         $nodeToAppend->innerHTML = $siblingNode->innerHTML;
-                    } catch (Exception $e) {
+                    } catch (\Exception $e) {
                         $this->dbg('Could not alter siblingNode "'.$siblingNodeName.'" to "div", reverting to original.');
                         $nodeToAppend = $siblingNode;
                         --$s;
@@ -1133,19 +1155,20 @@ class Readability
         }
         $weight = 0;
 
-        //$attribute_val = trim($element->getAttribute('class')." ".$element->getAttribute('id'));
-        $attribute_val = trim($element->getAttribute($attribute));
-        if ($attribute_val != '') {
-            if (preg_match($this->regexps['negative'], $attribute_val)) {
+        // $attributeValue = trim($element->getAttribute('class')." ".$element->getAttribute('id'));
+        $attributeValue = trim($element->getAttribute($attribute));
+
+        if ($attributeValue != '') {
+            if (preg_match($this->regexps['negative'], $attributeValue)) {
                 $weight -= 25;
             }
-            if (preg_match($this->regexps['positive'], $attribute_val)) {
+            if (preg_match($this->regexps['positive'], $attributeValue)) {
                 $weight += 25;
             }
-            if (preg_match($this->regexps['unlikelyCandidates'], $attribute_val)) {
+            if (preg_match($this->regexps['unlikelyCandidates'], $attributeValue)) {
                 $weight -= 5;
             }
-            if (preg_match($this->regexps['okMaybeItsACandidate'], $attribute_val)) {
+            if (preg_match($this->regexps['okMaybeItsACandidate'], $attributeValue)) {
                 $weight += 5;
             }
         }
@@ -1198,15 +1221,16 @@ class Readability
      */
     public function clean(\DOMElement $e, $tag)
     {
+        $currentItem = null;
         $targetList = $e->getElementsByTagName($tag);
         $isEmbed = ($tag === 'audio' || $tag === 'video' || $tag === 'iframe' || $tag === 'object' || $tag === 'embed');
 
-        for ($cur_item = null, $y = $targetList->length - 1; $y >= 0; --$y) {
+        for ($y = $targetList->length - 1; $y >= 0; --$y) {
             // Allow youtube and vimeo videos through as people usually want to see those.
-            $cur_item = $targetList->item($y);
+            $currentItem = $targetList->item($y);
 
             if ($isEmbed) {
-                $attributeValues = $cur_item->getAttribute('src').' '.$cur_item->getAttribute('href');
+                $attributeValues = $currentItem->getAttribute('src').' '.$currentItem->getAttribute('href');
 
                 // First, check the elements attributes to see if any of them contain known media hosts
                 if (preg_match($this->regexps['media'], $attributeValues)) {
@@ -1219,7 +1243,7 @@ class Readability
                 }
             }
 
-            $cur_item->parentNode->removeChild($cur_item);
+            $currentItem->parentNode->removeChild($currentItem);
         }
     }
 
@@ -1239,6 +1263,7 @@ class Readability
 
         $tagsList = $e->getElementsByTagName($tag);
         $curTagsLength = $tagsList->length;
+        $node = null;
 
         /*
          * Gather counts for other typical elements embedded within.
@@ -1246,9 +1271,8 @@ class Readability
          *
          * TODO: Consider taking into account original contentScore here.
          */
-        for ($node = null, $i = $curTagsLength - 1; $i >= 0; --$i) {
+        for ($i = $curTagsLength - 1; $i >= 0; --$i) {
             $node = $tagsList->item($i);
-            //$class = $node->getAttribute('class').' '.$node->getAttribute('id'); //debug
             $weight = $this->getWeight($node);
             $contentScore = ($node->hasAttribute('readability')) ? (int) $node->getAttribute('readability') : 0;
             $this->dbg('Start conditional cleaning of '.$node->getNodePath().' (class='.$node->getAttribute('class').'; id='.$node->getAttribute('id').')'.(($node->hasAttribute('readability')) ? (' with score '.$node->getAttribute('readability')) : ''));
@@ -1332,7 +1356,6 @@ class Readability
                 }
 
                 if ($toRemove) {
-                    //$this->dbg('Removing: '.$node->innerHTML);
                     $this->dbg('Removing...');
                     $node->parentNode->removeChild($node);
                 }
@@ -1349,6 +1372,7 @@ class Readability
     {
         for ($headerIndex = 1; $headerIndex < 3; ++$headerIndex) {
             $headers = $e->getElementsByTagName('h'.$headerIndex);
+
             for ($i = $headers->length - 1; $i >= 0; --$i) {
                 if ($this->getWeight($headers->item($i)) < 0 || $this->getLinkDensity($headers->item($i)) > 0.33) {
                     $headers->item($i)->parentNode->removeChild($headers->item($i));
diff --git a/tests/ReadabilityTest.php b/tests/ReadabilityTest.php
index f1fd94a..e9a0ef3 100644
--- a/tests/ReadabilityTest.php
+++ b/tests/ReadabilityTest.php
@@ -194,11 +194,10 @@ class ReadabilityTest extends \PHPUnit_Framework_TestCase
         $this->assertTrue($res);
         $this->assertInstanceOf('Readability\JSLikeHTMLElement', $readability->getContent());
         $this->assertInstanceOf('Readability\JSLikeHTMLElement', $readability->getTitle());
-        $this->assertContains('alt="article"', $readability->getContent()->innerHTML);
         $this->assertEmpty($readability->getTitle()->innerHTML);
         $this->assertContains('This is an awesome text with some links, here there are', $readability->getContent()->innerHTML);
         $this->assertNotContains('<aside>', $readability->getContent()->innerHTML);
-        $this->assertContains('<footer/>', $readability->getContent()->innerHTML);
+        $this->assertContains('<footer readability="4"/>', $readability->getContent()->innerHTML);
     }
 
     public function testWithClasses()
@@ -225,7 +224,6 @@ class ReadabilityTest extends \PHPUnit_Framework_TestCase
         $this->assertTrue($res);
         $this->assertInstanceOf('Readability\JSLikeHTMLElement', $readability->getContent());
         $this->assertInstanceOf('Readability\JSLikeHTMLElement', $readability->getTitle());
-        $this->assertContains('alt="tr"', $readability->getContent()->innerHTML);
         $this->assertEmpty($readability->getTitle()->innerHTML);
         $this->assertContains('This is an awesome text with some links, here there are', $readability->getContent()->innerHTML);
     }
@@ -239,7 +237,6 @@ class ReadabilityTest extends \PHPUnit_Framework_TestCase
         $this->assertTrue($res);
         $this->assertInstanceOf('Readability\JSLikeHTMLElement', $readability->getContent());
         $this->assertInstanceOf('Readability\JSLikeHTMLElement', $readability->getTitle());
-        $this->assertContains('alt="article"', $readability->getContent()->innerHTML);
         $this->assertEmpty($readability->getTitle()->innerHTML);
         $this->assertContains('This is an awesome text with some links, here there are', $readability->getContent()->innerHTML);
         $this->assertContains('This text is also an awesome text and you should know that', $readability->getContent()->innerHTML);
@@ -254,7 +251,6 @@ class ReadabilityTest extends \PHPUnit_Framework_TestCase
         $this->assertTrue($res);
         $this->assertInstanceOf('Readability\JSLikeHTMLElement', $readability->getContent());
         $this->assertInstanceOf('Readability\JSLikeHTMLElement', $readability->getTitle());
-        $this->assertContains('alt="article"', $readability->getContent()->innerHTML);
         $this->assertEmpty($readability->getTitle()->innerHTML);
         $this->assertContains('This is an awesome text with some links, here there are', $readability->getContent()->innerHTML);
         $this->assertNotContains('This text is also an awesome text and you should know that', $readability->getContent()->innerHTML);
@@ -269,7 +265,6 @@ class ReadabilityTest extends \PHPUnit_Framework_TestCase
         $this->assertTrue($res);
         $this->assertInstanceOf('Readability\JSLikeHTMLElement', $readability->getContent());
         $this->assertInstanceOf('Readability\JSLikeHTMLElement', $readability->getTitle());
-        $this->assertContains('alt="article"', $readability->getContent()->innerHTML);
         $this->assertEquals('this is my title', $readability->getTitle()->innerHTML);
         $this->assertContains('This is an awesome text with some links, here there are', $readability->getContent()->innerHTML);
         $this->assertNotContains('This text is also an awesome text and you should know that', $readability->getContent()->innerHTML);
@@ -284,7 +279,6 @@ class ReadabilityTest extends \PHPUnit_Framework_TestCase
         $this->assertTrue($res);
         $this->assertInstanceOf('Readability\JSLikeHTMLElement', $readability->getContent());
         $this->assertInstanceOf('Readability\JSLikeHTMLElement', $readability->getTitle());
-        $this->assertContains('alt="article"', $readability->getContent()->innerHTML);
         $this->assertEquals('title2 - title3', $readability->getTitle()->innerHTML);
         $this->assertContains('This is an awesome text with some links, here there are', $readability->getContent()->innerHTML);
         $this->assertNotContains('This text is also an awesome text and you should know that', $readability->getContent()->innerHTML);
@@ -299,7 +293,6 @@ class ReadabilityTest extends \PHPUnit_Framework_TestCase
         $this->assertTrue($res);
         $this->assertInstanceOf('Readability\JSLikeHTMLElement', $readability->getContent());
         $this->assertInstanceOf('Readability\JSLikeHTMLElement', $readability->getTitle());
-        $this->assertContains('alt="article"', $readability->getContent()->innerHTML);
         $this->assertEquals('title2 : title3', $readability->getTitle()->innerHTML);
         $this->assertContains('This is an awesome text with some links, here there are', $readability->getContent()->innerHTML);
         $this->assertNotContains('This text is also an awesome text and you should know that', $readability->getContent()->innerHTML);
@@ -314,7 +307,6 @@ class ReadabilityTest extends \PHPUnit_Framework_TestCase
         $this->assertTrue($res);
         $this->assertInstanceOf('Readability\JSLikeHTMLElement', $readability->getContent());
         $this->assertInstanceOf('Readability\JSLikeHTMLElement', $readability->getTitle());
-        $this->assertContains('alt="article"', $readability->getContent()->innerHTML);
         $this->assertEquals('this is my h1 title !', $readability->getTitle()->innerHTML);
         $this->assertContains('This is an awesome text with some links, here there are', $readability->getContent()->innerHTML);
         $this->assertNotContains('This text is also an awesome text and you should know that', $readability->getContent()->innerHTML);