From f5d473780da96889eff75c8f08f71ba2052206a4 Mon Sep 17 00:00:00 2001
From: Jeremy Benoist <jeremy.benoist@gmail.com>
Date: Tue, 28 Apr 2015 17:56:54 +0200
Subject: [PATCH 1/2] Fix javascript typo

And add coverage
---
 .gitignore          | 2 ++
 phpunit.xml.dist    | 5 ++++-
 src/Readability.php | 5 +++--
 3 files changed, 9 insertions(+), 3 deletions(-)
diff --git a/.gitignore b/.gitignore
index 48b8bf9..d191143 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,3 @@
 vendor/
+coverage/
+composer.lock
diff --git a/phpunit.xml.dist b/phpunit.xml.dist
index f727e46..e6a0903 100644
--- a/phpunit.xml.dist
+++ b/phpunit.xml.dist
@@ -19,11 +19,14 @@
 
     <filter>
         <whitelist>
-            <directory>./src/TubeLink/</directory>
+            <directory>./src/</directory>
             <exclude>
                 <directory>./tests</directory>
             </exclude>
         </whitelist>
     </filter>
 
+    <logging>
+        <log type="coverage-html" target="coverage" title="FullText" charset="UTF-8" yui="true" highlight="true" lowUpperBound="35" highLowerBound="70"/>
+    </logging>
 </phpunit>
diff --git a/src/Readability.php b/src/Readability.php
index 7f1837e..50e5d2f 100644
--- a/src/Readability.php
+++ b/src/Readability.php
@@ -52,6 +52,7 @@ class Readability
     public $revertForcedParagraphElements = true;
     public $articleTitle;
     public $articleContent;
+    public $original_html;
     public $dom;
     public $url = null; // optional - URL where HTML was retrieved
     public $lightClean = true; // preserves more content (experimental)
@@ -179,7 +180,7 @@ class Readability
             $this->debugText .= 'Tidying document'."\n";
             $tidy = tidy_parse_string($html, $this->tidy_config, 'UTF8');
             if (tidy_clean_repair($tidy)) {
-                $original_html = $html;
+                $this->original_html = $html;
                 $this->tidied = true;
                 $html = $tidy->value;
                 $html = preg_replace('/<html[^>]+>/i', '<html>', $html);
@@ -258,7 +259,7 @@ class Readability
         if ($this->bodyCache == null) {
             $this->bodyCache = '';
             foreach ($bodyElems as $bodyNode) {
-                $this->bodyCache += $bodyNode->innerHTML;
+                $this->bodyCache .= trim($bodyNode->innerHTML);
             }
         }
         if ($bodyElems->length > 0 && $this->body == null) {

From 1963319a5561c6baff4d59ed11ff49bf7561dc11 Mon Sep 17 00:00:00 2001
From: Jeremy Benoist <jeremy.benoist@gmail.com>
Date: Tue, 28 Apr 2015 17:57:11 +0200
Subject: [PATCH 2/2] Improve Travis & add Scrutinizer

+ CS
+ Update README
---
 .scrutinizer.yml          |   3 +
 .travis.yml               |  23 +++-
 README.md                 |   1 +
 src/JSLikeHTMLElement.php |  13 ++-
 src/Readability.php       | 237 +++++++++++++++++++-------------------
 tests/ReadabilityTest.php |   1 -
 6 files changed, 152 insertions(+), 126 deletions(-)
 create mode 100644 .scrutinizer.yml

diff --git a/.scrutinizer.yml b/.scrutinizer.yml
new file mode 100644
index 0000000..4bb53b4
--- /dev/null
+++ b/.scrutinizer.yml
@@ -0,0 +1,3 @@
+tools:
+    external_code_coverage:
+        timeout: 600
diff --git a/.travis.yml b/.travis.yml
index 4e7dccd..190b404 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -4,10 +4,29 @@ php:
     - 5.4
     - 5.5
     - 5.6
+    - nightly
+    - hhvm-nightly
 
-before_script:
+# run build against nightly but allow them to fail
+matrix:
+    fast_finish: true
+    allow_failures:
+        - php: nightly
+        - php: hhvm-nightly
+
+# faster builds on new travis setup not using sudo
+sudo: false
+
+install:
     - composer self-update
+
+before_script:
     - composer install --prefer-dist --no-interaction
 
 script:
-    - phpunit --coverage-text
+    - phpunit --coverage-clover=coverage.clover
+
+after_script:
+    - |
+        wget https://scrutinizer-ci.com/ocular.phar
+        php ocular.phar code-coverage:upload --format=php-clover coverage.clover
diff --git a/README.md b/README.md
index a5cf760..223d6ba 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,7 @@
 # Readability
 
 [![Build Status](https://travis-ci.org/j0k3r/php-readability.svg?branch=master)](https://travis-ci.org/j0k3r/php-readability)
+[![Code Coverage](https://scrutinizer-ci.com/g/j0k3r/php-readability/badges/coverage.png?b=master)](https://scrutinizer-ci.com/g/j0k3r/php-readability/?branch=master)
 
 This is an extract of the Readability class from the [full-text-rss](https://github.com/Dither/full-text-rss) fork. It kind be defined as a better version of the original [php-readability](http://code.fivefilters.org/php-readability).
 
diff --git a/src/JSLikeHTMLElement.php b/src/JSLikeHTMLElement.php
index 6a732cb..ef158fa 100644
--- a/src/JSLikeHTMLElement.php
+++ b/src/JSLikeHTMLElement.php
@@ -3,7 +3,7 @@
 namespace Readability;
 
 /**
- * JavaScript-like HTML DOM Element
+ * JavaScript-like HTML DOM Element.
  *
  * This class extends PHP's DOMElement to allow
  * users to get and set the innerHTML property of
@@ -31,12 +31,14 @@ namespace Readability;
  *     echo $doc->saveXML();
  *
  * @author Keyvan Minoukadeh - http://www.keyvan.net - keyvan@keyvan.net
+ *
  * @see http://fivefilters.org (the project this was written for)
  */
 class JSLikeHTMLElement extends \DOMElement
 {
     /**
-     * Used for setting innerHTML like it's done in JavaScript:
+     * Used for setting innerHTML like it's done in JavaScript:
+     *
      * @code
      * $div->innerHTML = '<h2>Chapter 2</h2><p>The story begins...</p>';
      * @endcode
@@ -45,7 +47,7 @@ class JSLikeHTMLElement extends \DOMElement
     {
         if ($name == 'innerHTML') {
             // first, empty the element
-            for ($x=$this->childNodes->length-1; $x>=0; $x--) {
+            for ($x = $this->childNodes->length - 1; $x >= 0; $x--) {
                 $this->removeChild($this->childNodes->item($x));
             }
             // $value holds our new inner HTML
@@ -86,7 +88,8 @@ class JSLikeHTMLElement extends \DOMElement
     }
 
     /**
-     * Used for getting innerHTML like it's done in JavaScript:
+     * Used for getting innerHTML like it's done in JavaScript:
+     *
      * @code
      * $string = $div->innerHTML;
      * @endcode
@@ -105,7 +108,7 @@ class JSLikeHTMLElement extends \DOMElement
         $trace = debug_backtrace();
         trigger_error('Undefined property via __get(): '.$name.' in '.$trace[0]['file'].' on line '.$trace[0]['line'], E_USER_NOTICE);
 
-        return null;
+        return;
     }
 
     public function __toString()
diff --git a/src/Readability.php b/src/Readability.php
index 50e5d2f..eabcca8 100644
--- a/src/Readability.php
+++ b/src/Readability.php
@@ -14,7 +14,7 @@ namespace Readability;
  * More information: http://fivefilters.org/content-only/
  * License: Apache License, Version 2.0
  * Requires: PHP version 5.2.0+
- * Date: 2013-08-02
+ * Date: 2013-08-02
  *
  * Differences between the PHP port and the original
  * ------------------------------------------------------
@@ -76,7 +76,7 @@ class Readability
         'divToPElements' => '/<(?:blockquote|code|div|article|footer|aside|img|p|pre|dl|ol|ul)/mi',
         'killBreaks' => '/(<br\s*\/?>([ \r\n\s]|&nbsp;?)*)+/',
         'media' => '!//(?:[^\.\?/]+\.)?(?:youtu(?:be)?|soundcloud|dailymotion|vimeo|pornhub|xvideos|twitvid|rutube|viddler)\.(?:com|be|org|net)/!i',
-        'skipFootnoteLink' => '/^\s*(\[?[a-z0-9]{1,2}\]?|^|edit|citation needed)\s*$/i'
+        'skipFootnoteLink' => '/^\s*(\[?[a-z0-9]{1,2}\]?|^|edit|citation needed)\s*$/i',
     );
     public $tidy_config = array(
         'tidy-mark' => false,
@@ -101,7 +101,7 @@ class Readability
         // 'merge-spans' => true,
         'input-encoding' => '????',
         'output-encoding' => 'utf8',
-        'hide-comments' => true
+        'hide-comments' => true,
     );
     // raw HTML filters
     protected $pre_filters = array(
@@ -111,7 +111,7 @@ class Readability
         '!<font[^>]*>\s*\[AD\]\s*</font>!is' => '', // HACK: firewall-filtered content
         '!(<br[^>]*>[ \r\n\s]*){2,}!i' => '</p><p>', // HACK: replace linebreaks plus br's with p's
         //'!</?noscript>!is' => '', // replace noscripts
-        '!<(/?)font[^>]*>!is' => '<\\1span>' // replace fonts to spans
+        '!<(/?)font[^>]*>!is' => '<\\1span>', // replace fonts to spans
     );
     // output HTML filters
     protected $post_filters = array(
@@ -121,7 +121,7 @@ class Readability
         "/\n+/" => "\n", //single newlines cleanup
         '!<pre[^>]*>\s*<code!is' => '<pre', // modern web...
         '!</code>\s*</pre>!is' => '</pre>',
-        '!<[hb]r>!is' => '<\\1 />'
+        '!<[hb]r>!is' => '<\\1 />',
     );
     // flags
     const FLAG_STRIP_UNLIKELYS = 1;
@@ -139,13 +139,14 @@ class Readability
     const MIN_NODE_LENGTH = 80;
     const MAX_LINK_DENSITY = 0.25;
     /**
-     * Create instance of Readability
+     * Create instance of Readability.
+     *
      * @param string UTF-8 encoded string
      * @param string (optional) URL associated with HTML (for footnotes)
      * @param string (optional) Which parser to use for turning raw HTML into a DOMDocument
-     * @param boolean (optional) Use tidy
+     * @param bool (optional) Use tidy
      */
-    public function __construct($html, $url=null, $parser='libxml', $use_tidy=true)
+    public function __construct($html, $url = null, $parser = 'libxml', $use_tidy = true)
     {
         $this->url = $url;
         $this->debugText = 'Parsing URL: '.$url."\n";
@@ -154,9 +155,9 @@ class Readability
             $this->domainRegExp = '/'.strtr(preg_replace('/www\d*\./', '', parse_url($url, PHP_URL_HOST)), array('.' => '\.')).'/';
         }
 
-        mb_internal_encoding("UTF-8");
-        mb_http_output("UTF-8");
-        mb_regex_encoding("UTF-8");
+        mb_internal_encoding('UTF-8');
+        mb_http_output('UTF-8');
+        mb_regex_encoding('UTF-8');
 
         // HACK: dirty cleanup to replace some stuff; shouldn't use regexps with HTML but well...
         if (!$this->flagIsActive(self::FLAG_DISABLE_PREFILTER)) {
@@ -170,7 +171,7 @@ class Readability
             $html = '<html></html>';
         }
 
-        /**
+        /*
          * Use tidy (if it exists).
          * This fixes problems with some sites which would otherwise trouble DOMDocument's HTML parsing.
          * Although sometimes it makes matters worse, which is why there is an option to disable it.
@@ -188,9 +189,9 @@ class Readability
             }
             unset($tidy);
         }
-        $html = mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8");
+        $html = mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8');
 
-        if (!($parser=='html5lib' && ($this->dom = \HTML5_Parser::parse($html)))) {
+        if (!($parser == 'html5lib' && ($this->dom = \HTML5_Parser::parse($html)))) {
             libxml_use_internal_errors(true);
             $this->dom = new \DOMDocument();
             $this->dom->preserveWhiteSpace = false;
@@ -201,7 +202,8 @@ class Readability
         $this->dom->registerNodeClass('DOMElement', 'Readability\JSLikeHTMLElement');
     }
     /**
-     * Get article title element
+     * Get article title element.
+     *
      * @return DOMElement
      */
     public function getTitle()
@@ -209,7 +211,8 @@ class Readability
         return $this->articleTitle;
     }
     /**
-     * Get article content element
+     * Get article content element.
+     *
      * @return DOMElement
      */
     public function getContent()
@@ -217,20 +220,22 @@ class Readability
         return $this->articleContent;
     }
     /**
-     * Add pre filter for raw input HTML processing
+     * Add pre filter for raw input HTML processing.
+     *
      * @param string RegExp for replace
      * @param string (optional) Replacer
      */
-    public function addPreFilter($filter, $replacer='')
+    public function addPreFilter($filter, $replacer = '')
     {
         $this->pre_filters[$filter] = $replacer;
     }
     /**
-     * Add post filter for raw output HTML processing
+     * Add post filter for raw output HTML processing.
+     *
      * @param string RegExp for replace
      * @param string (optional) Replacer
      */
-    public function addPostFilter($filter, $replacer='')
+    public function addPostFilter($filter, $replacer = '')
     {
         $this->post_filters[$filter] = $replacer;
     }
@@ -244,7 +249,7 @@ class Readability
      *  4. Replace the current DOM tree with the new one.
      *  5. Read peacefully.
      *
-     * @return boolean true if we found content, false otherwise
+     * @return bool true if we found content, false otherwise
      */
     public function init()
     {
@@ -296,7 +301,7 @@ class Readability
         return $this->success;
     }
     /**
-     * Debug
+     * Debug.
      */
     protected function dbg($msg) //, $error=false)
     {
@@ -306,12 +311,12 @@ class Readability
     }
 
     /**
-     * Dump debug info
+     * Dump debug info.
      */
     protected function dump_dbg()
     {
         if ($this->debug) {
-            openlog("Readability PHP ", LOG_PID | LOG_PERROR, 0);
+            openlog('Readability PHP ', LOG_PID | LOG_PERROR, 0);
             syslog(6, $this->debugText); // 1 - error 6 - info
         }
     }
@@ -319,7 +324,6 @@ class Readability
      * Run any post-process modifications to article content as necessary.
      *
      * @param DOMElement
-     * @return void
      */
     public function postProcessContent($articleContent)
     {
@@ -338,7 +342,8 @@ class Readability
         $origTitle = '';
         try {
             $curTitle = $origTitle = $this->getInnerText($this->dom->getElementsByTagName('title')->item(0));
-        } catch (Exception $e) {}
+        } catch (Exception $e) {
+        }
         if (preg_match('/ [\|\-] /', $curTitle)) {
             $curTitle = preg_replace('/(.*)[\|\-] .*/i', '$1', $origTitle);
             if (count(explode(' ', $curTitle)) < 3) {
@@ -347,7 +352,7 @@ class Readability
         } elseif (strpos($curTitle, ': ') !== false) {
             $curTitle = preg_replace('/.*:(.*)/i', '$1', $origTitle);
             if (count(explode(' ', $curTitle)) < 3) {
-                $curTitle = preg_replace('/[^:]*[:](.*)/i','$1', $origTitle);
+                $curTitle = preg_replace('/[^:]*[:](.*)/i', '$1', $origTitle);
             }
         } elseif (mb_strlen($curTitle) > 150 || mb_strlen($curTitle) < 15) {
             $hOnes = $this->dom->getElementsByTagName('h1');
@@ -367,12 +372,10 @@ class Readability
     /**
      * Prepare the HTML document for readability to scrape it.
      * This includes things like stripping javascript, CSS, and handling terrible markup.
-     *
-     * @return void
      */
     protected function prepDocument()
     {
-        /**
+        /*
          * In some cases a body element can't be found (if the HTML is totally hosed for example)
          * so we create a new body node and append it to the document.
          */
@@ -383,19 +386,18 @@ class Readability
         $this->body->setAttribute('id', 'readabilityBody');
         // Remove all style tags in head.
         $styleTags = $this->dom->getElementsByTagName('style');
-        for ($i = $styleTags->length-1; $i >= 0; $i--) {
+        for ($i = $styleTags->length - 1; $i >= 0; $i--) {
             $styleTags->item($i)->parentNode->removeChild($styleTags->item($i));
         }
         $linkTags = $this->dom->getElementsByTagName('link');
-        for ($i = $linkTags->length-1; $i >= 0; $i--) {
+        for ($i = $linkTags->length - 1; $i >= 0; $i--) {
             $linkTags->item($i)->parentNode->removeChild($linkTags->item($i));
         }
     }
     /**
      * For easier reading, convert this document to have footnotes at the bottom rather than inline links.
-     * @see http://www.roughtype.com/archives/2010/05/experiments_in.php
      *
-     * @return void
+     * @see http://www.roughtype.com/archives/2010/05/experiments_in.php
      */
     public function addFootnotes($articleContent)
     {
@@ -422,8 +424,8 @@ class Readability
             }
             $linkCount++;
             // Add a superscript reference after the article link.
-            $refLink->setAttribute('href', '#readabilityFootnoteLink-' . $linkCount);
-            $refLink->innerHTML = '<small><sup>[' . $linkCount . ']</sup></small>';
+            $refLink->setAttribute('href', '#readabilityFootnoteLink-'.$linkCount);
+            $refLink->innerHTML = '<small><sup>['.$linkCount.']</sup></small>';
             $refLink->setAttribute('class', 'readability-DoNotFootnote');
             $refLink->setAttribute('style', 'color: inherit;');
             if ($articleLink->parentNode->lastChild->isSameNode($articleLink)) {
@@ -432,13 +434,13 @@ class Readability
                 $articleLink->parentNode->insertBefore($refLink, $articleLink->nextSibling);
             }
             $articleLink->setAttribute('style', 'color: inherit; text-decoration: none;');
-            $articleLink->setAttribute('name', 'readabilityLink-' . $linkCount);
-            $footnote->innerHTML = '<small><sup><a href="#readabilityLink-' . $linkCount . '" title="Jump to Link in Article">^</a></sup></small> ';
+            $articleLink->setAttribute('name', 'readabilityLink-'.$linkCount);
+            $footnote->innerHTML = '<small><sup><a href="#readabilityLink-'.$linkCount.'" title="Jump to Link in Article">^</a></sup></small> ';
             $footnoteLink->innerHTML = ($footnoteLink->getAttribute('title') != '' ? $footnoteLink->getAttribute('title') : $linkText);
-            $footnoteLink->setAttribute('name', 'readabilityFootnoteLink-' . $linkCount);
+            $footnoteLink->setAttribute('name', 'readabilityFootnoteLink-'.$linkCount);
             $footnote->appendChild($footnoteLink);
             if ($linkDomain) {
-                $footnote->innerHTML = $footnote->innerHTML . '<small> (' . $linkDomain . ')</small>';
+                $footnote->innerHTML = $footnote->innerHTML.'<small> ('.$linkDomain.')</small>';
             }
             $articleFootnotes->appendChild($footnote);
         }
@@ -451,7 +453,6 @@ class Readability
      * iframes, forms, strip extraneous <p> tags, etc.
      *
      * @param DOMElement
-     * @return void
      */
     public function prepArticle($articleContent)
     {
@@ -464,25 +465,25 @@ class Readability
         $this->killBreaks($articleContent);
         $xpath = new \DOMXPath($articleContent->ownerDocument);
         if ($this->revertForcedParagraphElements) {
-            /**
+            /*
              * Reverts P elements with class 'readability-styled' to text nodes:
              * which is what they were before.
              */
             $elems = $xpath->query('.//p[@data-readability-styled]', $articleContent);
-            for ($i = $elems->length-1; $i >= 0; $i--) {
+            for ($i = $elems->length - 1; $i >= 0; $i--) {
                 $e = $elems->item($i);
                 $e->parentNode->replaceChild($articleContent->ownerDocument->createTextNode($e->textContent), $e);
             }
         }
         // Remove service data-candidate attribute.
         $elems = $xpath->query('.//*[@data-candidate]', $articleContent);
-        for ($i = $elems->length-1; $i >= 0; $i--) {
+        for ($i = $elems->length - 1; $i >= 0; $i--) {
             $elems->item($i)->removeAttribute('data-candidate');
         }
         // Remove unrelated links and other unneded stuff.
         // (not(*) and not(text()[normalize-space()])) or // What's wrong here?
         $elems = $xpath->query('.//a[@rel="nofollow"]', $articleContent);
-        for ($i = $elems->length-1; $i >= 0; $i--) {
+        for ($i = $elems->length - 1; $i >= 0; $i--) {
             $elems->item($i)->parentNode->removeChild($elems->item($i));
         }
         // Clean out junk from the article content.
@@ -494,7 +495,7 @@ class Readability
         $this->clean($articleContent, 'canvas');
         $this->clean($articleContent, 'h1');
 
-        /**
+        /*
          * If there is only one h2, they are probably using it as a main header, so remove it since we
          *  already have a header.
          */
@@ -511,7 +512,7 @@ class Readability
         $this->cleanConditionally($articleContent, 'div');
         // Remove extra paragraphs.
         $articleParagraphs = $articleContent->getElementsByTagName('p');
-        for ($i = $articleParagraphs->length-1; $i >= 0; $i--) {
+        for ($i = $articleParagraphs->length - 1; $i >= 0; $i--) {
             $imgCount = $articleParagraphs->item($i)->getElementsByTagName('img')->length;
             $embedCount = $articleParagraphs->item($i)->getElementsByTagName('embed')->length;
             $objectCount = $articleParagraphs->item($i)->getElementsByTagName('object')->length;
@@ -537,7 +538,7 @@ class Readability
                 }
                 unset($search, $replace);
             } catch (Exception $e) {
-                $this->dbg("Cleaning output HTML failed. Ignoring: " . $e->getMessage());
+                $this->dbg('Cleaning output HTML failed. Ignoring: '.$e->getMessage());
             }
         }
     }
@@ -546,7 +547,6 @@ class Readability
      * className/id for special names to add to its score.
      *
      * @param Element
-     * @return void
      */
     protected function initializeNode($node)
     {
@@ -615,7 +615,7 @@ class Readability
      *
      * @return DOMElement
      */
-    protected function grabArticle($page=null)
+    protected function grabArticle($page = null)
     {
         if (!$page) {
             $page = $this->dom;
@@ -647,7 +647,7 @@ class Readability
                         $nodeIndex--;
                         $nodesToScore[] = $newNode;
                     } catch (Exception $e) {
-                        $this->dbg('Could not alter div/article to p, reverting back to div: ' . $e->getMessage());
+                        $this->dbg('Could not alter div/article to p, reverting back to div: '.$e->getMessage());
                     }
                 } else {
                     // Will change these P elements back to text nodes after processing.
@@ -668,14 +668,14 @@ class Readability
                 }
             }
         }
-        /**
+        /*
          * Loop through all paragraphs, and assign a score to them based on how content-y they look.
          * Then add their score to their parent node.
          *
          * A score is determined by things like number of commas, class names, etc.
          * Maybe eventually link density.
          */
-        for ($pt=0, $scored = count($nodesToScore); $pt < $scored; $pt++) {
+        for ($pt = 0, $scored = count($nodesToScore); $pt < $scored; $pt++) {
             $parentNode = $nodesToScore[$pt]->parentNode;
             // No parent node? Move on...
             if (!$parentNode) {
@@ -690,12 +690,12 @@ class Readability
             // Initialize readability data for the parent.
             if (!$parentNode->hasAttribute('readability')) {
                 $this->initializeNode($parentNode);
-                $parentNode->setAttribute('data-candidate','true');
+                $parentNode->setAttribute('data-candidate', 'true');
             }
             // Initialize readability data for the grandparent.
             if ($grandParentNode && !$grandParentNode->hasAttribute('readability') && isset($grandParentNode->tagName)) {
                 $this->initializeNode($grandParentNode);
-                $grandParentNode->setAttribute('data-candidate','true');
+                $grandParentNode->setAttribute('data-candidate', 'true');
             }
             // Add a point for the paragraph itself as a base.
             $contentScore = 1;
@@ -704,7 +704,7 @@ class Readability
             // For every SCORE_CHARS_IN_PARAGRAPH (default:100) characters in this paragraph, add another point. Up to 3 points.
             $contentScore += min(floor(mb_strlen($innerText) / self::SCORE_CHARS_IN_PARAGRAPH), 3);
             // For every SCORE_WORDS_IN_PARAGRAPH (default:20) words in this paragraph, add another point. Up to 3 points.
-            $contentScore += min(floor($this->getWordCount($innerText)/ self::SCORE_WORDS_IN_PARAGRAPH), 3);
+            $contentScore += min(floor($this->getWordCount($innerText) / self::SCORE_WORDS_IN_PARAGRAPH), 3);
             /* TEST: For every positive/negative parent tag, add/substract half point. Up to 3 points. *\/
             $up = $nodesToScore[$pt];
             $score = 0;
@@ -724,13 +724,13 @@ class Readability
                 $grandParentNode->getAttributeNode('readability')->value += $contentScore / self::GRANDPARENT_SCORE_DIVISOR;
             }
         }
-        /**
+        /*
          * Node prepping: trash nodes that look cruddy (like ones with the class name "comment", etc).
          * This is faster to do before scoring but safer after.
          */
         if ($this->flagIsActive(self::FLAG_STRIP_UNLIKELYS) && $xpath) {
             $candidates = $xpath->query('.//*[(self::footer and count(//footer)<2) or (self::aside and count(//aside)<2)]', $page->documentElement);
-            for ($node = null, $c = $candidates->length-1; $c >= 0; $c--) {
+            for ($node = null, $c = $candidates->length - 1; $c >= 0; $c--) {
                 $node = $candidates->item($c);
                 // node should be readable but not inside of an article otherwise it's probably non-readable block
                 if ($node->hasAttribute('readability') && (int) $node->getAttributeNode('readability')->value < 40 && ($node->parentNode ? strcasecmp($node->parentNode->tagName, 'article') !== 0 : true)) {
@@ -739,24 +739,24 @@ class Readability
                 }
             }
             $candidates = $xpath->query('.//*[not(self::body) and (@class or @id or @style) and ((number(@readability) < 40) or not(@readability))]', $page->documentElement);
-            for ($node = null, $c = $candidates->length-1; $c >= 0; $c--) {
+            for ($node = null, $c = $candidates->length - 1; $c >= 0; $c--) {
                 $node = $candidates->item($c);
                 $tagName = $node->tagName;
                 /* Remove unlikely candidates */
-                $unlikelyMatchString = $node->getAttribute('class')." ".$node->getAttribute('id')." ".$node->getAttribute('style');
+                $unlikelyMatchString = $node->getAttribute('class').' '.$node->getAttribute('id').' '.$node->getAttribute('style');
                 //$this->dbg('Processing '.$node->getNodePath().' by "'. $unlikelyMatchString.'" with readability '.($node->hasAttribute('readability') ? (int)$node->getAttributeNode('readability')->value : 0));
                 if (mb_strlen($unlikelyMatchString) > 3 && // don't process "empty" strings
                     preg_match($this->regexps['unlikelyCandidates'], $unlikelyMatchString) &&
                     !preg_match($this->regexps['okMaybeItsACandidate'], $unlikelyMatchString)
                 ) {
-                    $this->dbg('Removing unlikely candidate '.$node->getNodePath().' by "'. $unlikelyMatchString.'" with readability '. ($node->hasAttribute('readability') ? (int) $node->getAttributeNode('readability')->value : 0));
+                    $this->dbg('Removing unlikely candidate '.$node->getNodePath().' by "'.$unlikelyMatchString.'" with readability '.($node->hasAttribute('readability') ? (int) $node->getAttributeNode('readability')->value : 0));
                     $node->parentNode->removeChild($node);
                     $nodeIndex--;
                 }
             }
             unset($candidates);
         }
-        /**
+        /*
          * After we've calculated scores, loop through all of the possible candidate nodes we found
          * and find the one with the highest score.
          */
@@ -764,7 +764,7 @@ class Readability
         if ($xpath) {
             // Using array of DOMElements after deletion is a path to DOOMElement.
             $candidates = $xpath->query('.//*[@data-candidate]', $page->documentElement);
-            for ($c = $candidates->length-1; $c >= 0; $c--) {
+            for ($c = $candidates->length - 1; $c >= 0; $c--) {
                 // Scale the final candidates score based on link density. Good content should have a
                 // relatively small link density (5% or less) and be mostly unaffected by this operation.
                 // If not for this we would have used XPath to find maximum @readability.
@@ -777,7 +777,7 @@ class Readability
             }
             unset($candidates);
         }
-        /**
+        /*
          * If we still have no top candidate, just use the body as a last resort.
          * We also have to copy the body node so it is something we can modify.
          */
@@ -812,7 +812,7 @@ class Readability
             }
         }
         $this->dbg('Top candidate: '.$topCandidate->getNodePath());
-        /**
+        /*
          * Now that we have the top candidate, look through its siblings for content that might also be related.
          * Things like preambles, content split by ads that we removed, etc.
          */
@@ -828,7 +828,7 @@ class Readability
             $siblingNode = $siblingNodes->item($s);
             $siblingNodeName = $siblingNode->nodeName;
             $append = false;
-            $this->dbg('Looking at sibling node: ' . $siblingNode->getNodePath() . (($siblingNode->nodeType === XML_ELEMENT_NODE && $siblingNode->hasAttribute('readability')) ? (' with score ' . $siblingNode->getAttribute('readability')) : ''));
+            $this->dbg('Looking at sibling node: '.$siblingNode->getNodePath().(($siblingNode->nodeType === XML_ELEMENT_NODE && $siblingNode->hasAttribute('readability')) ? (' with score '.$siblingNode->getAttribute('readability')) : ''));
             //$this->dbg('Sibling has score ' . ($siblingNode->readability ? siblingNode.readability.contentScore : 'Unknown'));
             if ($siblingNode->isSameNode($topCandidate)) {
                 $append = true;
@@ -852,18 +852,18 @@ class Readability
                 }
             }
             if ($append) {
-                $this->dbg('Appending node: ' . $siblingNode->getNodePath());
+                $this->dbg('Appending node: '.$siblingNode->getNodePath());
                 $nodeToAppend = null;
                 if (strcasecmp($siblingNodeName, 'div') !== 0 && strcasecmp($siblingNodeName, 'p') !== 0) {
                     /* We have a node that isn't a common block level element, like a form or td tag. Turn it into a div so it doesn't get filtered out later by accident. */
-                    $this->dbg('Altering siblingNode ' . $siblingNodeName . ' to div.');
+                    $this->dbg('Altering siblingNode '.$siblingNodeName.' to div.');
                     $nodeToAppend = $this->dom->createElement('div');
                     try {
                         $nodeToAppend->setAttribute('id', $siblingNode->getAttribute('id'));
                         $nodeToAppend->setAttribute('alt', $siblingNodeName);
                         $nodeToAppend->innerHTML = $siblingNode->innerHTML;
                     } catch (Exception $e) {
-                        $this->dbg('Could not alter siblingNode ' . $siblingNodeName . ' to div, reverting to original.');
+                        $this->dbg('Could not alter siblingNode '.$siblingNodeName.' to div, reverting to original.');
                         $nodeToAppend = $siblingNode;
                         $s--;
                         $sl--;
@@ -884,7 +884,7 @@ class Readability
         unset($xpath);
         // So we have all of the content that we need. Now we clean it up for presentation.
         $this->prepArticle($articleContent);
-        /**
+        /*
          * Now that we've gone through the full algorithm, check to see if we got any meaningful content.
          * If we didn't, we may need to re-run grabArticle with different flags set. This gives us a higher
          * likelihood of finding the content, and the sieve approach gives us a higher likelihood of
@@ -897,17 +897,17 @@ class Readability
             $this->body->innerHTML = $this->bodyCache;
             if ($this->flagIsActive(self::FLAG_STRIP_UNLIKELYS)) {
                 $this->removeFlag(self::FLAG_STRIP_UNLIKELYS);
-                $this->dbg("...content is shorter than ".self::MIN_ARTICLE_LENGTH." letters, trying not to strip unlikely content.\n");
+                $this->dbg('...content is shorter than '.self::MIN_ARTICLE_LENGTH." letters, trying not to strip unlikely content.\n");
 
                 return $this->grabArticle($this->body);
             } elseif ($this->flagIsActive(self::FLAG_WEIGHT_ATTRIBUTES)) {
                 $this->removeFlag(self::FLAG_WEIGHT_ATTRIBUTES);
-                $this->dbg("...content is shorter than ".self::MIN_ARTICLE_LENGTH." letters, trying not to weight attributes.\n");
+                $this->dbg('...content is shorter than '.self::MIN_ARTICLE_LENGTH." letters, trying not to weight attributes.\n");
 
                 return $this->grabArticle($this->body);
             } elseif ($this->flagIsActive(self::FLAG_CLEAN_CONDITIONALLY)) {
                 $this->removeFlag(self::FLAG_CLEAN_CONDITIONALLY);
-                $this->dbg("...content is shorter than ".self::MIN_ARTICLE_LENGTH." letters, trying not to clean at all.\n");
+                $this->dbg('...content is shorter than '.self::MIN_ARTICLE_LENGTH." letters, trying not to clean at all.\n");
 
                 return $this->grabArticle($this->body);
             } else {
@@ -921,12 +921,13 @@ class Readability
      * Get the inner text of a node.
      * This also strips out any excess whitespace to be found.
      *
-     * @param  DOMElement $e
-     * @param  boolean    $normalizeSpaces (default: true)
-     * @param  boolean    $flattenLines    (default: false)
+     * @param DOMElement $e
+     * @param bool       $normalizeSpaces (default: true)
+     * @param bool       $flattenLines    (default: false)
+     *
      * @return string
      */
-    public function getInnerText($e, $normalizeSpaces=true, $flattenLines=false)
+    public function getInnerText($e, $normalizeSpaces = true, $flattenLines = false)
     {
         if (!isset($e->textContent) || $e->textContent === '') {
             return '';
@@ -943,8 +944,7 @@ class Readability
     /**
      * Remove the style attribute on every $e and under.
      *
-     * @param  DOMElement $e
-     * @return void
+     * @param DOMElement $e
      */
     public function cleanStyles($e)
     {
@@ -959,7 +959,8 @@ class Readability
     /**
      * Get comma number for a given text.
      *
-     * @param  string $text
+     * @param string $text
+     *
      * @return number (integer)
      */
     public function getCommaCount($text)
@@ -970,7 +971,8 @@ class Readability
      * Get words number for a given text if words separated by a space.
      * Input string should be normalized.
      *
-     * @param  string $text
+     * @param string $text
+     *
      * @return number (integer)
      */
     public function getWordCount($text)
@@ -982,16 +984,17 @@ class Readability
      * This is the amount of text that is inside a link divided by the total text in the node.
      * Can exclude external references to differentiate between simple text and menus/infoblocks.
      *
-     * @param  DOMElement $e
-     * @param  string     $excludeExternal
-     * @return number     (float)
+     * @param DOMElement $e
+     * @param string     $excludeExternal
+     *
+     * @return float
      */
-    public function getLinkDensity($e, $excludeExternal=false)
+    public function getLinkDensity($e, $excludeExternal = false)
     {
         $links = $e->getElementsByTagName('a');
         $textLength = mb_strlen($this->getInnerText($e, true, true));
         $linkLength = 0;
-        for ($dRe = $this->domainRegExp, $i=0, $il=$links->length; $i < $il; $i++) {
+        for ($dRe = $this->domainRegExp, $i = 0, $il = $links->length; $i < $il; $i++) {
             if ($excludeExternal && $dRe && !preg_match($dRe, $links->item($i)->getAttribute('href'))) {
                 continue;
             }
@@ -1007,9 +1010,10 @@ class Readability
      * Get an element weight by attribute.
      * Uses regular expressions to tell if this element looks good or bad.
      *
-     * @param  DOMElement $element
-     * @param  string     $attribute
-     * @return number     (Integer)
+     * @param DOMElement $element
+     * @param string     $attribute
+     *
+     * @return int
      */
     protected function weightAttribute($element, $attribute)
     {
@@ -1039,8 +1043,9 @@ class Readability
     /**
      * Get an element relative weight.
      *
-     * @param  DOMElement $e
-     * @return number     (Integer)
+     * @param DOMElement $e
+     *
+     * @return int
      */
     public function getWeight($e)
     {
@@ -1058,8 +1063,7 @@ class Readability
     /**
      * Remove extraneous break tags from a node.
      *
-     * @param  DOMElement $node
-     * @return void
+     * @param DOMElement $node
      */
     public function killBreaks($node)
     {
@@ -1069,19 +1073,18 @@ class Readability
     }
     /**
      * Clean a node of all elements of type "tag".
-     * (Unless it's a youtube/vimeo video. People love movies.)
+     * (Unless it's a youtube/vimeo video. People love movies.)
      *
      * Updated 2012-09-18 to preserve youtube/vimeo iframes
      *
-     * @param  DOMElement $e
-     * @param  string     $tag
-     * @return void
+     * @param DOMElement $e
+     * @param string     $tag
      */
     public function clean($e, $tag)
     {
         $targetList = $e->getElementsByTagName($tag);
         $isEmbed = ($tag === 'audio' || $tag === 'video' || $tag === 'iframe' || $tag === 'object' || $tag === 'embed');
-        for ($cur_item = null, $y = $targetList->length-1; $y >= 0; $y--) {
+        for ($cur_item = null, $y = $targetList->length - 1; $y >= 0; $y--) {
             /* Allow youtube and vimeo videos through as people usually want to see those. */
             $cur_item = $targetList->item($y);
             if ($isEmbed) {
@@ -1103,9 +1106,8 @@ class Readability
      * "Fishy" is an algorithm based on content length, classnames,
      * link density, number of images & embeds, etc.
      *
-     * @param  DOMElement $e
-     * @param  string     $tag
-     * @return void
+     * @param DOMElement $e
+     * @param string     $tag
      */
     public function cleanConditionally($e, $tag)
     {
@@ -1114,7 +1116,7 @@ class Readability
         }
         $tagsList = $e->getElementsByTagName($tag);
         $curTagsLength = $tagsList->length;
-        /**
+        /*
          * Gather counts for other typical elements embedded within.
          * Traverse backwards so we can remove nodes at the same time without effecting the traversal.
          *
@@ -1125,29 +1127,29 @@ class Readability
             //$class = $node->getAttribute('class').' '.$node->getAttribute('id'); //debug
             $weight = $this->getWeight($node);
             $contentScore = ($node->hasAttribute('readability')) ? (int) $node->getAttribute('readability') : 0;
-            $this->dbg('Start conditional cleaning of ' . $node->getNodePath() . ' (class=' . $node->getAttribute('class') . '; id=' . $node->getAttribute('id') . ')' . (($node->hasAttribute('readability')) ? (' with score ' . $node->getAttribute('readability')) : ''));
+            $this->dbg('Start conditional cleaning of '.$node->getNodePath().' (class='.$node->getAttribute('class').'; id='.$node->getAttribute('id').')'.(($node->hasAttribute('readability')) ? (' with score '.$node->getAttribute('readability')) : ''));
             if ($weight + $contentScore < 0) {
                 $this->dbg('Removing...');
                 $node->parentNode->removeChild($node);
             } elseif ($this->getCommaCount($this->getInnerText($node)) < self::MIN_COMMAS_IN_PARAGRAPH) {
-                /**
+                /*
                  * If there are not very many commas, and the number of
                  * non-paragraph elements is more than paragraphs or other ominous signs, remove the element.
                  */
                 $p = $node->getElementsByTagName('p')->length;
                 $img = $node->getElementsByTagName('img')->length;
-                $li = $node->getElementsByTagName('li')->length-100;
+                $li = $node->getElementsByTagName('li')->length - 100;
                 $input = $node->getElementsByTagName('input')->length;
                 $a = $node->getElementsByTagName('a')->length;
                 $embedCount = 0;
                 $embeds = $node->getElementsByTagName('embed');
-                for ($ei=0, $il=$embeds->length; $ei < $il; $ei++) {
+                for ($ei = 0, $il = $embeds->length; $ei < $il; $ei++) {
                     if (preg_match($this->regexps['media'], $embeds->item($ei)->getAttribute('src'))) {
                         $embedCount++;
                     }
                 }
                 $embeds = $node->getElementsByTagName('iframe');
-                for ($ei=0, $il=$embeds->length; $ei < $il; $ei++) {
+                for ($ei = 0, $il = $embeds->length; $ei < $il; $ei++) {
                     if (preg_match($this->regexps['media'], $embeds->item($ei)->getAttribute('src'))) {
                         $embedCount++;
                     }
@@ -1159,17 +1161,17 @@ class Readability
                     if ($li > $p && $tag != 'ul' && $tag != 'ol') {
                         $this->dbg(' too many <li> elements, and parent is not <ul> or <ol>');
                         $toRemove = true;
-                    } elseif ( $input > floor($p/3) ) {
+                    } elseif ($input > floor($p / 3)) {
                         $this->dbg(' too many <input> elements');
                         $toRemove = true;
                     } elseif ($contentLength < 6 && ($embedCount === 0 && ($img === 0 || $img > 2))) {
                         $this->dbg(' content length less than 6 chars, 0 embeds and either 0 images or more than 2 images');
                         $toRemove = true;
                     } elseif ($weight < 25 && $linkDensity > 0.25) {
-                        $this->dbg(' weight is '.$weight.' < 25 and link density is '.sprintf("%.2f", $linkDensity).' > 0.25');
+                        $this->dbg(' weight is '.$weight.' < 25 and link density is '.sprintf('%.2f', $linkDensity).' > 0.25');
                         $toRemove = true;
                     } elseif ($a > 2 && ($weight >= 25 && $linkDensity > 0.5)) {
-                        $this->dbg('  more than 2 links and weight is '.$weight.' > 25 but link density is '.sprintf("%.2f", $linkDensity).' > 0.5');
+                        $this->dbg('  more than 2 links and weight is '.$weight.' > 25 but link density is '.sprintf('%.2f', $linkDensity).' > 0.5');
                         $toRemove = true;
                     } elseif ($embedCount > 3) {
                         $this->dbg(' more than 3 embeds');
@@ -1182,17 +1184,17 @@ class Readability
                     } elseif ($li > $p && $tag != 'ul' && $tag != 'ol') {
                         $this->dbg('  too many <li> elements, and parent is not <ul> or <ol>');
                         $toRemove = true;
-                    } elseif ( $input > floor($p/3) ) {
+                    } elseif ($input > floor($p / 3)) {
                         $this->dbg('  too many <input> elements');
                         $toRemove = true;
-                    } elseif ($contentLength < 25 && ($img === 0 || $img > 2) ) {
+                    } elseif ($contentLength < 25 && ($img === 0 || $img > 2)) {
                         $this->dbg('  content length less than 25 chars and 0 images, or more than 2 images');
                         $toRemove = true;
                     } elseif ($weight < 25 && $linkDensity > 0.2) {
-                        $this->dbg('  weight is '.$weight.' lower than 0 and link density is '.sprintf("%.2f", $linkDensity).' > 0.2');
+                        $this->dbg('  weight is '.$weight.' lower than 0 and link density is '.sprintf('%.2f', $linkDensity).' > 0.2');
                         $toRemove = true;
                     } elseif ($weight >= 25 && $linkDensity > 0.5) {
-                        $this->dbg('  weight above 25 but link density is '.sprintf("%.2f", $linkDensity).' > 0.5');
+                        $this->dbg('  weight above 25 but link density is '.sprintf('%.2f', $linkDensity).' > 0.5');
                         $toRemove = true;
                     } elseif (($embedCount == 1 && $contentLength < 75) || $embedCount > 1) {
                         $this->dbg('  1 embed and content length smaller than 75 chars, or more than one embed');
@@ -1210,14 +1212,13 @@ class Readability
     /**
      * Clean out spurious headers from an Element. Checks things like classnames and link density.
      *
-     * @param  DOMElement $e
-     * @return void
+     * @param DOMElement $e
      */
     public function cleanHeaders($e)
     {
         for ($headerIndex = 1; $headerIndex < 3; $headerIndex++) {
-            $headers = $e->getElementsByTagName('h' . $headerIndex);
-            for ($i=$headers->length-1; $i >=0; $i--) {
+            $headers = $e->getElementsByTagName('h'.$headerIndex);
+            for ($i = $headers->length - 1; $i >= 0; $i--) {
                 if ($this->getWeight($headers->item($i)) < 0 || $this->getLinkDensity($headers->item($i)) > 0.33) {
                     $headers->item($i)->parentNode->removeChild($headers->item($i));
                 }
diff --git a/tests/ReadabilityTest.php b/tests/ReadabilityTest.php
index 8107e63..52eddb0 100644
--- a/tests/ReadabilityTest.php
+++ b/tests/ReadabilityTest.php
@@ -3,7 +3,6 @@
 namespace Tests\Readability;
 
 use Readability\Readability;
-use Readability\JSLikeHTMLElement;
 
 class ReadabilityTested extends Readability
 {