diff --git a/.gitignore b/.gitignore index 48b8bf9..d191143 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,3 @@ vendor/ +coverage/ +composer.lock diff --git a/phpunit.xml.dist b/phpunit.xml.dist index f727e46..e6a0903 100644 --- a/phpunit.xml.dist +++ b/phpunit.xml.dist @@ -19,11 +19,14 @@ - ./src/TubeLink/ + ./src/ ./tests + + + diff --git a/src/Readability.php b/src/Readability.php index 7f1837e..50e5d2f 100644 --- a/src/Readability.php +++ b/src/Readability.php @@ -52,6 +52,7 @@ class Readability public $revertForcedParagraphElements = true; public $articleTitle; public $articleContent; + public $original_html; public $dom; public $url = null; // optional - URL where HTML was retrieved public $lightClean = true; // preserves more content (experimental) @@ -179,7 +180,7 @@ class Readability $this->debugText .= 'Tidying document'."\n"; $tidy = tidy_parse_string($html, $this->tidy_config, 'UTF8'); if (tidy_clean_repair($tidy)) { - $original_html = $html; + $this->original_html = $html; $this->tidied = true; $html = $tidy->value; $html = preg_replace('/]+>/i', '', $html); @@ -258,7 +259,7 @@ class Readability if ($this->bodyCache == null) { $this->bodyCache = ''; foreach ($bodyElems as $bodyNode) { - $this->bodyCache += $bodyNode->innerHTML; + $this->bodyCache .= trim($bodyNode->innerHTML); } } if ($bodyElems->length > 0 && $this->body == null) {