Merge pull request #15 from j0k3r/cs

Enable php-cs-fixer
pull/16/head
Jeremy Benoist 10 years ago committed by GitHub
commit fb9810a827
  1. 1
      .gitattributes
  2. 1
      .gitignore
  3. 20
      .php_cs
  4. 14
      .travis.yml
  5. 3
      composer.json
  6. 14
      src/JSLikeHTMLElement.php
  7. 78
      src/Readability.php
  8. 42
      tests/ReadabilityTest.php

1
.gitattributes vendored

@ -3,6 +3,7 @@
/.gitignore export-ignore
/.scrutinizer.yml export-ignore
/.travis.yml export-ignore
/.php_cs export-ignore
/README.md export-ignore
/phpunit.xml.dist export-ignore
/tests export-ignore

1
.gitignore vendored

@ -1,3 +1,4 @@
vendor/
coverage/
composer.lock
.php_cs.cache

@ -0,0 +1,20 @@
<?php
// Project-wide php-cs-fixer configuration. The tool loads this file and uses
// the Config object returned below.
// NOTE(review): the Symfony\CS namespace looks like the php-cs-fixer 1.x API;
// confirm the installed php-cs-fixer version still provides these classes.
return Symfony\CS\Config\Config::create()
// Turn on the fixer's cache between runs.
// (presumably stored in .php_cs.cache next to this file - TODO confirm)
->setUsingCache(true)
->level(Symfony\CS\FixerInterface::SYMFONY_LEVEL)
// use default SYMFONY_LEVEL and extra fixers:
->fixers(array(
// Put spaces around the "." concatenation operator.
'concat_with_spaces',
// Sort "use" import statements.
'ordered_use',
// presumably reorders phpdoc annotations - verify against the fixer docs
'phpdoc_order',
// Replace loose comparisons (==, !=) with strict ones (===, !==).
// NOTE(review): this can change runtime behaviour, not only formatting.
'strict',
// presumably forces the strict flag on functions such as in_array() -
// verify against the fixer docs; may also change runtime behaviour
'strict_param',
// Keep array() rather than the short [] syntax.
'long_array_syntax',
))
->finder(
// Scan files starting from the directory that contains this config file,
// skipping the "vendor" directory.
Symfony\CS\Finder\DefaultFinder::create()
->in(__DIR__)
->exclude(array('vendor'))
)
;

@ -10,6 +10,9 @@ php:
- hhvm
matrix:
include:
- php: 7.0
env: CS_FIXER=run
fast_finish: true
allow_failures:
- php: hhvm
@ -23,20 +26,23 @@ cache:
- vendor
- $HOME/.composer/cache
install:
- composer self-update
before_script:
before_install:
- if [ -n "$GH_TOKEN" ]; then composer config github-oauth.github.com ${GH_TOKEN}; fi;
# disable TLS for composer because openssl is disabled for PHP 5.3.3 on travis
# see: https://blog.travis-ci.com/upcoming_ubuntu_11_10_migration/
- if [[ $TRAVIS_PHP_VERSION = 5.3.3 ]]; then composer config -g -- disable-tls true; fi;
- if [[ $TRAVIS_PHP_VERSION = 5.3.3 ]]; then composer config -g -- secure-http false; fi;
install:
- composer self-update
before_script:
- composer install --prefer-dist --no-interaction
script:
- mkdir -p build/logs
- phpunit -v --coverage-clover build/logs/clover.xml
- if [ "$CS_FIXER" = "run" ]; then php vendor/bin/php-cs-fixer fix --verbose --dry-run ; fi;
after_script:
- php vendor/bin/coveralls -v

@ -28,7 +28,8 @@
"monolog/monolog": "^1.13.1"
},
"require-dev": {
"satooshi/php-coveralls": "~0.6"
"satooshi/php-coveralls": "~0.6",
"friendsofphp/php-cs-fixer": "*"
},
"autoload": {
"psr-4": { "Readability\\": "src/" }

@ -45,14 +45,14 @@ class JSLikeHTMLElement extends \DOMElement
*/
public function __set($name, $value)
{
if ($name == 'innerHTML') {
if ($name === 'innerHTML') {
// first, empty the element
for ($x = $this->childNodes->length - 1; $x >= 0; --$x) {
$this->removeChild($this->childNodes->item($x));
}
// $value holds our new inner HTML
if ($value != '') {
if ($value !== '') {
$f = $this->ownerDocument->createDocumentFragment();
// appendXML() expects well-formed markup (XHTML)
@ -72,7 +72,7 @@ class JSLikeHTMLElement extends \DOMElement
// We use it (and suppress the warning) because an HTML fragment will
// be wrapped around <html><body> tags which we don't really want to keep.
// Note: despite the warning, if loadHTML succeeds it will return true.
$result = @$f->loadHTML('<htmlfragment>'.$value.'</htmlfragment>');
$result = @$f->loadHTML('<htmlfragment>' . $value . '</htmlfragment>');
if ($result) {
$import = $f->getElementsByTagName('htmlfragment')->item(0);
@ -89,7 +89,7 @@ class JSLikeHTMLElement extends \DOMElement
}
} else {
$trace = debug_backtrace();
trigger_error('Undefined property via __set(): '.$name.' in '.$trace[0]['file'].' on line '.$trace[0]['line'], E_USER_NOTICE);
trigger_error('Undefined property via __set(): ' . $name . ' in ' . $trace[0]['file'] . ' on line ' . $trace[0]['line'], E_USER_NOTICE);
}
}
@ -102,7 +102,7 @@ class JSLikeHTMLElement extends \DOMElement
*/
public function __get($name)
{
if ($name == 'innerHTML') {
if ($name === 'innerHTML') {
$inner = '';
foreach ($this->childNodes as $child) {
@ -113,13 +113,13 @@ class JSLikeHTMLElement extends \DOMElement
}
$trace = debug_backtrace();
trigger_error('Undefined property via __get(): '.$name.' in '.$trace[0]['file'].' on line '.$trace[0]['line'], E_USER_NOTICE);
trigger_error('Undefined property via __get(): ' . $name . ' in ' . $trace[0]['file'] . ' on line ' . $trace[0]['line'], E_USER_NOTICE);
return;
}
public function __toString()
{
return '['.$this->tagName.']';
return '[' . $this->tagName . ']';
}
}

@ -243,10 +243,10 @@ class Readability implements LoggerAwareInterface
{
$this->original_html = $this->html;
$this->logger->debug('Parsing URL: '.$this->url);
$this->logger->debug('Parsing URL: ' . $this->url);
if ($this->url) {
$this->domainRegExp = '/'.strtr(preg_replace('/www\d*\./', '', parse_url($this->url, PHP_URL_HOST)), array('.' => '\.')).'/';
$this->domainRegExp = '/' . strtr(preg_replace('/www\d*\./', '', parse_url($this->url, PHP_URL_HOST)), array('.' => '\.')) . '/';
}
mb_internal_encoding('UTF-8');
@ -284,7 +284,7 @@ class Readability implements LoggerAwareInterface
$this->html = mb_convert_encoding($this->html, 'HTML-ENTITIES', 'UTF-8');
if (!($this->parser == 'html5lib' && ($this->dom = \HTML5_Parser::parse($this->html)))) {
if (!($this->parser === 'html5lib' && ($this->dom = \HTML5_Parser::parse($this->html)))) {
libxml_use_internal_errors(true);
$this->dom = new \DOMDocument();
@ -325,7 +325,7 @@ class Readability implements LoggerAwareInterface
$bodyElems = $this->dom->getElementsByTagName('body');
// WTF multiple body nodes?
if ($this->bodyCache == null) {
if ($this->bodyCache === null) {
$this->bodyCache = '';
foreach ($bodyElems as $bodyNode) {
$this->bodyCache .= trim($bodyNode->innerHTML);
@ -429,7 +429,7 @@ class Readability implements LoggerAwareInterface
}
} elseif (mb_strlen($curTitle) > 150 || mb_strlen($curTitle) < 15) {
$hOnes = $this->dom->getElementsByTagName('h1');
if ($hOnes->length == 1) {
if ($hOnes->length === 1) {
$curTitle = $this->getInnerText($hOnes->item(0));
}
}
@ -510,8 +510,8 @@ class Readability implements LoggerAwareInterface
++$linkCount;
// Add a superscript reference after the article link.
$refLink->setAttribute('href', '#readabilityFootnoteLink-'.$linkCount);
$refLink->innerHTML = '<small><sup>['.$linkCount.']</sup></small>';
$refLink->setAttribute('href', '#readabilityFootnoteLink-' . $linkCount);
$refLink->innerHTML = '<small><sup>[' . $linkCount . ']</sup></small>';
$refLink->setAttribute('class', 'readability-DoNotFootnote');
$refLink->setAttribute('style', 'color: inherit;');
@ -522,14 +522,14 @@ class Readability implements LoggerAwareInterface
}
$articleLink->setAttribute('style', 'color: inherit; text-decoration: none;');
$articleLink->setAttribute('name', 'readabilityLink-'.$linkCount);
$footnote->innerHTML = '<small><sup><a href="#readabilityLink-'.$linkCount.'" title="Jump to Link in Article">^</a></sup></small> ';
$footnoteLink->innerHTML = ($footnoteLink->getAttribute('title') != '' ? $footnoteLink->getAttribute('title') : $linkText);
$footnoteLink->setAttribute('name', 'readabilityFootnoteLink-'.$linkCount);
$articleLink->setAttribute('name', 'readabilityLink-' . $linkCount);
$footnote->innerHTML = '<small><sup><a href="#readabilityLink-' . $linkCount . '" title="Jump to Link in Article">^</a></sup></small> ';
$footnoteLink->innerHTML = ($footnoteLink->getAttribute('title') !== '' ? $footnoteLink->getAttribute('title') : $linkText);
$footnoteLink->setAttribute('name', 'readabilityFootnoteLink-' . $linkCount);
$footnote->appendChild($footnoteLink);
if ($linkDomain) {
$footnote->innerHTML = $footnote->innerHTML.'<small> ('.$linkDomain.')</small>';
$footnote->innerHTML = $footnote->innerHTML . '<small> (' . $linkDomain . ')</small>';
}
$articleFootnotes->appendChild($footnote);
}
@ -586,7 +586,7 @@ class Readability implements LoggerAwareInterface
* already have a header.
*/
$h2s = $articleContent->getElementsByTagName('h2');
if ($h2s->length == 1 && mb_strlen($this->getInnerText($h2s->item(0), true, true)) < 100) {
if ($h2s->length === 1 && mb_strlen($this->getInnerText($h2s->item(0), true, true)) < 100) {
$this->clean($articleContent, 'h2');
}
@ -631,7 +631,7 @@ class Readability implements LoggerAwareInterface
}
unset($search, $replace);
} catch (\Exception $e) {
$this->logger->error('Cleaning output HTML failed. Ignoring: '.$e->getMessage());
$this->logger->error('Cleaning output HTML failed. Ignoring: ' . $e->getMessage());
}
}
}
@ -748,7 +748,7 @@ class Readability implements LoggerAwareInterface
--$nodeIndex;
$nodesToScore[] = $newNode;
} catch (\Exception $e) {
$this->logger->error('Could not alter div/article to p, reverting back to div: '.$e->getMessage());
$this->logger->error('Could not alter div/article to p, reverting back to div: ' . $e->getMessage());
}
} else {
// Will change these P elements back to text nodes after processing.
@ -848,7 +848,7 @@ class Readability implements LoggerAwareInterface
$node = $candidates->item($c);
// node should be readable but not inside of an article otherwise it's probably non-readable block
if ($node->hasAttribute('readability') && (int) $node->getAttributeNode('readability')->value < 40 && ($node->parentNode ? strcasecmp($node->parentNode->tagName, 'article') !== 0 : true)) {
$this->logger->debug('Removing unlikely candidate (using note) '.$node->getNodePath().' by "'.$node->tagName.'" with readability '.($node->hasAttribute('readability') ? (int) $node->getAttributeNode('readability')->value : 0));
$this->logger->debug('Removing unlikely candidate (using note) ' . $node->getNodePath() . ' by "' . $node->tagName . '" with readability ' . ($node->hasAttribute('readability') ? (int) $node->getAttributeNode('readability')->value : 0));
$node->parentNode->removeChild($node);
}
}
@ -860,13 +860,13 @@ class Readability implements LoggerAwareInterface
$node = $candidates->item($c);
// Remove unlikely candidates
$unlikelyMatchString = $node->getAttribute('class').' '.$node->getAttribute('id').' '.$node->getAttribute('style');
$unlikelyMatchString = $node->getAttribute('class') . ' ' . $node->getAttribute('id') . ' ' . $node->getAttribute('style');
if (mb_strlen($unlikelyMatchString) > 3 && // don't process "empty" strings
preg_match($this->regexps['unlikelyCandidates'], $unlikelyMatchString) &&
!preg_match($this->regexps['okMaybeItsACandidate'], $unlikelyMatchString)
) {
$this->logger->debug('Removing unlikely candidate (using conf) '.$node->getNodePath().' by "'.$unlikelyMatchString.'" with readability '.($node->hasAttribute('readability') ? (int) $node->getAttributeNode('readability')->value : 0));
$this->logger->debug('Removing unlikely candidate (using conf) ' . $node->getNodePath() . ' by "' . $unlikelyMatchString . '" with readability ' . ($node->hasAttribute('readability') ? (int) $node->getAttributeNode('readability')->value : 0));
$node->parentNode->removeChild($node);
--$nodeIndex;
}
@ -893,7 +893,7 @@ class Readability implements LoggerAwareInterface
$readability->value = round($readability->value * (1 - $this->getLinkDensity($item)), 0, PHP_ROUND_HALF_UP);
if (!$topCandidate || $readability->value > (int) $topCandidate->getAttribute('readability')) {
$this->logger->debug('Candidate: '.$item->getNodePath().' ('.$item->getAttribute('class').':'.$item->getAttribute('id').') with score '.$readability->value);
$this->logger->debug('Candidate: ' . $item->getNodePath() . ' (' . $item->getAttribute('class') . ':' . $item->getAttribute('id') . ') with score ' . $readability->value);
$topCandidate = $item;
}
}
@ -942,7 +942,7 @@ class Readability implements LoggerAwareInterface
}
}
$this->logger->debug('Top candidate: '.$topCandidate->getNodePath());
$this->logger->debug('Top candidate: ' . $topCandidate->getNodePath());
/*
* Now that we have the top candidate, look through its siblings for content that might also be related.
@ -962,7 +962,7 @@ class Readability implements LoggerAwareInterface
$siblingNode = $siblingNodes->item($s);
$siblingNodeName = $siblingNode->nodeName;
$append = false;
$this->logger->debug('Looking at sibling node: '.$siblingNode->getNodePath().(($siblingNode->nodeType === XML_ELEMENT_NODE && $siblingNode->hasAttribute('readability')) ? (' with score '.$siblingNode->getAttribute('readability')) : ''));
$this->logger->debug('Looking at sibling node: ' . $siblingNode->getNodePath() . (($siblingNode->nodeType === XML_ELEMENT_NODE && $siblingNode->hasAttribute('readability')) ? (' with score ' . $siblingNode->getAttribute('readability')) : ''));
if ($siblingNode->isSameNode($topCandidate)) {
$append = true;
@ -971,7 +971,7 @@ class Readability implements LoggerAwareInterface
$contentBonus = 0;
// Give a bonus if sibling nodes and top candidates have the same classname.
if ($siblingNode->nodeType === XML_ELEMENT_NODE && $siblingNode->getAttribute('class') == $topCandidate->getAttribute('class') && $topCandidate->getAttribute('class') != '') {
if ($siblingNode->nodeType === XML_ELEMENT_NODE && $siblingNode->getAttribute('class') === $topCandidate->getAttribute('class') && $topCandidate->getAttribute('class') !== '') {
$contentBonus += ((int) $topCandidate->getAttribute('readability')) * 0.2;
}
@ -991,18 +991,18 @@ class Readability implements LoggerAwareInterface
}
if ($append) {
$this->logger->debug('Appending node: '.$siblingNode->getNodePath());
$this->logger->debug('Appending node: ' . $siblingNode->getNodePath());
if (strcasecmp($siblingNodeName, 'div') !== 0 && strcasecmp($siblingNodeName, 'p') !== 0) {
// We have a node that isn't a common block level element, like a form or td tag. Turn it into a div so it doesn't get filtered out later by accident.
$this->logger->debug('Altering siblingNode "'.$siblingNodeName.'" to "div".');
$this->logger->debug('Altering siblingNode "' . $siblingNodeName . '" to "div".');
$nodeToAppend = $this->dom->createElement('div');
try {
$nodeToAppend->setAttribute('alt', $siblingNodeName);
$nodeToAppend->innerHTML = $siblingNode->innerHTML;
} catch (\Exception $e) {
$this->logger->debug('Could not alter siblingNode "'.$siblingNodeName.'" to "div", reverting to original.');
$this->logger->debug('Could not alter siblingNode "' . $siblingNodeName . '" to "div", reverting to original.');
$nodeToAppend = $siblingNode;
--$s;
--$sl;
@ -1038,17 +1038,17 @@ class Readability implements LoggerAwareInterface
if ($this->flagIsActive(self::FLAG_STRIP_UNLIKELYS)) {
$this->removeFlag(self::FLAG_STRIP_UNLIKELYS);
$this->logger->debug('...content is shorter than '.self::MIN_ARTICLE_LENGTH." letters, trying not to strip unlikely content.\n");
$this->logger->debug('...content is shorter than ' . self::MIN_ARTICLE_LENGTH . " letters, trying not to strip unlikely content.\n");
return $this->grabArticle($this->body);
} elseif ($this->flagIsActive(self::FLAG_WEIGHT_ATTRIBUTES)) {
$this->removeFlag(self::FLAG_WEIGHT_ATTRIBUTES);
$this->logger->debug('...content is shorter than '.self::MIN_ARTICLE_LENGTH." letters, trying not to weight attributes.\n");
$this->logger->debug('...content is shorter than ' . self::MIN_ARTICLE_LENGTH . " letters, trying not to weight attributes.\n");
return $this->grabArticle($this->body);
} elseif ($this->flagIsActive(self::FLAG_CLEAN_CONDITIONALLY)) {
$this->removeFlag(self::FLAG_CLEAN_CONDITIONALLY);
$this->logger->debug('...content is shorter than '.self::MIN_ARTICLE_LENGTH." letters, trying not to clean at all.\n");
$this->logger->debug('...content is shorter than ' . self::MIN_ARTICLE_LENGTH . " letters, trying not to clean at all.\n");
return $this->grabArticle($this->body);
}
@ -1178,7 +1178,7 @@ class Readability implements LoggerAwareInterface
// $attributeValue = trim($element->getAttribute('class')." ".$element->getAttribute('id'));
$attributeValue = trim($element->getAttribute($attribute));
if ($attributeValue != '') {
if ($attributeValue !== '') {
if (preg_match($this->regexps['negative'], $attributeValue)) {
$weight -= 25;
}
@ -1250,7 +1250,7 @@ class Readability implements LoggerAwareInterface
$currentItem = $targetList->item($y);
if ($isEmbed) {
$attributeValues = $currentItem->getAttribute('src').' '.$currentItem->getAttribute('href');
$attributeValues = $currentItem->getAttribute('src') . ' ' . $currentItem->getAttribute('href');
// First, check the elements attributes to see if any of them contain known media hosts
if (preg_match($this->regexps['media'], $attributeValues)) {
@ -1295,7 +1295,7 @@ class Readability implements LoggerAwareInterface
$node = $tagsList->item($i);
$weight = $this->getWeight($node);
$contentScore = ($node->hasAttribute('readability')) ? (int) $node->getAttribute('readability') : 0;
$this->logger->debug('Start conditional cleaning of '.$node->getNodePath().' (class='.$node->getAttribute('class').'; id='.$node->getAttribute('id').')'.(($node->hasAttribute('readability')) ? (' with score '.$node->getAttribute('readability')) : ''));
$this->logger->debug('Start conditional cleaning of ' . $node->getNodePath() . ' (class=' . $node->getAttribute('class') . '; id=' . $node->getAttribute('id') . ')' . (($node->hasAttribute('readability')) ? (' with score ' . $node->getAttribute('readability')) : ''));
if ($weight + $contentScore < 0) {
$this->logger->debug('Removing...');
@ -1331,7 +1331,7 @@ class Readability implements LoggerAwareInterface
$toRemove = false;
if ($this->lightClean) {
if ($li > $p && $tag != 'ul' && $tag != 'ol') {
if ($li > $p && $tag !== 'ul' && $tag !== 'ol') {
$this->logger->debug(' too many <li> elements, and parent is not <ul> or <ol>');
$toRemove = true;
} elseif ($input > floor($p / 3)) {
@ -1341,10 +1341,10 @@ class Readability implements LoggerAwareInterface
$this->logger->debug(' content length less than 6 chars, 0 embeds and either 0 images or more than 2 images');
$toRemove = true;
} elseif ($weight < 25 && $linkDensity > 0.25) {
$this->logger->debug(' weight is '.$weight.' < 25 and link density is '.sprintf('%.2f', $linkDensity).' > 0.25');
$this->logger->debug(' weight is ' . $weight . ' < 25 and link density is ' . sprintf('%.2f', $linkDensity) . ' > 0.25');
$toRemove = true;
} elseif ($a > 2 && ($weight >= 25 && $linkDensity > 0.5)) {
$this->logger->debug(' more than 2 links and weight is '.$weight.' > 25 but link density is '.sprintf('%.2f', $linkDensity).' > 0.5');
$this->logger->debug(' more than 2 links and weight is ' . $weight . ' > 25 but link density is ' . sprintf('%.2f', $linkDensity) . ' > 0.5');
$toRemove = true;
} elseif ($embedCount > 3) {
$this->logger->debug(' more than 3 embeds');
@ -1354,7 +1354,7 @@ class Readability implements LoggerAwareInterface
if ($img > $p) {
$this->logger->debug(' more image elements than paragraph elements');
$toRemove = true;
} elseif ($li > $p && $tag != 'ul' && $tag != 'ol') {
} elseif ($li > $p && $tag !== 'ul' && $tag !== 'ol') {
$this->logger->debug(' too many <li> elements, and parent is not <ul> or <ol>');
$toRemove = true;
} elseif ($input > floor($p / 3)) {
@ -1364,12 +1364,12 @@ class Readability implements LoggerAwareInterface
$this->logger->debug(' content length less than 10 chars and 0 images, or more than 2 images');
$toRemove = true;
} elseif ($weight < 25 && $linkDensity > 0.2) {
$this->logger->debug(' weight is '.$weight.' lower than 0 and link density is '.sprintf('%.2f', $linkDensity).' > 0.2');
$this->logger->debug(' weight is ' . $weight . ' lower than 0 and link density is ' . sprintf('%.2f', $linkDensity) . ' > 0.2');
$toRemove = true;
} elseif ($weight >= 25 && $linkDensity > 0.5) {
$this->logger->debug(' weight above 25 but link density is '.sprintf('%.2f', $linkDensity).' > 0.5');
$this->logger->debug(' weight above 25 but link density is ' . sprintf('%.2f', $linkDensity) . ' > 0.5');
$toRemove = true;
} elseif (($embedCount == 1 && $contentLength < 75) || $embedCount > 1) {
} elseif (($embedCount === 1 && $contentLength < 75) || $embedCount > 1) {
$this->logger->debug(' 1 embed and content length smaller than 75 chars, or more than one embed');
$toRemove = true;
}
@ -1391,7 +1391,7 @@ class Readability implements LoggerAwareInterface
public function cleanHeaders($e)
{
for ($headerIndex = 1; $headerIndex < 3; ++$headerIndex) {
$headers = $e->getElementsByTagName('h'.$headerIndex);
$headers = $e->getElementsByTagName('h' . $headerIndex);
for ($i = $headers->length - 1; $i >= 0; --$i) {
if ($this->getWeight($headers->item($i)) < 0 || $this->getLinkDensity($headers->item($i)) > 0.33) {

@ -2,9 +2,9 @@
namespace Tests\Readability;
use Readability\Readability;
use Monolog\Logger;
use Monolog\Handler\TestHandler;
use Monolog\Logger;
use Readability\Readability;
class ReadabilityTest extends \PHPUnit_Framework_TestCase
{
@ -94,7 +94,7 @@ class ReadabilityTest extends \PHPUnit_Framework_TestCase
public function testInitDivP()
{
$readability = $this->getReadability('<div>'.str_repeat('<p>This is the awesome content :)</p>', 7).'</div>', 'http://0.0.0.0');
$readability = $this->getReadability('<div>' . str_repeat('<p>This is the awesome content :)</p>', 7) . '</div>', 'http://0.0.0.0');
$res = $readability->init();
$this->assertTrue($res);
@ -107,7 +107,7 @@ class ReadabilityTest extends \PHPUnit_Framework_TestCase
public function testInitDiv()
{
$readability = $this->getReadability('<div>'.str_repeat('This is the awesome content :)', 7).'</div>', 'http://0.0.0.0');
$readability = $this->getReadability('<div>' . str_repeat('This is the awesome content :)', 7) . '</div>', 'http://0.0.0.0');
$readability->debug = true;
$res = $readability->init();
@ -121,7 +121,7 @@ class ReadabilityTest extends \PHPUnit_Framework_TestCase
public function testWithFootnotes()
{
$readability = $this->getReadability('<div>'.str_repeat('<p>This is an awesome text with some links, here there are: <a href="http://0.0.0.0/test.html">the awesome</a></p>', 7).'</div>', 'http://0.0.0.0');
$readability = $this->getReadability('<div>' . str_repeat('<p>This is an awesome text with some links, here there are: <a href="http://0.0.0.0/test.html">the awesome</a></p>', 7) . '</div>', 'http://0.0.0.0');
$readability->debug = true;
$readability->convertLinksToFootnotes = true;
$res = $readability->init();
@ -138,7 +138,7 @@ class ReadabilityTest extends \PHPUnit_Framework_TestCase
public function testStandardClean()
{
$readability = $this->getReadability('<div><h2>Title</h2>'.str_repeat('<p>This is an awesome text with some links, here there are: <a href="http://0.0.0.0/test.html">the awesome</a></p>', 7).'<a href="#nofollow" rel="nofollow">will NOT be removed</a></div>', 'http://0.0.0.0');
$readability = $this->getReadability('<div><h2>Title</h2>' . str_repeat('<p>This is an awesome text with some links, here there are: <a href="http://0.0.0.0/test.html">the awesome</a></p>', 7) . '<a href="#nofollow" rel="nofollow">will NOT be removed</a></div>', 'http://0.0.0.0');
$readability->debug = true;
$readability->lightClean = false;
$res = $readability->init();
@ -155,7 +155,7 @@ class ReadabilityTest extends \PHPUnit_Framework_TestCase
public function testWithIframe()
{
$readability = $this->getReadability('<div><h2>Title</h2>'.str_repeat('<p>This is an awesome text with some links, here there are: <a href="http://0.0.0.0/test.html">the awesome</a></p>', 7).'<p>This is an awesome text with some links, here there are <iframe src="http://youtube.com/test" href="#nofollow" rel="nofollow"></iframe><iframe>http://soundcloud.com/test</iframe></p></div>', 'http://0.0.0.0');
$readability = $this->getReadability('<div><h2>Title</h2>' . str_repeat('<p>This is an awesome text with some links, here there are: <a href="http://0.0.0.0/test.html">the awesome</a></p>', 7) . '<p>This is an awesome text with some links, here there are <iframe src="http://youtube.com/test" href="#nofollow" rel="nofollow"></iframe><iframe>http://soundcloud.com/test</iframe></p></div>', 'http://0.0.0.0');
$readability->debug = true;
$res = $readability->init();
@ -170,7 +170,7 @@ class ReadabilityTest extends \PHPUnit_Framework_TestCase
public function testWithArticle()
{
$readability = $this->getReadability('<article><p>'.str_repeat('This is an awesome text with some links, here there are: the awesome', 20).'</p><p>This is an awesome text with some links, here there are <iframe src="http://youtube.com/test" href="#nofollow" rel="nofollow"></iframe></p></article>', 'http://0.0.0.0');
$readability = $this->getReadability('<article><p>' . str_repeat('This is an awesome text with some links, here there are: the awesome', 20) . '</p><p>This is an awesome text with some links, here there are <iframe src="http://youtube.com/test" href="#nofollow" rel="nofollow"></iframe></p></article>', 'http://0.0.0.0');
$readability->debug = true;
$res = $readability->init();
@ -185,7 +185,7 @@ class ReadabilityTest extends \PHPUnit_Framework_TestCase
public function testWithAside()
{
$readability = $this->getReadability('<article>'.str_repeat('<p>This is an awesome text with some links, here there are: <a href="http://0.0.0.0/test.html">the awesome</a></p>', 7).'<footer><aside>'.str_repeat('<p>This is an awesome text with some links, here there are</p>', 8).'</aside></footer></article>', 'http://0.0.0.0');
$readability = $this->getReadability('<article>' . str_repeat('<p>This is an awesome text with some links, here there are: <a href="http://0.0.0.0/test.html">the awesome</a></p>', 7) . '<footer><aside>' . str_repeat('<p>This is an awesome text with some links, here there are</p>', 8) . '</aside></footer></article>', 'http://0.0.0.0');
$readability->debug = true;
$res = $readability->init();
@ -200,7 +200,7 @@ class ReadabilityTest extends \PHPUnit_Framework_TestCase
public function testWithClasses()
{
$readability = $this->getReadability('<article>'.str_repeat('<p>This is an awesome text with some links, here there are: <a href="http://0.0.0.0/test.html">the awesome</a></p>', 7).'<div style="display:none">'.str_repeat('<p class="clock">This text should be removed</p>', 10).'</div></article>', 'http://0.0.0.0');
$readability = $this->getReadability('<article>' . str_repeat('<p>This is an awesome text with some links, here there are: <a href="http://0.0.0.0/test.html">the awesome</a></p>', 7) . '<div style="display:none">' . str_repeat('<p class="clock">This text should be removed</p>', 10) . '</div></article>', 'http://0.0.0.0');
$readability->debug = true;
$res = $readability->init();
@ -215,7 +215,7 @@ class ReadabilityTest extends \PHPUnit_Framework_TestCase
public function testWithClassesWithoutLightClean()
{
$readability = $this->getReadability('<article>'.str_repeat('<p>This is an awesome text with some links, here there are: <a href="http://0.0.0.0/test.html">the awesome</a></p>', 7).'<div style="display:none">'.str_repeat('<p class="clock">This text should be removed</p>', 10).'</div></article>', 'http://0.0.0.0');
$readability = $this->getReadability('<article>' . str_repeat('<p>This is an awesome text with some links, here there are: <a href="http://0.0.0.0/test.html">the awesome</a></p>', 7) . '<div style="display:none">' . str_repeat('<p class="clock">This text should be removed</p>', 10) . '</div></article>', 'http://0.0.0.0');
$readability->debug = true;
$readability->lightClean = false;
$res = $readability->init();
@ -231,7 +231,7 @@ class ReadabilityTest extends \PHPUnit_Framework_TestCase
public function testWithTd()
{
$readability = $this->getReadability('<table><tr>'.str_repeat('<td><p>This is an awesome text with some links, here there are the awesome</td>', 7).'</tr></table>', 'http://0.0.0.0');
$readability = $this->getReadability('<table><tr>' . str_repeat('<td><p>This is an awesome text with some links, here there are the awesome</td>', 7) . '</tr></table>', 'http://0.0.0.0');
$readability->debug = true;
$res = $readability->init();
@ -244,7 +244,7 @@ class ReadabilityTest extends \PHPUnit_Framework_TestCase
public function testWithSameClasses()
{
$readability = $this->getReadability('<article class="awesomecontent">'.str_repeat('<p>This is an awesome text with some links, here there are the awesome</p>', 7).'<div class="awesomecontent">This text is also an awesome text and you should know that !</div></article>', 'http://0.0.0.0');
$readability = $this->getReadability('<article class="awesomecontent">' . str_repeat('<p>This is an awesome text with some links, here there are the awesome</p>', 7) . '<div class="awesomecontent">This text is also an awesome text and you should know that !</div></article>', 'http://0.0.0.0');
$readability->debug = true;
$res = $readability->init();
@ -258,7 +258,7 @@ class ReadabilityTest extends \PHPUnit_Framework_TestCase
public function testWithScript()
{
$readability = $this->getReadability('<article class="awesomecontent">'.str_repeat('<p>This is an awesome text with some links, here there are the awesome</p>', 7).'<p><script>This text is also an awesome text and you should know that !</script></p></article>', 'http://0.0.0.0');
$readability = $this->getReadability('<article class="awesomecontent">' . str_repeat('<p>This is an awesome text with some links, here there are the awesome</p>', 7) . '<p><script>This text is also an awesome text and you should know that !</script></p></article>', 'http://0.0.0.0');
$readability->debug = true;
$res = $readability->init();
@ -272,7 +272,7 @@ class ReadabilityTest extends \PHPUnit_Framework_TestCase
public function testTitle()
{
$readability = $this->getReadability('<title>this is my title</title><article class="awesomecontent">'.str_repeat('<p>This is an awesome text with some links, here there are the awesome</p>', 7).'<p></p></article>', 'http://0.0.0.0');
$readability = $this->getReadability('<title>this is my title</title><article class="awesomecontent">' . str_repeat('<p>This is an awesome text with some links, here there are the awesome</p>', 7) . '<p></p></article>', 'http://0.0.0.0');
$readability->debug = true;
$res = $readability->init();
@ -286,7 +286,7 @@ class ReadabilityTest extends \PHPUnit_Framework_TestCase
public function testTitleWithDash()
{
$readability = $this->getReadability('<title> title2 - title3 </title><article class="awesomecontent">'.str_repeat('<p>This is an awesome text with some links, here there are the awesome</p>', 7).'<p></p></article>', 'http://0.0.0.0');
$readability = $this->getReadability('<title> title2 - title3 </title><article class="awesomecontent">' . str_repeat('<p>This is an awesome text with some links, here there are the awesome</p>', 7) . '<p></p></article>', 'http://0.0.0.0');
$readability->debug = true;
$res = $readability->init();
@ -300,7 +300,7 @@ class ReadabilityTest extends \PHPUnit_Framework_TestCase
public function testTitleWithDoubleDot()
{
$readability = $this->getReadability('<title> title2 : title3 </title><article class="awesomecontent">'.str_repeat('<p>This is an awesome text with some links, here there are the awesome</p>', 7).'<p></p></article>', 'http://0.0.0.0');
$readability = $this->getReadability('<title> title2 : title3 </title><article class="awesomecontent">' . str_repeat('<p>This is an awesome text with some links, here there are the awesome</p>', 7) . '<p></p></article>', 'http://0.0.0.0');
$readability->debug = true;
$res = $readability->init();
@ -314,7 +314,7 @@ class ReadabilityTest extends \PHPUnit_Framework_TestCase
public function testTitleTooShortUseH1()
{
$readability = $this->getReadability('<title>too short</title><h1>this is my h1 title !</h1><article class="awesomecontent">'.str_repeat('<p>This is an awesome text with some links, here there are the awesome</p>', 7).'<p></p></article>', 'http://0.0.0.0');
$readability = $this->getReadability('<title>too short</title><h1>this is my h1 title !</h1><article class="awesomecontent">' . str_repeat('<p>This is an awesome text with some links, here there are the awesome</p>', 7) . '<p></p></article>', 'http://0.0.0.0');
$readability->debug = true;
$res = $readability->init();
@ -383,7 +383,7 @@ class ReadabilityTest extends \PHPUnit_Framework_TestCase
}
/**
* This should generate an Exception "DOMElement::setAttribute(): ID post-60 already defined"
* This should generate an Exception "DOMElement::setAttribute(): ID post-60 already defined".
*/
public function testAppendIdAlreadyHere()
{
@ -443,7 +443,7 @@ class ReadabilityTest extends \PHPUnit_Framework_TestCase
public function testPostFilters()
{
$readability = $this->getReadability('<div>'.str_repeat('<p>This <b>is</b> the awesome content :)</p>', 7).'</div>', 'http://0.0.0.0');
$readability = $this->getReadability('<div>' . str_repeat('<p>This <b>is</b> the awesome content :)</p>', 7) . '</div>', 'http://0.0.0.0');
$readability->addPostFilter('!<strong[^>]*>(.*?)</strong>!is', '');
$res = $readability->init();
@ -456,7 +456,7 @@ class ReadabilityTest extends \PHPUnit_Framework_TestCase
{
$this->markTestSkipped('Won\'t work until loadHtml() is moved in init() instead of __construct()');
$readability = $this->getReadability('<div>'.str_repeat('<p>This <b>is</b> the awesome and WONDERFUL content :)</p>', 7).'</div>', 'http://0.0.0.0');
$readability = $this->getReadability('<div>' . str_repeat('<p>This <b>is</b> the awesome and WONDERFUL content :)</p>', 7) . '</div>', 'http://0.0.0.0');
$readability->addPreFilter('!<b[^>]*>(.*?)</b>!is', '');
$res = $readability->init();

Loading…
Cancel
Save