diff --git a/.gitattributes b/.gitattributes index db2f81f..8770321 100644 --- a/.gitattributes +++ b/.gitattributes @@ -3,6 +3,7 @@ /.gitignore export-ignore /.scrutinizer.yml export-ignore /.travis.yml export-ignore +/.php_cs export-ignore /README.md export-ignore /phpunit.xml.dist export-ignore /tests export-ignore diff --git a/.gitignore b/.gitignore index d191143..53c059c 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ vendor/ coverage/ composer.lock +.php_cs.cache diff --git a/.php_cs b/.php_cs new file mode 100644 index 0000000..240846c --- /dev/null +++ b/.php_cs @@ -0,0 +1,20 @@ +<?php + +return Symfony\CS\Config\Config::create() + ->setUsingCache(true) + ->level(Symfony\CS\FixerInterface::SYMFONY_LEVEL) + // use default SYMFONY_LEVEL and extra fixers: + ->fixers(array( + 'concat_with_spaces', + 'ordered_use', + 'phpdoc_order', + 'strict', + 'strict_param', + 'long_array_syntax', + )) + ->finder( + Symfony\CS\Finder\DefaultFinder::create() + ->in(__DIR__) + ->exclude(array('vendor')) + ) +; diff --git a/.travis.yml b/.travis.yml index 8cb01d0..6961d5d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -10,6 +10,9 @@ php: - hhvm matrix: + include: + - php: 7.0 + env: CS_FIXER=run fast_finish: true allow_failures: - php: hhvm @@ -37,6 +40,7 @@ before_script: script: - mkdir -p build/logs - phpunit -v --coverage-clover build/logs/clover.xml + - if [ "$CS_FIXER" = "run" ]; then php vendor/bin/php-cs-fixer fix --verbose --dry-run ; fi; after_script: - php vendor/bin/coveralls -v diff --git a/composer.json b/composer.json index 3921e36..62721e0 100644 --- a/composer.json +++ b/composer.json @@ -28,7 +28,8 @@ "monolog/monolog": "^1.13.1" }, "require-dev": { - "satooshi/php-coveralls": "~0.6" + "satooshi/php-coveralls": "~0.6", + "friendsofphp/php-cs-fixer": "^1.11" }, "autoload": { "psr-4": { "Readability\\": "src/" } diff --git a/src/JSLikeHTMLElement.php b/src/JSLikeHTMLElement.php index 798ac05..2ce4898 100644 --- a/src/JSLikeHTMLElement.php +++ b/src/JSLikeHTMLElement.php @@ -45,14 +45,14 @@ class
JSLikeHTMLElement extends \DOMElement */ public function __set($name, $value) { - if ($name == 'innerHTML') { + if ($name === 'innerHTML') { // first, empty the element for ($x = $this->childNodes->length - 1; $x >= 0; --$x) { $this->removeChild($this->childNodes->item($x)); } // $value holds our new inner HTML - if ($value != '') { + if ($value !== '') { $f = $this->ownerDocument->createDocumentFragment(); // appendXML() expects well-formed markup (XHTML) @@ -72,7 +72,7 @@ class JSLikeHTMLElement extends \DOMElement // We use it (and suppress the warning) because an HTML fragment will // be wrapped around tags which we don't really want to keep. // Note: despite the warning, if loadHTML succeeds it will return true. - $result = @$f->loadHTML(''.$value.''); + $result = @$f->loadHTML('' . $value . ''); if ($result) { $import = $f->getElementsByTagName('htmlfragment')->item(0); @@ -89,7 +89,7 @@ class JSLikeHTMLElement extends \DOMElement } } else { $trace = debug_backtrace(); - trigger_error('Undefined property via __set(): '.$name.' in '.$trace[0]['file'].' on line '.$trace[0]['line'], E_USER_NOTICE); + trigger_error('Undefined property via __set(): ' . $name . ' in ' . $trace[0]['file'] . ' on line ' . $trace[0]['line'], E_USER_NOTICE); } } @@ -102,7 +102,7 @@ class JSLikeHTMLElement extends \DOMElement */ public function __get($name) { - if ($name == 'innerHTML') { + if ($name === 'innerHTML') { $inner = ''; foreach ($this->childNodes as $child) { @@ -113,13 +113,13 @@ class JSLikeHTMLElement extends \DOMElement } $trace = debug_backtrace(); - trigger_error('Undefined property via __get(): '.$name.' in '.$trace[0]['file'].' on line '.$trace[0]['line'], E_USER_NOTICE); + trigger_error('Undefined property via __get(): ' . $name . ' in ' . $trace[0]['file'] . ' on line ' . $trace[0]['line'], E_USER_NOTICE); return; } public function __toString() { - return '['.$this->tagName.']'; + return '[' . $this->tagName . 
']'; } } diff --git a/src/Readability.php b/src/Readability.php index 6e09e66..f120c9f 100644 --- a/src/Readability.php +++ b/src/Readability.php @@ -243,10 +243,10 @@ class Readability implements LoggerAwareInterface { $this->original_html = $this->html; - $this->logger->debug('Parsing URL: '.$this->url); + $this->logger->debug('Parsing URL: ' . $this->url); if ($this->url) { - $this->domainRegExp = '/'.strtr(preg_replace('/www\d*\./', '', parse_url($this->url, PHP_URL_HOST)), array('.' => '\.')).'/'; + $this->domainRegExp = '/' . strtr(preg_replace('/www\d*\./', '', parse_url($this->url, PHP_URL_HOST)), array('.' => '\.')) . '/'; } mb_internal_encoding('UTF-8'); @@ -284,7 +284,7 @@ class Readability implements LoggerAwareInterface $this->html = mb_convert_encoding($this->html, 'HTML-ENTITIES', 'UTF-8'); - if (!($this->parser == 'html5lib' && ($this->dom = \HTML5_Parser::parse($this->html)))) { + if (!($this->parser === 'html5lib' && ($this->dom = \HTML5_Parser::parse($this->html)))) { libxml_use_internal_errors(true); $this->dom = new \DOMDocument(); @@ -325,7 +325,7 @@ class Readability implements LoggerAwareInterface $bodyElems = $this->dom->getElementsByTagName('body'); // WTF multiple body nodes? - if ($this->bodyCache == null) { + if ($this->bodyCache === null) { $this->bodyCache = ''; foreach ($bodyElems as $bodyNode) { $this->bodyCache .= trim($bodyNode->innerHTML); @@ -429,7 +429,7 @@ class Readability implements LoggerAwareInterface } } elseif (mb_strlen($curTitle) > 150 || mb_strlen($curTitle) < 15) { $hOnes = $this->dom->getElementsByTagName('h1'); - if ($hOnes->length == 1) { + if ($hOnes->length === 1) { $curTitle = $this->getInnerText($hOnes->item(0)); } } @@ -510,8 +510,8 @@ class Readability implements LoggerAwareInterface ++$linkCount; // Add a superscript reference after the article link. 
- $refLink->setAttribute('href', '#readabilityFootnoteLink-'.$linkCount); - $refLink->innerHTML = '['.$linkCount.']'; + $refLink->setAttribute('href', '#readabilityFootnoteLink-' . $linkCount); + $refLink->innerHTML = '[' . $linkCount . ']'; $refLink->setAttribute('class', 'readability-DoNotFootnote'); $refLink->setAttribute('style', 'color: inherit;'); @@ -522,14 +522,14 @@ class Readability implements LoggerAwareInterface } $articleLink->setAttribute('style', 'color: inherit; text-decoration: none;'); - $articleLink->setAttribute('name', 'readabilityLink-'.$linkCount); - $footnote->innerHTML = '^ '; - $footnoteLink->innerHTML = ($footnoteLink->getAttribute('title') != '' ? $footnoteLink->getAttribute('title') : $linkText); - $footnoteLink->setAttribute('name', 'readabilityFootnoteLink-'.$linkCount); + $articleLink->setAttribute('name', 'readabilityLink-' . $linkCount); + $footnote->innerHTML = '^ '; + $footnoteLink->innerHTML = ($footnoteLink->getAttribute('title') !== '' ? $footnoteLink->getAttribute('title') : $linkText); + $footnoteLink->setAttribute('name', 'readabilityFootnoteLink-' . $linkCount); $footnote->appendChild($footnoteLink); if ($linkDomain) { - $footnote->innerHTML = $footnote->innerHTML.' ('.$linkDomain.')'; + $footnote->innerHTML = $footnote->innerHTML . ' (' . $linkDomain . ')'; } $articleFootnotes->appendChild($footnote); } @@ -586,7 +586,7 @@ class Readability implements LoggerAwareInterface * already have a header. */ $h2s = $articleContent->getElementsByTagName('h2'); - if ($h2s->length == 1 && mb_strlen($this->getInnerText($h2s->item(0), true, true)) < 100) { + if ($h2s->length === 1 && mb_strlen($this->getInnerText($h2s->item(0), true, true)) < 100) { $this->clean($articleContent, 'h2'); } @@ -631,7 +631,7 @@ class Readability implements LoggerAwareInterface } unset($search, $replace); } catch (\Exception $e) { - $this->logger->error('Cleaning output HTML failed. 
Ignoring: '.$e->getMessage()); + $this->logger->error('Cleaning output HTML failed. Ignoring: ' . $e->getMessage()); } } } @@ -748,7 +748,7 @@ class Readability implements LoggerAwareInterface --$nodeIndex; $nodesToScore[] = $newNode; } catch (\Exception $e) { - $this->logger->error('Could not alter div/article to p, reverting back to div: '.$e->getMessage()); + $this->logger->error('Could not alter div/article to p, reverting back to div: ' . $e->getMessage()); } } else { // Will change these P elements back to text nodes after processing. @@ -848,7 +848,7 @@ class Readability implements LoggerAwareInterface $node = $candidates->item($c); // node should be readable but not inside of an article otherwise it's probably non-readable block if ($node->hasAttribute('readability') && (int) $node->getAttributeNode('readability')->value < 40 && ($node->parentNode ? strcasecmp($node->parentNode->tagName, 'article') !== 0 : true)) { - $this->logger->debug('Removing unlikely candidate (using note) '.$node->getNodePath().' by "'.$node->tagName.'" with readability '.($node->hasAttribute('readability') ? (int) $node->getAttributeNode('readability')->value : 0)); + $this->logger->debug('Removing unlikely candidate (using note) ' . $node->getNodePath() . ' by "' . $node->tagName . '" with readability ' . ($node->hasAttribute('readability') ? (int) $node->getAttributeNode('readability')->value : 0)); $node->parentNode->removeChild($node); } } @@ -860,13 +860,13 @@ class Readability implements LoggerAwareInterface $node = $candidates->item($c); // Remove unlikely candidates - $unlikelyMatchString = $node->getAttribute('class').' '.$node->getAttribute('id').' '.$node->getAttribute('style'); + $unlikelyMatchString = $node->getAttribute('class') . ' ' . $node->getAttribute('id') . ' ' . 
$node->getAttribute('style'); if (mb_strlen($unlikelyMatchString) > 3 && // don't process "empty" strings preg_match($this->regexps['unlikelyCandidates'], $unlikelyMatchString) && !preg_match($this->regexps['okMaybeItsACandidate'], $unlikelyMatchString) ) { - $this->logger->debug('Removing unlikely candidate (using conf) '.$node->getNodePath().' by "'.$unlikelyMatchString.'" with readability '.($node->hasAttribute('readability') ? (int) $node->getAttributeNode('readability')->value : 0)); + $this->logger->debug('Removing unlikely candidate (using conf) ' . $node->getNodePath() . ' by "' . $unlikelyMatchString . '" with readability ' . ($node->hasAttribute('readability') ? (int) $node->getAttributeNode('readability')->value : 0)); $node->parentNode->removeChild($node); --$nodeIndex; } @@ -893,7 +893,7 @@ class Readability implements LoggerAwareInterface $readability->value = round($readability->value * (1 - $this->getLinkDensity($item)), 0, PHP_ROUND_HALF_UP); if (!$topCandidate || $readability->value > (int) $topCandidate->getAttribute('readability')) { - $this->logger->debug('Candidate: '.$item->getNodePath().' ('.$item->getAttribute('class').':'.$item->getAttribute('id').') with score '.$readability->value); + $this->logger->debug('Candidate: ' . $item->getNodePath() . ' (' . $item->getAttribute('class') . ':' . $item->getAttribute('id') . ') with score ' . $readability->value); $topCandidate = $item; } } @@ -942,7 +942,7 @@ class Readability implements LoggerAwareInterface } } - $this->logger->debug('Top candidate: '.$topCandidate->getNodePath()); + $this->logger->debug('Top candidate: ' . $topCandidate->getNodePath()); /* * Now that we have the top candidate, look through its siblings for content that might also be related. 
@@ -962,7 +962,7 @@ class Readability implements LoggerAwareInterface $siblingNode = $siblingNodes->item($s); $siblingNodeName = $siblingNode->nodeName; $append = false; - $this->logger->debug('Looking at sibling node: '.$siblingNode->getNodePath().(($siblingNode->nodeType === XML_ELEMENT_NODE && $siblingNode->hasAttribute('readability')) ? (' with score '.$siblingNode->getAttribute('readability')) : '')); + $this->logger->debug('Looking at sibling node: ' . $siblingNode->getNodePath() . (($siblingNode->nodeType === XML_ELEMENT_NODE && $siblingNode->hasAttribute('readability')) ? (' with score ' . $siblingNode->getAttribute('readability')) : '')); if ($siblingNode->isSameNode($topCandidate)) { $append = true; @@ -971,7 +971,7 @@ class Readability implements LoggerAwareInterface $contentBonus = 0; // Give a bonus if sibling nodes and top candidates have the same classname. - if ($siblingNode->nodeType === XML_ELEMENT_NODE && $siblingNode->getAttribute('class') == $topCandidate->getAttribute('class') && $topCandidate->getAttribute('class') != '') { + if ($siblingNode->nodeType === XML_ELEMENT_NODE && $siblingNode->getAttribute('class') === $topCandidate->getAttribute('class') && $topCandidate->getAttribute('class') !== '') { $contentBonus += ((int) $topCandidate->getAttribute('readability')) * 0.2; } @@ -991,18 +991,18 @@ class Readability implements LoggerAwareInterface } if ($append) { - $this->logger->debug('Appending node: '.$siblingNode->getNodePath()); + $this->logger->debug('Appending node: ' . $siblingNode->getNodePath()); if (strcasecmp($siblingNodeName, 'div') !== 0 && strcasecmp($siblingNodeName, 'p') !== 0) { // We have a node that isn't a common block level element, like a form or td tag. Turn it into a div so it doesn't get filtered out later by accident. - $this->logger->debug('Altering siblingNode "'.$siblingNodeName.'" to "div".'); + $this->logger->debug('Altering siblingNode "' . $siblingNodeName . 
'" to "div".'); $nodeToAppend = $this->dom->createElement('div'); try { $nodeToAppend->setAttribute('alt', $siblingNodeName); $nodeToAppend->innerHTML = $siblingNode->innerHTML; } catch (\Exception $e) { - $this->logger->debug('Could not alter siblingNode "'.$siblingNodeName.'" to "div", reverting to original.'); + $this->logger->debug('Could not alter siblingNode "' . $siblingNodeName . '" to "div", reverting to original.'); $nodeToAppend = $siblingNode; --$s; --$sl; @@ -1038,17 +1038,17 @@ class Readability implements LoggerAwareInterface if ($this->flagIsActive(self::FLAG_STRIP_UNLIKELYS)) { $this->removeFlag(self::FLAG_STRIP_UNLIKELYS); - $this->logger->debug('...content is shorter than '.self::MIN_ARTICLE_LENGTH." letters, trying not to strip unlikely content.\n"); + $this->logger->debug('...content is shorter than ' . self::MIN_ARTICLE_LENGTH . " letters, trying not to strip unlikely content.\n"); return $this->grabArticle($this->body); } elseif ($this->flagIsActive(self::FLAG_WEIGHT_ATTRIBUTES)) { $this->removeFlag(self::FLAG_WEIGHT_ATTRIBUTES); - $this->logger->debug('...content is shorter than '.self::MIN_ARTICLE_LENGTH." letters, trying not to weight attributes.\n"); + $this->logger->debug('...content is shorter than ' . self::MIN_ARTICLE_LENGTH . " letters, trying not to weight attributes.\n"); return $this->grabArticle($this->body); } elseif ($this->flagIsActive(self::FLAG_CLEAN_CONDITIONALLY)) { $this->removeFlag(self::FLAG_CLEAN_CONDITIONALLY); - $this->logger->debug('...content is shorter than '.self::MIN_ARTICLE_LENGTH." letters, trying not to clean at all.\n"); + $this->logger->debug('...content is shorter than ' . self::MIN_ARTICLE_LENGTH . " letters, trying not to clean at all.\n"); return $this->grabArticle($this->body); } @@ -1178,7 +1178,7 @@ class Readability implements LoggerAwareInterface // $attributeValue = trim($element->getAttribute('class')." 
".$element->getAttribute('id')); $attributeValue = trim($element->getAttribute($attribute)); - if ($attributeValue != '') { + if ($attributeValue !== '') { if (preg_match($this->regexps['negative'], $attributeValue)) { $weight -= 25; } @@ -1250,7 +1250,7 @@ class Readability implements LoggerAwareInterface $currentItem = $targetList->item($y); if ($isEmbed) { - $attributeValues = $currentItem->getAttribute('src').' '.$currentItem->getAttribute('href'); + $attributeValues = $currentItem->getAttribute('src') . ' ' . $currentItem->getAttribute('href'); // First, check the elements attributes to see if any of them contain known media hosts if (preg_match($this->regexps['media'], $attributeValues)) { @@ -1295,7 +1295,7 @@ class Readability implements LoggerAwareInterface $node = $tagsList->item($i); $weight = $this->getWeight($node); $contentScore = ($node->hasAttribute('readability')) ? (int) $node->getAttribute('readability') : 0; - $this->logger->debug('Start conditional cleaning of '.$node->getNodePath().' (class='.$node->getAttribute('class').'; id='.$node->getAttribute('id').')'.(($node->hasAttribute('readability')) ? (' with score '.$node->getAttribute('readability')) : '')); + $this->logger->debug('Start conditional cleaning of ' . $node->getNodePath() . ' (class=' . $node->getAttribute('class') . '; id=' . $node->getAttribute('id') . ')' . (($node->hasAttribute('readability')) ? (' with score ' . $node->getAttribute('readability')) : '')); if ($weight + $contentScore < 0) { $this->logger->debug('Removing...'); @@ -1331,7 +1331,7 @@ class Readability implements LoggerAwareInterface $toRemove = false; if ($this->lightClean) { - if ($li > $p && $tag != 'ul' && $tag != 'ol') { + if ($li > $p && $tag !== 'ul' && $tag !== 'ol') { $this->logger->debug(' too many
  • elements, and parent is not