From 9ed89bde92764babdde6d10b16fffcb225944bcc Mon Sep 17 00:00:00 2001 From: Jan Tojnar Date: Fri, 31 Mar 2023 02:59:44 +0200 Subject: [PATCH] Fix PHP-Cs-Fixer changes 1) src/Readability.php (braces, no_unneeded_control_parentheses, single_line_comment_spacing, global_namespace_import, no_unused_imports, phpdoc_align) 2) src/JSLikeHTMLElement.php (phpdoc_separation) Switch code blocks to Markdown syntax to work around `phpdoc_separation`, ApiGen uses Markdown these days anyway. --- src/JSLikeHTMLElement.php | 8 ++--- src/Readability.php | 71 +++++++++++++++++++-------------------- 2 files changed, 39 insertions(+), 40 deletions(-) diff --git a/src/JSLikeHTMLElement.php b/src/JSLikeHTMLElement.php index 3f382e1..2ec11a0 100644 --- a/src/JSLikeHTMLElement.php +++ b/src/JSLikeHTMLElement.php @@ -39,9 +39,9 @@ class JSLikeHTMLElement extends \DOMElement /** * Used for setting innerHTML like it's done in JavaScript:. * - * @code + * ```php * $div->innerHTML = '

Chapter 2

The story begins...

'; - * @endcode + * ``` */ public function __set($name, $value) { @@ -105,9 +105,9 @@ class JSLikeHTMLElement extends \DOMElement /** * Used for getting innerHTML like it's done in JavaScript:. * - * @code + * ```php * $string = $div->innerHTML; - * @endcode + * ``` */ public function __get($name) { diff --git a/src/Readability.php b/src/Readability.php index 4336bca..df55a8a 100644 --- a/src/Readability.php +++ b/src/Readability.php @@ -2,7 +2,6 @@ namespace Readability; -use DOMElement; use Masterminds\HTML5; use Psr\Log\LoggerAwareInterface; use Psr\Log\LoggerInterface; @@ -115,7 +114,7 @@ class Readability implements LoggerAwareInterface // HACK: replace linebreaks plus br's with p's '!(]*>[ \r\n\s]*){2,}!i' => '

', // replace noscripts - //'!!is' => '', + // '!!is' => '', // replace fonts to spans '!<(/?)font[^>]*>!is' => '<\\1span>', ]; @@ -126,8 +125,8 @@ class Readability implements LoggerAwareInterface // replace empty tags that break layouts '!<(?:a|div|p|figure)[^>]+/>!is' => '', // remove all attributes on text tags - //'!<(\s*/?\s*(?:blockquote|br|hr|code|div|article|span|footer|aside|p|pre|dl|li|ul|ol)) [^>]+>!is' => "<\\1>", - //single newlines cleanup + // '!<(\s*/?\s*(?:blockquote|br|hr|code|div|article|span|footer|aside|p|pre|dl|li|ul|ol)) [^>]+>!is' => "<\\1>", + // single newlines cleanup "/\n+/" => "\n", // modern web... '!]*>\s* 'convertLinksToFootnotes && !preg_match('/\bwiki/', $this->url)) { $this->addFootnotes($articleContent); @@ -292,7 +291,7 @@ class Readability implements LoggerAwareInterface * * @see http://www.roughtype.com/archives/2010/05/experiments_in.php */ - public function addFootnotes(DOMElement $articleContent): void + public function addFootnotes(\DOMElement $articleContent): void { $footnotesWrapper = $this->dom->createElement('footer'); $footnotesWrapper->setAttribute('class', 'readability-footnotes'); @@ -335,7 +334,7 @@ class Readability implements LoggerAwareInterface $articleLink->setAttribute('style', 'color: inherit; text-decoration: none;'); $articleLink->setAttribute('name', 'readabilityLink-' . $linkCount); $footnote->setInnerHtml('^ '); - $footnoteLink->setInnerHtml(('' !== $footnoteLink->getAttribute('title') ? $footnoteLink->getAttribute('title') : $linkText)); + $footnoteLink->setInnerHtml('' !== $footnoteLink->getAttribute('title') ? $footnoteLink->getAttribute('title') : $linkText); $footnoteLink->setAttribute('name', 'readabilityFootnoteLink-' . $linkCount); $footnote->appendChild($footnoteLink); @@ -356,7 +355,7 @@ class Readability implements LoggerAwareInterface */ public function prepArticle(\DOMNode $articleContent): void { - if (!$articleContent instanceof DOMElement) { + if (!$articleContent instanceof \DOMElement) { return; } @@ -456,9 +455,9 @@ class Readability implements LoggerAwareInterface * Get the inner text of a node. * This also strips out any excess whitespace to be found. * - * @param DOMElement $e - * @param bool $normalizeSpaces (default: true) - * @param bool $flattenLines (default: false) + * @param \DOMElement $e + * @param bool $normalizeSpaces (default: true) + * @param bool $flattenLines (default: false) */ public function getInnerText($e, bool $normalizeSpaces = true, bool $flattenLines = false): string { @@ -482,7 +481,7 @@ class Readability implements LoggerAwareInterface /** * Remove the style attribute on every $e and under. */ - public function cleanStyles(DOMElement $e): void + public function cleanStyles(\DOMElement $e): void { if (\is_object($e)) { $elems = $e->getElementsByTagName('*'); @@ -515,7 +514,7 @@ class Readability implements LoggerAwareInterface * This is the amount of text that is inside a link divided by the total text in the node. * Can exclude external references to differentiate between simple text and menus/infoblocks. */ - public function getLinkDensity(DOMElement $e, bool $excludeExternal = false): float + public function getLinkDensity(\DOMElement $e, bool $excludeExternal = false): float { $links = $e->getElementsByTagName('a'); $textLength = mb_strlen($this->getInnerText($e, true, true)); @@ -538,7 +537,7 @@ class Readability implements LoggerAwareInterface /** * Get an element relative weight. */ - public function getWeight(DOMElement $e): int + public function getWeight(\DOMElement $e): int { if (!$this->flagIsActive(self::FLAG_WEIGHT_ATTRIBUTES)) { return 0; @@ -556,7 +555,7 @@ class Readability implements LoggerAwareInterface /** * Remove extraneous break tags from a node. */ - public function killBreaks(DOMElement $node): void + public function killBreaks(\DOMElement $node): void { $html = $node->getInnerHTML(); $html = preg_replace($this->regexps['killBreaks'], '
', $html); @@ -569,7 +568,7 @@ class Readability implements LoggerAwareInterface * * Updated 2012-09-18 to preserve youtube/vimeo iframes */ - public function clean(DOMElement $e, string $tag): void + public function clean(\DOMElement $e, string $tag): void { $targetList = $e->getElementsByTagName($tag); $isEmbed = ('audio' === $tag || 'video' === $tag || 'iframe' === $tag || 'object' === $tag || 'embed' === $tag); @@ -601,7 +600,7 @@ class Readability implements LoggerAwareInterface * "Fishy" is an algorithm based on content length, classnames, * link density, number of images & embeds, etc. */ - public function cleanConditionally(DOMElement $e, string $tag): void + public function cleanConditionally(\DOMElement $e, string $tag): void { if (!$this->flagIsActive(self::FLAG_CLEAN_CONDITIONALLY)) { return; @@ -714,7 +713,7 @@ class Readability implements LoggerAwareInterface /** * Clean out spurious headers from an Element. Checks things like classnames and link density. */ - public function cleanHeaders(DOMElement $e): void + public function cleanHeaders(\DOMElement $e): void { for ($headerIndex = 1; $headerIndex < 3; ++$headerIndex) { $headers = $e->getElementsByTagName('h' . $headerIndex); @@ -754,7 +753,7 @@ class Readability implements LoggerAwareInterface /** * Get the article title as an H1. * - * @return DOMElement + * @return \DOMElement */ protected function getArticleTitle() { @@ -826,7 +825,7 @@ class Readability implements LoggerAwareInterface * Initialize a node with the readability object. Also checks the * className/id for special names to add to its score. */ - protected function initializeNode(DOMElement $node): void + protected function initializeNode(\DOMElement $node): void { if (!isset($node->tagName)) { return; @@ -894,11 +893,11 @@ class Readability implements LoggerAwareInterface * Using a variety of metrics (content score, classname, element types), find the content that is * most likely to be the stuff a user wants to read. Then return it wrapped up in a div. * - * @param DOMElement $page + * @param \DOMElement $page * - * @return DOMElement|false + * @return \DOMElement|false */ - protected function grabArticle(DOMElement $page = null) + protected function grabArticle(\DOMElement $page = null) { if (!$page) { $page = $this->dom; @@ -1040,7 +1039,7 @@ class Readability implements LoggerAwareInterface // For every SCORE_CHARS_IN_PARAGRAPH (default:100) characters in this paragraph, add another point. Up to 3 points. $contentScore += min(floor(mb_strlen($innerText) / self::SCORE_CHARS_IN_PARAGRAPH), 3); // For every SCORE_WORDS_IN_PARAGRAPH (default:20) words in this paragraph, add another point. Up to 3 points. - //$contentScore += min(floor($this->getWordCount($innerText) / self::SCORE_WORDS_IN_PARAGRAPH), 3); + // $contentScore += min(floor($this->getWordCount($innerText) / self::SCORE_WORDS_IN_PARAGRAPH), 3); foreach ($ancestors as $level => $ancestor) { if (!$ancestor->nodeName || !$ancestor->parentNode) { @@ -1211,7 +1210,7 @@ class Readability implements LoggerAwareInterface if (0 === strcasecmp($tagName, 'td') || 0 === strcasecmp($tagName, 'tr')) { $up = $topCandidate; - if ($up->parentNode instanceof DOMElement) { + if ($up->parentNode instanceof \DOMElement) { $up = $up->parentNode; if (0 === strcasecmp($up->tagName, 'table')) { @@ -1292,8 +1291,8 @@ class Readability implements LoggerAwareInterface // To ensure a node does not interfere with readability styles, remove its classnames & ids. // Now done via RegExp post_filter. - //$nodeToAppend->removeAttribute('class'); - //$nodeToAppend->removeAttribute('id'); + // $nodeToAppend->removeAttribute('class'); + // $nodeToAppend->removeAttribute('id'); // Append sibling and subtract from our list as appending removes a node. $articleContent->appendChild($nodeToAppend); } @@ -1340,7 +1339,7 @@ class Readability implements LoggerAwareInterface * Get an element weight by attribute. * Uses regular expressions to tell if this element looks good or bad. */ - protected function weightAttribute(DOMElement $element, string $attribute): int + protected function weightAttribute(\DOMElement $element, string $attribute): int { if (!$element->hasAttribute($attribute)) { return 0; @@ -1443,14 +1442,14 @@ class Readability implements LoggerAwareInterface libxml_use_internal_errors(false); } - $this->dom->registerNodeClass(DOMElement::class, \Readability\JSLikeHTMLElement::class); + $this->dom->registerNodeClass(\DOMElement::class, \Readability\JSLikeHTMLElement::class); } - private function getAncestors(DOMElement $node, int $maxDepth = 0): array + private function getAncestors(\DOMElement $node, int $maxDepth = 0): array { $ancestors = []; $i = 0; - while ($node->parentNode instanceof DOMElement) { + while ($node->parentNode instanceof \DOMElement) { $ancestors[] = $node->parentNode; if (++$i === $maxDepth) { break; @@ -1470,7 +1469,7 @@ class Readability implements LoggerAwareInterface }, iterator_to_array($node->childNodes)), true)); } - private function hasSingleTagInsideElement(DOMElement $node, string $tag): bool + private function hasSingleTagInsideElement(\DOMElement $node, string $tag): bool { if (1 !== $node->childNodes->length || $node->childNodes->item(0)->nodeName !== $tag) { return false; @@ -1490,11 +1489,11 @@ class Readability implements LoggerAwareInterface * Tidy must be configured to not clean the input for this function to * work as expected, see $this->tidy_config['clean'] */ - private function isNodeVisible(DOMElement $node): bool + private function isNodeVisible(\DOMElement $node): bool { return !($node->hasAttribute('style') && preg_match($this->regexps['isNotVisible'], $node->getAttribute('style')) - ) + ) && !$node->hasAttribute('hidden'); } }