diff --git a/src/Readability.php b/src/Readability.php index 2866486..644cf62 100644 --- a/src/Readability.php +++ b/src/Readability.php @@ -214,7 +214,7 @@ class Readability /** * Get article title element. * - * @return DOMElement + * @return \DOMElement */ public function getTitle() { @@ -224,7 +224,7 @@ class Readability /** * Get article content element. * - * @return DOMElement + * @return \DOMElement */ public function getContent() { @@ -326,6 +326,8 @@ class Readability /** * Debug. + * + * @param string $msg */ protected function dbg($msg) //, $error=false) { @@ -348,11 +350,11 @@ class Readability /** * Run any post-process modifications to article content as necessary. * - * @param DOMElement + * @param \DOMElement $articleContent */ - public function postProcessContent($articleContent) + public function postProcessContent(\DOMElement $articleContent) { - if ($this->convertLinksToFootnotes && !preg_match('/\bwiki/', @$this->url)) { + if ($this->convertLinksToFootnotes && !preg_match('/\bwiki/', $this->url)) { $this->addFootnotes($articleContent); } } @@ -360,7 +362,7 @@ class Readability /** * Get the article title as an H1. * - * @return DOMElement + * @return \DOMElement */ protected function getArticleTitle() { @@ -433,8 +435,10 @@ class Readability * For easier reading, convert this document to have footnotes at the bottom rather than inline links. * * @see http://www.roughtype.com/archives/2010/05/experiments_in.php + * + * @param \DOMElement $articleContent */ - public function addFootnotes($articleContent) + public function addFootnotes(\DOMElement $articleContent) { $footnotesWrapper = $this->dom->createElement('footer'); $footnotesWrapper->setAttribute('class', 'readability-footnotes'); @@ -496,9 +500,9 @@ class Readability * Prepare the article node for display. Clean out any inline styles, * iframes, forms, strip extraneous

tags, etc. * - * @param DOMElement + * @param \DOMElement $articleContent */ - public function prepArticle($articleContent) + public function prepArticle(\DOMElement $articleContent) { if ($this->lightClean) { $this->dbg('Light clean enabled.'); @@ -595,9 +599,9 @@ class Readability * Initialize a node with the readability object. Also checks the * className/id for special names to add to its score. * - * @param Element + * @param \DOMElement $node */ - protected function initializeNode($node) + protected function initializeNode(\DOMElement $node) { if (!isset($node->tagName)) { return; @@ -664,9 +668,11 @@ class Readability * grabArticle - Using a variety of metrics (content score, classname, element types), find the content that is * most likely to be the stuff a user wants to read. Then return it wrapped up in a div. * - * @return DOMElement + * @param \DOMElement $page + * + * @return \DOMElement */ - protected function grabArticle($page = null) + protected function grabArticle(\DOMElement $page = null) { if (!$page) { $page = $this->dom; @@ -1015,13 +1021,13 @@ class Readability * Get the inner text of a node. * This also strips out any excess whitespace to be found. * - * @param DOMElement $e - * @param bool $normalizeSpaces (default: true) - * @param bool $flattenLines (default: false) + * @param \DOMElement $e + * @param bool $normalizeSpaces (default: true) + * @param bool $flattenLines (default: false) * * @return string */ - public function getInnerText($e, $normalizeSpaces = true, $flattenLines = false) + public function getInnerText(\DOMElement $e, $normalizeSpaces = true, $flattenLines = false) { if (!isset($e->textContent) || $e->textContent === '') { return ''; @@ -1041,9 +1047,9 @@ class Readability /** * Remove the style attribute on every $e and under. 
* - * @param DOMElement $e + * @param \DOMElement $e */ - public function cleanStyles($e) + public function cleanStyles(\DOMElement $e) { if (!is_object($e)) { return; @@ -1061,7 +1067,7 @@ class Readability * * @param string $text * - * @return number (integer) + * @return int */ public function getCommaCount($text) { @@ -1074,7 +1080,7 @@ class Readability * * @param string $text * - * @return number (integer) + * @return int */ public function getWordCount($text) { @@ -1086,12 +1092,12 @@ class Readability * This is the amount of text that is inside a link divided by the total text in the node. * Can exclude external references to differentiate between simple text and menus/infoblocks. * - * @param DOMElement $e - * @param string $excludeExternal + * @param \DOMElement $e + * @param bool $excludeExternal * - * @return number (float) + * @return float */ - public function getLinkDensity($e, $excludeExternal = false) + public function getLinkDensity(\DOMElement $e, $excludeExternal = false) { $links = $e->getElementsByTagName('a'); $textLength = mb_strlen($this->getInnerText($e, true, true)); @@ -1115,12 +1121,12 @@ class Readability * Get an element weight by attribute. * Uses regular expressions to tell if this element looks good or bad. * - * @param DOMElement $element - * @param string $attribute + * @param \DOMElement $element + * @param string $attribute * - * @return number (Integer) + * @return int */ - protected function weightAttribute($element, $attribute) + protected function weightAttribute(\DOMElement $element, $attribute) { if (!$element->hasAttribute($attribute)) { return 0; @@ -1150,11 +1156,11 @@ class Readability /** * Get an element relative weight. 
* - * @param DOMElement $e + * @param \DOMElement $e * - * @return number (Integer) + * @return int */ - public function getWeight($e) + public function getWeight(\DOMElement $e) { if (!$this->flagIsActive(self::FLAG_WEIGHT_ATTRIBUTES)) { return 0; @@ -1172,9 +1178,9 @@ class Readability /** * Remove extraneous break tags from a node. * - * @param DOMElement $node + * @param \DOMElement $node */ - public function killBreaks($node) + public function killBreaks(\DOMElement $node) { $html = $node->innerHTML; $html = preg_replace($this->regexps['killBreaks'], '
', $html); @@ -1187,10 +1193,10 @@ class Readability * * Updated 2012-09-18 to preserve youtube/vimeo iframes * - * @param DOMElement $e - * @param string $tag + * @param \DOMElement $e + * @param string $tag */ - public function clean($e, $tag) + public function clean(\DOMElement $e, $tag) { $targetList = $e->getElementsByTagName($tag); $isEmbed = ($tag === 'audio' || $tag === 'video' || $tag === 'iframe' || $tag === 'object' || $tag === 'embed'); @@ -1222,10 +1228,10 @@ class Readability * "Fishy" is an algorithm based on content length, classnames, * link density, number of images & embeds, etc. * - * @param DOMElement $e - * @param string $tag + * @param \DOMElement $e + * @param string $tag */ - public function cleanConditionally($e, $tag) + public function cleanConditionally(\DOMElement $e, $tag) { if (!$this->flagIsActive(self::FLAG_CLEAN_CONDITIONALLY)) { return; @@ -1337,9 +1343,9 @@ class Readability /** * Clean out spurious headers from an Element. Checks things like classnames and link density. * - * @param DOMElement $e + * @param \DOMElement $e */ - public function cleanHeaders($e) + public function cleanHeaders(\DOMElement $e) { for ($headerIndex = 1; $headerIndex < 3; ++$headerIndex) { $headers = $e->getElementsByTagName('h'.$headerIndex); @@ -1351,16 +1357,33 @@ class Readability } } + /** + * Check if the given flag is active. + * + * @param int $flag + * + * @return bool + */ public function flagIsActive($flag) { return ($this->flags & $flag) > 0; } + /** + * Add a flag. + * + * @param int $flag + */ public function addFlag($flag) { $this->flags = $this->flags | $flag; } + /** + * Remove a flag. + * + * @param int $flag + */ public function removeFlag($flag) { $this->flags = $this->flags & ~$flag;