diff --git a/src/Readability.php b/src/Readability.php index 2866486..644cf62 100644 --- a/src/Readability.php +++ b/src/Readability.php @@ -214,7 +214,7 @@ class Readability /** * Get article title element. * - * @return DOMElement + * @return \DOMElement */ public function getTitle() { @@ -224,7 +224,7 @@ class Readability /** * Get article content element. * - * @return DOMElement + * @return \DOMElement */ public function getContent() { @@ -326,6 +326,8 @@ class Readability /** * Debug. + * + * @param string $msg */ protected function dbg($msg) //, $error=false) { @@ -348,11 +350,11 @@ class Readability /** * Run any post-process modifications to article content as necessary. * - * @param DOMElement + * @param \DOMElement $articleContent */ - public function postProcessContent($articleContent) + public function postProcessContent(\DOMElement $articleContent) { - if ($this->convertLinksToFootnotes && !preg_match('/\bwiki/', @$this->url)) { + if ($this->convertLinksToFootnotes && !preg_match('/\bwiki/', $this->url)) { $this->addFootnotes($articleContent); } } @@ -360,7 +362,7 @@ class Readability /** * Get the article title as an H1. * - * @return DOMElement + * @return \DOMElement */ protected function getArticleTitle() { @@ -433,8 +435,10 @@ class Readability * For easier reading, convert this document to have footnotes at the bottom rather than inline links. * * @see http://www.roughtype.com/archives/2010/05/experiments_in.php + * + * @param \DOMElement $articleContent */ - public function addFootnotes($articleContent) + public function addFootnotes(\DOMElement $articleContent) { $footnotesWrapper = $this->dom->createElement('footer'); $footnotesWrapper->setAttribute('class', 'readability-footnotes'); @@ -496,9 +500,9 @@ class Readability * Prepare the article node for display. Clean out any inline styles, * iframes, forms, strip extraneous
tags, etc.
*
- * @param DOMElement
+ * @param \DOMElement $articleContent
*/
- public function prepArticle($articleContent)
+ public function prepArticle(\DOMElement $articleContent)
{
if ($this->lightClean) {
$this->dbg('Light clean enabled.');
@@ -595,9 +599,9 @@ class Readability
* Initialize a node with the readability object. Also checks the
* className/id for special names to add to its score.
*
- * @param Element
+ * @param \DOMElement $node
*/
- protected function initializeNode($node)
+ protected function initializeNode(\DOMElement $node)
{
if (!isset($node->tagName)) {
return;
@@ -664,9 +668,11 @@ class Readability
* grabArticle - Using a variety of metrics (content score, classname, element types), find the content that is
* most likely to be the stuff a user wants to read. Then return it wrapped up in a div.
*
- * @return DOMElement
+ * @param \DOMElement|null $page
+ *
+ * @return \DOMElement
*/
- protected function grabArticle($page = null)
+ protected function grabArticle(\DOMElement $page = null)
{
if (!$page) {
$page = $this->dom;
@@ -1015,13 +1021,13 @@ class Readability
* Get the inner text of a node.
* This also strips out any excess whitespace to be found.
*
- * @param DOMElement $e
- * @param bool $normalizeSpaces (default: true)
- * @param bool $flattenLines (default: false)
+ * @param \DOMElement $e
+ * @param bool $normalizeSpaces (default: true)
+ * @param bool $flattenLines (default: false)
*
* @return string
*/
- public function getInnerText($e, $normalizeSpaces = true, $flattenLines = false)
+ public function getInnerText(\DOMElement $e, $normalizeSpaces = true, $flattenLines = false)
{
if (!isset($e->textContent) || $e->textContent === '') {
return '';
@@ -1041,9 +1047,9 @@ class Readability
/**
* Remove the style attribute on every $e and under.
*
- * @param DOMElement $e
+ * @param \DOMElement $e
*/
- public function cleanStyles($e)
+ public function cleanStyles(\DOMElement $e)
{
if (!is_object($e)) {
return;
@@ -1061,7 +1067,7 @@ class Readability
*
* @param string $text
*
- * @return number (integer)
+ * @return int
*/
public function getCommaCount($text)
{
@@ -1074,7 +1080,7 @@ class Readability
*
* @param string $text
*
- * @return number (integer)
+ * @return int
*/
public function getWordCount($text)
{
@@ -1086,12 +1092,12 @@ class Readability
* This is the amount of text that is inside a link divided by the total text in the node.
* Can exclude external references to differentiate between simple text and menus/infoblocks.
*
- * @param DOMElement $e
- * @param string $excludeExternal
+ * @param \DOMElement $e
+ * @param bool $excludeExternal
*
- * @return number (float)
+ * @return float
*/
- public function getLinkDensity($e, $excludeExternal = false)
+ public function getLinkDensity(\DOMElement $e, $excludeExternal = false)
{
$links = $e->getElementsByTagName('a');
$textLength = mb_strlen($this->getInnerText($e, true, true));
@@ -1115,12 +1121,12 @@ class Readability
* Get an element weight by attribute.
* Uses regular expressions to tell if this element looks good or bad.
*
- * @param DOMElement $element
- * @param string $attribute
+ * @param \DOMElement $element
+ * @param string $attribute
*
- * @return number (Integer)
+ * @return int
*/
- protected function weightAttribute($element, $attribute)
+ protected function weightAttribute(\DOMElement $element, $attribute)
{
if (!$element->hasAttribute($attribute)) {
return 0;
@@ -1150,11 +1156,11 @@ class Readability
/**
* Get an element relative weight.
*
- * @param DOMElement $e
+ * @param \DOMElement $e
*
- * @return number (Integer)
+ * @return int
*/
- public function getWeight($e)
+ public function getWeight(\DOMElement $e)
{
if (!$this->flagIsActive(self::FLAG_WEIGHT_ATTRIBUTES)) {
return 0;
@@ -1172,9 +1178,9 @@ class Readability
/**
* Remove extraneous break tags from a node.
*
- * @param DOMElement $node
+ * @param \DOMElement $node
*/
- public function killBreaks($node)
+ public function killBreaks(\DOMElement $node)
{
$html = $node->innerHTML;
$html = preg_replace($this->regexps['killBreaks'], '
', $html);
@@ -1187,10 +1193,10 @@ class Readability
*
* Updated 2012-09-18 to preserve youtube/vimeo iframes
*
- * @param DOMElement $e
- * @param string $tag
+ * @param \DOMElement $e
+ * @param string $tag
*/
- public function clean($e, $tag)
+ public function clean(\DOMElement $e, $tag)
{
$targetList = $e->getElementsByTagName($tag);
$isEmbed = ($tag === 'audio' || $tag === 'video' || $tag === 'iframe' || $tag === 'object' || $tag === 'embed');
@@ -1222,10 +1228,10 @@ class Readability
* "Fishy" is an algorithm based on content length, classnames,
* link density, number of images & embeds, etc.
*
- * @param DOMElement $e
- * @param string $tag
+ * @param \DOMElement $e
+ * @param string $tag
*/
- public function cleanConditionally($e, $tag)
+ public function cleanConditionally(\DOMElement $e, $tag)
{
if (!$this->flagIsActive(self::FLAG_CLEAN_CONDITIONALLY)) {
return;
@@ -1337,9 +1343,9 @@ class Readability
/**
* Clean out spurious headers from an Element. Checks things like classnames and link density.
*
- * @param DOMElement $e
+ * @param \DOMElement $e
*/
- public function cleanHeaders($e)
+ public function cleanHeaders(\DOMElement $e)
{
for ($headerIndex = 1; $headerIndex < 3; ++$headerIndex) {
$headers = $e->getElementsByTagName('h'.$headerIndex);
@@ -1351,16 +1357,33 @@ class Readability
}
}
+ /**
+ * Check whether the given flag is set in the current flags bitmask ($this->flags).
+ *
+ * @param int $flag Bit value to test, e.g. self::FLAG_CLEAN_CONDITIONALLY
+ *
+ * @return bool True when the bitwise AND of the flags and $flag is greater than zero
+ */
public function flagIsActive($flag)
{
return ($this->flags & $flag) > 0;
}
+ /**
+ * Add a flag by OR-ing it into the current flags bitmask ($this->flags).
+ *
+ * @param int $flag Bit value to set; bits that are already set are left unchanged
+ */
public function addFlag($flag)
{
$this->flags = $this->flags | $flag;
}
+ /**
+ * Remove a flag.
+ *
+ * @param int $flag
+ */
public function removeFlag($flag)
{
$this->flags = $this->flags & ~$flag;