Add type annotations to properties

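To preserve backward compatibility (BC), we are not adding native property type declarations for now; the types are documented with PHPDoc `@var` annotations only.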
pull/93/head
Jan Tojnar 1 year ago
parent 4f5360df90
commit 32267cb7b4
  1. 111
      src/Readability.php
  2. 2
      tests/ReadabilityTest.php

@ -24,25 +24,60 @@ class Readability implements LoggerAwareInterface
public const MIN_ARTICLE_LENGTH = 200; public const MIN_ARTICLE_LENGTH = 200;
public const MIN_NODE_LENGTH = 80; public const MIN_NODE_LENGTH = 80;
public const MAX_LINK_DENSITY = 0.25; public const MAX_LINK_DENSITY = 0.25;
/**
* @var bool
*/
public $convertLinksToFootnotes = false; public $convertLinksToFootnotes = false;
/**
* @var bool
*/
public $revertForcedParagraphElements = false; public $revertForcedParagraphElements = false;
/**
* @var ?\DOMElement
*/
public $articleTitle; public $articleTitle;
/**
* @var ?\DOMElement
*/
public $articleContent; public $articleContent;
/**
* @var ?string
*/
public $original_html; public $original_html;
/** /**
* @var \DOMDocument * @var ?\DOMDocument
*/ */
public $dom; public $dom;
// optional - URL where HTML was retrieved
/**
* @var ?string URL where HTML was retrieved
*/
public $url = null; public $url = null;
// preserves more content (experimental)
/**
* @var bool preserves more content (experimental)
*/
public $lightClean = true; public $lightClean = true;
// no longer used, kept to avoid a BC break
/**
* @var bool no longer used, kept to avoid a BC break
*/
public $debug = false; public $debug = false;
/**
* @var bool
*/
public $tidied = false; public $tidied = false;
/** /**
* All of the regular expressions in use within readability. * @var array<string, string> All of the regular expressions in use within readability.
*
* Defined up here so we don't instantiate them repeatedly in loops. * Defined up here so we don't instantiate them repeatedly in loops.
*/ */
public $regexps = [ public $regexps = [
@ -57,10 +92,18 @@ class Readability implements LoggerAwareInterface
'hasContent' => '/\S$/', 'hasContent' => '/\S$/',
'isNotVisible' => '/display\s*:\s*none/', 'isNotVisible' => '/display\s*:\s*none/',
]; ];
/**
* @var array<string>
*/
public $defaultTagsToScore = ['section', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'td', 'pre']; public $defaultTagsToScore = ['section', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'td', 'pre'];
/**
* @var array<string>
*/
public $phrasingElements = [
// The commented out elements qualify as phrasing content but tend to be // The commented out elements qualify as phrasing content but tend to be
// removed by readability when put into paragraphs, so we ignore them here. // removed by readability when put into paragraphs, so we ignore them here.
public $phrasingElements = [
// "CANVAS", "IFRAME", "SVG", "VIDEO", // "CANVAS", "IFRAME", "SVG", "VIDEO",
'ABBR', 'AUDIO', 'B', 'BDO', 'BR', 'BUTTON', 'CITE', 'CODE', 'DATA', 'ABBR', 'AUDIO', 'B', 'BDO', 'BR', 'BUTTON', 'CITE', 'CODE', 'DATA',
'DATALIST', 'DFN', 'EM', 'EMBED', 'I', 'IMG', 'INPUT', 'KBD', 'LABEL', 'DATALIST', 'DFN', 'EM', 'EMBED', 'I', 'IMG', 'INPUT', 'KBD', 'LABEL',
@ -68,6 +111,10 @@ class Readability implements LoggerAwareInterface
'RUBY', 'SAMP', 'SCRIPT', 'SELECT', 'SMALL', 'SPAN', 'STRONG', 'SUB', 'RUBY', 'SAMP', 'SCRIPT', 'SELECT', 'SMALL', 'SPAN', 'STRONG', 'SUB',
'SUP', 'TEXTAREA', 'TIME', 'VAR', 'WBR', 'SUP', 'TEXTAREA', 'TIME', 'VAR', 'WBR',
]; ];
/**
* @var array<string, bool|string>
*/
public $tidy_config = [ public $tidy_config = [
'tidy-mark' => false, 'tidy-mark' => false,
'vertical-space' => false, 'vertical-space' => false,
@ -92,20 +139,55 @@ class Readability implements LoggerAwareInterface
'output-encoding' => 'utf8', 'output-encoding' => 'utf8',
'hide-comments' => true, 'hide-comments' => true,
]; ];
// article domain regexp for calibration
/**
* @var ?string article domain regexp for calibration
*/
protected $domainRegExp = null; protected $domainRegExp = null;
/**
* @var ?\DOMElement
*/
protected $body = null; protected $body = null;
// Cache the body HTML in case we need to re-use it later
/**
* @var ?string Cache the body HTML in case we need to re-use it later
*/
protected $bodyCache = null; protected $bodyCache = null;
// 1 | 2 | 4; // Start with all processing flags set.
protected $flags = 7; /**
// indicates whether we were able to extract or not * @var int-mask-of<self::FLAG_*> start with all processing flags set
*/
protected $flags = self::FLAG_STRIP_UNLIKELYS | self::FLAG_WEIGHT_ATTRIBUTES | self::FLAG_CLEAN_CONDITIONALLY;
/**
* @var bool indicates whether we were able to extract or not
*/
protected $success = false; protected $success = false;
/**
* @var LoggerInterface
*/
protected $logger; protected $logger;
/**
* @var string
*/
protected $parser; protected $parser;
/**
* @var string
*/
protected $html; protected $html;
/**
* @var bool
*/
protected $useTidy; protected $useTidy;
// raw HTML filters
/**
* @var array<string, string> raw HTML filters
*/
protected $pre_filters = [ protected $pre_filters = [
// remove spans as we redefine styles and they're probably special-styled // remove spans as we redefine styles and they're probably special-styled
'!</?span[^>]*>!is' => '', '!</?span[^>]*>!is' => '',
@ -118,7 +200,10 @@ class Readability implements LoggerAwareInterface
// replace fonts to spans // replace fonts to spans
'!<(/?)font[^>]*>!is' => '<\\1span>', '!<(/?)font[^>]*>!is' => '<\\1span>',
]; ];
// output HTML filters
/**
* @var array<string, string> output HTML filters
*/
protected $post_filters = [ protected $post_filters = [
// replace excessive br's // replace excessive br's
'/<br\s*\/?>\s*<p/i' => '<p', '/<br\s*\/?>\s*<p/i' => '<p',

@ -338,7 +338,7 @@ class ReadabilityTest extends \PHPUnit\Framework\TestCase
$oldErrorReporting = error_reporting(\E_ALL); $oldErrorReporting = error_reporting(\E_ALL);
$oldDisplayErrors = ini_set('display_errors', '1'); $oldDisplayErrors = ini_set('display_errors', '1');
// dummy function to be used to the next test // dummy function to be used to the next test
set_error_handler(function (int $errno, string $errstr, string $errfile, int $errline) { set_error_handler(function (int $errno, string $errstr, string $errfile, int $errline): bool {
throw new \Exception($errstr, $errno); throw new \Exception($errstr, $errno);
}); });

Loading…
Cancel
Save