Add type annotations to properties

To preserve backward compatibility (BC), we only add PHPDoc `@var` annotations and do not use native property type declarations for now.
pull/93/head
Jan Tojnar 1 year ago
parent 4f5360df90
commit 32267cb7b4
  1. 111
      src/Readability.php
  2. 2
      tests/ReadabilityTest.php

@ -24,25 +24,60 @@ class Readability implements LoggerAwareInterface
public const MIN_ARTICLE_LENGTH = 200;
public const MIN_NODE_LENGTH = 80;
public const MAX_LINK_DENSITY = 0.25;
/**
* @var bool
*/
public $convertLinksToFootnotes = false;
/**
* @var bool
*/
public $revertForcedParagraphElements = false;
/**
* @var ?\DOMElement
*/
public $articleTitle;
/**
* @var ?\DOMElement
*/
public $articleContent;
/**
* @var ?string
*/
public $original_html;
/**
* @var \DOMDocument
* @var ?\DOMDocument
*/
public $dom;
// optional - URL where HTML was retrieved
/**
* @var ?string URL where HTML was retrieved
*/
public $url = null;
// preserves more content (experimental)
/**
* @var bool preserves more content (experimental)
*/
public $lightClean = true;
// no longer used, kept to avoid a BC break
/**
* @var bool no longer used, kept to avoid a BC break
*/
public $debug = false;
/**
* @var bool
*/
public $tidied = false;
/**
* All of the regular expressions in use within readability.
* @var array<string, string> All of the regular expressions in use within readability.
*
* Defined up here so we don't instantiate them repeatedly in loops.
*/
public $regexps = [
@ -57,10 +92,18 @@ class Readability implements LoggerAwareInterface
'hasContent' => '/\S$/',
'isNotVisible' => '/display\s*:\s*none/',
];
/**
* @var array<string>
*/
public $defaultTagsToScore = ['section', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'td', 'pre'];
/**
* @var array<string>
*/
public $phrasingElements = [
// The commented out elements qualify as phrasing content but tend to be
// removed by readability when put into paragraphs, so we ignore them here.
public $phrasingElements = [
// "CANVAS", "IFRAME", "SVG", "VIDEO",
'ABBR', 'AUDIO', 'B', 'BDO', 'BR', 'BUTTON', 'CITE', 'CODE', 'DATA',
'DATALIST', 'DFN', 'EM', 'EMBED', 'I', 'IMG', 'INPUT', 'KBD', 'LABEL',
@ -68,6 +111,10 @@ class Readability implements LoggerAwareInterface
'RUBY', 'SAMP', 'SCRIPT', 'SELECT', 'SMALL', 'SPAN', 'STRONG', 'SUB',
'SUP', 'TEXTAREA', 'TIME', 'VAR', 'WBR',
];
/**
* @var array<string, bool|string>
*/
public $tidy_config = [
'tidy-mark' => false,
'vertical-space' => false,
@ -92,20 +139,55 @@ class Readability implements LoggerAwareInterface
'output-encoding' => 'utf8',
'hide-comments' => true,
];
// article domain regexp for calibration
/**
* @var ?string article domain regexp for calibration
*/
protected $domainRegExp = null;
/**
* @var ?\DOMElement
*/
protected $body = null;
// Cache the body HTML in case we need to re-use it later
/**
* @var ?string Cache the body HTML in case we need to re-use it later
*/
protected $bodyCache = null;
// 1 | 2 | 4; // Start with all processing flags set.
protected $flags = 7;
// indicates whether we were able to extract or not
/**
* @var int-mask-of<self::FLAG_*> start with all processing flags set
*/
protected $flags = self::FLAG_STRIP_UNLIKELYS | self::FLAG_WEIGHT_ATTRIBUTES | self::FLAG_CLEAN_CONDITIONALLY;
/**
* @var bool indicates whether we were able to extract or not
*/
protected $success = false;
/**
* @var LoggerInterface
*/
protected $logger;
/**
* @var string
*/
protected $parser;
/**
* @var string
*/
protected $html;
/**
* @var bool
*/
protected $useTidy;
// raw HTML filters
/**
* @var array<string, string> raw HTML filters
*/
protected $pre_filters = [
// remove spans as we redefine styles and they're probably special-styled
'!</?span[^>]*>!is' => '',
@ -118,7 +200,10 @@ class Readability implements LoggerAwareInterface
// replace fonts to spans
'!<(/?)font[^>]*>!is' => '<\\1span>',
];
// output HTML filters
/**
* @var array<string, string> output HTML filters
*/
protected $post_filters = [
// replace excessive br's
'/<br\s*\/?>\s*<p/i' => '<p',

@ -338,7 +338,7 @@ class ReadabilityTest extends \PHPUnit\Framework\TestCase
$oldErrorReporting = error_reporting(\E_ALL);
$oldDisplayErrors = ini_set('display_errors', '1');
// dummy error handler to be used for the next test
set_error_handler(function (int $errno, string $errstr, string $errfile, int $errline) {
set_error_handler(function (int $errno, string $errstr, string $errfile, int $errline): bool {
throw new \Exception($errstr, $errno);
});

Loading…
Cancel
Save