From 235baf965c5c0675d44218cb86e4eb88de58ef9e Mon Sep 17 00:00:00 2001 From: Jan Tojnar Date: Mon, 3 Mar 2025 23:26:30 +0100 Subject: [PATCH] Do not set domainRegExp for local files `parse_url($this->url, \PHP_URL_HOST)` will return `null` for a local filesystem path. Casting it to `string` will produce an empty regular expression, which would match any link when computing link density. (cherry picked from commit c7208f6ad2febedce81a29dc276e15c42265fc1e) This also fixes a warning, since 1.x passes the `null` directly to `preg_replace` instead of explicitly casting it to `string`. --- src/Readability.php | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/Readability.php b/src/Readability.php index 1d2d2f5..207aeee 100644 --- a/src/Readability.php +++ b/src/Readability.php @@ -1396,7 +1396,10 @@ class Readability implements LoggerAwareInterface $this->logger->debug('Parsing URL: ' . $this->url); if ($this->url) { - $this->domainRegExp = '/' . strtr(preg_replace('/www\d*\./', '', parse_url($this->url, \PHP_URL_HOST)), ['.' => '\.']) . '/'; + $host = parse_url($this->url, \PHP_URL_HOST); + if (null !== $host) { + $this->domainRegExp = '/' . strtr(preg_replace('/www\d*\./', '', $host), ['.' => '\.']) . '/'; + } } mb_internal_encoding('UTF-8');