From 6689f199562836142fa4c3e82b1885ff56255ccb Mon Sep 17 00:00:00 2001 From: Kevin Decherf Date: Sat, 11 Jun 2022 00:11:36 +0200 Subject: [PATCH] Strip script and style tags through ::clean() method instead of preg_replace Huge tags can lead to a failure of preg_replace, thus erasing the whole fetched content. Fixes https://github.com/wallabag/wallabag/issues/5847 Signed-off-by: Kevin Decherf --- src/Readability.php | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/Readability.php b/src/Readability.php index 306fcf4..4336bca 100644 --- a/src/Readability.php +++ b/src/Readability.php @@ -108,10 +108,6 @@ class Readability implements LoggerAwareInterface protected $useTidy; // raw HTML filters protected $pre_filters = [ - // remove obvious scripts - '!<script[^>]*>(.*?)</script>!is' => '', - // remove obvious styles - '!<style[^>]*>(.*?)</style>!is' => '', // remove spans as we redefine styles and they're probably special-styled '!</?span[^>]*>!is' => '', // HACK: firewall-filtered content @@ -366,6 +362,9 @@ class Readability implements LoggerAwareInterface $this->logger->debug($this->lightClean ? 'Light clean enabled.' : 'Standard clean enabled.'); + $this->clean($articleContent, 'style'); + $this->clean($articleContent, 'script'); + $this->cleanStyles($articleContent); $this->killBreaks($articleContent);