From 651e8a6bb0818f59ad752125cecf5b88e719e912 Mon Sep 17 00:00:00 2001 From: Kevin Decherf Date: Sat, 11 Jun 2022 00:11:36 +0200 Subject: [PATCH] Strip script and style tags through ::clean() method instead of preg_replace Huge tags can lead to a failure of preg_replace, thus erasing the whole fetched content. Fixes https://github.com/wallabag/wallabag/issues/5847 Signed-off-by: Kevin Decherf --- src/Readability.php | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/Readability.php b/src/Readability.php index db2e27a..8c1e62b 100644 --- a/src/Readability.php +++ b/src/Readability.php @@ -137,10 +137,6 @@ class Readability implements LoggerAwareInterface protected $useTidy; // raw HTML filters protected $pre_filters = [ - // remove obvious scripts - '!<script[^>]*>(.*?)</script>!is' => '', - // remove obvious styles - '!<style[^>]*>(.*?)</style>!is' => '', // remove spans as we redefine styles and they're probably special-styled '!</?span[^>]*>!is' => '', // HACK: firewall-filtered content @@ -397,6 +393,9 @@ class Readability implements LoggerAwareInterface $this->logger->debug($this->lightClean ? 'Light clean enabled.' : 'Standard clean enabled.'); + $this->clean($articleContent, 'style'); + $this->clean($articleContent, 'script'); + $this->cleanStyles($articleContent); $this->killBreaks($articleContent);