From 26c881d8644e6464370234607a60ca25a5ae291d Mon Sep 17 00:00:00 2001 From: Kevin Decherf Date: Sun, 3 Feb 2019 16:15:55 +0100 Subject: [PATCH] tidy: use tidy_repair_string instead of tidy_parse_string+tidy_clean_repair A change released in tidy 5.6.0 breaks php-tidy when using tidy_parse_string+tidy_clean_repair and wrap=0, incorrectly wrapping every single word. Also, it seems that $tidy->value should not be used to retrieve the repaired HTML, since it is undocumented and intended for internal use. We replace the call with tidy_repair_string, which directly returns the repaired string. Relates to https://github.com/htacg/tidy-html5/issues/673 Relates to https://bugs.php.net/bug.php?id=75947 Tests pass. Signed-off-by: Kevin Decherf --- src/Readability.php | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Readability.php b/src/Readability.php index 5908eb4..986cce5 100644 --- a/src/Readability.php +++ b/src/Readability.php @@ -274,10 +274,10 @@ class Readability implements LoggerAwareInterface if ($this->useTidy) { $this->logger->debug('Tidying document'); - $tidy = tidy_parse_string($this->html, $this->tidy_config, 'UTF8'); - if (tidy_clean_repair($tidy)) { + $tidy = tidy_repair_string($this->html, $this->tidy_config, 'UTF8'); + if (false !== $tidy && $this->html !== $tidy) { $this->tidied = true; - $this->html = $tidy->value; + $this->html = $tidy; $this->html = preg_replace('/[\r\n]+/is', "\n", $this->html); } unset($tidy);