From f71c3a419623f821c245e0a003edfbf2c67f278e Mon Sep 17 00:00:00 2001 From: Jeremy Benoist Date: Wed, 23 Sep 2015 21:09:38 +0200 Subject: [PATCH] Do not remove html tag attributes They might contain useful information (at least language) --- src/Readability.php | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Readability.php b/src/Readability.php index 3dbb5e0..0bc5974 100644 --- a/src/Readability.php +++ b/src/Readability.php @@ -186,7 +186,6 @@ class Readability $this->original_html = $html; $this->tidied = true; $html = $tidy->value; - $html = preg_replace('/<html[^>]+>/i', '<html>', $html); $html = preg_replace('/[\r\n]+/is', "\n", $html); } unset($tidy);