From f2a43b476c039105cd0a4370b35b0244826913fa Mon Sep 17 00:00:00 2001 From: Jeremy Benoist Date: Fri, 19 May 2017 15:37:45 +0200 Subject: [PATCH 1/2] Avoid PHP Warning This isn't the best solution but the previous one using `@` wasn't really better. Appending a string into a fragment might generate a warning if the string contains a bad entity. For example `+`. --- src/JSLikeHTMLElement.php | 89 +++++++++++++++++++++------------------ 1 file changed, 48 insertions(+), 41 deletions(-) diff --git a/src/JSLikeHTMLElement.php b/src/JSLikeHTMLElement.php index 2ce4898..15e7281 100644 --- a/src/JSLikeHTMLElement.php +++ b/src/JSLikeHTMLElement.php @@ -45,52 +45,59 @@ class JSLikeHTMLElement extends \DOMElement */ public function __set($name, $value) { - if ($name === 'innerHTML') { - // first, empty the element - for ($x = $this->childNodes->length - 1; $x >= 0; --$x) { - $this->removeChild($this->childNodes->item($x)); + if ($name !== 'innerHTML') { + $trace = debug_backtrace(); + trigger_error('Undefined property via __set(): ' . $name . ' in ' . $trace[0]['file'] . ' on line ' . $trace[0]['line'], E_USER_NOTICE); + + return; + } + + // first, empty the element + for ($x = $this->childNodes->length - 1; $x >= 0; --$x) { + $this->removeChild($this->childNodes->item($x)); + } + + // $value holds our new inner HTML + $value = trim($value); + if (empty($value)) { + return; + } + + // ensure bad entity won't generate warning + $previousError = libxml_use_internal_errors(true); + + $f = $this->ownerDocument->createDocumentFragment(); + + // appendXML() expects well-formed markup (XHTML) + $result = $f->appendXML($value); + if ($result) { + if ($f->hasChildNodes()) { + $this->appendChild($f); } + } else { + // $value is probably ill-formed + $f = new \DOMDocument(); + $value = mb_convert_encoding($value, 'HTML-ENTITIES', 'UTF-8'); + + // Using <htmlfragment> will generate a warning, but so will bad HTML + // (and by this point, bad HTML is what we've got). 
+ // We use it (and suppress the warning) because an HTML fragment will + // be wrapped around <html><body> tags which we don't really want to keep. + // Note: despite the warning, if loadHTML succeeds it will return true. + $result = $f->loadHTML('<htmlfragment>' . $value . '</htmlfragment>'); + + if ($result) { + $import = $f->getElementsByTagName('htmlfragment')->item(0); - // $value holds our new inner HTML - if ($value !== '') { - $f = $this->ownerDocument->createDocumentFragment(); - - // appendXML() expects well-formed markup (XHTML) - // @ to suppress PHP warnings - $result = @$f->appendXML($value); - if ($result) { - if ($f->hasChildNodes()) { - $this->appendChild($f); - } - } else { - // $value is probably ill-formed - $f = new \DOMDocument(); - $value = mb_convert_encoding($value, 'HTML-ENTITIES', 'UTF-8'); - - // Using <htmlfragment> will generate a warning, but so will bad HTML - // (and by this point, bad HTML is what we've got). - // We use it (and suppress the warning) because an HTML fragment will - // be wrapped around <html><body> tags which we don't really want to keep. - // Note: despite the warning, if loadHTML succeeds it will return true. - $result = @$f->loadHTML('<htmlfragment>' . $value . '</htmlfragment>'); - - if ($result) { - $import = $f->getElementsByTagName('htmlfragment')->item(0); - - foreach ($import->childNodes as $child) { - $importedNode = $this->ownerDocument->importNode($child, true); - $this->appendChild($importedNode); - } - } else { - // oh well, we tried, we really did. :( - // this element is now empty - } + foreach ($import->childNodes as $child) { + $importedNode = $this->ownerDocument->importNode($child, true); + $this->appendChild($importedNode); } } - } else { - $trace = debug_backtrace(); - trigger_error('Undefined property via __set(): ' . $name . ' in ' . $trace[0]['file'] . ' on line ' . 
$trace[0]['line'], E_USER_NOTICE); } + + libxml_clear_errors(); + libxml_use_internal_errors($previousError); } /** From 85fb92a04204f75f860acda197e4afe043c34b92 Mon Sep 17 00:00:00 2001 From: Jeremy Benoist Date: Fri, 19 May 2017 16:02:36 +0200 Subject: [PATCH 2/2] Fix tests --- tests/ReadabilityTest.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ReadabilityTest.php b/tests/ReadabilityTest.php index c904f8e..50618e8 100644 --- a/tests/ReadabilityTest.php +++ b/tests/ReadabilityTest.php @@ -443,7 +443,7 @@ class ReadabilityTest extends \PHPUnit_Framework_TestCase public function testPostFilters() { - $readability = $this->getReadability('
' . str_repeat('

This is the awesome content :)

', 7) . '
', 'http://0.0.0.0'); + $readability = $this->getReadability('
' . str_repeat('

This is the awesome content :)

', 10) . '
', 'http://0.0.0.0'); $readability->addPostFilter('!]*>(.*?)!is', ''); $res = $readability->init();