Remove addPreFilter

Pre-filters are applied in `__construct`, so adding more pre-filters after the object has been instantiated has no effect.
pull/13/head
Jeremy Benoist 10 years ago
parent 209c404d7b
commit 149a333b40
  1. 5
      src/JSLikeHTMLElement.php
  2. 19
      src/Readability.php
  3. 35
      tests/ReadabilityTest.php

@ -56,7 +56,8 @@ class JSLikeHTMLElement extends \DOMElement
$f = $this->ownerDocument->createDocumentFragment();
// appendXML() expects well-formed markup (XHTML)
$result = @$f->appendXML($value); // @ to suppress PHP warnings
// @ to suppress PHP warnings
$result = @$f->appendXML($value);
if ($result) {
if ($f->hasChildNodes()) {
$this->appendChild($f);
@ -75,6 +76,7 @@ class JSLikeHTMLElement extends \DOMElement
if ($result) {
$import = $f->getElementsByTagName('htmlfragment')->item(0);
foreach ($import->childNodes as $child) {
$importedNode = $this->ownerDocument->importNode($child, true);
$this->appendChild($importedNode);
@ -102,6 +104,7 @@ class JSLikeHTMLElement extends \DOMElement
{
if ($name == 'innerHTML') {
$inner = '';
foreach ($this->childNodes as $child) {
$inner .= $this->ownerDocument->saveXML($child);
}

@ -250,17 +250,6 @@ class Readability
return $this->articleContent;
}
/**
* Add a pre filter for raw input HTML processing.
*
* Stores $replacer in $this->pre_filters under the key $filter; an entry
* already registered for the same $filter is overwritten.
*
* NOTE(review): according to the commit message, pre filters are consumed
* in __construct, so registering one here after instantiation presumably
* has no effect -- confirm against the constructor.
*
* @param string $filter   RegExp to search for
* @param string $replacer (optional) Replacement; defaults to an empty string
*/
public function addPreFilter($filter, $replacer = '')
{
$this->pre_filters[$filter] = $replacer;
}
/**
* Add post filter for raw output HTML processing.
*
@ -302,7 +291,7 @@ class Readability
}
}
if ($bodyElems->length > 0 && $this->body == null) {
if ($bodyElems->length > 0 && $this->body === null) {
$this->body = $bodyElems->item(0);
}
@ -385,12 +374,11 @@ class Readability
*/
protected function getArticleTitle()
{
$origTitle = '';
try {
$curTitle = $origTitle = $this->getInnerText($this->dom->getElementsByTagName('title')->item(0));
} catch (\Exception $e) {
$curTitle = '';
$origTitle = '';
}
if (preg_match('/ [\|\-] /', $curTitle)) {
@ -431,7 +419,7 @@ class Readability
* In some cases a body element can't be found (if the HTML is totally hosed for example)
* so we create a new body node and append it to the document.
*/
if ($this->body == null) {
if ($this->body === null) {
$this->body = $this->dom->createElement('body');
$this->dom->documentElement->appendChild($this->body);
}
@ -571,7 +559,6 @@ class Readability
$this->cleanConditionally($articleContent, 'form');
$this->cleanConditionally($articleContent, 'table');
$this->cleanConditionally($articleContent, 'ul');
//if (!$this->lightClean)
$this->cleanConditionally($articleContent, 'div');
// Remove extra paragraphs.

@ -215,6 +215,22 @@ class ReadabilityTest extends \PHPUnit_Framework_TestCase
$this->assertNotContains('This text should be removed', $readability->getContent()->innerHTML);
}
/**
 * Runs Readability with lightClean disabled on an <article> made of seven
 * paragraphs with links followed by a display:none <div> holding ten
 * "clock" paragraphs, then checks that the visible text is present in the
 * extracted content and the hidden text is not.
 */
public function testWithClassesWithoutLightClean()
{
    // Build the fixture markup piece by piece: visible paragraphs first, hidden block second.
    $visibleParagraphs = str_repeat('<p>This is an awesome text with some links, here there are: <a href="http://0.0.0.0/test.html">the awesome</a></p>', 7);
    $hiddenParagraphs = str_repeat('<p class="clock">This text should be removed</p>', 10);
    $html = '<article>'.$visibleParagraphs.'<div style="display:none">'.$hiddenParagraphs.'</div></article>';

    $readability = new ReadabilityTested($html, 'http://0.0.0.0');
    $readability->debug = true;
    $readability->lightClean = false;

    $initialized = $readability->init();
    $this->assertTrue($initialized);

    // Both the extracted content and the title must be JSLikeHTMLElement nodes.
    $expectedClass = 'Readability\JSLikeHTMLElement';
    $this->assertInstanceOf($expectedClass, $readability->getContent());
    $this->assertInstanceOf($expectedClass, $readability->getTitle());

    $this->assertContains('alt="article"', $readability->getContent()->innerHTML);
    // The fixture has no <title>, and the test expects an empty title node.
    $this->assertEmpty($readability->getTitle()->innerHTML);
    $this->assertContains('This is an awesome text with some links, here there are', $readability->getContent()->innerHTML);
    $this->assertNotContains('This text should be removed', $readability->getContent()->innerHTML);
}
public function testWithTd()
{
$readability = new ReadabilityTested('<table><tr>'.str_repeat('<td><p>This is an awesome text with some links, here there are the awesome</td>', 7).'</tr></table>', 'http://0.0.0.0');
@ -429,7 +445,22 @@ class ReadabilityTest extends \PHPUnit_Framework_TestCase
$this->assertTrue($res);
$this->assertInstanceOf('Readability\JSLikeHTMLElement', $readability->getContent());
$this->assertInstanceOf('Readability\JSLikeHTMLElement', $readability->getTitle());
// $this->assertContains('<iframe src="https://www.youtube.com/embed/PUep6xNeKjA" width="560" height="315" frameborder="0" allowfullscreen="allowfullscreen"> </iframe>', $readability->getContent()->innerHTML);
// $this->assertContains('3D Touch', $readability->getTitle()->innerHTML);
}
/**
 * Checks post filters in two passes over the same fixture: first with no
 * extra filter (the <b> from the input is expected as <strong> in the
 * output), then with an additional post filter registered through
 * addPostFilter() that replaces <strong> elements with an empty string.
 */
public function testPostFilters()
{
    // The same markup and URL feed both scenarios.
    $html = '<div>'.str_repeat('<p>This <b>is</b> the awesome content :)</p>', 7).'</div>';
    $url = 'http://0.0.0.0';

    // Scenario 1: no extra post filter registered.
    $withDefaults = new ReadabilityTested($html, $url);
    $defaultsOk = $withDefaults->init();
    $this->assertTrue($defaultsOk);
    $this->assertContains('This <strong>is</strong> the awesome content :)', $withDefaults->getContent()->innerHTML);

    // Scenario 2: a custom post filter is registered before init().
    $withExtraFilter = new ReadabilityTested($html, $url);
    $withExtraFilter->addPostFilter('!<strong[^>]*>(.*?)</strong>!is', '');
    $extraFilterOk = $withExtraFilter->init();
    $this->assertTrue($extraFilterOk);
    $this->assertContains('This the awesome content :)', $withExtraFilter->getContent()->innerHTML);
}
}

Loading…
Cancel
Save