From 74fbf6f0097d87aeb2e9a55b1d5ec9bf015e556f Mon Sep 17 00:00:00 2001
From: Jeremy Benoist
Date: Mon, 9 Nov 2015 11:12:07 +0100
Subject: [PATCH 1/3] Remove tidy from requirement
Sadly, we can't suggest that the user install the extension
---
composer.json | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/composer.json b/composer.json
index 07cbe59..6727aac 100644
--- a/composer.json
+++ b/composer.json
@@ -24,8 +24,7 @@
"role": "Developer (original JS version)"
}],
"require": {
- "php": ">=5.3.3",
- "ext-tidy": ">=1.2"
+ "php": ">=5.3.3"
},
"autoload": {
"psr-4": { "Readability\\": "src/" }
From 7c30d76b6e8861458d4b8cb62e7c34b600708c0b Mon Sep 17 00:00:00 2001
From: Jeremy Benoist
Date: Mon, 9 Nov 2015 19:59:18 +0100
Subject: [PATCH 2/3] Ensure tests are running without Tidy
---
tests/ReadabilityTest.php | 28 ++++++++++++++++++++++++++++
1 file changed, 28 insertions(+)
diff --git a/tests/ReadabilityTest.php b/tests/ReadabilityTest.php
index fe6ce60..e82c1d6 100644
--- a/tests/ReadabilityTest.php
+++ b/tests/ReadabilityTest.php
@@ -19,6 +19,9 @@ class ReadabilityTested extends Readability
class ReadabilityTest extends \PHPUnit_Framework_TestCase
{
+ /**
+ * @requires extension tidy
+ */
public function testConstructDefault()
{
$readability = new ReadabilityTested('');
@@ -30,6 +33,9 @@ class ReadabilityTest extends \PHPUnit_Framework_TestCase
$this->assertInstanceOf('DomDocument', $readability->dom);
}
+ /**
+ * @requires extension tidy
+ */
public function testConstructSimple()
{
$readability = new ReadabilityTested('', 'http://0.0.0.0');
@@ -41,6 +47,28 @@ class ReadabilityTest extends \PHPUnit_Framework_TestCase
$this->assertInstanceOf('DomDocument', $readability->dom);
}
+ public function testConstructDefaultWithoutTidy() // carries no "@requires extension tidy", so it is meant to run when the tidy extension is absent
+ {
+ $readability = new ReadabilityTested('', null, 'libxml', false); // empty HTML, null URL; presumably the trailing false is the use-Tidy flag - confirm against the Readability constructor
+ // Expectations: url stays null, the debug text mentions URL parsing but not tidying, no domain regexp is built, and a DOM document exists.
+ $this->assertNull($readability->url);
+ $this->assertContains('Parsing URL', $readability->getDebugText());
+ $this->assertNotContains('Tidying document', $readability->getDebugText());
+ $this->assertNull($readability->getDomainRegexp());
+ $this->assertInstanceOf('DomDocument', $readability->dom);
+ }
+
+ public function testConstructSimpleWithoutTidy() // carries no "@requires extension tidy", so it is meant to run when the tidy extension is absent
+ {
+ $readability = new ReadabilityTested('', 'http://0.0.0.0', 'libxml', false); // empty HTML with a URL; presumably the trailing false is the use-Tidy flag - confirm against the Readability constructor
+ // Expectations: url is kept as given, the debug text mentions URL parsing but not tidying, the domain regexp is the host with its dots escaped, and a DOM document exists.
+ $this->assertEquals('http://0.0.0.0', $readability->url);
+ $this->assertContains('Parsing URL', $readability->getDebugText());
+ $this->assertNotContains('Tidying document', $readability->getDebugText());
+ $this->assertEquals('/0\.0\.0\.0/', $readability->getDomainRegexp());
+ $this->assertInstanceOf('DomDocument', $readability->dom);
+ }
+
public function testInitNoContent()
{
$readability = new ReadabilityTested('
', 'http://0.0.0.0');
From 252bb4ef42f598f9ae4b91f335ac2a5128229196 Mon Sep 17 00:00:00 2001
From: Jeremy Benoist
Date: Mon, 9 Nov 2015 20:25:39 +0100
Subject: [PATCH 3/3] Add Tidy requirement in README
---
README.md | 12 ++++++++++--
1 file changed, 10 insertions(+), 2 deletions(-)
diff --git a/README.md b/README.md
index 05d62aa..27d933a 100644
--- a/README.md
+++ b/README.md
@@ -3,11 +3,11 @@
[](https://travis-ci.org/j0k3r/php-readability)
[](https://scrutinizer-ci.com/g/j0k3r/php-readability/?branch=master)
-This is an extract of the Readability class from the [full-text-rss](https://github.com/Dither/full-text-rss) fork. It kind be defined as a better version of the original [php-readability](https://bitbucket.org/fivefilters/php-readability/overview).
+This is an extract of the Readability class from this [full-text-rss](https://github.com/Dither/full-text-rss) fork. It can be defined as a better version of the original [php-readability](https://bitbucket.org/fivefilters/php-readability/overview).
## Differences
-The default php-readability lib is really old and needs to be improved. I found a great fork of [full-text-rss](http://fivefilters.org/content-only/) from @Dither which improve the Readability class.
+The default php-readability lib is really old and needs to be improved. I found a great fork of full-text-rss from [@Dither](https://github.com/Dither/full-text-rss) which improves the Readability class.
- I've extracted the class from its fork to be able to use it out of the box
- I've added some simple tests
@@ -15,6 +15,12 @@ The default php-readability lib is really old and needs to be improved. I found
**But** the code is still really hard to understand / read ...
+## Requirements
+
+By default, this lib will use the [Tidy extension](https://github.com/htacg/tidy-html5) if it's available. Tidy is only used to clean up the given HTML and avoid problems with bad HTML structure, etc.
+
+Since Composer doesn't support suggesting PHP extensions, I'm documenting this suggestion here.
+
## Usage
```php
@@ -26,6 +32,8 @@ $url = 'http://www.medialens.org/index.php/alerts/alert-archive/alerts-2013/729-
$html = file_get_contents($url);
$readability = new Readability($html, $url);
+// or without Tidy
+// $readability = new Readability($html, $url, 'libxml', false);
$result = $readability->init();
if ($result) {