Merge pull request #10 from j0k3r/remove-tidy-constraint

Remove tidy from requirement
pull/11/head v1.0.9
Jeremy Benoist 11 years ago
commit 41d7440c6e
  1. 12
      README.md
  2. 3
      composer.json
  3. 28
      tests/ReadabilityTest.php

@ -3,11 +3,11 @@
[![Build Status](https://travis-ci.org/j0k3r/php-readability.svg?branch=master)](https://travis-ci.org/j0k3r/php-readability)
[![Code Coverage](https://scrutinizer-ci.com/g/j0k3r/php-readability/badges/coverage.png?b=master)](https://scrutinizer-ci.com/g/j0k3r/php-readability/?branch=master)
This is an extract of the Readability class from the [full-text-rss](https://github.com/Dither/full-text-rss) fork. It kind be defined as a better version of the original [php-readability](https://bitbucket.org/fivefilters/php-readability/overview).
This is an extract of the Readability class from this [full-text-rss](https://github.com/Dither/full-text-rss) fork. It can be defined as a better version of the original [php-readability](https://bitbucket.org/fivefilters/php-readability/overview).
## Differences
The default php-readability lib is really old and needs to be improved. I found a great fork of [full-text-rss](http://fivefilters.org/content-only/) from @Dither which improve the Readability class.
The default php-readability lib is really old and needs to be improved. I found a great fork of full-text-rss from [@Dither](https://github.com/Dither/full-text-rss) which improves the Readability class.
- I've extracted the class from its fork to be able to use it out of the box
- I've added some simple tests
@ -15,6 +15,12 @@ The default php-readability lib is really old and needs to be improved. I found
**But** the code is still really hard to understand / read ...
## Requirements
By default, this lib will use the [Tidy extension](https://github.com/htacg/tidy-html5) if it's available. Tidy is only used to clean up the given HTML and avoid problems with bad HTML structure, etc.
Since Composer doesn't support suggestions for PHP extensions, I'm noting this suggestion here.
## Usage
```php
@ -26,6 +32,8 @@ $url = 'http://www.medialens.org/index.php/alerts/alert-archive/alerts-2013/729-
$html = file_get_contents($url);
$readability = new Readability($html, $url);
// or without Tidy
// $readability = new Readability($html, $url, 'libxml', false);
$result = $readability->init();
if ($result) {

@ -24,8 +24,7 @@
"role": "Developer (original JS version)"
}],
"require": {
"php": ">=5.3.3",
"ext-tidy": ">=1.2"
"php": ">=5.3.3"
},
"autoload": {
"psr-4": { "Readability\\": "src/" }

@ -19,6 +19,9 @@ class ReadabilityTested extends Readability
class ReadabilityTest extends \PHPUnit_Framework_TestCase
{
/**
* @requires extension tidy
*/
public function testConstructDefault()
{
$readability = new ReadabilityTested('');
@ -30,6 +33,9 @@ class ReadabilityTest extends \PHPUnit_Framework_TestCase
$this->assertInstanceOf('DomDocument', $readability->dom);
}
/**
* @requires extension tidy
*/
public function testConstructSimple()
{
$readability = new ReadabilityTested('<html/>', 'http://0.0.0.0');
@ -41,6 +47,28 @@ class ReadabilityTest extends \PHPUnit_Framework_TestCase
$this->assertInstanceOf('DomDocument', $readability->dom);
}
/**
* Builds a ReadabilityTested instance from an empty HTML string and no URL,
* passing 'libxml' as third argument and false as fourth argument (the
* "without Tidy" form of the constructor), then checks the resulting state.
*
* NOTE(review): there is no "@requires extension tidy" here, presumably so
* this test also runs when the Tidy extension is not installed - confirm.
*/
public function testConstructDefaultWithoutTidy()
{
$readability = new ReadabilityTested('', null, 'libxml', false);
// No URL was given, so none should be stored.
$this->assertNull($readability->url);
// The debug text must mention URL parsing but must not mention a Tidy pass.
$this->assertContains('Parsing URL', $readability->getDebugText());
$this->assertNotContains('Tidying document', $readability->getDebugText());
// Without a URL no domain regexp is expected.
$this->assertNull($readability->getDomainRegexp());
// A DOM document must be available even for empty input.
$this->assertInstanceOf('DomDocument', $readability->dom);
}
/**
* Builds a ReadabilityTested instance from a minimal '<html/>' document and
* the URL 'http://0.0.0.0', passing 'libxml' as third argument and false as
* fourth argument (the "without Tidy" form of the constructor), then checks
* the resulting state.
*
* NOTE(review): there is no "@requires extension tidy" here, presumably so
* this test also runs when the Tidy extension is not installed - confirm.
*/
public function testConstructSimpleWithoutTidy()
{
$readability = new ReadabilityTested('<html/>', 'http://0.0.0.0', 'libxml', false);
// The URL passed to the constructor must be stored unchanged.
$this->assertEquals('http://0.0.0.0', $readability->url);
// The debug text must mention URL parsing but must not mention a Tidy pass.
$this->assertContains('Parsing URL', $readability->getDebugText());
$this->assertNotContains('Tidying document', $readability->getDebugText());
// The domain regexp is expected to be the URL's host with its dots escaped.
$this->assertEquals('/0\.0\.0\.0/', $readability->getDomainRegexp());
// A DOM document must be available after construction.
$this->assertInstanceOf('DomDocument', $readability->dom);
}
public function testInitNoContent()
{
$readability = new ReadabilityTested('<html/>', 'http://0.0.0.0');

Loading…
Cancel
Save