*/
class Crawler implements \Countable, \IteratorAggregate
{
- /**
- * @var string The current URI
- */
protected $uri;
/**
private $isHtml = true;
/**
- * @param mixed $node A Node to use as the base for the crawling
- * @param string $currentUri The current URI
- * @param string $baseHref The base href value
+ * @param mixed $node A Node to use as the base for the crawling
+ * @param string $uri The current URI
+ * @param string $baseHref The base href value
*/
- public function __construct($node = null, $currentUri = null, $baseHref = null)
+ public function __construct($node = null, $uri = null, $baseHref = null)
{
- $this->uri = $currentUri;
- $this->baseHref = $baseHref ?: $currentUri;
+ $this->uri = $uri;
+ $this->baseHref = $baseHref ?: $uri;
$this->add($node);
}
*
* @param \DOMNodeList|\DOMNode|array|string|null $node A node
*
- * @throws \InvalidArgumentException When node is not the expected type.
+ * @throws \InvalidArgumentException when node is not the expected type
*/
public function add($node)
{
/**
* Adds HTML/XML content.
*
- * If the charset is not set via the content type, it is assumed
- * to be ISO-8859-1, which is the default charset defined by the
+ * If the charset is not set via the content type, UTF-8 is assumed when the content
+ * is valid UTF-8; otherwise ISO-8859-1 is used, which is the default charset defined by the
* HTTP 1.1 specification.
*
* @param string $content A string to parse as HTML/XML
}
if (null === $charset) {
- $charset = 'ISO-8859-1';
+ $charset = preg_match('//u', $content) ? 'UTF-8' : 'ISO-8859-1';
}
if ('x' === $xmlMatches[1]) {
*/
public function filter($selector)
{
- if (!class_exists('Symfony\\Component\\CssSelector\\CssSelectorConverter')) {
- throw new \RuntimeException('Unable to filter with a CSS selector as the Symfony CssSelector 2.8+ is not installed (you can use filterXPath instead).');
+ if (!class_exists(CssSelectorConverter::class)) {
+ throw new \RuntimeException('To filter with a CSS selector, install the CssSelector component ("composer require symfony/css-selector"). Or use filterXpath instead.');
}
$converter = new CssSelectorConverter($this->isHtml);
public function selectButton($value)
{
$translate = 'translate(@type, "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz")';
- $xpath = sprintf('descendant-or-self::input[((contains(%s, "submit") or contains(%s, "button")) and contains(concat(\' \', normalize-space(string(@value)), \' \'), %s)) ', $translate, $translate, static::xpathLiteral(' '.$value.' ')).
+ $xpath = sprintf('descendant-or-self::input[((contains(%s, "submit") or contains(%1$s, "button")) and contains(concat(\' \', normalize-space(string(@value)), \' \'), %s)) ', $translate, static::xpathLiteral(' '.$value.' ')).
sprintf('or (contains(%s, "image") and contains(concat(\' \', normalize-space(string(@alt)), \' \'), %s)) or @id=%s or @name=%s] ', $translate, static::xpathLiteral(' '.$value.' '), static::xpathLiteral($value), static::xpathLiteral($value)).
sprintf('| descendant-or-self::button[contains(concat(\' \', normalize-space(string(.)), \' \'), %s) or @id=%s or @name=%s]', static::xpathLiteral(' '.$value.' '), static::xpathLiteral($value), static::xpathLiteral($value));
}
/**
- * @return \ArrayIterator
+ * @return \ArrayIterator|\DOMElement[]
*/
public function getIterator()
{
$nodes = array();
do {
- if ($node !== $this->getNode(0) && $node->nodeType === 1) {
+ if ($node !== $this->getNode(0) && 1 === $node->nodeType) {
$nodes[] = $node;
}
} while ($node = $node->$siblingDir);