5 * This module periodically checks links in given node types, blocks etc.
7 * Developed by Alexander Hass, http://www.yaml-for-drupal.com/.
10 use Drupal\Component\Utility\Crypt;
11 use Drupal\Component\Utility\Html;
12 use Drupal\Component\Utility\Unicode;
13 use Drupal\Component\Utility\UrlHelper;
14 use Drupal\Component\Utility\Timer;
15 use Drupal\Core\Database\Database;
16 use Drupal\Core\Form\FormStateInterface;
17 use Drupal\Core\Logger\RfcLogLevel;
18 use Drupal\Core\Routing\RouteMatchInterface;
19 use Drupal\Core\Session\AccountInterface;
20 use Drupal\Core\Session\UserSession;
22 use Drupal\field\FieldConfigInterface;
23 use Drupal\filter\Entity;
24 use Drupal\field\Entity\FieldStorageConfig;
25 use Drupal\filter\Entity\FilterFormat;
26 use Drupal\node\Entity\Node;
27 use Drupal\node\Entity\NodeType;
28 use Drupal\node\NodeInterface;
29 use Drupal\node\NodeTypeInterface;
30 use GuzzleHttp\Exception\ClientException;
/**
 * Defines the maximum limit of links collected in one chunk if content is
 * scanned for links. A value that is too high may overload the database server.
 *
 * The value is intentionally kept as a numeric string so that existing
 * callers comparing against it see the same type as before.
 */
const LINKCHECKER_SCAN_MAX_LINKS_PER_RUN = '100';
/**
 * A list of domain names reserved for use in documentation and not available
 * for registration. See RFC 2606, Section 3 for more information.
 *
 * The domains are stored as one newline-separated string.
 */
const LINKCHECKER_RESERVED_DOCUMENTATION_DOMAINS = "example.com\nexample.net\nexample.org";
45 * A list of blacklisted filters the modules do not need to run for the link
46 * extraction process. These filters only eat processing time or hold references
49 * - Align images, http://drupal.org/project/drupal
51 * - Line break converter, http://drupal.org/project/drupal
53 * - Caption images, http://drupal.org/project/drupal
54 * name: filter_caption
55 * - Insert block, http://drupal.org/project/insert_block
57 * tags: [block:name of module=delta of block]
58 * - Insert view filter, http://drupal.org/project/insert_view
60 * tags: [view:my_view]
61 * - Smiley filter, http://drupal.org/project/smiley
63 * tags: Depends on icon set, for e.g: ":) :-) :smile:"
64 * - Web Links Embed, http://drupal.org/project/weblinks
65 * name: weblinks_embed
66 * tags: [links-embed: id], [links-embed: name]
67 * - Web Links Filter, http://drupal.org/project/weblinks
68 * name: weblinks_filter
72 * - Smileys Filter, http://drupal.org/project/smileys
74 * tags: Depends on icon set, for e.g: ":) :-) :smile:"
75 * - Insert node, http://drupal.org/project/InsertNode
77 * tags: [node:<name of node> <parameters>]
78 * - Weblink filter, http://drupal.org/project/links
79 * name: links_weblink/0
80 * tags: [weblink:node_id|text], [weblink:node_id/link_id], [weblink:http://weblink.example.com/]
// Pipe-separated list of filter names excluded from link extraction by default.
const LINKCHECKER_DEFAULT_FILTER_BLACKLIST = 'filter_align|filter_autop|filter_caption|insert_block|insert_view|smiley|smileys|weblinks_embed|weblinks_filter';
/**
 * Implements hook_help().
 *
 * Provides the module overview text for the 'help.page.linkchecker' route.
 * Nothing is returned for any other route.
 */
function linkchecker_help($route_name, RouteMatchInterface $route_match) {
  if ($route_name !== 'help.page.linkchecker') {
    return;
  }

  // The URL is substituted into an href attribute, so it uses the ':'
  // placeholder prefix (URL sanitising) rather than '@' (plain-text escaping).
  $overview = t('This module provides an aid to finding broken links on your site. It periodically checks contents of all public nodes, tries to find any html links and check for their validity. It reports broken links through the admin interface. For more information about status codes see <a href=":rfc">Status Code Definitions</a>.', [':rfc' => 'http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html']);

  return '<p>' . $overview . '</p>';
}
/**
 * Conditionally logs a system message.
 *
 * The message is only handed to the logger when $severity is less than or
 * equal to the 'logging.level' value of the linkchecker.settings
 * configuration.
 *
 * @param string $type
 *   The category to which this message belongs. Can be any string, but the
 *   general practice is to use the name of the module calling this function.
 * @param string $message
 *   The message to store in the log. Keep $message translatable
 *   by not concatenating dynamic values into it! Variables in the
 *   message should be added by using placeholder strings alongside
 *   the variables argument to declare the value of the placeholders.
 *   See t() for documentation on how $message and $variables interact.
 * @param array|null $variables
 *   Array of variables to replace in the message on display or
 *   NULL if message is already translated or not possible to
 *   translate. A non-array value is treated as an empty array.
 * @param int $severity
 *   The severity of the message; one of the following values as defined in
 *   @link http://www.faqs.org/rfcs/rfc3164.html RFC 3164: @endlink
 *   - RfcLogLevel::EMERGENCY: Emergency, system is unusable.
 *   - RfcLogLevel::ALERT: Alert, action must be taken immediately.
 *   - RfcLogLevel::CRITICAL: Critical conditions.
 *   - RfcLogLevel::ERROR: Error conditions.
 *   - RfcLogLevel::WARNING: Warning conditions.
 *   - RfcLogLevel::NOTICE: (default) Normal but significant conditions.
 *   - RfcLogLevel::INFO: Informational messages.
 *   - RfcLogLevel::DEBUG: Debug-level messages.
 * @param string|null $link
 *   A link to associate with the message. It is passed to the logger as the
 *   'link' context entry unless $variables already contains one.
 *
 * @see \Drupal\Core\Logger\RfcLogLevel::getLevels()
 */
function linkchecker_watchdog_log($type, $message, $variables = [], $severity = RfcLogLevel::NOTICE, $link = NULL) {
  // Skip messages that are less important than the configured threshold.
  if ($severity > \Drupal::config('linkchecker.settings')->get('logging.level')) {
    return;
  }

  // The logger expects an array context; tolerate the documented NULL.
  $context = is_array($variables) ? $variables : [];

  // Forward the associated link instead of silently dropping it.
  if ($link !== NULL && !isset($context['link'])) {
    $context['link'] = $link;
  }

  \Drupal::logger($type)->log($severity, $message, $context);
}
/**
 * Access callback for user/%user/linkchecker.
 *
 * @param object $account
 *   The user account whose broken links report is requested.
 *
 * @return bool
 *   TRUE if the current user may view the report of $account.
 */
function _linkchecker_user_access_account_broken_links_report($account) {
  // Reports only exist for authenticated accounts.
  if (!$account->id()) {
    return FALSE;
  }

  $viewer = \Drupal::currentUser();

  // Users with 'access own broken links report' permission can only view their
  // own report.
  $is_own_report = $viewer->id() == $account->id();
  if ($is_own_report && $viewer->hasPermission('access own broken links report')) {
    return TRUE;
  }

  // Users with the 'access broken links report' permission can view the
  // report for any authenticated user.
  return $viewer->hasPermission('access broken links report');
}
/**
 * Access callback for linkchecker/%linkchecker_link/edit.
 *
 * @param object $link
 *   An object representing the link to check.
 *
 * @return bool
 *   TRUE if the current user has the requested permission.
 */
function _linkchecker_user_access_edit_link_settings($link) {
  // The permission is checked first so the link lookup is skipped without it.
  if (!\Drupal::currentUser()->hasPermission('edit link settings')) {
    return FALSE;
  }

  return (bool) _linkchecker_link_access($link);
}
/**
 * Determines if the current user has access to view a link.
 *
 * Link URLs can contain private information (for example, usernames and
 * passwords). So this module should only display links to a user if the link
 * already appears in at least one place on the site where the user would
 * otherwise have access to see it.
 *
 * @param object $link
 *   An object representing the link to check.
 *
 * @return bool
 *   TRUE if the node, comment or custom block lookup returns at least one ID
 *   for the link, FALSE otherwise.
 */
function _linkchecker_link_access($link) {
  $link = (object) $link;

  // The lookups run in order and stop at the first one that finds something.
  if (_linkchecker_link_node_ids($link)) {
    return TRUE;
  }
  if (_linkchecker_link_comment_ids($link)) {
    return TRUE;
  }

  return (bool) _linkchecker_link_block_ids($link);
}
184 * Returns IDs of nodes that contain a link which the current user may be allowed to view.
186 * Important note: For performance reasons, this function is not always
187 * guaranteed to return the exact list of node IDs that the current user is
188 * allowed to view. It will, however, always return an empty array if the user
189 * does not have access to view *any* such nodes, thereby meeting the security
190 * goals of _linkchecker_link_access() and other places that call it.
192 * In the case where a user has access to some of the nodes that contain the
193 * link, this function may return some node IDs that the user does not have
194 * access to. Therefore, use caution with its results.
196 * @param object $link
197 * An object representing the link to check.
198 * @param object $node_author_account
199 * (optional) If a user account object is provided, the returned nodes will
200 * additionally be restricted to only those owned by this account. Otherwise,
201 * nodes owned by any user account may be returned.
204 * An array of node IDs that contain the provided link and that the current
205 * user may be allowed to view.
207 function _linkchecker_link_node_ids($link, $node_author_account = NULL) {
// Collects IDs of nodes tied to $link->lid through the linkchecker_node table,
// then verifies per node that the link is still present and viewable.
// The static below caches the extracted links per node ID across calls.
208 static $fields_with_node_links = [];
210 // Exit if all node types are disabled or if the user cannot access content,
211 // there is no need to check further.
212 $linkchecker_scan_nodetypes = linkchecker_scan_node_types();
213 if (empty($linkchecker_scan_nodetypes) || !\Drupal::currentUser()->hasPermission('access content')) {
217 // Get a list of nodes containing the link, using addTag('node_access') to
218 // allow node access modules to exclude nodes that the current user does not
219 // have access to view.
// With an author account, a node matches when the account is either the node
// owner (n.uid) or the author of the joined revision (r.uid).
220 if (!empty($node_author_account)) {
221 $query = \Drupal::database()->select('node', 'n');
222 $query->addTag('node_access');
223 $query->innerJoin('linkchecker_node', 'ln', 'ln.nid = n.nid');
224 $query->innerJoin('node_revision', 'r', 'r.vid = n.vid');
225 $query->condition('ln.lid', $link->lid);
226 $or_condition_group = $query->orConditionGroup()
227 ->condition('n.uid', $node_author_account->id())
228 ->condition('r.uid', $node_author_account->id());
229 $query->condition($or_condition_group);
231 $query->fields('n', ['nid']);
// Without an author account, the same lookup runs with no ownership filter.
234 $query = \Drupal::database()->select('node', 'n');
235 $query->addTag('node_access');
236 $query->innerJoin('linkchecker_node', 'ln', 'ln.nid = n.nid');
237 $query->condition('ln.lid', $link->lid);
238 $query->fields('n', ['nid']);
240 $nodes = $query->execute();
242 // Check if the current user has access to view the link in each node.
243 // However, for performance reasons, as soon as we find one node where that
244 // is the case, stop checking and return the remainder of the list.
246 $access_allowed = FALSE;
// NOTE(review): $nodes comes from a select query on n.nid, so each $node is
// presumably a plain result row; calling ->id() on it (here and in the
// Node::load() call below) looks wrong - presumably ->nid was meant. Confirm.
247 foreach ($nodes as $node) {
248 if ($access_allowed) {
249 $nids[] = $node->id();
252 $node = Node::load($node->id());
254 // We must check whether the link is currently part of the node; if not, we
255 // do not want to return it (and it is not safe to, since we cannot know if
256 // it contained access restrictions for the current user at the point which
257 // it was originally extracted by the Link checker module).
258 if (!isset($fields_with_node_links[$node->id()])) {
259 $fields_with_node_links[$node->id()] = _linkchecker_extract_node_links($node, TRUE);
261 if (empty($fields_with_node_links[$node->id()][$link->url])) {
264 // If the link appears in fields and a field access module is being used,
265 // we must check that the current user has access to view at least one field
266 // that contains the link; if they don't, we should not return the node.
267 $fields = $fields_with_node_links[$node->id()][$link->url];
268 if (\Drupal::moduleHandler()->getImplementations('field_access')) {
269 $fields_with_access = [];
// NOTE(review): field_info_instances(), field_info_field() and field_access()
// look like Drupal 7 field API functions - verify they exist in this codebase.
271 $bundle_instances = field_info_instances('node', $node->bundle());
272 foreach ($bundle_instances as $field_name => $field_instance) {
273 $field = field_info_field($field_name);
275 // Field types supported by linkchecker.
276 $fields_supported = [
283 // Only check link and text fields, since those are the only types we
284 // extract links from.
285 if (in_array($field['type'], $fields_supported) && field_access('view', $field, 'node', $node)) {
286 $fields_with_access[] = $field['field_name'];
// The node is only usable when at least one field containing the link is
// also a field the user may view.
289 if (!array_intersect($fields, $fields_with_access)) {
// First node that passed every check: record it and flip the flag so the
// remaining rows are appended without further checks.
293 $nids[] = $node->id();
294 $access_allowed = TRUE;
/**
 * Returns IDs of comments that contain a link which the current user is allowed to view.
 *
 * @param object $link
 *   An object representing the link to check.
 * @param object $comment_author_account
 *   (optional) If a user account object is provided, the returned comments
 *   will additionally be restricted to only those owned by this account.
 *   Otherwise, comments owned by any user account may be returned.
 *
 * @return array
 *   An array of comment IDs that contain the provided link and that the
 *   current user is allowed to view.
 */
function _linkchecker_link_comment_ids($link, $comment_author_account = NULL) {
  // Exit if comments are disabled or if the user cannot access comments, there
  // is no need to check further.
  $comment_types = linkchecker_scan_comment_types();
  if (empty($comment_types) || !\Drupal::currentUser()->hasPermission('access comments')) {
    return [];
  }

  // Get a list of comments containing the link, using addTag('node_access') to
  // allow comment access modules to exclude comments that the current user
  // does not have access to view.
  $query = \Drupal::database()->select('comment', 'c');
  $query->addMetaData('base_table', 'comment');
  $query->addTag('node_access');
  $query->innerJoin('linkchecker_comment', 'lc', 'lc.cid = c.cid');
  $query->condition('lc.lid', $link->lid);

  // Optionally narrow the result to comments owned by one account. The ID is
  // read through id(), matching how accounts are handled elsewhere in this
  // file, instead of the former direct ->uid property access.
  if (!empty($comment_author_account)) {
    $query->condition('c.uid', $comment_author_account->id());
  }

  $query->fields('c', ['cid']);

  // Return the array of comment IDs.
  return $query->execute()->fetchCol();
}
349 * Returns IDs of blocks that contain a link which the current user is allowed to view.
351 * @param object $link
352 * An object representing the link to check.
355 * An array of custom block IDs that contain the provided link and that the
356 * current user is allowed to view.
358 function _linkchecker_link_block_ids($link) {
// Looks up the custom block IDs recorded for $link->lid and narrows them to
// blocks the current user's roles may see, unless the user administers blocks.
359 $user = Drupal::currentUser();
360 // Exit if blocks are disabled.
361 if (!\Drupal::config('linkchecker.settings')->get('scan_blocks')) {
365 // Get the initial list of block IDs.
366 $connection = \Drupal::database();
367 $query = $connection->query('SELECT bid FROM {linkchecker_block_custom} WHERE lid = :lid', [':lid' => $link->lid]);
368 $bids = $query->fetchCol();
370 // If the user can administer blocks, they're able to see all block content.
371 if ($user->hasPermission('administer blocks')) {
375 // Otherwise, only return blocks that this user (or anonymous users) have
// NOTE(review): if getRoles() returns a plain list of role IDs as values,
// array_keys() yields numeric indexes rather than role IDs - confirm which
// shape is returned before relying on this filter.
377 $rids = array_keys($user->getRoles());
378 $rids[] = AccountInterface::ANONYMOUS_ROLE;
// NOTE(review): the 'block' and 'block_role' tables with module/delta columns
// are presumably the Drupal 7 schema - verify they exist on this site.
380 $query = \Drupal::database()->select('block', 'b');
381 $query->leftJoin('block_role', 'r', 'b.module = r.module AND b.delta = r.delta');
382 $query->condition('b.module', 'block');
383 $or_condition_group = $query->orConditionGroup()
384 ->condition('r.rid', $rids, 'IN')
386 $query->condition($or_condition_group);
387 $query->fields('b', ['delta']);
389 $allowed_bids = $query->execute()->fetchCol();
// Keep only the linked blocks that also appear in the role-filtered list.
391 return array_intersect($bids, $allowed_bids);
/**
 * Implements hook_cron().
 *
 * Cleans up unused links at most once per day, then runs the link checks.
 */
function linkchecker_cron() {
  $state = \Drupal::state();
  $seconds_since_cleanup = REQUEST_TIME - $state->get('linkchecker.cleanup_links_last');

  // Remove outdated links no longer in use once per day.
  if ($seconds_since_cleanup >= 86400) {
    _linkchecker_cleanup_links();
    $state->set('linkchecker.cleanup_links_last', REQUEST_TIME);
  }

  // @todo The httprl module does not exist for Drupal 8 yet. Once it does,
  //   and 'check.library' is set to 'httprl', queue _linkchecker_check_links()
  //   via httprl_queue_background_callback() / httprl_send_request() so the
  //   checks run in a new process independent of cron, and return early.

  // Run the link checks the normal way.
  _linkchecker_check_links();
}
425 function _linkchecker_check_links() {
// Checks a batch of due links from {linkchecker_link} and hands each response
// to _linkchecker_status_handling(). A lock named after this function keeps
// concurrent runs from overlapping.
426 $config = \Drupal::config('linkchecker.settings');
428 // Get max_execution_time from configuration, override 0 with 240 seconds.
429 $max_execution_time = ini_get('max_execution_time') == 0 ? 240 : ini_get('max_execution_time');
430 // Make sure we have enough time to validate all of the links.
431 drupal_set_time_limit($max_execution_time);
433 // Make sure this is the only process trying to run this function.
434 $lock = \Drupal::lock();
435 if ($lock->acquire(__FUNCTION__, $max_execution_time)) {
437 // httprl module does not exists yet for D8
438 // $has_httprl = (\Drupal::moduleHandler()->moduleExists('httprl') && $config->get('check.library') == 'httprl');
// NOTE(review): the only visible assignment of $has_httprl is the commented
// line above, yet it is read below - confirm it is initialised somewhere.
441 // Do not confuse admins with a setting of maximum checkable links per cron
442 // run and guess that 2 links can be checked per second with 1 thread, what is
443 // nevertheless uncommon. The max_execution_time can be used to calculate
444 // a useful value that is higher, but not totally out of scope and limits the
445 // query result set to a reasonable size.
446 $linkchecker_check_connections_max = $config->get('check.connections_max');
448 $check_links_max_per_cron_run = ($has_httprl) ? ($linkchecker_check_connections_max * $max_execution_time) : $max_execution_time;
450 $linkchecker_check_links_interval = $config->get('check.interval');
451 $linkchecker_check_useragent = $config->get('check.useragent');
453 // Connection limit can be overridden via settings.php. Two connections is the
454 // limit defined in RFC http://www.ietf.org/rfc/rfc2616.txt. Modern browsers
455 // are typically using 6-8 connections and no more. Never use more and keep
456 // in mind that you can overload other people servers.
457 $linkchecker_check_domain_connections = $config->get('check.connections_max_per_domain');
459 // Get URLs for checking.
// Selects enabled links (status = 1) whose last check is older than the
// configured interval, least recently checked first.
460 $connection = \Drupal::database();
461 $links = $connection->queryRange('SELECT * FROM {linkchecker_link} WHERE last_checked < :last_checked AND status = :status ORDER BY last_checked, lid ASC', 0, $check_links_max_per_cron_run, [':last_checked' => REQUEST_TIME - $linkchecker_check_links_interval, ':status' => 1]);
// NOTE(review): this assigns a Database class constant, not a count of the
// selected rows, so the "$links_remaining == 0" test below depends on that
// constant's value - presumably a row count was intended. Confirm.
462 $links_remaining = Database::RETURN_AFFECTED;
464 foreach ($links as $link) {
466 $headers['User-Agent'] = $linkchecker_check_useragent;
468 $uri = @parse_url($link->url);
470 // URL contains a fragment.
471 if (in_array($link->method, ['HEAD', 'GET']) && !empty($uri['fragment'])) {
472 // We need the full content and not only the HEAD.
473 $link->method = 'GET';
474 // Request text content only (like Firefox/Chrome).
475 $headers['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8';
477 elseif ($link->method == 'GET') {
478 // Range: Only request the first 1024 bytes from remote server. This is
479 // required to prevent timeouts on URLs that are large downloads.
480 $headers['Range'] = 'bytes=0-1024';
483 // Add in the headers.
485 'headers' => $headers,
486 'method' => $link->method,
487 'max_redirects' => 0,
491 // Define the callback and add the $link object to it.
493 // - 'global_timeout' does not require a timer_read('page'), as this job
494 // runs in a new process, independent of cron.
496 'global_connections' => $linkchecker_check_connections_max,
497 'global_timeout' => $max_execution_time - 30,
498 'domain_connections' => $linkchecker_check_domain_connections,
501 'function' => '_linkchecker_status_handling',
503 $link, // This need to be passed or it's not send back to _linkchecker_status_handling()
// NOTE(review): httprl_request()/httprl_send_request() are called although the
// comments above say httprl has no D8 port - presumably guarded by a
// $has_httprl branch that is not visible here. Verify.
506 // Queue up the requests.
507 httprl_request($link->url, $options);
510 // After all links are queued, run the url checks.
511 if ($links_remaining == 0) {
512 httprl_send_request();
518 // @fixme: Object is totally different in D8.
520 $response = \Drupal::httpClient()->request($link->method, $link->url, $options);
523 // Add 'redirect_code' property to core response object for consistency
524 // with HTTPRL object.
525 // if ($response->code == 301 && !isset($response->redirect_code)) {
526 // $response->redirect_code = $response->code;
528 // Add 'uri' property to core response object for 'fragment' check and
529 // consistency with HTTPRL object.
530 // $response->uri = $uri;
532 _linkchecker_status_handling($response, $link);
534 if ((Timer::read('page') / 1000) > ($max_execution_time / 2)) {
535 // Stop once we have used over half of the maximum execution time.
// On a ClientException, the response carried by the exception is passed
// through the same status handling and the same time-budget check.
539 catch (ClientException $e) {
540 $response = $e->getResponse();
541 _linkchecker_status_handling($response, $link);
542 if ((Timer::read('page') / 1000) > ($max_execution_time / 2)) {
// The lock is released before the completion and memory usage messages are
// logged.
552 $lock->release(__FUNCTION__);
553 linkchecker_watchdog_log('linkchecker', 'Link checks completed.', [], RfcLogLevel::INFO);
554 linkchecker_watchdog_log('linkchecker', 'Memory usage: @memory_get_usage, Peak memory usage: @memory_get_peak_usage.', ['@memory_get_peak_usage' => format_size(memory_get_peak_usage()), '@memory_get_usage' => format_size(memory_get_usage())], RfcLogLevel::DEBUG);
// Presumably the branch taken when the lock could not be acquired.
559 linkchecker_watchdog_log('linkchecker', 'Attempted to re-run link checks while they are already running.', [], RfcLogLevel::WARNING);
565 * Status code handling.
567 * @param object $response
568 * An object containing the HTTP request headers, response code, headers,
569 * data and redirect status.
570 * @param object $link
571 * An object containing the url, lid and fail_count.
573 function _linkchecker_status_handling(&$response, $link) {
574 $config = \Drupal::config('linkchecker.settings');
575 $ignore_response_codes = preg_split('/(\r\n?|\n)/', \Drupal::config('linkchecker.settings')->get('error.ignore_response_codes'));
577 // - Prevent E_ALL warnings in DB updates for non-existing $response->error.
578 // - @todo drupal_http_request() may not provide an UTF8 encoded error message
579 // what results in a database UPDATE failure. For more information, see
580 // http://drupal.org/node/371495.
581 // Workaround: ISO-8859-1 as source encoding may be wrong, but WFM.
582 if (!isset($response->error)) {
583 $response->error = '';
585 if (!isset($response->status_message)) {
586 $response->status_message = '';
588 $response->error = trim(Unicode::convertToUtf8($response->error, 'ISO-8859-1'));
589 $response->status_message = trim(Unicode::convertToUtf8($response->status_message, 'ISO-8859-1'));
591 // Destination anchors in HTML documents may be specified either by:
592 // - the A element (naming it with the name attribute)
593 // - or by any other element (naming with the id attribute)
594 // - and must not contain a key/value pair as these type of hash fragments are
595 // typically used by AJAX applications to prevent additionally HTTP requests
596 // e.g. http://www.example.com/ajax.html#key1=value1&key2=value2
597 // - and must not contain '/' or ',' as this are not normal anchors.
598 // - and '#top' is a reserved fragment that must not exist in a page.
599 // See http://www.w3.org/TR/html401/struct/links.html
601 $response->code = $response->getStatusCode();
602 if ($response->code == 200
603 && !empty($response->getBody())
604 && !empty($response->getHeader('Content-Type'))
605 && !empty($response->getHeader('Link'))
606 && preg_match('/=|\/|,/', $response->getHeader('Link')[1]) == FALSE
607 && !in_array($response->getHeader('Link')[1], ['#top'])
608 && in_array($response->getHeader('Content-Type'), ['text/html', 'application/xhtml+xml', 'application/xml'])
609 && !preg_match('/(\s[^>]*(name|id)(\s+)?=(\s+)?["\'])(' . preg_quote(urldecode($response->getHeader('Link')[1]), '/') . ')(["\'][^>]*>)/i', $response->getBody())
611 // Override status code 200 with status code 404 so it can be handled with
612 // default status code 404 logic and custom error text.
613 $response->code = 404;
614 $response->status_message = $response->error = 'URL fragment identifier not found in content';
617 switch ($response->code) {
618 case -4: // HTTPRL: httprl_send_request timed out.
619 // Skip these and try them again next cron run.
622 case -2: // HTTPRL: maximum allowed redirects exhausted.
624 // Remote site send status code 301 and link needs an update.
625 \Drupal::database()->update('linkchecker_link')
626 ->condition('lid', $link->lid)
628 'code' => $response->redirect_code,
629 'error' => $response->status_message,
631 'last_checked' => time(),
633 ->expression('fail_count', 'fail_count + 1')
636 // A HTTP status code of 301 tells us an existing link have changed to
637 // a new link. The remote site owner was so kind to provide us the new
638 // link and if we trust this change we are able to replace the old link
639 // with the new one without any hand work.
640 $auto_repair_301 = \Drupal::config('linkchecker.settings')->get('error.action_status_code_301');
641 if ($auto_repair_301 && $auto_repair_301 <= ($link->fail_count + 1) && UrlHelper::isValid($response->redirect_url, TRUE)) {
642 // Switch anonymous user to an admin.
643 $accountSwitcher = Drupal::service('account_switcher');
644 $accountSwitcher->switchTo(new UserSession(['uid' => user_load_by_name($config->get('error.impersonate_account'))]));
646 // NODES: Autorepair all nodes having this outdated link.
647 $connection = \Drupal::database();
648 $result = $connection->query('SELECT nid FROM {linkchecker_node} WHERE lid = :lid', [':lid' => $link->lid]);
650 foreach ($result as $row) {
651 // Explicitly don't use Node::load_multiple() or the module may run
652 // into issues like http://drupal.org/node/1210606. With this logic
653 // nodes can be updated until an out of memory occurs and further
654 // updates will be made on the remaining nodes only.
655 $node = Node::load($row->nid);
657 // Has the node object loaded successfully?
658 if (is_object($node)) {
659 $node_original = clone $node;
660 $node = _linkchecker_replace_fields('node', $node->bundle(), $node, $link->url, $response->redirect_url);
662 if ($node_original != $node) {
663 // Always use the default revision setting. For more information,
664 // see node_object_prepare().
665 $node_options = \Drupal::config('linkchecker.settings')->get('node_options_' . $node->bundle());
666 $node->revision = in_array('revision', $node_options);
668 // Generate a log message for the node_revisions table, visible on
669 // the node's revisions tab.
670 $node->log = t('Changed permanently moved link in %node from %src to %dst.',
672 '%node' => url('node/' . $node->id()),
673 '%src' => $link->url,
674 '%dst' => $response->redirect_url
678 // Save changed node and update the node link list.
680 linkchecker_watchdog_log('linkchecker', 'Changed permanently moved link in %node from %src to %dst.',
682 '%node' => url('node/' . $node->id()),
683 '%src' => $link->url,
684 '%dst' => $response->redirect_url
690 linkchecker_watchdog_log('linkchecker', 'Link update in node failed. Permanently moved link %src not found in node %node. Manual fix required.',
691 ['%node' => url('node/' . $row->nid), '%src' => $link->url],
697 linkchecker_watchdog_log('linkchecker', 'Loading node %node for update failed. Manual fix required.',
698 ['%node' => $row->nid],
704 // COMMENTS: Autorepair all comments having this outdated link.
705 $connection = \Drupal::database();
706 $result = $connection->query('SELECT cid FROM {linkchecker_comment} WHERE lid = :lid', [':lid' => $link->lid]);
708 foreach ($result as $row) {
709 // Explicitly don't use comment_load_multiple() or the module may run
710 // into issues like http://drupal.org/node/1210606. With this logic
711 // comment can be updated until an out of memory occurs and further
712 // updates will be made on the remaining comments only.
713 $comment = comment_load($row->cid);
715 // Has the comment object loaded successfully?
716 if (is_object($comment)) {
717 $comment_original = clone $comment;
719 // Replace links in subject.
720 _linkchecker_link_replace($comment->subject, $link->url, $response->redirect_url);
722 // Replace links in fields.
723 $comment = _linkchecker_replace_fields('comment', $comment->node_type, $comment, $link->url, $response->redirect_url);
725 // Save changed comment and update the comment link list.
726 if ($comment_original != $comment) {
727 comment_save($comment);
728 linkchecker_watchdog_log('linkchecker', 'Changed permanently moved link in comment %comment from %src to %dst.', ['%comment' => $comment->cid, '%src' => $link->url, '%dst' => $response->redirect_url], RfcLogLevel::INFO);
731 linkchecker_watchdog_log('linkchecker', 'Link update in comment failed. Permanently moved link %src not found in comment %comment. Manual fix required.', ['%comment' => $comment->cid, '%src' => $link->url], RfcLogLevel::WARNING);
735 linkchecker_watchdog_log('linkchecker', 'Loading comment %comment for update failed. Manual fix required.', ['%comment' => $comment->cid], WATCHDOG_ERROR);
739 // CUSTOM BLOCKS: Autorepair all custom blocks having this outdated
741 $connection = \Drupal::database();
742 $result = $connection->query('SELECT bid FROM {linkchecker_block_custom} WHERE lid = :lid', [':lid' => $link->lid]);
743 foreach ($result as $row) {
744 $block_custom = linkchecker_block_custom_block_get($row->bid);
746 // Has the custom block object loaded successfully?
747 if (is_object($block_custom)) {
748 $block_custom_original = clone $block_custom;
750 // Now replace the outdated link with the permanently moved one in
751 // all custom block fields.
752 _linkchecker_link_replace($block_custom->info, $link->url, $response->redirect_url);
753 _linkchecker_link_replace($block_custom->body['value'], $link->url, $response->redirect_url);
755 if ($block_custom_original != $block_custom) {
756 // Save changed block and update the block link list.
757 block_custom_block_save((array) $block_custom, $block_custom->delta);
758 // There is no hook that fires on block_custom_block_save(),
759 // therefore do link extraction programmatically.
760 _linkchecker_add_block_custom_links($block_custom, $block_custom->delta);
761 linkchecker_watchdog_log('linkchecker', 'Changed permanently moved link in custom block %bid from %src to %dst.', ['%bid' => $block_custom->delta, '%src' => $link->url, '%dst' => $response->redirect_url], RfcLogLevel::INFO);
764 linkchecker_watchdog_log('linkchecker', 'Link update in block failed. Permanently moved link %src not found in block %bid. Manual fix required.', ['%bid' => $block_custom->delta, '%src' => $link->url], RfcLogLevel::WARNING);
768 linkchecker_watchdog_log('linkchecker', 'Loading block %bid for update failed. Manual fix required.', ['%bid' => $block_custom->delta], RfcLogLevel::ERROR);
772 // Revert user back to anonymous.
773 $accountSwitcher->switchBack();
776 linkchecker_watchdog_log('linkchecker', 'Link %link has changed and needs to be updated.', ['%link' => $link->url], RfcLogLevel::NOTICE, l(t('Broken links'), 'admin/reports/linkchecker'));
781 \Drupal::database()->update('linkchecker_link')
782 ->condition('lid', $link->lid)
784 'code' => $response->code,
785 'error' => $response->error,
787 'last_checked' => time(),
789 ->expression('fail_count', 'fail_count + 1')
791 $linkchecker_report_url = Url::fromRoute('linkchecker.admin_report_page', ['attributes' => ['target' => '_blank']]);
792 linkchecker_watchdog_log('linkchecker', 'Broken link %link has been found.', ['%link' => $link->url], RfcLogLevel::NOTICE, Drupal::l(t('Broken links'), $linkchecker_report_url));
794 // If unpublishing limit is reached, unpublish all nodes having this link.
795 $linkchecker_action_status_code_404 = \Drupal::config('linkchecker.settings')->get('error.action_status_code_404');
796 if ($linkchecker_action_status_code_404 && $linkchecker_action_status_code_404 <= ($link->fail_count + 1)) {
797 // Switch anonymous user to an admin.
798 $accountSwitcher = Drupal::service('account_switcher');
799 $accountSwitcher->switchTo(new UserSession(['uid' => user_load_by_name($config->get('error.impersonate_account'))]));
800 _linkchecker_unpublish_nodes($link->lid);
801 $accountSwitcher->switchBack();
806 // - 405: Special error handling if method is not allowed. Switch link
807 // checking to GET method and try again.
808 \Drupal::database()->update('linkchecker_link')
809 ->condition('lid', $link->lid)
812 'code' => $response->code,
813 'error' => $response->error,
815 'last_checked' => time(),
817 ->expression('fail_count', 'fail_count + 1')
820 $linkchecker_report_url = Url::fromRoute('linkchecker.admin_report_page', ['attributes' => ['target' => '_blank']]);
821 linkchecker_watchdog_log('linkchecker', 'Method HEAD is not allowed for link %link. Method has been changed to GET.', ['%link' => $link->url], RfcLogLevel::NOTICE, Drupal::l(t('Broken links'), $linkchecker_report_url));
825 // - 500: Like WGET, try with GET on "500 Internal server error".
826 // - If GET also fails with status code 500, than the link is broken.
827 if ($link->method == 'GET' && $response->code == 500) {
828 \Drupal::database()->update('linkchecker_link')
829 ->condition('lid', $link->lid)
831 'code' => $response->code,
832 'error' => $response->error,
834 'last_checked' => time(),
836 ->expression('fail_count', 'fail_count + 1')
839 $linkchecker_report_url = Url::fromRoute('linkchecker.admin_report_page', ['attributes' => ['target' => '_blank']]);
840 linkchecker_watchdog_log('linkchecker', 'Broken link %link has been found.', ['%link' => $link->url], RfcLogLevel::NOTICE, Drupal::l(t('Broken links'), $linkchecker_report_url));
843 \Drupal::database()->update('linkchecker_link')
844 ->condition('lid', $link->lid)
847 'code' => $response->code,
848 'error' => $response->error,
850 'last_checked' => time(),
852 ->expression('fail_count', 'fail_count + 1')
855 $linkchecker_report_url = Url::fromRoute('linkchecker.admin_report_page', ['attributes' => ['target' => '_blank']]);
856 linkchecker_watchdog_log('linkchecker', 'Internal server error for link %link. Method has been changed to GET.', ['%link' => $link->url], RfcLogLevel::NOTICE, Drupal::l(t('Broken links'), $linkchecker_report_url));
862 // Don't treat ignored response codes as errors.
863 if (in_array($response->code, $ignore_response_codes)) {
864 \Drupal::database()->update('linkchecker_link')
865 ->condition('lid', $link->lid)
867 'code' => $response->code,
868 'error' => $response->error,
870 'last_checked' => time(),
873 // linkchecker_watchdog_log('linkchecker', 'Unhandled link error %link has been found.', array('%link' => $link->url), RfcLogLevel::ERROR, l(t('Broken links'), 'admin/reports/linkchecker'));
876 \Drupal::database()->update('linkchecker_link')
877 ->condition('lid', $link->lid)
879 'code' => $response->code,
880 'error' => $response->error,
882 'last_checked' => time(),
884 ->expression('fail_count', 'fail_count + 1')
886 // linkchecker_watchdog_log('linkchecker', 'Unhandled link error %link has been found.', array('%link' => $link->url), RfcLogLevel::ERROR, l(t('Broken links'), 'admin/reports/linkchecker'));
891 $response = new stdClass();
/**
 * Implements hook_node_type_delete().
 *
 * @fixme: Remove after migration.
 */
function linkchecker_node_type_delete($info) {
  // Deliberately a no-op at the moment. The Drupal 7 code deleted the
  // per-content-type variables "linkchecker_scan_node_<type>" and
  // "linkchecker_scan_comment_<type>" at this point.
  // @todo: Refactor this under D8.
}
/**
 * Implements hook_ENTITY_TYPE_prepare_form() for node entities.
 *
 * Shows a warning on the edit form of an existing node for every enabled link
 * of that node that failed at least once and whose status code is not listed
 * in the ignored response codes.
 */
function linkchecker_node_prepare_form(NodeInterface $node, $operation, FormStateInterface $form_state) {
  // Only the edit form of an already saved node is of interest.
  // @todo: Review if this is the correct upgrade path to D8.
  if ($node->isNew()) {
    return;
  }

  // The ignore list is stored as one response code per line.
  $settings = \Drupal::config('linkchecker.settings');
  $ignore_response_codes = preg_split('/(\r\n?|\n)/', $settings->get('error.ignore_response_codes'));

  $arguments = [
    ':nid' => $node->id(),
    ':fail_count' => 0,
    ':status' => 1,
    ':codes[]' => $ignore_response_codes,
  ];
  $failed_links = \Drupal::database()->query('SELECT ll.* FROM {linkchecker_node} ln INNER JOIN {linkchecker_link} ll ON ln.lid = ll.lid WHERE ln.nid = :nid AND ll.fail_count > :fail_count AND ll.status = :status AND ll.code NOT IN (:codes[])', $arguments);

  foreach ($failed_links as $failed_link) {
    // Skip links the current user is not allowed to see.
    if (!_linkchecker_link_access($failed_link)) {
      continue;
    }
    $message = \Drupal::translation()->formatPlural($failed_link->fail_count, 'Link check of <a href="@url">@url</a> failed once (status code: @code).', 'Link check of <a href="@url">@url</a> failed @count times (status code: @code).', ['@url' => $failed_link->url, '@code' => $failed_link->code]);
    drupal_set_message($message, 'warning', FALSE);
  }
}
/**
 * Implements hook_node_delete().
 *
 * Drops all link references recorded for the node.
 */
function linkchecker_node_delete($node) {
  $nid = $node->id();
  _linkchecker_delete_node_links($nid);
}
/**
 * Implements hook_node_insert().
 *
 * Collects the links of a newly created node when its content type is
 * configured for scanning and the node is published.
 */
function linkchecker_node_insert($node) {
  // Every moderation module saving a forward revision needs to exit here;
  // only the default revision is scanned.
  // @todo: Refactor this workaround under D8.
  if ($node->isDefaultRevision()) {
    $scan_enabled = NodeType::load($node->bundle())->getThirdPartySetting('linkchecker', 'scan_node', FALSE);

    // The node is going to be published.
    if ($scan_enabled && $node->isPublished()) {
      _linkchecker_add_node_links($node);
    }
  }
}
/**
 * Implements hook_node_update().
 *
 * Re-collects the links of an updated node, or removes its link references
 * when the node is not (or no longer) eligible for scanning.
 */
function linkchecker_node_update($node) {
  // Every moderation module saving a forward revision needs to exit here;
  // only the default revision is scanned.
  // @todo: Refactor this workaround under D8.
  if (!$node->isDefaultRevision()) {
    return;
  }

  $scan_enabled = NodeType::load($node->bundle())->getThirdPartySetting('linkchecker', 'scan_node', FALSE);
  $should_scan = $scan_enabled && $node->isPublished();

  if ($should_scan) {
    // The node is going to be published.
    _linkchecker_add_node_links($node);
    return;
  }

  // The node is going to be unpublished.
  linkchecker_node_delete($node);
}
/**
 * Implements hook_comment_delete().
 *
 * Drops all link references recorded for the comment.
 */
function linkchecker_comment_delete($comment) {
  // Comments are entity objects here (the sibling hooks call entity methods on
  // them), so the scalar ID comes from id() rather than the "cid" property.
  _linkchecker_delete_comment_links($comment->id());
}
/**
 * Implements hook_comment_insert().
 *
 * Collects the links of a newly created, published comment when comment
 * scanning is enabled for the content type of the commented node.
 */
function linkchecker_comment_insert($comment) {
  // The scan setting lives on node types, so only published comments attached
  // to nodes can qualify.
  if ($comment->getCommentedEntityTypeId() != 'node' || !$comment->isPublished()) {
    return;
  }

  // The node type form stores the flag as the "scan_comment" third party
  // setting of the node type, so it has to be read from there.
  $node = Node::load($comment->getCommentedEntityId());
  $node_type = $node ? NodeType::load($node->bundle()) : NULL;

  if ($node_type && $node_type->getThirdPartySetting('linkchecker', 'scan_comment', FALSE)) {
    _linkchecker_add_comment_links($comment);
  }
}
/**
 * Implements hook_comment_update().
 *
 * Re-collects the links of an updated comment, or removes its link references
 * when the comment is not (or no longer) eligible for scanning.
 */
function linkchecker_comment_update($comment) {
  $should_scan = FALSE;

  // The scan setting lives on node types, so only published comments attached
  // to nodes can qualify.
  if ($comment->getCommentedEntityTypeId() == 'node' && $comment->isPublished()) {
    // The node type form stores the flag as the "scan_comment" third party
    // setting of the node type, so it has to be read from there.
    $node = Node::load($comment->getCommentedEntityId());
    $node_type = $node ? NodeType::load($node->bundle()) : NULL;
    $should_scan = $node_type && $node_type->getThirdPartySetting('linkchecker', 'scan_comment', FALSE);
  }

  if ($should_scan) {
    // The comment is going to be published.
    _linkchecker_add_comment_links($comment);
  }
  else {
    // The comment is going to be unpublished.
    linkchecker_comment_delete($comment);
  }
}
/**
 * Implements hook_form_alter().
 *
 * Attaches the linkchecker submit handlers to the custom block add, configure
 * and delete forms, and shows broken link warnings on the configure form.
 */
function linkchecker_form_alter(&$form, FormStateInterface $form_state, $form_id) {
  switch ($form_id) {
    // Catch the custom block add/configure form and add custom submit handler.
    case 'block_add_block_form':
      // Add custom submit handler to custom block add form.
      $form['#submit'][] = 'linkchecker_block_custom_add_form_submit';
      break;

    // @todo block_admin_configure has been removed in D8
    case 'block_admin_configure':
      // Take the block ID from the sixth segment of the current path, the
      // position the legacy arg(5) call addressed.
      $path_args = explode('/', trim(\Drupal::service('path.current')->getPath(), '/'));
      $bid = isset($path_args[5]) ? $path_args[5] : NULL;

      // When displaying the form (nothing submitted yet), show the broken
      // links warning. The form state is an object, so the user input is read
      // through its accessor.
      $user_input = $form_state->getUserInput();
      if (empty($user_input) && is_numeric($bid)) {
        // Show a message on custom block edit page if a link check failed once
        // or more.
        $ignore_response_codes = preg_split('/(\r\n?|\n)/', \Drupal::config('linkchecker.settings')->get('error.ignore_response_codes'));
        $connection = \Drupal::database();
        $links = $connection->query('SELECT ll.* FROM {linkchecker_block_custom} lb INNER JOIN {linkchecker_link} ll ON lb.lid = ll.lid WHERE lb.bid = :bid AND ll.fail_count > :fail_count AND ll.status = :status AND ll.code NOT IN (:codes[])', [':bid' => $bid, ':fail_count' => 0, ':status' => 1, ':codes[]' => $ignore_response_codes]);
        foreach ($links as $link) {
          if (_linkchecker_link_access($link)) {
            drupal_set_message(\Drupal::translation()->formatPlural($link->fail_count, 'Link check of <a href=":url">:url</a> failed once (status code: @code).', 'Link check of <a href=":url">:url</a> failed @count times (status code: @code).', [':url' => $link->url, '@code' => $link->code]), 'warning', FALSE);
          }
        }
      }

      // Add custom submit handler to custom block configuration form.
      $form['#submit'][] = 'linkchecker_block_custom_configure_form_submit';
      break;

    case 'block_custom_block_delete':
      // Add custom submit handler to custom block delete form.
      $form['#submit'][] = 'linkchecker_block_custom_delete_form_submit';
      break;
  }
}
/**
 * Implements hook_form_FORM_ID_alter() for \Drupal\node\NodeTypeForm.
 *
 * Adds linkchecker options to the node type form.
 *
 * @see NodeTypeForm::form()
 * @see linkchecker_form_node_type_form_submit()
 */
function linkchecker_form_node_type_form_alter(&$form, FormStateInterface $form_state) {
  /** @var \Drupal\node\NodeTypeInterface $type */
  $type = $form_state->getFormObject()->getEntity();

  // Container shown as a vertical tab among the additional settings.
  $details = [];
  $details['#type'] = 'details';
  $details['#title'] = t('Link checker');
  $details['#attached'] = [
    'library' => ['linkchecker/linkchecker.content_types'],
  ];
  $details['#group'] = 'additional_settings';
  $form['linkchecker'] = $details;

  // Per content type switch for scanning node content.
  $form['linkchecker']['linkchecker_scan_node'] = [
    '#type' => 'checkbox',
    '#title' => t('Scan content'),
    '#description' => t('Enables link checking for this content type.'),
    '#default_value' => $type->getThirdPartySetting('linkchecker', 'scan_node', FALSE),
  ];

  // The comment switch is only offered when the comment module is enabled.
  $comment_enabled = \Drupal::moduleHandler()->moduleExists('comment');
  if ($comment_enabled) {
    $form['linkchecker']['linkchecker_scan_comment'] = [
      '#type' => 'checkbox',
      '#title' => t('Scan comments'),
      '#description' => t('Enables link checking for comments.'),
      '#default_value' => $type->getThirdPartySetting('linkchecker', 'scan_comment', FALSE),
    ];
  }

  $form['#submit'][] = 'linkchecker_form_node_type_form_submit';
  $form['#entity_builders'][] = 'linkchecker_form_node_type_form_builder';
}
/**
 * Submit handler for forms with linkchecker options.
 *
 * Starts a batch import of nodes and/or comments of the content type when the
 * corresponding scan option has just been switched on.
 *
 * @see linkchecker_form_node_type_form_alter()
 */
function linkchecker_form_node_type_form_submit(&$form, FormStateInterface $form_state) {
  $node_type = $form_state->getValue('type');

  $node_scan_was_enabled = $form['linkchecker']['linkchecker_scan_node']['#default_value'];
  if (!$node_scan_was_enabled && $form_state->getValue('linkchecker_scan_node')) {
    // We need to scan this node-type now.
    module_load_include('inc', 'linkchecker', 'linkchecker.batch');
    batch_set(_linkchecker_batch_import_nodes([$node_type]));
  }

  // Default to TRUE if comment module isn't enabled, we don't care.
  $comment_scan_was_enabled = \Drupal::moduleHandler()->moduleExists('comment')
    ? $form['linkchecker']['linkchecker_scan_comment']['#default_value']
    : TRUE;

  // Use !empty here for when comment module isn't enabled and there is no
  // submitted value for the comment option.
  if (!$comment_scan_was_enabled && !empty($form_state->getValue('linkchecker_scan_comment'))) {
    // We need to scan comments for this node-type now.
    module_load_include('inc', 'linkchecker', 'linkchecker.batch');
    batch_set(_linkchecker_batch_import_comments([$node_type]));
  }
}
/**
 * Entity builder for the node type form with linkchecker options.
 *
 * Copies the submitted scan options into the third party settings of the node
 * type.
 *
 * @see linkchecker_form_node_type_form_alter()
 */
function linkchecker_form_node_type_form_builder($entity_type, NodeTypeInterface $type, &$form, FormStateInterface $form_state) {
  // Third party setting name => form element holding its value.
  $setting_sources = [
    'scan_node' => 'linkchecker_scan_node',
    'scan_comment' => 'linkchecker_scan_comment',
  ];
  foreach ($setting_sources as $setting_name => $element_name) {
    $type->setThirdPartySetting('linkchecker', $setting_name, $form_state->getValue($element_name));
  }
}
/**
 * Implements hook_form_BASE_FORM_ID_alter().
 *
 * Shows a warning on the comment edit form for every enabled link of the
 * comment that failed at least once and whose status code is not ignored.
 */
function linkchecker_form_comment_form_alter(&$form, &$form_state, $form_id) {
  // When displaying the form as 'view' or 'preview', show the broken links
  // warning.
  $user_input = $form_state->getUserInput();
  $is_view_or_preview = empty($user_input) || (isset($user_input['op']) && $user_input['op'] == t('Preview'));
  if (!$is_view_or_preview) {
    return;
  }

  // The current path starts with a slash, so "/comment/123/edit" splits into
  // ['', 'comment', '123', 'edit']: the comment ID is element 2 and the
  // operation element 3.
  $path_args = explode('/', \Drupal::service('path.current')->getPath());
  $is_comment_edit = count($path_args) >= 4
    && $path_args[1] == 'comment'
    && is_numeric($path_args[2])
    && $path_args[3] == 'edit';
  if (!$is_comment_edit) {
    return;
  }
  $cid = $path_args[2];

  // Show a message on comment edit page if a link check failed once or
  // more.
  $ignore_response_codes = preg_split('/(\r\n?|\n)/', \Drupal::config('linkchecker.settings')->get('error.ignore_response_codes'));
  $connection = \Drupal::database();
  $links = $connection->query('SELECT ll.* FROM {linkchecker_comment} lc INNER JOIN {linkchecker_link} ll ON lc.lid = ll.lid WHERE lc.cid = :cid AND ll.fail_count > :fail_count AND ll.status = :status AND ll.code NOT IN (:codes[])', [':cid' => $cid, ':fail_count' => 0, ':status' => 1, ':codes[]' => $ignore_response_codes]);
  foreach ($links as $link) {
    if (_linkchecker_link_access($link)) {
      drupal_set_message(\Drupal::translation()->formatPlural($link->fail_count, 'Link check of <a href="@url">@url</a> failed once (status code: @code).', 'Link check of <a href="@url">@url</a> failed @count times (status code: @code).', ['@url' => $link->url, '@code' => $link->code]), 'warning', FALSE);
    }
  }
}
/**
 * Custom submit handler for block add page.
 *
 * Collects the links of the custom block that has just been created, if
 * block scanning is enabled.
 */
function linkchecker_block_custom_add_form_submit($form, &$form_state) {
  if (\Drupal::config('linkchecker.settings')->get('scan_blocks')) {
    // The highest block ID is taken as the ID of the block just added.
    // NOTE(review): {block_custom} is the Drupal 7 table name - confirm it
    // still exists for this port.
    $connection = \Drupal::database();
    $bid = $connection->query('SELECT MAX(bid) FROM {block_custom}')->fetchField();

    // The form state is an object; submitted values come from getValues().
    _linkchecker_add_block_custom_links($form_state->getValues(), $bid);
  }
}
/**
 * Custom submit handler for block configure page.
 *
 * Re-collects the links of the configured custom block, if block scanning is
 * enabled.
 */
function linkchecker_block_custom_configure_form_submit($form, &$form_state) {
  if (\Drupal::config('linkchecker.settings')->get('scan_blocks')) {
    // The form state is an object; submitted values come from its accessors.
    _linkchecker_add_block_custom_links($form_state->getValues(), $form_state->getValue('delta'));
  }
}
/**
 * Custom submit handler for block delete page.
 *
 * Drops all link references recorded for the deleted custom block.
 */
function linkchecker_block_custom_delete_form_submit($form, &$form_state) {
  // The form state is an object; the block ID is read through getValue().
  _linkchecker_delete_block_custom_links($form_state->getValue('bid'));
}
/**
 * Returns information from database about a user-created (custom) block.
 *
 * @param int $bid
 *   ID of the block to get information for.
 *
 * @return object|false
 *   Associative object of information stored in the database for this block,
 *   or FALSE if block_custom_block_get() returned nothing. Object keys:
 *   - module: 'block' as the source of the custom blocks data.
 *   - delta: Block ID.
 *   - info: Block description.
 *   - body['value']: Block contents.
 *   - body['format']: Filter ID of the filter format for the body.
 */
function linkchecker_block_custom_block_get($bid) {
  $record = block_custom_block_get($bid);
  if (!$record) {
    return FALSE;
  }

  // Map the raw block record onto the object layout described above.
  $block = new stdClass();
  $block->module = 'block';
  $block->delta = $record['bid'];
  $block->info = $record['info'];
  $block->body = [
    'value' => $record['body'],
    'format' => $record['format'],
  ];

  return $block;
}
/**
 * Extracts links from a node.
 *
 * @param object $node
 *   The fully populated node object.
 * @param bool $return_field_names
 *   If set to TRUE, the returned array will contain the link URLs as keys, and
 *   each element will be an array containing all field names in which the URL
 *   is found. Otherwise, a simple array of URLs will be returned.
 *
 * @return array
 *   An array whose keys are fully qualified and unique URLs found in the node
 *   (as returned by _linkchecker_extract_links()), or a more complex
 *   structured array (see above) if $return_field_names is TRUE.
 */
function _linkchecker_extract_node_links($node, $return_field_names = FALSE) {
  // Collect the text of the node fields to scan, keyed by field name, and a
  // flat list of the same text items for the link extraction.
  $text_items_by_field = _linkchecker_parse_fields('node', $node->bundle(), $node, TRUE);
  $text_items = _linkchecker_array_values_recursive($text_items_by_field);

  // Get the node path for extraction of relative links.
  // @FIXME: Review again. What happens with languages in D8?
  $path = Url::fromUri('base:' . 'node/' . $node->id())->toString();

  // Extract all links in a node.
  $links = _linkchecker_extract_links(implode(' ', $text_items), $path);

  // Return either the array of links, or an array of field names containing
  // each link, depending on what was requested.
  if (!$return_field_names) {
    return $links;
  }

  $field_names = [];
  foreach ($text_items_by_field as $field_name => $items) {
    foreach ($items as $item) {
      foreach ($links as $uri => $link) {
        // We only need to do a quick check here to see if the URL appears
        // anywhere in the text; if so, that means users with access to this
        // field will be able to see the URL (and any private data such as
        // passwords contained in it). This is sufficient for the purposes of
        // _linkchecker_link_node_ids(), where this information is used.
        foreach ($link as $original_link) {
          // URLs in $links have been auto-decoded by DOMDocument->loadHTML, so
          // the raw text may contain the HTML-encoded form instead. Encode
          // ampersands for the second comparison.
          // NOTE: htmlspecialchars() is 30% slower than str_replace().
          $found = strpos($item, $original_link) !== FALSE
            || strpos($item, str_replace('&', '&amp;', $original_link)) !== FALSE;
          if ($found) {
            $field_names[$uri][$field_name] = $field_name;
          }
        }
      }
    }
  }

  return $field_names;
}
/**
 * Add node links to database.
 *
 * @param object $node
 *   The fully populated node object.
 * @param bool $skip_missing_links_detection
 *   To prevent endless batch loops the value need to be TRUE. With FALSE
 *   the need for content re-scans is detected by the number of missing links.
 */
function _linkchecker_add_node_links($node, $skip_missing_links_detection = FALSE) {
  $links = array_keys(_linkchecker_extract_node_links($node));
  $nid = $node->id();

  // Node have links.
  if (!empty($links)) {
    // Remove all links from the links array already in the database and only
    // add missing links to database.
    $missing_links = _linkchecker_node_links_missing($nid, $links);
    $connection = \Drupal::database();

    // Only add links to database that do not exists.
    $i = 0;
    foreach ($missing_links as $url) {
      $urlhash = Crypt::hashBase64($url);
      $link = $connection->query('SELECT lid FROM {linkchecker_link} WHERE urlhash = :urlhash', [':urlhash' => $urlhash])->fetchObject();
      if (!$link) {
        // Unknown URL: store it and keep the new link ID returned by the
        // insert query for the reference row below.
        $link = new stdClass();
        $link->urlhash = $urlhash;
        $link->url = $url;
        $link->status = _linkchecker_link_check_status_filter($url);
        $link->lid = $connection->insert('linkchecker_link')
          ->fields([
            'urlhash' => $link->urlhash,
            'url' => $link->url,
            'status' => $link->status,
          ])
          ->execute();
      }
      $connection->insert('linkchecker_node')
        ->fields([
          'nid' => $nid,
          'lid' => $link->lid,
        ])
        ->execute();

      // Break processing if max links limit per run has been reached.
      $i++;
      if ($i >= LINKCHECKER_SCAN_MAX_LINKS_PER_RUN) {
        break;
      }
    }

    // The first chunk of links not yet found in the {linkchecker_link} table
    // have now been imported by the above code. If the number of missing links
    // still exceeds the scan limit defined in LINKCHECKER_SCAN_MAX_LINKS_PER_RUN
    // the content need to be re-scanned until all links have been collected and
    // saved in {linkchecker_link} table.
    //
    // Above code has already scanned a number of
    // LINKCHECKER_SCAN_MAX_LINKS_PER_RUN links and need to be subtracted from
    // the number of missing links to calculate the correct number of re-scan
    // rounds.
    //
    // To prevent endless loops the $skip_missing_links_detection need to be
    // TRUE. This value will be set by the calling batch process that already
    // knows that it is running a batch job and the number of required re-scan
    // rounds.
    $missing_links_count = count($missing_links) - LINKCHECKER_SCAN_MAX_LINKS_PER_RUN;
    if (!$skip_missing_links_detection && $missing_links_count > 0) {
      module_load_include('inc', 'linkchecker', 'linkchecker.batch');
      batch_set(_linkchecker_batch_import_single_node($nid, $missing_links_count));

      // If batches were set in the submit handlers, we process them now,
      // possibly ending execution. We make sure we do not react to the batch
      // that is already being processed.
      if ($batch = &batch_get() && !isset($batch['current_set'])) {
        batch_process('node/' . $nid);
      }
    }
  }

  // Remove dead link references for cleanup reasons as very last step.
  _linkchecker_cleanup_node_references($nid, $links);
}
/**
 * Add comment links to database.
 *
 * @param object $comment
 *   The fully populated comment object.
 * @param bool $skip_missing_links_detection
 *   To prevent endless batch loops the value need to be TRUE. With FALSE
 *   the need for content re-scans is detected by the number of missing links.
 */
function _linkchecker_add_comment_links($comment, $skip_missing_links_detection = FALSE) {
  $filter = new stdClass();
  $filter->settings['filter_url_length'] = 72;

  $cid = $comment->id();
  $commented_nid = $comment->getCommentedEntityId();

  // Create array of comment fields to scan.
  $text_items = [];
  $text_items[] = _filter_url($comment->getSubject(), $filter);
  // NOTE(review): the comment bundle replaces the Drupal 7 "node_type"
  // property here - confirm against _linkchecker_parse_fields().
  $text_items = array_merge($text_items, _linkchecker_parse_fields('comment', $comment->bundle(), $comment));

  // Get the absolute node path for extraction of relative links. The node
  // language is only passed on when it is one of the configured languages.
  $languages = \Drupal::languageManager()->getLanguages();
  $node = Node::load($commented_nid);
  $url_options = ['absolute' => TRUE];
  if ($node) {
    $langcode = $node->language()->getId();
    if (!empty($langcode) && !empty($languages[$langcode])) {
      $url_options['language'] = $languages[$langcode];
    }
  }
  $path = \Drupal\Core\Url::fromRoute('entity.node.canonical', ['node' => $commented_nid], $url_options)->toString();

  // Extract all links in a comment.
  $links = array_keys(_linkchecker_extract_links(implode(' ', $text_items), $path));

  // Comment have links.
  if (!empty($links)) {
    // Remove all links from the links array already in the database and only
    // add missing links to database.
    $missing_links = _linkchecker_comment_links_missing($cid, $links);
    $connection = \Drupal::database();

    // Only add unique links to database that do not exist.
    $i = 0;
    foreach ($missing_links as $url) {
      $urlhash = Crypt::hashBase64($url);
      $link = $connection->query('SELECT lid FROM {linkchecker_link} WHERE urlhash = :urlhash', [':urlhash' => $urlhash])->fetchObject();
      if (!$link) {
        // Unknown URL: store it and keep the new link ID returned by the
        // insert query for the reference row below.
        $link = new stdClass();
        $link->urlhash = $urlhash;
        $link->url = $url;
        $link->status = _linkchecker_link_check_status_filter($url);
        $link->lid = $connection->insert('linkchecker_link')
          ->fields([
            'urlhash' => $link->urlhash,
            'url' => $link->url,
            'status' => $link->status,
          ])
          ->execute();
      }
      $connection->insert('linkchecker_comment')
        ->fields([
          'cid' => $cid,
          'lid' => $link->lid,
        ])
        ->execute();

      // Break processing if max links limit per run has been reached.
      $i++;
      if ($i >= LINKCHECKER_SCAN_MAX_LINKS_PER_RUN) {
        break;
      }
    }

    // The first chunk of links not yet found in the {linkchecker_link} table
    // have now been imported by the above code. If the number of missing links
    // still exceeds the scan limit defined in LINKCHECKER_SCAN_MAX_LINKS_PER_RUN
    // the content need to be re-scanned until all links have been collected and
    // saved in {linkchecker_link} table.
    //
    // Above code has already scanned a number of
    // LINKCHECKER_SCAN_MAX_LINKS_PER_RUN links and need to be subtracted from
    // the number of missing links to calculate the correct number of re-scan
    // rounds.
    //
    // To prevent endless loops the $skip_missing_links_detection need to be
    // TRUE. This value will be set by the calling batch process that already
    // knows that it is running a batch job and the number of required re-scan
    // rounds.
    $missing_links_count = count($missing_links) - LINKCHECKER_SCAN_MAX_LINKS_PER_RUN;
    if (!$skip_missing_links_detection && $missing_links_count > 0) {
      module_load_include('inc', 'linkchecker', 'linkchecker.batch');
      batch_set(_linkchecker_batch_import_single_comment($cid, $missing_links_count));

      // If batches were set in the submit handlers, we process them now,
      // possibly ending execution. We make sure we do not react to the batch
      // that is already being processed.
      if ($batch = &batch_get() && !isset($batch['current_set'])) {
        batch_process('node/' . $commented_nid);
      }
    }
  }

  // Remove dead link references for cleanup reasons as very last step.
  _linkchecker_cleanup_comment_references($cid, $links);
}
/**
 * Add custom block links to database.
 *
 * @param array|object $block_custom
 *   The fully populated custom block object.
 * @param int $bid
 *   Block id from table {block}.bid.
 * @param bool $skip_missing_links_detection
 *   To prevent endless batch loops the value need to be TRUE. With FALSE
 *   the need for content re-scans is detected by the number of missing links.
 */
function _linkchecker_add_block_custom_links($block_custom, $bid, $skip_missing_links_detection = FALSE) {
  // Convert custom block array to object.
  // @todo: Are we able to remove this global conversion?
  $block_custom = (object) $block_custom;

  // Custom block values are inconsistent (integers or strings) and there are
  // no usable hooks, so validate the input defensively to prevent
  // warnings/errors.
  // NOTE: Only custom blocks from block.module are supported. Skip all others.
  if ($block_custom->module != 'block' || !is_numeric($block_custom->delta) || !is_numeric($bid) || $block_custom->delta != $bid) {
    return;
  }

  $filter = new stdClass();
  $filter->settings['filter_url_length'] = 72;

  // Create array of custom block fields to scan. Not every field has to
  // exist.
  $text_items = [];
  if (!empty($block_custom->info)) {
    $text_items[] = _filter_url($block_custom->info, $filter);
  }
  // $block_custom from editing/scanning a block. See block_custom_block_save().
  if (!empty($block_custom->body) && is_array($block_custom->body) && array_key_exists('value', $block_custom->body) && array_key_exists('format', $block_custom->body)) {
    $text_items[] = _linkchecker_check_markup($block_custom->body['value'], $block_custom->body['format']);
  }

  // Extract all links in a custom block.
  $links = array_keys(_linkchecker_extract_links(implode(' ', $text_items)));

  // Custom block has links.
  if (!empty($links)) {
    // Remove all links from the links array already in the database and only
    // add missing links to database.
    $missing_links = _linkchecker_block_custom_links_missing($bid, $links);
    $connection = \Drupal::database();

    // Only add unique links to database that do not exist.
    $i = 0;
    foreach ($missing_links as $url) {
      $urlhash = Crypt::hashBase64($url);
      $link = $connection->query('SELECT lid FROM {linkchecker_link} WHERE urlhash = :urlhash', [':urlhash' => $urlhash])->fetchObject();
      if (!$link) {
        // Unknown URL: store it and keep the new link ID returned by the
        // insert query for the reference row below.
        $link = new stdClass();
        $link->urlhash = $urlhash;
        $link->url = $url;
        $link->status = _linkchecker_link_check_status_filter($url);
        $link->lid = $connection->insert('linkchecker_link')
          ->fields([
            'urlhash' => $link->urlhash,
            'url' => $link->url,
            'status' => $link->status,
          ])
          ->execute();
      }
      $connection->insert('linkchecker_block_custom')
        ->fields([
          'bid' => $bid,
          'lid' => $link->lid,
        ])
        ->execute();

      // Break processing if max links limit per run has been reached.
      $i++;
      if ($i >= LINKCHECKER_SCAN_MAX_LINKS_PER_RUN) {
        break;
      }
    }

    // The first chunk of links not yet found in the {linkchecker_link} table
    // have now been imported by the above code. If the number of missing links
    // still exceeds the scan limit defined in LINKCHECKER_SCAN_MAX_LINKS_PER_RUN
    // the content need to be re-scanned until all links have been collected and
    // saved in {linkchecker_link} table.
    //
    // Above code has already scanned a number of
    // LINKCHECKER_SCAN_MAX_LINKS_PER_RUN links and need to be subtracted from
    // the number of missing links to calculate the correct number of re-scan
    // rounds.
    //
    // To prevent endless loops the $skip_missing_links_detection need to be
    // TRUE. This value will be set by the calling batch process that already
    // knows that it is running a batch job and the number of required re-scan
    // rounds.
    $missing_links_count = count($missing_links) - LINKCHECKER_SCAN_MAX_LINKS_PER_RUN;
    if (!$skip_missing_links_detection && $missing_links_count > 0) {
      module_load_include('inc', 'linkchecker', 'linkchecker.batch');
      batch_set(_linkchecker_batch_import_single_block_custom($bid, $missing_links_count));

      // If batches were set in the submit handlers, we process them now,
      // possibly ending execution. We make sure we do not react to the batch
      // that is already being processed.
      if ($batch = &batch_get() && !isset($batch['current_set'])) {
        batch_process('admin/structure/block');
      }
    }
  }

  // Remove dead link references for cleanup reasons as very last step.
  _linkchecker_cleanup_block_custom_references($bid, $links);
}
/**
 * Remove all node references to links in the linkchecker_node table.
 *
 * @param int $nid
 *   The node ID.
 */
function _linkchecker_delete_node_links($nid) {
  $delete = \Drupal::database()->delete('linkchecker_node');
  $delete->condition('nid', $nid);
  $delete->execute();
}
/**
 * Remove all comment references to links in the linkchecker_comment table.
 *
 * @param int $cid
 *   The comment ID.
 */
function _linkchecker_delete_comment_links($cid) {
  $delete = \Drupal::database()->delete('linkchecker_comment');
  $delete->condition('cid', $cid);
  $delete->execute();
}
/**
 * Remove all block references to links in the linkchecker_block_custom table.
 *
 * @param int $bid
 *   The block ID.
 */
function _linkchecker_delete_block_custom_links($bid) {
  $delete = \Drupal::database()->delete('linkchecker_block_custom');
  $delete->condition('bid', $bid);
  $delete->execute();
}
/**
 * Cleanup no longer used node references to links in the linkchecker_node table.
 *
 * @param int $nid
 *   The node ID.
 * @param array $links
 *   The links still found in the node. When empty, every reference of the
 *   node is removed.
 */
function _linkchecker_cleanup_node_references($nid = 0, $links = []) {
  $connection = \Drupal::database();
  $delete = $connection->delete('linkchecker_node')
    ->condition('nid', $nid);

  if (!empty($links)) {
    // The node still has links, but others may have been removed from the
    // content: keep only references whose link is among the current URLs.
    $urlhashes = array_map('\Drupal\Component\Utility\Crypt::hashBase64', $links);
    $current_lids = $connection->select('linkchecker_link')
      ->fields('linkchecker_link', ['lid'])
      ->condition('urlhash', $urlhashes, 'IN');
    $delete->condition('lid', $current_lids, 'NOT IN');
  }

  // Without links no extra condition was added, so all references of the
  // node are deleted.
  $delete->execute();
}
/**
 * Cleanup no longer used comment references to links in the linkchecker_comment table.
 *
 * @param int $cid
 *   The comment ID.
 * @param array $links
 *   The links still found in the comment. When empty, every reference of the
 *   comment is removed.
 */
function _linkchecker_cleanup_comment_references($cid = 0, $links = []) {
  $connection = \Drupal::database();
  $delete = $connection->delete('linkchecker_comment')
    ->condition('cid', $cid);

  if (!empty($links)) {
    // The comment still has links, but others may have been removed from the
    // content: keep only references whose link is among the current URLs.
    $urlhashes = array_map('\Drupal\Component\Utility\Crypt::hashBase64', $links);
    $current_lids = $connection->select('linkchecker_link', 'll')
      ->fields('ll', ['lid'])
      ->condition('ll.urlhash', $urlhashes, 'IN');
    $delete->condition('lid', $current_lids, 'NOT IN');
  }

  // Without links no extra condition was added, so all references of the
  // comment are deleted.
  $delete->execute();
}
/**
 * Cleanup no longer used custom block references to links in the linkchecker_block_custom table.
 *
 * @param int $bid
 *   The custom block ID whose references are cleaned up.
 * @param array $links
 *   The URLs still present in the block. When empty, every reference of the
 *   block is deleted.
 */
function _linkchecker_cleanup_block_custom_references($bid = 0, $links = []) {
  $connection = \Drupal::database();

  // Every variant deletes rows of this block; only the "lid" restriction
  // depends on whether the block still contains links.
  $stale_references = $connection->delete('linkchecker_block_custom')
    ->condition('bid', $bid);

  if (!empty($links)) {
    // The block still has at least one link, but other links may have been
    // removed. Keep the references to links that are still in the content and
    // delete all others from the linkchecker_block_custom reference table.
    $url_hashes = array_map('\Drupal\Component\Utility\Crypt::hashBase64', $links);
    $current_lids = $connection->select('linkchecker_link')
      ->fields('linkchecker_link', ['lid'])
      ->condition('urlhash', $url_hashes, 'IN');

    $stale_references->condition('lid', $current_lids, 'NOT IN');
  }

  $stale_references->execute();
}
/**
 * Returns an array of node references missing in the linkchecker_node table.
 *
 * @param int $nid
 *   The node ID.
 * @param array $links
 *   An array of links.
 *
 * @return array
 *   An array of node references missing in the linkchecker_node table. The
 *   keys of $links are preserved (array_diff() semantics).
 */
function _linkchecker_node_links_missing($nid, $links) {
  // Nothing can be missing from an empty list. Returning early also avoids
  // expanding an empty array into the ":urlhashes[]" placeholder, which would
  // produce an invalid "IN ()" clause.
  if (empty($links)) {
    return [];
  }

  $connection = \Drupal::database();
  $result = $connection->query('SELECT ll.url FROM {linkchecker_link} ll INNER JOIN {linkchecker_node} ln ON ln.lid = ll.lid WHERE ln.nid = :nid AND ll.urlhash IN (:urlhashes[])', [':nid' => $nid, ':urlhashes[]' => array_map('\Drupal\Component\Utility\Crypt::hashBase64', $links)]);

  // URLs of $links that already have a reference row for this node.
  $links_in_database = [];
  foreach ($result as $row) {
    $links_in_database[] = $row->url;
  }

  return array_diff($links, $links_in_database);
}
/**
 * Returns an array of comment references missing in the linkchecker_comment table.
 *
 * @param int $cid
 *   The comment ID.
 * @param array $links
 *   An array of links.
 *
 * @return array
 *   An array of comment references missing in the linkchecker_comment table.
 *   The keys of $links are preserved (array_diff() semantics).
 */
function _linkchecker_comment_links_missing($cid, $links) {
  // Nothing can be missing from an empty list. Returning early also avoids
  // expanding an empty array into the ":urlhashes[]" placeholder, which would
  // produce an invalid "IN ()" clause.
  if (empty($links)) {
    return [];
  }

  $connection = \Drupal::database();
  $result = $connection->query('SELECT ll.url FROM {linkchecker_link} ll INNER JOIN {linkchecker_comment} lc ON lc.lid = ll.lid WHERE lc.cid = :cid AND ll.urlhash IN (:urlhashes[])', [':cid' => $cid, ':urlhashes[]' => array_map('\Drupal\Component\Utility\Crypt::hashBase64', $links)]);

  // URLs of $links that already have a reference row for this comment.
  $links_in_database = [];
  foreach ($result as $row) {
    $links_in_database[] = $row->url;
  }

  return array_diff($links, $links_in_database);
}
/**
 * Returns an array of custom block references missing in the linkchecker_block_custom table.
 *
 * @param int $bid
 *   The custom block ID.
 * @param array $links
 *   An array of links.
 *
 * @return array
 *   An array of custom block references missing in the linkchecker_block_custom
 *   table. The keys of $links are preserved (array_diff() semantics).
 */
function _linkchecker_block_custom_links_missing($bid, $links) {
  // Nothing can be missing from an empty list. Returning early also avoids
  // expanding an empty array into the ":urlhashes[]" placeholder, which would
  // produce an invalid "IN ()" clause.
  if (empty($links)) {
    return [];
  }

  $connection = \Drupal::database();
  $result = $connection->query('SELECT ll.url FROM {linkchecker_link} ll INNER JOIN {linkchecker_block_custom} lb ON lb.lid = ll.lid WHERE lb.bid = :bid AND ll.urlhash IN (:urlhashes[])', [':bid' => $bid, ':urlhashes[]' => array_map('\Drupal\Component\Utility\Crypt::hashBase64', $links)]);

  // URLs of $links that already have a reference row for this block.
  $links_in_database = [];
  foreach ($result as $row) {
    $links_in_database[] = $row->url;
  }

  return array_diff($links, $links_in_database);
}
1751 * Parse the urls from entity.
1753 * This function parse all fields from the entity and returns an array of
1754 * filtered field items.
1756 * @param string $entity_type
1757 * The type of entity; e.g., 'node', 'comment'.
1758 * @param string $bundle_name
1759 * The name of the bundle aka node type, e.g., 'article', 'page'.
1760 * @param object $entity
1761 * The entity to parse, a $node or a $comment object.
1762 * @param bool $return_field_names
1763 * If set to TRUE, the returned array will contain the content as keys, and
1764 * each element will be an array containing all field names in which the
1765 * content is found. Otherwise, a simple array with content will be returned.
1768 * Array of field items with filters applied.
1770 function _linkchecker_parse_fields($entity_type, $bundle_name, $entity, $return_field_names = FALSE) {
1772 $text_items_by_field = [];
1774 // Create settings for _filter_url() function.
1775 $filter = new stdClass();
1776 $filter->settings['filter_url_length'] = 72;
1778 // Collect the fields from this entity_type and bundle.
1779 $entityManager = \Drupal::service('entity_field.manager');
1780 $fields = $entityManager->getFieldDefinitions($entity_type, $bundle_name);
1782 foreach ($fields as $field_name => $instance) {
1783 if ($instance instanceof FieldConfigInterface ) {
1784 $field = FieldStorageConfig::loadByName($entity_type, $field_name);
1785 // #1923328: field_name array may be missing in $entity.
1786 $entity_field = isset($entity->{$field->getName()}) ? $entity->{$field->getName()} : [];
1788 switch ($field->getType()) {
1790 case 'text_with_summary':
1792 $field_value = $entity_field->getValue();
1793 foreach ($field_value as $item) {
1800 $text_items[] = $text_items_by_field[$field_name][] = _linkchecker_check_markup($item['value'], $item['format'], linkchecker_entity_language($entity_type, $entity), TRUE);
1801 $text_items[] = $text_items_by_field[$field_name][] = _linkchecker_check_markup($item['summary'], $item['format'], linkchecker_entity_language($entity_type, $entity), TRUE);
1810 $field_value = $entity_field->getValue();
1812 foreach ($field_value as $item) {
1817 $text_items[] = $text_items_by_field[$field->getName()][] = _linkchecker_check_markup($item['value'], $item['format'], linkchecker_entity_language($entity_type, $entity), TRUE);
1821 // Link module field, http://drupal.org/project/link.
1823 foreach ($entity_field->getValue() as $item) {
1828 $text_items[] = $text_items_by_field[$field_name][] = \Drupal::l($item['title'], Url::fromUri($item['uri']), $options);
1829 $text_items[] = $text_items_by_field[$field_name][] = _linkchecker_check_markup($item['title'], NULL, linkchecker_entity_language($entity_type, $entity), TRUE);
1837 return ($return_field_names) ? $text_items_by_field : $text_items;
1841 * Replace the old url by a new url on 301 status codes.
1843 * @param string $entity_type
1844 * The type of entity; e.g., 'node', 'comment'.
1845 * @param string $bundle_name
1846 * The name of the bundle aka node type, e.g., 'article', 'page'.
1847 * @param object $entity
1848 * The entity to parse, a $node or a $comment object.
1849 * @param string $old_url
1851 * @param string $new_url
1852 * The new url to replace the old.
1856 function _linkchecker_replace_fields($entity_type, $bundle_name, $entity, $old_url, $new_url) {
1857 // Collect the fields from this entity_type and bundle.
1858 foreach (field_info_instances($entity_type, $bundle_name) as $field_name => $instance) {
1859 $field = field_info_field($field_name);
1860 $entity_field =& $entity->{$field['field_name']};
1862 switch ($field['type']) {
1864 case 'text_with_summary':
1865 foreach ($entity_field as $language_name => $language_value) {
1866 foreach ($language_value as $item_name => $item_value) {
1867 _linkchecker_link_replace($entity_field[$language_name][$item_name]['value'], $old_url, $new_url);
1868 _linkchecker_link_replace($entity_field[$language_name][$item_name]['summary'], $old_url, $new_url);
1876 foreach ($entity_field as $language_name => $language_value) {
1877 foreach ($language_value as $item_name => $item_value) {
1878 _linkchecker_link_replace($entity_field[$language_name][$item_name]['value'], $old_url, $new_url);
1883 // @todo: Module is now in D8 core, review if all works the same way and fix the comment.
1884 // Link module field, http://drupal.org/project/link.
1886 // @todo: What happend to the language? It it really not needed anymore?
1887 // foreach ($entity_field as $language_name => $language_value) {
1888 // foreach ($language_value as $item_name => $item_value) {
1889 // _linkchecker_link_replace($entity_field[$language_name][$item_name]['url'], $old_url, $new_url);
1890 // _linkchecker_link_replace($entity_field[$language_name][$item_name]['title'], $old_url, $new_url);
1892 foreach ($entity_field->getValue() as $item) {
1893 _linkchecker_link_replace($entity_field[$field_name]['url'], $old_url, $new_url);
1894 _linkchecker_link_replace($entity_field[$field_name]['title'], $old_url, $new_url);
/**
 * Run periodically via cron and delete all links without a reference.
 *
 * For speed reasons and check results we keep the links for some time
 * as they may be reused by other new content.
 */
function _linkchecker_cleanup_links() {
  $connection = \Drupal::database();
  $settings = \Drupal::config('linkchecker.settings');

  // Remove disabled node types no longer in use.
  $node_types = linkchecker_scan_node_types();
  if (empty($node_types)) {
    // No active node_type. Remove all items from table.
    $connection->truncate('linkchecker_node')->execute();
    // @todo Remove comments link references from table.
  }
  else {
    // Nodes whose type is not enabled for link checking.
    $nodes_not_scanned = $connection->select('node', 'n')
      ->fields('n', ['nid'])
      ->condition('n.type', $node_types, 'NOT IN');

    $connection->delete('linkchecker_node')
      ->condition('nid', $nodes_not_scanned, 'IN')
      ->execute();

    // @todo Remove comments link references of those nodes from the
    //   linkchecker_comment table as well.
  }

  // Remove comment link references if comment scanning is disabled.
  // @todo Remove comments of unpublished nodes.
  $comment_types = linkchecker_scan_comment_types();
  if (empty($comment_types)) {
    $connection->truncate('linkchecker_comment')->execute();
  }

  // Remove block link references if block scanning is disabled.
  if (!$settings->get('scan_blocks')) {
    $connection->truncate('linkchecker_block_custom')->execute();
  }

  // Remove dead links without references. Start with the block references
  // and UNION the comment and node references onto them.
  $referenced_lids = $connection->select('linkchecker_block_custom', 'lb')
    ->distinct()
    ->fields('lb', ['lid']);

  $comment_lids = $connection->select('linkchecker_comment', 'lc')
    ->distinct()
    ->fields('lc', ['lid']);
  $referenced_lids->union($comment_lids);

  $node_lids = $connection->select('linkchecker_node', 'ln')
    ->distinct()
    ->fields('ln', ['lid']);
  $referenced_lids->union($node_lids);

  // Wrap the UNION in an outer select so it can be used as a subquery.
  $all_referenced = $connection->select($referenced_lids, 'q1')
    ->distinct()
    ->fields('q1', ['lid']);

  $connection->delete('linkchecker_link')
    ->condition('lid', $all_referenced, 'NOT IN')
    ->execute();
}
/**
 * Extract links from content.
 *
 * @param string $text
 *   The text to be scanned for links.
 * @param string $content_path
 *   Path to the content that is currently scanned for links. This value is
 *   required to build full qualified links from relative links. Relative links
 *   are not extracted from content, if path is not provided.
 *
 * @return array
 *   Array whose keys are fully qualified and unique URLs found in the
 *   content, and whose values are arrays of actual text (raw URLs or paths)
 *   corresponding to each fully qualified URL.
 */
function _linkchecker_extract_links($text = '', $content_path = NULL) {
  global $base_root;

  $config = \Drupal::config('linkchecker.settings');
  $html_dom = Html::load($text);
  $urls = [];

  // Finds all hyperlinks in the content.
  if ($config->get('extract.from_a') == TRUE) {
    foreach (['a', 'area'] as $tag) {
      foreach ($html_dom->getElementsByTagName($tag) as $link) {
        $urls[] = $link->getAttribute('href');
      }
    }
  }

  // Finds all audio links in the content.
  if ($config->get('extract.from_audio') == TRUE) {
    foreach ($html_dom->getElementsByTagName('audio') as $audio) {
      $urls[] = $audio->getAttribute('src');

      // Finds source and track tags with links in the audio tag.
      foreach (['source', 'track'] as $child_tag) {
        foreach ($audio->getElementsByTagName($child_tag) as $child) {
          $urls[] = $child->getAttribute('src');
        }
      }
    }
  }

  // Finds embed tags with links in the content.
  if ($config->get('extract.from_embed') == TRUE) {
    foreach ($html_dom->getElementsByTagName('embed') as $embed) {
      $urls[] = $embed->getAttribute('src');
      $urls[] = $embed->getAttribute('pluginurl');
      $urls[] = $embed->getAttribute('pluginspage');
    }
  }

  // Finds iframe tags with links in the content.
  if ($config->get('extract.from_iframe') == TRUE) {
    foreach ($html_dom->getElementsByTagName('iframe') as $iframe) {
      $urls[] = $iframe->getAttribute('src');
    }
  }

  // Finds img tags with links in the content.
  if ($config->get('extract.from_img') == TRUE) {
    foreach ($html_dom->getElementsByTagName('img') as $img) {
      $urls[] = $img->getAttribute('src');
      $urls[] = $img->getAttribute('longdesc');
    }
  }

  // Finds object/param tags with links in the content.
  if ($config->get('extract.from_object') == TRUE) {
    // @todo
    // - Try to extract links in unknown "flashvars" values
    //   (e.g., file=http://, data=http://).
    $names = ['archive', 'filename', 'href', 'movie', 'src', 'url'];
    $srcs = ['movie'];

    foreach ($html_dom->getElementsByTagName('object') as $object) {
      $urls[] = $object->getAttribute('data');
      $urls[] = $object->getAttribute('codebase');

      // Finds param tags with links in the object tag.
      foreach ($object->getElementsByTagName('param') as $param) {
        if ($param->hasAttribute('name') && in_array($param->getAttribute('name'), $names)) {
          $urls[] = $param->getAttribute('value');
        }

        if ($param->hasAttribute('src') && in_array($param->getAttribute('src'), $srcs)) {
          $urls[] = $param->getAttribute('value');
        }
      }
    }
  }

  // Finds video tags with links in the content.
  if ($config->get('extract.from_video') == TRUE) {
    foreach ($html_dom->getElementsByTagName('video') as $video) {
      $urls[] = $video->getAttribute('poster');
      $urls[] = $video->getAttribute('src');

      // Finds source and track tags with links in the video tag.
      foreach (['source', 'track'] as $child_tag) {
        foreach ($video->getElementsByTagName($child_tag) as $child) {
          $urls[] = $child->getAttribute('src');
        }
      }
    }
  }

  // Remove empty values and duplicate urls.
  $urls = array_unique(array_filter($urls));

  // What type of links should be checked?
  $linkchecker_check_links_types = $config->get('check_links_types');

  // The Drupal 7 $is_https global is not populated by the Drupal 8 request
  // globals, so derive the scheme for protocol relative URLs from $base_root,
  // which is the same value the local links below are built from.
  $http_protocol = (strpos((string) $base_root, 'https:') === 0) ? 'https' : 'http';

  $links = [];
  foreach ($urls as $url) {
    // Decode HTML links into plain text links.
    // DOMDocument->loadHTML does not provide the RAW url from code. All html
    // entities are already decoded.
    // @todo: Try to find a way to get the raw value.
    $url_decoded = $url;

    // Prefix protocol relative urls with a protocol to allow link checking.
    if (preg_match('!^//!', $url_decoded)) {
      $url_decoded = $http_protocol . ':' . $url_decoded;
    }

    // FIXME: #1149596 HACK - Encode spaces in URLs, so validation equals TRUE
    // and link gets added.
    $url_encoded = str_replace(' ', '%20', $url_decoded);

    // Full qualified URLs.
    if ($linkchecker_check_links_types != 2 && UrlHelper::isValid($url_encoded, TRUE)) {
      // Add to Array and change HTML links into plain text links.
      $links[$url_decoded][] = $url;
    }
    // Skip mailto:, javascript:, etc.
    elseif (preg_match('/^\w[\w.+]*:/', $url_decoded)) {
      continue;
    }
    // Local URLs. $linkchecker_check_links_types = 0 or 2
    elseif ($linkchecker_check_links_types != 1 && UrlHelper::isValid($url_encoded, FALSE)) {
      // Get full qualified url with base path of content.
      $absolute_content_path = _linkchecker_absolute_content_path($content_path);

      // Absolute local URLs need to start with [/].
      if (preg_match('!^/!', $url_decoded)) {
        // Add to Array and change HTML encoded links into plain text links.
        $links[$base_root . $url_decoded][] = $url;
      }
      // Anchors and URL parameters like "#foo" and "?foo=bar".
      elseif (!empty($content_path) && preg_match('!^[?#]!', $url_decoded)) {
        // Add to Array and change HTML encoded links into plain text links.
        $links[$content_path . $url_decoded][] = $url;
      }
      // Relative URLs like "./foo/bar" and "../foo/bar".
      elseif (!empty($absolute_content_path) && preg_match('!^\.{1,2}/!', $url_decoded)) {
        // Build the URI without hostname before the URI is normalized and
        // dot-segments will be removed. The hostname is added back after the
        // normalization has completed to prevent hostname removal by the regex.
        // This logic intentionally does not implement all the rules defined in
        // RFC 3986, section 5.2.4 to show broken links and over-dot-segmented
        // URIs; e.g., http://example.com/../../foo/bar.
        // For more information, see http://drupal.org/node/832388.
        $path = substr_replace($absolute_content_path . $url_decoded, '', 0, strlen($base_root));

        // Remove './' segments where possible.
        $path = str_replace('/./', '/', $path);

        // Remove '../' segments where possible. Loop until all segments are
        // removed. Taken over from _drupal_build_css_path() in common.inc.
        $last = '';
        while ($path != $last) {
          $last = $path;
          $path = preg_replace('`(^|/)(?!\.\./)([^/]+)/\.\./`', '$1', $path);
        }

        // Glue the hostname and path to full-qualified URI.
        $links[$base_root . $path][] = $url;
      }
      // Relative URLs like "test.png".
      elseif (!empty($absolute_content_path) && preg_match('!^[^/]!', $url_decoded)) {
        $links[$absolute_content_path . $url_decoded][] = $url;
      }
      else {
        // @todo Are there more special cases the module need to handle?
      }
    }
  }

  return $links;
}
/**
 * Replaces old link with new link in text.
 *
 * @param string $text
 *   The text a link is inside. Passed in as a reference and updated in place.
 * @param string $old_link_fqdn
 *   The old link to search for in strings.
 * @param string $new_link_fqdn
 *   The old link should be overwritten with this new link.
 */
function _linkchecker_link_replace(&$text, $old_link_fqdn = '', $new_link_fqdn = '') {
  // Don't do any string replacement if one of the values is empty.
  if (empty($text) || empty($old_link_fqdn) || empty($new_link_fqdn)) {
    return;
  }

  // Remove protocols and hostname from local URLs.
  $base_roots = [
    Unicode::strtolower('http://' . $_SERVER['HTTP_HOST']),
    Unicode::strtolower('https://' . $_SERVER['HTTP_HOST']),
  ];
  $old_link = str_replace($base_roots, '', $old_link_fqdn);
  $new_link = str_replace($base_roots, '', $new_link_fqdn);

  // Build variables with all URLs and run the protocol filter only once.
  $old_html_link_fqdn = UrlHelper::filterBadProtocol($old_link_fqdn);
  $new_html_link_fqdn = UrlHelper::filterBadProtocol($new_link_fqdn);
  $old_html_link = UrlHelper::filterBadProtocol($old_link);
  $new_html_link = UrlHelper::filterBadProtocol($new_link);

  // Replace links in link fields and text and Links weblink fields.
  if (in_array($text, [$old_html_link_fqdn, $old_html_link, $old_link_fqdn, $old_link])) {
    // Keep old and new links in the same encoding and format and short or
    // long form.
    $text = str_replace($old_html_link_fqdn, $new_html_link_fqdn, $text);
    $text = str_replace($old_html_link, $new_html_link, $text);
    $text = str_replace($old_link_fqdn, $new_link_fqdn, $text);
    $text = str_replace($old_link, $new_link, $text);
    return;
  }

  // Create an array of links with HTML decoded and encoded URLs.
  $old_links = [
    $old_html_link_fqdn,
    $old_html_link,
    $old_link,
  ];

  // Remove duplicate URLs from array if URLs do not have URL parameters.
  // If more than one URL parameter exists - one URL in the array will have
  // an unencoded ampersand "&" and a second URL will have an HTML encoded
  // ampersand "&amp;".
  $old_links = array_unique($old_links);

  // Overwrites each listed attribute of an element with the new link when the
  // attribute currently holds one of the old link variants.
  $replace_attributes = function ($element, array $attributes) use ($old_links, $new_html_link) {
    foreach ($attributes as $attribute) {
      if (in_array($element->getAttribute($attribute), $old_links)) {
        $element->setAttribute($attribute, $new_html_link);
      }
    }
  };

  $config = \Drupal::config('linkchecker.settings');

  // Load HTML code into DOM.
  $html_dom = Html::load($text);

  // Finds all hyperlinks in the content.
  if ($config->get('extract.from_a') == TRUE) {
    foreach ($html_dom->getElementsByTagName('a') as $link) {
      $replace_attributes($link, ['href']);

      // Replace link text, if same like the URL. If a link text contains
      // other child tags like <img> it will be skipped.
      if (in_array($link->nodeValue, $old_links)) {
        $link->nodeValue = $new_html_link;
      }
    }

    foreach ($html_dom->getElementsByTagName('area') as $area) {
      $replace_attributes($area, ['href']);
    }
  }

  // Finds all audio links in the content. This is gated by the audio setting,
  // matching the setting used when the links are extracted.
  if ($config->get('extract.from_audio') == TRUE) {
    foreach ($html_dom->getElementsByTagName('audio') as $audio) {
      $replace_attributes($audio, ['src']);

      // Finds source and track tags with links in the audio tag.
      foreach (['source', 'track'] as $child_tag) {
        foreach ($audio->getElementsByTagName($child_tag) as $child) {
          $replace_attributes($child, ['src']);
        }
      }
    }
  }

  // Finds embed tags with links in the content.
  if ($config->get('extract.from_embed') == TRUE) {
    foreach ($html_dom->getElementsByTagName('embed') as $embed) {
      $replace_attributes($embed, ['src', 'pluginurl', 'pluginspage']);
    }
  }

  // Finds iframe tags with links in the content.
  if ($config->get('extract.from_iframe') == TRUE) {
    foreach ($html_dom->getElementsByTagName('iframe') as $iframe) {
      $replace_attributes($iframe, ['src']);
    }
  }

  // Finds img tags with links in the content.
  if ($config->get('extract.from_img') == TRUE) {
    foreach ($html_dom->getElementsByTagName('img') as $img) {
      $replace_attributes($img, ['src', 'longdesc']);
    }
  }

  // Finds object/param tags with links in the content.
  if ($config->get('extract.from_object') == TRUE) {
    // @todo
    // - Try to replace links in unknown "flashvars" values
    //   (e.g., file=http://, data=http://).
    $names = ['archive', 'filename', 'href', 'movie', 'src', 'url'];
    $srcs = ['movie'];

    foreach ($html_dom->getElementsByTagName('object') as $object) {
      $replace_attributes($object, ['data', 'codebase']);

      // Finds param tags with links in the object tag.
      foreach ($object->getElementsByTagName('param') as $param) {
        if ($param->hasAttribute('name') && in_array($param->getAttribute('name'), $names)) {
          $replace_attributes($param, ['value']);
        }

        if ($param->hasAttribute('src') && in_array($param->getAttribute('src'), $srcs)) {
          $replace_attributes($param, ['value']);
        }
      }
    }
  }

  // Finds video tags with links in the content.
  if ($config->get('extract.from_video') == TRUE) {
    foreach ($html_dom->getElementsByTagName('video') as $video) {
      $replace_attributes($video, ['poster', 'src']);

      // Finds source and track tags with links in the video tag.
      foreach (['source', 'track'] as $child_tag) {
        foreach ($video->getElementsByTagName($child_tag) as $child) {
          $replace_attributes($child, ['src']);
        }
      }
    }
  }

  // Set the updated $text for the calling function. Html::serialize() is the
  // counterpart of the Html::load() call above.
  $text = Html::serialize($html_dom);
}
/**
 * Customized clone of core check_markup() with additional filter blacklist.
 *
 * See http://api.drupal.org/api/function/check_markup/7 for API documentation.
 *
 * @param string $text
 *   The text to be filtered.
 * @param string|null $format_id
 *   The ID of the text format to use; the fallback format is used when NULL.
 * @param string $langcode
 *   The language code passed on to the filters.
 * @param bool $cache
 *   Whether a cached result may be used and stored. Caching additionally
 *   requires a non-empty "cache" property on the loaded format.
 *
 * @return string
 *   The filtered text, or an empty string if $text is not set or the text
 *   format does not exist.
 */
function _linkchecker_check_markup($text, $format_id = NULL, $langcode = '', $cache = FALSE) {
  if (!isset($text)) {
    return '';
  }

  if (!isset($format_id)) {
    $format_id = filter_fallback_format();
  }
  // If the requested text format does not exist, the text cannot be filtered.
  /** @var \Drupal\filter\Entity\FilterFormat $format **/
  $format = FilterFormat::load($format_id);
  if (!$format) {
    linkchecker_watchdog_log('filter', 'Missing text format: %format.', ['%format' => $format_id], RfcLogLevel::ALERT);
    return '';
  }

  // Check for a cached version of this piece of text.
  $cache = $cache && !empty($format->cache);
  $cache_id = '';
  if ($cache) {
    $cache_id = 'linkchecker:' . $format->id() . ':' . $langcode . ':' . hash('sha256', $text);
    if ($cached = \Drupal::cache()->get($cache_id)) {
      return $cached->data;
    }
  }

  // Convert all Windows and Mac newlines to a single newline, so filters only
  // need to deal with one possibility.
  $text = str_replace(["\r\n", "\r"], "\n", $text);

  // Do not run placeholder or special tag filters used as references to nodes
  // like 'weblink' or 'weblinks' node types. If the original link node is
  // updated, all links are automatically up-to-date and there is no need to
  // notify about the broken link on all nodes having a link reference in
  // content. This would only confuse the authors as they may also not be able
  // to fix the source node of the reference.
  $filters_blacklist = array_keys(array_filter(\Drupal::config('linkchecker.settings')->get('extract.filter_blacklist')));

  // Get a complete list of filters, ordered properly, and keep only the
  // enabled filters that are not blacklisted. The collection is keyed by the
  // filter name, which is what the blacklist contains.
  /** @var \Drupal\filter\Plugin\FilterInterface[] $active_filters **/
  $active_filters = [];
  foreach ($format->filters()->getAll() as $name => $filter) {
    if ($filter->status && !in_array($name, $filters_blacklist)) {
      $active_filters[$name] = $filter;
    }
  }

  // Give filters the chance to escape HTML-like data such as code or formulas.
  // Filters are plugin objects; the Drupal 7 "prepare callback" info arrays no
  // longer exist, so the plugin method is called directly.
  foreach ($active_filters as $filter) {
    $text = $filter->prepare($text, $langcode);
  }

  // Perform filtering. process() returns a result object wrapping the text.
  foreach ($active_filters as $filter) {
    $text = $filter->process($text, $langcode)->getProcessedText();
  }

  // Store in cache with a minimum expiration time of 1 day.
  if ($cache) {
    \Drupal::cache()->set($cache_id, $text, REQUEST_TIME + (60 * 60 * 24));
  }

  return $text;
}
/**
 * Get the path of an URL.
 *
 * @param string $url
 *   The http/https URL to parse.
 *
 * @return string|null
 *   Full qualified URL with absolute path of the URL (everything up to and
 *   including the last slash), or NULL if the URL cannot be parsed, has no
 *   scheme or host, or uses a scheme other than http/https.
 */
function _linkchecker_absolute_content_path($url) {
  // Parse the URL and make sure we can handle the schema.
  $uri = @parse_url($url);

  if ($uri == FALSE) {
    return NULL;
  }

  // A scheme and a host are both required to build an absolute URL.
  if (!isset($uri['scheme']) || !isset($uri['host'])) {
    return NULL;
  }

  // Break if the schema is not supported.
  if (!in_array($uri['scheme'], ['http', 'https'], TRUE)) {
    return NULL;
  }

  $scheme = $uri['scheme'] . '://';
  // The password is optional; only append it when one is present.
  $user = isset($uri['user']) ? $uri['user'] . (!empty($uri['pass']) ? ':' . $uri['pass'] : '') . '@' : '';
  $port = isset($uri['port']) ? $uri['port'] : 80;
  $host = $uri['host'] . ($port != 80 ? ':' . $port : '');
  $path = isset($uri['path']) ? $uri['path'] : '/';

  // Glue the URL variables.
  $absolute_url = $scheme . $user . $host . $path;

  // Find the last slash and remove all after the last slash to get the path.
  // strrpos() returns a byte offset, so the cut must be byte based as well to
  // stay correct for URLs containing multibyte characters.
  $last_slash = strrpos($absolute_url, '/');

  return substr($absolute_url, 0, $last_slash + 1);
}
/**
 * Verifies against blacklists, if the link status should be checked or not.
 *
 * @param string $url
 *   The URL to verify.
 *
 * @return bool
 *   TRUE if the link should be checked; FALSE if the URL matches an entry of
 *   the "check.disable_link_check_for_urls" setting or is not http/https.
 */
function _linkchecker_link_check_status_filter($url) {
  $status = TRUE;

  // Is url in domain blacklist?
  $urls = \Drupal::config('linkchecker.settings')->get('check.disable_link_check_for_urls');
  if (!empty($urls)) {
    // One entry per line. Blank lines (e.g. a trailing newline) are dropped,
    // because an empty alternative in the pattern would match every URL.
    $entries = array_filter(array_map('trim', preg_split('/(\r\n?|\n)/', $urls)), 'strlen');

    if (!empty($entries)) {
      $quoted_entries = array_map(function ($entry) {
        return preg_quote($entry, '/');
      }, $entries);

      if (preg_match('/' . implode('|', $quoted_entries) . '/', $url)) {
        $status = FALSE;
      }
    }
  }

  // Protocol whitelist check (without curl, only http/https is supported).
  if (!preg_match('/^(https?):\/\//i', $url)) {
    $status = FALSE;
  }

  return $status;
}
/**
 * Defines the list of allowed response codes for form input validation.
 *
 * @param int $code
 *   A numeric response code.
 *
 * @return bool
 *   TRUE if the status code is valid, otherwise FALSE.
 */
function _linkchecker_isvalid_response_code($code) {
  $valid_codes = [
    // Continue, Switching Protocols.
    100, 101,
    // OK, Created, Accepted, Non-Authoritative Information, No Content,
    // Reset Content, Partial Content.
    200, 201, 202, 203, 204, 205, 206,
    // Multiple Choices, Moved Permanently, Found, See Other, Not Modified,
    // Use Proxy, Temporary Redirect.
    300, 301, 302, 303, 304, 305, 307,
    // Bad Request, Unauthorized, Payment Required, Forbidden, Not Found,
    // Method Not Allowed, Not Acceptable, Proxy Authentication Required,
    // Request Time-out, Conflict, Gone, Length Required, Precondition Failed,
    // Request Entity Too Large, Request-URI Too Large, Unsupported Media Type,
    // Requested range not satisfiable, Expectation Failed.
    400, 401, 402, 403, 404, 405, 406, 407, 408, 409,
    410, 411, 412, 413, 414, 415, 416, 417,
    // Internal Server Error, Not Implemented, Bad Gateway, Service
    // Unavailable, Gateway Time-out, HTTP Version not supported.
    500, 501, 502, 503, 504, 505,
  ];

  // Flip the list so the codes become keys and the lookup stays a key check.
  $lookup = array_flip($valid_codes);

  return array_key_exists($code, $lookup);
}
/**
 * Return all content types enabled for link checking.
 *
 * @return array
 *   An array of node type names, keyed by the type.
 */
function linkchecker_scan_node_types() {
  $enabled_types = [];

  foreach (array_keys(node_type_get_names()) as $type_id) {
    // The per-type switch is stored as a third party setting of the node type.
    $is_scanned = NodeType::load($type_id)->getThirdPartySetting('linkchecker', 'scan_node', FALSE);
    if ($is_scanned) {
      $enabled_types[$type_id] = $type_id;
    }
  }

  return $enabled_types;
}
/**
 * Returns all content types enabled for comment link checking.
 *
 * @return array
 *   An array of node type names, keyed by the type.
 */
function linkchecker_scan_comment_types() {
  $types = [];

  foreach (array_keys(node_type_get_names()) as $type) {
    $node_type = NodeType::load($type);
    // Comment link checking is opt-in per node type via the 'scan_comment'
    // third party setting; types that cannot be loaded are skipped.
    if ($node_type && $node_type->getThirdPartySetting('linkchecker', 'scan_comment', FALSE)) {
      $types[$type] = $type;
    }
  }

  return $types;
}
/**
 * Unpublishes all nodes having the specified link id.
 *
 * @param int $lid
 *   A link ID that has reached a defined failcount.
 */
function _linkchecker_unpublish_nodes($lid) {
  $connection = \Drupal::database();
  $result = $connection->query('SELECT nid FROM {linkchecker_node} WHERE lid = :lid', [':lid' => $lid]);
  foreach ($result as $row) {
    // Explicitly don't use Node::loadMultiple() or the module may run
    // into issues like http://drupal.org/node/1210606. With this logic
    // nodes can be updated until an out of memory occurs and further
    // updates will be made on the remaining nodes only.
    $node = Node::load($row->nid);
    if (!$node) {
      // The reference row points to a node that cannot be loaded (for example
      // it was deleted in the meantime); there is nothing to unpublish.
      continue;
    }

    // NOTE(review): newer Drupal core versions provide setUnpublished();
    // confirm the supported core range before switching.
    $node->setPublished(FALSE);
    // Persist the status change.
    $node->save();
    linkchecker_watchdog_log('linkchecker', 'Set @type %title to unpublished.', ['@type' => $node->bundle(), '%title' => $node->get('title')->value]);
  }
}
/**
 * Loads a link as object.
 *
 * @param int $lid
 *   The link id.
 *
 * @return object|false
 *   The row of the {linkchecker_link} table with the given link id as object,
 *   or the "no row" result of fetchObject() if no such link exists.
 */
function linkchecker_link_load($lid) {
  $connection = \Drupal::database();
  $statement = $connection->query('SELECT * FROM {linkchecker_link} WHERE lid = :lid', [':lid' => $lid]);

  return $statement->fetchObject();
}
2651 * Checks if this entity is the default revision (published).
2653 * @param object $entity
2654 * The entity object, e.g., $node.
2657 * TRUE if the entity is the default revision, FALSE otherwise.
2659 function _linkchecker_isdefaultrevision($entity) {
2660 // D7 "Forward revisioning" is complex and causes a node_save() with the
2661 // future node in node table. This fires hook_node_update() twice and causes
2662 // abnormal behaviour in linkchecker.
2664 // The steps taken by Workbench Moderation are to save the forward revision
2665 // first and overwrite this with the live version in a shutdown function in
2666 // a second step. This will confuse linkchecker. D7 has no generic property
2667 // in the node object that tells if the node that is updated is the 'published' version
2668 // or only a draft of a future version.
2670 // This behaviour will change in D8 where $node->isDefaultRevision() has been
2671 // introduced. See the links below for more details.
2672 // - http://drupal.org/node/1879482
2673 // - http://drupal.org/node/218755
2674 // - http://drupal.org/node/1522154
2676 // Every moderation module saving a forward revision needs to return FALSE.
2677 // @FIXME: Refactor this workaround under D8.
2678 // Workbench Moderation module.
// NOTE(review): both Workbench Moderation checks below are commented out, so
// neither of them is evaluated at the moment.
2679 // if (\Drupal::moduleHandler()->moduleExists('workbench_moderation') && workbench_moderation_node_type_moderated($entity->type) === TRUE && empty($entity->workbench_moderation['updating_live_revision'])) {
2680 // if (\Drupal::moduleHandler()->moduleExists('workbench_moderation') && ($entity->hasHandlerClass('moderation')) && empty($entity->workbench_moderation['updating_live_revision'])) {
/**
 * Returns the language code of the given entity.
 *
 * Backward compatibility layer to ensure that installations running an older
 * version of core where entity_language() is not available do not break.
 *
 * @param string $entity_type
 *   The type of the entity, passed through to entity_language().
 * @param object $entity
 *   The entity object whose language code is requested.
 *
 * @return string|null
 *   The entity language code, or NULL if entity_language() is not available
 *   and the entity has no non-empty language property.
 */
function linkchecker_entity_language($entity_type, $entity) {
  // Prefer the core API whenever it is available.
  if (function_exists('entity_language')) {
    return entity_language($entity_type, $entity);
  }

  // Otherwise fall back to the language property of the entity.
  if (!empty($entity->language)) {
    return $entity->language;
  }

  return NULL;
}
/**
 * Returns all the values of one-dimensional and multidimensional arrays.
 *
 * @param array $array
 *   The array to flatten; nested arrays are traversed recursively.
 *
 * @return array
 *   All the values from the input array, indexed numerically. Values of nested
 *   arrays appear at the position of the nested array.
 */
function _linkchecker_array_values_recursive(array $array) {
  $array_values = [];

  foreach ($array as $value) {
    if (is_array($value)) {
      // Flatten the nested array first and append its values in order.
      $array_values = array_merge($array_values, _linkchecker_array_values_recursive($value));
    }
    else {
      $array_values[] = $value;
    }
  }

  return $array_values;
}