3 namespace Drupal\pathauto;
5 use Drupal\Component\Render\PlainTextOutput;
6 use Drupal\Component\Transliteration\TransliterationInterface;
7 use Drupal\Component\Utility\Html;
8 use Drupal\Component\Utility\Unicode;
9 use Drupal\Core\Cache\CacheBackendInterface;
10 use Drupal\Core\Config\ConfigFactoryInterface;
11 use Drupal\Core\Extension\ModuleHandlerInterface;
12 use Drupal\Core\Language\LanguageManagerInterface;
15 * Provides an alias cleaner.
17 class AliasCleaner implements AliasCleanerInterface {
22 * @var \Drupal\Core\Config\ConfigFactoryInterface
24 protected $configFactory;
27 * The alias storage helper.
29 * @var AliasStorageHelperInterface
31 protected $aliasStorageHelper;
36 * @var \Drupal\Core\Language\LanguageManagerInterface
38 protected $languageManager;
43 * @var \Drupal\Core\Cache\CacheBackendInterface
45 protected $cacheBackend;
48 * Calculated settings cache.
50 * @todo Split this up into separate properties.
54 protected $cleanStringCache = array();
57 * Transliteration service.
59 * @var \Drupal\Component\Transliteration\TransliterationInterface
61 protected $transliteration;
66 * @var \Drupal\Core\Extension\ModuleHandlerInterface
68 protected $moduleHandler;
/**
 * Creates a new AliasCleaner.
 *
 * @param \Drupal\Core\Config\ConfigFactoryInterface $config_factory
 *   The config factory; used to read the 'pathauto.settings' configuration.
 * @param \Drupal\pathauto\AliasStorageHelperInterface $alias_storage_helper
 *   The alias storage helper; supplies the schema maximum alias length.
 * @param \Drupal\Core\Language\LanguageManagerInterface $language_manager
 *   The language manager; supplies the current language.
 * @param \Drupal\Core\Cache\CacheBackendInterface $cache_backend
 *   The cache backend; stores the punctuation character list.
 * @param \Drupal\Component\Transliteration\TransliterationInterface $transliteration
 *   The transliteration service.
 * @param \Drupal\Core\Extension\ModuleHandlerInterface $module_handler
 *   The module handler; used to invoke the punctuation alter hook.
 */
public function __construct(
  ConfigFactoryInterface $config_factory,
  AliasStorageHelperInterface $alias_storage_helper,
  LanguageManagerInterface $language_manager,
  CacheBackendInterface $cache_backend,
  TransliterationInterface $transliteration,
  ModuleHandlerInterface $module_handler
) {
  // Configuration and storage collaborators.
  $this->configFactory = $config_factory;
  $this->aliasStorageHelper = $alias_storage_helper;
  $this->cacheBackend = $cache_backend;

  // Language-related collaborators.
  $this->languageManager = $language_manager;
  $this->transliteration = $transliteration;

  // Extension point for other modules.
  $this->moduleHandler = $module_handler;
}
/**
 * Cleans up a generated alias.
 *
 * Collapses and trims the configured separator and slashes, then truncates
 * the result to the smaller of the configured maximum length and the maximum
 * length reported by the alias storage helper.
 *
 * @param string $alias
 *   The alias to clean.
 *
 * @return string
 *   The cleaned, length-limited alias.
 */
public function cleanAlias($alias) {
  $settings = $this->configFactory->get('pathauto.settings');
  $max_length = min($settings->get('max_length'), $this->aliasStorageHelper->getAliasSchemaMaxLength());

  // Clean the configured separator first. A pattern such as
  // "[token1]/[token2]-[token3]/[token4]" may produce "value1/-/value2";
  // cleaning slashes before separators would leave a doubled slash behind.
  $cleaned = $this->getCleanSeparators($alias);

  // Now collapse duplicate slashes and trim leading/trailing ones.
  $cleaned = $this->getCleanSeparators($cleaned, '/');

  // Cut at a word boundary so the alias ends at a logical place.
  return Unicode::truncate($cleaned, $max_length, TRUE);
}
/**
 * Trims duplicate, leading, and trailing separators from a string.
 *
 * @param string $string
 *   The string to clean separators from.
 * @param string|null $separator
 *   (optional) The separator to clean. Defaults to the 'separator' value of
 *   the 'pathauto.settings' configuration.
 *
 * @return string
 *   The string with separators collapsed and trimmed. When the separator is
 *   '/', a single leading slash is put back after trimming. When the
 *   separator is empty, the input is returned unchanged.
 */
public function getCleanSeparators($string, $separator = NULL) {
  // Only touch configuration when the caller did not supply a separator.
  if (!isset($separator)) {
    $separator = $this->configFactory->get('pathauto.settings')->get('separator');
  }

  // Normalize to a string so an unset setting (NULL) is handled as "no
  // separator" without passing NULL to string functions.
  $separator = (string) $separator;
  if ($separator === '') {
    return $string;
  }

  // Trim any leading or trailing separators. Note that trim() treats its
  // second argument as a list of characters.
  $output = trim($string, $separator);

  // Escape the separator for use in regular expressions.
  $quoted = preg_quote($separator, '/');

  // Replace multiple separators with a single one.
  $output = preg_replace('/' . $quoted . '+/', $separator, $output);

  if ($separator !== '/') {
    // Drop separators that sit directly next to slashes.
    $output = preg_replace('/\/+' . $quoted . '\/+|' . $quoted . '\/+|\/+' . $quoted . '/', "/", $output);
  }
  else {
    // If the separator is a slash, we need to re-add the leading slash
    // dropped by the trim function.
    $output = '/' . $output;
  }

  return $output;
}
/**
 * Cleans a string segment so it can be used inside an alias.
 *
 * Settings derived from 'pathauto.settings' and previously cleaned strings
 * are memoized in $this->cleanStringCache.
 *
 * @param string $string
 *   The string to clean.
 * @param array $options
 *   (optional) May contain 'language' (an object exposing getId()) or
 *   'langcode'; 'language' takes precedence when both are non-empty.
 *
 * @return string
 *   The cleaned string, or an empty string when the input is '' or NULL.
 */
public function cleanString($string, array $options = array()) {
  if (empty($this->cleanStringCache)) {
    // First call since the last reset: compute the derived settings once.
    $settings = $this->configFactory->get('pathauto.settings');
    $this->cleanStringCache = [
      'separator' => $settings->get('separator'),
      'strings' => [],
      'transliterate' => $settings->get('transliterate'),
      'punctuation' => [],
      'reduce_ascii' => (bool) $settings->get('reduce_ascii'),
      'ignore_words_regex' => FALSE,
      'lowercase' => (bool) $settings->get('case'),
      'maxlength' => min($settings->get('max_component_length'), $this->aliasStorageHelper->getAliasSchemaMaxLength()),
    ];

    // Build the character => replacement map consumed by strtr() below.
    foreach ($this->getPunctuationCharacters() as $name => $details) {
      $action = $settings->get('punctuation.' . $name);
      // Loose comparisons are deliberate; they match switch semantics.
      if ($action == PathautoGeneratorInterface::PUNCTUATION_REMOVE) {
        $this->cleanStringCache['punctuation'][$details['value']] = '';
      }
      elseif ($action == PathautoGeneratorInterface::PUNCTUATION_REPLACE) {
        $this->cleanStringCache['punctuation'][$details['value']] = $this->cleanStringCache['separator'];
      }
      // PathautoGeneratorInterface::PUNCTUATION_DO_NOTHING, or any other
      // value: the character is left untouched.
    }

    // Turn the comma/whitespace separated ignore list into an alternation of
    // whole words.
    $words_pattern = preg_replace(['/^[,\s]+|[,\s]+$/', '/[,\s]+/'], ['', '\b|\b'], $settings->get('ignore_words'));
    if ($words_pattern) {
      $words_pattern = '\b' . $words_pattern . '\b';
      if (function_exists('mb_eregi_replace')) {
        mb_regex_encoding('UTF-8');
        $this->cleanStringCache['ignore_words_callback'] = 'mb_eregi_replace';
      }
      else {
        // preg_replace() needs delimiters and a case-insensitivity flag.
        $this->cleanStringCache['ignore_words_callback'] = 'preg_replace';
        $words_pattern = '/' . $words_pattern . '/i';
      }
      $this->cleanStringCache['ignore_words_regex'] = $words_pattern;
    }
  }

  // Nothing to do for empty input.
  if ($string === NULL || $string === '') {
    return '';
  }

  // Resolve the language code; an explicit language object wins.
  $langcode = NULL;
  if (!empty($options['language'])) {
    $langcode = $options['language']->getId();
  }
  elseif (!empty($options['langcode'])) {
    $langcode = $options['langcode'];
  }

  // Serve a previously computed result for this language and string.
  $cache_key = (string) $string;
  if (isset($this->cleanStringCache['strings'][$langcode][$cache_key])) {
    return $this->cleanStringCache['strings'][$langcode][$cache_key];
  }

  $separator = $this->cleanStringCache['separator'];

  // Decode HTML entities, then render the result as plain text.
  $output = PlainTextOutput::renderFromHtml(Html::decodeEntities($string));

  // Optionally transliterate.
  if ($this->cleanStringCache['transliterate']) {
    // When reducing to letters and numbers, unknown characters are replaced
    // with an empty string rather than a question mark.
    $unknown = $this->cleanStringCache['reduce_ascii'] ? '' : '?';
    $output = $this->transliteration->transliterate($output, $langcode, $unknown);
  }

  // Replace or drop punctuation based on user settings.
  $output = strtr($output, $this->cleanStringCache['punctuation']);

  // Reduce strings to letters and numbers (slashes are kept).
  if ($this->cleanStringCache['reduce_ascii']) {
    $output = preg_replace('/[^a-zA-Z0-9\/]+/', $separator, $output);
  }

  // Strip ignored words, unless that would leave nothing behind.
  if ($this->cleanStringCache['ignore_words_regex']) {
    $replace = $this->cleanStringCache['ignore_words_callback'];
    $without_words = $replace($this->cleanStringCache['ignore_words_regex'], '', $output);
    if (mb_strlen(trim($without_words)) > 0) {
      $output = $without_words;
    }
  }

  // Always replace whitespace with the separator.
  $output = preg_replace('/\s+/', $separator, $output);

  // Collapse and trim the cached separator, then run the default-separator
  // cleanup on the result.
  $output = $this->getCleanSeparators($output, $separator);
  $output = $this->getCleanSeparators($output);

  // Optionally convert to lower case.
  if ($this->cleanStringCache['lowercase']) {
    $output = mb_strtolower($output);
  }

  // Shorten to a logical place based on word boundaries.
  $output = Unicode::truncate($output, $this->cleanStringCache['maxlength'], TRUE);

  // Remember the result for subsequent calls.
  $this->cleanStringCache['strings'][$langcode][$cache_key] = $output;

  return $output;
}
/**
 * Punctuation characters, memoized by getPunctuationCharacters().
 *
 * Declared explicitly so the property is not created dynamically on first
 * assignment.
 *
 * @var array
 */
protected $punctuationCharacters = [];

/**
 * Returns the punctuation characters that can be replaced or removed.
 *
 * The list is memoized on the object and stored in the cache backend under
 * a per-language cache ID. When it has to be built, modules may alter it
 * through the 'pathauto_punctuation_chars' alter hook before it is cached.
 *
 * @return array
 *   An array keyed by punctuation machine name. Each item is an array with
 *   the keys 'value' (the character) and 'name' (its translated label).
 */
public function getPunctuationCharacters() {
  if (empty($this->punctuationCharacters)) {
    $langcode = $this->languageManager->getCurrentLanguage()->getId();

    // The persistent cache is per language because the labels are translated.
    // NOTE(review): the in-memory copy is not keyed by language.
    $cid = 'pathauto:punctuation:' . $langcode;
    if ($cache = $this->cacheBackend->get($cid)) {
      $this->punctuationCharacters = $cache->data;
    }
    else {
      $punctuation = [];
      $punctuation['double_quotes'] = ['value' => '"', 'name' => t('Double quotation marks')];
      $punctuation['quotes'] = ['value' => '\'', 'name' => t("Single quotation marks (apostrophe)")];
      $punctuation['backtick'] = ['value' => '`', 'name' => t('Back tick')];
      $punctuation['comma'] = ['value' => ',', 'name' => t('Comma')];
      $punctuation['period'] = ['value' => '.', 'name' => t('Period')];
      $punctuation['hyphen'] = ['value' => '-', 'name' => t('Hyphen')];
      $punctuation['underscore'] = ['value' => '_', 'name' => t('Underscore')];
      $punctuation['colon'] = ['value' => ':', 'name' => t('Colon')];
      $punctuation['semicolon'] = ['value' => ';', 'name' => t('Semicolon')];
      $punctuation['pipe'] = ['value' => '|', 'name' => t('Vertical bar (pipe)')];
      $punctuation['left_curly'] = ['value' => '{', 'name' => t('Left curly bracket')];
      $punctuation['left_square'] = ['value' => '[', 'name' => t('Left square bracket')];
      $punctuation['right_curly'] = ['value' => '}', 'name' => t('Right curly bracket')];
      $punctuation['right_square'] = ['value' => ']', 'name' => t('Right square bracket')];
      $punctuation['plus'] = ['value' => '+', 'name' => t('Plus sign')];
      $punctuation['equal'] = ['value' => '=', 'name' => t('Equal sign')];
      $punctuation['asterisk'] = ['value' => '*', 'name' => t('Asterisk')];
      $punctuation['ampersand'] = ['value' => '&', 'name' => t('Ampersand')];
      $punctuation['percent'] = ['value' => '%', 'name' => t('Percent sign')];
      $punctuation['caret'] = ['value' => '^', 'name' => t('Caret')];
      $punctuation['dollar'] = ['value' => '$', 'name' => t('Dollar sign')];
      $punctuation['hash'] = ['value' => '#', 'name' => t('Number sign (pound sign, hash)')];
      $punctuation['at'] = ['value' => '@', 'name' => t('At sign')];
      $punctuation['exclamation'] = ['value' => '!', 'name' => t('Exclamation mark')];
      $punctuation['tilde'] = ['value' => '~', 'name' => t('Tilde')];
      $punctuation['left_parenthesis'] = ['value' => '(', 'name' => t('Left parenthesis')];
      $punctuation['right_parenthesis'] = ['value' => ')', 'name' => t('Right parenthesis')];
      $punctuation['question_mark'] = ['value' => '?', 'name' => t('Question mark')];
      $punctuation['less_than'] = ['value' => '<', 'name' => t('Less-than sign')];
      $punctuation['greater_than'] = ['value' => '>', 'name' => t('Greater-than sign')];
      $punctuation['slash'] = ['value' => '/', 'name' => t('Slash')];
      $punctuation['back_slash'] = ['value' => '\\', 'name' => t('Backslash')];

      // Allow modules to alter the punctuation list and cache the result.
      $this->moduleHandler->alter('pathauto_punctuation_chars', $punctuation);
      $this->cacheBackend->set($cid, $punctuation);
      $this->punctuationCharacters = $punctuation;
    }
  }

  return $this->punctuationCharacters;
}
/**
 * Cleans token replacement values in place.
 *
 * Every replacement whose token does not match one of the configured
 * 'safe_tokens' is passed through cleanString(). Matching tokens are left
 * as they are.
 *
 * @param array $replacements
 *   Replacement values keyed by token; modified by reference.
 * @param array $data
 *   (optional) Not used by this implementation.
 * @param array $options
 *   (optional) Options forwarded to cleanString().
 */
public function cleanTokenValues(&$replacements, $data = array(), $options = array()) {
  // The safe-token pattern does not depend on the token being inspected, so
  // build it once instead of on every iteration. The configured values are
  // inserted into the pattern as-is (not quoted).
  $safe_tokens = implode('|', (array) $this->configFactory->get('pathauto.settings')->get('safe_tokens'));
  $safe_pattern = '/:(' . $safe_tokens . ')(:|\]$)/';

  foreach ($replacements as $token => $value) {
    // Only clean non-path tokens.
    if (!preg_match($safe_pattern, $token)) {
      $replacements[$token] = $this->cleanString($value, $options);
    }
  }
}
/**
 * Discards the memoized cleanString() settings and cleaned strings.
 *
 * The next cleanString() call recomputes them from configuration.
 */
public function resetCaches() {
  $this->cleanStringCache = [];
}