3 namespace Drupal\typogrify;
6 * SmartyPants - Smart punctuation for web sites.
8 * By John Gruber <http://daringfireball.net>
10 * PHP port by Michel Fortin
11 * <http://www.michelf.com/>
13 * Copyright (c) 2003-2004 John Gruber
14 * Copyright (c) 2004-2005 Michel Fortin
15 * Copyright (c) 2012 Micha Glave - i18n-quote-style
17 * Re-released under GPLv2 for Drupal.
19 * Wrapped in a class for D8.
20 * Do we trust the GPLv2 notice above?
27 const SMARTYPANTS_PHP_VERSION = '1.5.1e';
32 const SMARTYPANTS_SYNTAX_VERSION = '1.5.1';
35 * Regex-pattern for tags we don't mess with.
37 const SMARTYPANTS_TAGS_TO_SKIP
38 = '@<(/?)(?:pre|code|kbd|script|textarea|tt|math)[\s>]@';
41 * Current SmartyPants configuration setting.
43 * Change this to configure.
44 * 1 => "--" for em-dashes; no en-dash support
45 * 2 => "---" for em-dashes; "--" for en-dashes
46 * 3 => "--" for em-dashes; "---" for en-dashes
47 * See docs for more configuration options.
51 protected static $smartypantsAttr = "1";
53 /* -- Smarty Modifier Interface ------------------------------------------*/
56 * Wrapper method for SmartyPants.
58 public static function modifierSmartypants($text, $attr = NULL) {
59 return self::process($text, $attr);
/**
 * Returns a locale-specific array of quotes.
 *
 * @param string|null $langcode
 *   A language code such as 'de', or the literal string 'all' to get the
 *   complete table.
 *
 * @return array
 *   For a language code: four quote characters in the order double-open,
 *   double-close, single-open, single-close. Unknown or missing codes fall
 *   back to the English set. For 'all': the whole table keyed by language
 *   code.
 */
public static function i18nQuotes($langcode = NULL) {
  // Ignore all english-equivalents served by fallback.
  $quotes = array(
    'ar' => array('«', '»', '‹', '›'),
    'be' => array('«', '»', '„', '“'),
    'bg' => array('„', '“', '‚', '‘'),
    'da' => array('»', '«', '›', '‹'),
    'de' => array('„', '“', '‚', '‘'),
    'el' => array('«', '»', '‹', '›'),
    'en' => array('“', '”', '‘', '’'),
    'eo' => array('“', '”', '“', '”'),
    'es' => array('«', '»', '“', '“'),
    'et' => array('„', '“', '„', '“'),
    'fi' => array('”', '”', '’', '’'),
    'fr' => array('«', '»', '‹', '›'),
    'gsw-berne' => array('„', '“', '‚', '‘'),
    'he' => array('“', '“', '«', '»'),
    'hr' => array('»', '«', '›', '‹'),
    'hu' => array('„', '“', '„', '“'),
    'is' => array('„', '“', '‚', '‘'),
    'it' => array('«', '»', '‘', '’'),
    'lt' => array('„', '“', '‚', '‘'),
    'lv' => array('„', '“', '„', '“'),
    'nl' => array('„', '”', '‘', '’'),
    'no' => array('„', '“', '„', '“'),
    'pl' => array('„', '”', '«', '»'),
    'pt' => array('“', '”', '‘', '’'),
    'ro' => array('„', '“', '«', '»'),
    'ru' => array('«', '»', '„', '“'),
    'sk' => array('„', '“', '‚', '‘'),
    'sl' => array('„', '“', '‚', '‘'),
    'sq' => array('«', '»', '‹', '›'),
    'sr' => array('„', '“', '‚', '‘'),
    'sv' => array('”', '”', '’', '’'),
    'tr' => array('«', '»', '‹', '›'),
    'uk' => array('«', '»', '„', '“'),
  );

  // Strict comparison: a loose == lets 0 (PHP < 8) or TRUE match 'all'.
  // NOTE(review): the statement inside this branch was missing from the
  // reviewed listing; returning the whole table is assumed - confirm.
  if ($langcode === 'all') {
    return $quotes;
  }

  if (isset($quotes[$langcode])) {
    return $quotes[$langcode];
  }

  return $quotes['en'];
}
147 * @param string $text
149 * @param string $attr
150 * Value of the smart_quotes="" attribute.
152 * MT context object (unused).
154 public static function process($text, $attr = NULL, $ctx = NULL) {
156 $attr = self::$smartypantsAttr;
158 // Options to specify which transformations to make.
160 // Should we translate " entities into normal quotes?
166 // 2 : set all, using old school en- and em- dash shortcuts
167 // 3 : set all, using inverted old school en and em- dash shortcuts
170 // b : backtick quotes (``double'' and ,,double`` only)
171 // B : backtick quotes (``double'', ,,double``, ,single` and `single')
173 // D : old school dashes
174 // i : inverted old school dashes
176 // w : convert " entities to " for Dreamweaver users.
181 elseif ($attr == "1") {
182 // Do everything, turn all options on.
188 elseif ($attr == "2") {
189 // Do everything, turn all options on, use old school dash shorthand.
195 elseif ($attr == "3") {
196 // Do everything, turn all options on,
197 // use inverted old school dash shorthand.
203 elseif ($attr == "-1") {
204 // Special "stupefy" mode.
208 $chars = preg_split('//', $attr);
209 foreach ($chars as $c) {
239 if ($do_quotes == 2) {
240 $doc_lang = $ctx['langcode'];
246 $tokens = self::tokenizeHtml($text);
248 // Keep track of when we're inside <pre> or <code> tags.
251 // This is a cheat, used to get some context
252 // for one-character tokens that consist of
253 // just a quote char. What we do is remember
254 // the last character of the previous text
255 // token, to use as context to curl single-
256 // character quote tokens correctly.
257 $prev_token_last_char = '';
259 foreach ($tokens as $cur_token) {
260 if ($cur_token[0] == 'tag') {
261 // Don't mess with quotes inside tags.
262 $result .= $cur_token[1];
263 if (preg_match(self::SMARTYPANTS_TAGS_TO_SKIP, $cur_token[1], $matches)) {
264 $in_pre = isset($matches[1]) && $matches[1] == '/' ? 0 : 1;
267 // Reading language from span.
268 if (preg_match('/<span .*(xml:)?lang="(..)"/', $cur_token[1], $matches)) {
269 $span_lang = $matches[2];
271 elseif ($cur_token[1] == '</span>') {
278 // Remember last char of this token before processing.
279 $last_char = mb_substr($t, -1);
281 $quotes = self::i18nQuotes(isset($span_lang) ? $span_lang : $doc_lang);
283 $t = self::processEscapes($t);
286 $t = preg_replace('/"/', '"', $t);
290 if ($do_dashes == 1) {
291 $t = self::educateDashes($t);
293 elseif ($do_dashes == 2) {
294 $t = self::educateDashesOldSchool($t);
296 elseif ($do_dashes == 3) {
297 $t = self::educateDashesOldSchoolInverted($t);
302 $t = self::educateEllipses($t);
305 // Note: backticks need to be processed before quotes.
307 $t = self::educateBackticks($t);
308 if ($do_backticks == 2) {
309 $t = self::educateSingleBackticks($t);
313 $t = self::educateBackticks($t);
315 // Special case: single-character ' token.
316 if (preg_match('/\S/', $prev_token_last_char)) {
324 // Special case: single-character " token.
325 if (preg_match('/\S/', $prev_token_last_char)) {
334 $t = self::educateQuotes($t, $quotes);
338 $t = self::stupefyEntities($t);
341 $prev_token_last_char = $last_char;
/**
 * Educates straight quotes in $text, skipping tags and protected elements.
 *
 * @param string $text
 *   Text to be parsed.
 * @param string $attr
 *   Value of the smart_quotes="" attribute. 2 additionally converts
 *   ``backtick''-style quotes; NULL falls back to self::$smartypantsAttr.
 * @param mixed $ctx
 *   MT context object (unused).
 *
 * @return string
 *   $text with replacements.
 */
protected static function smartQuotes($text, $attr = NULL, $ctx = NULL) {
  // NOTE(review): short statements (guards, initialisers, braces) were
  // missing from the reviewed listing and are restored here from the
  // surrounding code and comments - confirm against the real file.
  if ($attr == NULL) {
    $attr = self::$smartypantsAttr;
  }

  if ($attr == 0) {
    // Do nothing.
    return $text;
  }

  // Should we educate ``backticks'' -style quotes?
  $do_backticks = ($attr == 2);

  // educateQuotes() requires a quote set; no language is known here, so use
  // the fallback (English) set from i18nQuotes().
  $quotes = self::i18nQuotes();

  // Special case to handle quotes at the very end of $text when preceded by
  // an HTML tag. Add a space to give the quote education algorithm a bit of
  // context, so that it can guess correctly that it's a closing quote:
  $add_extra_space = 0;
  if (preg_match("/>['\"]\\z/", $text)) {
    // Remember, so we can trim the extra space later.
    $add_extra_space = 1;
    $text .= ' ';
  }

  $tokens = self::tokenizeHtml($text);
  $result = '';

  // Keep track of when we're inside <pre> or <code> tags.
  $in_pre = 0;

  // Last character of the previous text token; gives context for tokens
  // that consist of nothing but a single quote character.
  $prev_token_last_char = '';

  foreach ($tokens as $cur_token) {
    if ($cur_token[0] == 'tag') {
      // Don't mess with quotes inside tags.
      $result .= $cur_token[1];
      if (preg_match(self::SMARTYPANTS_TAGS_TO_SKIP, $cur_token[1], $matches)) {
        $in_pre = isset($matches[1]) && $matches[1] == '/' ? 0 : 1;
      }
      continue;
    }

    $t = $cur_token[1];
    // Remember last char of this token before processing.
    $last_char = mb_substr($t, -1);

    if (!$in_pre) {
      $t = self::processEscapes($t);
      if ($do_backticks) {
        $t = self::educateBackticks($t);
      }

      $follows_text = preg_match('/\S/', $prev_token_last_char);
      if ($t == "'") {
        // Special case: single-character ' token.
        // NOTE(review): the literals assigned here were not visible in the
        // listing; the matching entries of the quote set are used instead.
        $t = $follows_text ? $quotes[3] : $quotes[2];
      }
      elseif ($t == '"') {
        // Special case: single-character " token.
        $t = $follows_text ? $quotes[1] : $quotes[0];
      }
      else {
        // Normal case. Previously called without the required $quotes
        // argument.
        $t = self::educateQuotes($t, $quotes);
      }
    }

    $result .= $t;
    $prev_token_last_char = $last_char;
  }

  if ($add_extra_space) {
    // Trim trailing space if we added one earlier. The return value of
    // preg_replace() used to be discarded, so the space was never removed.
    $result = preg_replace('/ \z/', '', $result);
  }

  return $result;
}
461 * @param string $text
463 * @param string $attr
464 * Value of the smart_quotes="" attribute.
466 * MT context object (unused).
469 * $text with replacements.
471 protected static function smartDashes($text, $attr = NULL, $ctx = NULL) {
473 $attr = self::$smartypantsAttr;
476 // Reference to the subroutine to use for dash education,
477 // default to educateDashes:
478 $dash_sub_ref = 'educateDashes';
484 elseif ($attr == 2) {
485 // Use old smart dash shortcuts, "--" for en, "---" for em.
486 $dash_sub_ref = 'educateDashesOldSchool';
488 elseif ($attr == 3) {
489 // Inverse of 2, "--" for em, "---" for en.
490 $dash_sub_ref = 'educateDashesOldSchoolInverted';
494 $tokens = self::tokenizeHtml($text);
497 // Keep track of when we're inside <pre> or <code> tags.
499 foreach ($tokens as $cur_token) {
500 if ($cur_token[0] == "tag") {
501 // Don't mess with quotes inside tags.
502 $result .= $cur_token[1];
503 if (preg_match(self::SMARTYPANTS_TAGS_TO_SKIP, $cur_token[1], $matches)) {
504 $in_pre = isset($matches[1]) && $matches[1] == '/' ? 0 : 1;
510 $t = self::processEscapes($t);
511 $t = self::$dash_sub_ref($t);
522 * @param string $text
524 * @param string $attr
525 * Value of the smart_quotes="" attribute.
527 * MT context object (unused).
530 * $text with replacements.
532 protected static function smartEllipses($text, $attr = NULL, $ctx = NULL) {
534 $attr = self::$smartypantsAttr;
543 $tokens = self::tokenizeHtml($text);
546 // Keep track of when we're inside <pre> or <code> tags.
548 foreach ($tokens as $cur_token) {
549 if ($cur_token[0] == "tag") {
550 // Don't mess with quotes inside tags.
551 $result .= $cur_token[1];
552 if (preg_match(self::SMARTYPANTS_TAGS_TO_SKIP, $cur_token[1], $matches)) {
553 $in_pre = isset($matches[1]) && $matches[1] == '/' ? 0 : 1;
559 $t = self::processEscapes($t);
560 $t = self::educateEllipses($t);
569 * Replaces '=' with '­' for easier manual hyphenating.
571 * @param string $text
572 * The string to prettify.
574 public static function hyphenate($text) {
576 $tokens = self::tokenizeHtml($text);
578 $equal_finder = '/(\p{L})=(\p{L})/u';
580 // Keep track of when we're inside <pre> or <code> tags.
582 foreach ($tokens as $cur_token) {
583 if ($cur_token[0] == "tag") {
584 // Don't mess with quotes inside tags.
585 $result .= $cur_token[1];
586 if (preg_match(self::SMARTYPANTS_TAGS_TO_SKIP, $cur_token[1], $matches)) {
587 $in_pre = isset($matches[1]) && $matches[1] == '/' ? 0 : 1;
593 $t = preg_replace($equal_finder, '\1­\2', $t);
/**
 * Adds spacing to numbers for easier reading, a.k.a. digit grouping.
 *
 * Text inside the tags matched by SMARTYPANTS_TAGS_TO_SKIP, and inside a
 * <span> whose class contains "number", "phone" or "ignore", is left alone.
 *
 * @param string $text
 *   The string to prettify.
 * @param int $attr
 *   The kind of space to use: 1 selects numberNarrownbsp(), 2 numberThinsp(),
 *   3 numberSpan(), 4 numberJustSpan(). Any other value (including the
 *   default 0) returns $text unchanged.
 * @param mixed $ctx
 *   Not used by this method.
 *
 * @return string
 *   The processed text.
 */
public static function smartNumbers($text, $attr = 0, $ctx = NULL) {
  if ($attr == 1) {
    $method = 'numberNarrownbsp';
  }
  elseif ($attr == 2) {
    $method = 'numberThinsp';
  }
  elseif ($attr == 3) {
    $method = 'numberSpan';
  }
  elseif ($attr == 4) {
    $method = 'numberJustSpan';
  }
  else {
    // NOTE(review): the $attr == 0 branch was missing from the reviewed
    // listing; "return the text untouched" is assumed. Unknown values take
    // the same path instead of leaving $method undefined.
    return $text;
  }

  // Alternatives, in order: HTML entities, ISO dates, dd.mm.yyyy dates,
  // runs starting with 0 (phone-number style), and plain numbers with an
  // optional fraction. The hyphen in the fifth group is escaped because
  // PCRE2 rejects "\d-/" as an invalid range.
  $number_finder = '@(?:(&#\d{2,4};|&(#x[0-9a-fA-F]{2,4}|frac\d\d);)|' .
    '(\d{4}-\d\d-\d\d)|(\d\d\.\d\d\.\d{4})|' .
    '(0[ \d\-/]+)|([+-]?\d+)([.,]\d+|))@';

  // A real callable: self::$method without parentheses would be a static
  // property fetch, not a reference to the method named by $method.
  $callback = array(self::class, $method);

  $result = '';
  // Keep track of when we're inside <pre> or <code> tags.
  $in_pre = 0;
  // Set while inside a span that asks to be left alone.
  $span_stop = 0;

  foreach (self::tokenizeHtml($text) as $cur_token) {
    if ($cur_token[0] == 'tag') {
      // Don't mess with quotes inside tags.
      $result .= $cur_token[1];
      if (preg_match(self::SMARTYPANTS_TAGS_TO_SKIP, $cur_token[1], $matches)) {
        $in_pre = isset($matches[1]) && $matches[1] == '/' ? 0 : 1;
      }
      elseif (preg_match('/<span .*class="[^"]*\b(number|phone|ignore)\b[^"]*"/', $cur_token[1])) {
        // The captured class name can never be '/', so the original
        // ternary always produced 1.
        $span_stop = 1;
      }
      elseif ($cur_token[1] == '</span>') {
        $span_stop = 0;
      }
      continue;
    }

    $t = $cur_token[1];
    if (!$in_pre && !$span_stop) {
      $t = preg_replace_callback($number_finder, $callback, $t);
    }
    $result .= $t;
  }

  return $result;
}
664 * Internal: wraps and inserts space in numbers.
667 * A matcher-array from preg_replace_callback.
668 * @param string $thbl
669 * The kind of whitespace to add.
671 protected static function numberReplacer($hit, $thbl) {
673 // Html-entity number: don't touch.
676 elseif ($hit[2] != '') {
677 // Html-entity number: don't touch.
680 elseif ($hit[3] != '') {
681 // Don't format german phone-numbers.
684 elseif ($hit[4] != '') {
685 // Date -`iso don't touch.
688 elseif ($hit[5] != '') {
689 // Date -`dd.mm.yyyy don't touch.
692 if (preg_match('/[+-]?\d{5,}/', $hit[6]) == 1) {
693 $dec = preg_replace('/[+-]?\d{1,3}(?=(\d{3})+(?!\d))/', '\0' . $thbl, $hit[6]);
698 $frac = preg_replace('/\d{3}/', '\0' . $thbl, $hit[7]);
699 return '<span class="number">' . $dec . $frac . '</span>';
703 * Wrapping numbers and adding whitespace '&narrownbsp;'.
706 * A matcher-array from preg_replace_callback.
708 protected static function numberNarrownbsp($hit) {
709 return self::numberReplacer($hit, ' ');
713 * Wrapping numbers and adding whitespace ' '.
716 * A matcher-array from preg_replace_callback.
718 protected static function numberThinsp($hit) {
719 return self::numberReplacer($hit, ' ');
723 * Wrapping numbers and adding whitespace by setting margin-left in a span.
726 * A matcher-array from preg_replace_callback.
728 protected static function numberSpan($hit) {
729 $thbl = '<span style="margin-left:0.167em"></span>';
730 return self::numberReplacer($hit, $thbl);
734 * Wrapping numbers and adding whitespace by setting margin-left in a span.
737 * A matcher-array from preg_replace_callback.
739 protected static function numberJustSpan($hit) {
740 return self::numberReplacer($hit, '');
/**
 * Wraps dotted abbreviations and optionally spaces out their parts.
 *
 * Text inside the tags matched by SMARTYPANTS_TAGS_TO_SKIP is left alone.
 *
 * @param string $text
 *   The string to prettify.
 * @param int $attr
 *   The kind of space to use: 1 selects abbrNarrownbsp(), 2 abbrThinsp(),
 *   3 abbrSpan(); anything else (including the default 0) uses abbrAsis().
 * @param mixed $ctx
 *   Not used by this method.
 *
 * @return string
 *   The processed text.
 */
public static function smartAbbreviation($text, $attr = 0, $ctx = NULL) {
  $replace_method = 'abbrAsis';
  // NOTE(review): the condition guarding 'abbrNarrownbsp' was missing from
  // the reviewed listing; $attr == 1 is assumed from the sibling branches.
  if ($attr == 1) {
    $replace_method = 'abbrNarrownbsp';
  }
  elseif ($attr == 2) {
    $replace_method = 'abbrThinsp';
  }
  elseif ($attr == 3) {
    $replace_method = 'abbrSpan';
  }

  // NOTE(review): the empty alternatives after "\s|" are reproduced as they
  // appear in the listing; they look like a non-breaking space that was
  // lost in extraction - confirm against the real file.
  $abbr_finder = '/(?<=\s|)(\p{L}+\.)(\p{L}+\.)+(?=\s|)/u';

  // A real callable: self::$replace_method without parentheses would be a
  // static property fetch, not a reference to the chosen method.
  $callback = array(self::class, $replace_method);

  $result = '';
  // Keep track of when we're inside <pre> or <code> tags.
  $in_pre = 0;

  foreach (self::tokenizeHtml($text) as $cur_token) {
    if ($cur_token[0] == 'tag') {
      // Don't mess with quotes inside tags.
      $result .= $cur_token[1];
      if (preg_match(self::SMARTYPANTS_TAGS_TO_SKIP, $cur_token[1], $matches)) {
        $in_pre = isset($matches[1]) && $matches[1] == '/' ? 0 : 1;
      }
      continue;
    }

    $t = $cur_token[1];
    if (!$in_pre) {
      $t = preg_replace_callback($abbr_finder, $callback, $t);
    }
    $result .= $t;
  }

  return $result;
}
791 * Wrapping abbreviations without adding whitespace.
794 * A matcher-array from preg_replace_callback.
796 public static function abbrAsis($hit) {
797 return '<span class="abbr">' . $hit[0] . '</span>';
801 * Wrapping abbreviations adding whitespace ' '.
804 * A matcher-array from preg_replace_callback.
806 protected static function abbrThinsp($hit) {
807 $res = preg_replace('/\.(\p{L})/u', '. \1', $hit[0]);
808 return '<span class="abbr">' . $res . '</span>';
812 * Wrapping abbreviations adding whitespace '&narrownbsp;'.
815 * A matcher-array from preg_replace_callback.
817 protected static function abbrNarrownbsp($hit) {
818 $res = preg_replace('/\.(\p{L})/u', '. \1', $hit[0]);
819 return '<span class="abbr">' . $res . '</span>';
823 * Wrapping abbreviations adding whitespace by setting margin-left in a span.
826 * A matcher-array from preg_replace_callback.
828 protected static function abbrSpan($hit) {
829 $thbl = '.<span style="margin-left:0.167em"><span style="display:none"> </span></span>';
830 $res = preg_replace('/\.(\p{L})/u', $thbl . '\1', $hit[0]);
831 return '<span class="abbr">' . $res . '</span>';
835 * Wrapping ampersands.
837 * @param string $text
838 * The text to work on.
840 public static function smartAmpersand($text) {
842 $tokens = self::tokenizeHtml($text);
845 // Keep track of when we're inside <pre> or <code> tags.
847 foreach ($tokens as $cur_token) {
848 if ($cur_token[0] == "tag") {
849 // Don't mess with quotes inside tags.
850 $result .= $cur_token[1];
851 if (preg_match(self::SMARTYPANTS_TAGS_TO_SKIP, $cur_token[1], $matches)) {
852 $in_pre = isset($matches[1]) && $matches[1] == '/' ? 0 : 1;
858 $t = Typogrify::amp($t);
869 * Example input: "Isn't this fun?"
870 * Example output: [0]Isn’t this fun?[1];
876 * The string, with "educated" curly quote HTML entities.
878 protected static function educateQuotes($_, $quotes) {
879 // Make our own "punctuation" character class, because the POSIX-style
880 // [:PUNCT:] is only available in Perl 5.6 or later:
881 $punct_class = "[!\"#\\$\\%'()*+,-.\\/:;<=>?\\@\\[\\\\\]\\^_`{|}~]";
883 // Special case if the very first character is a quote
884 // followed by punctuation at a non-word-break.
885 // Close the quotes by brute force:
887 array("/^'(?=$punct_class\\B)/", "/^\"(?=$punct_class\\B)/"),
888 array($quotes[3], $quotes[1]), $_);
890 // Special case for double sets of quotes, e.g.:
891 // <p>He said, "'Quoted' words in a larger quote."</p>.
901 $quotes[0] . $spacer . $quotes[2],
902 $quotes[2] . $spacer . $quotes[0],
903 '\1' . $quotes[1] . $spacer . $quotes[3],
904 '\1' . $quotes[3] . $spacer . $quotes[1],
907 // Special case for decade abbreviations (the '80s):
908 $_ = preg_replace("/'(?=\\d{2}s)/", '’', $_);
910 // Special case for apostroph.
911 $_ = preg_replace("/(\\p{L})(')(?=\\p{L}|$)/u", '\1’', $_);
913 $close_class = '[^\ \t\r\n\[\{\(\-]';
914 $dec_dashes = '&\#8211;|&\#8212;';
916 // Get most opening single quotes:
919 \\s | # a whitespace char, or
920 | # a non-breaking space entity, or
922 &[mn]dash; | # named dash entities
923 $dec_dashes | # or decimal entities
924 &\\#x201[34]; # or hex
927 (?=\\p{L}) # followed by a word character
928 }x", '\1' . $quotes[2], $_);
929 // Single closing quotes:
933 (?(1)| # If $1 captured, then do nothing;
934 (?=\\s | s\\b) # otherwise, positive lookahead for a whitespace
935 ) # char or an 's' at a word ending position. This
936 # is a special case to handle something like:
937 # \"<i>Custer</i>'s Last Stand.\"
938 }xi", '\1' . $quotes[3], $_);
940 // Any remaining single quotes should be opening ones:
941 $_ = str_replace("'", $quotes[2], $_);
943 // Get most opening double quotes:
946 \\s | # a whitespace char, or
947 | # a non-breaking space entity, or
949 &[mn]dash; | # named dash entities
950 $dec_dashes | # or decimal entities
951 &\\#x201[34]; # or hex
954 (?=\\p{L}) # followed by a word character
955 }x", '\1' . $quotes[0], $_);
957 // Double closing quotes:
961 (?(1)|(?=\\s)) # If $1 captured, then do nothing;
962 # if not, then make sure the next char is whitespace.
963 }x", '\1' . $quotes[1], $_);
965 // Any remaining quotes should be opening ones.
966 $_ = str_replace('"', $quotes[0], $_);
/**
 * Educates ``backtick''-style double quotes.
 *
 * Example input:  ``Isn't this fun?''
 * Example output: “Isn't this fun?”
 *
 * NOTE(review): the listing shows literal curly-quote characters as the
 * replacements; if the real file uses numeric entities, keep those.
 *
 * @param string $_
 *   Input text.
 *
 * @return string
 *   The text with each `` replaced by “, each '' by ” and each ,, by „.
 */
protected static function educateBackticks($_) {
  $search = array("``", "''", ",,");
  $replace = array('“', '”', '„');
  return str_replace($search, $replace, $_);
}
/**
 * Educates `backtick'-style single quotes.
 *
 * Example input:  `Isn't this fun?'
 * Example output: ‘Isn’t this fun?’
 *
 * Every backtick becomes an opening quote and every straight apostrophe a
 * closing quote, so apostrophes inside words are converted as well.
 *
 * @param string $_
 *   Input text.
 *
 * @return string
 *   The text with each ` replaced by ‘ and each ' replaced by ’.
 */
protected static function educateSingleBackticks($_) {
  $search = array("`", "'");
  $replace = array('‘', '’');
  return str_replace($search, $replace, $_);
}
/**
 * Educates dashes: every "--" becomes an em-dash.
 *
 * Example input:  Hello -- world
 * Example output: Hello — world
 *
 * @param string $_
 *   Input text.
 *
 * @return string
 *   The text with each instance of "--" replaced by an em-dash. A run of
 *   three hyphens therefore yields an em-dash followed by one hyphen.
 */
protected static function educateDashes($_) {
  return str_replace('--', '—', $_);
}
/**
 * Educates dashes, old-school style: "--" is en, "---" is em.
 *
 * @param string $_
 *   Input text.
 *
 * @return string
 *   The text with each "---" replaced by an em-dash and each remaining "--"
 *   replaced by an en-dash.
 */
protected static function educateDashesOldSchool($_) {
  // Order matters: the three-hyphen form must be consumed first.
  $search = array("---", "--");
  $replace = array('—', '–');
  return str_replace($search, $replace, $_);
}
/**
 * Educates dashes, inverted old-school style: "--" is em, "---" is en.
 *
 * Two reasons why: First, unlike the en- and em-dash syntax supported by
 * educateDashesOldSchool(), it's compatible with existing entries written
 * before SmartyPants 1.1, back when "--" was only used for em-dashes.
 * Second, em-dashes are more common than en-dashes, and so it sort of makes
 * sense that the shortcut should be shorter to type. (Thanks to Aaron
 * Swartz for the idea.)
 *
 * @param string $_
 *   Input text.
 *
 * @return string
 *   The text with each "---" replaced by an en-dash and each remaining "--"
 *   replaced by an em-dash.
 */
public static function educateDashesOldSchoolInverted($_) {
  // Order matters: the three-hyphen form must be consumed first.
  $search = array("---", "--");
  $replace = array('–', '—');
  return str_replace($search, $replace, $_);
}
/**
 * Educates ellipses.
 *
 * Example input:  Huh...?
 * Example output: Huh…?
 *
 * @param string $_
 *   Input text.
 *
 * @return string
 *   The text with each "..." replaced by an ellipsis. The spaced form
 *   ". . ." is converted as well.
 */
protected static function educateEllipses($_) {
  $search = array("...", ". . .");
  return str_replace($search, '…', $_);
}
/**
 * Converts educated punctuation back to plain ASCII.
 *
 * Example input:  “Hello — world.”
 * Example output: "Hello -- world."
 *
 * NOTE(review): the listing shows literal typographic characters as the
 * search strings; if the real file searches for numeric entities, keep
 * those.
 *
 * @param string $_
 *   Input text.
 *
 * @return string
 *   The text with each dash, curly quote, low quote and ellipsis replaced
 *   by its ASCII counterpart.
 */
protected static function stupefyEntities($_) {
  // Dashes: en-dash, em-dash.
  $_ = str_replace(array('–', '—'), array('-', '--'), $_);

  // Single quotes: open, close, low.
  $_ = str_replace(array('‘', '’', '‚'), "'", $_);

  // Double quotes: open, close, low.
  $_ = str_replace(array('“', '”', '„'), '"', $_);

  // Ellipsis.
  return str_replace('…', '...', $_);
}
1129 * The string, with after processing the following backslash
1130 * escape sequences. This is useful if you want to force a "dumb"
1131 * quote or other character to appear.
1133 public static function processEscapes($_) {
1171 * Replaces the space before a "double punctuation mark" (!?:;) with
1173 * Especially useful in french.
1175 public static function spaceToNbsp($text) {
1177 $tokens = self::tokenizeHtml($text);
1180 // Keep track of when we're inside <pre> or <code> tags.
1182 foreach ($tokens as $cur_token) {
1183 if ($cur_token[0] == "tag") {
1184 // Don't mess with quotes inside tags.
1185 $result .= $cur_token[1];
1186 if (preg_match(self::SMARTYPANTS_TAGS_TO_SKIP, $cur_token[1], $matches)) {
1187 $in_pre = isset($matches[1]) && $matches[1] == '/' ? 0 : 1;
1193 $t = preg_replace("/\s([\!\?\:;])/", ' $1', $t);
1204 * Replaces a normal dash with em-dash between whitespaces.
1206 public static function spaceHyphens($text) {
1208 $tokens = self::tokenizeHtml($text);
1211 // Keep track of when we're inside <pre> or <code> tags.
1213 foreach ($tokens as $cur_token) {
1214 if ($cur_token[0] == "tag") {
1215 // Don't mess with quotes inside tags.
1216 $result .= $cur_token[1];
1217 if (preg_match(self::SMARTYPANTS_TAGS_TO_SKIP, $cur_token[1], $matches)) {
1218 $in_pre = isset($matches[1]) && $matches[1] == '/' ? 0 : 1;
1224 $t = preg_replace("/\s(-{1,3})\s/", ' — ', $t);
/**
 * Fallback tokenizer if Markdown is not present.
 *
 * Regular expression derived from the _tokenize() subroutine in
 * Brad Choate's MTRegex plugin.
 * <http://www.bradchoate.com/past/mtregex.php>
 *
 * @param string $str
 *   String containing HTML markup.
 *
 * @return array
 *   An array of the tokens comprising the input string. Each token is either
 *   a tag (possibly with nested tags contained therein, such as
 *   <a href="<MTFoo>" />), or a run of text between tags. Each element of the
 *   array is a two-element array; the first is either 'tag' or 'text'; the
 *   second is the actual value. Empty text runs are not reported.
 */
public static function tokenizeHtml($str) {
  $tokens = array();

  // Comment.
  $match = '(?s:<!(?:--.*?--\s*)+>)|' .
    // Processing instruction.
    // NOTE(review): this alternative was missing from the reviewed listing
    // (only its comment survived); restored from the upstream tokenizer -
    // confirm against the real file.
    '(?s:<\?.*?\?>)|' .
    // Regular tags.
    '(?:<[/!$]?[-a-zA-Z0-9:]+\b(?>[^"\'>]+|"[^"]*"|\'[^\']*\')*>)';

  $parts = preg_split("{($match)}", $str, -1, PREG_SPLIT_DELIM_CAPTURE);
  if ($parts === FALSE) {
    // The split failed (e.g. backtrack limit); treat the input as one run
    // of text rather than iterating over FALSE.
    return $str === '' ? $tokens : array(array('text', $str));
  }

  // With one capturing group, even offsets are text and odd offsets are the
  // captured tags.
  foreach ($parts as $index => $part) {
    if ($index % 2 === 1) {
      $tokens[] = array('tag', $part);
    }
    elseif ($part != '') {
      // Empty text runs used to fall through and be emitted as empty 'tag'
      // tokens; they carry no information, so they are dropped.
      $tokens[] = array('text', $part);
    }
  }

  return $tokens;
}
1277 Copyright and License
1278 ---------------------
1280 Copyright (c) 2003 John Gruber
1281 <http://daringfireball.net/>
1282 All rights reserved.
1284 Copyright (c) 2004-2005 Michel Fortin
1285 <http://www.michelf.com>
1287 Redistribution and use in source and binary forms, with or without
1288 modification, are permitted provided that the following conditions are met:
1290 * Redistributions of source code must retain the above copyright
1291 notice, this list of conditions and the following disclaimer.
1293 * Redistributions in binary form must reproduce the above copyright
1294 notice, this list of conditions and the following disclaimer in the
1295 documentation and/or other materials provided with the distribution.
1297 * Neither the name "SmartyPants" nor the names of its contributors may
1298 be used to endorse or promote products derived from this software
1299 without specific prior written permission.
1301 This software is provided by the copyright holders and contributors "as is"
1302 and any express or implied warranties, including, but not limited to, the
1303 implied warranties of merchantability and fitness for a particular purpose
1304 are disclaimed. In no event shall the copyright owner or contributors be
1305 liable for any direct, indirect, incidental, special, exemplary, or
1306 consequential damages (including, but not limited to, procurement of
1307 substitute goods or services; loss of use, data, or profits; or business
1308 interruption) however caused and on any theory of liability, whether in
1309 contract, strict liability, or tort (including negligence or otherwise)
1310 arising in any way out of the use of this software, even if advised of the
1311 possibility of such damage.